Diffstat (limited to 'drivers/iommu')
56 files changed, 2559 insertions, 667 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 0a33d995d15d..70d29b14d851 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -200,6 +200,7 @@ source "drivers/iommu/riscv/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI + select IRQ_MSI_LIB help Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or @@ -305,7 +306,6 @@ config APPLE_DART depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_DART select IOMMU_API select IOMMU_IO_PGTABLE_DART - default ARCH_APPLE help Support for Apple DART (Device Address Resolution Table) IOMMUs found in Apple ARM SoCs like the M1. diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 994063e5586f..ecef69c11144 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -7,6 +7,7 @@ config AMD_IOMMU select PCI_ATS select PCI_PRI select PCI_PASID + select IRQ_MSI_LIB select MMU_NOTIFIER select IOMMU_API select IOMMU_IOVA diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 29a8864381c3..9b4b589a54b5 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -28,9 +28,9 @@ void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, size_t size); #ifdef CONFIG_AMD_IOMMU_DEBUGFS -void amd_iommu_debugfs_setup(struct amd_iommu *iommu); +void amd_iommu_debugfs_setup(void); #else -static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {} +static inline void amd_iommu_debugfs_setup(void) {} #endif /* Needed for interrupt remapping */ @@ -42,7 +42,9 @@ int amd_iommu_enable_faulting(unsigned int cpu); extern int amd_iommu_guest_ir; extern enum protection_domain_mode amd_iommu_pgtable; extern int amd_iommu_gpt_level; +extern u8 amd_iommu_hpt_level; extern unsigned long amd_iommu_pgsize_bitmap; +extern bool amd_iommu_hatdis; /* Protection domain ops */ void amd_iommu_init_identity_domain(void); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index ccbab3a4811a..5219d7ddfdaa 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -94,6 +94,7 @@ #define FEATURE_GA BIT_ULL(7) #define FEATURE_HE BIT_ULL(8) #define FEATURE_PC BIT_ULL(9) +#define FEATURE_HATS GENMASK_ULL(11, 10) #define FEATURE_GATS GENMASK_ULL(13, 12) #define FEATURE_GLX GENMASK_ULL(15, 14) #define FEATURE_GAM_VAPIC BIT_ULL(21) @@ -460,6 +461,9 @@ /* IOMMU Feature Reporting Field (for IVHD type 10h */ #define IOMMU_FEAT_GASUP_SHIFT 6 +/* IOMMU HATDIS for IVHD type 11h and 40h */ +#define IOMMU_IVHD_ATTR_HATDIS_SHIFT 0 + /* IOMMU Extended Feature Register (EFR) */ #define IOMMU_EFR_XTSUP_SHIFT 2 #define IOMMU_EFR_GASUP_SHIFT 7 @@ -558,7 +562,8 @@ struct amd_io_pgtable { }; enum protection_domain_mode { - PD_MODE_V1 = 1, + PD_MODE_NONE, + PD_MODE_V1, PD_MODE_V2, }; @@ -790,6 +795,8 @@ struct amd_iommu { #ifdef CONFIG_AMD_IOMMU_DEBUGFS /* DebugFS Info */ struct dentry *debugfs; + int dbg_mmio_offset; + int dbg_cap_offset; #endif /* IOPF support */ @@ -891,6 +898,13 @@ struct dev_table_entry { }; /* + * Structure defining one entry in the command buffer + */ +struct iommu_cmd { + u32 data[4]; +}; + +/* * Structure to sture persistent DTE flags from IVHD */ struct ivhd_dte_flags { @@ -1054,7 +1068,6 @@ struct irq_2_irte { }; struct amd_ir_data { - u32 cached_ga_tag; struct amd_iommu *iommu; struct irq_2_irte irq_2_irte; struct msi_msg msi_entry; diff --git 
a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c index 545372fcc72f..10fa217a7119 100644 --- a/drivers/iommu/amd/debugfs.c +++ b/drivers/iommu/amd/debugfs.c @@ -11,22 +11,382 @@ #include <linux/pci.h> #include "amd_iommu.h" +#include "../irq_remapping.h" static struct dentry *amd_iommu_debugfs; -static DEFINE_MUTEX(amd_iommu_debugfs_lock); #define MAX_NAME_LEN 20 +#define OFS_IN_SZ 8 +#define DEVID_IN_SZ 16 -void amd_iommu_debugfs_setup(struct amd_iommu *iommu) +static int sbdf = -1; + +static ssize_t iommu_mmio_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct seq_file *m = filp->private_data; + struct amd_iommu *iommu = m->private; + int ret; + + iommu->dbg_mmio_offset = -1; + + if (cnt > OFS_IN_SZ) + return -EINVAL; + + ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_mmio_offset); + if (ret) + return ret; + + if (iommu->dbg_mmio_offset > iommu->mmio_phys_end - 4) { + iommu->dbg_mmio_offset = -1; + return -EINVAL; + } + + return cnt; +} + +static int iommu_mmio_show(struct seq_file *m, void *unused) +{ + struct amd_iommu *iommu = m->private; + u64 value; + + if (iommu->dbg_mmio_offset < 0) { + seq_puts(m, "Please provide mmio register's offset\n"); + return 0; + } + + value = readq(iommu->mmio_base + iommu->dbg_mmio_offset); + seq_printf(m, "Offset:0x%x Value:0x%016llx\n", iommu->dbg_mmio_offset, value); + + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(iommu_mmio); + +static ssize_t iommu_capability_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct seq_file *m = filp->private_data; + struct amd_iommu *iommu = m->private; + int ret; + + iommu->dbg_cap_offset = -1; + + if (cnt > OFS_IN_SZ) + return -EINVAL; + + ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_cap_offset); + if (ret) + return ret; + + /* Capability register at offset 0x14 is the last IOMMU capability register. 
*/ + if (iommu->dbg_cap_offset > 0x14) { + iommu->dbg_cap_offset = -1; + return -EINVAL; + } + + return cnt; +} + +static int iommu_capability_show(struct seq_file *m, void *unused) +{ + struct amd_iommu *iommu = m->private; + u32 value; + int err; + + if (iommu->dbg_cap_offset < 0) { + seq_puts(m, "Please provide capability register's offset in the range [0x00 - 0x14]\n"); + return 0; + } + + err = pci_read_config_dword(iommu->dev, iommu->cap_ptr + iommu->dbg_cap_offset, &value); + if (err) { + seq_printf(m, "Not able to read capability register at 0x%x\n", + iommu->dbg_cap_offset); + return 0; + } + + seq_printf(m, "Offset:0x%x Value:0x%08x\n", iommu->dbg_cap_offset, value); + + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(iommu_capability); + +static int iommu_cmdbuf_show(struct seq_file *m, void *unused) +{ + struct amd_iommu *iommu = m->private; + struct iommu_cmd *cmd; + unsigned long flag; + u32 head, tail; + int i; + + raw_spin_lock_irqsave(&iommu->lock, flag); + head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + seq_printf(m, "CMD Buffer Head Offset:%d Tail Offset:%d\n", + (head >> 4) & 0x7fff, (tail >> 4) & 0x7fff); + for (i = 0; i < CMD_BUFFER_ENTRIES; i++) { + cmd = (struct iommu_cmd *)(iommu->cmd_buf + i * sizeof(*cmd)); + seq_printf(m, "%3d: %08x %08x %08x %08x\n", i, cmd->data[0], + cmd->data[1], cmd->data[2], cmd->data[3]); + } + raw_spin_unlock_irqrestore(&iommu->lock, flag); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(iommu_cmdbuf); + +static ssize_t devid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct amd_iommu_pci_seg *pci_seg; + int seg, bus, slot, func; + struct amd_iommu *iommu; + char *srcid_ptr; + u16 devid; + int i; + + sbdf = -1; + + if (cnt >= DEVID_IN_SZ) + return -EINVAL; + + srcid_ptr = memdup_user_nul(ubuf, cnt); + if (IS_ERR(srcid_ptr)) + return PTR_ERR(srcid_ptr); + + i = sscanf(srcid_ptr, "%x:%x:%x.%x", &seg, &bus, &slot, &func); + if (i != 4) { + i = sscanf(srcid_ptr, "%x:%x.%x", &bus, &slot, &func); + if (i != 3) { + kfree(srcid_ptr); + return -EINVAL; + } + seg = 0; + } + + devid = PCI_DEVID(bus, PCI_DEVFN(slot, func)); + + /* Check if user device id input is a valid input */ + for_each_pci_segment(pci_seg) { + if (pci_seg->id != seg) + continue; + if (devid > pci_seg->last_bdf) { + kfree(srcid_ptr); + return -EINVAL; + } + iommu = pci_seg->rlookup_table[devid]; + if (!iommu) { + kfree(srcid_ptr); + return -ENODEV; + } + break; + } + + if (pci_seg->id != seg) { + kfree(srcid_ptr); + return -EINVAL; + } + + sbdf = PCI_SEG_DEVID_TO_SBDF(seg, devid); + + kfree(srcid_ptr); + + return cnt; +} + +static int devid_show(struct seq_file *m, void *unused) { + u16 devid; + + if (sbdf >= 0) { + devid = PCI_SBDF_TO_DEVID(sbdf); + seq_printf(m, "%04x:%02x:%02x.%x\n", PCI_SBDF_TO_SEGID(sbdf), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); + } else + seq_puts(m, "No or Invalid input provided\n"); + + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(devid); + +static void dump_dte(struct seq_file *m, struct amd_iommu_pci_seg *pci_seg, u16 devid) +{ + struct dev_table_entry *dev_table; + struct amd_iommu *iommu; + + iommu = pci_seg->rlookup_table[devid]; + if (!iommu) + return; + + dev_table = get_dev_table(iommu); + if (!dev_table) { + seq_puts(m, "Device table not found"); + return; + } + + seq_printf(m, "%-12s %16s %16s %16s %16s iommu\n", "DeviceId", + "QWORD[3]", "QWORD[2]", "QWORD[1]", "QWORD[0]"); + seq_printf(m, "%04x:%02x:%02x.%x ", pci_seg->id, 
PCI_BUS_NUM(devid), + PCI_SLOT(devid), PCI_FUNC(devid)); + for (int i = 3; i >= 0; --i) + seq_printf(m, "%016llx ", dev_table[devid].data[i]); + seq_printf(m, "iommu%d\n", iommu->index); +} + +static int iommu_devtbl_show(struct seq_file *m, void *unused) +{ + struct amd_iommu_pci_seg *pci_seg; + u16 seg, devid; + + if (sbdf < 0) { + seq_puts(m, "Enter a valid device ID to 'devid' file\n"); + return 0; + } + seg = PCI_SBDF_TO_SEGID(sbdf); + devid = PCI_SBDF_TO_DEVID(sbdf); + + for_each_pci_segment(pci_seg) { + if (pci_seg->id != seg) + continue; + dump_dte(m, pci_seg, devid); + break; + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(iommu_devtbl); + +static void dump_128_irte(struct seq_file *m, struct irq_remap_table *table, u16 int_tab_len) +{ + struct irte_ga *ptr, *irte; + int index; + + for (index = 0; index < int_tab_len; index++) { + ptr = (struct irte_ga *)table->table; + irte = &ptr[index]; + + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && + !irte->lo.fields_vapic.valid) + continue; + else if (!irte->lo.fields_remap.valid) + continue; + seq_printf(m, "IRT[%04d] %016llx %016llx\n", index, irte->hi.val, irte->lo.val); + } +} + +static void dump_32_irte(struct seq_file *m, struct irq_remap_table *table, u16 int_tab_len) +{ + union irte *ptr, *irte; + int index; + + for (index = 0; index < int_tab_len; index++) { + ptr = (union irte *)table->table; + irte = &ptr[index]; + + if (!irte->fields.valid) + continue; + seq_printf(m, "IRT[%04d] %08x\n", index, irte->val); + } +} + +static void dump_irte(struct seq_file *m, u16 devid, struct amd_iommu_pci_seg *pci_seg) +{ + struct dev_table_entry *dev_table; + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + u16 int_tab_len; + + table = pci_seg->irq_lookup_table[devid]; + if (!table) { + seq_printf(m, "IRQ lookup table not set for %04x:%02x:%02x:%x\n", + pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); + return; + } + + iommu = pci_seg->rlookup_table[devid]; + if (!iommu) + return; + + dev_table = get_dev_table(iommu); + if (!dev_table) { + seq_puts(m, "Device table not found"); + return; + } + + int_tab_len = dev_table[devid].data[2] & DTE_INTTABLEN_MASK; + if (int_tab_len != DTE_INTTABLEN_512 && int_tab_len != DTE_INTTABLEN_2K) { + seq_puts(m, "The device's DTE contains an invalid IRT length value."); + return; + } + + seq_printf(m, "DeviceId %04x:%02x:%02x.%x\n", pci_seg->id, PCI_BUS_NUM(devid), + PCI_SLOT(devid), PCI_FUNC(devid)); + + raw_spin_lock_irqsave(&table->lock, flags); + if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) + dump_128_irte(m, table, BIT(int_tab_len >> 1)); + else + dump_32_irte(m, table, BIT(int_tab_len >> 1)); + seq_puts(m, "\n"); + raw_spin_unlock_irqrestore(&table->lock, flags); +} + +static int iommu_irqtbl_show(struct seq_file *m, void *unused) +{ + struct amd_iommu_pci_seg *pci_seg; + u16 devid, seg; + + if (!irq_remapping_enabled) { + seq_puts(m, "Interrupt remapping is disabled\n"); + return 0; + } + + if (sbdf < 0) { + seq_puts(m, "Enter a valid device ID to 'devid' file\n"); + return 0; + } + + seg = PCI_SBDF_TO_SEGID(sbdf); + devid = PCI_SBDF_TO_DEVID(sbdf); + + for_each_pci_segment(pci_seg) { + if (pci_seg->id != seg) + continue; + dump_irte(m, devid, pci_seg); + break; + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(iommu_irqtbl); + +void amd_iommu_debugfs_setup(void) +{ + struct amd_iommu *iommu; char name[MAX_NAME_LEN + 1]; - mutex_lock(&amd_iommu_debugfs_lock); - if (!amd_iommu_debugfs) - amd_iommu_debugfs = debugfs_create_dir("amd", - iommu_debugfs_dir); 
- mutex_unlock(&amd_iommu_debugfs_lock); + amd_iommu_debugfs = debugfs_create_dir("amd", iommu_debugfs_dir); + + for_each_iommu(iommu) { + iommu->dbg_mmio_offset = -1; + iommu->dbg_cap_offset = -1; + + snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index); + iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs); + + debugfs_create_file("mmio", 0644, iommu->debugfs, iommu, + &iommu_mmio_fops); + debugfs_create_file("capability", 0644, iommu->debugfs, iommu, + &iommu_capability_fops); + debugfs_create_file("cmdbuf", 0444, iommu->debugfs, iommu, + &iommu_cmdbuf_fops); + } - snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index); - iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs); + debugfs_create_file("devid", 0644, amd_iommu_debugfs, NULL, + &devid_fops); + debugfs_create_file("devtbl", 0444, amd_iommu_debugfs, NULL, + &iommu_devtbl_fops); + debugfs_create_file("irqtbl", 0444, amd_iommu_debugfs, NULL, + &iommu_irqtbl_fops); } diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 9c17dfa76703..7b5af6176de9 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -152,6 +152,8 @@ bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1; +/* Host page table level */ +u8 amd_iommu_hpt_level; /* Guest page table level */ int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL; @@ -168,6 +170,9 @@ static int amd_iommu_target_ivhd_type; u64 amd_iommu_efr; u64 amd_iommu_efr2; +/* Host (v1) page table is not supported*/ +bool amd_iommu_hatdis; + /* SNP is enabled on the system? */ bool amd_iommu_snp_en; EXPORT_SYMBOL(amd_iommu_snp_en); @@ -1792,6 +1797,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; + if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) { + pr_warn_once("Host Address Translation is not supported.\n"); + amd_iommu_hatdis = true; + } + early_iommu_features_init(iommu, h); break; @@ -2112,7 +2122,15 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) return ret; } - iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); + ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); + if (ret || amd_iommu_pgtable == PD_MODE_NONE) { + /* + * Remove sysfs if DMA translation is not supported by the + * IOMMU. Do not return an error to enable IRQ remapping + * in state_next(), DTE[V, TV] must eventually be set to 0. + */ + iommu_device_sysfs_remove(&iommu->iommu); + } return pci_enable_device(iommu->dev); } @@ -2573,7 +2591,7 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) u32 devid; struct dev_table_entry *dev_table = pci_seg->dev_table; - if (dev_table == NULL) + if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE) return; for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { @@ -3033,6 +3051,7 @@ static int __init early_amd_iommu_init(void) struct acpi_table_header *ivrs_base; int ret; acpi_status status; + u8 efr_hats; if (!amd_iommu_detected) return -ENODEV; @@ -3077,6 +3096,19 @@ static int __init early_amd_iommu_init(void) FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL) amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; + efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr); + if (efr_hats != 0x3) { + /* + * efr[HATS] bits specify the maximum host translation level + * supported, with LEVEL 4 being initial max level. 
+ */ + amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL; + } else { + pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n", + efr_hats); + amd_iommu_hatdis = true; + } + if (amd_iommu_pgtable == PD_MODE_V2) { if (!amd_iommu_v2_pgtbl_supported()) { pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); @@ -3084,6 +3116,17 @@ static int __init early_amd_iommu_init(void) } } + if (amd_iommu_hatdis) { + /* + * Host (v1) page table is not available. Attempt to use + * Guest (v2) page table. + */ + if (amd_iommu_v2_pgtbl_supported()) + amd_iommu_pgtable = PD_MODE_V2; + else + amd_iommu_pgtable = PD_MODE_NONE; + } + /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); @@ -3376,7 +3419,6 @@ int amd_iommu_enable_faulting(unsigned int cpu) */ static int __init amd_iommu_init(void) { - struct amd_iommu *iommu; int ret; ret = iommu_go_to_state(IOMMU_INITIALIZED); @@ -3390,8 +3432,8 @@ static int __init amd_iommu_init(void) } #endif - for_each_iommu(iommu) - amd_iommu_debugfs_setup(iommu); + if (!ret) + amd_iommu_debugfs_setup(); return ret; } diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 4d308c071134..a91e71f981ef 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -125,7 +125,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable, goto out; ret = false; - if (WARN_ON_ONCE(pgtable->mode == PAGE_MODE_6_LEVEL)) + if (WARN_ON_ONCE(pgtable->mode == amd_iommu_hpt_level)) goto out; *pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root)); @@ -526,7 +526,7 @@ static void v1_free_pgtable(struct io_pgtable *iop) /* Page-table is not visible to IOMMU anymore, so free it */ BUG_ON(pgtable->mode < PAGE_MODE_NONE || - pgtable->mode > PAGE_MODE_6_LEVEL); + pgtable->mode > amd_iommu_hpt_level); free_sub_pt(pgtable->root, pgtable->mode, &freelist); iommu_put_pages_list(&freelist); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 3117d99cf83d..eb348c63a8d0 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -25,6 +25,7 @@ #include <linux/notifier.h> #include <linux/export.h> #include <linux/irq.h> +#include <linux/irqchip/irq-msi-lib.h> #include <linux/msi.h> #include <linux/irqdomain.h> #include <linux/percpu.h> @@ -63,13 +64,6 @@ static const struct iommu_dirty_ops amd_dirty_ops; int amd_iommu_max_glx_val = -1; /* - * general struct to manage commands send to an IOMMU - */ -struct iommu_cmd { - u32 data[4]; -}; - -/* * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. 
*/ @@ -634,8 +628,8 @@ static inline void pdev_disable_cap_pasid(struct pci_dev *pdev) static void pdev_enable_caps(struct pci_dev *pdev) { - pdev_enable_cap_ats(pdev); pdev_enable_cap_pasid(pdev); + pdev_enable_cap_ats(pdev); pdev_enable_cap_pri(pdev); } @@ -2424,6 +2418,13 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) pci_max_pasids(to_pci_dev(dev))); } + if (amd_iommu_pgtable == PD_MODE_NONE) { + pr_warn_once("%s: DMA translation not supported by iommu.\n", + __func__); + iommu_dev = ERR_PTR(-ENODEV); + goto out_err; + } + out_err: iommu_completion_wait(iommu); @@ -2511,6 +2512,9 @@ static int pdom_setup_pgtable(struct protection_domain *domain, case PD_MODE_V2: fmt = AMD_IOMMU_V2; break; + case PD_MODE_NONE: + WARN_ON_ONCE(1); + return -EPERM; } domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev); @@ -2524,14 +2528,30 @@ static int pdom_setup_pgtable(struct protection_domain *domain, static inline u64 dma_max_address(enum protection_domain_mode pgtable) { if (pgtable == PD_MODE_V1) - return ~0ULL; + return PM_LEVEL_SIZE(amd_iommu_hpt_level); - /* V2 with 4/5 level page table */ - return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1); + /* + * V2 with 4/5 level page table. Note that "2.2.6.5 AMD64 4-Kbyte Page + * Translation" shows that the V2 table sign extends the top of the + * address space creating a reserved region in the middle of the + * translation, just like the CPU does. Further Vasant says the docs are + * incomplete and this only applies to non-zero PASIDs. If the AMDv2 + * page table is assigned to the 0 PASID then there is no sign extension + * check. + * + * Since the IOMMU must have a fixed geometry, and the core code does + * not understand sign extended addressing, we have to chop off the high + * bit to get consistent behavior with attachments of the domain to any + * PASID. + */ + return ((1ULL << (PM_LEVEL_SHIFT(amd_iommu_gpt_level) - 1)) - 1); } static bool amd_iommu_hd_support(struct amd_iommu *iommu) { + if (amd_iommu_hatdis) + return false; + return iommu && (iommu->features & FEATURE_HDSUP); } @@ -3804,13 +3824,70 @@ static const struct irq_domain_ops amd_ir_domain_ops = { .deactivate = irq_remapping_deactivate, }; -int amd_iommu_activate_guest_mode(void *data) +static void __amd_iommu_update_ga(struct irte_ga *entry, int cpu, + bool ga_log_intr) +{ + if (cpu >= 0) { + entry->lo.fields_vapic.destination = + APICID_TO_IRTE_DEST_LO(cpu); + entry->hi.fields.destination = + APICID_TO_IRTE_DEST_HI(cpu); + entry->lo.fields_vapic.is_run = true; + entry->lo.fields_vapic.ga_log_intr = false; + } else { + entry->lo.fields_vapic.is_run = false; + entry->lo.fields_vapic.ga_log_intr = ga_log_intr; + } +} + +/* + * Update the pCPU information for an IRTE that is configured to post IRQs to + * a vCPU, without issuing an IOMMU invalidation for the IRTE. + * + * If the vCPU is associated with a pCPU (@cpu >= 0), configure the Destination + * with the pCPU's APIC ID, set IsRun, and clear GALogIntr. If the vCPU isn't + * associated with a pCPU (@cpu < 0), clear IsRun and set/clear GALogIntr based + * on input from the caller (e.g. KVM only requests GALogIntr when the vCPU is + * blocking and requires a notification wake event). I.e. treat vCPUs that are + * associated with a pCPU as running. This API is intended to be used when a + * vCPU is scheduled in/out (or stops running for any reason), to do a fast + * update of IsRun, GALogIntr, and (conditionally) Destination. 
+ * + * Per the IOMMU spec, the Destination, IsRun, and GATag fields are not cached + * and thus don't require an invalidation to ensure the IOMMU consumes fresh + * information. + */ +int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr) +{ + struct amd_ir_data *ir_data = (struct amd_ir_data *)data; + struct irte_ga *entry = (struct irte_ga *) ir_data->entry; + + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + + if (!entry || !entry->lo.fields_vapic.guest_mode) + return 0; + + if (!ir_data->iommu) + return -ENODEV; + + __amd_iommu_update_ga(entry, cpu, ga_log_intr); + + return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, + ir_data->irq_2_irte.index, entry); +} +EXPORT_SYMBOL(amd_iommu_update_ga); + +int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr) { struct amd_ir_data *ir_data = (struct amd_ir_data *)data; struct irte_ga *entry = (struct irte_ga *) ir_data->entry; u64 valid; - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry) + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + + if (!entry) return 0; valid = entry->lo.fields_vapic.valid; @@ -3820,11 +3897,12 @@ int amd_iommu_activate_guest_mode(void *data) entry->lo.fields_vapic.valid = valid; entry->lo.fields_vapic.guest_mode = 1; - entry->lo.fields_vapic.ga_log_intr = 1; entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr; entry->hi.fields.vector = ir_data->ga_vector; entry->lo.fields_vapic.ga_tag = ir_data->ga_tag; + __amd_iommu_update_ga(entry, cpu, ga_log_intr); + return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, ir_data->irq_2_irte.index, entry); } @@ -3837,8 +3915,10 @@ int amd_iommu_deactivate_guest_mode(void *data) struct irq_cfg *cfg = ir_data->cfg; u64 valid; - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || - !entry || !entry->lo.fields_vapic.guest_mode) + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + + if (!entry || !entry->lo.fields_vapic.guest_mode) return 0; valid = entry->lo.fields_remap.valid; @@ -3860,11 +3940,10 @@ int amd_iommu_deactivate_guest_mode(void *data) } EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); -static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) +static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *info) { int ret; - struct amd_iommu_pi_data *pi_data = vcpu_info; - struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data; + struct amd_iommu_pi_data *pi_data = info; struct amd_ir_data *ir_data = data->chip_data; struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct iommu_dev_data *dev_data; @@ -3885,25 +3964,20 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) return -EINVAL; ir_data->cfg = irqd_cfg(data); - pi_data->ir_data = ir_data; - pi_data->prev_ga_tag = ir_data->cached_ga_tag; - if (pi_data->is_guest_mode) { - ir_data->ga_root_ptr = (pi_data->base >> 12); - ir_data->ga_vector = vcpu_pi_info->vector; + if (pi_data) { + pi_data->ir_data = ir_data; + + ir_data->ga_root_ptr = (pi_data->vapic_addr >> 12); + ir_data->ga_vector = pi_data->vector; ir_data->ga_tag = pi_data->ga_tag; - ret = amd_iommu_activate_guest_mode(ir_data); - if (!ret) - ir_data->cached_ga_tag = pi_data->ga_tag; + if (pi_data->is_guest_mode) + ret = amd_iommu_activate_guest_mode(ir_data, pi_data->cpu, + pi_data->ga_log_intr); + else + ret = amd_iommu_deactivate_guest_mode(ir_data); } else { ret = amd_iommu_deactivate_guest_mode(ir_data); - - /* - * This communicates the ga_tag 
back to the caller - * so that it can do all the necessary clean up. - */ - if (!ret) - ir_data->cached_ga_tag = 0; } return ret; @@ -3970,54 +4044,30 @@ static struct irq_chip amd_ir_chip = { static const struct msi_parent_ops amdvi_msi_parent_ops = { .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI, + .bus_select_token = DOMAIN_BUS_AMDVI, + .bus_select_mask = MATCH_PCI_MSI, .prefix = "IR-", .init_dev_msi_info = msi_parent_init_dev_msi_info, }; int amd_iommu_create_irq_domain(struct amd_iommu *iommu) { - struct fwnode_handle *fn; + struct irq_domain_info info = { + .fwnode = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index), + .ops = &amd_ir_domain_ops, + .domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI, + .host_data = iommu, + .parent = arch_get_ir_parent_domain(), + }; - fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index); - if (!fn) + if (!info.fwnode) return -ENOMEM; - iommu->ir_domain = irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 0, 0, - fn, &amd_ir_domain_ops, iommu); + + iommu->ir_domain = msi_create_parent_irq_domain(&info, &amdvi_msi_parent_ops); if (!iommu->ir_domain) { - irq_domain_free_fwnode(fn); + irq_domain_free_fwnode(info.fwnode); return -ENOMEM; } - - irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI); - iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT | - IRQ_DOMAIN_FLAG_ISOLATED_MSI; - iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops; - return 0; } - -int amd_iommu_update_ga(int cpu, bool is_run, void *data) -{ - struct amd_ir_data *ir_data = (struct amd_ir_data *)data; - struct irte_ga *entry = (struct irte_ga *) ir_data->entry; - - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || - !entry || !entry->lo.fields_vapic.guest_mode) - return 0; - - if (!ir_data->iommu) - return -ENODEV; - - if (cpu >= 0) { - entry->lo.fields_vapic.destination = - APICID_TO_IRTE_DEST_LO(cpu); - entry->hi.fields.destination = - APICID_TO_IRTE_DEST_HI(cpu); - } - entry->lo.fields_vapic.is_run = is_run; - - return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, - ir_data->irq_2_irte.index, entry); -} -EXPORT_SYMBOL(amd_iommu_update_ga); #endif diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 757d24f67ad4..190f28d76615 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -991,7 +991,6 @@ static const struct iommu_ops apple_dart_iommu_ops = { .of_xlate = apple_dart_of_xlate, .def_domain_type = apple_dart_def_domain_type, .get_resv_regions = apple_dart_get_resv_regions, - .pgsize_bitmap = -1UL, /* Restricted during dart probe */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = apple_dart_attach_dev_paging, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index e4fd8d522af8..8cd8929bbfdf 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -7,13 +7,22 @@ #include "arm-smmu-v3.h" -void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type) +void *arm_smmu_hw_info(struct device *dev, u32 *length, + enum iommu_hw_info_type *type) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); + const struct arm_smmu_impl_ops *impl_ops = master->smmu->impl_ops; struct iommu_hw_info_arm_smmuv3 *info; u32 __iomem *base_idr; unsigned int i; + if (*type != IOMMU_HW_INFO_TYPE_DEFAULT && + *type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3) { + if (!impl_ops || !impl_ops->hw_info) + return 
ERR_PTR(-EOPNOTSUPP); + return impl_ops->hw_info(master->smmu, length, type); + } + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return ERR_PTR(-ENOMEM); @@ -216,7 +225,7 @@ static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg, return 0; } -static struct iommu_domain * +struct iommu_domain * arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, const struct iommu_user_data *user_data) { @@ -327,8 +336,8 @@ static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu, return 0; } -static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu, - struct iommu_user_data_array *array) +int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu, + struct iommu_user_data_array *array) { struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); struct arm_smmu_device *smmu = vsmmu->smmu; @@ -382,25 +391,14 @@ static const struct iommufd_viommu_ops arm_vsmmu_ops = { .cache_invalidate = arm_vsmmu_cache_invalidate, }; -struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, - struct iommu_domain *parent, - struct iommufd_ctx *ictx, - unsigned int viommu_type) +size_t arm_smmu_get_viommu_size(struct device *dev, + enum iommu_viommu_type viommu_type) { - struct arm_smmu_device *smmu = - iommu_get_iommu_dev(dev, struct arm_smmu_device, iommu); struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct arm_smmu_domain *s2_parent = to_smmu_domain(parent); - struct arm_vsmmu *vsmmu; - - if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3) - return ERR_PTR(-EOPNOTSUPP); + struct arm_smmu_device *smmu = master->smmu; if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) - return ERR_PTR(-EOPNOTSUPP); - - if (s2_parent->smmu != master->smmu) - return ERR_PTR(-EINVAL); + return 0; /* * FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW @@ -408,7 +406,7 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, * any change to remove this. 
*/ if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) - return ERR_PTR(-EOPNOTSUPP); + return 0; /* * Must support some way to prevent the VM from bypassing the cache @@ -420,19 +418,39 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, */ if (!arm_smmu_master_canwbs(master) && !(smmu->features & ARM_SMMU_FEAT_S2FWB)) - return ERR_PTR(-EOPNOTSUPP); + return 0; - vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core, - &arm_vsmmu_ops); - if (IS_ERR(vsmmu)) - return ERR_CAST(vsmmu); + if (viommu_type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3) + return VIOMMU_STRUCT_SIZE(struct arm_vsmmu, core); + + if (!smmu->impl_ops || !smmu->impl_ops->get_viommu_size) + return 0; + return smmu->impl_ops->get_viommu_size(viommu_type); +} + +int arm_vsmmu_init(struct iommufd_viommu *viommu, + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data) +{ + struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); + struct arm_smmu_device *smmu = + container_of(viommu->iommu_dev, struct arm_smmu_device, iommu); + struct arm_smmu_domain *s2_parent = to_smmu_domain(parent_domain); + + if (s2_parent->smmu != smmu) + return -EINVAL; vsmmu->smmu = smmu; vsmmu->s2_parent = s2_parent; /* FIXME Move VMID allocation from the S2 domain allocation to here */ vsmmu->vmid = s2_parent->s2_cfg.vmid; - return &vsmmu->core; + if (viommu->type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3) { + viommu->ops = &arm_vsmmu_ops; + return 0; + } + + return smmu->impl_ops->vsmmu_init(vsmmu, user_data); } int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 0601dece0a0d..59a480974d80 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -220,6 +220,9 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) feat_mask |= ARM_SMMU_FEAT_VAX; } + if (system_supports_bbml2_noabort()) + feat_mask |= ARM_SMMU_FEAT_BBML2; + if ((smmu->features & feat_mask) != feat_mask) return false; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 10cc6dc26b7b..5968043ac802 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -38,7 +38,7 @@ module_param(disable_msipolling, bool, 0444); MODULE_PARM_DESC(disable_msipolling, "Disable MSI-based polling for CMD_SYNC completion."); -static struct iommu_ops arm_smmu_ops; +static const struct iommu_ops arm_smmu_ops; static struct iommu_dirty_ops arm_smmu_dirty_ops; enum arm_smmu_msi_index { @@ -2906,8 +2906,8 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); if (!master_domain) { - kfree(state->vmaster); - return -ENOMEM; + ret = -ENOMEM; + goto err_free_vmaster; } master_domain->domain = new_domain; master_domain->master = master; @@ -2941,7 +2941,6 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, !arm_smmu_master_canwbs(master)) { spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - kfree(state->vmaster); ret = -EINVAL; goto err_iopf; } @@ -2967,6 +2966,8 @@ err_iopf: arm_smmu_disable_iopf(master, master_domain); err_free_master_domain: kfree(master_domain); +err_free_vmaster: + kfree(state->vmaster); return ret; } @@ -3674,7 +3675,7 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } -static struct iommu_ops arm_smmu_ops = { +static const 
struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, @@ -3688,9 +3689,9 @@ static struct iommu_ops arm_smmu_ops = { .get_resv_regions = arm_smmu_get_resv_regions, .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, - .viommu_alloc = arm_vsmmu_alloc, + .get_viommu_size = arm_smmu_get_viommu_size, + .viommu_init = arm_vsmmu_init, .user_pasid_table = 1, - .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = arm_smmu_attach_dev, @@ -4457,6 +4458,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) if (FIELD_GET(IDR3_FWB, reg)) smmu->features |= ARM_SMMU_FEAT_S2FWB; + if (FIELD_GET(IDR3_BBM, reg) == 2) + smmu->features |= ARM_SMMU_FEAT_BBML2; + /* IDR5 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); @@ -4504,11 +4508,6 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->oas = 48; } - if (arm_smmu_ops.pgsize_bitmap == -1UL) - arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap; - else - arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap; - /* Set the DMA mask for our table walker */ if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas))) dev_warn(smmu->dev, @@ -4702,6 +4701,7 @@ static void arm_smmu_impl_remove(void *data) static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) { struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV); + const struct arm_smmu_impl_ops *ops; int ret; if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV)) @@ -4712,11 +4712,24 @@ static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) if (IS_ERR(new_smmu)) return new_smmu; + ops = new_smmu->impl_ops; + if (ops) { + /* get_viommu_size and vsmmu_init ops must be paired */ + if (WARN_ON(!ops->get_viommu_size != !ops->vsmmu_init)) { + ret = -EINVAL; + goto err_remove; + } + } + ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove, new_smmu); if (ret) return ERR_PTR(ret); return new_smmu; + +err_remove: + arm_smmu_impl_remove(new_smmu); + return ERR_PTR(ret); } static int arm_smmu_device_probe(struct platform_device *pdev) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index ea41d790463e..ae23aacc3840 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -16,6 +16,7 @@ #include <linux/sizes.h> struct arm_smmu_device; +struct arm_vsmmu; /* MMIO registers */ #define ARM_SMMU_IDR0 0x0 @@ -60,6 +61,7 @@ struct arm_smmu_device; #define ARM_SMMU_IDR3 0xc #define IDR3_FWB (1 << 8) #define IDR3_RIL (1 << 10) +#define IDR3_BBM GENMASK(12, 11) #define ARM_SMMU_IDR5 0x14 #define IDR5_STALL_MAX GENMASK(31, 16) @@ -720,6 +722,16 @@ struct arm_smmu_impl_ops { int (*init_structures)(struct arm_smmu_device *smmu); struct arm_smmu_cmdq *(*get_secondary_cmdq)( struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent); + /* + * An implementation should define its own type other than the default + * IOMMU_HW_INFO_TYPE_ARM_SMMUV3. And it must validate the input @type + * to return its own structure. 
+ */ + void *(*hw_info)(struct arm_smmu_device *smmu, u32 *length, + enum iommu_hw_info_type *type); + size_t (*get_viommu_size)(enum iommu_viommu_type viommu_type); + int (*vsmmu_init)(struct arm_vsmmu *vsmmu, + const struct iommu_user_data *user_data); }; /* An SMMUv3 instance */ @@ -755,6 +767,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_HA (1 << 21) #define ARM_SMMU_FEAT_HD (1 << 22) #define ARM_SMMU_FEAT_S2FWB (1 << 23) +#define ARM_SMMU_FEAT_BBML2 (1 << 24) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -1033,19 +1046,29 @@ struct arm_vsmmu { }; #if IS_ENABLED(CONFIG_ARM_SMMU_V3_IOMMUFD) -void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type); -struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev, - struct iommu_domain *parent, - struct iommufd_ctx *ictx, - unsigned int viommu_type); +void *arm_smmu_hw_info(struct device *dev, u32 *length, + enum iommu_hw_info_type *type); +size_t arm_smmu_get_viommu_size(struct device *dev, + enum iommu_viommu_type viommu_type); +int arm_vsmmu_init(struct iommufd_viommu *viommu, + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data); int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, struct arm_smmu_nested_domain *nested_domain); void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state); void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master); int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt); +struct iommu_domain * +arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, + const struct iommu_user_data *user_data); +int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu, + struct iommu_user_data_array *array); #else +#define arm_smmu_get_viommu_size NULL #define arm_smmu_hw_info NULL -#define arm_vsmmu_alloc NULL +#define arm_vsmmu_init NULL +#define arm_vsmmu_alloc_domain_nested NULL +#define arm_vsmmu_cache_invalidate NULL static inline int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state, diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index dd7d030d2e89..be1aaaf8cd17 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -8,7 +8,9 @@ #include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/iommu.h> +#include <linux/iommufd.h> #include <linux/iopoll.h> +#include <uapi/linux/iommufd.h> #include <acpi/acpixf.h> @@ -26,8 +28,10 @@ #define CMDQV_EN BIT(0) #define TEGRA241_CMDQV_PARAM 0x0004 +#define CMDQV_NUM_SID_PER_VM_LOG2 GENMASK(15, 12) #define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8) #define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4) +#define CMDQV_VER GENMASK(3, 0) #define TEGRA241_CMDQV_STATUS 0x0008 #define CMDQV_ENABLED BIT(0) @@ -53,6 +57,9 @@ #define VINTF_STATUS GENMASK(3, 1) #define VINTF_ENABLED BIT(0) +#define TEGRA241_VINTF_SID_MATCH(s) (0x0040 + 0x4*(s)) +#define TEGRA241_VINTF_SID_REPLACE(s) (0x0080 + 0x4*(s)) + #define TEGRA241_VINTF_LVCMDQ_ERR_MAP_64(m) \ (0x00C0 + 0x8*(m)) #define LVCMDQ_ERR_MAP_NUM_64 2 @@ -114,16 +121,20 @@ MODULE_PARM_DESC(bypass_vcmdq, /** * struct tegra241_vcmdq - Virtual Command Queue + * @core: Embedded iommufd_hw_queue structure * @idx: Global index in the CMDQV * @lidx: Local index in the VINTF * @enabled: Enable status * @cmdqv: Parent CMDQV pointer * @vintf: Parent VINTF pointer + * @prev: Previous LVCMDQ to depend on * @cmdq: Command Queue struct * @page0: MMIO Page0 base address * @page1: MMIO Page1 
base address */ struct tegra241_vcmdq { + struct iommufd_hw_queue core; + u16 idx; u16 lidx; @@ -131,22 +142,30 @@ struct tegra241_vcmdq { struct tegra241_cmdqv *cmdqv; struct tegra241_vintf *vintf; + struct tegra241_vcmdq *prev; struct arm_smmu_cmdq cmdq; void __iomem *page0; void __iomem *page1; }; +#define hw_queue_to_vcmdq(v) container_of(v, struct tegra241_vcmdq, core) /** * struct tegra241_vintf - Virtual Interface + * @vsmmu: Embedded arm_vsmmu structure * @idx: Global index in the CMDQV * @enabled: Enable status * @hyp_own: Owned by hypervisor (in-kernel) * @cmdqv: Parent CMDQV pointer * @lvcmdqs: List of logical VCMDQ pointers + * @lvcmdq_mutex: Lock to serialize user-allocated lvcmdqs * @base: MMIO base address + * @mmap_offset: Offset argument for mmap() syscall + * @sids: Stream ID mapping resources */ struct tegra241_vintf { + struct arm_vsmmu vsmmu; + u16 idx; bool enabled; @@ -154,19 +173,41 @@ struct tegra241_vintf { struct tegra241_cmdqv *cmdqv; struct tegra241_vcmdq **lvcmdqs; + struct mutex lvcmdq_mutex; /* user space race */ void __iomem *base; + unsigned long mmap_offset; + + struct ida sids; }; +#define viommu_to_vintf(v) container_of(v, struct tegra241_vintf, vsmmu.core) + +/** + * struct tegra241_vintf_sid - Virtual Interface Stream ID Mapping + * @core: Embedded iommufd_vdevice structure, holding virtual Stream ID + * @vintf: Parent VINTF pointer + * @sid: Physical Stream ID + * @idx: Mapping index in the VINTF + */ +struct tegra241_vintf_sid { + struct iommufd_vdevice core; + struct tegra241_vintf *vintf; + u32 sid; + u8 idx; +}; +#define vdev_to_vsid(v) container_of(v, struct tegra241_vintf_sid, core) /** * struct tegra241_cmdqv - CMDQ-V for SMMUv3 * @smmu: SMMUv3 device * @dev: CMDQV device * @base: MMIO base address + * @base_phys: MMIO physical base address, for mmap * @irq: IRQ number * @num_vintfs: Total number of VINTFs * @num_vcmdqs: Total number of VCMDQs * @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF + * @num_sids_per_vintf: Total number of SID mappings per VINTF * @vintf_ids: VINTF id allocator * @vintfs: List of VINTFs */ @@ -175,12 +216,14 @@ struct tegra241_cmdqv { struct device *dev; void __iomem *base; + phys_addr_t base_phys; int irq; /* CMDQV Hardware Params */ u16 num_vintfs; u16 num_vcmdqs; u16 num_lvcmdqs_per_vintf; + u16 num_sids_per_vintf; struct ida vintf_ids; @@ -252,6 +295,20 @@ static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval) /* ISR Functions */ +static void tegra241_vintf_user_handle_error(struct tegra241_vintf *vintf) +{ + struct iommufd_viommu *viommu = &vintf->vsmmu.core; + struct iommu_vevent_tegra241_cmdqv vevent_data; + int i; + + for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) + vevent_data.lvcmdq_err_map[i] = + readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i))); + + iommufd_viommu_report_event(viommu, IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV, + &vevent_data, sizeof(vevent_data)); +} + static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf) { int i; @@ -297,6 +354,14 @@ static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid) vintf_map &= ~BIT_ULL(0); } + /* Handle other user VINTFs and their LVCMDQs */ + while (vintf_map) { + unsigned long idx = __ffs64(vintf_map); + + tegra241_vintf_user_handle_error(cmdqv->vintfs[idx]); + vintf_map &= ~BIT_ULL(idx); + } + return IRQ_HANDLED; } @@ -351,6 +416,30 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, /* HW Reset Functions */ +/* + * When a guest-owned VCMDQ is disabled, if the guest did not enqueue a CMD_SYNC + * following 
an ATC_INV command at the end of the guest queue while this ATC_INV + * is timed out, the TIMEOUT will not be reported until this VCMDQ gets assigned + * to the next VM, which will be a false alarm potentially causing some unwanted + * behavior in the new VM. Thus, a guest-owned VCMDQ must flush the TIMEOUT when + * it gets disabled. This can be done by just issuing a CMD_SYNC to SMMU CMDQ. + */ +static void tegra241_vcmdq_hw_flush_timeout(struct tegra241_vcmdq *vcmdq) +{ + struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu; + u64 cmd_sync[CMDQ_ENT_DWORDS] = {}; + + cmd_sync[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) | + FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE); + + /* + * It does not hurt to insert another CMD_SYNC, taking advantage of the + * arm_smmu_cmdq_issue_cmdlist() that waits for the CMD_SYNC completion. + */ + arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, cmd_sync, 1, true); +} + +/* This function is for LVCMDQ, so @vcmdq must not be unmapped yet */ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) { char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); @@ -363,6 +452,8 @@ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)), readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS))); } + tegra241_vcmdq_hw_flush_timeout(vcmdq); + writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, PROD)); writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, CONS)); writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE)); @@ -379,6 +470,7 @@ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) dev_dbg(vcmdq->cmdqv->dev, "%sdeinited\n", h); } +/* This function is for LVCMDQ, so @vcmdq must be mapped prior */ static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) { char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); @@ -404,14 +496,45 @@ static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) return 0; } +/* Unmap a global VCMDQ from the pre-assigned LVCMDQ */ +static void tegra241_vcmdq_unmap_lvcmdq(struct tegra241_vcmdq *vcmdq) +{ + u32 regval = readl(REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx))); + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); + + writel(regval & ~CMDQV_CMDQ_ALLOCATED, + REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx))); + dev_dbg(vcmdq->cmdqv->dev, "%sunmapped\n", h); +} + static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf) { - u16 lidx; + u16 lidx = vintf->cmdqv->num_lvcmdqs_per_vintf; + int sidx; - for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) - if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) + /* HW requires to unmap LVCMDQs in descending order */ + while (lidx--) { + if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) { tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); + tegra241_vcmdq_unmap_lvcmdq(vintf->lvcmdqs[lidx]); + } + } vintf_write_config(vintf, 0); + for (sidx = 0; sidx < vintf->cmdqv->num_sids_per_vintf; sidx++) { + writel(0, REG_VINTF(vintf, SID_MATCH(sidx))); + writel(0, REG_VINTF(vintf, SID_REPLACE(sidx))); + } +} + +/* Map a global VCMDQ to the pre-assigned LVCMDQ */ +static void tegra241_vcmdq_map_lvcmdq(struct tegra241_vcmdq *vcmdq) +{ + u32 regval = readl(REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx))); + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); + + writel(regval | CMDQV_CMDQ_ALLOCATED, + REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx))); + dev_dbg(vcmdq->cmdqv->dev, "%smapped\n", h); } static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) @@ -429,7 +552,8 @@ static int 
tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) * whether enabling it here or not, as !HYP_OWN cmdq HWs only support a * restricted set of supported commands. */ - regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own); + regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own) | + FIELD_PREP(VINTF_VMID, vintf->vsmmu.vmid); writel(regval, REG_VINTF(vintf, CONFIG)); ret = vintf_write_config(vintf, regval | VINTF_EN); @@ -441,8 +565,10 @@ static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) */ vintf->hyp_own = !!(VINTF_HYP_OWN & readl(REG_VINTF(vintf, CONFIG))); + /* HW requires to map LVCMDQs in ascending order */ for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) { if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) { + tegra241_vcmdq_map_lvcmdq(vintf->lvcmdqs[lidx]); ret = tegra241_vcmdq_hw_init(vintf->lvcmdqs[lidx]); if (ret) { tegra241_vintf_hw_deinit(vintf); @@ -476,7 +602,6 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu) for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { regval = FIELD_PREP(CMDQV_CMDQ_ALLOC_VINTF, idx); regval |= FIELD_PREP(CMDQV_CMDQ_ALLOC_LVCMDQ, lidx); - regval |= CMDQV_CMDQ_ALLOCATED; writel_relaxed(regval, REG_CMDQV(cmdqv, CMDQ_ALLOC(qidx++))); } @@ -555,7 +680,9 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) dev_dbg(vintf->cmdqv->dev, "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 64)); - kfree(vcmdq); + /* Guest-owned VCMDQ is free-ed with hw_queue by iommufd core */ + if (vcmdq->vintf->hyp_own) + kfree(vcmdq); } static struct tegra241_vcmdq * @@ -628,28 +755,27 @@ static int tegra241_cmdqv_init_vintf(struct tegra241_cmdqv *cmdqv, u16 max_idx, /* Remove Helpers */ -static void tegra241_vintf_remove_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) -{ - tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); - tegra241_vintf_free_lvcmdq(vintf, lidx); -} - static void tegra241_cmdqv_remove_vintf(struct tegra241_cmdqv *cmdqv, u16 idx) { struct tegra241_vintf *vintf = cmdqv->vintfs[idx]; u16 lidx; + tegra241_vintf_hw_deinit(vintf); + /* Remove LVCMDQ resources */ for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) if (vintf->lvcmdqs[lidx]) - tegra241_vintf_remove_lvcmdq(vintf, lidx); - - /* Remove VINTF resources */ - tegra241_vintf_hw_deinit(vintf); + tegra241_vintf_free_lvcmdq(vintf, lidx); dev_dbg(cmdqv->dev, "VINTF%u: deallocated\n", vintf->idx); tegra241_cmdqv_deinit_vintf(cmdqv, idx); - kfree(vintf); + if (!vintf->hyp_own) { + mutex_destroy(&vintf->lvcmdq_mutex); + ida_destroy(&vintf->sids); + /* Guest-owned VINTF is free-ed with viommu by iommufd core */ + } else { + kfree(vintf); + } } static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu) @@ -677,10 +803,51 @@ static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu) put_device(cmdqv->dev); /* smmu->impl_dev */ } +static int +tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu, + const struct iommu_user_data *user_data); + +static void *tegra241_cmdqv_hw_info(struct arm_smmu_device *smmu, u32 *length, + enum iommu_hw_info_type *type) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + struct iommu_hw_info_tegra241_cmdqv *info; + u32 regval; + + if (*type != IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV) + return ERR_PTR(-EOPNOTSUPP); + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + regval = readl_relaxed(REG_CMDQV(cmdqv, PARAM)); + info->log2vcmdqs = ilog2(cmdqv->num_lvcmdqs_per_vintf); + info->log2vsids = 
ilog2(cmdqv->num_sids_per_vintf); + info->version = FIELD_GET(CMDQV_VER, regval); + + *length = sizeof(*info); + *type = IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV; + return info; +} + +static size_t tegra241_cmdqv_get_vintf_size(enum iommu_viommu_type viommu_type) +{ + if (viommu_type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV) + return 0; + return VIOMMU_STRUCT_SIZE(struct tegra241_vintf, vsmmu.core); +} + static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = { + /* For in-kernel use */ .get_secondary_cmdq = tegra241_cmdqv_get_cmdq, .device_reset = tegra241_cmdqv_hw_reset, .device_remove = tegra241_cmdqv_remove, + /* For user-space use */ + .hw_info = tegra241_cmdqv_hw_info, + .get_viommu_size = tegra241_cmdqv_get_vintf_size, + .vsmmu_init = tegra241_cmdqv_init_vintf_user, }; /* Probe Functions */ @@ -822,10 +989,12 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, cmdqv->irq = irq; cmdqv->base = base; cmdqv->dev = smmu->impl_dev; + cmdqv->base_phys = res->start; if (cmdqv->irq > 0) { - ret = request_irq(irq, tegra241_cmdqv_isr, 0, "tegra241-cmdqv", - cmdqv); + ret = request_threaded_irq(irq, NULL, tegra241_cmdqv_isr, + IRQF_ONESHOT, "tegra241-cmdqv", + cmdqv); if (ret) { dev_err(cmdqv->dev, "failed to request irq (%d): %d\n", cmdqv->irq, ret); @@ -837,6 +1006,8 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, cmdqv->num_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, regval); cmdqv->num_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, regval); cmdqv->num_lvcmdqs_per_vintf = cmdqv->num_vcmdqs / cmdqv->num_vintfs; + cmdqv->num_sids_per_vintf = + 1 << FIELD_GET(CMDQV_NUM_SID_PER_VM_LOG2, regval); cmdqv->vintfs = kcalloc(cmdqv->num_vintfs, sizeof(*cmdqv->vintfs), GFP_KERNEL); @@ -890,3 +1061,287 @@ out_fallback: put_device(smmu->impl_dev); return ERR_PTR(-ENODEV); } + +/* User space VINTF and VCMDQ Functions */ + +static size_t tegra241_vintf_get_vcmdq_size(struct iommufd_viommu *viommu, + enum iommu_hw_queue_type queue_type) +{ + if (queue_type != IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV) + return 0; + return HW_QUEUE_STRUCT_SIZE(struct tegra241_vcmdq, core); +} + +static int tegra241_vcmdq_hw_init_user(struct tegra241_vcmdq *vcmdq) +{ + char header[64]; + + /* Configure the vcmdq only; User space does the enabling */ + writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE)); + + dev_dbg(vcmdq->cmdqv->dev, "%sinited at host PA 0x%llx size 0x%lx\n", + lvcmdq_error_header(vcmdq, header, 64), + vcmdq->cmdq.q.q_base & VCMDQ_ADDR, + 1UL << (vcmdq->cmdq.q.q_base & VCMDQ_LOG2SIZE)); + return 0; +} + +static void +tegra241_vintf_destroy_lvcmdq_user(struct iommufd_hw_queue *hw_queue) +{ + struct tegra241_vcmdq *vcmdq = hw_queue_to_vcmdq(hw_queue); + + mutex_lock(&vcmdq->vintf->lvcmdq_mutex); + tegra241_vcmdq_hw_deinit(vcmdq); + tegra241_vcmdq_unmap_lvcmdq(vcmdq); + tegra241_vintf_free_lvcmdq(vcmdq->vintf, vcmdq->lidx); + if (vcmdq->prev) + iommufd_hw_queue_undepend(vcmdq, vcmdq->prev, core); + mutex_unlock(&vcmdq->vintf->lvcmdq_mutex); +} + +static int tegra241_vintf_alloc_lvcmdq_user(struct iommufd_hw_queue *hw_queue, + u32 lidx, phys_addr_t base_addr_pa) +{ + struct tegra241_vintf *vintf = viommu_to_vintf(hw_queue->viommu); + struct tegra241_vcmdq *vcmdq = hw_queue_to_vcmdq(hw_queue); + struct tegra241_cmdqv *cmdqv = vintf->cmdqv; + struct arm_smmu_device *smmu = &cmdqv->smmu; + struct tegra241_vcmdq *prev = NULL; + u32 log2size, max_n_shift; + char header[64]; + int ret; + + if (hw_queue->type != IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV) + return 
-EOPNOTSUPP; + if (lidx >= cmdqv->num_lvcmdqs_per_vintf) + return -EINVAL; + + mutex_lock(&vintf->lvcmdq_mutex); + + if (vintf->lvcmdqs[lidx]) { + ret = -EEXIST; + goto unlock; + } + + /* + * HW requires to map LVCMDQs in ascending order, so reject if the + * previous lvcmdqs is not allocated yet. + */ + if (lidx) { + prev = vintf->lvcmdqs[lidx - 1]; + if (!prev) { + ret = -EIO; + goto unlock; + } + } + + /* + * hw_queue->length must be a power of 2, in range of + * [ 32, 2 ^ (idr[1].CMDQS + CMDQ_ENT_SZ_SHIFT) ] + */ + max_n_shift = FIELD_GET(IDR1_CMDQS, + readl_relaxed(smmu->base + ARM_SMMU_IDR1)); + if (!is_power_of_2(hw_queue->length) || hw_queue->length < 32 || + hw_queue->length > (1 << (max_n_shift + CMDQ_ENT_SZ_SHIFT))) { + ret = -EINVAL; + goto unlock; + } + log2size = ilog2(hw_queue->length) - CMDQ_ENT_SZ_SHIFT; + + /* base_addr_pa must be aligned to hw_queue->length */ + if (base_addr_pa & ~VCMDQ_ADDR || + base_addr_pa & (hw_queue->length - 1)) { + ret = -EINVAL; + goto unlock; + } + + /* + * HW requires to unmap LVCMDQs in descending order, so destroy() must + * follow this rule. Set a dependency on its previous LVCMDQ so iommufd + * core will help enforce it. + */ + if (prev) { + ret = iommufd_hw_queue_depend(vcmdq, prev, core); + if (ret) + goto unlock; + } + vcmdq->prev = prev; + + ret = tegra241_vintf_init_lvcmdq(vintf, lidx, vcmdq); + if (ret) + goto undepend_vcmdq; + + dev_dbg(cmdqv->dev, "%sallocated\n", + lvcmdq_error_header(vcmdq, header, 64)); + + tegra241_vcmdq_map_lvcmdq(vcmdq); + + vcmdq->cmdq.q.q_base = base_addr_pa & VCMDQ_ADDR; + vcmdq->cmdq.q.q_base |= log2size; + + ret = tegra241_vcmdq_hw_init_user(vcmdq); + if (ret) + goto unmap_lvcmdq; + + hw_queue->destroy = &tegra241_vintf_destroy_lvcmdq_user; + mutex_unlock(&vintf->lvcmdq_mutex); + return 0; + +unmap_lvcmdq: + tegra241_vcmdq_unmap_lvcmdq(vcmdq); + tegra241_vintf_deinit_lvcmdq(vintf, lidx); +undepend_vcmdq: + if (vcmdq->prev) + iommufd_hw_queue_undepend(vcmdq, vcmdq->prev, core); +unlock: + mutex_unlock(&vintf->lvcmdq_mutex); + return ret; +} + +static void tegra241_cmdqv_destroy_vintf_user(struct iommufd_viommu *viommu) +{ + struct tegra241_vintf *vintf = viommu_to_vintf(viommu); + + if (vintf->mmap_offset) + iommufd_viommu_destroy_mmap(&vintf->vsmmu.core, + vintf->mmap_offset); + tegra241_cmdqv_remove_vintf(vintf->cmdqv, vintf->idx); +} + +static void tegra241_vintf_destroy_vsid(struct iommufd_vdevice *vdev) +{ + struct tegra241_vintf_sid *vsid = vdev_to_vsid(vdev); + struct tegra241_vintf *vintf = vsid->vintf; + + writel(0, REG_VINTF(vintf, SID_MATCH(vsid->idx))); + writel(0, REG_VINTF(vintf, SID_REPLACE(vsid->idx))); + ida_free(&vintf->sids, vsid->idx); + dev_dbg(vintf->cmdqv->dev, + "VINTF%u: deallocated SID_REPLACE%d for pSID=%x\n", vintf->idx, + vsid->idx, vsid->sid); +} + +static int tegra241_vintf_init_vsid(struct iommufd_vdevice *vdev) +{ + struct device *dev = iommufd_vdevice_to_device(vdev); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct tegra241_vintf *vintf = viommu_to_vintf(vdev->viommu); + struct tegra241_vintf_sid *vsid = vdev_to_vsid(vdev); + struct arm_smmu_stream *stream = &master->streams[0]; + u64 virt_sid = vdev->virt_id; + int sidx; + + if (virt_sid > UINT_MAX) + return -EINVAL; + + WARN_ON_ONCE(master->num_streams != 1); + + /* Find an empty pair of SID_REPLACE and SID_MATCH */ + sidx = ida_alloc_max(&vintf->sids, vintf->cmdqv->num_sids_per_vintf - 1, + GFP_KERNEL); + if (sidx < 0) + return sidx; + + writel(stream->id, REG_VINTF(vintf, SID_REPLACE(sidx))); + 
writel(virt_sid << 1 | 0x1, REG_VINTF(vintf, SID_MATCH(sidx))); + dev_dbg(vintf->cmdqv->dev, + "VINTF%u: allocated SID_REPLACE%d for pSID=%x, vSID=%x\n", + vintf->idx, sidx, stream->id, (u32)virt_sid); + + vsid->idx = sidx; + vsid->vintf = vintf; + vsid->sid = stream->id; + + vdev->destroy = &tegra241_vintf_destroy_vsid; + return 0; +} + +static struct iommufd_viommu_ops tegra241_cmdqv_viommu_ops = { + .destroy = tegra241_cmdqv_destroy_vintf_user, + .alloc_domain_nested = arm_vsmmu_alloc_domain_nested, + /* Non-accelerated commands will be still handled by the kernel */ + .cache_invalidate = arm_vsmmu_cache_invalidate, + .vdevice_size = VDEVICE_STRUCT_SIZE(struct tegra241_vintf_sid, core), + .vdevice_init = tegra241_vintf_init_vsid, + .get_hw_queue_size = tegra241_vintf_get_vcmdq_size, + .hw_queue_init_phys = tegra241_vintf_alloc_lvcmdq_user, +}; + +static int +tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu, + const struct iommu_user_data *user_data) +{ + struct tegra241_cmdqv *cmdqv = + container_of(vsmmu->smmu, struct tegra241_cmdqv, smmu); + struct tegra241_vintf *vintf = viommu_to_vintf(&vsmmu->core); + struct iommu_viommu_tegra241_cmdqv data; + phys_addr_t page0_base; + int ret; + + /* + * Unsupported type should be rejected by tegra241_cmdqv_get_vintf_size. + * Seeing one here indicates a kernel bug or some data corruption. + */ + if (WARN_ON(vsmmu->core.type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV)) + return -EOPNOTSUPP; + + if (!user_data) + return -EINVAL; + + ret = iommu_copy_struct_from_user(&data, user_data, + IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV, + out_vintf_mmap_length); + if (ret) + return ret; + + ret = tegra241_cmdqv_init_vintf(cmdqv, cmdqv->num_vintfs - 1, vintf); + if (ret < 0) { + dev_err(cmdqv->dev, "no more available vintf\n"); + return ret; + } + + /* + * Initialize the user-owned VINTF without a LVCMDQ, as it cannot pre- + * allocate a LVCMDQ until user space wants one, for security reasons. + * It is different than the kernel-owned VINTF0, which had pre-assigned + * and pre-allocated global VCMDQs that would be mapped to the LVCMDQs + * by the tegra241_vintf_hw_init() call. 
+ */ + ret = tegra241_vintf_hw_init(vintf, false); + if (ret) + goto deinit_vintf; + + page0_base = cmdqv->base_phys + TEGRA241_VINTFi_PAGE0(vintf->idx); + ret = iommufd_viommu_alloc_mmap(&vintf->vsmmu.core, page0_base, SZ_64K, + &vintf->mmap_offset); + if (ret) + goto hw_deinit_vintf; + + data.out_vintf_mmap_length = SZ_64K; + data.out_vintf_mmap_offset = vintf->mmap_offset; + ret = iommu_copy_struct_to_user(user_data, &data, + IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV, + out_vintf_mmap_length); + if (ret) + goto free_mmap; + + ida_init(&vintf->sids); + mutex_init(&vintf->lvcmdq_mutex); + + dev_dbg(cmdqv->dev, "VINTF%u: allocated with vmid (%d)\n", vintf->idx, + vintf->vsmmu.vmid); + + vsmmu->core.ops = &tegra241_cmdqv_viommu_ops; + return 0; + +free_mmap: + iommufd_viommu_destroy_mmap(&vintf->vsmmu.core, vintf->mmap_offset); +hw_deinit_vintf: + tegra241_vintf_hw_deinit(vintf); +deinit_vintf: + tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); + return ret; +} + +MODULE_IMPORT_NS("IOMMUFD"); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 62874b18f645..57c097e87613 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -355,7 +355,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain, priv->set_prr_addr = NULL; if (of_device_is_compatible(np, "qcom,smmu-500") && - of_device_is_compatible(np, "qcom,adreno-smmu")) { + !of_device_is_compatible(np, "qcom,sm8250-smmu-500") && + of_device_is_compatible(np, "qcom,adreno-smmu")) { priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit; priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr; } @@ -379,6 +380,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sdm670-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, + { .compatible = "qcom,sm6115-mdss" }, { .compatible = "qcom,sm6350-mdss" }, { .compatible = "qcom,sm6375-mdss" }, { .compatible = "qcom,sm8150-mdss" }, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 8d95b14c7d5a..4ced4b5bee4d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -109,7 +109,7 @@ static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) } static struct platform_driver arm_smmu_driver; -static struct iommu_ops arm_smmu_ops; +static const struct iommu_ops arm_smmu_ops; #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS static struct device_node *dev_get_dev_node(struct device *dev) @@ -919,6 +919,8 @@ static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain) static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) { struct arm_smmu_domain *smmu_domain; + struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = cfg->smmu; /* * Allocate the domain and initialise some of its data structures. 
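A condensed sketch of the conversion pattern applied to arm-smmu here, and to qcom_iommu and exynos-iommu further below: the supported page sizes stop being published through a driver-wide iommu_ops.pgsize_bitmap and are instead set on each paging domain when it is allocated. Names prefixed with foo_ are hypothetical and only show the shape of the change; this is not code from the patch.

	static struct iommu_domain *foo_domain_alloc_paging(struct device *dev)
	{
		struct foo_domain *fd;

		fd = kzalloc(sizeof(*fd), GFP_KERNEL);
		if (!fd)
			return NULL;

		/* Previously a global foo_iommu_ops.pgsize_bitmap */
		fd->domain.pgsize_bitmap = SZ_4K | SZ_64K;
		return &fd->domain;
	}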
@@ -931,6 +933,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) mutex_init(&smmu_domain->init_mutex); spin_lock_init(&smmu_domain->cb_lock); + smmu_domain->domain.pgsize_bitmap = smmu->pgsize_bitmap; return &smmu_domain->domain; } @@ -1627,7 +1630,7 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } -static struct iommu_ops arm_smmu_ops = { +static const struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, @@ -1639,7 +1642,6 @@ static struct iommu_ops arm_smmu_ops = { .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, .def_domain_type = arm_smmu_def_domain_type, - .pgsize_bitmap = -1UL, /* Restricted during device attach */ .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = arm_smmu_attach_dev, @@ -1919,10 +1921,6 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K) smmu->pgsize_bitmap |= SZ_64K | SZ_512M; - if (arm_smmu_ops.pgsize_bitmap == -1UL) - arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap; - else - arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap; dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", smmu->pgsize_bitmap); diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 3907924646a2..c5be95e56031 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -229,7 +229,7 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, goto out_unlock; pgtbl_cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = qcom_iommu_ops.pgsize_bitmap, + .pgsize_bitmap = domain->pgsize_bitmap, .ias = 32, .oas = 40, .tlb = &qcom_flush_ops, @@ -246,8 +246,6 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, goto out_clear_iommu; } - /* Update the domain's page sizes to reflect the page table format */ - domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; domain->geometry.aperture_end = (1ULL << pgtbl_cfg.ias) - 1; domain->geometry.force_aperture = true; @@ -337,6 +335,7 @@ static struct iommu_domain *qcom_iommu_domain_alloc_paging(struct device *dev) mutex_init(&qcom_domain->init_mutex); spin_lock_init(&qcom_domain->pgtbl_lock); + qcom_domain->domain.pgsize_bitmap = SZ_4K; return &qcom_domain->domain; } @@ -598,7 +597,6 @@ static const struct iommu_ops qcom_iommu_ops = { .probe_device = qcom_iommu_probe_device, .device_group = generic_device_group, .of_xlate = qcom_iommu_of_xlate, - .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = qcom_iommu_attach_dev, .map_pages = qcom_iommu_map, diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index fcb6a0f7c082..b6edd178fe25 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -22,6 +22,7 @@ #include <linux/pm_runtime.h> #include <linux/slab.h> +#include "dma-iommu.h" #include "iommu-pages.h" typedef u32 sysmmu_iova_t; @@ -925,6 +926,8 @@ static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev) spin_lock_init(&domain->pgtablelock); INIT_LIST_HEAD(&domain->clients); + domain->domain.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE; + domain->domain.geometry.aperture_start = 0; domain->domain.geometry.aperture_end = ~0UL; domain->domain.geometry.force_aperture = true; @@ -1477,7 +1480,7 @@ static const struct iommu_ops 
exynos_iommu_ops = { .device_group = generic_device_group, .probe_device = exynos_iommu_probe_device, .release_device = exynos_iommu_release_device, - .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, + .get_resv_regions = iommu_dma_get_resv_regions, .of_xlate = exynos_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = exynos_iommu_attach_device, diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index 761ab647f372..0961ac805944 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -193,15 +193,13 @@ struct hyperv_root_ir_data { static void hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) { - u64 status; - u32 vector; - struct irq_cfg *cfg; - int ioapic_id; - const struct cpumask *affinity; - int cpu; - struct hv_interrupt_entry entry; struct hyperv_root_ir_data *data = irq_data->chip_data; + struct hv_interrupt_entry entry; + const struct cpumask *affinity; struct IO_APIC_route_entry e; + struct irq_cfg *cfg; + int cpu, ioapic_id; + u32 vector; cfg = irqd_cfg(irq_data); affinity = irq_data_get_effective_affinity_mask(irq_data); @@ -214,23 +212,16 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) && data->entry.ioapic_rte.as_uint64) { entry = data->entry; - status = hv_unmap_ioapic_interrupt(ioapic_id, &entry); - - if (status != HV_STATUS_SUCCESS) - hv_status_debug(status, "failed to unmap\n"); + (void)hv_unmap_ioapic_interrupt(ioapic_id, &entry); data->entry.ioapic_rte.as_uint64 = 0; data->entry.source = 0; /* Invalid source */ } - status = hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu, - vector, &entry); - - if (status != HV_STATUS_SUCCESS) { - hv_status_err(status, "map failed\n"); + if (hv_map_ioapic_interrupt(ioapic_id, data->is_level, cpu, + vector, &entry)) return; - } data->entry = entry; @@ -322,10 +313,10 @@ static void hyperv_root_irq_remapping_free(struct irq_domain *domain, data = irq_data->chip_data; e = &data->entry; - if (e->source == HV_DEVICE_TYPE_IOAPIC - && e->ioapic_rte.as_uint64) - hv_unmap_ioapic_interrupt(data->ioapic_id, - &data->entry); + if (e->source == HV_DEVICE_TYPE_IOAPIC && + e->ioapic_rte.as_uint64) + (void)hv_unmap_ioapic_interrupt(data->ioapic_id, + &data->entry); kfree(data); } diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index fc35cba59145..265e7290256b 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -40,9 +40,8 @@ static bool cache_tage_match(struct cache_tag *tag, u16 domain_id, } /* Assign a cache tag with specified type to domain. 
*/ -static int cache_tag_assign(struct dmar_domain *domain, u16 did, - struct device *dev, ioasid_t pasid, - enum cache_tag_type type) +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; @@ -371,7 +370,7 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag * struct intel_iommu *iommu = tag->iommu; u64 type = DMA_TLB_PSI_FLUSH; - if (domain->use_first_level) { + if (intel_domain_is_fs_paging(domain)) { qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr, pages, ih, domain->qi_batch); return; @@ -423,22 +422,6 @@ static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_ domain->qi_batch); } -static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag) -{ - struct intel_iommu *iommu = tag->iommu; - struct device_domain_info *info; - u16 sid; - - info = dev_iommu_priv_get(tag->dev); - sid = PCI_DEVID(info->bus, info->devfn); - - qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, - MAX_AGAW_PFN_WIDTH, domain->qi_batch); - if (info->dtlb_extra_inval) - qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0, - MAX_AGAW_PFN_WIDTH, domain->qi_batch); -} - /* * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive) * when the memory mappings in the target domain have been modified. @@ -451,7 +434,13 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, struct cache_tag *tag; unsigned long flags; - addr = calculate_psi_aligned_address(start, end, &pages, &mask); + if (start == 0 && end == ULONG_MAX) { + addr = 0; + pages = -1; + mask = MAX_AGAW_PFN_WIDTH; + } else { + addr = calculate_psi_aligned_address(start, end, &pages, &mask); + } spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { @@ -492,31 +481,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, */ void cache_tag_flush_all(struct dmar_domain *domain) { - struct intel_iommu *iommu = NULL; - struct cache_tag *tag; - unsigned long flags; - - spin_lock_irqsave(&domain->cache_lock, flags); - list_for_each_entry(tag, &domain->cache_tags, node) { - if (iommu && iommu != tag->iommu) - qi_batch_flush_descs(iommu, domain->qi_batch); - iommu = tag->iommu; - - switch (tag->type) { - case CACHE_TAG_IOTLB: - case CACHE_TAG_NESTING_IOTLB: - cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0); - break; - case CACHE_TAG_DEVTLB: - case CACHE_TAG_NESTING_DEVTLB: - cache_tag_flush_devtlb_all(domain, tag); - break; - } - - trace_cache_tag_flush_all(tag); - } - qi_batch_flush_descs(iommu, domain->qi_batch); - spin_unlock_irqrestore(&domain->cache_lock, flags); + cache_tag_flush_range(domain, 0, ULONG_MAX, 0); } /* @@ -546,7 +511,8 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, qi_batch_flush_descs(iommu, domain->qi_batch); iommu = tag->iommu; - if (!cap_caching_mode(iommu->cap) || domain->use_first_level) { + if (!cap_caching_mode(iommu->cap) || + intel_domain_is_fs_paging(domain)) { iommu_flush_write_buffer(iommu); continue; } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index b61d9ea27aa9..ec975c73cfe6 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -935,14 +935,11 @@ void __init detect_intel_iommu(void) pci_request_acs(); } -#ifdef CONFIG_X86 if (!ret) { 
x86_init.iommu.iommu_init = intel_iommu_init; x86_platform.iommu_shutdown = intel_iommu_shutdown; } -#endif - if (dmar_tbl) { acpi_put_table(dmar_tbl); dmar_tbl = NULL; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7aa3932251b2..19955e222c2b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -57,6 +57,8 @@ static void __init check_tylersburg_isoch(void); static int rwbf_quirk; +#define rwbf_required(iommu) (rwbf_quirk || cap_rwbf((iommu)->cap)) + /* * set to 1 to panic kernel if can't successfully enable VT-d * (used when kernel is launched w/ TXT) @@ -1391,28 +1393,10 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) if (--info->refcnt == 0) { ida_free(&iommu->domain_ida, info->did); xa_erase(&domain->iommu_array, iommu->seq_id); - domain->nid = NUMA_NO_NODE; kfree(info); } } -static void domain_exit(struct dmar_domain *domain) -{ - if (domain->pgd) { - struct iommu_pages_list freelist = - IOMMU_PAGES_LIST_INIT(freelist); - - domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist); - iommu_put_pages_list(&freelist); - } - - if (WARN_ON(!list_empty(&domain->devices))) - return; - - kfree(domain->qi_batch); - kfree(domain); -} - /* * For kdump cases, old valid entries may be cached due to the * in-flight DMA and copied pgtable, but there is no unmapping @@ -1480,6 +1464,9 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct context_entry *context; int ret; + if (WARN_ON(!intel_domain_is_ss_paging(domain))) + return -EINVAL; + pr_debug("Set context mapping for %02x:%02x.%d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -1736,15 +1723,14 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 intel_context_flush_no_pasid(info, context, did); } -int __domain_setup_first_level(struct intel_iommu *iommu, - struct device *dev, ioasid_t pasid, - u16 did, pgd_t *pgd, int flags, - struct iommu_domain *old) +int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev, + ioasid_t pasid, u16 did, phys_addr_t fsptptr, + int flags, struct iommu_domain *old) { if (!old) - return intel_pasid_setup_first_level(iommu, dev, pgd, - pasid, did, flags); - return intel_pasid_replace_first_level(iommu, dev, pgd, pasid, did, + return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, + did, flags); + return intel_pasid_replace_first_level(iommu, dev, fsptptr, pasid, did, iommu_domain_did(old, iommu), flags); } @@ -1793,7 +1779,7 @@ static int domain_setup_first_level(struct intel_iommu *iommu, return __domain_setup_first_level(iommu, dev, pasid, domain_id_iommu(domain, iommu), - (pgd_t *)pgd, flags, old); + __pa(pgd), flags, old); } static int dmar_domain_attach_device(struct dmar_domain *domain, @@ -1819,12 +1805,14 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (!sm_supported(iommu)) ret = domain_context_mapping(domain, dev); - else if (domain->use_first_level) + else if (intel_domain_is_fs_paging(domain)) ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID, NULL); - else + else if (intel_domain_is_ss_paging(domain)) ret = domain_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID, NULL); + else if (WARN_ON(true)) + ret = -EINVAL; if (ret) goto out_block_translation; @@ -3286,10 +3274,14 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st spin_lock_init(&domain->lock); spin_lock_init(&domain->cache_lock); xa_init(&domain->iommu_array); + 
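A condensed sketch of the dispatch idiom introduced above in dmar_domain_attach_device() and reused later for PASID attach: the stage of a paging domain is now derived from which domain_ops table it carries, via intel_domain_is_fs_paging()/intel_domain_is_ss_paging() (defined later in this series), rather than the old use_first_level flag. The helper name setup_default_pasid is hypothetical; this is illustrative only, not code from the patch.

	static int setup_default_pasid(struct intel_iommu *iommu,
				       struct dmar_domain *domain,
				       struct device *dev)
	{
		if (intel_domain_is_fs_paging(domain))
			return domain_setup_first_level(iommu, domain, dev,
							IOMMU_NO_PASID, NULL);
		if (intel_domain_is_ss_paging(domain))
			return domain_setup_second_level(iommu, domain, dev,
							 IOMMU_NO_PASID, NULL);
		WARN_ON(1);
		return -EINVAL;
	}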
INIT_LIST_HEAD(&domain->s1_domains); + spin_lock_init(&domain->s1_lock); domain->nid = dev_to_node(dev); domain->use_first_level = first_stage; + domain->domain.type = IOMMU_DOMAIN_UNMANAGED; + /* calculate the address width */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) @@ -3331,71 +3323,168 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st } static struct iommu_domain * -intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, - const struct iommu_user_data *user_data) +intel_iommu_domain_alloc_first_stage(struct device *dev, + struct intel_iommu *iommu, u32 flags) +{ + struct dmar_domain *dmar_domain; + + if (flags & ~IOMMU_HWPT_ALLOC_PASID) + return ERR_PTR(-EOPNOTSUPP); + + /* Only SL is available in legacy mode */ + if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) + return ERR_PTR(-EOPNOTSUPP); + + dmar_domain = paging_domain_alloc(dev, true); + if (IS_ERR(dmar_domain)) + return ERR_CAST(dmar_domain); + + dmar_domain->domain.ops = &intel_fs_paging_domain_ops; + /* + * iotlb sync for map is only needed for legacy implementations that + * explicitly require flushing internal write buffers to ensure memory + * coherence. + */ + if (rwbf_required(iommu)) + dmar_domain->iotlb_sync_map = true; + + return &dmar_domain->domain; +} + +static struct iommu_domain * +intel_iommu_domain_alloc_second_stage(struct device *dev, + struct intel_iommu *iommu, u32 flags) { - struct device_domain_info *info = dev_iommu_priv_get(dev); - bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; - bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; - struct intel_iommu *iommu = info->iommu; struct dmar_domain *dmar_domain; - struct iommu_domain *domain; - bool first_stage; if (flags & (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID))) return ERR_PTR(-EOPNOTSUPP); - if (nested_parent && !nested_supported(iommu)) + + if (((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) && + !nested_supported(iommu)) || + ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && + !ssads_supported(iommu))) return ERR_PTR(-EOPNOTSUPP); - if (user_data || (dirty_tracking && !ssads_supported(iommu))) + + /* Legacy mode always supports second stage */ + if (sm_supported(iommu) && !ecap_slts(iommu->ecap)) return ERR_PTR(-EOPNOTSUPP); + dmar_domain = paging_domain_alloc(dev, false); + if (IS_ERR(dmar_domain)) + return ERR_CAST(dmar_domain); + + dmar_domain->domain.ops = &intel_ss_paging_domain_ops; + dmar_domain->nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; + + if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) + dmar_domain->domain.dirty_ops = &intel_dirty_ops; + /* - * Always allocate the guest compatible page table unless - * IOMMU_HWPT_ALLOC_NEST_PARENT or IOMMU_HWPT_ALLOC_DIRTY_TRACKING - * is specified. + * Besides the internal write buffer flush, the caching mode used for + * legacy nested translation (which utilizes shadowing page tables) + * also requires iotlb sync on map. 
*/ - if (nested_parent || dirty_tracking) { - if (!sm_supported(iommu) || !ecap_slts(iommu->ecap)) - return ERR_PTR(-EOPNOTSUPP); - first_stage = false; - } else { - first_stage = first_level_by_default(iommu); - } + if (rwbf_required(iommu) || cap_caching_mode(iommu->cap)) + dmar_domain->iotlb_sync_map = true; - dmar_domain = paging_domain_alloc(dev, first_stage); - if (IS_ERR(dmar_domain)) - return ERR_CAST(dmar_domain); - domain = &dmar_domain->domain; - domain->type = IOMMU_DOMAIN_UNMANAGED; - domain->owner = &intel_iommu_ops; - domain->ops = intel_iommu_ops.default_domain_ops; - - if (nested_parent) { - dmar_domain->nested_parent = true; - INIT_LIST_HEAD(&dmar_domain->s1_domains); - spin_lock_init(&dmar_domain->s1_lock); - } + return &dmar_domain->domain; +} - if (dirty_tracking) { - if (dmar_domain->use_first_level) { - iommu_domain_free(domain); - return ERR_PTR(-EOPNOTSUPP); - } - domain->dirty_ops = &intel_dirty_ops; - } +static struct iommu_domain * +intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, + const struct iommu_user_data *user_data) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct iommu_domain *domain; - return domain; + if (user_data) + return ERR_PTR(-EOPNOTSUPP); + + /* Prefer first stage if possible by default. */ + domain = intel_iommu_domain_alloc_first_stage(dev, iommu, flags); + if (domain != ERR_PTR(-EOPNOTSUPP)) + return domain; + return intel_iommu_domain_alloc_second_stage(dev, iommu, flags); } static void intel_iommu_domain_free(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); - WARN_ON(dmar_domain->nested_parent && - !list_empty(&dmar_domain->s1_domains)); - domain_exit(dmar_domain); + if (WARN_ON(dmar_domain->nested_parent && + !list_empty(&dmar_domain->s1_domains))) + return; + + if (WARN_ON(!list_empty(&dmar_domain->devices))) + return; + + if (dmar_domain->pgd) { + struct iommu_pages_list freelist = + IOMMU_PAGES_LIST_INIT(freelist); + + domain_unmap(dmar_domain, 0, DOMAIN_MAX_PFN(dmar_domain->gaw), + &freelist); + iommu_put_pages_list(&freelist); + } + + kfree(dmar_domain->qi_batch); + kfree(dmar_domain); +} + +static int paging_domain_compatible_first_stage(struct dmar_domain *dmar_domain, + struct intel_iommu *iommu) +{ + if (WARN_ON(dmar_domain->domain.dirty_ops || + dmar_domain->nested_parent)) + return -EINVAL; + + /* Only SL is available in legacy mode */ + if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) + return -EINVAL; + + /* Same page size support */ + if (!cap_fl1gp_support(iommu->cap) && + (dmar_domain->domain.pgsize_bitmap & SZ_1G)) + return -EINVAL; + + /* iotlb sync on map requirement */ + if ((rwbf_required(iommu)) && !dmar_domain->iotlb_sync_map) + return -EINVAL; + + return 0; +} + +static int +paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain, + struct intel_iommu *iommu) +{ + unsigned int sslps = cap_super_page_val(iommu->cap); + + if (dmar_domain->domain.dirty_ops && !ssads_supported(iommu)) + return -EINVAL; + if (dmar_domain->nested_parent && !nested_supported(iommu)) + return -EINVAL; + + /* Legacy mode always supports second stage */ + if (sm_supported(iommu) && !ecap_slts(iommu->ecap)) + return -EINVAL; + + /* Same page size support */ + if (!(sslps & BIT(0)) && (dmar_domain->domain.pgsize_bitmap & SZ_2M)) + return -EINVAL; + if (!(sslps & BIT(1)) && (dmar_domain->domain.pgsize_bitmap & SZ_1G)) + return -EINVAL; + + /* iotlb sync on map requirement */ + if ((rwbf_required(iommu) 
|| cap_caching_mode(iommu->cap)) && + !dmar_domain->iotlb_sync_map) + return -EINVAL; + + return 0; } int paging_domain_compatible(struct iommu_domain *domain, struct device *dev) @@ -3403,28 +3492,29 @@ int paging_domain_compatible(struct iommu_domain *domain, struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); struct dmar_domain *dmar_domain = to_dmar_domain(domain); struct intel_iommu *iommu = info->iommu; + int ret = -EINVAL; int addr_width; - if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING))) - return -EPERM; + if (intel_domain_is_fs_paging(dmar_domain)) + ret = paging_domain_compatible_first_stage(dmar_domain, iommu); + else if (intel_domain_is_ss_paging(dmar_domain)) + ret = paging_domain_compatible_second_stage(dmar_domain, iommu); + else if (WARN_ON(true)) + ret = -EINVAL; + if (ret) + return ret; + /* + * FIXME this is locked wrong, it needs to be under the + * dmar_domain->lock + */ if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) return -EINVAL; - if (domain->dirty_ops && !ssads_supported(iommu)) - return -EINVAL; - if (dmar_domain->iommu_coherency != iommu_paging_structure_coherency(iommu)) return -EINVAL; - if (dmar_domain->iommu_superpage != - iommu_superpage_capability(iommu, dmar_domain->use_first_level)) - return -EINVAL; - - if (dmar_domain->use_first_level && - (!sm_supported(iommu) || !ecap_flts(iommu->ecap))) - return -EINVAL; /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); @@ -3610,44 +3700,41 @@ static bool domain_support_force_snooping(struct dmar_domain *domain) return support; } -static void domain_set_force_snooping(struct dmar_domain *domain) +static bool intel_iommu_enforce_cache_coherency_fs(struct iommu_domain *domain) { + struct dmar_domain *dmar_domain = to_dmar_domain(domain); struct device_domain_info *info; - assert_spin_locked(&domain->lock); - /* - * Second level page table supports per-PTE snoop control. The - * iommu_map() interface will handle this by setting SNP bit. - */ - if (!domain->use_first_level) { - domain->set_pte_snp = true; - return; - } + guard(spinlock_irqsave)(&dmar_domain->lock); + + if (dmar_domain->force_snooping) + return true; + + if (!domain_support_force_snooping(dmar_domain)) + return false; - list_for_each_entry(info, &domain->devices, link) + dmar_domain->force_snooping = true; + list_for_each_entry(info, &dmar_domain->devices, link) intel_pasid_setup_page_snoop_control(info->iommu, info->dev, IOMMU_NO_PASID); + return true; } -static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) +static bool intel_iommu_enforce_cache_coherency_ss(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); - unsigned long flags; - if (dmar_domain->force_snooping) - return true; - - spin_lock_irqsave(&dmar_domain->lock, flags); + guard(spinlock_irqsave)(&dmar_domain->lock); if (!domain_support_force_snooping(dmar_domain) || - (!dmar_domain->use_first_level && dmar_domain->has_mappings)) { - spin_unlock_irqrestore(&dmar_domain->lock, flags); + dmar_domain->has_mappings) return false; - } - domain_set_force_snooping(dmar_domain); + /* + * Second level page table supports per-PTE snoop control. The + * iommu_map() interface will handle this by setting SNP bit. 
+ */ + dmar_domain->set_pte_snp = true; dmar_domain->force_snooping = true; - spin_unlock_irqrestore(&dmar_domain->lock, flags); - return true; } @@ -3780,8 +3867,17 @@ static void intel_iommu_probe_finalize(struct device *dev) !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) + if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) { iommu_enable_pci_ats(info); + /* Assign a DEVTLB cache tag to the default domain. */ + if (info->ats_enabled && info->domain) { + u16 did = domain_id_iommu(info->domain, iommu); + + if (cache_tag_assign(info->domain, did, dev, + IOMMU_NO_PASID, CACHE_TAG_DEVTLB)) + iommu_disable_pci_ats(info); + } + } iommu_enable_pci_pri(info); } @@ -3945,7 +4041,10 @@ static bool risky_device(struct pci_dev *pdev) static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, unsigned long iova, size_t size) { - cache_tag_flush_range_np(to_dmar_domain(domain), iova, iova + size - 1); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + + if (dmar_domain->iotlb_sync_map) + cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1); return 0; } @@ -3991,8 +4090,8 @@ static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, { struct device_domain_info *info = dev_iommu_priv_get(dev); - iopf_for_domain_remove(old, dev); intel_pasid_tear_down_entry(info->iommu, dev, pasid, false); + iopf_for_domain_remove(old, dev); domain_remove_dev_pasid(old, dev, pasid); return 0; @@ -4069,12 +4168,15 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, if (ret) goto out_remove_dev_pasid; - if (dmar_domain->use_first_level) + if (intel_domain_is_fs_paging(dmar_domain)) ret = domain_setup_first_level(iommu, dmar_domain, dev, pasid, old); - else + else if (intel_domain_is_ss_paging(dmar_domain)) ret = domain_setup_second_level(iommu, dmar_domain, dev, pasid, old); + else if (WARN_ON(true)) + ret = -EINVAL; + if (ret) goto out_unwind_iopf; @@ -4091,12 +4193,17 @@ out_remove_dev_pasid: return ret; } -static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) +static void *intel_iommu_hw_info(struct device *dev, u32 *length, + enum iommu_hw_info_type *type) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; struct iommu_hw_info_vtd *vtd; + if (*type != IOMMU_HW_INFO_TYPE_DEFAULT && + *type != IOMMU_HW_INFO_TYPE_INTEL_VTD) + return ERR_PTR(-EOPNOTSUPP); + vtd = kzalloc(sizeof(*vtd), GFP_KERNEL); if (!vtd) return ERR_PTR(-ENOMEM); @@ -4349,6 +4456,32 @@ static struct iommu_domain identity_domain = { }, }; +const struct iommu_domain_ops intel_fs_paging_domain_ops = { + .attach_dev = intel_iommu_attach_device, + .set_dev_pasid = intel_iommu_set_dev_pasid, + .map_pages = intel_iommu_map_pages, + .unmap_pages = intel_iommu_unmap_pages, + .iotlb_sync_map = intel_iommu_iotlb_sync_map, + .flush_iotlb_all = intel_flush_iotlb_all, + .iotlb_sync = intel_iommu_tlb_sync, + .iova_to_phys = intel_iommu_iova_to_phys, + .free = intel_iommu_domain_free, + .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_fs, +}; + +const struct iommu_domain_ops intel_ss_paging_domain_ops = { + .attach_dev = intel_iommu_attach_device, + .set_dev_pasid = intel_iommu_set_dev_pasid, + .map_pages = intel_iommu_map_pages, + .unmap_pages = intel_iommu_unmap_pages, + .iotlb_sync_map = intel_iommu_iotlb_sync_map, + .flush_iotlb_all = intel_flush_iotlb_all, + .iotlb_sync = intel_iommu_tlb_sync, + .iova_to_phys = 
intel_iommu_iova_to_phys, + .free = intel_iommu_domain_free, + .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_ss, +}; + const struct iommu_ops intel_iommu_ops = { .blocked_domain = &blocking_domain, .release_domain = &blocking_domain, @@ -4365,20 +4498,7 @@ const struct iommu_ops intel_iommu_ops = { .device_group = intel_iommu_device_group, .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, - .pgsize_bitmap = SZ_4K, .page_response = intel_iommu_page_response, - .default_domain_ops = &(const struct iommu_domain_ops) { - .attach_dev = intel_iommu_attach_device, - .set_dev_pasid = intel_iommu_set_dev_pasid, - .map_pages = intel_iommu_map_pages, - .unmap_pages = intel_iommu_unmap_pages, - .iotlb_sync_map = intel_iommu_iotlb_sync_map, - .flush_iotlb_all = intel_flush_iotlb_all, - .iotlb_sync = intel_iommu_tlb_sync, - .iova_to_phys = intel_iommu_iova_to_phys, - .free = intel_iommu_domain_free, - .enforce_cache_coherency = intel_iommu_enforce_cache_coherency, - } }; static void quirk_iommu_igfx(struct pci_dev *dev) diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 3ddbcc603de2..d09b92871659 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -614,6 +614,9 @@ struct dmar_domain { u8 has_mappings:1; /* Has mappings configured through * iommu_map() interface. */ + u8 iotlb_sync_map:1; /* Need to flush IOTLB cache or write + * buffer when creating mappings. + */ spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ @@ -1252,10 +1255,9 @@ domain_add_dev_pasid(struct iommu_domain *domain, void domain_remove_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); -int __domain_setup_first_level(struct intel_iommu *iommu, - struct device *dev, ioasid_t pasid, - u16 did, pgd_t *pgd, int flags, - struct iommu_domain *old); +int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev, + ioasid_t pasid, u16 did, phys_addr_t fsptptr, + int flags, struct iommu_domain *old); int dmar_ir_support(void); @@ -1289,6 +1291,8 @@ struct cache_tag { unsigned int users; }; +int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev, + ioasid_t pasid, enum cache_tag_type type); int cache_tag_assign_domain(struct dmar_domain *domain, struct device *dev, ioasid_t pasid); void cache_tag_unassign_domain(struct dmar_domain *domain, @@ -1376,6 +1380,18 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, u8 devfn, int alloc); extern const struct iommu_ops intel_iommu_ops; +extern const struct iommu_domain_ops intel_fs_paging_domain_ops; +extern const struct iommu_domain_ops intel_ss_paging_domain_ops; + +static inline bool intel_domain_is_fs_paging(struct dmar_domain *domain) +{ + return domain->domain.ops == &intel_fs_paging_domain_ops; +} + +static inline bool intel_domain_is_ss_paging(struct dmar_domain *domain) +{ + return domain->domain.ops == &intel_ss_paging_domain_ops; +} #ifdef CONFIG_INTEL_IOMMU extern int intel_iommu_sm; diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index cf7b6882ec75..4f9b01dc91e8 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -10,6 +10,7 @@ #include <linux/hpet.h> #include <linux/pci.h> #include <linux/irq.h> +#include <linux/irqchip/irq-msi-lib.h> #include <linux/acpi.h> #include <linux/irqdomain.h> #include <linux/crash_dump.h> @@ -518,8 +519,14 @@ static void 
iommu_enable_irq_remapping(struct intel_iommu *iommu) static int intel_setup_irq_remapping(struct intel_iommu *iommu) { + struct irq_domain_info info = { + .ops = &intel_ir_domain_ops, + .parent = arch_get_ir_parent_domain(), + .domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI, + .size = INTR_REMAP_TABLE_ENTRIES, + .host_data = iommu, + }; struct ir_table *ir_table; - struct fwnode_handle *fn; unsigned long *bitmap; void *ir_table_base; @@ -544,25 +551,16 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) goto out_free_pages; } - fn = irq_domain_alloc_named_id_fwnode("INTEL-IR", iommu->seq_id); - if (!fn) + info.fwnode = irq_domain_alloc_named_id_fwnode("INTEL-IR", iommu->seq_id); + if (!info.fwnode) goto out_free_bitmap; - iommu->ir_domain = - irq_domain_create_hierarchy(arch_get_ir_parent_domain(), - 0, INTR_REMAP_TABLE_ENTRIES, - fn, &intel_ir_domain_ops, - iommu); + iommu->ir_domain = msi_create_parent_irq_domain(&info, &dmar_msi_parent_ops); if (!iommu->ir_domain) { pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); goto out_free_fwnode; } - irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR); - iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT | - IRQ_DOMAIN_FLAG_ISOLATED_MSI; - iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops; - ir_table->base = ir_table_base; ir_table->bitmap = bitmap; iommu->ir_table = ir_table; @@ -608,7 +606,7 @@ out_free_ir_domain: irq_domain_remove(iommu->ir_domain); iommu->ir_domain = NULL; out_free_fwnode: - irq_domain_free_fwnode(fn); + irq_domain_free_fwnode(info.fwnode); out_free_bitmap: bitmap_free(bitmap); out_free_pages: @@ -1244,10 +1242,10 @@ static void intel_ir_compose_msi_msg(struct irq_data *irq_data, static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) { struct intel_ir_data *ir_data = data->chip_data; - struct vcpu_data *vcpu_pi_info = info; + struct intel_iommu_pi_data *pi_data = info; /* stop posting interrupts, back to the default mode */ - if (!vcpu_pi_info) { + if (!pi_data) { __intel_ir_reconfigure_irte(data, true); } else { struct irte irte_pi; @@ -1265,10 +1263,10 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) /* Update the posted mode fields */ irte_pi.p_pst = 1; irte_pi.p_urgent = 0; - irte_pi.p_vector = vcpu_pi_info->vector; - irte_pi.pda_l = (vcpu_pi_info->pi_desc_addr >> + irte_pi.p_vector = pi_data->vector; + irte_pi.pda_l = (pi_data->pi_desc_addr >> (32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT); - irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) & + irte_pi.pda_h = (pi_data->pi_desc_addr >> 32) & ~(-1UL << PDA_HIGH_BIT); ir_data->irq_2_iommu.posted_vcpu = true; @@ -1530,6 +1528,8 @@ static const struct irq_domain_ops intel_ir_domain_ops = { static const struct msi_parent_ops dmar_msi_parent_ops = { .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI, + .bus_select_token = DOMAIN_BUS_DMAR, + .bus_select_mask = MATCH_PCI_MSI, .prefix = "IR-", .init_dev_msi_info = msi_parent_init_dev_msi_info, }; diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index fc312f649f9e..1b6ad9c900a5 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -216,8 +216,7 @@ intel_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent, /* Must be nested domain */ if (user_data->type != IOMMU_HWPT_DATA_VTD_S1) return ERR_PTR(-EOPNOTSUPP); - if (parent->ops != intel_iommu_ops.default_domain_ops || - !s2_domain->nested_parent) + if (!intel_domain_is_ss_paging(s2_domain) || 
!s2_domain->nested_parent) return ERR_PTR(-EINVAL); ret = iommu_copy_struct_from_user(&vtd, user_data, @@ -229,7 +228,6 @@ intel_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent, if (!domain) return ERR_PTR(-ENOMEM); - domain->use_first_level = true; domain->s2_domain = s2_domain; domain->s1_cfg = vtd; domain->domain.ops = &intel_nested_domain_ops; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index ac67a056b6c8..52f678975da7 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -348,14 +348,15 @@ static void intel_pasid_flush_present(struct intel_iommu *iommu, */ static void pasid_pte_config_first_level(struct intel_iommu *iommu, struct pasid_entry *pte, - pgd_t *pgd, u16 did, int flags) + phys_addr_t fsptptr, u16 did, + int flags) { lockdep_assert_held(&iommu->lock); pasid_clear_entry(pte); /* Setup the first level page table pointer: */ - pasid_set_flptr(pte, (u64)__pa(pgd)); + pasid_set_flptr(pte, fsptptr); if (flags & PASID_FLAG_FL5LP) pasid_set_flpm(pte, 1); @@ -372,9 +373,9 @@ static void pasid_pte_config_first_level(struct intel_iommu *iommu, pasid_set_present(pte); } -int intel_pasid_setup_first_level(struct intel_iommu *iommu, - struct device *dev, pgd_t *pgd, - u32 pasid, u16 did, int flags) +int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev, + phys_addr_t fsptptr, u32 pasid, u16 did, + int flags) { struct pasid_entry *pte; @@ -402,7 +403,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, return -EBUSY; } - pasid_pte_config_first_level(iommu, pte, pgd, did, flags); + pasid_pte_config_first_level(iommu, pte, fsptptr, did, flags); spin_unlock(&iommu->lock); @@ -412,7 +413,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, } int intel_pasid_replace_first_level(struct intel_iommu *iommu, - struct device *dev, pgd_t *pgd, + struct device *dev, phys_addr_t fsptptr, u32 pasid, u16 did, u16 old_did, int flags) { @@ -430,7 +431,7 @@ int intel_pasid_replace_first_level(struct intel_iommu *iommu, return -EINVAL; } - pasid_pte_config_first_level(iommu, &new_pte, pgd, did, flags); + pasid_pte_config_first_level(iommu, &new_pte, fsptptr, did, flags); spin_lock(&iommu->lock); pte = intel_pasid_get_entry(dev, pasid); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index fd0fd1a0df84..a771a77d4239 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -288,9 +288,9 @@ extern unsigned int intel_pasid_max_id; int intel_pasid_alloc_table(struct device *dev); void intel_pasid_free_table(struct device *dev); struct pasid_table *intel_pasid_get_table(struct device *dev); -int intel_pasid_setup_first_level(struct intel_iommu *iommu, - struct device *dev, pgd_t *pgd, - u32 pasid, u16 did, int flags); +int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev, + phys_addr_t fsptptr, u32 pasid, u16 did, + int flags); int intel_pasid_setup_second_level(struct intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u32 pasid); @@ -302,9 +302,8 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev, u32 pasid, struct dmar_domain *domain); int intel_pasid_replace_first_level(struct intel_iommu *iommu, - struct device *dev, pgd_t *pgd, - u32 pasid, u16 did, u16 old_did, - int flags); + struct device *dev, phys_addr_t fsptptr, + u32 pasid, u16 did, u16 old_did, int flags); int intel_pasid_replace_second_level(struct 
intel_iommu *iommu, struct dmar_domain *domain, struct device *dev, u16 old_did, diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index f3da596410b5..e147f71f91b7 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -171,7 +171,7 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, /* Setup the pasid table: */ sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; ret = __domain_setup_first_level(iommu, dev, pasid, - FLPT_DEFAULT_DID, mm->pgd, + FLPT_DEFAULT_DID, __pa(mm->pgd), sflags, old); if (ret) goto out_unwind_iopf; @@ -214,7 +214,6 @@ struct iommu_domain *intel_svm_domain_alloc(struct device *dev, return ERR_PTR(-ENOMEM); domain->domain.ops = &intel_svm_domain_ops; - domain->use_first_level = true; INIT_LIST_HEAD(&domain->dev_pasids); INIT_LIST_HEAD(&domain->cache_tags); spin_lock_init(&domain->cache_lock); diff --git a/drivers/iommu/intel/trace.h b/drivers/iommu/intel/trace.h index 9defdae6ebae..6311ba3f1691 100644 --- a/drivers/iommu/intel/trace.h +++ b/drivers/iommu/intel/trace.h @@ -130,11 +130,6 @@ DEFINE_EVENT(cache_tag_log, cache_tag_unassign, TP_ARGS(tag) ); -DEFINE_EVENT(cache_tag_log, cache_tag_flush_all, - TP_PROTO(struct cache_tag *tag), - TP_ARGS(tag) -); - DECLARE_EVENT_CLASS(cache_tag_flush, TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end, unsigned long addr, unsigned long pages, unsigned long mask), diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 96425e92f313..7e8e2216c294 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -85,11 +85,6 @@ #define ARM_LPAE_PTE_NS (((arm_lpae_iopte)1) << 5) #define ARM_LPAE_PTE_VALID (((arm_lpae_iopte)1) << 0) -#define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) -/* Ignore the contiguous bit for block splitting */ -#define ARM_LPAE_PTE_ATTR_HI_MASK (ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM) -#define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ - ARM_LPAE_PTE_ATTR_HI_MASK) /* Software bit for solving coherency races */ #define ARM_LPAE_PTE_SW_SYNC (((arm_lpae_iopte)1) << 55) @@ -155,8 +150,6 @@ #define iopte_type(pte) \ (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK) -#define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK) - #define iopte_writeable_dirty(pte) \ (((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index a4b606c591da..060ebe330ee1 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2002,13 +2002,6 @@ static void iommu_domain_init(struct iommu_domain *domain, unsigned int type, domain->owner = ops; if (!domain->ops) domain->ops = ops->default_domain_ops; - - /* - * If not already set, assume all sizes by default; the driver - * may override this later - */ - if (!domain->pgsize_bitmap) - domain->pgsize_bitmap = ops->pgsize_bitmap; } static struct iommu_domain * diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 86244403b532..65fbd098f9e9 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -137,6 +137,57 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx, } } +static void iommufd_device_remove_vdev(struct iommufd_device *idev) +{ + struct iommufd_vdevice *vdev; + + mutex_lock(&idev->igroup->lock); + /* prevent new references from vdev */ + idev->destroying = true; + /* vdev has been completely destroyed by userspace */ + if (!idev->vdev) + goto out_unlock; + + vdev = 
iommufd_get_vdevice(idev->ictx, idev->vdev->obj.id); + /* + * An ongoing vdev destroy ioctl has removed the vdev from the object + * xarray, but has not finished iommufd_vdevice_destroy() yet as it + * needs the same mutex. We exit the locking then wait on wait_cnt + * reference for the vdev destruction. + */ + if (IS_ERR(vdev)) + goto out_unlock; + + /* Should never happen */ + if (WARN_ON(vdev != idev->vdev)) { + iommufd_put_object(idev->ictx, &vdev->obj); + goto out_unlock; + } + + /* + * vdev is still alive. Hold a users refcount to prevent racing with + * userspace destruction, then use iommufd_object_tombstone_user() to + * destroy it and leave a tombstone. + */ + refcount_inc(&vdev->obj.users); + iommufd_put_object(idev->ictx, &vdev->obj); + mutex_unlock(&idev->igroup->lock); + iommufd_object_tombstone_user(idev->ictx, &vdev->obj); + return; + +out_unlock: + mutex_unlock(&idev->igroup->lock); +} + +void iommufd_device_pre_destroy(struct iommufd_object *obj) +{ + struct iommufd_device *idev = + container_of(obj, struct iommufd_device, obj); + + /* Release the wait_cnt reference on this */ + iommufd_device_remove_vdev(idev); +} + void iommufd_device_destroy(struct iommufd_object *obj) { struct iommufd_device *idev = @@ -485,8 +536,7 @@ iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid) lockdep_assert_held(&idev->igroup->lock); - handle = - iommu_attach_handle_get(idev->igroup->group, pasid, 0); + handle = iommu_attach_handle_get(idev->igroup->group, pasid, 0); if (IS_ERR(handle)) return NULL; return to_iommufd_handle(handle); @@ -1049,7 +1099,7 @@ static int iommufd_access_change_ioas(struct iommufd_access *access, } if (cur_ioas) { - if (access->ops->unmap) { + if (!iommufd_access_is_internal(access) && access->ops->unmap) { mutex_unlock(&access->ioas_lock); access->ops->unmap(access->data, 0, ULONG_MAX); mutex_lock(&access->ioas_lock); @@ -1085,7 +1135,39 @@ void iommufd_access_destroy_object(struct iommufd_object *obj) if (access->ioas) WARN_ON(iommufd_access_change_ioas(access, NULL)); mutex_unlock(&access->ioas_lock); - iommufd_ctx_put(access->ictx); + if (!iommufd_access_is_internal(access)) + iommufd_ctx_put(access->ictx); +} + +static struct iommufd_access *__iommufd_access_create(struct iommufd_ctx *ictx) +{ + struct iommufd_access *access; + + /* + * There is no uAPI for the access object, but to keep things symmetric + * use the object infrastructure anyhow. + */ + access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS); + if (IS_ERR(access)) + return access; + + /* The calling driver is a user until iommufd_access_destroy() */ + refcount_inc(&access->obj.users); + mutex_init(&access->ioas_lock); + return access; +} + +struct iommufd_access *iommufd_access_create_internal(struct iommufd_ctx *ictx) +{ + struct iommufd_access *access; + + access = __iommufd_access_create(ictx); + if (IS_ERR(access)) + return access; + access->iova_alignment = PAGE_SIZE; + + iommufd_object_finalize(ictx, &access->obj); + return access; } /** @@ -1107,11 +1189,7 @@ iommufd_access_create(struct iommufd_ctx *ictx, { struct iommufd_access *access; - /* - * There is no uAPI for the access object, but to keep things symmetric - * use the object infrastructure anyhow. 
- */ - access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS); + access = __iommufd_access_create(ictx); if (IS_ERR(access)) return access; @@ -1123,13 +1201,10 @@ iommufd_access_create(struct iommufd_ctx *ictx, else access->iova_alignment = 1; - /* The calling driver is a user until iommufd_access_destroy() */ - refcount_inc(&access->obj.users); access->ictx = ictx; iommufd_ctx_get(ictx); iommufd_object_finalize(ictx, &access->obj); *id = access->obj.id; - mutex_init(&access->ioas_lock); return access; } EXPORT_SYMBOL_NS_GPL(iommufd_access_create, "IOMMUFD"); @@ -1174,6 +1249,22 @@ int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id) } EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, "IOMMUFD"); +int iommufd_access_attach_internal(struct iommufd_access *access, + struct iommufd_ioas *ioas) +{ + int rc; + + mutex_lock(&access->ioas_lock); + if (WARN_ON(access->ioas)) { + mutex_unlock(&access->ioas_lock); + return -EINVAL; + } + + rc = iommufd_access_change_ioas(access, ioas); + mutex_unlock(&access->ioas_lock); + return rc; +} + int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id) { int rc; @@ -1215,7 +1306,8 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, xa_lock(&ioas->iopt.access_list); xa_for_each(&ioas->iopt.access_list, index, access) { - if (!iommufd_lock_obj(&access->obj)) + if (!iommufd_lock_obj(&access->obj) || + iommufd_access_is_internal(access)) continue; xa_unlock(&ioas->iopt.access_list); @@ -1239,6 +1331,7 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length) { + bool internal = iommufd_access_is_internal(access); struct iopt_area_contig_iter iter; struct io_pagetable *iopt; unsigned long last_iova; @@ -1265,7 +1358,8 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, area, iopt_area_iova_to_index(area, iter.cur_iova), iopt_area_iova_to_index( area, - min(last_iova, iopt_area_last_iova(area)))); + min(last_iova, iopt_area_last_iova(area))), + internal); WARN_ON(!iopt_area_contig_done(&iter)); up_read(&iopt->iova_rwsem); mutex_unlock(&access->ioas_lock); @@ -1314,6 +1408,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length, struct page **out_pages, unsigned int flags) { + bool internal = iommufd_access_is_internal(access); struct iopt_area_contig_iter iter; struct io_pagetable *iopt; unsigned long last_iova; @@ -1322,7 +1417,8 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, /* Driver's ops don't support pin_pages */ if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && - WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap)) + WARN_ON(access->iova_alignment != PAGE_SIZE || + (!internal && !access->ops->unmap))) return -EINVAL; if (!length) @@ -1356,7 +1452,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, } rc = iopt_area_add_access(area, index, last_index, out_pages, - flags); + flags, internal); if (rc) goto err_remove; out_pages += last_index - index + 1; @@ -1379,7 +1475,8 @@ err_remove: iopt_area_iova_to_index(area, iter.cur_iova), iopt_area_iova_to_index( area, min(last_iova, - iopt_area_last_iova(area)))); + iopt_area_last_iova(area))), + internal); } up_read(&iopt->iova_rwsem); mutex_unlock(&access->ioas_lock); @@ -1453,6 +1550,7 @@ EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, "IOMMUFD"); int iommufd_get_hw_info(struct iommufd_ucmd 
*ucmd) { + const u32 SUPPORTED_FLAGS = IOMMU_HW_INFO_FLAG_INPUT_TYPE; struct iommu_hw_info *cmd = ucmd->cmd; void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr); const struct iommu_ops *ops; @@ -1462,9 +1560,14 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) void *data; int rc; - if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] || - cmd->__reserved[2]) + if (cmd->flags & ~SUPPORTED_FLAGS) return -EOPNOTSUPP; + if (cmd->__reserved[0] || cmd->__reserved[1] || cmd->__reserved[2]) + return -EOPNOTSUPP; + + /* Clear the type field since drivers don't support a random input */ + if (!(cmd->flags & IOMMU_HW_INFO_FLAG_INPUT_TYPE)) + cmd->in_data_type = IOMMU_HW_INFO_TYPE_DEFAULT; idev = iommufd_get_device(ucmd, cmd->dev_id); if (IS_ERR(idev)) @@ -1484,7 +1587,7 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) */ if (WARN_ON_ONCE(cmd->out_data_type == IOMMU_HW_INFO_TYPE_NONE)) { - rc = -ENODEV; + rc = -EOPNOTSUPP; goto out_free; } } else { diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c index 922cd1fe7ec2..6f1010da221c 100644 --- a/drivers/iommu/iommufd/driver.c +++ b/drivers/iommu/iommufd/driver.c @@ -3,38 +3,91 @@ */ #include "iommufd_private.h" -struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, - size_t size, - enum iommufd_object_type type) +/* Driver should use a per-structure helper in include/linux/iommufd.h */ +int _iommufd_object_depend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) { - struct iommufd_object *obj; + /* Reject self dependency that dead locks */ + if (obj_dependent == obj_depended) + return -EINVAL; + /* Only support dependency between two objects of the same type */ + if (obj_dependent->type != obj_depended->type) + return -EINVAL; + + refcount_inc(&obj_depended->users); + return 0; +} +EXPORT_SYMBOL_NS_GPL(_iommufd_object_depend, "IOMMUFD"); + +/* Driver should use a per-structure helper in include/linux/iommufd.h */ +void _iommufd_object_undepend(struct iommufd_object *obj_dependent, + struct iommufd_object *obj_depended) +{ + if (WARN_ON_ONCE(obj_dependent == obj_depended || + obj_dependent->type != obj_depended->type)) + return; + + refcount_dec(&obj_depended->users); +} +EXPORT_SYMBOL_NS_GPL(_iommufd_object_undepend, "IOMMUFD"); + +/* + * Allocate an @offset to return to user space to use for an mmap() syscall + * + * Driver should use a per-structure helper in include/linux/iommufd.h + */ +int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner, + phys_addr_t mmio_addr, size_t length, + unsigned long *offset) +{ + struct iommufd_mmap *immap; + unsigned long startp; int rc; - obj = kzalloc(size, GFP_KERNEL_ACCOUNT); - if (!obj) - return ERR_PTR(-ENOMEM); - obj->type = type; - /* Starts out bias'd by 1 until it is removed from the xarray */ - refcount_set(&obj->shortterm_users, 1); - refcount_set(&obj->users, 1); + if (!PAGE_ALIGNED(mmio_addr)) + return -EINVAL; + if (!length || !PAGE_ALIGNED(length)) + return -EINVAL; - /* - * Reserve an ID in the xarray but do not publish the pointer yet since - * the caller hasn't initialized it yet. Once the pointer is published - * in the xarray and visible to other threads we can't reliably destroy - * it anymore, so the caller must complete all errorable operations - * before calling iommufd_object_finalize(). 
- */ - rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b, - GFP_KERNEL_ACCOUNT); - if (rc) - goto out_free; - return obj; -out_free: - kfree(obj); - return ERR_PTR(rc); + immap = kzalloc(sizeof(*immap), GFP_KERNEL); + if (!immap) + return -ENOMEM; + immap->owner = owner; + immap->length = length; + immap->mmio_addr = mmio_addr; + + /* Skip the first page to ease caller identifying the returned offset */ + rc = mtree_alloc_range(&ictx->mt_mmap, &startp, immap, immap->length, + PAGE_SIZE, ULONG_MAX, GFP_KERNEL); + if (rc < 0) { + kfree(immap); + return rc; + } + + /* mmap() syscall will right-shift the offset in vma->vm_pgoff too */ + immap->vm_pgoff = startp >> PAGE_SHIFT; + *offset = startp; + return 0; +} +EXPORT_SYMBOL_NS_GPL(_iommufd_alloc_mmap, "IOMMUFD"); + +/* Driver should use a per-structure helper in include/linux/iommufd.h */ +void _iommufd_destroy_mmap(struct iommufd_ctx *ictx, + struct iommufd_object *owner, unsigned long offset) +{ + struct iommufd_mmap *immap; + + immap = mtree_erase(&ictx->mt_mmap, offset); + WARN_ON_ONCE(!immap || immap->owner != owner); + kfree(immap); +} +EXPORT_SYMBOL_NS_GPL(_iommufd_destroy_mmap, "IOMMUFD"); + +struct device *iommufd_vdevice_to_device(struct iommufd_vdevice *vdev) +{ + return vdev->idev->dev; } -EXPORT_SYMBOL_NS_GPL(_iommufd_object_alloc, "IOMMUFD"); +EXPORT_SYMBOL_NS_GPL(iommufd_vdevice_to_device, "IOMMUFD"); /* Caller should xa_lock(&viommu->vdevs) to protect the return value */ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, @@ -45,7 +98,7 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, lockdep_assert_held(&viommu->vdevs.xa_lock); vdev = xa_load(&viommu->vdevs, vdev_id); - return vdev ? vdev->dev : NULL; + return vdev ? iommufd_vdevice_to_device(vdev) : NULL; } EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD"); @@ -62,8 +115,8 @@ int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, xa_lock(&viommu->vdevs); xa_for_each(&viommu->vdevs, index, vdev) { - if (vdev->dev == dev) { - *vdev_id = vdev->id; + if (iommufd_vdevice_to_device(vdev) == dev) { + *vdev_id = vdev->virt_id; rc = 0; break; } diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c index e373b9eec7f5..fc4de63b0bce 100644 --- a/drivers/iommu/iommufd/eventq.c +++ b/drivers/iommu/iommufd/eventq.c @@ -427,8 +427,8 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) if (cmd->flags) return -EOPNOTSUPP; - fault = __iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT, - common.obj); + fault = __iommufd_object_alloc_ucmd(ucmd, fault, IOMMUFD_OBJ_FAULT, + common.obj); if (IS_ERR(fault)) return PTR_ERR(fault); @@ -437,10 +437,8 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]", ucmd->ictx, &iommufd_fault_fops); - if (fdno < 0) { - rc = fdno; - goto out_abort; - } + if (fdno < 0) + return fdno; cmd->out_fault_id = fault->common.obj.id; cmd->out_fault_fd = fdno; @@ -448,7 +446,6 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); if (rc) goto out_put_fdno; - iommufd_object_finalize(ucmd->ictx, &fault->common.obj); fd_install(fdno, fault->common.filep); @@ -456,9 +453,6 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) out_put_fdno: put_unused_fd(fdno); fput(fault->common.filep); -out_abort: - iommufd_object_abort_and_destroy(ucmd->ictx, &fault->common.obj); - return rc; } diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c 
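A hedged usage sketch for the mmap plumbing added in driver.c above: the offset that _iommufd_alloc_mmap() hands back (and that the Tegra241 CMDQV code reports through out_vintf_mmap_offset) is intended to be passed by user space as the file offset of an mmap() on the iommufd file descriptor, with the length taken from out_vintf_mmap_length. The wrapper below is illustrative user-space code under that assumption, not part of the patch.

	#include <stdint.h>
	#include <sys/mman.h>

	static void *map_vintf_page0(int iommufd, uint64_t offset, uint64_t length)
	{
		/* Map the VINTF PAGE0 MMIO window registered by the driver */
		return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
			    iommufd, (off_t)offset);
	}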
index 487779470261..fe789c2dc0c9 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -264,7 +264,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { - rc = -EINVAL; + rc = -EOPNOTSUPP; goto out_abort; } return hwpt_nested; @@ -309,10 +309,8 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, refcount_inc(&viommu->obj.users); hwpt_nested->parent = viommu->hwpt; - hwpt->domain = - viommu->ops->alloc_domain_nested(viommu, - flags & ~IOMMU_HWPT_FAULT_ID_VALID, - user_data); + hwpt->domain = viommu->ops->alloc_domain_nested( + viommu, flags & ~IOMMU_HWPT_FAULT_ID_VALID, user_data); if (IS_ERR(hwpt->domain)) { rc = PTR_ERR(hwpt->domain); hwpt->domain = NULL; @@ -323,7 +321,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD; if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { - rc = -EINVAL; + rc = -EOPNOTSUPP; goto out_abort; } return hwpt_nested; diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 8a790e597e12..c0360c450880 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -70,36 +70,45 @@ struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter) return iter->area; } -static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, - unsigned long length, - unsigned long iova_alignment, - unsigned long page_offset) +static bool __alloc_iova_check_range(unsigned long *start, unsigned long last, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) { - if (span->is_used || span->last_hole - span->start_hole < length - 1) + unsigned long aligned_start; + + /* ALIGN_UP() */ + if (check_add_overflow(*start, iova_alignment - 1, &aligned_start)) return false; + aligned_start &= ~(iova_alignment - 1); + aligned_start |= page_offset; - span->start_hole = ALIGN(span->start_hole, iova_alignment) | - page_offset; - if (span->start_hole > span->last_hole || - span->last_hole - span->start_hole < length - 1) + if (aligned_start >= last || last - aligned_start < length - 1) return false; + *start = aligned_start; return true; } -static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, +static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, unsigned long length, unsigned long iova_alignment, unsigned long page_offset) { - if (span->is_hole || span->last_used - span->start_used < length - 1) + if (span->is_used) return false; + return __alloc_iova_check_range(&span->start_hole, span->last_hole, + length, iova_alignment, page_offset); +} - span->start_used = ALIGN(span->start_used, iova_alignment) | - page_offset; - if (span->start_used > span->last_used || - span->last_used - span->start_used < length - 1) +static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) +{ + if (span->is_hole) return false; - return true; + return __alloc_iova_check_range(&span->start_used, span->last_used, + length, iova_alignment, page_offset); } /* @@ -719,6 +728,12 @@ again: goto out_unlock_iova; } + /* The area is locked by an object that has not been destroyed */ + if (area->num_locks) { + rc = -EBUSY; + goto out_unlock_iova; + } + if (area_first < start || 
area_last > last) { rc = -ENOENT; goto out_unlock_iova; @@ -743,8 +758,10 @@ again: iommufd_access_notify_unmap(iopt, area_first, length); /* Something is not responding to unmap requests. */ tries++; - if (WARN_ON(tries > 100)) - return -EDEADLOCK; + if (WARN_ON(tries > 100)) { + rc = -EDEADLOCK; + goto out_unmapped; + } goto again; } @@ -766,6 +783,7 @@ again: out_unlock_iova: up_write(&iopt->iova_rwsem); up_read(&iopt->domains_rwsem); +out_unmapped: if (unmapped) *unmapped = unmapped_bytes; return rc; @@ -1410,8 +1428,7 @@ out_unlock: } void iopt_remove_access(struct io_pagetable *iopt, - struct iommufd_access *access, - u32 iopt_access_list_id) + struct iommufd_access *access, u32 iopt_access_list_id) { down_write(&iopt->domains_rwsem); down_write(&iopt->iova_rwsem); diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h index 10c928a9a463..b6064f4ce4af 100644 --- a/drivers/iommu/iommufd/io_pagetable.h +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -48,6 +48,7 @@ struct iopt_area { int iommu_prot; bool prevent_access : 1; unsigned int num_accesses; + unsigned int num_locks; }; struct iopt_allowed { @@ -238,9 +239,9 @@ void iopt_pages_unfill_xarray(struct iopt_pages *pages, unsigned long start, int iopt_area_add_access(struct iopt_area *area, unsigned long start, unsigned long last, struct page **out_pages, - unsigned int flags); + unsigned int flags, bool lock_area); void iopt_area_remove_access(struct iopt_area *area, unsigned long start, - unsigned long last); + unsigned long last, bool unlock_area); int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte, void *data, unsigned long length, unsigned int flags); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 9ccc83341f32..0da2a81eedfa 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -7,6 +7,7 @@ #include <linux/iommu.h> #include <linux/iommufd.h> #include <linux/iova_bitmap.h> +#include <linux/maple_tree.h> #include <linux/rwsem.h> #include <linux/uaccess.h> #include <linux/xarray.h> @@ -44,6 +45,7 @@ struct iommufd_ctx { struct xarray groups; wait_queue_head_t destroy_wait; struct rw_semaphore ioas_creation_lock; + struct maple_tree mt_mmap; struct mutex sw_msi_lock; struct list_head sw_msi_list; @@ -55,6 +57,18 @@ struct iommufd_ctx { struct iommufd_ioas *vfio_ioas; }; +/* Entry for iommufd_ctx::mt_mmap */ +struct iommufd_mmap { + struct iommufd_object *owner; + + /* Page-shifted start position in mt_mmap to validate vma->vm_pgoff */ + unsigned long vm_pgoff; + + /* Physical range for io_remap_pfn_range() */ + phys_addr_t mmio_addr; + size_t length; +}; + /* * The IOVA to PFN map. The map automatically copies the PFNs into multiple * domains and permits sharing of PFNs between io_pagetable instances. This @@ -135,6 +149,7 @@ struct iommufd_ucmd { void __user *ubuffer; u32 user_size; void *cmd; + struct iommufd_object *new_obj; }; int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd, @@ -154,7 +169,7 @@ static inline bool iommufd_lock_obj(struct iommufd_object *obj) { if (!refcount_inc_not_zero(&obj->users)) return false; - if (!refcount_inc_not_zero(&obj->shortterm_users)) { + if (!refcount_inc_not_zero(&obj->wait_cnt)) { /* * If the caller doesn't already have a ref on obj this must be * called under the xa_lock. 
Otherwise the caller is holding a @@ -172,11 +187,11 @@ static inline void iommufd_put_object(struct iommufd_ctx *ictx, struct iommufd_object *obj) { /* - * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees - * a spurious !0 users with a 0 shortterm_users. + * Users first, then wait_cnt so that REMOVE_WAIT never sees a spurious + * !0 users with a 0 wait_cnt. */ refcount_dec(&obj->users); - if (refcount_dec_and_test(&obj->shortterm_users)) + if (refcount_dec_and_test(&obj->wait_cnt)) wake_up_interruptible_all(&ictx->destroy_wait); } @@ -187,7 +202,8 @@ void iommufd_object_finalize(struct iommufd_ctx *ictx, struct iommufd_object *obj); enum { - REMOVE_WAIT_SHORTTERM = 1, + REMOVE_WAIT = BIT(0), + REMOVE_OBJ_TOMBSTONE = BIT(1), }; int iommufd_object_remove(struct iommufd_ctx *ictx, struct iommufd_object *to_destroy, u32 id, @@ -195,15 +211,35 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, /* * The caller holds a users refcount and wants to destroy the object. At this - * point the caller has no shortterm_users reference and at least the xarray - * will be holding one. + * point the caller has no wait_cnt reference and at least the xarray will be + * holding one. */ static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx, struct iommufd_object *obj) { int ret; - ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM); + ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT); + + /* + * If there is a bug and we couldn't destroy the object then we did put + * back the caller's users refcount and will eventually try to free it + * again during close. + */ + WARN_ON(ret); +} + +/* + * Similar to iommufd_object_destroy_user(), except that the object ID is left + * reserved/tombstoned. + */ +static inline void iommufd_object_tombstone_user(struct iommufd_ctx *ictx, + struct iommufd_object *obj) +{ + int ret; + + ret = iommufd_object_remove(ictx, obj, obj->id, + REMOVE_WAIT | REMOVE_OBJ_TOMBSTONE); /* * If there is a bug and we couldn't destroy the object then we did put @@ -230,6 +266,15 @@ iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, iommufd_object_remove(ictx, obj, obj->id, 0); } +/* + * Callers of these normal object allocators must call iommufd_object_finalize() + * to finalize the object, or call iommufd_object_abort_and_destroy() to revert + * the allocation. + */ +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type); + #define __iommufd_object_alloc(ictx, ptr, type, obj) \ container_of(_iommufd_object_alloc( \ ictx, \ @@ -243,6 +288,26 @@ iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, __iommufd_object_alloc(ictx, ptr, type, obj) /* + * Callers of these _ucmd allocators should not call iommufd_object_finalize() + * or iommufd_object_abort_and_destroy(), as the core automatically does that. + */ +struct iommufd_object * +_iommufd_object_alloc_ucmd(struct iommufd_ucmd *ucmd, size_t size, + enum iommufd_object_type type); + +#define __iommufd_object_alloc_ucmd(ucmd, ptr, type, obj) \ + container_of(_iommufd_object_alloc_ucmd( \ + ucmd, \ + sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \ + offsetof(typeof(*(ptr)), \ + obj) != 0), \ + type), \ + typeof(*(ptr)), obj) + +#define iommufd_object_alloc_ucmd(ucmd, ptr, type) \ + __iommufd_object_alloc_ucmd(ucmd, ptr, type, obj) + +/* * The IO Address Space (IOAS) pagetable is a virtual page table backed by the * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. 
The * mapping is copied into all of the associated domains and made available to @@ -266,8 +331,7 @@ struct iommufd_ioas { static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx, u32 id) { - return container_of(iommufd_get_object(ictx, id, - IOMMUFD_OBJ_IOAS), + return container_of(iommufd_get_object(ictx, id, IOMMUFD_OBJ_IOAS), struct iommufd_ioas, obj); } @@ -425,6 +489,8 @@ struct iommufd_device { /* always the physical device */ struct device *dev; bool enforce_cache_coherency; + struct iommufd_vdevice *vdev; + bool destroying; }; static inline struct iommufd_device * @@ -435,6 +501,7 @@ iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id) struct iommufd_device, obj); } +void iommufd_device_pre_destroy(struct iommufd_object *obj); void iommufd_device_destroy(struct iommufd_object *obj); int iommufd_get_hw_info(struct iommufd_ucmd *ucmd); @@ -452,10 +519,32 @@ struct iommufd_access { int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access); void iopt_remove_access(struct io_pagetable *iopt, - struct iommufd_access *access, - u32 iopt_access_list_id); + struct iommufd_access *access, u32 iopt_access_list_id); void iommufd_access_destroy_object(struct iommufd_object *obj); +/* iommufd_access for internal use */ +static inline bool iommufd_access_is_internal(struct iommufd_access *access) +{ + return !access->ictx; +} + +struct iommufd_access *iommufd_access_create_internal(struct iommufd_ctx *ictx); + +static inline void +iommufd_access_destroy_internal(struct iommufd_ctx *ictx, + struct iommufd_access *access) +{ + iommufd_object_destroy_user(ictx, &access->obj); +} + +int iommufd_access_attach_internal(struct iommufd_access *access, + struct iommufd_ioas *ioas); + +static inline void iommufd_access_detach_internal(struct iommufd_access *access) +{ + iommufd_access_detach(access); +} + struct iommufd_eventq { struct iommufd_object obj; struct iommufd_ctx *ictx; @@ -528,7 +617,7 @@ struct iommufd_veventq { struct list_head node; /* for iommufd_viommu::veventqs */ struct iommufd_vevent lost_events_header; - unsigned int type; + enum iommu_veventq_type type; unsigned int depth; /* Use common.lock for protection */ @@ -583,7 +672,8 @@ iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id) } static inline struct iommufd_veventq * -iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, u32 type) +iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, + enum iommu_veventq_type type) { struct iommufd_veventq *veventq, *next; @@ -600,14 +690,17 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_viommu_destroy(struct iommufd_object *obj); int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_vdevice_destroy(struct iommufd_object *obj); +void iommufd_vdevice_abort(struct iommufd_object *obj); +int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd); +void iommufd_hw_queue_destroy(struct iommufd_object *obj); -struct iommufd_vdevice { - struct iommufd_object obj; - struct iommufd_ctx *ictx; - struct iommufd_viommu *viommu; - struct device *dev; - u64 id; /* per-vIOMMU virtual ID */ -}; +static inline struct iommufd_vdevice * +iommufd_get_vdevice(struct iommufd_ctx *ictx, u32 id) +{ + return container_of(iommufd_get_object(ictx, id, + IOMMUFD_OBJ_VDEVICE), + struct iommufd_vdevice, obj); +} #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index 1cd7e8394129..8fc618b2bcf9 100644 --- 
a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -227,6 +227,23 @@ struct iommu_hwpt_invalidate_selftest { #define IOMMU_VIOMMU_TYPE_SELFTEST 0xdeadbeef +/** + * struct iommu_viommu_selftest - vIOMMU data for Mock driver + * (IOMMU_VIOMMU_TYPE_SELFTEST) + * @in_data: Input random data from user space + * @out_data: Output data (matching @in_data) to user space + * @out_mmap_offset: The offset argument for mmap syscall + * @out_mmap_length: The length argument for mmap syscall + * + * Simply set @out_data=@in_data for a loopback test + */ +struct iommu_viommu_selftest { + __u32 in_data; + __u32 out_data; + __aligned_u64 out_mmap_offset; + __aligned_u64 out_mmap_length; +}; + /* Should not be equal to any defined value in enum iommu_viommu_invalidate_data_type */ #define IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST 0xdeadbeef #define IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID 0xdadbeef @@ -252,4 +269,7 @@ struct iommu_viommu_event_selftest { __u32 virt_id; }; +#define IOMMU_HW_QUEUE_TYPE_SELFTEST 0xdeadbeef +#define IOMMU_TEST_HW_QUEUE_MAX 2 + #endif diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index 39a86a4a1d3a..4514575818fc 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -407,7 +407,6 @@ void iova_bitmap_set(struct iova_bitmap *bitmap, update_indexes: if (unlikely(!iova_bitmap_mapped_range(mapped, iova, length))) { - /* * The attempt to advance the base index to @iova * may fail if it's out of bounds, or pinning the pages diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 3df468f64e7d..15af7ced0501 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -23,12 +23,72 @@ #include "iommufd_test.h" struct iommufd_object_ops { + void (*pre_destroy)(struct iommufd_object *obj); void (*destroy)(struct iommufd_object *obj); void (*abort)(struct iommufd_object *obj); }; static const struct iommufd_object_ops iommufd_object_ops[]; static struct miscdevice vfio_misc_dev; +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type) +{ + struct iommufd_object *obj; + int rc; + + obj = kzalloc(size, GFP_KERNEL_ACCOUNT); + if (!obj) + return ERR_PTR(-ENOMEM); + obj->type = type; + /* Starts out bias'd by 1 until it is removed from the xarray */ + refcount_set(&obj->wait_cnt, 1); + refcount_set(&obj->users, 1); + + /* + * Reserve an ID in the xarray but do not publish the pointer yet since + * the caller hasn't initialized it yet. Once the pointer is published + * in the xarray and visible to other threads we can't reliably destroy + * it anymore, so the caller must complete all errorable operations + * before calling iommufd_object_finalize(). + */ + rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b, + GFP_KERNEL_ACCOUNT); + if (rc) + goto out_free; + return obj; +out_free: + kfree(obj); + return ERR_PTR(rc); +} + +struct iommufd_object *_iommufd_object_alloc_ucmd(struct iommufd_ucmd *ucmd, + size_t size, + enum iommufd_object_type type) +{ + struct iommufd_object *new_obj; + + /* Something is coded wrong if this is hit */ + if (WARN_ON(ucmd->new_obj)) + return ERR_PTR(-EBUSY); + + /* + * An abort op means that its caller needs to invoke it within a lock in + * the caller. So it doesn't work with _iommufd_object_alloc_ucmd() that + * will invoke the abort op in iommufd_object_abort_and_destroy(), which + * must be outside the caller's lock. 
+ */ + if (WARN_ON(iommufd_object_ops[type].abort)) + return ERR_PTR(-EOPNOTSUPP); + + new_obj = _iommufd_object_alloc(ucmd->ictx, size, type); + if (IS_ERR(new_obj)) + return new_obj; + + ucmd->new_obj = new_obj; + return new_obj; +} + /* * Allow concurrent access to the object. * @@ -95,20 +155,22 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, return obj; } -static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx, - struct iommufd_object *to_destroy) +static int iommufd_object_dec_wait(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy) { - if (refcount_dec_and_test(&to_destroy->shortterm_users)) + if (refcount_dec_and_test(&to_destroy->wait_cnt)) return 0; + if (iommufd_object_ops[to_destroy->type].pre_destroy) + iommufd_object_ops[to_destroy->type].pre_destroy(to_destroy); + if (wait_event_timeout(ictx->destroy_wait, - refcount_read(&to_destroy->shortterm_users) == - 0, - msecs_to_jiffies(60000))) + refcount_read(&to_destroy->wait_cnt) == 0, + msecs_to_jiffies(60000))) return 0; pr_crit("Time out waiting for iommufd object to become free\n"); - refcount_inc(&to_destroy->shortterm_users); + refcount_inc(&to_destroy->wait_cnt); return -EBUSY; } @@ -122,17 +184,18 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, { struct iommufd_object *obj; XA_STATE(xas, &ictx->objects, id); - bool zerod_shortterm = false; + bool zerod_wait_cnt = false; int ret; /* - * The purpose of the shortterm_users is to ensure deterministic - * destruction of objects used by external drivers and destroyed by this - * function. Any temporary increment of the refcount must increment - * shortterm_users, such as during ioctl execution. + * The purpose of the wait_cnt is to ensure deterministic destruction + * of objects used by external drivers and destroyed by this function. + * Incrementing this wait_cnt should either be short lived, such as + * during ioctl execution, or be revoked and blocked during + * pre_destroy(), such as vdev holding the idev's refcount. */ - if (flags & REMOVE_WAIT_SHORTTERM) { - ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy); + if (flags & REMOVE_WAIT) { + ret = iommufd_object_dec_wait(ictx, to_destroy); if (ret) { /* * We have a bug. Put back the callers reference and @@ -141,7 +204,7 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, refcount_dec(&to_destroy->users); return ret; } - zerod_shortterm = true; + zerod_wait_cnt = true; } xa_lock(&ictx->objects); @@ -167,17 +230,17 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, goto err_xa; } - xas_store(&xas, NULL); + xas_store(&xas, (flags & REMOVE_OBJ_TOMBSTONE) ? XA_ZERO_ENTRY : NULL); if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj)) ictx->vfio_ioas = NULL; xa_unlock(&ictx->objects); /* - * Since users is zero any positive users_shortterm must be racing + * Since users is zero any positive wait_cnt must be racing * iommufd_put_object(), or we have a bug. 
*/ - if (!zerod_shortterm) { - ret = iommufd_object_dec_wait_shortterm(ictx, obj); + if (!zerod_wait_cnt) { + ret = iommufd_object_dec_wait(ictx, obj); if (WARN_ON(ret)) return ret; } @@ -187,9 +250,9 @@ int iommufd_object_remove(struct iommufd_ctx *ictx, return 0; err_xa: - if (zerod_shortterm) { + if (zerod_wait_cnt) { /* Restore the xarray owned reference */ - refcount_set(&obj->shortterm_users, 1); + refcount_set(&obj->wait_cnt, 1); } xa_unlock(&ictx->objects); @@ -226,6 +289,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); xa_init(&ictx->groups); ictx->file = filp; + mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE); init_waitqueue_head(&ictx->destroy_wait); mutex_init(&ictx->sw_msi_lock); INIT_LIST_HEAD(&ictx->sw_msi_list); @@ -252,19 +316,41 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp) while (!xa_empty(&ictx->objects)) { unsigned int destroyed = 0; unsigned long index; + bool empty = true; + /* + * We can't use xa_empty() to end the loop as the tombstones + * are stored as XA_ZERO_ENTRY in the xarray. However + * xa_for_each() automatically converts them to NULL and skips + * them causing xa_empty() to be kept false. Thus once + * xa_for_each() finds no further !NULL entries the loop is + * done. + */ xa_for_each(&ictx->objects, index, obj) { + empty = false; if (!refcount_dec_if_one(&obj->users)) continue; + destroyed++; xa_erase(&ictx->objects, index); iommufd_object_ops[obj->type].destroy(obj); kfree(obj); } + + if (empty) + break; + /* Bug related to users refcount */ if (WARN_ON(!destroyed)) break; } + + /* + * There may be some tombstones left over from + * iommufd_object_tombstone_user() + */ + xa_destroy(&ictx->objects); + WARN_ON(!xa_empty(&ictx->groups)); mutex_destroy(&ictx->sw_msi_lock); @@ -305,6 +391,7 @@ union ucmd_buffer { struct iommu_destroy destroy; struct iommu_fault_alloc fault; struct iommu_hw_info info; + struct iommu_hw_queue_alloc hw_queue; struct iommu_hwpt_alloc hwpt; struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; struct iommu_hwpt_invalidate cache; @@ -347,6 +434,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { struct iommu_fault_alloc, out_fault_fd), IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, __reserved), + IOCTL_OP(IOMMU_HW_QUEUE_ALLOC, iommufd_hw_queue_alloc_ioctl, + struct iommu_hw_queue_alloc, length), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, __reserved), IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap, @@ -417,14 +506,83 @@ static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd, if (ret) return ret; ret = op->execute(&ucmd); + + if (ucmd.new_obj) { + if (ret) + iommufd_object_abort_and_destroy(ictx, ucmd.new_obj); + else + iommufd_object_finalize(ictx, ucmd.new_obj); + } return ret; } +static void iommufd_fops_vma_open(struct vm_area_struct *vma) +{ + struct iommufd_mmap *immap = vma->vm_private_data; + + refcount_inc(&immap->owner->users); +} + +static void iommufd_fops_vma_close(struct vm_area_struct *vma) +{ + struct iommufd_mmap *immap = vma->vm_private_data; + + refcount_dec(&immap->owner->users); +} + +static const struct vm_operations_struct iommufd_vma_ops = { + .open = iommufd_fops_vma_open, + .close = iommufd_fops_vma_close, +}; + +/* The vm_pgoff must be pre-allocated from mt_mmap, and given to user space */ +static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct 
iommufd_ctx *ictx = filp->private_data; + size_t length = vma->vm_end - vma->vm_start; + struct iommufd_mmap *immap; + int rc; + + if (!PAGE_ALIGNED(length)) + return -EINVAL; + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + if (vma->vm_flags & VM_EXEC) + return -EPERM; + + /* vma->vm_pgoff carries a page-shifted start position to an immap */ + immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff << PAGE_SHIFT); + if (!immap) + return -ENXIO; + /* + * mtree_load() returns the immap for any contained mmio_addr, so only + * allow the exact immap thing to be mapped + */ + if (vma->vm_pgoff != immap->vm_pgoff || length != immap->length) + return -ENXIO; + + vma->vm_pgoff = 0; + vma->vm_private_data = immap; + vma->vm_ops = &iommufd_vma_ops; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + rc = io_remap_pfn_range(vma, vma->vm_start, + immap->mmio_addr >> PAGE_SHIFT, length, + vma->vm_page_prot); + if (rc) + return rc; + + /* vm_ops.open won't be called for mmap itself. */ + refcount_inc(&immap->owner->users); + return rc; +} + static const struct file_operations iommufd_fops = { .owner = THIS_MODULE, .open = iommufd_fops_open, .release = iommufd_fops_release, .unlocked_ioctl = iommufd_fops_ioctl, + .mmap = iommufd_fops_mmap, }; /** @@ -498,11 +656,15 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { .destroy = iommufd_access_destroy_object, }, [IOMMUFD_OBJ_DEVICE] = { + .pre_destroy = iommufd_device_pre_destroy, .destroy = iommufd_device_destroy, }, [IOMMUFD_OBJ_FAULT] = { .destroy = iommufd_fault_destroy, }, + [IOMMUFD_OBJ_HW_QUEUE] = { + .destroy = iommufd_hw_queue_destroy, + }, [IOMMUFD_OBJ_HWPT_PAGING] = { .destroy = iommufd_hwpt_paging_destroy, .abort = iommufd_hwpt_paging_abort, @@ -516,6 +678,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { }, [IOMMUFD_OBJ_VDEVICE] = { .destroy = iommufd_vdevice_destroy, + .abort = iommufd_vdevice_abort, }, [IOMMUFD_OBJ_VEVENTQ] = { .destroy = iommufd_veventq_destroy, @@ -539,7 +702,6 @@ static struct miscdevice iommu_misc_dev = { .mode = 0660, }; - static struct miscdevice vfio_misc_dev = { .minor = VFIO_MINOR, .name = "vfio", diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 3427749bc5ce..c3433b845561 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -1287,8 +1287,7 @@ static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages, } static struct iopt_pages *iopt_alloc_pages(unsigned long start_byte, - unsigned long length, - bool writable) + unsigned long length, bool writable) { struct iopt_pages *pages; @@ -1328,7 +1327,7 @@ struct iopt_pages *iopt_alloc_user_pages(void __user *uptr, struct iopt_pages *pages; unsigned long end; void __user *uptr_down = - (void __user *) ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE); + (void __user *)ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE); if (check_add_overflow((unsigned long)uptr, length, &end)) return ERR_PTR(-EOVERFLOW); @@ -2104,6 +2103,7 @@ iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index, * @last_index: Inclusive last page index * @out_pages: Output list of struct page's representing the PFNs * @flags: IOMMUFD_ACCESS_RW_* flags + * @lock_area: Fail userspace munmap on this area * * Record that an in-kernel access will be accessing the pages, ensure they are * pinned, and return the PFNs as a simple list of 'struct page *'. 
@@ -2111,8 +2111,8 @@ iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index, * This should be undone through a matching call to iopt_area_remove_access() */ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index, - unsigned long last_index, struct page **out_pages, - unsigned int flags) + unsigned long last_index, struct page **out_pages, + unsigned int flags, bool lock_area) { struct iopt_pages *pages = area->pages; struct iopt_pages_access *access; @@ -2125,6 +2125,8 @@ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index, access = iopt_pages_get_exact_access(pages, start_index, last_index); if (access) { area->num_accesses++; + if (lock_area) + area->num_locks++; access->users++; iopt_pages_fill_from_xarray(pages, start_index, last_index, out_pages); @@ -2146,6 +2148,8 @@ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index, access->node.last = last_index; access->users = 1; area->num_accesses++; + if (lock_area) + area->num_locks++; interval_tree_insert(&access->node, &pages->access_itree); mutex_unlock(&pages->mutex); return 0; @@ -2162,12 +2166,13 @@ err_unlock: * @area: The source of PFNs * @start_index: First page index * @last_index: Inclusive last page index + * @unlock_area: Must match the matching iopt_area_add_access()'s lock_area * * Undo iopt_area_add_access() and unpin the pages if necessary. The caller * must stop using the PFNs before calling this. */ void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index, - unsigned long last_index) + unsigned long last_index, bool unlock_area) { struct iopt_pages *pages = area->pages; struct iopt_pages_access *access; @@ -2178,6 +2183,10 @@ void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index, goto out_unlock; WARN_ON(area->num_accesses == 0 || access->users == 0); + if (unlock_area) { + WARN_ON(area->num_locks == 0); + area->num_locks--; + } area->num_accesses--; access->users--; if (access->users) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 6bd0abf9a641..61686603c769 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -138,7 +138,6 @@ to_mock_domain(struct iommu_domain *domain) struct mock_iommu_domain_nested { struct iommu_domain domain; struct mock_viommu *mock_viommu; - struct mock_iommu_domain *parent; u32 iotlb[MOCK_NESTED_DOMAIN_IOTLB_NUM]; }; @@ -151,6 +150,11 @@ to_mock_nested(struct iommu_domain *domain) struct mock_viommu { struct iommufd_viommu core; struct mock_iommu_domain *s2_parent; + struct mock_hw_queue *hw_queue[IOMMU_TEST_HW_QUEUE_MAX]; + struct mutex queue_mutex; + + unsigned long mmap_offset; + u32 *page; /* Mmap page to test u32 type of in_data */ }; static inline struct mock_viommu *to_mock_viommu(struct iommufd_viommu *viommu) @@ -158,6 +162,19 @@ static inline struct mock_viommu *to_mock_viommu(struct iommufd_viommu *viommu) return container_of(viommu, struct mock_viommu, core); } +struct mock_hw_queue { + struct iommufd_hw_queue core; + struct mock_viommu *mock_viommu; + struct mock_hw_queue *prev; + u16 index; +}; + +static inline struct mock_hw_queue * +to_mock_hw_queue(struct iommufd_hw_queue *hw_queue) +{ + return container_of(hw_queue, struct mock_hw_queue, core); +} + enum selftest_obj_type { TYPE_IDEV, }; @@ -288,10 +305,15 @@ static struct iommu_domain mock_blocking_domain = { .ops = &mock_blocking_ops, }; -static void *mock_domain_hw_info(struct device *dev, u32 *length, u32 *type) +static 
void *mock_domain_hw_info(struct device *dev, u32 *length, + enum iommu_hw_info_type *type) { struct iommu_test_hw_info *info; + if (*type != IOMMU_HW_INFO_TYPE_DEFAULT && + *type != IOMMU_HW_INFO_TYPE_SELFTEST) + return ERR_PTR(-EOPNOTSUPP); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return ERR_PTR(-ENOMEM); @@ -434,7 +456,6 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent, mock_nested = __mock_domain_alloc_nested(user_data); if (IS_ERR(mock_nested)) return ERR_CAST(mock_nested); - mock_nested->parent = mock_parent; return &mock_nested->domain; } @@ -671,9 +692,15 @@ static void mock_viommu_destroy(struct iommufd_viommu *viommu) { struct mock_iommu_device *mock_iommu = container_of( viommu->iommu_dev, struct mock_iommu_device, iommu_dev); + struct mock_viommu *mock_viommu = to_mock_viommu(viommu); if (refcount_dec_and_test(&mock_iommu->users)) complete(&mock_iommu->complete); + if (mock_viommu->mmap_offset) + iommufd_viommu_destroy_mmap(&mock_viommu->core, + mock_viommu->mmap_offset); + free_page((unsigned long)mock_viommu->page); + mutex_destroy(&mock_viommu->queue_mutex); /* iommufd core frees mock_viommu and viommu */ } @@ -692,7 +719,6 @@ mock_viommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, if (IS_ERR(mock_nested)) return ERR_CAST(mock_nested); mock_nested->mock_viommu = mock_viommu; - mock_nested->parent = mock_viommu->s2_parent; return &mock_nested->domain; } @@ -766,31 +792,149 @@ out: return rc; } +static size_t mock_viommu_get_hw_queue_size(struct iommufd_viommu *viommu, + enum iommu_hw_queue_type queue_type) +{ + if (queue_type != IOMMU_HW_QUEUE_TYPE_SELFTEST) + return 0; + return HW_QUEUE_STRUCT_SIZE(struct mock_hw_queue, core); +} + +static void mock_hw_queue_destroy(struct iommufd_hw_queue *hw_queue) +{ + struct mock_hw_queue *mock_hw_queue = to_mock_hw_queue(hw_queue); + struct mock_viommu *mock_viommu = mock_hw_queue->mock_viommu; + + mutex_lock(&mock_viommu->queue_mutex); + mock_viommu->hw_queue[mock_hw_queue->index] = NULL; + if (mock_hw_queue->prev) + iommufd_hw_queue_undepend(mock_hw_queue, mock_hw_queue->prev, + core); + mutex_unlock(&mock_viommu->queue_mutex); +} + +/* Test iommufd_hw_queue_depend/undepend() */ +static int mock_hw_queue_init_phys(struct iommufd_hw_queue *hw_queue, u32 index, + phys_addr_t base_addr_pa) +{ + struct mock_viommu *mock_viommu = to_mock_viommu(hw_queue->viommu); + struct mock_hw_queue *mock_hw_queue = to_mock_hw_queue(hw_queue); + struct mock_hw_queue *prev = NULL; + int rc = 0; + + if (index >= IOMMU_TEST_HW_QUEUE_MAX) + return -EINVAL; + + mutex_lock(&mock_viommu->queue_mutex); + + if (mock_viommu->hw_queue[index]) { + rc = -EEXIST; + goto unlock; + } + + if (index) { + prev = mock_viommu->hw_queue[index - 1]; + if (!prev) { + rc = -EIO; + goto unlock; + } + } + + /* + * Test to catch a kernel bug if the core converted the physical address + * incorrectly. Let mock_domain_iova_to_phys() WARN_ON if it fails. 
+ */ + if (base_addr_pa != iommu_iova_to_phys(&mock_viommu->s2_parent->domain, + hw_queue->base_addr)) { + rc = -EFAULT; + goto unlock; + } + + if (prev) { + rc = iommufd_hw_queue_depend(mock_hw_queue, prev, core); + if (rc) + goto unlock; + } + + mock_hw_queue->prev = prev; + mock_hw_queue->mock_viommu = mock_viommu; + mock_viommu->hw_queue[index] = mock_hw_queue; + + hw_queue->destroy = &mock_hw_queue_destroy; +unlock: + mutex_unlock(&mock_viommu->queue_mutex); + return rc; +} + static struct iommufd_viommu_ops mock_viommu_ops = { .destroy = mock_viommu_destroy, .alloc_domain_nested = mock_viommu_alloc_domain_nested, .cache_invalidate = mock_viommu_cache_invalidate, + .get_hw_queue_size = mock_viommu_get_hw_queue_size, + .hw_queue_init_phys = mock_hw_queue_init_phys, }; -static struct iommufd_viommu *mock_viommu_alloc(struct device *dev, - struct iommu_domain *domain, - struct iommufd_ctx *ictx, - unsigned int viommu_type) +static size_t mock_get_viommu_size(struct device *dev, + enum iommu_viommu_type viommu_type) { - struct mock_iommu_device *mock_iommu = - iommu_get_iommu_dev(dev, struct mock_iommu_device, iommu_dev); - struct mock_viommu *mock_viommu; - if (viommu_type != IOMMU_VIOMMU_TYPE_SELFTEST) - return ERR_PTR(-EOPNOTSUPP); + return 0; + return VIOMMU_STRUCT_SIZE(struct mock_viommu, core); +} + +static int mock_viommu_init(struct iommufd_viommu *viommu, + struct iommu_domain *parent_domain, + const struct iommu_user_data *user_data) +{ + struct mock_iommu_device *mock_iommu = container_of( + viommu->iommu_dev, struct mock_iommu_device, iommu_dev); + struct mock_viommu *mock_viommu = to_mock_viommu(viommu); + struct iommu_viommu_selftest data; + int rc; + + if (user_data) { + rc = iommu_copy_struct_from_user( + &data, user_data, IOMMU_VIOMMU_TYPE_SELFTEST, out_data); + if (rc) + return rc; + + /* Allocate two pages */ + mock_viommu->page = + (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!mock_viommu->page) + return -ENOMEM; - mock_viommu = iommufd_viommu_alloc(ictx, struct mock_viommu, core, - &mock_viommu_ops); - if (IS_ERR(mock_viommu)) - return ERR_CAST(mock_viommu); + rc = iommufd_viommu_alloc_mmap(&mock_viommu->core, + __pa(mock_viommu->page), + PAGE_SIZE * 2, + &mock_viommu->mmap_offset); + if (rc) + goto err_free_page; + + /* For loopback tests on both the page and out_data */ + *mock_viommu->page = data.in_data; + data.out_data = data.in_data; + data.out_mmap_length = PAGE_SIZE * 2; + data.out_mmap_offset = mock_viommu->mmap_offset; + rc = iommu_copy_struct_to_user( + user_data, &data, IOMMU_VIOMMU_TYPE_SELFTEST, out_data); + if (rc) + goto err_destroy_mmap; + } refcount_inc(&mock_iommu->users); - return &mock_viommu->core; + mutex_init(&mock_viommu->queue_mutex); + mock_viommu->s2_parent = to_mock_domain(parent_domain); + + viommu->ops = &mock_viommu_ops; + return 0; + +err_destroy_mmap: + iommufd_viommu_destroy_mmap(&mock_viommu->core, + mock_viommu->mmap_offset); +err_free_page: + free_page((unsigned long)mock_viommu->page); + return rc; } static const struct iommu_ops mock_ops = { @@ -801,7 +945,6 @@ static const struct iommu_ops mock_ops = { .default_domain = &mock_blocking_domain, .blocked_domain = &mock_blocking_domain, .owner = THIS_MODULE, - .pgsize_bitmap = MOCK_IO_PAGE_SIZE, .hw_info = mock_domain_hw_info, .domain_alloc_paging_flags = mock_domain_alloc_paging_flags, .domain_alloc_nested = mock_domain_alloc_nested, @@ -810,7 +953,8 @@ static const struct iommu_ops mock_ops = { .probe_device = mock_probe_device, .page_response = 
mock_domain_page_response, .user_pasid_table = true, - .viommu_alloc = mock_viommu_alloc, + .get_viommu_size = mock_get_viommu_size, + .viommu_init = mock_viommu_init, .default_domain_ops = &(struct iommu_domain_ops){ .free = mock_domain_free, @@ -1216,9 +1360,8 @@ static int iommufd_test_md_check_refs(struct iommufd_ucmd *ucmd, return 0; } -static int iommufd_test_md_check_iotlb(struct iommufd_ucmd *ucmd, - u32 mockpt_id, unsigned int iotlb_id, - u32 iotlb) +static int iommufd_test_md_check_iotlb(struct iommufd_ucmd *ucmd, u32 mockpt_id, + unsigned int iotlb_id, u32 iotlb) { struct mock_iommu_domain_nested *mock_nested; struct iommufd_hw_pagetable *hwpt; @@ -1491,7 +1634,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd, int rc; /* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */ - if (length > 16*1024*1024) + if (length > 16 * 1024 * 1024) return -ENOMEM; if (flags & ~(MOCK_FLAGS_ACCESS_WRITE | MOCK_FLAGS_ACCESS_SYZ)) @@ -1508,7 +1651,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd, if (flags & MOCK_FLAGS_ACCESS_SYZ) iova = iommufd_test_syz_conv_iova(staccess->access, - &cmd->access_pages.iova); + &cmd->access_pages.iova); npages = (ALIGN(iova + length, PAGE_SIZE) - ALIGN_DOWN(iova, PAGE_SIZE)) / @@ -1584,7 +1727,7 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd, int rc; /* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */ - if (length > 16*1024*1024) + if (length > 16 * 1024 * 1024) return -ENOMEM; if (flags & ~(MOCK_ACCESS_RW_WRITE | MOCK_ACCESS_RW_SLOW_PATH | @@ -1610,7 +1753,7 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd, if (flags & MOCK_FLAGS_ACCESS_SYZ) iova = iommufd_test_syz_conv_iova(staccess->access, - &cmd->access_rw.iova); + &cmd->access_rw.iova); rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags); if (rc) @@ -1665,7 +1808,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, goto out_put; } - if (copy_from_user(tmp, uptr,DIV_ROUND_UP(max, BITS_PER_BYTE))) { + if (copy_from_user(tmp, uptr, DIV_ROUND_UP(max, BITS_PER_BYTE))) { rc = -EFAULT; goto out_free; } @@ -1701,7 +1844,7 @@ out_put: static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd, struct iommu_test_cmd *cmd) { - struct iopf_fault event = { }; + struct iopf_fault event = {}; struct iommufd_device *idev; idev = iommufd_get_device(ucmd, cmd->trigger_iopf.dev_id); @@ -1832,8 +1975,7 @@ static int iommufd_test_pasid_attach(struct iommufd_ucmd *ucmd, rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); if (rc) - iommufd_device_detach(sobj->idev.idev, - cmd->pasid_attach.pasid); + iommufd_device_detach(sobj->idev.idev, cmd->pasid_attach.pasid); out_sobj: iommufd_put_object(ucmd->ictx, &sobj->obj); @@ -2004,8 +2146,8 @@ int __init iommufd_test_init(void) goto err_bus; rc = iommu_device_register_bus(&mock_iommu.iommu_dev, &mock_ops, - &iommufd_mock_bus_type.bus, - &iommufd_mock_bus_type.nb); + &iommufd_mock_bus_type.bus, + &iommufd_mock_bus_type.nb); if (rc) goto err_sysfs; diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c index 01df2b985f02..2ca5809b238b 100644 --- a/drivers/iommu/iommufd/viommu.c +++ b/drivers/iommu/iommufd/viommu.c @@ -17,10 +17,16 @@ void iommufd_viommu_destroy(struct iommufd_object *obj) int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) { struct iommu_viommu_alloc *cmd = ucmd->cmd; + const struct iommu_user_data user_data = { + .type = cmd->type, + .uptr = u64_to_user_ptr(cmd->data_uptr), + .len = cmd->data_len, + }; struct 
iommufd_hwpt_paging *hwpt_paging; struct iommufd_viommu *viommu; struct iommufd_device *idev; const struct iommu_ops *ops; + size_t viommu_size; int rc; if (cmd->flags || cmd->type == IOMMU_VIOMMU_TYPE_DEFAULT) @@ -31,7 +37,22 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) return PTR_ERR(idev); ops = dev_iommu_ops(idev->dev); - if (!ops->viommu_alloc) { + if (!ops->get_viommu_size || !ops->viommu_init) { + rc = -EOPNOTSUPP; + goto out_put_idev; + } + + viommu_size = ops->get_viommu_size(idev->dev, cmd->type); + if (!viommu_size) { + rc = -EOPNOTSUPP; + goto out_put_idev; + } + + /* + * It is a driver bug for providing a viommu_size smaller than the core + * vIOMMU structure size + */ + if (WARN_ON_ONCE(viommu_size < sizeof(*viommu))) { rc = -EOPNOTSUPP; goto out_put_idev; } @@ -47,8 +68,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_put_hwpt; } - viommu = ops->viommu_alloc(idev->dev, hwpt_paging->common.domain, - ucmd->ictx, cmd->type); + viommu = (struct iommufd_viommu *)_iommufd_object_alloc_ucmd( + ucmd, viommu_size, IOMMUFD_OBJ_VIOMMU); if (IS_ERR(viommu)) { rc = PTR_ERR(viommu); goto out_put_hwpt; @@ -68,15 +89,20 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) */ viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev); + rc = ops->viommu_init(viommu, hwpt_paging->common.domain, + user_data.len ? &user_data : NULL); + if (rc) + goto out_put_hwpt; + + /* It is a driver bug that viommu->ops isn't filled */ + if (WARN_ON_ONCE(!viommu->ops)) { + rc = -EOPNOTSUPP; + goto out_put_hwpt; + } + cmd->out_viommu_id = viommu->obj.id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); - if (rc) - goto out_abort; - iommufd_object_finalize(ucmd->ictx, &viommu->obj); - goto out_put_hwpt; -out_abort: - iommufd_object_abort_and_destroy(ucmd->ictx, &viommu->obj); out_put_hwpt: iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj); out_put_idev: @@ -84,22 +110,41 @@ out_put_idev: return rc; } -void iommufd_vdevice_destroy(struct iommufd_object *obj) +void iommufd_vdevice_abort(struct iommufd_object *obj) { struct iommufd_vdevice *vdev = container_of(obj, struct iommufd_vdevice, obj); struct iommufd_viommu *viommu = vdev->viommu; + struct iommufd_device *idev = vdev->idev; + + lockdep_assert_held(&idev->igroup->lock); + if (vdev->destroy) + vdev->destroy(vdev); /* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */ - xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL); + xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL); refcount_dec(&viommu->obj.users); - put_device(vdev->dev); + idev->vdev = NULL; +} + +void iommufd_vdevice_destroy(struct iommufd_object *obj) +{ + struct iommufd_vdevice *vdev = + container_of(obj, struct iommufd_vdevice, obj); + struct iommufd_device *idev = vdev->idev; + struct iommufd_ctx *ictx = idev->ictx; + + mutex_lock(&idev->igroup->lock); + iommufd_vdevice_abort(obj); + mutex_unlock(&idev->igroup->lock); + iommufd_put_object(ictx, &idev->obj); } int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) { struct iommu_vdevice_alloc *cmd = ucmd->cmd; struct iommufd_vdevice *vdev, *curr; + size_t vdev_size = sizeof(*vdev); struct iommufd_viommu *viommu; struct iommufd_device *idev; u64 virt_id = cmd->virt_id; @@ -124,17 +169,54 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_put_idev; } - vdev = iommufd_object_alloc(ucmd->ictx, vdev, IOMMUFD_OBJ_VDEVICE); + mutex_lock(&idev->igroup->lock); + if (idev->destroying) { + rc = -ENOENT; + goto out_unlock_igroup; + } + + if 
(idev->vdev) { + rc = -EEXIST; + goto out_unlock_igroup; + } + + if (viommu->ops && viommu->ops->vdevice_size) { + /* + * It is a driver bug for: + * - ops->vdevice_size smaller than the core structure size + * - not implementing a pairing ops->vdevice_init op + */ + if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size || + !viommu->ops->vdevice_init)) { + rc = -EOPNOTSUPP; + goto out_put_idev; + } + vdev_size = viommu->ops->vdevice_size; + } + + vdev = (struct iommufd_vdevice *)_iommufd_object_alloc( + ucmd->ictx, vdev_size, IOMMUFD_OBJ_VDEVICE); if (IS_ERR(vdev)) { rc = PTR_ERR(vdev); - goto out_put_idev; + goto out_unlock_igroup; } - vdev->id = virt_id; - vdev->dev = idev->dev; - get_device(idev->dev); + vdev->virt_id = virt_id; vdev->viommu = viommu; refcount_inc(&viommu->obj.users); + /* + * A wait_cnt reference is held on the idev so long as we have the + * pointer. iommufd_device_pre_destroy() will revoke it before the + * idev real destruction. + */ + vdev->idev = idev; + + /* + * iommufd_device_destroy() delays until idev->vdev is NULL before + * freeing the idev, which only happens once the vdev is finished + * destruction. + */ + idev->vdev = vdev; curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL); if (curr) { @@ -142,17 +224,206 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd) goto out_abort; } + if (viommu->ops && viommu->ops->vdevice_init) { + rc = viommu->ops->vdevice_init(vdev); + if (rc) + goto out_abort; + } + cmd->out_vdevice_id = vdev->obj.id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); if (rc) goto out_abort; iommufd_object_finalize(ucmd->ictx, &vdev->obj); - goto out_put_idev; + goto out_unlock_igroup; out_abort: iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj); +out_unlock_igroup: + mutex_unlock(&idev->igroup->lock); out_put_idev: - iommufd_put_object(ucmd->ictx, &idev->obj); + if (rc) + iommufd_put_object(ucmd->ictx, &idev->obj); +out_put_viommu: + iommufd_put_object(ucmd->ictx, &viommu->obj); + return rc; +} + +static void iommufd_hw_queue_destroy_access(struct iommufd_ctx *ictx, + struct iommufd_access *access, + u64 base_iova, size_t length) +{ + u64 aligned_iova = PAGE_ALIGN_DOWN(base_iova); + u64 offset = base_iova - aligned_iova; + + iommufd_access_unpin_pages(access, aligned_iova, + PAGE_ALIGN(length + offset)); + iommufd_access_detach_internal(access); + iommufd_access_destroy_internal(ictx, access); +} + +void iommufd_hw_queue_destroy(struct iommufd_object *obj) +{ + struct iommufd_hw_queue *hw_queue = + container_of(obj, struct iommufd_hw_queue, obj); + + if (hw_queue->destroy) + hw_queue->destroy(hw_queue); + if (hw_queue->access) + iommufd_hw_queue_destroy_access(hw_queue->viommu->ictx, + hw_queue->access, + hw_queue->base_addr, + hw_queue->length); + if (hw_queue->viommu) + refcount_dec(&hw_queue->viommu->obj.users); +} + +/* + * When the HW accesses the guest queue via physical addresses, the underlying + * physical pages of the guest queue must be contiguous. Also, for the security + * concern that IOMMUFD_CMD_IOAS_UNMAP could potentially remove the mappings of + * the guest queue from the nesting parent iopt while the HW is still accessing + * the guest queue memory physically, such a HW queue must require an access to + * pin the underlying pages and prevent that from happening. 
+ */ +static struct iommufd_access * +iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd, + struct iommufd_viommu *viommu, phys_addr_t *base_pa) +{ + u64 aligned_iova = PAGE_ALIGN_DOWN(cmd->nesting_parent_iova); + u64 offset = cmd->nesting_parent_iova - aligned_iova; + struct iommufd_access *access; + struct page **pages; + size_t max_npages; + size_t length; + size_t i; + int rc; + + /* max_npages = DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */ + if (check_add_overflow(offset, cmd->length, &length)) + return ERR_PTR(-ERANGE); + if (check_add_overflow(length, PAGE_SIZE - 1, &length)) + return ERR_PTR(-ERANGE); + max_npages = length / PAGE_SIZE; + /* length needs to be page aligned too */ + length = max_npages * PAGE_SIZE; + + /* + * Use kvcalloc() to avoid memory fragmentation for a large page array. + * Set __GFP_NOWARN to avoid syzkaller blowups + */ + pages = kvcalloc(max_npages, sizeof(*pages), GFP_KERNEL | __GFP_NOWARN); + if (!pages) + return ERR_PTR(-ENOMEM); + + access = iommufd_access_create_internal(viommu->ictx); + if (IS_ERR(access)) { + rc = PTR_ERR(access); + goto out_free; + } + + rc = iommufd_access_attach_internal(access, viommu->hwpt->ioas); + if (rc) + goto out_destroy; + + rc = iommufd_access_pin_pages(access, aligned_iova, length, pages, 0); + if (rc) + goto out_detach; + + /* Validate if the underlying physical pages are contiguous */ + for (i = 1; i < max_npages; i++) { + if (page_to_pfn(pages[i]) == page_to_pfn(pages[i - 1]) + 1) + continue; + rc = -EFAULT; + goto out_unpin; + } + + *base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset; + kfree(pages); + return access; + +out_unpin: + iommufd_access_unpin_pages(access, aligned_iova, length); +out_detach: + iommufd_access_detach_internal(access); +out_destroy: + iommufd_access_destroy_internal(viommu->ictx, access); +out_free: + kfree(pages); + return ERR_PTR(rc); +} + +int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd) +{ + struct iommu_hw_queue_alloc *cmd = ucmd->cmd; + struct iommufd_hw_queue *hw_queue; + struct iommufd_viommu *viommu; + struct iommufd_access *access; + size_t hw_queue_size; + phys_addr_t base_pa; + u64 last; + int rc; + + if (cmd->flags || cmd->type == IOMMU_HW_QUEUE_TYPE_DEFAULT) + return -EOPNOTSUPP; + if (!cmd->length) + return -EINVAL; + if (check_add_overflow(cmd->nesting_parent_iova, cmd->length - 1, + &last)) + return -EOVERFLOW; + + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); + if (IS_ERR(viommu)) + return PTR_ERR(viommu); + + if (!viommu->ops || !viommu->ops->get_hw_queue_size || + !viommu->ops->hw_queue_init_phys) { + rc = -EOPNOTSUPP; + goto out_put_viommu; + } + + hw_queue_size = viommu->ops->get_hw_queue_size(viommu, cmd->type); + if (!hw_queue_size) { + rc = -EOPNOTSUPP; + goto out_put_viommu; + } + + /* + * It is a driver bug for providing a hw_queue_size smaller than the + * core HW queue structure size + */ + if (WARN_ON_ONCE(hw_queue_size < sizeof(*hw_queue))) { + rc = -EOPNOTSUPP; + goto out_put_viommu; + } + + hw_queue = (struct iommufd_hw_queue *)_iommufd_object_alloc_ucmd( + ucmd, hw_queue_size, IOMMUFD_OBJ_HW_QUEUE); + if (IS_ERR(hw_queue)) { + rc = PTR_ERR(hw_queue); + goto out_put_viommu; + } + + access = iommufd_hw_queue_alloc_phys(cmd, viommu, &base_pa); + if (IS_ERR(access)) { + rc = PTR_ERR(access); + goto out_put_viommu; + } + + hw_queue->viommu = viommu; + refcount_inc(&viommu->obj.users); + hw_queue->access = access; + hw_queue->type = cmd->type; + hw_queue->length = cmd->length; + hw_queue->base_addr = cmd->nesting_parent_iova; + + 
rc = viommu->ops->hw_queue_init_phys(hw_queue, cmd->index, base_pa); + if (rc) + goto out_put_viommu; + + cmd->out_hw_queue_id = hw_queue->obj.id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + out_put_viommu: iommufd_put_object(ucmd->ictx, &viommu->obj); return rc; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 90341b24a811..ffa892f65714 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -430,7 +430,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) * non-secure mode. */ domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS; - domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K; + domain->cfg.pgsize_bitmap = domain->io_domain.pgsize_bitmap; domain->cfg.ias = 32; domain->cfg.oas = 40; domain->cfg.tlb = &ipmmu_flush_ops; @@ -571,6 +571,7 @@ static struct iommu_domain *ipmmu_domain_alloc_paging(struct device *dev) return NULL; mutex_init(&domain->mutex); + domain->io_domain.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K; return &domain->io_domain; } @@ -882,7 +883,6 @@ static const struct iommu_ops ipmmu_ops = { */ .device_group = IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA) ? generic_device_group : generic_single_device_group, - .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, .of_xlate = ipmmu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = ipmmu_attach_device, diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 2769e4544038..43a61ba021a5 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -312,6 +312,8 @@ static struct iommu_domain *msm_iommu_domain_alloc_paging(struct device *dev) INIT_LIST_HEAD(&priv->list_attached); + priv->domain.pgsize_bitmap = MSM_IOMMU_PGSIZES; + priv->domain.geometry.aperture_start = 0; priv->domain.geometry.aperture_end = (1ULL << 32) - 1; priv->domain.geometry.force_aperture = true; @@ -339,7 +341,7 @@ static int msm_iommu_domain_config(struct msm_priv *priv) spin_lock_init(&priv->pgtlock); priv->cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = msm_iommu_ops.pgsize_bitmap, + .pgsize_bitmap = priv->domain.pgsize_bitmap, .ias = 32, .oas = 32, .tlb = &msm_iommu_flush_ops, @@ -352,8 +354,6 @@ static int msm_iommu_domain_config(struct msm_priv *priv) return -EINVAL; } - msm_iommu_ops.pgsize_bitmap = priv->cfg.pgsize_bitmap; - return 0; } @@ -692,7 +692,6 @@ static struct iommu_ops msm_iommu_ops = { .domain_alloc_paging = msm_iommu_domain_alloc_paging, .probe_device = msm_iommu_probe_device, .device_group = generic_device_group, - .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = msm_iommu_attach_dev, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index cb95fecf6016..0e0285348d2b 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -648,7 +648,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, if (share_dom) { dom->iop = share_dom->iop; dom->cfg = share_dom->cfg; - dom->domain.pgsize_bitmap = share_dom->cfg.pgsize_bitmap; + dom->domain.pgsize_bitmap = share_dom->domain.pgsize_bitmap; goto update_iova_region; } @@ -656,7 +656,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_PERMS | IO_PGTABLE_QUIRK_ARM_MTK_EXT, - .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, + .pgsize_bitmap = dom->domain.pgsize_bitmap, .ias = MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN) ? 
34 : 32,
 		.iommu_dev = data->dev,
 	};
@@ -675,9 +675,6 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
 		return -ENOMEM;
 	}
 
-	/* Update our support page sizes bitmap */
-	dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap;
-
 	data->share_dom = dom;
 
 update_iova_region:
@@ -697,6 +694,7 @@ static struct iommu_domain *mtk_iommu_domain_alloc_paging(struct device *dev)
 	if (!dom)
 		return NULL;
 	mutex_init(&dom->mutex);
+	dom->domain.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M;
 
 	return &dom->domain;
 }
@@ -1019,7 +1017,6 @@ static const struct iommu_ops mtk_iommu_ops = {
 	.device_group = mtk_iommu_device_group,
 	.of_xlate = mtk_iommu_of_xlate,
 	.get_resv_regions = mtk_iommu_get_resv_regions,
-	.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
 	.owner = THIS_MODULE,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev = mtk_iommu_attach_device,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 66824982e05f..10cc0b1197e8 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -288,6 +288,8 @@ static struct iommu_domain *mtk_iommu_v1_domain_alloc_paging(struct device *dev)
 	if (!dom)
 		return NULL;
 
+	dom->domain.pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE;
+
 	return &dom->domain;
 }
 
@@ -509,14 +511,10 @@ static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev)
 
 static void mtk_iommu_v1_probe_finalize(struct device *dev)
 {
-	struct dma_iommu_mapping *mtk_mapping;
-	struct mtk_iommu_v1_data *data;
+	__maybe_unused struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev);
 	int err;
 
-	data = dev_iommu_priv_get(dev);
-	mtk_mapping = data->mapping;
-
-	err = arm_iommu_attach_device(dev, mtk_mapping);
+	err = arm_iommu_attach_device(dev, data->mapping);
 	if (err)
 		dev_err(dev, "Can't create IOMMU mapping - DMA-OPS will not work\n");
 }
@@ -582,7 +580,6 @@ static const struct iommu_ops mtk_iommu_v1_ops = {
 	.probe_finalize = mtk_iommu_v1_probe_finalize,
 	.release_device = mtk_iommu_v1_release_device,
 	.device_group = generic_device_group,
-	.pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE,
 	.owner = THIS_MODULE,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev = mtk_iommu_v1_attach_device,
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 3c62337f43c6..6fb93927bdb9 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1123,29 +1123,15 @@ static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev,
 					      struct omap_iommu *obj)
 {
 	struct device_node *np = pdev->dev.of_node;
-	int ret;
 
 	if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu"))
 		return 0;
 
-	if (!of_property_read_bool(np, "ti,syscon-mmuconfig")) {
-		dev_err(&pdev->dev, "ti,syscon-mmuconfig property is missing\n");
-		return -EINVAL;
-	}
-
-	obj->syscfg =
-		syscon_regmap_lookup_by_phandle(np, "ti,syscon-mmuconfig");
-	if (IS_ERR(obj->syscfg)) {
-		/* can fail with -EPROBE_DEFER */
-		ret = PTR_ERR(obj->syscfg);
-		return ret;
-	}
-
-	if (of_property_read_u32_index(np, "ti,syscon-mmuconfig", 1,
-				       &obj->id)) {
-		dev_err(&pdev->dev, "couldn't get the IOMMU instance id within subsystem\n");
-		return -EINVAL;
-	}
+	obj->syscfg = syscon_regmap_lookup_by_phandle_args(np, "ti,syscon-mmuconfig",
+							   1, &obj->id);
+	if (IS_ERR(obj->syscfg))
+		return dev_err_probe(&pdev->dev, PTR_ERR(obj->syscfg),
+				     "ti,syscon-mmuconfig property is missing\n");
 
 	if (obj->id != 0 && obj->id != 1) {
 		dev_err(&pdev->dev, "invalid IOMMU instance id\n");
@@ -1584,6 +1570,8 @@ static struct iommu_domain *omap_iommu_domain_alloc_paging(struct device *dev)
 
 	spin_lock_init(&omap_domain->lock);
 
+	omap_domain->domain.pgsize_bitmap = OMAP_IOMMU_PGSIZES;
+
 	omap_domain->domain.geometry.aperture_start = 0;
 	omap_domain->domain.geometry.aperture_end = (1ULL << 32) - 1;
 	omap_domain->domain.geometry.force_aperture = true;
@@ -1735,7 +1723,6 @@ static const struct iommu_ops omap_iommu_ops = {
 	.release_device = omap_iommu_release_device,
 	.device_group = generic_single_device_group,
 	.of_xlate = omap_iommu_of_xlate,
-	.pgsize_bitmap = OMAP_IOMMU_PGSIZES,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev = omap_iommu_attach_dev,
 		.map_pages = omap_iommu_map,
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index bb57092ca901..2d0d31ba2886 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -1533,7 +1533,6 @@ static void riscv_iommu_release_device(struct device *dev)
 }
 
 static const struct iommu_ops riscv_iommu_ops = {
-	.pgsize_bitmap = SZ_4K,
 	.of_xlate = riscv_iommu_of_xlate,
 	.identity_domain = &riscv_iommu_identity_domain,
 	.blocked_domain = &riscv_iommu_blocking_domain,
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 22f74ba33a0e..0861dd469bd8 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1081,6 +1081,8 @@ static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev)
 	spin_lock_init(&rk_domain->dt_lock);
 	INIT_LIST_HEAD(&rk_domain->iommus);
 
+	rk_domain->domain.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP;
+
 	rk_domain->domain.geometry.aperture_start = 0;
 	rk_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32);
 	rk_domain->domain.geometry.force_aperture = true;
@@ -1157,7 +1159,6 @@ static int rk_iommu_of_xlate(struct device *dev,
 		return -ENOMEM;
 
 	data->iommu = platform_get_drvdata(iommu_dev);
-	data->iommu->domain = &rk_identity_domain;
 	dev_iommu_priv_set(dev, data);
 
 	platform_device_put(iommu_dev);
@@ -1171,7 +1172,6 @@ static const struct iommu_ops rk_iommu_ops = {
 	.probe_device = rk_iommu_probe_device,
 	.release_device = rk_iommu_release_device,
 	.device_group = generic_single_device_group,
-	.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
 	.of_xlate = rk_iommu_of_xlate,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev = rk_iommu_attach_device,
@@ -1195,6 +1195,8 @@ static int rk_iommu_probe(struct platform_device *pdev)
 	if (!iommu)
 		return -ENOMEM;
 
+	iommu->domain = &rk_identity_domain;
+
 	platform_set_drvdata(pdev, iommu);
 	iommu->dev = dev;
 	iommu->num_mmu = 0;
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 433b59f43530..9c80d61deb2c 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -557,6 +557,7 @@ static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
 	}
 	zdev->end_dma = zdev->start_dma + aperture_size - 1;
 
+	s390_domain->domain.pgsize_bitmap = SZ_4K;
 	s390_domain->domain.geometry.force_aperture = true;
 	s390_domain->domain.geometry.aperture_start = 0;
 	s390_domain->domain.geometry.aperture_end = max_tbl_size(s390_domain);
@@ -1158,7 +1159,6 @@ static struct iommu_domain blocking_domain = {
 	.domain_alloc_paging = s390_domain_alloc_paging, \
 	.probe_device = s390_iommu_probe_device, \
 	.device_group = generic_device_group, \
-	.pgsize_bitmap = SZ_4K, \
 	.get_resv_regions = s390_iommu_get_resv_regions, \
 	.default_domain_ops = &(const struct iommu_domain_ops) { \
 		.attach_dev = s390_iommu_attach_device, \
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index 941d1f361c8c..c7ca1d8a0b15 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -143,6 +143,8 @@ static struct iommu_domain *sprd_iommu_domain_alloc_paging(struct device *dev)
 
 	spin_lock_init(&dom->pgtlock);
 
+	dom->domain.pgsize_bitmap = SPRD_IOMMU_PAGE_SIZE;
+
 	dom->domain.geometry.aperture_start = 0;
 	dom->domain.geometry.aperture_end = SZ_256M - 1;
 	dom->domain.geometry.force_aperture = true;
@@ -410,7 +412,6 @@ static const struct iommu_ops sprd_iommu_ops = {
 	.probe_device = sprd_iommu_probe_device,
 	.device_group = generic_single_device_group,
 	.of_xlate = sprd_iommu_of_xlate,
-	.pgsize_bitmap = SPRD_IOMMU_PAGE_SIZE,
 	.owner = THIS_MODULE,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev = sprd_iommu_attach_device,
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 76c9620af4bb..de10b569d9a9 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -697,6 +697,8 @@ sun50i_iommu_domain_alloc_paging(struct device *dev)
 
 	refcount_set(&sun50i_domain->refcnt, 1);
 
+	sun50i_domain->domain.pgsize_bitmap = SZ_4K;
+
 	sun50i_domain->domain.geometry.aperture_start = 0;
 	sun50i_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32);
 	sun50i_domain->domain.geometry.force_aperture = true;
@@ -842,7 +844,6 @@ static int sun50i_iommu_of_xlate(struct device *dev,
 
 static const struct iommu_ops sun50i_iommu_ops = {
 	.identity_domain = &sun50i_iommu_identity_domain,
-	.pgsize_bitmap = SZ_4K,
 	.device_group = generic_single_device_group,
 	.domain_alloc_paging = sun50i_iommu_domain_alloc_paging,
 	.of_xlate = sun50i_iommu_of_xlate,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index e58fe9d8b9e7..36cdd5fbab07 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -318,6 +318,8 @@ static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev)
 
 	spin_lock_init(&as->lock);
 
+	as->domain.pgsize_bitmap = SZ_4K;
+
 	/* setup aperture */
 	as->domain.geometry.aperture_start = 0;
 	as->domain.geometry.aperture_end = 0xffffffff;
@@ -1002,7 +1004,6 @@ static const struct iommu_ops tegra_smmu_ops = {
 	.probe_device = tegra_smmu_probe_device,
 	.device_group = tegra_smmu_device_group,
 	.of_xlate = tegra_smmu_of_xlate,
-	.pgsize_bitmap = SZ_4K,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev = tegra_smmu_attach_dev,
 		.map_pages = tegra_smmu_map,
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index ecd41fb03e5a..532db1de201b 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -998,7 +998,7 @@ static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
 	iommu_dma_get_resv_regions(dev, head);
 }
 
-static struct iommu_ops viommu_ops;
+static const struct iommu_ops viommu_ops;
 static struct virtio_driver virtio_iommu_drv;
 
 static int viommu_match_node(struct device *dev, const void *data)
@@ -1086,7 +1086,7 @@ static bool viommu_capable(struct device *dev, enum iommu_cap cap)
 	}
 }
 
-static struct iommu_ops viommu_ops = {
+static const struct iommu_ops viommu_ops = {
 	.capable = viommu_capable,
 	.domain_alloc_identity = viommu_domain_alloc_identity,
 	.domain_alloc_paging = viommu_domain_alloc_paging,
@@ -1217,8 +1217,6 @@ static int viommu_probe(struct virtio_device *vdev)
 		viommu->first_domain++;
 	}
 
-	viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap;
-
 	virtio_device_ready(vdev);
 
 	/* Populate the event queue with buffers */
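The driver hunks above all apply one refactor: the supported page-size bitmap moves from the shared, driver-wide struct iommu_ops into each struct iommu_domain, filled in when a paging domain is allocated. A minimal sketch of the pattern follows; it is illustrative only, using placeholder "foo" names and a placeholder FOO_IOMMU_PGSIZES value rather than code from any driver touched in this diff.

/*
 * Illustrative sketch only -- hypothetical "foo" driver, not code from
 * this diff.  The per-domain pgsize_bitmap is set when the paging
 * domain is allocated, so struct iommu_ops no longer needs a
 * .pgsize_bitmap member and can stay const even when the supported
 * sizes are only known at probe time.
 */
#include <linux/dma-mapping.h>
#include <linux/iommu.h>
#include <linux/sizes.h>
#include <linux/slab.h>

#define FOO_IOMMU_PGSIZES	(SZ_4K | SZ_64K | SZ_1M)	/* placeholder */

struct foo_iommu_domain {
	/* ... driver-private page-table state ... */
	struct iommu_domain domain;	/* embedded generic domain */
};

static struct iommu_domain *foo_iommu_domain_alloc_paging(struct device *dev)
{
	struct foo_iommu_domain *dom;

	dom = kzalloc(sizeof(*dom), GFP_KERNEL);
	if (!dom)
		return NULL;

	/* Previously a single driver-wide foo_iommu_ops.pgsize_bitmap. */
	dom->domain.pgsize_bitmap = FOO_IOMMU_PGSIZES;

	dom->domain.geometry.aperture_start = 0;
	dom->domain.geometry.aperture_end = DMA_BIT_MASK(32);
	dom->domain.geometry.force_aperture = true;

	return &dom->domain;
}

Because nothing writes to the ops structure at runtime any more, a driver whose page sizes depend on the probed instance can keep its iommu_ops const, which is what the virtio-iommu hunks above rely on.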
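Separately, the omap-iommu hunk collapses an open-coded syscon lookup into syscon_regmap_lookup_by_phandle_args() plus dev_err_probe(). The sketch below shows the same call pattern under invented names: the bar_* identifiers and the "vendor,syscon-cfg" property are placeholders and do not appear in the diff.

/*
 * Sketch only -- hypothetical "bar" driver.  One helper resolves the
 * phandle and reads its single cell (here an instance id), replacing
 * the separate property check, regmap lookup and
 * of_property_read_u32_index() calls; dev_err_probe() records
 * -EPROBE_DEFER quietly instead of printing an error.
 */
#include <linux/device.h>
#include <linux/err.h>
#include <linux/mfd/syscon.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>

static int bar_mmu_get_syscfg(struct platform_device *pdev,
			      struct regmap **syscfg, unsigned int *id)
{
	struct device_node *np = pdev->dev.of_node;

	*syscfg = syscon_regmap_lookup_by_phandle_args(np, "vendor,syscon-cfg",
						       1, id);
	if (IS_ERR(*syscfg))
		return dev_err_probe(&pdev->dev, PTR_ERR(*syscfg),
				     "vendor,syscon-cfg property is missing\n");

	return 0;
}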