Diffstat (limited to 'drivers/iommu')
-rw-r--r--  drivers/iommu/Kconfig | 2
-rw-r--r--  drivers/iommu/amd/Kconfig | 1
-rw-r--r--  drivers/iommu/amd/amd_iommu.h | 6
-rw-r--r--  drivers/iommu/amd/amd_iommu_types.h | 17
-rw-r--r--  drivers/iommu/amd/debugfs.c | 378
-rw-r--r--  drivers/iommu/amd/init.c | 52
-rw-r--r--  drivers/iommu/amd/io_pgtable.c | 4
-rw-r--r--  drivers/iommu/amd/iommu.c | 192
-rw-r--r--  drivers/iommu/apple-dart.c | 1
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 70
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 34
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 33
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 493
-rw-r--r--  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 4
-rw-r--r--  drivers/iommu/arm/arm-smmu/arm-smmu.c | 12
-rw-r--r--  drivers/iommu/arm/arm-smmu/qcom_iommu.c | 6
-rw-r--r--  drivers/iommu/exynos-iommu.c | 5
-rw-r--r--  drivers/iommu/intel/cache.c | 55
-rw-r--r--  drivers/iommu/intel/dmar.c | 3
-rw-r--r--  drivers/iommu/intel/iommu.c | 361
-rw-r--r--  drivers/iommu/intel/iommu.h | 22
-rw-r--r--  drivers/iommu/intel/irq_remapping.c | 38
-rw-r--r--  drivers/iommu/intel/nested.c | 4
-rw-r--r--  drivers/iommu/intel/pasid.c | 17
-rw-r--r--  drivers/iommu/intel/pasid.h | 11
-rw-r--r--  drivers/iommu/intel/svm.c | 3
-rw-r--r--  drivers/iommu/intel/trace.h | 5
-rw-r--r--  drivers/iommu/io-pgtable-arm.c | 7
-rw-r--r--  drivers/iommu/iommu.c | 7
-rw-r--r--  drivers/iommu/iommufd/device.c | 143
-rw-r--r--  drivers/iommu/iommufd/driver.c | 113
-rw-r--r--  drivers/iommu/iommufd/eventq.c | 14
-rw-r--r--  drivers/iommu/iommufd/hw_pagetable.c | 10
-rw-r--r--  drivers/iommu/iommufd/io_pagetable.c | 57
-rw-r--r--  drivers/iommu/iommufd/io_pagetable.h | 5
-rw-r--r--  drivers/iommu/iommufd/iommufd_private.h | 135
-rw-r--r--  drivers/iommu/iommufd/iommufd_test.h | 20
-rw-r--r--  drivers/iommu/iommufd/iova_bitmap.c | 1
-rw-r--r--  drivers/iommu/iommufd/main.c | 206
-rw-r--r--  drivers/iommu/iommufd/pages.c | 21
-rw-r--r--  drivers/iommu/iommufd/selftest.c | 208
-rw-r--r--  drivers/iommu/iommufd/viommu.c | 309
-rw-r--r--  drivers/iommu/ipmmu-vmsa.c | 4
-rw-r--r--  drivers/iommu/msm_iommu.c | 7
-rw-r--r--  drivers/iommu/mtk_iommu.c | 9
-rw-r--r--  drivers/iommu/mtk_iommu_v1.c | 11
-rw-r--r--  drivers/iommu/omap-iommu.c | 27
-rw-r--r--  drivers/iommu/riscv/iommu.c | 1
-rw-r--r--  drivers/iommu/rockchip-iommu.c | 3
-rw-r--r--  drivers/iommu/s390-iommu.c | 2
-rw-r--r--  drivers/iommu/sprd-iommu.c | 3
-rw-r--r--  drivers/iommu/sun50i-iommu.c | 3
-rw-r--r--  drivers/iommu/tegra-smmu.c | 3
-rw-r--r--  drivers/iommu/virtio-iommu.c | 6
54 files changed, 2523 insertions(+), 641 deletions(-)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 0a33d995d15d..70d29b14d851 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -200,6 +200,7 @@ source "drivers/iommu/riscv/Kconfig"
config IRQ_REMAP
bool "Support for Interrupt Remapping"
depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI
+ select IRQ_MSI_LIB
help
Supports Interrupt remapping for IO-APIC and MSI devices.
To use x2apic mode in the CPU's which support x2APIC enhancements or
@@ -305,7 +306,6 @@ config APPLE_DART
depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_DART
select IOMMU_API
select IOMMU_IO_PGTABLE_DART
- default ARCH_APPLE
help
Support for Apple DART (Device Address Resolution Table) IOMMUs
found in Apple ARM SoCs like the M1.
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 994063e5586f..ecef69c11144 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -7,6 +7,7 @@ config AMD_IOMMU
select PCI_ATS
select PCI_PRI
select PCI_PASID
+ select IRQ_MSI_LIB
select MMU_NOTIFIER
select IOMMU_API
select IOMMU_IOVA
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 29a8864381c3..9b4b589a54b5 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -28,9 +28,9 @@ void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
gfp_t gfp, size_t size);
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
-void amd_iommu_debugfs_setup(struct amd_iommu *iommu);
+void amd_iommu_debugfs_setup(void);
#else
-static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {}
+static inline void amd_iommu_debugfs_setup(void) {}
#endif
/* Needed for interrupt remapping */
@@ -42,7 +42,9 @@ int amd_iommu_enable_faulting(unsigned int cpu);
extern int amd_iommu_guest_ir;
extern enum protection_domain_mode amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
+extern u8 amd_iommu_hpt_level;
extern unsigned long amd_iommu_pgsize_bitmap;
+extern bool amd_iommu_hatdis;
/* Protection domain ops */
void amd_iommu_init_identity_domain(void);
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index ccbab3a4811a..5219d7ddfdaa 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -94,6 +94,7 @@
#define FEATURE_GA BIT_ULL(7)
#define FEATURE_HE BIT_ULL(8)
#define FEATURE_PC BIT_ULL(9)
+#define FEATURE_HATS GENMASK_ULL(11, 10)
#define FEATURE_GATS GENMASK_ULL(13, 12)
#define FEATURE_GLX GENMASK_ULL(15, 14)
#define FEATURE_GAM_VAPIC BIT_ULL(21)
@@ -460,6 +461,9 @@
/* IOMMU Feature Reporting Field (for IVHD type 10h) */
#define IOMMU_FEAT_GASUP_SHIFT 6
+/* IOMMU HATDIS for IVHD type 11h and 40h */
+#define IOMMU_IVHD_ATTR_HATDIS_SHIFT 0
+
/* IOMMU Extended Feature Register (EFR) */
#define IOMMU_EFR_XTSUP_SHIFT 2
#define IOMMU_EFR_GASUP_SHIFT 7
@@ -558,7 +562,8 @@ struct amd_io_pgtable {
};
enum protection_domain_mode {
- PD_MODE_V1 = 1,
+ PD_MODE_NONE,
+ PD_MODE_V1,
PD_MODE_V2,
};
@@ -790,6 +795,8 @@ struct amd_iommu {
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
/* DebugFS Info */
struct dentry *debugfs;
+ int dbg_mmio_offset;
+ int dbg_cap_offset;
#endif
/* IOPF support */
@@ -891,6 +898,13 @@ struct dev_table_entry {
};
/*
+ * Structure defining one entry in the command buffer
+ */
+struct iommu_cmd {
+ u32 data[4];
+};
+
+/*
 * Structure to store persistent DTE flags from IVHD
*/
struct ivhd_dte_flags {
@@ -1054,7 +1068,6 @@ struct irq_2_irte {
};
struct amd_ir_data {
- u32 cached_ga_tag;
struct amd_iommu *iommu;
struct irq_2_irte irq_2_irte;
struct msi_msg msi_entry;
diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c
index 545372fcc72f..10fa217a7119 100644
--- a/drivers/iommu/amd/debugfs.c
+++ b/drivers/iommu/amd/debugfs.c
@@ -11,22 +11,382 @@
#include <linux/pci.h>
#include "amd_iommu.h"
+#include "../irq_remapping.h"
static struct dentry *amd_iommu_debugfs;
-static DEFINE_MUTEX(amd_iommu_debugfs_lock);
#define MAX_NAME_LEN 20
+#define OFS_IN_SZ 8
+#define DEVID_IN_SZ 16
-void amd_iommu_debugfs_setup(struct amd_iommu *iommu)
+static int sbdf = -1;
+
+static ssize_t iommu_mmio_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct seq_file *m = filp->private_data;
+ struct amd_iommu *iommu = m->private;
+ int ret;
+
+ iommu->dbg_mmio_offset = -1;
+
+ if (cnt > OFS_IN_SZ)
+ return -EINVAL;
+
+ ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_mmio_offset);
+ if (ret)
+ return ret;
+
+ if (iommu->dbg_mmio_offset > iommu->mmio_phys_end - 4) {
+ iommu->dbg_mmio_offset = -1;
+ return -EINVAL;
+ }
+
+ return cnt;
+}
+
+static int iommu_mmio_show(struct seq_file *m, void *unused)
+{
+ struct amd_iommu *iommu = m->private;
+ u64 value;
+
+ if (iommu->dbg_mmio_offset < 0) {
+ seq_puts(m, "Please provide mmio register's offset\n");
+ return 0;
+ }
+
+ value = readq(iommu->mmio_base + iommu->dbg_mmio_offset);
+ seq_printf(m, "Offset:0x%x Value:0x%016llx\n", iommu->dbg_mmio_offset, value);
+
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(iommu_mmio);
+
+static ssize_t iommu_capability_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct seq_file *m = filp->private_data;
+ struct amd_iommu *iommu = m->private;
+ int ret;
+
+ iommu->dbg_cap_offset = -1;
+
+ if (cnt > OFS_IN_SZ)
+ return -EINVAL;
+
+ ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_cap_offset);
+ if (ret)
+ return ret;
+
+ /* Capability register at offset 0x14 is the last IOMMU capability register. */
+ if (iommu->dbg_cap_offset > 0x14) {
+ iommu->dbg_cap_offset = -1;
+ return -EINVAL;
+ }
+
+ return cnt;
+}
+
+static int iommu_capability_show(struct seq_file *m, void *unused)
+{
+ struct amd_iommu *iommu = m->private;
+ u32 value;
+ int err;
+
+ if (iommu->dbg_cap_offset < 0) {
+ seq_puts(m, "Please provide capability register's offset in the range [0x00 - 0x14]\n");
+ return 0;
+ }
+
+ err = pci_read_config_dword(iommu->dev, iommu->cap_ptr + iommu->dbg_cap_offset, &value);
+ if (err) {
+ seq_printf(m, "Not able to read capability register at 0x%x\n",
+ iommu->dbg_cap_offset);
+ return 0;
+ }
+
+ seq_printf(m, "Offset:0x%x Value:0x%08x\n", iommu->dbg_cap_offset, value);
+
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(iommu_capability);
+
+static int iommu_cmdbuf_show(struct seq_file *m, void *unused)
+{
+ struct amd_iommu *iommu = m->private;
+ struct iommu_cmd *cmd;
+ unsigned long flag;
+ u32 head, tail;
+ int i;
+
+ raw_spin_lock_irqsave(&iommu->lock, flag);
+ head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+ tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+ seq_printf(m, "CMD Buffer Head Offset:%d Tail Offset:%d\n",
+ (head >> 4) & 0x7fff, (tail >> 4) & 0x7fff);
+ for (i = 0; i < CMD_BUFFER_ENTRIES; i++) {
+ cmd = (struct iommu_cmd *)(iommu->cmd_buf + i * sizeof(*cmd));
+ seq_printf(m, "%3d: %08x %08x %08x %08x\n", i, cmd->data[0],
+ cmd->data[1], cmd->data[2], cmd->data[3]);
+ }
+ raw_spin_unlock_irqrestore(&iommu->lock, flag);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(iommu_cmdbuf);
+
+static ssize_t devid_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct amd_iommu_pci_seg *pci_seg;
+ int seg, bus, slot, func;
+ struct amd_iommu *iommu;
+ char *srcid_ptr;
+ u16 devid;
+ int i;
+
+ sbdf = -1;
+
+ if (cnt >= DEVID_IN_SZ)
+ return -EINVAL;
+
+ srcid_ptr = memdup_user_nul(ubuf, cnt);
+ if (IS_ERR(srcid_ptr))
+ return PTR_ERR(srcid_ptr);
+
+ i = sscanf(srcid_ptr, "%x:%x:%x.%x", &seg, &bus, &slot, &func);
+ if (i != 4) {
+ i = sscanf(srcid_ptr, "%x:%x.%x", &bus, &slot, &func);
+ if (i != 3) {
+ kfree(srcid_ptr);
+ return -EINVAL;
+ }
+ seg = 0;
+ }
+
+ devid = PCI_DEVID(bus, PCI_DEVFN(slot, func));
+
+ /* Check if user device id input is a valid input */
+ for_each_pci_segment(pci_seg) {
+ if (pci_seg->id != seg)
+ continue;
+ if (devid > pci_seg->last_bdf) {
+ kfree(srcid_ptr);
+ return -EINVAL;
+ }
+ iommu = pci_seg->rlookup_table[devid];
+ if (!iommu) {
+ kfree(srcid_ptr);
+ return -ENODEV;
+ }
+ break;
+ }
+
+ if (pci_seg->id != seg) {
+ kfree(srcid_ptr);
+ return -EINVAL;
+ }
+
+ sbdf = PCI_SEG_DEVID_TO_SBDF(seg, devid);
+
+ kfree(srcid_ptr);
+
+ return cnt;
+}
+
+static int devid_show(struct seq_file *m, void *unused)
{
+ u16 devid;
+
+ if (sbdf >= 0) {
+ devid = PCI_SBDF_TO_DEVID(sbdf);
+ seq_printf(m, "%04x:%02x:%02x.%x\n", PCI_SBDF_TO_SEGID(sbdf),
+ PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid));
+ } else
+ seq_puts(m, "No or Invalid input provided\n");
+
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(devid);
+
+static void dump_dte(struct seq_file *m, struct amd_iommu_pci_seg *pci_seg, u16 devid)
+{
+ struct dev_table_entry *dev_table;
+ struct amd_iommu *iommu;
+
+ iommu = pci_seg->rlookup_table[devid];
+ if (!iommu)
+ return;
+
+ dev_table = get_dev_table(iommu);
+ if (!dev_table) {
+ seq_puts(m, "Device table not found");
+ return;
+ }
+
+ seq_printf(m, "%-12s %16s %16s %16s %16s iommu\n", "DeviceId",
+ "QWORD[3]", "QWORD[2]", "QWORD[1]", "QWORD[0]");
+ seq_printf(m, "%04x:%02x:%02x.%x ", pci_seg->id, PCI_BUS_NUM(devid),
+ PCI_SLOT(devid), PCI_FUNC(devid));
+ for (int i = 3; i >= 0; --i)
+ seq_printf(m, "%016llx ", dev_table[devid].data[i]);
+ seq_printf(m, "iommu%d\n", iommu->index);
+}
+
+static int iommu_devtbl_show(struct seq_file *m, void *unused)
+{
+ struct amd_iommu_pci_seg *pci_seg;
+ u16 seg, devid;
+
+ if (sbdf < 0) {
+ seq_puts(m, "Enter a valid device ID to 'devid' file\n");
+ return 0;
+ }
+ seg = PCI_SBDF_TO_SEGID(sbdf);
+ devid = PCI_SBDF_TO_DEVID(sbdf);
+
+ for_each_pci_segment(pci_seg) {
+ if (pci_seg->id != seg)
+ continue;
+ dump_dte(m, pci_seg, devid);
+ break;
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(iommu_devtbl);
+
+static void dump_128_irte(struct seq_file *m, struct irq_remap_table *table, u16 int_tab_len)
+{
+ struct irte_ga *ptr, *irte;
+ int index;
+
+ for (index = 0; index < int_tab_len; index++) {
+ ptr = (struct irte_ga *)table->table;
+ irte = &ptr[index];
+
+ if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
+ !irte->lo.fields_vapic.valid)
+ continue;
+ else if (!irte->lo.fields_remap.valid)
+ continue;
+ seq_printf(m, "IRT[%04d] %016llx %016llx\n", index, irte->hi.val, irte->lo.val);
+ }
+}
+
+static void dump_32_irte(struct seq_file *m, struct irq_remap_table *table, u16 int_tab_len)
+{
+ union irte *ptr, *irte;
+ int index;
+
+ for (index = 0; index < int_tab_len; index++) {
+ ptr = (union irte *)table->table;
+ irte = &ptr[index];
+
+ if (!irte->fields.valid)
+ continue;
+ seq_printf(m, "IRT[%04d] %08x\n", index, irte->val);
+ }
+}
+
+static void dump_irte(struct seq_file *m, u16 devid, struct amd_iommu_pci_seg *pci_seg)
+{
+ struct dev_table_entry *dev_table;
+ struct irq_remap_table *table;
+ struct amd_iommu *iommu;
+ unsigned long flags;
+ u16 int_tab_len;
+
+ table = pci_seg->irq_lookup_table[devid];
+ if (!table) {
+ seq_printf(m, "IRQ lookup table not set for %04x:%02x:%02x:%x\n",
+ pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid));
+ return;
+ }
+
+ iommu = pci_seg->rlookup_table[devid];
+ if (!iommu)
+ return;
+
+ dev_table = get_dev_table(iommu);
+ if (!dev_table) {
+ seq_puts(m, "Device table not found");
+ return;
+ }
+
+ int_tab_len = dev_table[devid].data[2] & DTE_INTTABLEN_MASK;
+ if (int_tab_len != DTE_INTTABLEN_512 && int_tab_len != DTE_INTTABLEN_2K) {
+ seq_puts(m, "The device's DTE contains an invalid IRT length value.");
+ return;
+ }
+
+ seq_printf(m, "DeviceId %04x:%02x:%02x.%x\n", pci_seg->id, PCI_BUS_NUM(devid),
+ PCI_SLOT(devid), PCI_FUNC(devid));
+
+ raw_spin_lock_irqsave(&table->lock, flags);
+ if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+ dump_128_irte(m, table, BIT(int_tab_len >> 1));
+ else
+ dump_32_irte(m, table, BIT(int_tab_len >> 1));
+ seq_puts(m, "\n");
+ raw_spin_unlock_irqrestore(&table->lock, flags);
+}
+
+static int iommu_irqtbl_show(struct seq_file *m, void *unused)
+{
+ struct amd_iommu_pci_seg *pci_seg;
+ u16 devid, seg;
+
+ if (!irq_remapping_enabled) {
+ seq_puts(m, "Interrupt remapping is disabled\n");
+ return 0;
+ }
+
+ if (sbdf < 0) {
+ seq_puts(m, "Enter a valid device ID to 'devid' file\n");
+ return 0;
+ }
+
+ seg = PCI_SBDF_TO_SEGID(sbdf);
+ devid = PCI_SBDF_TO_DEVID(sbdf);
+
+ for_each_pci_segment(pci_seg) {
+ if (pci_seg->id != seg)
+ continue;
+ dump_irte(m, devid, pci_seg);
+ break;
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(iommu_irqtbl);
+
+void amd_iommu_debugfs_setup(void)
+{
+ struct amd_iommu *iommu;
char name[MAX_NAME_LEN + 1];
- mutex_lock(&amd_iommu_debugfs_lock);
- if (!amd_iommu_debugfs)
- amd_iommu_debugfs = debugfs_create_dir("amd",
- iommu_debugfs_dir);
- mutex_unlock(&amd_iommu_debugfs_lock);
+ amd_iommu_debugfs = debugfs_create_dir("amd", iommu_debugfs_dir);
+
+ for_each_iommu(iommu) {
+ iommu->dbg_mmio_offset = -1;
+ iommu->dbg_cap_offset = -1;
+
+ snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index);
+ iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs);
+
+ debugfs_create_file("mmio", 0644, iommu->debugfs, iommu,
+ &iommu_mmio_fops);
+ debugfs_create_file("capability", 0644, iommu->debugfs, iommu,
+ &iommu_capability_fops);
+ debugfs_create_file("cmdbuf", 0444, iommu->debugfs, iommu,
+ &iommu_cmdbuf_fops);
+ }
- snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index);
- iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs);
+ debugfs_create_file("devid", 0644, amd_iommu_debugfs, NULL,
+ &devid_fops);
+ debugfs_create_file("devtbl", 0444, amd_iommu_debugfs, NULL,
+ &iommu_devtbl_fops);
+ debugfs_create_file("irqtbl", 0444, amd_iommu_debugfs, NULL,
+ &iommu_irqtbl_fops);
}
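
The debugfs interface added above is driven entirely from user space: a source ID written to the shared 'devid' file selects the device, after which reads of 'devtbl' and 'irqtbl' dump its DTE and IRT, while the per-IOMMU 'mmio' and 'capability' files take a register offset on write and return the value on read. A minimal user-space sketch of that flow follows; the /sys/kernel/debug/iommu/amd/... paths, the example device and the example register offset are assumptions based on iommu_debugfs_dir and the directory names created in amd_iommu_debugfs_setup(), not part of the patch.

/*
 * User-space usage sketch (not part of the patch). The debugfs paths are
 * assumptions derived from iommu_debugfs_dir ("iommu" under debugfs) and
 * the "amd"/"iommuNN" directories created above.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void dump_file(const char *path)
{
	char buf[1024];
	int fd = open(path, O_RDONLY);
	ssize_t n;

	if (fd < 0)
		return;
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("%s:\n%s", path, buf);
	}
	close(fd);
}

static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	/* Select a (hypothetical) device, then dump its DTE and IRT */
	if (write_str("/sys/kernel/debug/iommu/amd/devid", "0000:01:00.0"))
		return 1;
	dump_file("/sys/kernel/debug/iommu/amd/devtbl");
	dump_file("/sys/kernel/debug/iommu/amd/irqtbl");

	/* Per-IOMMU register read: write an offset, then read back the value */
	if (!write_str("/sys/kernel/debug/iommu/amd/iommu00/mmio", "0x18"))
		dump_file("/sys/kernel/debug/iommu/amd/iommu00/mmio");
	return 0;
}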
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 9c17dfa76703..7b5af6176de9 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -152,6 +152,8 @@ bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;
enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1;
+/* Host page table level */
+u8 amd_iommu_hpt_level;
/* Guest page table level */
int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;
@@ -168,6 +170,9 @@ static int amd_iommu_target_ivhd_type;
u64 amd_iommu_efr;
u64 amd_iommu_efr2;
+/* Host (v1) page table is not supported */
+bool amd_iommu_hatdis;
+
/* SNP is enabled on the system? */
bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);
@@ -1792,6 +1797,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
+ if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) {
+ pr_warn_once("Host Address Translation is not supported.\n");
+ amd_iommu_hatdis = true;
+ }
+
early_iommu_features_init(iommu, h);
break;
@@ -2112,7 +2122,15 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
return ret;
}
- iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
+ ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
+ if (ret || amd_iommu_pgtable == PD_MODE_NONE) {
+ /*
+ * Remove sysfs if DMA translation is not supported by the
+ * IOMMU. Do not return an error to enable IRQ remapping
+ * in state_next(), DTE[V, TV] must eventually be set to 0.
+ */
+ iommu_device_sysfs_remove(&iommu->iommu);
+ }
return pci_enable_device(iommu->dev);
}
@@ -2573,7 +2591,7 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
u32 devid;
struct dev_table_entry *dev_table = pci_seg->dev_table;
- if (dev_table == NULL)
+ if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE)
return;
for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
@@ -3033,6 +3051,7 @@ static int __init early_amd_iommu_init(void)
struct acpi_table_header *ivrs_base;
int ret;
acpi_status status;
+ u8 efr_hats;
if (!amd_iommu_detected)
return -ENODEV;
@@ -3077,6 +3096,19 @@ static int __init early_amd_iommu_init(void)
FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
+ efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr);
+ if (efr_hats != 0x3) {
+ /*
+ * efr[HATS] bits specify the maximum host translation level
+ * supported, with LEVEL 4 being initial max level.
+ */
+ amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL;
+ } else {
+ pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n",
+ efr_hats);
+ amd_iommu_hatdis = true;
+ }
+
if (amd_iommu_pgtable == PD_MODE_V2) {
if (!amd_iommu_v2_pgtbl_supported()) {
pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
@@ -3084,6 +3116,17 @@ static int __init early_amd_iommu_init(void)
}
}
+ if (amd_iommu_hatdis) {
+ /*
+ * Host (v1) page table is not available. Attempt to use
+ * Guest (v2) page table.
+ */
+ if (amd_iommu_v2_pgtbl_supported())
+ amd_iommu_pgtable = PD_MODE_V2;
+ else
+ amd_iommu_pgtable = PD_MODE_NONE;
+ }
+
/* Disable any previously enabled IOMMUs */
if (!is_kdump_kernel() || amd_iommu_disabled)
disable_iommus();
@@ -3376,7 +3419,6 @@ int amd_iommu_enable_faulting(unsigned int cpu)
*/
static int __init amd_iommu_init(void)
{
- struct amd_iommu *iommu;
int ret;
ret = iommu_go_to_state(IOMMU_INITIALIZED);
@@ -3390,8 +3432,8 @@ static int __init amd_iommu_init(void)
}
#endif
- for_each_iommu(iommu)
- amd_iommu_debugfs_setup(iommu);
+ if (!ret)
+ amd_iommu_debugfs_setup();
return ret;
}
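
Taken together, the init.c changes decode EFR[HATS] into a maximum host page-table level (with 0x3 reserved) and combine it with the per-IVHD HATDis bit to pick the paging mode, falling back from v1 to v2 and finally to PD_MODE_NONE. A standalone sketch of that decision, using local names rather than the driver's globals, for illustration only:

/*
 * Sketch of the mode selection above (illustration only):
 *
 *   EFR[HATS] = 0 -> 4-level host page table
 *   EFR[HATS] = 1 -> 5-level host page table
 *   EFR[HATS] = 2 -> 6-level host page table
 *   EFR[HATS] = 3 -> reserved, host translation treated as unavailable
 */
enum pd_mode { PD_NONE, PD_V1, PD_V2 };

static enum pd_mode pick_pgtable_mode(unsigned int efr_hats, bool ivhd_hatdis,
				      bool v2_supported, enum pd_mode requested)
{
	bool hatdis = ivhd_hatdis || efr_hats == 0x3;

	if (!hatdis)
		return requested;	/* v1 or v2, as configured */
	/* Host (v1) table unusable: fall back to v2 if the HW supports it */
	return v2_supported ? PD_V2 : PD_NONE;
}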
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 4d308c071134..a91e71f981ef 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -125,7 +125,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
goto out;
ret = false;
- if (WARN_ON_ONCE(pgtable->mode == PAGE_MODE_6_LEVEL))
+ if (WARN_ON_ONCE(pgtable->mode == amd_iommu_hpt_level))
goto out;
*pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));
@@ -526,7 +526,7 @@ static void v1_free_pgtable(struct io_pgtable *iop)
/* Page-table is not visible to IOMMU anymore, so free it */
BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
- pgtable->mode > PAGE_MODE_6_LEVEL);
+ pgtable->mode > amd_iommu_hpt_level);
free_sub_pt(pgtable->root, pgtable->mode, &freelist);
iommu_put_pages_list(&freelist);
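
The io_pgtable.c change replaces the fixed PAGE_MODE_6_LEVEL ceiling with the EFR-derived amd_iommu_hpt_level, so the level cap now directly bounds how far increase_address_space() may grow the IOVA space. A back-of-the-envelope sketch of that relation, assuming PM_LEVEL_SHIFT(l) == 12 + 9 * l as defined in amd_iommu_types.h:

/*
 * Assumption: PM_LEVEL_SHIFT(l) == 12 + 9 * l, so each extra host
 * page-table level adds 9 bits of IOVA:
 *
 *   level 4 -> 48-bit IOVA space
 *   level 5 -> 57-bit IOVA space
 *   level 6 -> PM_LEVEL_SIZE() reports the full 64-bit space
 */
static unsigned int hpt_level_to_iova_bits(unsigned int level)
{
	return 12 + 9 * level;
}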
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 3117d99cf83d..eb348c63a8d0 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -25,6 +25,7 @@
#include <linux/notifier.h>
#include <linux/export.h>
#include <linux/irq.h>
+#include <linux/irqchip/irq-msi-lib.h>
#include <linux/msi.h>
#include <linux/irqdomain.h>
#include <linux/percpu.h>
@@ -63,13 +64,6 @@ static const struct iommu_dirty_ops amd_dirty_ops;
int amd_iommu_max_glx_val = -1;
/*
- * general struct to manage commands send to an IOMMU
- */
-struct iommu_cmd {
- u32 data[4];
-};
-
-/*
* AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
* to know which ones are already in use.
*/
@@ -634,8 +628,8 @@ static inline void pdev_disable_cap_pasid(struct pci_dev *pdev)
static void pdev_enable_caps(struct pci_dev *pdev)
{
- pdev_enable_cap_ats(pdev);
pdev_enable_cap_pasid(pdev);
+ pdev_enable_cap_ats(pdev);
pdev_enable_cap_pri(pdev);
}
@@ -2424,6 +2418,13 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
pci_max_pasids(to_pci_dev(dev)));
}
+ if (amd_iommu_pgtable == PD_MODE_NONE) {
+ pr_warn_once("%s: DMA translation not supported by iommu.\n",
+ __func__);
+ iommu_dev = ERR_PTR(-ENODEV);
+ goto out_err;
+ }
+
out_err:
iommu_completion_wait(iommu);
@@ -2511,6 +2512,9 @@ static int pdom_setup_pgtable(struct protection_domain *domain,
case PD_MODE_V2:
fmt = AMD_IOMMU_V2;
break;
+ case PD_MODE_NONE:
+ WARN_ON_ONCE(1);
+ return -EPERM;
}
domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev);
@@ -2524,14 +2528,30 @@ static int pdom_setup_pgtable(struct protection_domain *domain,
static inline u64 dma_max_address(enum protection_domain_mode pgtable)
{
if (pgtable == PD_MODE_V1)
- return ~0ULL;
+ return PM_LEVEL_SIZE(amd_iommu_hpt_level);
- /* V2 with 4/5 level page table */
- return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1);
+ /*
+ * V2 with 4/5 level page table. Note that "2.2.6.5 AMD64 4-Kbyte Page
+ * Translation" shows that the V2 table sign extends the top of the
+ * address space creating a reserved region in the middle of the
+ * translation, just like the CPU does. Further Vasant says the docs are
+ * incomplete and this only applies to non-zero PASIDs. If the AMDv2
+ * page table is assigned to the 0 PASID then there is no sign extension
+ * check.
+ *
+ * Since the IOMMU must have a fixed geometry, and the core code does
+ * not understand sign extended addressing, we have to chop off the high
+ * bit to get consistent behavior with attachments of the domain to any
+ * PASID.
+ */
+ return ((1ULL << (PM_LEVEL_SHIFT(amd_iommu_gpt_level) - 1)) - 1);
}
static bool amd_iommu_hd_support(struct amd_iommu *iommu)
{
+ if (amd_iommu_hatdis)
+ return false;
+
return iommu && (iommu->features & FEATURE_HDSUP);
}
@@ -3804,13 +3824,70 @@ static const struct irq_domain_ops amd_ir_domain_ops = {
.deactivate = irq_remapping_deactivate,
};
-int amd_iommu_activate_guest_mode(void *data)
+static void __amd_iommu_update_ga(struct irte_ga *entry, int cpu,
+ bool ga_log_intr)
+{
+ if (cpu >= 0) {
+ entry->lo.fields_vapic.destination =
+ APICID_TO_IRTE_DEST_LO(cpu);
+ entry->hi.fields.destination =
+ APICID_TO_IRTE_DEST_HI(cpu);
+ entry->lo.fields_vapic.is_run = true;
+ entry->lo.fields_vapic.ga_log_intr = false;
+ } else {
+ entry->lo.fields_vapic.is_run = false;
+ entry->lo.fields_vapic.ga_log_intr = ga_log_intr;
+ }
+}
+
+/*
+ * Update the pCPU information for an IRTE that is configured to post IRQs to
+ * a vCPU, without issuing an IOMMU invalidation for the IRTE.
+ *
+ * If the vCPU is associated with a pCPU (@cpu >= 0), configure the Destination
+ * with the pCPU's APIC ID, set IsRun, and clear GALogIntr. If the vCPU isn't
+ * associated with a pCPU (@cpu < 0), clear IsRun and set/clear GALogIntr based
+ * on input from the caller (e.g. KVM only requests GALogIntr when the vCPU is
+ * blocking and requires a notification wake event). I.e. treat vCPUs that are
+ * associated with a pCPU as running. This API is intended to be used when a
+ * vCPU is scheduled in/out (or stops running for any reason), to do a fast
+ * update of IsRun, GALogIntr, and (conditionally) Destination.
+ *
+ * Per the IOMMU spec, the Destination, IsRun, and GATag fields are not cached
+ * and thus don't require an invalidation to ensure the IOMMU consumes fresh
+ * information.
+ */
+int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr)
+{
+ struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+ struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+
+ if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+ return -EINVAL;
+
+ if (!entry || !entry->lo.fields_vapic.guest_mode)
+ return 0;
+
+ if (!ir_data->iommu)
+ return -ENODEV;
+
+ __amd_iommu_update_ga(entry, cpu, ga_log_intr);
+
+ return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
+ ir_data->irq_2_irte.index, entry);
+}
+EXPORT_SYMBOL(amd_iommu_update_ga);
+
+int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr)
{
struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
u64 valid;
- if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry)
+ if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+ return -EINVAL;
+
+ if (!entry)
return 0;
valid = entry->lo.fields_vapic.valid;
@@ -3820,11 +3897,12 @@ int amd_iommu_activate_guest_mode(void *data)
entry->lo.fields_vapic.valid = valid;
entry->lo.fields_vapic.guest_mode = 1;
- entry->lo.fields_vapic.ga_log_intr = 1;
entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr;
entry->hi.fields.vector = ir_data->ga_vector;
entry->lo.fields_vapic.ga_tag = ir_data->ga_tag;
+ __amd_iommu_update_ga(entry, cpu, ga_log_intr);
+
return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
ir_data->irq_2_irte.index, entry);
}
@@ -3837,8 +3915,10 @@ int amd_iommu_deactivate_guest_mode(void *data)
struct irq_cfg *cfg = ir_data->cfg;
u64 valid;
- if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
- !entry || !entry->lo.fields_vapic.guest_mode)
+ if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+ return -EINVAL;
+
+ if (!entry || !entry->lo.fields_vapic.guest_mode)
return 0;
valid = entry->lo.fields_remap.valid;
@@ -3860,11 +3940,10 @@ int amd_iommu_deactivate_guest_mode(void *data)
}
EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
-static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
+static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *info)
{
int ret;
- struct amd_iommu_pi_data *pi_data = vcpu_info;
- struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
+ struct amd_iommu_pi_data *pi_data = info;
struct amd_ir_data *ir_data = data->chip_data;
struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
struct iommu_dev_data *dev_data;
@@ -3885,25 +3964,20 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
return -EINVAL;
ir_data->cfg = irqd_cfg(data);
- pi_data->ir_data = ir_data;
- pi_data->prev_ga_tag = ir_data->cached_ga_tag;
- if (pi_data->is_guest_mode) {
- ir_data->ga_root_ptr = (pi_data->base >> 12);
- ir_data->ga_vector = vcpu_pi_info->vector;
+ if (pi_data) {
+ pi_data->ir_data = ir_data;
+
+ ir_data->ga_root_ptr = (pi_data->vapic_addr >> 12);
+ ir_data->ga_vector = pi_data->vector;
ir_data->ga_tag = pi_data->ga_tag;
- ret = amd_iommu_activate_guest_mode(ir_data);
- if (!ret)
- ir_data->cached_ga_tag = pi_data->ga_tag;
+ if (pi_data->is_guest_mode)
+ ret = amd_iommu_activate_guest_mode(ir_data, pi_data->cpu,
+ pi_data->ga_log_intr);
+ else
+ ret = amd_iommu_deactivate_guest_mode(ir_data);
} else {
ret = amd_iommu_deactivate_guest_mode(ir_data);
-
- /*
- * This communicates the ga_tag back to the caller
- * so that it can do all the necessary clean up.
- */
- if (!ret)
- ir_data->cached_ga_tag = 0;
}
return ret;
@@ -3970,54 +4044,30 @@ static struct irq_chip amd_ir_chip = {
static const struct msi_parent_ops amdvi_msi_parent_ops = {
.supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI,
+ .bus_select_token = DOMAIN_BUS_AMDVI,
+ .bus_select_mask = MATCH_PCI_MSI,
.prefix = "IR-",
.init_dev_msi_info = msi_parent_init_dev_msi_info,
};
int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
{
- struct fwnode_handle *fn;
+ struct irq_domain_info info = {
+ .fwnode = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index),
+ .ops = &amd_ir_domain_ops,
+ .domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI,
+ .host_data = iommu,
+ .parent = arch_get_ir_parent_domain(),
+ };
- fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index);
- if (!fn)
+ if (!info.fwnode)
return -ENOMEM;
- iommu->ir_domain = irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 0, 0,
- fn, &amd_ir_domain_ops, iommu);
+
+ iommu->ir_domain = msi_create_parent_irq_domain(&info, &amdvi_msi_parent_ops);
if (!iommu->ir_domain) {
- irq_domain_free_fwnode(fn);
+ irq_domain_free_fwnode(info.fwnode);
return -ENOMEM;
}
-
- irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI);
- iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
- IRQ_DOMAIN_FLAG_ISOLATED_MSI;
- iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops;
-
return 0;
}
-
-int amd_iommu_update_ga(int cpu, bool is_run, void *data)
-{
- struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
- struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
-
- if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
- !entry || !entry->lo.fields_vapic.guest_mode)
- return 0;
-
- if (!ir_data->iommu)
- return -ENODEV;
-
- if (cpu >= 0) {
- entry->lo.fields_vapic.destination =
- APICID_TO_IRTE_DEST_LO(cpu);
- entry->hi.fields.destination =
- APICID_TO_IRTE_DEST_HI(cpu);
- }
- entry->lo.fields_vapic.is_run = is_run;
-
- return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
- ir_data->irq_2_irte.index, entry);
-}
-EXPORT_SYMBOL(amd_iommu_update_ga);
#endif
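
The reworked amd_iommu_update_ga() above takes the target pCPU and the GALogIntr request directly, treating cpu >= 0 as "vCPU is running". A hypothetical caller-side sketch (not part of the patch) of how a hypervisor might drive it on vCPU scheduling events, assuming the updated prototype is exposed via <linux/amd-iommu.h>:

#include <linux/amd-iommu.h>	/* assumed to carry the updated prototype */

/*
 * Hypothetical caller-side sketch: 'irte_cookie' is whatever per-IRQ cookie
 * the hypervisor saved when it configured posted interrupts for the vCPU.
 */
static void example_vcpu_sched_in(void *irte_cookie, int pcpu_apic_id)
{
	/* Running: set Destination to the pCPU, set IsRun, clear GALogIntr */
	amd_iommu_update_ga(irte_cookie, pcpu_apic_id, false);
}

static void example_vcpu_sched_out(void *irte_cookie, bool vcpu_is_blocking)
{
	/*
	 * Not running: clear IsRun; ask for a GA log interrupt only if the
	 * vCPU blocks and needs a wake event.
	 */
	amd_iommu_update_ga(irte_cookie, -1, vcpu_is_blocking);
}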
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 757d24f67ad4..190f28d76615 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -991,7 +991,6 @@ static const struct iommu_ops apple_dart_iommu_ops = {
.of_xlate = apple_dart_of_xlate,
.def_domain_type = apple_dart_def_domain_type,
.get_resv_regions = apple_dart_get_resv_regions,
- .pgsize_bitmap = -1UL, /* Restricted during dart probe */
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = apple_dart_attach_dev_paging,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
index e4fd8d522af8..8cd8929bbfdf 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
@@ -7,13 +7,22 @@
#include "arm-smmu-v3.h"
-void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
+void *arm_smmu_hw_info(struct device *dev, u32 *length,
+ enum iommu_hw_info_type *type)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ const struct arm_smmu_impl_ops *impl_ops = master->smmu->impl_ops;
struct iommu_hw_info_arm_smmuv3 *info;
u32 __iomem *base_idr;
unsigned int i;
+ if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
+ *type != IOMMU_HW_INFO_TYPE_ARM_SMMUV3) {
+ if (!impl_ops || !impl_ops->hw_info)
+ return ERR_PTR(-EOPNOTSUPP);
+ return impl_ops->hw_info(master->smmu, length, type);
+ }
+
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return ERR_PTR(-ENOMEM);
@@ -216,7 +225,7 @@ static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg,
return 0;
}
-static struct iommu_domain *
+struct iommu_domain *
arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
const struct iommu_user_data *user_data)
{
@@ -327,8 +336,8 @@ static int arm_vsmmu_convert_user_cmd(struct arm_vsmmu *vsmmu,
return 0;
}
-static int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
- struct iommu_user_data_array *array)
+int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
+ struct iommu_user_data_array *array)
{
struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
struct arm_smmu_device *smmu = vsmmu->smmu;
@@ -382,25 +391,14 @@ static const struct iommufd_viommu_ops arm_vsmmu_ops = {
.cache_invalidate = arm_vsmmu_cache_invalidate,
};
-struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
- struct iommu_domain *parent,
- struct iommufd_ctx *ictx,
- unsigned int viommu_type)
+size_t arm_smmu_get_viommu_size(struct device *dev,
+ enum iommu_viommu_type viommu_type)
{
- struct arm_smmu_device *smmu =
- iommu_get_iommu_dev(dev, struct arm_smmu_device, iommu);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- struct arm_smmu_domain *s2_parent = to_smmu_domain(parent);
- struct arm_vsmmu *vsmmu;
-
- if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
- return ERR_PTR(-EOPNOTSUPP);
+ struct arm_smmu_device *smmu = master->smmu;
if (!(smmu->features & ARM_SMMU_FEAT_NESTING))
- return ERR_PTR(-EOPNOTSUPP);
-
- if (s2_parent->smmu != master->smmu)
- return ERR_PTR(-EINVAL);
+ return 0;
/*
* FORCE_SYNC is not set with FEAT_NESTING. Some study of the exact HW
@@ -408,7 +406,7 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
* any change to remove this.
*/
if (WARN_ON(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC))
- return ERR_PTR(-EOPNOTSUPP);
+ return 0;
/*
* Must support some way to prevent the VM from bypassing the cache
@@ -420,19 +418,39 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
*/
if (!arm_smmu_master_canwbs(master) &&
!(smmu->features & ARM_SMMU_FEAT_S2FWB))
- return ERR_PTR(-EOPNOTSUPP);
+ return 0;
- vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,
- &arm_vsmmu_ops);
- if (IS_ERR(vsmmu))
- return ERR_CAST(vsmmu);
+ if (viommu_type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
+ return VIOMMU_STRUCT_SIZE(struct arm_vsmmu, core);
+
+ if (!smmu->impl_ops || !smmu->impl_ops->get_viommu_size)
+ return 0;
+ return smmu->impl_ops->get_viommu_size(viommu_type);
+}
+
+int arm_vsmmu_init(struct iommufd_viommu *viommu,
+ struct iommu_domain *parent_domain,
+ const struct iommu_user_data *user_data)
+{
+ struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
+ struct arm_smmu_device *smmu =
+ container_of(viommu->iommu_dev, struct arm_smmu_device, iommu);
+ struct arm_smmu_domain *s2_parent = to_smmu_domain(parent_domain);
+
+ if (s2_parent->smmu != smmu)
+ return -EINVAL;
vsmmu->smmu = smmu;
vsmmu->s2_parent = s2_parent;
/* FIXME Move VMID allocation from the S2 domain allocation to here */
vsmmu->vmid = s2_parent->s2_cfg.vmid;
- return &vsmmu->core;
+ if (viommu->type == IOMMU_VIOMMU_TYPE_ARM_SMMUV3) {
+ viommu->ops = &arm_vsmmu_ops;
+ return 0;
+ }
+
+ return smmu->impl_ops->vsmmu_init(vsmmu, user_data);
}
int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt)
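
arm_vsmmu_alloc() is replaced above by a size/init pair: the iommufd core allocates an object of the size reported by get_viommu_size() and then lets the driver initialize the embedded core structure. A sketch of that contract from an implementation's point of view, using hypothetical my_* names and assuming the declarations pulled in by arm-smmu-v3.h:

#include "arm-smmu-v3.h"	/* assumed context; my_* names are illustrative */

struct my_vsmmu {
	struct arm_vsmmu vsmmu;		/* generic part must be embedded */
	u32 my_private_state;
};

static size_t my_get_viommu_size(enum iommu_viommu_type viommu_type)
{
	if (viommu_type != IOMMU_VIOMMU_TYPE_ARM_SMMUV3)
		return 0;
	/* Tells the iommufd core how large the container object must be */
	return VIOMMU_STRUCT_SIZE(struct my_vsmmu, vsmmu.core);
}

static int my_vsmmu_init(struct arm_vsmmu *vsmmu,
			 const struct iommu_user_data *user_data)
{
	struct my_vsmmu *my = container_of(vsmmu, struct my_vsmmu, vsmmu);

	my->my_private_state = 0;	/* implementation-specific setup */
	return 0;
}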
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 39e933086f8f..5968043ac802 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -38,7 +38,7 @@ module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
"Disable MSI-based polling for CMD_SYNC completion.");
-static struct iommu_ops arm_smmu_ops;
+static const struct iommu_ops arm_smmu_ops;
static struct iommu_dirty_ops arm_smmu_dirty_ops;
enum arm_smmu_msi_index {
@@ -2906,8 +2906,8 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
if (!master_domain) {
- kfree(state->vmaster);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto err_free_vmaster;
}
master_domain->domain = new_domain;
master_domain->master = master;
@@ -2941,7 +2941,6 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
!arm_smmu_master_canwbs(master)) {
spin_unlock_irqrestore(&smmu_domain->devices_lock,
flags);
- kfree(state->vmaster);
ret = -EINVAL;
goto err_iopf;
}
@@ -2967,6 +2966,8 @@ err_iopf:
arm_smmu_disable_iopf(master, master_domain);
err_free_master_domain:
kfree(master_domain);
+err_free_vmaster:
+ kfree(state->vmaster);
return ret;
}
@@ -3674,7 +3675,7 @@ static int arm_smmu_def_domain_type(struct device *dev)
return 0;
}
-static struct iommu_ops arm_smmu_ops = {
+static const struct iommu_ops arm_smmu_ops = {
.identity_domain = &arm_smmu_identity_domain,
.blocked_domain = &arm_smmu_blocked_domain,
.capable = arm_smmu_capable,
@@ -3688,9 +3689,9 @@ static struct iommu_ops arm_smmu_ops = {
.get_resv_regions = arm_smmu_get_resv_regions,
.page_response = arm_smmu_page_response,
.def_domain_type = arm_smmu_def_domain_type,
- .viommu_alloc = arm_vsmmu_alloc,
+ .get_viommu_size = arm_smmu_get_viommu_size,
+ .viommu_init = arm_vsmmu_init,
.user_pasid_table = 1,
- .pgsize_bitmap = -1UL, /* Restricted during device attach */
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = arm_smmu_attach_dev,
@@ -4507,11 +4508,6 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
smmu->oas = 48;
}
- if (arm_smmu_ops.pgsize_bitmap == -1UL)
- arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
- else
- arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
-
/* Set the DMA mask for our table walker */
if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
dev_warn(smmu->dev,
@@ -4705,6 +4701,7 @@ static void arm_smmu_impl_remove(void *data)
static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
{
struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
+ const struct arm_smmu_impl_ops *ops;
int ret;
if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
@@ -4715,11 +4712,24 @@ static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
if (IS_ERR(new_smmu))
return new_smmu;
+ ops = new_smmu->impl_ops;
+ if (ops) {
+ /* get_viommu_size and vsmmu_init ops must be paired */
+ if (WARN_ON(!ops->get_viommu_size != !ops->vsmmu_init)) {
+ ret = -EINVAL;
+ goto err_remove;
+ }
+ }
+
ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
new_smmu);
if (ret)
return ERR_PTR(ret);
return new_smmu;
+
+err_remove:
+ arm_smmu_impl_remove(new_smmu);
+ return ERR_PTR(ret);
}
static int arm_smmu_device_probe(struct platform_device *pdev)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index a33bf520ba97..ae23aacc3840 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -16,6 +16,7 @@
#include <linux/sizes.h>
struct arm_smmu_device;
+struct arm_vsmmu;
/* MMIO registers */
#define ARM_SMMU_IDR0 0x0
@@ -721,6 +722,16 @@ struct arm_smmu_impl_ops {
int (*init_structures)(struct arm_smmu_device *smmu);
struct arm_smmu_cmdq *(*get_secondary_cmdq)(
struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent);
+ /*
+ * An implementation should define its own type other than the default
+ * IOMMU_HW_INFO_TYPE_ARM_SMMUV3. And it must validate the input @type
+ * to return its own structure.
+ */
+ void *(*hw_info)(struct arm_smmu_device *smmu, u32 *length,
+ enum iommu_hw_info_type *type);
+ size_t (*get_viommu_size)(enum iommu_viommu_type viommu_type);
+ int (*vsmmu_init)(struct arm_vsmmu *vsmmu,
+ const struct iommu_user_data *user_data);
};
/* An SMMUv3 instance */
@@ -1035,19 +1046,29 @@ struct arm_vsmmu {
};
#if IS_ENABLED(CONFIG_ARM_SMMU_V3_IOMMUFD)
-void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type);
-struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
- struct iommu_domain *parent,
- struct iommufd_ctx *ictx,
- unsigned int viommu_type);
+void *arm_smmu_hw_info(struct device *dev, u32 *length,
+ enum iommu_hw_info_type *type);
+size_t arm_smmu_get_viommu_size(struct device *dev,
+ enum iommu_viommu_type viommu_type);
+int arm_vsmmu_init(struct iommufd_viommu *viommu,
+ struct iommu_domain *parent_domain,
+ const struct iommu_user_data *user_data);
int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
struct arm_smmu_nested_domain *nested_domain);
void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state);
void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master);
int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt);
+struct iommu_domain *
+arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
+ const struct iommu_user_data *user_data);
+int arm_vsmmu_cache_invalidate(struct iommufd_viommu *viommu,
+ struct iommu_user_data_array *array);
#else
+#define arm_smmu_get_viommu_size NULL
#define arm_smmu_hw_info NULL
-#define arm_vsmmu_alloc NULL
+#define arm_vsmmu_init NULL
+#define arm_vsmmu_alloc_domain_nested NULL
+#define arm_vsmmu_cache_invalidate NULL
static inline int
arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index dd7d030d2e89..be1aaaf8cd17 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -8,7 +8,9 @@
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
+#include <linux/iommufd.h>
#include <linux/iopoll.h>
+#include <uapi/linux/iommufd.h>
#include <acpi/acpixf.h>
@@ -26,8 +28,10 @@
#define CMDQV_EN BIT(0)
#define TEGRA241_CMDQV_PARAM 0x0004
+#define CMDQV_NUM_SID_PER_VM_LOG2 GENMASK(15, 12)
#define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8)
#define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4)
+#define CMDQV_VER GENMASK(3, 0)
#define TEGRA241_CMDQV_STATUS 0x0008
#define CMDQV_ENABLED BIT(0)
@@ -53,6 +57,9 @@
#define VINTF_STATUS GENMASK(3, 1)
#define VINTF_ENABLED BIT(0)
+#define TEGRA241_VINTF_SID_MATCH(s) (0x0040 + 0x4*(s))
+#define TEGRA241_VINTF_SID_REPLACE(s) (0x0080 + 0x4*(s))
+
#define TEGRA241_VINTF_LVCMDQ_ERR_MAP_64(m) \
(0x00C0 + 0x8*(m))
#define LVCMDQ_ERR_MAP_NUM_64 2
@@ -114,16 +121,20 @@ MODULE_PARM_DESC(bypass_vcmdq,
/**
* struct tegra241_vcmdq - Virtual Command Queue
+ * @core: Embedded iommufd_hw_queue structure
* @idx: Global index in the CMDQV
* @lidx: Local index in the VINTF
* @enabled: Enable status
* @cmdqv: Parent CMDQV pointer
* @vintf: Parent VINTF pointer
+ * @prev: Previous LVCMDQ to depend on
* @cmdq: Command Queue struct
* @page0: MMIO Page0 base address
* @page1: MMIO Page1 base address
*/
struct tegra241_vcmdq {
+ struct iommufd_hw_queue core;
+
u16 idx;
u16 lidx;
@@ -131,22 +142,30 @@ struct tegra241_vcmdq {
struct tegra241_cmdqv *cmdqv;
struct tegra241_vintf *vintf;
+ struct tegra241_vcmdq *prev;
struct arm_smmu_cmdq cmdq;
void __iomem *page0;
void __iomem *page1;
};
+#define hw_queue_to_vcmdq(v) container_of(v, struct tegra241_vcmdq, core)
/**
* struct tegra241_vintf - Virtual Interface
+ * @vsmmu: Embedded arm_vsmmu structure
* @idx: Global index in the CMDQV
* @enabled: Enable status
* @hyp_own: Owned by hypervisor (in-kernel)
* @cmdqv: Parent CMDQV pointer
* @lvcmdqs: List of logical VCMDQ pointers
+ * @lvcmdq_mutex: Lock to serialize user-allocated lvcmdqs
* @base: MMIO base address
+ * @mmap_offset: Offset argument for mmap() syscall
+ * @sids: Stream ID mapping resources
*/
struct tegra241_vintf {
+ struct arm_vsmmu vsmmu;
+
u16 idx;
bool enabled;
@@ -154,19 +173,41 @@ struct tegra241_vintf {
struct tegra241_cmdqv *cmdqv;
struct tegra241_vcmdq **lvcmdqs;
+ struct mutex lvcmdq_mutex; /* user space race */
void __iomem *base;
+ unsigned long mmap_offset;
+
+ struct ida sids;
};
+#define viommu_to_vintf(v) container_of(v, struct tegra241_vintf, vsmmu.core)
+
+/**
+ * struct tegra241_vintf_sid - Virtual Interface Stream ID Mapping
+ * @core: Embedded iommufd_vdevice structure, holding virtual Stream ID
+ * @vintf: Parent VINTF pointer
+ * @sid: Physical Stream ID
+ * @idx: Mapping index in the VINTF
+ */
+struct tegra241_vintf_sid {
+ struct iommufd_vdevice core;
+ struct tegra241_vintf *vintf;
+ u32 sid;
+ u8 idx;
+};
+#define vdev_to_vsid(v) container_of(v, struct tegra241_vintf_sid, core)
/**
* struct tegra241_cmdqv - CMDQ-V for SMMUv3
* @smmu: SMMUv3 device
* @dev: CMDQV device
* @base: MMIO base address
+ * @base_phys: MMIO physical base address, for mmap
* @irq: IRQ number
* @num_vintfs: Total number of VINTFs
* @num_vcmdqs: Total number of VCMDQs
* @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF
+ * @num_sids_per_vintf: Total number of SID mappings per VINTF
* @vintf_ids: VINTF id allocator
* @vintfs: List of VINTFs
*/
@@ -175,12 +216,14 @@ struct tegra241_cmdqv {
struct device *dev;
void __iomem *base;
+ phys_addr_t base_phys;
int irq;
/* CMDQV Hardware Params */
u16 num_vintfs;
u16 num_vcmdqs;
u16 num_lvcmdqs_per_vintf;
+ u16 num_sids_per_vintf;
struct ida vintf_ids;
@@ -252,6 +295,20 @@ static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval)
/* ISR Functions */
+static void tegra241_vintf_user_handle_error(struct tegra241_vintf *vintf)
+{
+ struct iommufd_viommu *viommu = &vintf->vsmmu.core;
+ struct iommu_vevent_tegra241_cmdqv vevent_data;
+ int i;
+
+ for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++)
+ vevent_data.lvcmdq_err_map[i] =
+ readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i)));
+
+ iommufd_viommu_report_event(viommu, IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV,
+ &vevent_data, sizeof(vevent_data));
+}
+
static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf)
{
int i;
@@ -297,6 +354,14 @@ static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid)
vintf_map &= ~BIT_ULL(0);
}
+ /* Handle other user VINTFs and their LVCMDQs */
+ while (vintf_map) {
+ unsigned long idx = __ffs64(vintf_map);
+
+ tegra241_vintf_user_handle_error(cmdqv->vintfs[idx]);
+ vintf_map &= ~BIT_ULL(idx);
+ }
+
return IRQ_HANDLED;
}
@@ -351,6 +416,30 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu,
/* HW Reset Functions */
+/*
+ * When a guest-owned VCMDQ is disabled, if the guest did not enqueue a CMD_SYNC
+ * following an ATC_INV command at the end of the guest queue while this ATC_INV
+ * is timed out, the TIMEOUT will not be reported until this VCMDQ gets assigned
+ * to the next VM, which will be a false alarm potentially causing some unwanted
+ * behavior in the new VM. Thus, a guest-owned VCMDQ must flush the TIMEOUT when
+ * it gets disabled. This can be done by just issuing a CMD_SYNC to SMMU CMDQ.
+ */
+static void tegra241_vcmdq_hw_flush_timeout(struct tegra241_vcmdq *vcmdq)
+{
+ struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu;
+ u64 cmd_sync[CMDQ_ENT_DWORDS] = {};
+
+ cmd_sync[0] = FIELD_PREP(CMDQ_0_OP, CMDQ_OP_CMD_SYNC) |
+ FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);
+
+ /*
+ * It does not hurt to insert another CMD_SYNC, taking advantage of the
+ * arm_smmu_cmdq_issue_cmdlist() that waits for the CMD_SYNC completion.
+ */
+ arm_smmu_cmdq_issue_cmdlist(smmu, &smmu->cmdq, cmd_sync, 1, true);
+}
+
+/* This function is for LVCMDQ, so @vcmdq must not be unmapped yet */
static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq)
{
char header[64], *h = lvcmdq_error_header(vcmdq, header, 64);
@@ -363,6 +452,8 @@ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq)
readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)),
readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS)));
}
+ tegra241_vcmdq_hw_flush_timeout(vcmdq);
+
writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, PROD));
writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, CONS));
writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE));
@@ -379,6 +470,7 @@ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq)
dev_dbg(vcmdq->cmdqv->dev, "%sdeinited\n", h);
}
+/* This function is for LVCMDQ, so @vcmdq must be mapped prior */
static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq)
{
char header[64], *h = lvcmdq_error_header(vcmdq, header, 64);
@@ -404,14 +496,45 @@ static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq)
return 0;
}
+/* Unmap a global VCMDQ from the pre-assigned LVCMDQ */
+static void tegra241_vcmdq_unmap_lvcmdq(struct tegra241_vcmdq *vcmdq)
+{
+ u32 regval = readl(REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx)));
+ char header[64], *h = lvcmdq_error_header(vcmdq, header, 64);
+
+ writel(regval & ~CMDQV_CMDQ_ALLOCATED,
+ REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx)));
+ dev_dbg(vcmdq->cmdqv->dev, "%sunmapped\n", h);
+}
+
static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf)
{
- u16 lidx;
+ u16 lidx = vintf->cmdqv->num_lvcmdqs_per_vintf;
+ int sidx;
- for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++)
- if (vintf->lvcmdqs && vintf->lvcmdqs[lidx])
+ /* HW requires to unmap LVCMDQs in descending order */
+ while (lidx--) {
+ if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) {
tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]);
+ tegra241_vcmdq_unmap_lvcmdq(vintf->lvcmdqs[lidx]);
+ }
+ }
vintf_write_config(vintf, 0);
+ for (sidx = 0; sidx < vintf->cmdqv->num_sids_per_vintf; sidx++) {
+ writel(0, REG_VINTF(vintf, SID_MATCH(sidx)));
+ writel(0, REG_VINTF(vintf, SID_REPLACE(sidx)));
+ }
+}
+
+/* Map a global VCMDQ to the pre-assigned LVCMDQ */
+static void tegra241_vcmdq_map_lvcmdq(struct tegra241_vcmdq *vcmdq)
+{
+ u32 regval = readl(REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx)));
+ char header[64], *h = lvcmdq_error_header(vcmdq, header, 64);
+
+ writel(regval | CMDQV_CMDQ_ALLOCATED,
+ REG_CMDQV(vcmdq->cmdqv, CMDQ_ALLOC(vcmdq->idx)));
+ dev_dbg(vcmdq->cmdqv->dev, "%smapped\n", h);
}
static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own)
@@ -429,7 +552,8 @@ static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own)
* whether enabling it here or not, as !HYP_OWN cmdq HWs only support a
* restricted set of supported commands.
*/
- regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own);
+ regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own) |
+ FIELD_PREP(VINTF_VMID, vintf->vsmmu.vmid);
writel(regval, REG_VINTF(vintf, CONFIG));
ret = vintf_write_config(vintf, regval | VINTF_EN);
@@ -441,8 +565,10 @@ static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own)
*/
vintf->hyp_own = !!(VINTF_HYP_OWN & readl(REG_VINTF(vintf, CONFIG)));
+ /* HW requires to map LVCMDQs in ascending order */
for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) {
if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) {
+ tegra241_vcmdq_map_lvcmdq(vintf->lvcmdqs[lidx]);
ret = tegra241_vcmdq_hw_init(vintf->lvcmdqs[lidx]);
if (ret) {
tegra241_vintf_hw_deinit(vintf);
@@ -476,7 +602,6 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu)
for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) {
regval = FIELD_PREP(CMDQV_CMDQ_ALLOC_VINTF, idx);
regval |= FIELD_PREP(CMDQV_CMDQ_ALLOC_LVCMDQ, lidx);
- regval |= CMDQV_CMDQ_ALLOCATED;
writel_relaxed(regval,
REG_CMDQV(cmdqv, CMDQ_ALLOC(qidx++)));
}
@@ -555,7 +680,9 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
dev_dbg(vintf->cmdqv->dev,
"%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 64));
- kfree(vcmdq);
+ /* Guest-owned VCMDQ is free-ed with hw_queue by iommufd core */
+ if (vcmdq->vintf->hyp_own)
+ kfree(vcmdq);
}
static struct tegra241_vcmdq *
@@ -628,28 +755,27 @@ static int tegra241_cmdqv_init_vintf(struct tegra241_cmdqv *cmdqv, u16 max_idx,
/* Remove Helpers */
-static void tegra241_vintf_remove_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
-{
- tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]);
- tegra241_vintf_free_lvcmdq(vintf, lidx);
-}
-
static void tegra241_cmdqv_remove_vintf(struct tegra241_cmdqv *cmdqv, u16 idx)
{
struct tegra241_vintf *vintf = cmdqv->vintfs[idx];
u16 lidx;
+ tegra241_vintf_hw_deinit(vintf);
+
/* Remove LVCMDQ resources */
for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++)
if (vintf->lvcmdqs[lidx])
- tegra241_vintf_remove_lvcmdq(vintf, lidx);
-
- /* Remove VINTF resources */
- tegra241_vintf_hw_deinit(vintf);
+ tegra241_vintf_free_lvcmdq(vintf, lidx);
dev_dbg(cmdqv->dev, "VINTF%u: deallocated\n", vintf->idx);
tegra241_cmdqv_deinit_vintf(cmdqv, idx);
- kfree(vintf);
+ if (!vintf->hyp_own) {
+ mutex_destroy(&vintf->lvcmdq_mutex);
+ ida_destroy(&vintf->sids);
+ /* Guest-owned VINTF is free-ed with viommu by iommufd core */
+ } else {
+ kfree(vintf);
+ }
}
static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu)
@@ -677,10 +803,51 @@ static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu)
put_device(cmdqv->dev); /* smmu->impl_dev */
}
+static int
+tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu,
+ const struct iommu_user_data *user_data);
+
+static void *tegra241_cmdqv_hw_info(struct arm_smmu_device *smmu, u32 *length,
+ enum iommu_hw_info_type *type)
+{
+ struct tegra241_cmdqv *cmdqv =
+ container_of(smmu, struct tegra241_cmdqv, smmu);
+ struct iommu_hw_info_tegra241_cmdqv *info;
+ u32 regval;
+
+ if (*type != IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+
+ regval = readl_relaxed(REG_CMDQV(cmdqv, PARAM));
+ info->log2vcmdqs = ilog2(cmdqv->num_lvcmdqs_per_vintf);
+ info->log2vsids = ilog2(cmdqv->num_sids_per_vintf);
+ info->version = FIELD_GET(CMDQV_VER, regval);
+
+ *length = sizeof(*info);
+ *type = IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV;
+ return info;
+}
+
+static size_t tegra241_cmdqv_get_vintf_size(enum iommu_viommu_type viommu_type)
+{
+ if (viommu_type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV)
+ return 0;
+ return VIOMMU_STRUCT_SIZE(struct tegra241_vintf, vsmmu.core);
+}
+
static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = {
+ /* For in-kernel use */
.get_secondary_cmdq = tegra241_cmdqv_get_cmdq,
.device_reset = tegra241_cmdqv_hw_reset,
.device_remove = tegra241_cmdqv_remove,
+ /* For user-space use */
+ .hw_info = tegra241_cmdqv_hw_info,
+ .get_viommu_size = tegra241_cmdqv_get_vintf_size,
+ .vsmmu_init = tegra241_cmdqv_init_vintf_user,
};
/* Probe Functions */
@@ -822,10 +989,12 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res,
cmdqv->irq = irq;
cmdqv->base = base;
cmdqv->dev = smmu->impl_dev;
+ cmdqv->base_phys = res->start;
if (cmdqv->irq > 0) {
- ret = request_irq(irq, tegra241_cmdqv_isr, 0, "tegra241-cmdqv",
- cmdqv);
+ ret = request_threaded_irq(irq, NULL, tegra241_cmdqv_isr,
+ IRQF_ONESHOT, "tegra241-cmdqv",
+ cmdqv);
if (ret) {
dev_err(cmdqv->dev, "failed to request irq (%d): %d\n",
cmdqv->irq, ret);
@@ -837,6 +1006,8 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res,
cmdqv->num_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, regval);
cmdqv->num_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, regval);
cmdqv->num_lvcmdqs_per_vintf = cmdqv->num_vcmdqs / cmdqv->num_vintfs;
+ cmdqv->num_sids_per_vintf =
+ 1 << FIELD_GET(CMDQV_NUM_SID_PER_VM_LOG2, regval);
cmdqv->vintfs =
kcalloc(cmdqv->num_vintfs, sizeof(*cmdqv->vintfs), GFP_KERNEL);
@@ -890,3 +1061,287 @@ out_fallback:
put_device(smmu->impl_dev);
return ERR_PTR(-ENODEV);
}
+
+/* User space VINTF and VCMDQ Functions */
+
+static size_t tegra241_vintf_get_vcmdq_size(struct iommufd_viommu *viommu,
+ enum iommu_hw_queue_type queue_type)
+{
+ if (queue_type != IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV)
+ return 0;
+ return HW_QUEUE_STRUCT_SIZE(struct tegra241_vcmdq, core);
+}
+
+static int tegra241_vcmdq_hw_init_user(struct tegra241_vcmdq *vcmdq)
+{
+ char header[64];
+
+ /* Configure the vcmdq only; User space does the enabling */
+ writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE));
+
+ dev_dbg(vcmdq->cmdqv->dev, "%sinited at host PA 0x%llx size 0x%lx\n",
+ lvcmdq_error_header(vcmdq, header, 64),
+ vcmdq->cmdq.q.q_base & VCMDQ_ADDR,
+ 1UL << (vcmdq->cmdq.q.q_base & VCMDQ_LOG2SIZE));
+ return 0;
+}
+
+static void
+tegra241_vintf_destroy_lvcmdq_user(struct iommufd_hw_queue *hw_queue)
+{
+ struct tegra241_vcmdq *vcmdq = hw_queue_to_vcmdq(hw_queue);
+
+ mutex_lock(&vcmdq->vintf->lvcmdq_mutex);
+ tegra241_vcmdq_hw_deinit(vcmdq);
+ tegra241_vcmdq_unmap_lvcmdq(vcmdq);
+ tegra241_vintf_free_lvcmdq(vcmdq->vintf, vcmdq->lidx);
+ if (vcmdq->prev)
+ iommufd_hw_queue_undepend(vcmdq, vcmdq->prev, core);
+ mutex_unlock(&vcmdq->vintf->lvcmdq_mutex);
+}
+
+static int tegra241_vintf_alloc_lvcmdq_user(struct iommufd_hw_queue *hw_queue,
+ u32 lidx, phys_addr_t base_addr_pa)
+{
+ struct tegra241_vintf *vintf = viommu_to_vintf(hw_queue->viommu);
+ struct tegra241_vcmdq *vcmdq = hw_queue_to_vcmdq(hw_queue);
+ struct tegra241_cmdqv *cmdqv = vintf->cmdqv;
+ struct arm_smmu_device *smmu = &cmdqv->smmu;
+ struct tegra241_vcmdq *prev = NULL;
+ u32 log2size, max_n_shift;
+ char header[64];
+ int ret;
+
+ if (hw_queue->type != IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV)
+ return -EOPNOTSUPP;
+ if (lidx >= cmdqv->num_lvcmdqs_per_vintf)
+ return -EINVAL;
+
+ mutex_lock(&vintf->lvcmdq_mutex);
+
+ if (vintf->lvcmdqs[lidx]) {
+ ret = -EEXIST;
+ goto unlock;
+ }
+
+ /*
+	 * HW requires LVCMDQs to be mapped in ascending order, so reject the
+	 * allocation if the previous LVCMDQ has not been allocated yet.
+ */
+ if (lidx) {
+ prev = vintf->lvcmdqs[lidx - 1];
+ if (!prev) {
+ ret = -EIO;
+ goto unlock;
+ }
+ }
+
+ /*
+	 * hw_queue->length must be a power of 2, in the range
+ * [ 32, 2 ^ (idr[1].CMDQS + CMDQ_ENT_SZ_SHIFT) ]
+ */
+ max_n_shift = FIELD_GET(IDR1_CMDQS,
+ readl_relaxed(smmu->base + ARM_SMMU_IDR1));
+ if (!is_power_of_2(hw_queue->length) || hw_queue->length < 32 ||
+ hw_queue->length > (1 << (max_n_shift + CMDQ_ENT_SZ_SHIFT))) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+ log2size = ilog2(hw_queue->length) - CMDQ_ENT_SZ_SHIFT;
+
+ /* base_addr_pa must be aligned to hw_queue->length */
+ if (base_addr_pa & ~VCMDQ_ADDR ||
+ base_addr_pa & (hw_queue->length - 1)) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ /*
+	 * HW requires LVCMDQs to be unmapped in descending order, so destroy()
+	 * must follow this rule. Set a dependency on the previous LVCMDQ so
+	 * the iommufd core can help enforce it.
+ */
+ if (prev) {
+ ret = iommufd_hw_queue_depend(vcmdq, prev, core);
+ if (ret)
+ goto unlock;
+ }
+ vcmdq->prev = prev;
+
+ ret = tegra241_vintf_init_lvcmdq(vintf, lidx, vcmdq);
+ if (ret)
+ goto undepend_vcmdq;
+
+ dev_dbg(cmdqv->dev, "%sallocated\n",
+ lvcmdq_error_header(vcmdq, header, 64));
+
+ tegra241_vcmdq_map_lvcmdq(vcmdq);
+
+ vcmdq->cmdq.q.q_base = base_addr_pa & VCMDQ_ADDR;
+ vcmdq->cmdq.q.q_base |= log2size;
+
+ ret = tegra241_vcmdq_hw_init_user(vcmdq);
+ if (ret)
+ goto unmap_lvcmdq;
+
+ hw_queue->destroy = &tegra241_vintf_destroy_lvcmdq_user;
+ mutex_unlock(&vintf->lvcmdq_mutex);
+ return 0;
+
+unmap_lvcmdq:
+ tegra241_vcmdq_unmap_lvcmdq(vcmdq);
+ tegra241_vintf_deinit_lvcmdq(vintf, lidx);
+undepend_vcmdq:
+ if (vcmdq->prev)
+ iommufd_hw_queue_undepend(vcmdq, vcmdq->prev, core);
+unlock:
+ mutex_unlock(&vintf->lvcmdq_mutex);
+ return ret;
+}
+
+static void tegra241_cmdqv_destroy_vintf_user(struct iommufd_viommu *viommu)
+{
+ struct tegra241_vintf *vintf = viommu_to_vintf(viommu);
+
+ if (vintf->mmap_offset)
+ iommufd_viommu_destroy_mmap(&vintf->vsmmu.core,
+ vintf->mmap_offset);
+ tegra241_cmdqv_remove_vintf(vintf->cmdqv, vintf->idx);
+}
+
+static void tegra241_vintf_destroy_vsid(struct iommufd_vdevice *vdev)
+{
+ struct tegra241_vintf_sid *vsid = vdev_to_vsid(vdev);
+ struct tegra241_vintf *vintf = vsid->vintf;
+
+ writel(0, REG_VINTF(vintf, SID_MATCH(vsid->idx)));
+ writel(0, REG_VINTF(vintf, SID_REPLACE(vsid->idx)));
+ ida_free(&vintf->sids, vsid->idx);
+ dev_dbg(vintf->cmdqv->dev,
+ "VINTF%u: deallocated SID_REPLACE%d for pSID=%x\n", vintf->idx,
+ vsid->idx, vsid->sid);
+}
+
+static int tegra241_vintf_init_vsid(struct iommufd_vdevice *vdev)
+{
+ struct device *dev = iommufd_vdevice_to_device(vdev);
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ struct tegra241_vintf *vintf = viommu_to_vintf(vdev->viommu);
+ struct tegra241_vintf_sid *vsid = vdev_to_vsid(vdev);
+ struct arm_smmu_stream *stream = &master->streams[0];
+ u64 virt_sid = vdev->virt_id;
+ int sidx;
+
+ if (virt_sid > UINT_MAX)
+ return -EINVAL;
+
+ WARN_ON_ONCE(master->num_streams != 1);
+
+ /* Find an empty pair of SID_REPLACE and SID_MATCH */
+ sidx = ida_alloc_max(&vintf->sids, vintf->cmdqv->num_sids_per_vintf - 1,
+ GFP_KERNEL);
+ if (sidx < 0)
+ return sidx;
+
+ writel(stream->id, REG_VINTF(vintf, SID_REPLACE(sidx)));
+ writel(virt_sid << 1 | 0x1, REG_VINTF(vintf, SID_MATCH(sidx)));
+ dev_dbg(vintf->cmdqv->dev,
+ "VINTF%u: allocated SID_REPLACE%d for pSID=%x, vSID=%x\n",
+ vintf->idx, sidx, stream->id, (u32)virt_sid);
+
+ vsid->idx = sidx;
+ vsid->vintf = vintf;
+ vsid->sid = stream->id;
+
+ vdev->destroy = &tegra241_vintf_destroy_vsid;
+ return 0;
+}
+
+static struct iommufd_viommu_ops tegra241_cmdqv_viommu_ops = {
+ .destroy = tegra241_cmdqv_destroy_vintf_user,
+ .alloc_domain_nested = arm_vsmmu_alloc_domain_nested,
+	/* Non-accelerated commands will still be handled by the kernel */
+ .cache_invalidate = arm_vsmmu_cache_invalidate,
+ .vdevice_size = VDEVICE_STRUCT_SIZE(struct tegra241_vintf_sid, core),
+ .vdevice_init = tegra241_vintf_init_vsid,
+ .get_hw_queue_size = tegra241_vintf_get_vcmdq_size,
+ .hw_queue_init_phys = tegra241_vintf_alloc_lvcmdq_user,
+};
+
+static int
+tegra241_cmdqv_init_vintf_user(struct arm_vsmmu *vsmmu,
+ const struct iommu_user_data *user_data)
+{
+ struct tegra241_cmdqv *cmdqv =
+ container_of(vsmmu->smmu, struct tegra241_cmdqv, smmu);
+ struct tegra241_vintf *vintf = viommu_to_vintf(&vsmmu->core);
+ struct iommu_viommu_tegra241_cmdqv data;
+ phys_addr_t page0_base;
+ int ret;
+
+ /*
+	 * An unsupported type should have been rejected by
+	 * tegra241_cmdqv_get_vintf_size(). Seeing one here indicates a kernel
+	 * bug or some data corruption.
+ */
+ if (WARN_ON(vsmmu->core.type != IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV))
+ return -EOPNOTSUPP;
+
+ if (!user_data)
+ return -EINVAL;
+
+ ret = iommu_copy_struct_from_user(&data, user_data,
+ IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV,
+ out_vintf_mmap_length);
+ if (ret)
+ return ret;
+
+ ret = tegra241_cmdqv_init_vintf(cmdqv, cmdqv->num_vintfs - 1, vintf);
+ if (ret < 0) {
+ dev_err(cmdqv->dev, "no more available vintf\n");
+ return ret;
+ }
+
+ /*
+	 * Initialize the user-owned VINTF without an LVCMDQ; for security
+	 * reasons, an LVCMDQ cannot be pre-allocated until user space asks for
+	 * one. This differs from the kernel-owned VINTF0, which has pre-assigned
+	 * and pre-allocated global VCMDQs that are mapped to its LVCMDQs by the
+	 * tegra241_vintf_hw_init() call.
+ */
+ ret = tegra241_vintf_hw_init(vintf, false);
+ if (ret)
+ goto deinit_vintf;
+
+ page0_base = cmdqv->base_phys + TEGRA241_VINTFi_PAGE0(vintf->idx);
+ ret = iommufd_viommu_alloc_mmap(&vintf->vsmmu.core, page0_base, SZ_64K,
+ &vintf->mmap_offset);
+ if (ret)
+ goto hw_deinit_vintf;
+
+ data.out_vintf_mmap_length = SZ_64K;
+ data.out_vintf_mmap_offset = vintf->mmap_offset;
+ ret = iommu_copy_struct_to_user(user_data, &data,
+ IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV,
+ out_vintf_mmap_length);
+ if (ret)
+ goto free_mmap;
+
+ ida_init(&vintf->sids);
+ mutex_init(&vintf->lvcmdq_mutex);
+
+ dev_dbg(cmdqv->dev, "VINTF%u: allocated with vmid (%d)\n", vintf->idx,
+ vintf->vsmmu.vmid);
+
+ vsmmu->core.ops = &tegra241_cmdqv_viommu_ops;
+ return 0;
+
+free_mmap:
+ iommufd_viommu_destroy_mmap(&vintf->vsmmu.core, vintf->mmap_offset);
+hw_deinit_vintf:
+ tegra241_vintf_hw_deinit(vintf);
+deinit_vintf:
+ tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx);
+ return ret;
+}
+
+MODULE_IMPORT_NS("IOMMUFD");
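
The LVCMDQ allocation path added above (tegra241_vintf_alloc_lvcmdq_user) only accepts queues whose length is a power of 2 within [32, 2^(IDR1.CMDQS + CMDQ_ENT_SZ_SHIFT)] bytes and whose base physical address is aligned to that length, with queues created in ascending index order. The following is a minimal, hypothetical user-side sanity check mirroring those constraints; it is a sketch only, not part of the patch, and the IDR1.CMDQS and entry-size-shift values are assumed to be read from the SMMU by the caller.

    /* Hypothetical helper mirroring the kernel-side LVCMDQ checks */
    #include <stdbool.h>
    #include <stdint.h>

    static bool lvcmdq_config_ok(uint64_t base_pa, uint64_t length,
                                 unsigned int idr1_cmdqs,
                                 unsigned int cmdq_ent_sz_shift)
    {
            uint64_t max_len = 1ULL << (idr1_cmdqs + cmdq_ent_sz_shift);

            if (length < 32 || length > max_len)
                    return false;
            if (length & (length - 1))              /* not a power of 2 */
                    return false;
            if (base_pa & (length - 1))             /* base not aligned to length */
                    return false;
            return true;
    }

A queue rejected by this check would also be rejected by the kernel with -EINVAL before any hardware state is touched.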
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 62874b18f645..57c097e87613 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -355,7 +355,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
priv->set_prr_addr = NULL;
if (of_device_is_compatible(np, "qcom,smmu-500") &&
- of_device_is_compatible(np, "qcom,adreno-smmu")) {
+ !of_device_is_compatible(np, "qcom,sm8250-smmu-500") &&
+ of_device_is_compatible(np, "qcom,adreno-smmu")) {
priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
}
@@ -379,6 +380,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,sdm670-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
+ { .compatible = "qcom,sm6115-mdss" },
{ .compatible = "qcom,sm6350-mdss" },
{ .compatible = "qcom,sm6375-mdss" },
{ .compatible = "qcom,sm8150-mdss" },
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 8d95b14c7d5a..4ced4b5bee4d 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -109,7 +109,7 @@ static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
}
static struct platform_driver arm_smmu_driver;
-static struct iommu_ops arm_smmu_ops;
+static const struct iommu_ops arm_smmu_ops;
#ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
static struct device_node *dev_get_dev_node(struct device *dev)
@@ -919,6 +919,8 @@ static void arm_smmu_destroy_domain_context(struct arm_smmu_domain *smmu_domain)
static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
{
struct arm_smmu_domain *smmu_domain;
+ struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
+ struct arm_smmu_device *smmu = cfg->smmu;
/*
* Allocate the domain and initialise some of its data structures.
@@ -931,6 +933,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
mutex_init(&smmu_domain->init_mutex);
spin_lock_init(&smmu_domain->cb_lock);
+ smmu_domain->domain.pgsize_bitmap = smmu->pgsize_bitmap;
return &smmu_domain->domain;
}
@@ -1627,7 +1630,7 @@ static int arm_smmu_def_domain_type(struct device *dev)
return 0;
}
-static struct iommu_ops arm_smmu_ops = {
+static const struct iommu_ops arm_smmu_ops = {
.identity_domain = &arm_smmu_identity_domain,
.blocked_domain = &arm_smmu_blocked_domain,
.capable = arm_smmu_capable,
@@ -1639,7 +1642,6 @@ static struct iommu_ops arm_smmu_ops = {
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
.def_domain_type = arm_smmu_def_domain_type,
- .pgsize_bitmap = -1UL, /* Restricted during device attach */
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = arm_smmu_attach_dev,
@@ -1919,10 +1921,6 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
- if (arm_smmu_ops.pgsize_bitmap == -1UL)
- arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
- else
- arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
smmu->pgsize_bitmap);
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 3907924646a2..c5be95e56031 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -229,7 +229,7 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
goto out_unlock;
pgtbl_cfg = (struct io_pgtable_cfg) {
- .pgsize_bitmap = qcom_iommu_ops.pgsize_bitmap,
+ .pgsize_bitmap = domain->pgsize_bitmap,
.ias = 32,
.oas = 40,
.tlb = &qcom_flush_ops,
@@ -246,8 +246,6 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
goto out_clear_iommu;
}
- /* Update the domain's page sizes to reflect the page table format */
- domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
domain->geometry.aperture_end = (1ULL << pgtbl_cfg.ias) - 1;
domain->geometry.force_aperture = true;
@@ -337,6 +335,7 @@ static struct iommu_domain *qcom_iommu_domain_alloc_paging(struct device *dev)
mutex_init(&qcom_domain->init_mutex);
spin_lock_init(&qcom_domain->pgtbl_lock);
+ qcom_domain->domain.pgsize_bitmap = SZ_4K;
return &qcom_domain->domain;
}
@@ -598,7 +597,6 @@ static const struct iommu_ops qcom_iommu_ops = {
.probe_device = qcom_iommu_probe_device,
.device_group = generic_device_group,
.of_xlate = qcom_iommu_of_xlate,
- .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = qcom_iommu_attach_dev,
.map_pages = qcom_iommu_map,
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index fcb6a0f7c082..b6edd178fe25 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -22,6 +22,7 @@
#include <linux/pm_runtime.h>
#include <linux/slab.h>
+#include "dma-iommu.h"
#include "iommu-pages.h"
typedef u32 sysmmu_iova_t;
@@ -925,6 +926,8 @@ static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev)
spin_lock_init(&domain->pgtablelock);
INIT_LIST_HEAD(&domain->clients);
+ domain->domain.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE;
+
domain->domain.geometry.aperture_start = 0;
domain->domain.geometry.aperture_end = ~0UL;
domain->domain.geometry.force_aperture = true;
@@ -1477,7 +1480,7 @@ static const struct iommu_ops exynos_iommu_ops = {
.device_group = generic_device_group,
.probe_device = exynos_iommu_probe_device,
.release_device = exynos_iommu_release_device,
- .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
+ .get_resv_regions = iommu_dma_get_resv_regions,
.of_xlate = exynos_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = exynos_iommu_attach_device,
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 47692cbfaabd..265e7290256b 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -370,7 +370,7 @@ static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *
struct intel_iommu *iommu = tag->iommu;
u64 type = DMA_TLB_PSI_FLUSH;
- if (domain->use_first_level) {
+ if (intel_domain_is_fs_paging(domain)) {
qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
pages, ih, domain->qi_batch);
return;
@@ -422,22 +422,6 @@ static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_
domain->qi_batch);
}
-static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
-{
- struct intel_iommu *iommu = tag->iommu;
- struct device_domain_info *info;
- u16 sid;
-
- info = dev_iommu_priv_get(tag->dev);
- sid = PCI_DEVID(info->bus, info->devfn);
-
- qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
- MAX_AGAW_PFN_WIDTH, domain->qi_batch);
- if (info->dtlb_extra_inval)
- qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
- MAX_AGAW_PFN_WIDTH, domain->qi_batch);
-}
-
/*
* Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
* when the memory mappings in the target domain have been modified.
@@ -450,7 +434,13 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
struct cache_tag *tag;
unsigned long flags;
- addr = calculate_psi_aligned_address(start, end, &pages, &mask);
+ if (start == 0 && end == ULONG_MAX) {
+ addr = 0;
+ pages = -1;
+ mask = MAX_AGAW_PFN_WIDTH;
+ } else {
+ addr = calculate_psi_aligned_address(start, end, &pages, &mask);
+ }
spin_lock_irqsave(&domain->cache_lock, flags);
list_for_each_entry(tag, &domain->cache_tags, node) {
@@ -491,31 +481,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
*/
void cache_tag_flush_all(struct dmar_domain *domain)
{
- struct intel_iommu *iommu = NULL;
- struct cache_tag *tag;
- unsigned long flags;
-
- spin_lock_irqsave(&domain->cache_lock, flags);
- list_for_each_entry(tag, &domain->cache_tags, node) {
- if (iommu && iommu != tag->iommu)
- qi_batch_flush_descs(iommu, domain->qi_batch);
- iommu = tag->iommu;
-
- switch (tag->type) {
- case CACHE_TAG_IOTLB:
- case CACHE_TAG_NESTING_IOTLB:
- cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
- break;
- case CACHE_TAG_DEVTLB:
- case CACHE_TAG_NESTING_DEVTLB:
- cache_tag_flush_devtlb_all(domain, tag);
- break;
- }
-
- trace_cache_tag_flush_all(tag);
- }
- qi_batch_flush_descs(iommu, domain->qi_batch);
- spin_unlock_irqrestore(&domain->cache_lock, flags);
+ cache_tag_flush_range(domain, 0, ULONG_MAX, 0);
}
/*
@@ -545,7 +511,8 @@ void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
qi_batch_flush_descs(iommu, domain->qi_batch);
iommu = tag->iommu;
- if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
+ if (!cap_caching_mode(iommu->cap) ||
+ intel_domain_is_fs_paging(domain)) {
iommu_flush_write_buffer(iommu);
continue;
}
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index b61d9ea27aa9..ec975c73cfe6 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -935,14 +935,11 @@ void __init detect_intel_iommu(void)
pci_request_acs();
}
-#ifdef CONFIG_X86
if (!ret) {
x86_init.iommu.iommu_init = intel_iommu_init;
x86_platform.iommu_shutdown = intel_iommu_shutdown;
}
-#endif
-
if (dmar_tbl) {
acpi_put_table(dmar_tbl);
dmar_tbl = NULL;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 148b944143b8..19955e222c2b 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -57,6 +57,8 @@
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
+#define rwbf_required(iommu) (rwbf_quirk || cap_rwbf((iommu)->cap))
+
/*
* set to 1 to panic kernel if can't successfully enable VT-d
* (used when kernel is launched w/ TXT)
@@ -1391,28 +1393,10 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
if (--info->refcnt == 0) {
ida_free(&iommu->domain_ida, info->did);
xa_erase(&domain->iommu_array, iommu->seq_id);
- domain->nid = NUMA_NO_NODE;
kfree(info);
}
}
-static void domain_exit(struct dmar_domain *domain)
-{
- if (domain->pgd) {
- struct iommu_pages_list freelist =
- IOMMU_PAGES_LIST_INIT(freelist);
-
- domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
- iommu_put_pages_list(&freelist);
- }
-
- if (WARN_ON(!list_empty(&domain->devices)))
- return;
-
- kfree(domain->qi_batch);
- kfree(domain);
-}
-
/*
* For kdump cases, old valid entries may be cached due to the
* in-flight DMA and copied pgtable, but there is no unmapping
@@ -1480,6 +1464,9 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
struct context_entry *context;
int ret;
+ if (WARN_ON(!intel_domain_is_ss_paging(domain)))
+ return -EINVAL;
+
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1736,15 +1723,14 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
intel_context_flush_no_pasid(info, context, did);
}
-int __domain_setup_first_level(struct intel_iommu *iommu,
- struct device *dev, ioasid_t pasid,
- u16 did, pgd_t *pgd, int flags,
- struct iommu_domain *old)
+int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev,
+ ioasid_t pasid, u16 did, phys_addr_t fsptptr,
+ int flags, struct iommu_domain *old)
{
if (!old)
- return intel_pasid_setup_first_level(iommu, dev, pgd,
- pasid, did, flags);
- return intel_pasid_replace_first_level(iommu, dev, pgd, pasid, did,
+ return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid,
+ did, flags);
+ return intel_pasid_replace_first_level(iommu, dev, fsptptr, pasid, did,
iommu_domain_did(old, iommu),
flags);
}
@@ -1793,7 +1779,7 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
return __domain_setup_first_level(iommu, dev, pasid,
domain_id_iommu(domain, iommu),
- (pgd_t *)pgd, flags, old);
+ __pa(pgd), flags, old);
}
static int dmar_domain_attach_device(struct dmar_domain *domain,
@@ -1819,12 +1805,14 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (!sm_supported(iommu))
ret = domain_context_mapping(domain, dev);
- else if (domain->use_first_level)
+ else if (intel_domain_is_fs_paging(domain))
ret = domain_setup_first_level(iommu, domain, dev,
IOMMU_NO_PASID, NULL);
- else
+ else if (intel_domain_is_ss_paging(domain))
ret = domain_setup_second_level(iommu, domain, dev,
IOMMU_NO_PASID, NULL);
+ else if (WARN_ON(true))
+ ret = -EINVAL;
if (ret)
goto out_block_translation;
@@ -3286,10 +3274,14 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st
spin_lock_init(&domain->lock);
spin_lock_init(&domain->cache_lock);
xa_init(&domain->iommu_array);
+ INIT_LIST_HEAD(&domain->s1_domains);
+ spin_lock_init(&domain->s1_lock);
domain->nid = dev_to_node(dev);
domain->use_first_level = first_stage;
+ domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
+
/* calculate the address width */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
@@ -3331,71 +3323,168 @@ static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_st
}
static struct iommu_domain *
-intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
- const struct iommu_user_data *user_data)
+intel_iommu_domain_alloc_first_stage(struct device *dev,
+ struct intel_iommu *iommu, u32 flags)
+{
+ struct dmar_domain *dmar_domain;
+
+ if (flags & ~IOMMU_HWPT_ALLOC_PASID)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ /* Only SL is available in legacy mode */
+ if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dmar_domain = paging_domain_alloc(dev, true);
+ if (IS_ERR(dmar_domain))
+ return ERR_CAST(dmar_domain);
+
+ dmar_domain->domain.ops = &intel_fs_paging_domain_ops;
+ /*
+ * iotlb sync for map is only needed for legacy implementations that
+ * explicitly require flushing internal write buffers to ensure memory
+ * coherence.
+ */
+ if (rwbf_required(iommu))
+ dmar_domain->iotlb_sync_map = true;
+
+ return &dmar_domain->domain;
+}
+
+static struct iommu_domain *
+intel_iommu_domain_alloc_second_stage(struct device *dev,
+ struct intel_iommu *iommu, u32 flags)
{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
- bool nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
- struct intel_iommu *iommu = info->iommu;
struct dmar_domain *dmar_domain;
- struct iommu_domain *domain;
- bool first_stage;
if (flags &
(~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
IOMMU_HWPT_ALLOC_PASID)))
return ERR_PTR(-EOPNOTSUPP);
- if (nested_parent && !nested_supported(iommu))
+
+ if (((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) &&
+ !nested_supported(iommu)) ||
+ ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
+ !ssads_supported(iommu)))
return ERR_PTR(-EOPNOTSUPP);
- if (user_data || (dirty_tracking && !ssads_supported(iommu)))
+
+ /* Legacy mode always supports second stage */
+ if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
return ERR_PTR(-EOPNOTSUPP);
+ dmar_domain = paging_domain_alloc(dev, false);
+ if (IS_ERR(dmar_domain))
+ return ERR_CAST(dmar_domain);
+
+ dmar_domain->domain.ops = &intel_ss_paging_domain_ops;
+ dmar_domain->nested_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
+
+ if (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING)
+ dmar_domain->domain.dirty_ops = &intel_dirty_ops;
+
/*
- * Always allocate the guest compatible page table unless
- * IOMMU_HWPT_ALLOC_NEST_PARENT or IOMMU_HWPT_ALLOC_DIRTY_TRACKING
- * is specified.
+ * Besides the internal write buffer flush, the caching mode used for
+	 * legacy nested translation (which uses shadow page tables)
+ * also requires iotlb sync on map.
*/
- if (nested_parent || dirty_tracking) {
- if (!sm_supported(iommu) || !ecap_slts(iommu->ecap))
- return ERR_PTR(-EOPNOTSUPP);
- first_stage = false;
- } else {
- first_stage = first_level_by_default(iommu);
- }
+ if (rwbf_required(iommu) || cap_caching_mode(iommu->cap))
+ dmar_domain->iotlb_sync_map = true;
- dmar_domain = paging_domain_alloc(dev, first_stage);
- if (IS_ERR(dmar_domain))
- return ERR_CAST(dmar_domain);
- domain = &dmar_domain->domain;
- domain->type = IOMMU_DOMAIN_UNMANAGED;
- domain->owner = &intel_iommu_ops;
- domain->ops = intel_iommu_ops.default_domain_ops;
-
- if (nested_parent) {
- dmar_domain->nested_parent = true;
- INIT_LIST_HEAD(&dmar_domain->s1_domains);
- spin_lock_init(&dmar_domain->s1_lock);
- }
+ return &dmar_domain->domain;
+}
- if (dirty_tracking) {
- if (dmar_domain->use_first_level) {
- iommu_domain_free(domain);
- return ERR_PTR(-EOPNOTSUPP);
- }
- domain->dirty_ops = &intel_dirty_ops;
- }
+static struct iommu_domain *
+intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
+ const struct iommu_user_data *user_data)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct iommu_domain *domain;
- return domain;
+ if (user_data)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ /* Prefer first stage if possible by default. */
+ domain = intel_iommu_domain_alloc_first_stage(dev, iommu, flags);
+ if (domain != ERR_PTR(-EOPNOTSUPP))
+ return domain;
+ return intel_iommu_domain_alloc_second_stage(dev, iommu, flags);
}
static void intel_iommu_domain_free(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- WARN_ON(dmar_domain->nested_parent &&
- !list_empty(&dmar_domain->s1_domains));
- domain_exit(dmar_domain);
+ if (WARN_ON(dmar_domain->nested_parent &&
+ !list_empty(&dmar_domain->s1_domains)))
+ return;
+
+ if (WARN_ON(!list_empty(&dmar_domain->devices)))
+ return;
+
+ if (dmar_domain->pgd) {
+ struct iommu_pages_list freelist =
+ IOMMU_PAGES_LIST_INIT(freelist);
+
+ domain_unmap(dmar_domain, 0, DOMAIN_MAX_PFN(dmar_domain->gaw),
+ &freelist);
+ iommu_put_pages_list(&freelist);
+ }
+
+ kfree(dmar_domain->qi_batch);
+ kfree(dmar_domain);
+}
+
+static int paging_domain_compatible_first_stage(struct dmar_domain *dmar_domain,
+ struct intel_iommu *iommu)
+{
+ if (WARN_ON(dmar_domain->domain.dirty_ops ||
+ dmar_domain->nested_parent))
+ return -EINVAL;
+
+ /* Only SL is available in legacy mode */
+ if (!sm_supported(iommu) || !ecap_flts(iommu->ecap))
+ return -EINVAL;
+
+ /* Same page size support */
+ if (!cap_fl1gp_support(iommu->cap) &&
+ (dmar_domain->domain.pgsize_bitmap & SZ_1G))
+ return -EINVAL;
+
+ /* iotlb sync on map requirement */
+ if ((rwbf_required(iommu)) && !dmar_domain->iotlb_sync_map)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+paging_domain_compatible_second_stage(struct dmar_domain *dmar_domain,
+ struct intel_iommu *iommu)
+{
+ unsigned int sslps = cap_super_page_val(iommu->cap);
+
+ if (dmar_domain->domain.dirty_ops && !ssads_supported(iommu))
+ return -EINVAL;
+ if (dmar_domain->nested_parent && !nested_supported(iommu))
+ return -EINVAL;
+
+ /* Legacy mode always supports second stage */
+ if (sm_supported(iommu) && !ecap_slts(iommu->ecap))
+ return -EINVAL;
+
+ /* Same page size support */
+ if (!(sslps & BIT(0)) && (dmar_domain->domain.pgsize_bitmap & SZ_2M))
+ return -EINVAL;
+ if (!(sslps & BIT(1)) && (dmar_domain->domain.pgsize_bitmap & SZ_1G))
+ return -EINVAL;
+
+ /* iotlb sync on map requirement */
+ if ((rwbf_required(iommu) || cap_caching_mode(iommu->cap)) &&
+ !dmar_domain->iotlb_sync_map)
+ return -EINVAL;
+
+ return 0;
}
int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
@@ -3403,28 +3492,29 @@ int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
+ int ret = -EINVAL;
int addr_width;
- if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
- return -EPERM;
+ if (intel_domain_is_fs_paging(dmar_domain))
+ ret = paging_domain_compatible_first_stage(dmar_domain, iommu);
+ else if (intel_domain_is_ss_paging(dmar_domain))
+ ret = paging_domain_compatible_second_stage(dmar_domain, iommu);
+ else if (WARN_ON(true))
+ ret = -EINVAL;
+ if (ret)
+ return ret;
+ /*
+ * FIXME this is locked wrong, it needs to be under the
+ * dmar_domain->lock
+ */
if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
return -EINVAL;
- if (domain->dirty_ops && !ssads_supported(iommu))
- return -EINVAL;
-
if (dmar_domain->iommu_coherency !=
iommu_paging_structure_coherency(iommu))
return -EINVAL;
- if (dmar_domain->iommu_superpage !=
- iommu_superpage_capability(iommu, dmar_domain->use_first_level))
- return -EINVAL;
-
- if (dmar_domain->use_first_level &&
- (!sm_supported(iommu) || !ecap_flts(iommu->ecap)))
- return -EINVAL;
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
@@ -3610,44 +3700,41 @@ static bool domain_support_force_snooping(struct dmar_domain *domain)
return support;
}
-static void domain_set_force_snooping(struct dmar_domain *domain)
+static bool intel_iommu_enforce_cache_coherency_fs(struct iommu_domain *domain)
{
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct device_domain_info *info;
- assert_spin_locked(&domain->lock);
- /*
- * Second level page table supports per-PTE snoop control. The
- * iommu_map() interface will handle this by setting SNP bit.
- */
- if (!domain->use_first_level) {
- domain->set_pte_snp = true;
- return;
- }
+ guard(spinlock_irqsave)(&dmar_domain->lock);
- list_for_each_entry(info, &domain->devices, link)
+ if (dmar_domain->force_snooping)
+ return true;
+
+ if (!domain_support_force_snooping(dmar_domain))
+ return false;
+
+ dmar_domain->force_snooping = true;
+ list_for_each_entry(info, &dmar_domain->devices, link)
intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
IOMMU_NO_PASID);
+ return true;
}
-static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
+static bool intel_iommu_enforce_cache_coherency_ss(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- unsigned long flags;
- if (dmar_domain->force_snooping)
- return true;
-
- spin_lock_irqsave(&dmar_domain->lock, flags);
+ guard(spinlock_irqsave)(&dmar_domain->lock);
if (!domain_support_force_snooping(dmar_domain) ||
- (!dmar_domain->use_first_level && dmar_domain->has_mappings)) {
- spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ dmar_domain->has_mappings)
return false;
- }
- domain_set_force_snooping(dmar_domain);
+ /*
+ * Second level page table supports per-PTE snoop control. The
+ * iommu_map() interface will handle this by setting SNP bit.
+ */
+ dmar_domain->set_pte_snp = true;
dmar_domain->force_snooping = true;
- spin_unlock_irqrestore(&dmar_domain->lock, flags);
-
return true;
}
@@ -3954,7 +4041,10 @@ static bool risky_device(struct pci_dev *pdev)
static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
- cache_tag_flush_range_np(to_dmar_domain(domain), iova, iova + size - 1);
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+ if (dmar_domain->iotlb_sync_map)
+ cache_tag_flush_range_np(dmar_domain, iova, iova + size - 1);
return 0;
}
@@ -4000,8 +4090,8 @@ static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- iopf_for_domain_remove(old, dev);
intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
+ iopf_for_domain_remove(old, dev);
domain_remove_dev_pasid(old, dev, pasid);
return 0;
@@ -4078,12 +4168,15 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
if (ret)
goto out_remove_dev_pasid;
- if (dmar_domain->use_first_level)
+ if (intel_domain_is_fs_paging(dmar_domain))
ret = domain_setup_first_level(iommu, dmar_domain,
dev, pasid, old);
- else
+ else if (intel_domain_is_ss_paging(dmar_domain))
ret = domain_setup_second_level(iommu, dmar_domain,
dev, pasid, old);
+ else if (WARN_ON(true))
+ ret = -EINVAL;
+
if (ret)
goto out_unwind_iopf;
@@ -4100,12 +4193,17 @@ out_remove_dev_pasid:
return ret;
}
-static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
+static void *intel_iommu_hw_info(struct device *dev, u32 *length,
+ enum iommu_hw_info_type *type)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
struct iommu_hw_info_vtd *vtd;
+ if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
+ *type != IOMMU_HW_INFO_TYPE_INTEL_VTD)
+ return ERR_PTR(-EOPNOTSUPP);
+
vtd = kzalloc(sizeof(*vtd), GFP_KERNEL);
if (!vtd)
return ERR_PTR(-ENOMEM);
@@ -4358,6 +4456,32 @@ static struct iommu_domain identity_domain = {
},
};
+const struct iommu_domain_ops intel_fs_paging_domain_ops = {
+ .attach_dev = intel_iommu_attach_device,
+ .set_dev_pasid = intel_iommu_set_dev_pasid,
+ .map_pages = intel_iommu_map_pages,
+ .unmap_pages = intel_iommu_unmap_pages,
+ .iotlb_sync_map = intel_iommu_iotlb_sync_map,
+ .flush_iotlb_all = intel_flush_iotlb_all,
+ .iotlb_sync = intel_iommu_tlb_sync,
+ .iova_to_phys = intel_iommu_iova_to_phys,
+ .free = intel_iommu_domain_free,
+ .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_fs,
+};
+
+const struct iommu_domain_ops intel_ss_paging_domain_ops = {
+ .attach_dev = intel_iommu_attach_device,
+ .set_dev_pasid = intel_iommu_set_dev_pasid,
+ .map_pages = intel_iommu_map_pages,
+ .unmap_pages = intel_iommu_unmap_pages,
+ .iotlb_sync_map = intel_iommu_iotlb_sync_map,
+ .flush_iotlb_all = intel_flush_iotlb_all,
+ .iotlb_sync = intel_iommu_tlb_sync,
+ .iova_to_phys = intel_iommu_iova_to_phys,
+ .free = intel_iommu_domain_free,
+ .enforce_cache_coherency = intel_iommu_enforce_cache_coherency_ss,
+};
+
const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
.release_domain = &blocking_domain,
@@ -4374,20 +4498,7 @@ const struct iommu_ops intel_iommu_ops = {
.device_group = intel_iommu_device_group,
.is_attach_deferred = intel_iommu_is_attach_deferred,
.def_domain_type = device_def_domain_type,
- .pgsize_bitmap = SZ_4K,
.page_response = intel_iommu_page_response,
- .default_domain_ops = &(const struct iommu_domain_ops) {
- .attach_dev = intel_iommu_attach_device,
- .set_dev_pasid = intel_iommu_set_dev_pasid,
- .map_pages = intel_iommu_map_pages,
- .unmap_pages = intel_iommu_unmap_pages,
- .iotlb_sync_map = intel_iommu_iotlb_sync_map,
- .flush_iotlb_all = intel_flush_iotlb_all,
- .iotlb_sync = intel_iommu_tlb_sync,
- .iova_to_phys = intel_iommu_iova_to_phys,
- .free = intel_iommu_domain_free,
- .enforce_cache_coherency = intel_iommu_enforce_cache_coherency,
- }
};
static void quirk_iommu_igfx(struct pci_dev *dev)
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 2d1afab5eedc..d09b92871659 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -614,6 +614,9 @@ struct dmar_domain {
u8 has_mappings:1; /* Has mappings configured through
* iommu_map() interface.
*/
+ u8 iotlb_sync_map:1; /* Need to flush IOTLB cache or write
+ * buffer when creating mappings.
+ */
spinlock_t lock; /* Protect device tracking lists */
struct list_head devices; /* all devices' list */
@@ -1252,10 +1255,9 @@ domain_add_dev_pasid(struct iommu_domain *domain,
void domain_remove_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
-int __domain_setup_first_level(struct intel_iommu *iommu,
- struct device *dev, ioasid_t pasid,
- u16 did, pgd_t *pgd, int flags,
- struct iommu_domain *old);
+int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev,
+ ioasid_t pasid, u16 did, phys_addr_t fsptptr,
+ int flags, struct iommu_domain *old);
int dmar_ir_support(void);
@@ -1378,6 +1380,18 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
u8 devfn, int alloc);
extern const struct iommu_ops intel_iommu_ops;
+extern const struct iommu_domain_ops intel_fs_paging_domain_ops;
+extern const struct iommu_domain_ops intel_ss_paging_domain_ops;
+
+static inline bool intel_domain_is_fs_paging(struct dmar_domain *domain)
+{
+ return domain->domain.ops == &intel_fs_paging_domain_ops;
+}
+
+static inline bool intel_domain_is_ss_paging(struct dmar_domain *domain)
+{
+ return domain->domain.ops == &intel_ss_paging_domain_ops;
+}
#ifdef CONFIG_INTEL_IOMMU
extern int intel_iommu_sm;
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index cf7b6882ec75..4f9b01dc91e8 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -10,6 +10,7 @@
#include <linux/hpet.h>
#include <linux/pci.h>
#include <linux/irq.h>
+#include <linux/irqchip/irq-msi-lib.h>
#include <linux/acpi.h>
#include <linux/irqdomain.h>
#include <linux/crash_dump.h>
@@ -518,8 +519,14 @@ static void iommu_enable_irq_remapping(struct intel_iommu *iommu)
static int intel_setup_irq_remapping(struct intel_iommu *iommu)
{
+ struct irq_domain_info info = {
+ .ops = &intel_ir_domain_ops,
+ .parent = arch_get_ir_parent_domain(),
+ .domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI,
+ .size = INTR_REMAP_TABLE_ENTRIES,
+ .host_data = iommu,
+ };
struct ir_table *ir_table;
- struct fwnode_handle *fn;
unsigned long *bitmap;
void *ir_table_base;
@@ -544,25 +551,16 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
goto out_free_pages;
}
- fn = irq_domain_alloc_named_id_fwnode("INTEL-IR", iommu->seq_id);
- if (!fn)
+ info.fwnode = irq_domain_alloc_named_id_fwnode("INTEL-IR", iommu->seq_id);
+ if (!info.fwnode)
goto out_free_bitmap;
- iommu->ir_domain =
- irq_domain_create_hierarchy(arch_get_ir_parent_domain(),
- 0, INTR_REMAP_TABLE_ENTRIES,
- fn, &intel_ir_domain_ops,
- iommu);
+ iommu->ir_domain = msi_create_parent_irq_domain(&info, &dmar_msi_parent_ops);
if (!iommu->ir_domain) {
pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
goto out_free_fwnode;
}
- irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR);
- iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
- IRQ_DOMAIN_FLAG_ISOLATED_MSI;
- iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops;
-
ir_table->base = ir_table_base;
ir_table->bitmap = bitmap;
iommu->ir_table = ir_table;
@@ -608,7 +606,7 @@ out_free_ir_domain:
irq_domain_remove(iommu->ir_domain);
iommu->ir_domain = NULL;
out_free_fwnode:
- irq_domain_free_fwnode(fn);
+ irq_domain_free_fwnode(info.fwnode);
out_free_bitmap:
bitmap_free(bitmap);
out_free_pages:
@@ -1244,10 +1242,10 @@ static void intel_ir_compose_msi_msg(struct irq_data *irq_data,
static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
{
struct intel_ir_data *ir_data = data->chip_data;
- struct vcpu_data *vcpu_pi_info = info;
+ struct intel_iommu_pi_data *pi_data = info;
/* stop posting interrupts, back to the default mode */
- if (!vcpu_pi_info) {
+ if (!pi_data) {
__intel_ir_reconfigure_irte(data, true);
} else {
struct irte irte_pi;
@@ -1265,10 +1263,10 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
/* Update the posted mode fields */
irte_pi.p_pst = 1;
irte_pi.p_urgent = 0;
- irte_pi.p_vector = vcpu_pi_info->vector;
- irte_pi.pda_l = (vcpu_pi_info->pi_desc_addr >>
+ irte_pi.p_vector = pi_data->vector;
+ irte_pi.pda_l = (pi_data->pi_desc_addr >>
(32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT);
- irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) &
+ irte_pi.pda_h = (pi_data->pi_desc_addr >> 32) &
~(-1UL << PDA_HIGH_BIT);
ir_data->irq_2_iommu.posted_vcpu = true;
@@ -1530,6 +1528,8 @@ static const struct irq_domain_ops intel_ir_domain_ops = {
static const struct msi_parent_ops dmar_msi_parent_ops = {
.supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI,
+ .bus_select_token = DOMAIN_BUS_DMAR,
+ .bus_select_mask = MATCH_PCI_MSI,
.prefix = "IR-",
.init_dev_msi_info = msi_parent_init_dev_msi_info,
};
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index fc312f649f9e..1b6ad9c900a5 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -216,8 +216,7 @@ intel_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
/* Must be nested domain */
if (user_data->type != IOMMU_HWPT_DATA_VTD_S1)
return ERR_PTR(-EOPNOTSUPP);
- if (parent->ops != intel_iommu_ops.default_domain_ops ||
- !s2_domain->nested_parent)
+ if (!intel_domain_is_ss_paging(s2_domain) || !s2_domain->nested_parent)
return ERR_PTR(-EINVAL);
ret = iommu_copy_struct_from_user(&vtd, user_data,
@@ -229,7 +228,6 @@ intel_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
if (!domain)
return ERR_PTR(-ENOMEM);
- domain->use_first_level = true;
domain->s2_domain = s2_domain;
domain->s1_cfg = vtd;
domain->domain.ops = &intel_nested_domain_ops;
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index ac67a056b6c8..52f678975da7 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -348,14 +348,15 @@ static void intel_pasid_flush_present(struct intel_iommu *iommu,
*/
static void pasid_pte_config_first_level(struct intel_iommu *iommu,
struct pasid_entry *pte,
- pgd_t *pgd, u16 did, int flags)
+ phys_addr_t fsptptr, u16 did,
+ int flags)
{
lockdep_assert_held(&iommu->lock);
pasid_clear_entry(pte);
/* Setup the first level page table pointer: */
- pasid_set_flptr(pte, (u64)__pa(pgd));
+ pasid_set_flptr(pte, fsptptr);
if (flags & PASID_FLAG_FL5LP)
pasid_set_flpm(pte, 1);
@@ -372,9 +373,9 @@ static void pasid_pte_config_first_level(struct intel_iommu *iommu,
pasid_set_present(pte);
}
-int intel_pasid_setup_first_level(struct intel_iommu *iommu,
- struct device *dev, pgd_t *pgd,
- u32 pasid, u16 did, int flags)
+int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev,
+ phys_addr_t fsptptr, u32 pasid, u16 did,
+ int flags)
{
struct pasid_entry *pte;
@@ -402,7 +403,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
return -EBUSY;
}
- pasid_pte_config_first_level(iommu, pte, pgd, did, flags);
+ pasid_pte_config_first_level(iommu, pte, fsptptr, did, flags);
spin_unlock(&iommu->lock);
@@ -412,7 +413,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
}
int intel_pasid_replace_first_level(struct intel_iommu *iommu,
- struct device *dev, pgd_t *pgd,
+ struct device *dev, phys_addr_t fsptptr,
u32 pasid, u16 did, u16 old_did,
int flags)
{
@@ -430,7 +431,7 @@ int intel_pasid_replace_first_level(struct intel_iommu *iommu,
return -EINVAL;
}
- pasid_pte_config_first_level(iommu, &new_pte, pgd, did, flags);
+ pasid_pte_config_first_level(iommu, &new_pte, fsptptr, did, flags);
spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index fd0fd1a0df84..a771a77d4239 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -288,9 +288,9 @@ extern unsigned int intel_pasid_max_id;
int intel_pasid_alloc_table(struct device *dev);
void intel_pasid_free_table(struct device *dev);
struct pasid_table *intel_pasid_get_table(struct device *dev);
-int intel_pasid_setup_first_level(struct intel_iommu *iommu,
- struct device *dev, pgd_t *pgd,
- u32 pasid, u16 did, int flags);
+int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev,
+ phys_addr_t fsptptr, u32 pasid, u16 did,
+ int flags);
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, u32 pasid);
@@ -302,9 +302,8 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
u32 pasid, struct dmar_domain *domain);
int intel_pasid_replace_first_level(struct intel_iommu *iommu,
- struct device *dev, pgd_t *pgd,
- u32 pasid, u16 did, u16 old_did,
- int flags);
+ struct device *dev, phys_addr_t fsptptr,
+ u32 pasid, u16 did, u16 old_did, int flags);
int intel_pasid_replace_second_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev, u16 old_did,
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index f3da596410b5..e147f71f91b7 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -171,7 +171,7 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
/* Setup the pasid table: */
sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
ret = __domain_setup_first_level(iommu, dev, pasid,
- FLPT_DEFAULT_DID, mm->pgd,
+ FLPT_DEFAULT_DID, __pa(mm->pgd),
sflags, old);
if (ret)
goto out_unwind_iopf;
@@ -214,7 +214,6 @@ struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
return ERR_PTR(-ENOMEM);
domain->domain.ops = &intel_svm_domain_ops;
- domain->use_first_level = true;
INIT_LIST_HEAD(&domain->dev_pasids);
INIT_LIST_HEAD(&domain->cache_tags);
spin_lock_init(&domain->cache_lock);
diff --git a/drivers/iommu/intel/trace.h b/drivers/iommu/intel/trace.h
index 9defdae6ebae..6311ba3f1691 100644
--- a/drivers/iommu/intel/trace.h
+++ b/drivers/iommu/intel/trace.h
@@ -130,11 +130,6 @@ DEFINE_EVENT(cache_tag_log, cache_tag_unassign,
TP_ARGS(tag)
);
-DEFINE_EVENT(cache_tag_log, cache_tag_flush_all,
- TP_PROTO(struct cache_tag *tag),
- TP_ARGS(tag)
-);
-
DECLARE_EVENT_CLASS(cache_tag_flush,
TP_PROTO(struct cache_tag *tag, unsigned long start, unsigned long end,
unsigned long addr, unsigned long pages, unsigned long mask),
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 96425e92f313..7e8e2216c294 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -85,11 +85,6 @@
#define ARM_LPAE_PTE_NS (((arm_lpae_iopte)1) << 5)
#define ARM_LPAE_PTE_VALID (((arm_lpae_iopte)1) << 0)
-#define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2)
-/* Ignore the contiguous bit for block splitting */
-#define ARM_LPAE_PTE_ATTR_HI_MASK (ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM)
-#define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \
- ARM_LPAE_PTE_ATTR_HI_MASK)
/* Software bit for solving coherency races */
#define ARM_LPAE_PTE_SW_SYNC (((arm_lpae_iopte)1) << 55)
@@ -155,8 +150,6 @@
#define iopte_type(pte) \
(((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)
-#define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK)
-
#define iopte_writeable_dirty(pte) \
(((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index a4b606c591da..060ebe330ee1 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2002,13 +2002,6 @@ static void iommu_domain_init(struct iommu_domain *domain, unsigned int type,
domain->owner = ops;
if (!domain->ops)
domain->ops = ops->default_domain_ops;
-
- /*
- * If not already set, assume all sizes by default; the driver
- * may override this later
- */
- if (!domain->pgsize_bitmap)
- domain->pgsize_bitmap = ops->pgsize_bitmap;
}
static struct iommu_domain *
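
With the ops->pgsize_bitmap fallback removed from the core, every driver now publishes its supported page sizes on the domain it allocates, as the exynos, qcom and arm-smmu hunks above already do. A hypothetical driver's paging-domain allocation would follow the same pattern; this is a sketch only, and the "foo" names are made up for illustration.

    #include <linux/iommu.h>
    #include <linux/sizes.h>
    #include <linux/slab.h>

    struct foo_domain {
            struct iommu_domain domain;
    };

    static struct iommu_domain *foo_domain_alloc_paging(struct device *dev)
    {
            struct foo_domain *fd;

            fd = kzalloc(sizeof(*fd), GFP_KERNEL);
            if (!fd)
                    return NULL;

            /*
             * The core no longer copies a bitmap from iommu_ops, so the
             * driver publishes its supported page sizes per domain.
             */
            fd->domain.pgsize_bitmap = SZ_4K | SZ_2M;
            fd->domain.geometry.force_aperture = true;

            return &fd->domain;
    }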
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 86244403b532..65fbd098f9e9 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -137,6 +137,57 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
}
}
+static void iommufd_device_remove_vdev(struct iommufd_device *idev)
+{
+ struct iommufd_vdevice *vdev;
+
+ mutex_lock(&idev->igroup->lock);
+ /* prevent new references from vdev */
+ idev->destroying = true;
+ /* vdev has been completely destroyed by userspace */
+ if (!idev->vdev)
+ goto out_unlock;
+
+ vdev = iommufd_get_vdevice(idev->ictx, idev->vdev->obj.id);
+ /*
+ * An ongoing vdev destroy ioctl has removed the vdev from the object
+ * xarray, but has not finished iommufd_vdevice_destroy() yet as it
+	 * needs the same mutex. Release the lock, then wait on the wait_cnt
+	 * reference for the vdev destruction to finish.
+ */
+ if (IS_ERR(vdev))
+ goto out_unlock;
+
+ /* Should never happen */
+ if (WARN_ON(vdev != idev->vdev)) {
+ iommufd_put_object(idev->ictx, &vdev->obj);
+ goto out_unlock;
+ }
+
+ /*
+ * vdev is still alive. Hold a users refcount to prevent racing with
+ * userspace destruction, then use iommufd_object_tombstone_user() to
+ * destroy it and leave a tombstone.
+ */
+ refcount_inc(&vdev->obj.users);
+ iommufd_put_object(idev->ictx, &vdev->obj);
+ mutex_unlock(&idev->igroup->lock);
+ iommufd_object_tombstone_user(idev->ictx, &vdev->obj);
+ return;
+
+out_unlock:
+ mutex_unlock(&idev->igroup->lock);
+}
+
+void iommufd_device_pre_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_device *idev =
+ container_of(obj, struct iommufd_device, obj);
+
+ /* Release the wait_cnt reference on this */
+ iommufd_device_remove_vdev(idev);
+}
+
void iommufd_device_destroy(struct iommufd_object *obj)
{
struct iommufd_device *idev =
@@ -485,8 +536,7 @@ iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid)
lockdep_assert_held(&idev->igroup->lock);
- handle =
- iommu_attach_handle_get(idev->igroup->group, pasid, 0);
+ handle = iommu_attach_handle_get(idev->igroup->group, pasid, 0);
if (IS_ERR(handle))
return NULL;
return to_iommufd_handle(handle);
@@ -1049,7 +1099,7 @@ static int iommufd_access_change_ioas(struct iommufd_access *access,
}
if (cur_ioas) {
- if (access->ops->unmap) {
+ if (!iommufd_access_is_internal(access) && access->ops->unmap) {
mutex_unlock(&access->ioas_lock);
access->ops->unmap(access->data, 0, ULONG_MAX);
mutex_lock(&access->ioas_lock);
@@ -1085,7 +1135,39 @@ void iommufd_access_destroy_object(struct iommufd_object *obj)
if (access->ioas)
WARN_ON(iommufd_access_change_ioas(access, NULL));
mutex_unlock(&access->ioas_lock);
- iommufd_ctx_put(access->ictx);
+ if (!iommufd_access_is_internal(access))
+ iommufd_ctx_put(access->ictx);
+}
+
+static struct iommufd_access *__iommufd_access_create(struct iommufd_ctx *ictx)
+{
+ struct iommufd_access *access;
+
+ /*
+ * There is no uAPI for the access object, but to keep things symmetric
+ * use the object infrastructure anyhow.
+ */
+ access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
+ if (IS_ERR(access))
+ return access;
+
+ /* The calling driver is a user until iommufd_access_destroy() */
+ refcount_inc(&access->obj.users);
+ mutex_init(&access->ioas_lock);
+ return access;
+}
+
+struct iommufd_access *iommufd_access_create_internal(struct iommufd_ctx *ictx)
+{
+ struct iommufd_access *access;
+
+ access = __iommufd_access_create(ictx);
+ if (IS_ERR(access))
+ return access;
+ access->iova_alignment = PAGE_SIZE;
+
+ iommufd_object_finalize(ictx, &access->obj);
+ return access;
}
/**
@@ -1107,11 +1189,7 @@ iommufd_access_create(struct iommufd_ctx *ictx,
{
struct iommufd_access *access;
- /*
- * There is no uAPI for the access object, but to keep things symmetric
- * use the object infrastructure anyhow.
- */
- access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
+ access = __iommufd_access_create(ictx);
if (IS_ERR(access))
return access;
@@ -1123,13 +1201,10 @@ iommufd_access_create(struct iommufd_ctx *ictx,
else
access->iova_alignment = 1;
- /* The calling driver is a user until iommufd_access_destroy() */
- refcount_inc(&access->obj.users);
access->ictx = ictx;
iommufd_ctx_get(ictx);
iommufd_object_finalize(ictx, &access->obj);
*id = access->obj.id;
- mutex_init(&access->ioas_lock);
return access;
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_create, "IOMMUFD");
@@ -1174,6 +1249,22 @@ int iommufd_access_attach(struct iommufd_access *access, u32 ioas_id)
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_attach, "IOMMUFD");
+int iommufd_access_attach_internal(struct iommufd_access *access,
+ struct iommufd_ioas *ioas)
+{
+ int rc;
+
+ mutex_lock(&access->ioas_lock);
+ if (WARN_ON(access->ioas)) {
+ mutex_unlock(&access->ioas_lock);
+ return -EINVAL;
+ }
+
+ rc = iommufd_access_change_ioas(access, ioas);
+ mutex_unlock(&access->ioas_lock);
+ return rc;
+}
+
int iommufd_access_replace(struct iommufd_access *access, u32 ioas_id)
{
int rc;
@@ -1215,7 +1306,8 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
xa_lock(&ioas->iopt.access_list);
xa_for_each(&ioas->iopt.access_list, index, access) {
- if (!iommufd_lock_obj(&access->obj))
+ if (!iommufd_lock_obj(&access->obj) ||
+ iommufd_access_is_internal(access))
continue;
xa_unlock(&ioas->iopt.access_list);
@@ -1239,6 +1331,7 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
void iommufd_access_unpin_pages(struct iommufd_access *access,
unsigned long iova, unsigned long length)
{
+ bool internal = iommufd_access_is_internal(access);
struct iopt_area_contig_iter iter;
struct io_pagetable *iopt;
unsigned long last_iova;
@@ -1265,7 +1358,8 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
area, iopt_area_iova_to_index(area, iter.cur_iova),
iopt_area_iova_to_index(
area,
- min(last_iova, iopt_area_last_iova(area))));
+ min(last_iova, iopt_area_last_iova(area))),
+ internal);
WARN_ON(!iopt_area_contig_done(&iter));
up_read(&iopt->iova_rwsem);
mutex_unlock(&access->ioas_lock);
@@ -1314,6 +1408,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
unsigned long length, struct page **out_pages,
unsigned int flags)
{
+ bool internal = iommufd_access_is_internal(access);
struct iopt_area_contig_iter iter;
struct io_pagetable *iopt;
unsigned long last_iova;
@@ -1322,7 +1417,8 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
/* Driver's ops don't support pin_pages */
if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
- WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
+ WARN_ON(access->iova_alignment != PAGE_SIZE ||
+ (!internal && !access->ops->unmap)))
return -EINVAL;
if (!length)
@@ -1356,7 +1452,7 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
}
rc = iopt_area_add_access(area, index, last_index, out_pages,
- flags);
+ flags, internal);
if (rc)
goto err_remove;
out_pages += last_index - index + 1;
@@ -1379,7 +1475,8 @@ err_remove:
iopt_area_iova_to_index(area, iter.cur_iova),
iopt_area_iova_to_index(
area, min(last_iova,
- iopt_area_last_iova(area))));
+ iopt_area_last_iova(area))),
+ internal);
}
up_read(&iopt->iova_rwsem);
mutex_unlock(&access->ioas_lock);
@@ -1453,6 +1550,7 @@ EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, "IOMMUFD");
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
{
+ const u32 SUPPORTED_FLAGS = IOMMU_HW_INFO_FLAG_INPUT_TYPE;
struct iommu_hw_info *cmd = ucmd->cmd;
void __user *user_ptr = u64_to_user_ptr(cmd->data_uptr);
const struct iommu_ops *ops;
@@ -1462,9 +1560,14 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
void *data;
int rc;
- if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
- cmd->__reserved[2])
+ if (cmd->flags & ~SUPPORTED_FLAGS)
return -EOPNOTSUPP;
+ if (cmd->__reserved[0] || cmd->__reserved[1] || cmd->__reserved[2])
+ return -EOPNOTSUPP;
+
+	/* Reset the type to the default, since drivers don't support a random input */
+ if (!(cmd->flags & IOMMU_HW_INFO_FLAG_INPUT_TYPE))
+ cmd->in_data_type = IOMMU_HW_INFO_TYPE_DEFAULT;
idev = iommufd_get_device(ucmd, cmd->dev_id);
if (IS_ERR(idev))
@@ -1484,7 +1587,7 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
*/
if (WARN_ON_ONCE(cmd->out_data_type ==
IOMMU_HW_INFO_TYPE_NONE)) {
- rc = -ENODEV;
+ rc = -EOPNOTSUPP;
goto out_free;
}
} else {
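
The hw_info changes above introduce a simple contract: when user space does not pass IOMMU_HW_INFO_FLAG_INPUT_TYPE, the core resets the requested type to IOMMU_HW_INFO_TYPE_DEFAULT, and a driver's ->hw_info() callback either honors the requested type or returns -EOPNOTSUPP, reporting back the type it actually produced. A sketch of that contract, modeled on the Intel and Tegra241 implementations above, is shown here; the "foo" type and struct names are hypothetical.

    #include <linux/err.h>
    #include <linux/iommu.h>
    #include <linux/slab.h>

    static void *foo_hw_info(struct device *dev, u32 *length,
                             enum iommu_hw_info_type *type)
    {
            struct iommu_hw_info_foo *info;         /* hypothetical uAPI struct */

            /* Accept the default request or an explicit ask for this type */
            if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
                *type != IOMMU_HW_INFO_TYPE_FOO)    /* hypothetical type */
                    return ERR_PTR(-EOPNOTSUPP);

            info = kzalloc(sizeof(*info), GFP_KERNEL);
            if (!info)
                    return ERR_PTR(-ENOMEM);

            /* ... fill in hardware capabilities ... */

            *length = sizeof(*info);
            *type = IOMMU_HW_INFO_TYPE_FOO;         /* report what was produced */
            return info;
    }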
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index 922cd1fe7ec2..6f1010da221c 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -3,38 +3,91 @@
*/
#include "iommufd_private.h"
-struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
- size_t size,
- enum iommufd_object_type type)
+/* Driver should use a per-structure helper in include/linux/iommufd.h */
+int _iommufd_object_depend(struct iommufd_object *obj_dependent,
+ struct iommufd_object *obj_depended)
{
- struct iommufd_object *obj;
+	/* Reject a self dependency that would deadlock */
+ if (obj_dependent == obj_depended)
+ return -EINVAL;
+ /* Only support dependency between two objects of the same type */
+ if (obj_dependent->type != obj_depended->type)
+ return -EINVAL;
+
+ refcount_inc(&obj_depended->users);
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_object_depend, "IOMMUFD");
+
+/* Driver should use a per-structure helper in include/linux/iommufd.h */
+void _iommufd_object_undepend(struct iommufd_object *obj_dependent,
+ struct iommufd_object *obj_depended)
+{
+ if (WARN_ON_ONCE(obj_dependent == obj_depended ||
+ obj_dependent->type != obj_depended->type))
+ return;
+
+ refcount_dec(&obj_depended->users);
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_object_undepend, "IOMMUFD");
+
+/*
+ * Allocate an @offset to return to user space to use for an mmap() syscall
+ *
+ * Driver should use a per-structure helper in include/linux/iommufd.h
+ */
+int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner,
+ phys_addr_t mmio_addr, size_t length,
+ unsigned long *offset)
+{
+ struct iommufd_mmap *immap;
+ unsigned long startp;
int rc;
- obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
- if (!obj)
- return ERR_PTR(-ENOMEM);
- obj->type = type;
- /* Starts out bias'd by 1 until it is removed from the xarray */
- refcount_set(&obj->shortterm_users, 1);
- refcount_set(&obj->users, 1);
+ if (!PAGE_ALIGNED(mmio_addr))
+ return -EINVAL;
+ if (!length || !PAGE_ALIGNED(length))
+ return -EINVAL;
- /*
- * Reserve an ID in the xarray but do not publish the pointer yet since
- * the caller hasn't initialized it yet. Once the pointer is published
- * in the xarray and visible to other threads we can't reliably destroy
- * it anymore, so the caller must complete all errorable operations
- * before calling iommufd_object_finalize().
- */
- rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b,
- GFP_KERNEL_ACCOUNT);
- if (rc)
- goto out_free;
- return obj;
-out_free:
- kfree(obj);
- return ERR_PTR(rc);
+ immap = kzalloc(sizeof(*immap), GFP_KERNEL);
+ if (!immap)
+ return -ENOMEM;
+ immap->owner = owner;
+ immap->length = length;
+ immap->mmio_addr = mmio_addr;
+
+	/* Skip the first page so that callers can easily identify the returned offset */
+ rc = mtree_alloc_range(&ictx->mt_mmap, &startp, immap, immap->length,
+ PAGE_SIZE, ULONG_MAX, GFP_KERNEL);
+ if (rc < 0) {
+ kfree(immap);
+ return rc;
+ }
+
+ /* mmap() syscall will right-shift the offset in vma->vm_pgoff too */
+ immap->vm_pgoff = startp >> PAGE_SHIFT;
+ *offset = startp;
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_alloc_mmap, "IOMMUFD");
+
+/* Driver should use a per-structure helper in include/linux/iommufd.h */
+void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
+ struct iommufd_object *owner, unsigned long offset)
+{
+ struct iommufd_mmap *immap;
+
+ immap = mtree_erase(&ictx->mt_mmap, offset);
+ WARN_ON_ONCE(!immap || immap->owner != owner);
+ kfree(immap);
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_destroy_mmap, "IOMMUFD");
+
+struct device *iommufd_vdevice_to_device(struct iommufd_vdevice *vdev)
+{
+ return vdev->idev->dev;
}
-EXPORT_SYMBOL_NS_GPL(_iommufd_object_alloc, "IOMMUFD");
+EXPORT_SYMBOL_NS_GPL(iommufd_vdevice_to_device, "IOMMUFD");
/* Caller should xa_lock(&viommu->vdevs) to protect the return value */
struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
@@ -45,7 +98,7 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
lockdep_assert_held(&viommu->vdevs.xa_lock);
vdev = xa_load(&viommu->vdevs, vdev_id);
- return vdev ? vdev->dev : NULL;
+ return vdev ? iommufd_vdevice_to_device(vdev) : NULL;
}
EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD");
@@ -62,8 +115,8 @@ int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
xa_lock(&viommu->vdevs);
xa_for_each(&viommu->vdevs, index, vdev) {
- if (vdev->dev == dev) {
- *vdev_id = vdev->id;
+ if (iommufd_vdevice_to_device(vdev) == dev) {
+ *vdev_id = vdev->virt_id;
rc = 0;
break;
}
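
To illustrate the depend/undepend helpers above, here is a minimal driver-side sketch. It assumes the per-structure wrappers iommufd_hw_queue_depend()/iommufd_hw_queue_undepend() from include/linux/iommufd.h (the same ones the selftest hunks below use); struct my_hw_queue, the my_queues[] array and the missing locking are hypothetical.

/* Hypothetical driver queue embedding the core object as "core" */
struct my_hw_queue {
	struct iommufd_hw_queue core;
	struct my_hw_queue *prev;	/* queue this one depends on, if any */
};

/* Sketch only: a real driver would protect this array with a lock */
static struct my_hw_queue *my_queues[2];

static void my_hw_queue_destroy(struct iommufd_hw_queue *hw_queue)
{
	struct my_hw_queue *q = container_of(hw_queue, struct my_hw_queue, core);

	/* Drop the users refcount taken by iommufd_hw_queue_depend() */
	if (q->prev)
		iommufd_hw_queue_undepend(q, q->prev, core);
}

static int my_hw_queue_init_phys(struct iommufd_hw_queue *hw_queue, u32 index,
				 phys_addr_t base_addr_pa)
{
	struct my_hw_queue *q = container_of(hw_queue, struct my_hw_queue, core);
	struct my_hw_queue *prev;
	int rc;

	if (index >= ARRAY_SIZE(my_queues))
		return -EINVAL;

	prev = index ? my_queues[index - 1] : NULL;
	if (prev) {
		/*
		 * Hold prev's users refcount so it cannot be destroyed while
		 * this queue references it; self or type-mismatched pairs are
		 * rejected by _iommufd_object_depend().
		 */
		rc = iommufd_hw_queue_depend(q, prev, core);
		if (rc)
			return rc;
	}

	q->prev = prev;
	my_queues[index] = q;
	hw_queue->destroy = &my_hw_queue_destroy;
	return 0;
}
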
diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c
index e373b9eec7f5..fc4de63b0bce 100644
--- a/drivers/iommu/iommufd/eventq.c
+++ b/drivers/iommu/iommufd/eventq.c
@@ -427,8 +427,8 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
if (cmd->flags)
return -EOPNOTSUPP;
- fault = __iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT,
- common.obj);
+ fault = __iommufd_object_alloc_ucmd(ucmd, fault, IOMMUFD_OBJ_FAULT,
+ common.obj);
if (IS_ERR(fault))
return PTR_ERR(fault);
@@ -437,10 +437,8 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]",
ucmd->ictx, &iommufd_fault_fops);
- if (fdno < 0) {
- rc = fdno;
- goto out_abort;
- }
+ if (fdno < 0)
+ return fdno;
cmd->out_fault_id = fault->common.obj.id;
cmd->out_fault_fd = fdno;
@@ -448,7 +446,6 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
goto out_put_fdno;
- iommufd_object_finalize(ucmd->ictx, &fault->common.obj);
fd_install(fdno, fault->common.filep);
@@ -456,9 +453,6 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
out_put_fdno:
put_unused_fd(fdno);
fput(fault->common.filep);
-out_abort:
- iommufd_object_abort_and_destroy(ucmd->ictx, &fault->common.obj);
-
return rc;
}
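
The eventq.c hunks above are one instance of the new ucmd-managed object lifetime: after __iommufd_object_alloc_ucmd() the ioctl dispatcher (see the main.c hunks further down) finalizes the object on success or aborts and destroys it on failure, so the handler drops its out_abort/finalize labels. A minimal sketch of the resulting handler shape; iommufd_foo_alloc, struct iommu_foo_alloc and IOMMUFD_OBJ_FOO are hypothetical placeholders.

/* Sketch of an allocation ioctl relying on the ucmd-managed lifetime */
int iommufd_foo_alloc(struct iommufd_ucmd *ucmd)
{
	struct iommu_foo_alloc *cmd = ucmd->cmd;
	struct iommufd_foo *foo;
	int rc;

	if (cmd->flags)
		return -EOPNOTSUPP;

	foo = iommufd_object_alloc_ucmd(ucmd, foo, IOMMUFD_OBJ_FOO);
	if (IS_ERR(foo))
		return PTR_ERR(foo);

	/* ... errorable setup of foo; plain returns are fine on failure ... */

	cmd->out_foo_id = foo->obj.id;
	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
	/*
	 * No explicit finalize/abort here: iommufd_fops_ioctl() finalizes
	 * ucmd->new_obj when the handler returns 0 and aborts it otherwise.
	 */
	return rc;
}
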
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 487779470261..fe789c2dc0c9 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -264,7 +264,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
- rc = -EINVAL;
+ rc = -EOPNOTSUPP;
goto out_abort;
}
return hwpt_nested;
@@ -309,10 +309,8 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
refcount_inc(&viommu->obj.users);
hwpt_nested->parent = viommu->hwpt;
- hwpt->domain =
- viommu->ops->alloc_domain_nested(viommu,
- flags & ~IOMMU_HWPT_FAULT_ID_VALID,
- user_data);
+ hwpt->domain = viommu->ops->alloc_domain_nested(
+ viommu, flags & ~IOMMU_HWPT_FAULT_ID_VALID, user_data);
if (IS_ERR(hwpt->domain)) {
rc = PTR_ERR(hwpt->domain);
hwpt->domain = NULL;
@@ -323,7 +321,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
- rc = -EINVAL;
+ rc = -EOPNOTSUPP;
goto out_abort;
}
return hwpt_nested;
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 8a790e597e12..c0360c450880 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -70,36 +70,45 @@ struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
return iter->area;
}
-static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
- unsigned long length,
- unsigned long iova_alignment,
- unsigned long page_offset)
+static bool __alloc_iova_check_range(unsigned long *start, unsigned long last,
+ unsigned long length,
+ unsigned long iova_alignment,
+ unsigned long page_offset)
{
- if (span->is_used || span->last_hole - span->start_hole < length - 1)
+ unsigned long aligned_start;
+
+ /* ALIGN_UP() */
+ if (check_add_overflow(*start, iova_alignment - 1, &aligned_start))
return false;
+ aligned_start &= ~(iova_alignment - 1);
+ aligned_start |= page_offset;
- span->start_hole = ALIGN(span->start_hole, iova_alignment) |
- page_offset;
- if (span->start_hole > span->last_hole ||
- span->last_hole - span->start_hole < length - 1)
+ if (aligned_start >= last || last - aligned_start < length - 1)
return false;
+ *start = aligned_start;
return true;
}
-static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
+static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
unsigned long length,
unsigned long iova_alignment,
unsigned long page_offset)
{
- if (span->is_hole || span->last_used - span->start_used < length - 1)
+ if (span->is_used)
return false;
+ return __alloc_iova_check_range(&span->start_hole, span->last_hole,
+ length, iova_alignment, page_offset);
+}
- span->start_used = ALIGN(span->start_used, iova_alignment) |
- page_offset;
- if (span->start_used > span->last_used ||
- span->last_used - span->start_used < length - 1)
+static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
+ unsigned long length,
+ unsigned long iova_alignment,
+ unsigned long page_offset)
+{
+ if (span->is_hole)
return false;
- return true;
+ return __alloc_iova_check_range(&span->start_used, span->last_used,
+ length, iova_alignment, page_offset);
}
/*
@@ -719,6 +728,12 @@ again:
goto out_unlock_iova;
}
+ /* The area is locked by an object that has not been destroyed */
+ if (area->num_locks) {
+ rc = -EBUSY;
+ goto out_unlock_iova;
+ }
+
if (area_first < start || area_last > last) {
rc = -ENOENT;
goto out_unlock_iova;
@@ -743,8 +758,10 @@ again:
iommufd_access_notify_unmap(iopt, area_first, length);
/* Something is not responding to unmap requests. */
tries++;
- if (WARN_ON(tries > 100))
- return -EDEADLOCK;
+ if (WARN_ON(tries > 100)) {
+ rc = -EDEADLOCK;
+ goto out_unmapped;
+ }
goto again;
}
@@ -766,6 +783,7 @@ again:
out_unlock_iova:
up_write(&iopt->iova_rwsem);
up_read(&iopt->domains_rwsem);
+out_unmapped:
if (unmapped)
*unmapped = unmapped_bytes;
return rc;
@@ -1410,8 +1428,7 @@ out_unlock:
}
void iopt_remove_access(struct io_pagetable *iopt,
- struct iommufd_access *access,
- u32 iopt_access_list_id)
+ struct iommufd_access *access, u32 iopt_access_list_id)
{
down_write(&iopt->domains_rwsem);
down_write(&iopt->iova_rwsem);
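
The __alloc_iova_check_range() refactor above centralizes the hole/used checks: align the candidate start up to iova_alignment without overflowing, re-apply the sub-page offset, and verify that length bytes still fit before last. A stand-alone restatement with a worked number in the comments (a sketch for illustration, not the kernel function itself):

#include <linux/overflow.h>

/*
 * Example: *start = 0x3000, align = 0x10000, page_offset = 0x800,
 * length = 0x2000  ->  aligned = 0x10800, which is accepted only if
 * last >= 0x12800 - 1.
 */
static bool fits_after_align(unsigned long *start, unsigned long last,
			     unsigned long length, unsigned long align,
			     unsigned long page_offset)
{
	unsigned long aligned;

	/* ALIGN() written with an explicit overflow check */
	if (check_add_overflow(*start, align - 1, &aligned))
		return false;
	aligned &= ~(align - 1);
	aligned |= page_offset;		/* keep the sub-page offset of the mapping */

	if (aligned >= last || last - aligned < length - 1)
		return false;
	*start = aligned;
	return true;
}
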
diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
index 10c928a9a463..b6064f4ce4af 100644
--- a/drivers/iommu/iommufd/io_pagetable.h
+++ b/drivers/iommu/iommufd/io_pagetable.h
@@ -48,6 +48,7 @@ struct iopt_area {
int iommu_prot;
bool prevent_access : 1;
unsigned int num_accesses;
+ unsigned int num_locks;
};
struct iopt_allowed {
@@ -238,9 +239,9 @@ void iopt_pages_unfill_xarray(struct iopt_pages *pages, unsigned long start,
int iopt_area_add_access(struct iopt_area *area, unsigned long start,
unsigned long last, struct page **out_pages,
- unsigned int flags);
+ unsigned int flags, bool lock_area);
void iopt_area_remove_access(struct iopt_area *area, unsigned long start,
- unsigned long last);
+ unsigned long last, bool unlock_area);
int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte,
void *data, unsigned long length, unsigned int flags);
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 9ccc83341f32..0da2a81eedfa 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -7,6 +7,7 @@
#include <linux/iommu.h>
#include <linux/iommufd.h>
#include <linux/iova_bitmap.h>
+#include <linux/maple_tree.h>
#include <linux/rwsem.h>
#include <linux/uaccess.h>
#include <linux/xarray.h>
@@ -44,6 +45,7 @@ struct iommufd_ctx {
struct xarray groups;
wait_queue_head_t destroy_wait;
struct rw_semaphore ioas_creation_lock;
+ struct maple_tree mt_mmap;
struct mutex sw_msi_lock;
struct list_head sw_msi_list;
@@ -55,6 +57,18 @@ struct iommufd_ctx {
struct iommufd_ioas *vfio_ioas;
};
+/* Entry for iommufd_ctx::mt_mmap */
+struct iommufd_mmap {
+ struct iommufd_object *owner;
+
+ /* Page-shifted start position in mt_mmap to validate vma->vm_pgoff */
+ unsigned long vm_pgoff;
+
+ /* Physical range for io_remap_pfn_range() */
+ phys_addr_t mmio_addr;
+ size_t length;
+};
+
/*
* The IOVA to PFN map. The map automatically copies the PFNs into multiple
* domains and permits sharing of PFNs between io_pagetable instances. This
@@ -135,6 +149,7 @@ struct iommufd_ucmd {
void __user *ubuffer;
u32 user_size;
void *cmd;
+ struct iommufd_object *new_obj;
};
int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
@@ -154,7 +169,7 @@ static inline bool iommufd_lock_obj(struct iommufd_object *obj)
{
if (!refcount_inc_not_zero(&obj->users))
return false;
- if (!refcount_inc_not_zero(&obj->shortterm_users)) {
+ if (!refcount_inc_not_zero(&obj->wait_cnt)) {
/*
* If the caller doesn't already have a ref on obj this must be
* called under the xa_lock. Otherwise the caller is holding a
@@ -172,11 +187,11 @@ static inline void iommufd_put_object(struct iommufd_ctx *ictx,
struct iommufd_object *obj)
{
/*
- * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees
- * a spurious !0 users with a 0 shortterm_users.
+ * Users first, then wait_cnt so that REMOVE_WAIT never sees a spurious
+ * !0 users with a 0 wait_cnt.
*/
refcount_dec(&obj->users);
- if (refcount_dec_and_test(&obj->shortterm_users))
+ if (refcount_dec_and_test(&obj->wait_cnt))
wake_up_interruptible_all(&ictx->destroy_wait);
}
@@ -187,7 +202,8 @@ void iommufd_object_finalize(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
enum {
- REMOVE_WAIT_SHORTTERM = 1,
+ REMOVE_WAIT = BIT(0),
+ REMOVE_OBJ_TOMBSTONE = BIT(1),
};
int iommufd_object_remove(struct iommufd_ctx *ictx,
struct iommufd_object *to_destroy, u32 id,
@@ -195,15 +211,35 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
/*
* The caller holds a users refcount and wants to destroy the object. At this
- * point the caller has no shortterm_users reference and at least the xarray
- * will be holding one.
+ * point the caller has no wait_cnt reference and at least the xarray will be
+ * holding one.
*/
static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
struct iommufd_object *obj)
{
int ret;
- ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM);
+ ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT);
+
+ /*
+ * If there is a bug and we couldn't destroy the object then we did put
+ * back the caller's users refcount and will eventually try to free it
+ * again during close.
+ */
+ WARN_ON(ret);
+}
+
+/*
+ * Similar to iommufd_object_destroy_user(), except that the object ID is left
+ * reserved/tombstoned.
+ */
+static inline void iommufd_object_tombstone_user(struct iommufd_ctx *ictx,
+ struct iommufd_object *obj)
+{
+ int ret;
+
+ ret = iommufd_object_remove(ictx, obj, obj->id,
+ REMOVE_WAIT | REMOVE_OBJ_TOMBSTONE);
/*
* If there is a bug and we couldn't destroy the object then we did put
@@ -230,6 +266,15 @@ iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
iommufd_object_remove(ictx, obj, obj->id, 0);
}
+/*
+ * Callers of these normal object allocators must call iommufd_object_finalize()
+ * to finalize the object, or call iommufd_object_abort_and_destroy() to revert
+ * the allocation.
+ */
+struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
+ size_t size,
+ enum iommufd_object_type type);
+
#define __iommufd_object_alloc(ictx, ptr, type, obj) \
container_of(_iommufd_object_alloc( \
ictx, \
@@ -243,6 +288,26 @@ iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx,
__iommufd_object_alloc(ictx, ptr, type, obj)
/*
+ * Callers of these _ucmd allocators should not call iommufd_object_finalize()
+ * or iommufd_object_abort_and_destroy(), as the core automatically does that.
+ */
+struct iommufd_object *
+_iommufd_object_alloc_ucmd(struct iommufd_ucmd *ucmd, size_t size,
+ enum iommufd_object_type type);
+
+#define __iommufd_object_alloc_ucmd(ucmd, ptr, type, obj) \
+ container_of(_iommufd_object_alloc_ucmd( \
+ ucmd, \
+ sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \
+ offsetof(typeof(*(ptr)), \
+ obj) != 0), \
+ type), \
+ typeof(*(ptr)), obj)
+
+#define iommufd_object_alloc_ucmd(ucmd, ptr, type) \
+ __iommufd_object_alloc_ucmd(ucmd, ptr, type, obj)
+
+/*
* The IO Address Space (IOAS) pagetable is a virtual page table backed by the
* io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
* mapping is copied into all of the associated domains and made available to
@@ -266,8 +331,7 @@ struct iommufd_ioas {
static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx,
u32 id)
{
- return container_of(iommufd_get_object(ictx, id,
- IOMMUFD_OBJ_IOAS),
+ return container_of(iommufd_get_object(ictx, id, IOMMUFD_OBJ_IOAS),
struct iommufd_ioas, obj);
}
@@ -425,6 +489,8 @@ struct iommufd_device {
/* always the physical device */
struct device *dev;
bool enforce_cache_coherency;
+ struct iommufd_vdevice *vdev;
+ bool destroying;
};
static inline struct iommufd_device *
@@ -435,6 +501,7 @@ iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id)
struct iommufd_device, obj);
}
+void iommufd_device_pre_destroy(struct iommufd_object *obj);
void iommufd_device_destroy(struct iommufd_object *obj);
int iommufd_get_hw_info(struct iommufd_ucmd *ucmd);
@@ -452,10 +519,32 @@ struct iommufd_access {
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
void iopt_remove_access(struct io_pagetable *iopt,
- struct iommufd_access *access,
- u32 iopt_access_list_id);
+ struct iommufd_access *access, u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);
+/* iommufd_access for internal use */
+static inline bool iommufd_access_is_internal(struct iommufd_access *access)
+{
+ return !access->ictx;
+}
+
+struct iommufd_access *iommufd_access_create_internal(struct iommufd_ctx *ictx);
+
+static inline void
+iommufd_access_destroy_internal(struct iommufd_ctx *ictx,
+ struct iommufd_access *access)
+{
+ iommufd_object_destroy_user(ictx, &access->obj);
+}
+
+int iommufd_access_attach_internal(struct iommufd_access *access,
+ struct iommufd_ioas *ioas);
+
+static inline void iommufd_access_detach_internal(struct iommufd_access *access)
+{
+ iommufd_access_detach(access);
+}
+
struct iommufd_eventq {
struct iommufd_object obj;
struct iommufd_ctx *ictx;
@@ -528,7 +617,7 @@ struct iommufd_veventq {
struct list_head node; /* for iommufd_viommu::veventqs */
struct iommufd_vevent lost_events_header;
- unsigned int type;
+ enum iommu_veventq_type type;
unsigned int depth;
/* Use common.lock for protection */
@@ -583,7 +672,8 @@ iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
}
static inline struct iommufd_veventq *
-iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, u32 type)
+iommufd_viommu_find_veventq(struct iommufd_viommu *viommu,
+ enum iommu_veventq_type type)
{
struct iommufd_veventq *veventq, *next;
@@ -600,14 +690,17 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_viommu_destroy(struct iommufd_object *obj);
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_vdevice_destroy(struct iommufd_object *obj);
+void iommufd_vdevice_abort(struct iommufd_object *obj);
+int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd);
+void iommufd_hw_queue_destroy(struct iommufd_object *obj);
-struct iommufd_vdevice {
- struct iommufd_object obj;
- struct iommufd_ctx *ictx;
- struct iommufd_viommu *viommu;
- struct device *dev;
- u64 id; /* per-vIOMMU virtual ID */
-};
+static inline struct iommufd_vdevice *
+iommufd_get_vdevice(struct iommufd_ctx *ictx, u32 id)
+{
+ return container_of(iommufd_get_object(ictx, id,
+ IOMMUFD_OBJ_VDEVICE),
+ struct iommufd_vdevice, obj);
+}
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
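
REMOVE_OBJ_TOMBSTONE above works because of xarray zero entries: an index holding XA_ZERO_ENTRY reads back as NULL yet stays allocated, so a tombstoned object ID cannot be handed out again before the fd closes. A small illustrative sketch of that xarray behaviour (tombstone_demo() and the stand-in pointer are made up; error handling is reduced to WARN_ON):

/* Illustrative only: how a zero entry keeps an object ID reserved */
static void tombstone_demo(void)
{
	struct xarray xa;
	void *obj = (void *)0xdeadbeefUL;	/* stand-in object pointer */
	u32 id, id2;

	xa_init_flags(&xa, XA_FLAGS_ALLOC1);

	/* Reserve an ID, then publish the object (alloc + finalize) */
	WARN_ON(xa_alloc(&xa, &id, XA_ZERO_ENTRY, xa_limit_31b, GFP_KERNEL));
	WARN_ON(xa_err(xa_store(&xa, id, obj, GFP_KERNEL)));

	/* Tombstone: lookups now see NULL, but the ID stays allocated */
	WARN_ON(xa_cmpxchg(&xa, id, obj, XA_ZERO_ENTRY, GFP_KERNEL) != obj);
	WARN_ON(xa_load(&xa, id) != NULL);

	/* A new allocation cannot reuse the tombstoned ID */
	WARN_ON(xa_alloc(&xa, &id2, XA_ZERO_ENTRY, xa_limit_31b, GFP_KERNEL));
	WARN_ON(id2 == id);

	xa_destroy(&xa);	/* finally releases the tombstoned slot */
}
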
diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h
index 1cd7e8394129..8fc618b2bcf9 100644
--- a/drivers/iommu/iommufd/iommufd_test.h
+++ b/drivers/iommu/iommufd/iommufd_test.h
@@ -227,6 +227,23 @@ struct iommu_hwpt_invalidate_selftest {
#define IOMMU_VIOMMU_TYPE_SELFTEST 0xdeadbeef
+/**
+ * struct iommu_viommu_selftest - vIOMMU data for Mock driver
+ * (IOMMU_VIOMMU_TYPE_SELFTEST)
+ * @in_data: Input random data from user space
+ * @out_data: Output data (matching @in_data) to user space
+ * @out_mmap_offset: The offset argument for mmap syscall
+ * @out_mmap_length: The length argument for mmap syscall
+ *
+ * Simply set @out_data=@in_data for a loopback test
+ */
+struct iommu_viommu_selftest {
+ __u32 in_data;
+ __u32 out_data;
+ __aligned_u64 out_mmap_offset;
+ __aligned_u64 out_mmap_length;
+};
+
/* Should not be equal to any defined value in enum iommu_viommu_invalidate_data_type */
#define IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST 0xdeadbeef
#define IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID 0xdadbeef
@@ -252,4 +269,7 @@ struct iommu_viommu_event_selftest {
__u32 virt_id;
};
+#define IOMMU_HW_QUEUE_TYPE_SELFTEST 0xdeadbeef
+#define IOMMU_TEST_HW_QUEUE_MAX 2
+
#endif
diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c
index 39a86a4a1d3a..4514575818fc 100644
--- a/drivers/iommu/iommufd/iova_bitmap.c
+++ b/drivers/iommu/iommufd/iova_bitmap.c
@@ -407,7 +407,6 @@ void iova_bitmap_set(struct iova_bitmap *bitmap,
update_indexes:
if (unlikely(!iova_bitmap_mapped_range(mapped, iova, length))) {
-
/*
* The attempt to advance the base index to @iova
* may fail if it's out of bounds, or pinning the pages
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 3df468f64e7d..15af7ced0501 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -23,12 +23,72 @@
#include "iommufd_test.h"
struct iommufd_object_ops {
+ void (*pre_destroy)(struct iommufd_object *obj);
void (*destroy)(struct iommufd_object *obj);
void (*abort)(struct iommufd_object *obj);
};
static const struct iommufd_object_ops iommufd_object_ops[];
static struct miscdevice vfio_misc_dev;
+struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
+ size_t size,
+ enum iommufd_object_type type)
+{
+ struct iommufd_object *obj;
+ int rc;
+
+ obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+ obj->type = type;
+	/* Starts out biased by 1 until it is removed from the xarray */
+ refcount_set(&obj->wait_cnt, 1);
+ refcount_set(&obj->users, 1);
+
+ /*
+ * Reserve an ID in the xarray but do not publish the pointer yet since
+ * the caller hasn't initialized it yet. Once the pointer is published
+ * in the xarray and visible to other threads we can't reliably destroy
+ * it anymore, so the caller must complete all errorable operations
+ * before calling iommufd_object_finalize().
+ */
+ rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b,
+ GFP_KERNEL_ACCOUNT);
+ if (rc)
+ goto out_free;
+ return obj;
+out_free:
+ kfree(obj);
+ return ERR_PTR(rc);
+}
+
+struct iommufd_object *_iommufd_object_alloc_ucmd(struct iommufd_ucmd *ucmd,
+ size_t size,
+ enum iommufd_object_type type)
+{
+ struct iommufd_object *new_obj;
+
+ /* Something is coded wrong if this is hit */
+ if (WARN_ON(ucmd->new_obj))
+ return ERR_PTR(-EBUSY);
+
+ /*
+	 * An abort op means the caller must invoke it while holding its own
+	 * lock. That doesn't work with _iommufd_object_alloc_ucmd(), which has
+	 * the core invoke the abort op via iommufd_object_abort_and_destroy()
+	 * outside the caller's lock.
+ */
+ if (WARN_ON(iommufd_object_ops[type].abort))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ new_obj = _iommufd_object_alloc(ucmd->ictx, size, type);
+ if (IS_ERR(new_obj))
+ return new_obj;
+
+ ucmd->new_obj = new_obj;
+ return new_obj;
+}
+
/*
* Allow concurrent access to the object.
*
@@ -95,20 +155,22 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
return obj;
}
-static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx,
- struct iommufd_object *to_destroy)
+static int iommufd_object_dec_wait(struct iommufd_ctx *ictx,
+ struct iommufd_object *to_destroy)
{
- if (refcount_dec_and_test(&to_destroy->shortterm_users))
+ if (refcount_dec_and_test(&to_destroy->wait_cnt))
return 0;
+ if (iommufd_object_ops[to_destroy->type].pre_destroy)
+ iommufd_object_ops[to_destroy->type].pre_destroy(to_destroy);
+
if (wait_event_timeout(ictx->destroy_wait,
- refcount_read(&to_destroy->shortterm_users) ==
- 0,
- msecs_to_jiffies(60000)))
+ refcount_read(&to_destroy->wait_cnt) == 0,
+ msecs_to_jiffies(60000)))
return 0;
pr_crit("Time out waiting for iommufd object to become free\n");
- refcount_inc(&to_destroy->shortterm_users);
+ refcount_inc(&to_destroy->wait_cnt);
return -EBUSY;
}
@@ -122,17 +184,18 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
{
struct iommufd_object *obj;
XA_STATE(xas, &ictx->objects, id);
- bool zerod_shortterm = false;
+ bool zerod_wait_cnt = false;
int ret;
/*
- * The purpose of the shortterm_users is to ensure deterministic
- * destruction of objects used by external drivers and destroyed by this
- * function. Any temporary increment of the refcount must increment
- * shortterm_users, such as during ioctl execution.
+ * The purpose of the wait_cnt is to ensure deterministic destruction
+ * of objects used by external drivers and destroyed by this function.
+ * Incrementing this wait_cnt should either be short lived, such as
+ * during ioctl execution, or be revoked and blocked during
+ * pre_destroy(), such as vdev holding the idev's refcount.
*/
- if (flags & REMOVE_WAIT_SHORTTERM) {
- ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy);
+ if (flags & REMOVE_WAIT) {
+ ret = iommufd_object_dec_wait(ictx, to_destroy);
if (ret) {
/*
* We have a bug. Put back the callers reference and
@@ -141,7 +204,7 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
refcount_dec(&to_destroy->users);
return ret;
}
- zerod_shortterm = true;
+ zerod_wait_cnt = true;
}
xa_lock(&ictx->objects);
@@ -167,17 +230,17 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
goto err_xa;
}
- xas_store(&xas, NULL);
+ xas_store(&xas, (flags & REMOVE_OBJ_TOMBSTONE) ? XA_ZERO_ENTRY : NULL);
if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
ictx->vfio_ioas = NULL;
xa_unlock(&ictx->objects);
/*
- * Since users is zero any positive users_shortterm must be racing
+ * Since users is zero any positive wait_cnt must be racing
* iommufd_put_object(), or we have a bug.
*/
- if (!zerod_shortterm) {
- ret = iommufd_object_dec_wait_shortterm(ictx, obj);
+ if (!zerod_wait_cnt) {
+ ret = iommufd_object_dec_wait(ictx, obj);
if (WARN_ON(ret))
return ret;
}
@@ -187,9 +250,9 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
return 0;
err_xa:
- if (zerod_shortterm) {
+ if (zerod_wait_cnt) {
/* Restore the xarray owned reference */
- refcount_set(&obj->shortterm_users, 1);
+ refcount_set(&obj->wait_cnt, 1);
}
xa_unlock(&ictx->objects);
@@ -226,6 +289,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
xa_init(&ictx->groups);
ictx->file = filp;
+ mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE);
init_waitqueue_head(&ictx->destroy_wait);
mutex_init(&ictx->sw_msi_lock);
INIT_LIST_HEAD(&ictx->sw_msi_list);
@@ -252,19 +316,41 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
while (!xa_empty(&ictx->objects)) {
unsigned int destroyed = 0;
unsigned long index;
+ bool empty = true;
+ /*
+		 * We can't use xa_empty() to end the loop: the tombstones are
+		 * stored as XA_ZERO_ENTRY, which keeps xa_empty() false even
+		 * when only tombstones remain. xa_for_each() converts them to
+		 * NULL and skips them, so once it finds no further non-NULL
+		 * entries the loop is done.
+ */
xa_for_each(&ictx->objects, index, obj) {
+ empty = false;
if (!refcount_dec_if_one(&obj->users))
continue;
+
destroyed++;
xa_erase(&ictx->objects, index);
iommufd_object_ops[obj->type].destroy(obj);
kfree(obj);
}
+
+ if (empty)
+ break;
+
/* Bug related to users refcount */
if (WARN_ON(!destroyed))
break;
}
+
+ /*
+ * There may be some tombstones left over from
+ * iommufd_object_tombstone_user()
+ */
+ xa_destroy(&ictx->objects);
+
WARN_ON(!xa_empty(&ictx->groups));
mutex_destroy(&ictx->sw_msi_lock);
@@ -305,6 +391,7 @@ union ucmd_buffer {
struct iommu_destroy destroy;
struct iommu_fault_alloc fault;
struct iommu_hw_info info;
+ struct iommu_hw_queue_alloc hw_queue;
struct iommu_hwpt_alloc hwpt;
struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap;
struct iommu_hwpt_invalidate cache;
@@ -347,6 +434,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
struct iommu_fault_alloc, out_fault_fd),
IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info,
__reserved),
+ IOCTL_OP(IOMMU_HW_QUEUE_ALLOC, iommufd_hw_queue_alloc_ioctl,
+ struct iommu_hw_queue_alloc, length),
IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
__reserved),
IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap,
@@ -417,14 +506,83 @@ static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
if (ret)
return ret;
ret = op->execute(&ucmd);
+
+ if (ucmd.new_obj) {
+ if (ret)
+ iommufd_object_abort_and_destroy(ictx, ucmd.new_obj);
+ else
+ iommufd_object_finalize(ictx, ucmd.new_obj);
+ }
return ret;
}
+static void iommufd_fops_vma_open(struct vm_area_struct *vma)
+{
+ struct iommufd_mmap *immap = vma->vm_private_data;
+
+ refcount_inc(&immap->owner->users);
+}
+
+static void iommufd_fops_vma_close(struct vm_area_struct *vma)
+{
+ struct iommufd_mmap *immap = vma->vm_private_data;
+
+ refcount_dec(&immap->owner->users);
+}
+
+static const struct vm_operations_struct iommufd_vma_ops = {
+ .open = iommufd_fops_vma_open,
+ .close = iommufd_fops_vma_close,
+};
+
+/* The vm_pgoff must be pre-allocated from mt_mmap, and given to user space */
+static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct iommufd_ctx *ictx = filp->private_data;
+ size_t length = vma->vm_end - vma->vm_start;
+ struct iommufd_mmap *immap;
+ int rc;
+
+ if (!PAGE_ALIGNED(length))
+ return -EINVAL;
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+ if (vma->vm_flags & VM_EXEC)
+ return -EPERM;
+
+ /* vma->vm_pgoff carries a page-shifted start position to an immap */
+ immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff << PAGE_SHIFT);
+ if (!immap)
+ return -ENXIO;
+ /*
+	 * mtree_load() returns the immap for any offset inside its allocated
+	 * range, so only allow mapping the exact immap
+ */
+ if (vma->vm_pgoff != immap->vm_pgoff || length != immap->length)
+ return -ENXIO;
+
+ vma->vm_pgoff = 0;
+ vma->vm_private_data = immap;
+ vma->vm_ops = &iommufd_vma_ops;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ rc = io_remap_pfn_range(vma, vma->vm_start,
+ immap->mmio_addr >> PAGE_SHIFT, length,
+ vma->vm_page_prot);
+ if (rc)
+ return rc;
+
+ /* vm_ops.open won't be called for mmap itself. */
+ refcount_inc(&immap->owner->users);
+ return rc;
+}
+
static const struct file_operations iommufd_fops = {
.owner = THIS_MODULE,
.open = iommufd_fops_open,
.release = iommufd_fops_release,
.unlocked_ioctl = iommufd_fops_ioctl,
+ .mmap = iommufd_fops_mmap,
};
/**
@@ -498,11 +656,15 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
.destroy = iommufd_access_destroy_object,
},
[IOMMUFD_OBJ_DEVICE] = {
+ .pre_destroy = iommufd_device_pre_destroy,
.destroy = iommufd_device_destroy,
},
[IOMMUFD_OBJ_FAULT] = {
.destroy = iommufd_fault_destroy,
},
+ [IOMMUFD_OBJ_HW_QUEUE] = {
+ .destroy = iommufd_hw_queue_destroy,
+ },
[IOMMUFD_OBJ_HWPT_PAGING] = {
.destroy = iommufd_hwpt_paging_destroy,
.abort = iommufd_hwpt_paging_abort,
@@ -516,6 +678,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
},
[IOMMUFD_OBJ_VDEVICE] = {
.destroy = iommufd_vdevice_destroy,
+ .abort = iommufd_vdevice_abort,
},
[IOMMUFD_OBJ_VEVENTQ] = {
.destroy = iommufd_veventq_destroy,
@@ -539,7 +702,6 @@ static struct miscdevice iommu_misc_dev = {
.mode = 0660,
};
-
static struct miscdevice vfio_misc_dev = {
.minor = VFIO_MINOR,
.name = "vfio",
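
From user space, the mmap contract implemented above is strict: the offset must be exactly the value a driver reported (for instance out_mmap_offset from the selftest vIOMMU), the length must match the reported length, the mapping must be MAP_SHARED, and PROT_EXEC is refused. A minimal user-space sketch under those assumptions; map_iommufd_region() and its parameters are made up, with the offset/length expected to come from a prior allocation ioctl.

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

/* iommufd_fd, mmap_offset and mmap_length come from a prior allocation ioctl */
static void *map_iommufd_region(int iommufd_fd, uint64_t mmap_offset,
				uint64_t mmap_length)
{
	/* Must be MAP_SHARED and exactly the reported offset/length */
	void *va = mmap(NULL, mmap_length, PROT_READ | PROT_WRITE,
			MAP_SHARED, iommufd_fd, mmap_offset);
	if (va == MAP_FAILED) {
		perror("mmap(iommufd)");
		return NULL;
	}
	/* PROT_EXEC would fail with EPERM; a partial length fails with ENXIO */
	return va;
}
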
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 3427749bc5ce..c3433b845561 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -1287,8 +1287,7 @@ static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages,
}
static struct iopt_pages *iopt_alloc_pages(unsigned long start_byte,
- unsigned long length,
- bool writable)
+ unsigned long length, bool writable)
{
struct iopt_pages *pages;
@@ -1328,7 +1327,7 @@ struct iopt_pages *iopt_alloc_user_pages(void __user *uptr,
struct iopt_pages *pages;
unsigned long end;
void __user *uptr_down =
- (void __user *) ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE);
+ (void __user *)ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE);
if (check_add_overflow((unsigned long)uptr, length, &end))
return ERR_PTR(-EOVERFLOW);
@@ -2104,6 +2103,7 @@ iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index,
* @last_index: Inclusive last page index
* @out_pages: Output list of struct page's representing the PFNs
* @flags: IOMMUFD_ACCESS_RW_* flags
+ * @lock_area: Fail userspace munmap on this area
*
* Record that an in-kernel access will be accessing the pages, ensure they are
* pinned, and return the PFNs as a simple list of 'struct page *'.
@@ -2111,8 +2111,8 @@ iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index,
* This should be undone through a matching call to iopt_area_remove_access()
*/
int iopt_area_add_access(struct iopt_area *area, unsigned long start_index,
- unsigned long last_index, struct page **out_pages,
- unsigned int flags)
+ unsigned long last_index, struct page **out_pages,
+ unsigned int flags, bool lock_area)
{
struct iopt_pages *pages = area->pages;
struct iopt_pages_access *access;
@@ -2125,6 +2125,8 @@ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index,
access = iopt_pages_get_exact_access(pages, start_index, last_index);
if (access) {
area->num_accesses++;
+ if (lock_area)
+ area->num_locks++;
access->users++;
iopt_pages_fill_from_xarray(pages, start_index, last_index,
out_pages);
@@ -2146,6 +2148,8 @@ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index,
access->node.last = last_index;
access->users = 1;
area->num_accesses++;
+ if (lock_area)
+ area->num_locks++;
interval_tree_insert(&access->node, &pages->access_itree);
mutex_unlock(&pages->mutex);
return 0;
@@ -2162,12 +2166,13 @@ err_unlock:
* @area: The source of PFNs
* @start_index: First page index
* @last_index: Inclusive last page index
+ * @unlock_area: Must match the matching iopt_area_add_access()'s lock_area
*
* Undo iopt_area_add_access() and unpin the pages if necessary. The caller
* must stop using the PFNs before calling this.
*/
void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index,
- unsigned long last_index)
+ unsigned long last_index, bool unlock_area)
{
struct iopt_pages *pages = area->pages;
struct iopt_pages_access *access;
@@ -2178,6 +2183,10 @@ void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index,
goto out_unlock;
WARN_ON(area->num_accesses == 0 || access->users == 0);
+ if (unlock_area) {
+ WARN_ON(area->num_locks == 0);
+ area->num_locks--;
+ }
area->num_accesses--;
access->users--;
if (access->users)
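
The lock_area/unlock_area arguments above must be paired: each iopt_area_add_access(..., true) bumps area->num_locks, which makes the unmap path in the io_pagetable.c hunk earlier fail with -EBUSY until a matching iopt_area_remove_access(..., true) drops it again. A schematic pairing, assuming the caller has already resolved the area, the page-index range and the pages array (pin_and_lock()/unpin_and_unlock() are hypothetical wrappers):

/* Pin a page range and hold the area locked against IOMMU_IOAS_UNMAP */
static int pin_and_lock(struct iopt_area *area, unsigned long start_index,
			unsigned long last_index, struct page **out_pages)
{
	/* flags == 0: no IOMMUFD_ACCESS_RW_WRITE; lock_area == true bumps num_locks */
	return iopt_area_add_access(area, start_index, last_index, out_pages,
				    0, true);
}

static void unpin_and_unlock(struct iopt_area *area, unsigned long start_index,
			     unsigned long last_index)
{
	/* unlock_area must mirror the lock_area used when the access was added */
	iopt_area_remove_access(area, start_index, last_index, true);
}
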
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index 6bd0abf9a641..61686603c769 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -138,7 +138,6 @@ to_mock_domain(struct iommu_domain *domain)
struct mock_iommu_domain_nested {
struct iommu_domain domain;
struct mock_viommu *mock_viommu;
- struct mock_iommu_domain *parent;
u32 iotlb[MOCK_NESTED_DOMAIN_IOTLB_NUM];
};
@@ -151,6 +150,11 @@ to_mock_nested(struct iommu_domain *domain)
struct mock_viommu {
struct iommufd_viommu core;
struct mock_iommu_domain *s2_parent;
+ struct mock_hw_queue *hw_queue[IOMMU_TEST_HW_QUEUE_MAX];
+ struct mutex queue_mutex;
+
+ unsigned long mmap_offset;
+ u32 *page; /* Mmap page to test u32 type of in_data */
};
static inline struct mock_viommu *to_mock_viommu(struct iommufd_viommu *viommu)
@@ -158,6 +162,19 @@ static inline struct mock_viommu *to_mock_viommu(struct iommufd_viommu *viommu)
return container_of(viommu, struct mock_viommu, core);
}
+struct mock_hw_queue {
+ struct iommufd_hw_queue core;
+ struct mock_viommu *mock_viommu;
+ struct mock_hw_queue *prev;
+ u16 index;
+};
+
+static inline struct mock_hw_queue *
+to_mock_hw_queue(struct iommufd_hw_queue *hw_queue)
+{
+ return container_of(hw_queue, struct mock_hw_queue, core);
+}
+
enum selftest_obj_type {
TYPE_IDEV,
};
@@ -288,10 +305,15 @@ static struct iommu_domain mock_blocking_domain = {
.ops = &mock_blocking_ops,
};
-static void *mock_domain_hw_info(struct device *dev, u32 *length, u32 *type)
+static void *mock_domain_hw_info(struct device *dev, u32 *length,
+ enum iommu_hw_info_type *type)
{
struct iommu_test_hw_info *info;
+ if (*type != IOMMU_HW_INFO_TYPE_DEFAULT &&
+ *type != IOMMU_HW_INFO_TYPE_SELFTEST)
+ return ERR_PTR(-EOPNOTSUPP);
+
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return ERR_PTR(-ENOMEM);
@@ -434,7 +456,6 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
mock_nested = __mock_domain_alloc_nested(user_data);
if (IS_ERR(mock_nested))
return ERR_CAST(mock_nested);
- mock_nested->parent = mock_parent;
return &mock_nested->domain;
}
@@ -671,9 +692,15 @@ static void mock_viommu_destroy(struct iommufd_viommu *viommu)
{
struct mock_iommu_device *mock_iommu = container_of(
viommu->iommu_dev, struct mock_iommu_device, iommu_dev);
+ struct mock_viommu *mock_viommu = to_mock_viommu(viommu);
if (refcount_dec_and_test(&mock_iommu->users))
complete(&mock_iommu->complete);
+ if (mock_viommu->mmap_offset)
+ iommufd_viommu_destroy_mmap(&mock_viommu->core,
+ mock_viommu->mmap_offset);
+ free_page((unsigned long)mock_viommu->page);
+ mutex_destroy(&mock_viommu->queue_mutex);
/* iommufd core frees mock_viommu and viommu */
}
@@ -692,7 +719,6 @@ mock_viommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
if (IS_ERR(mock_nested))
return ERR_CAST(mock_nested);
mock_nested->mock_viommu = mock_viommu;
- mock_nested->parent = mock_viommu->s2_parent;
return &mock_nested->domain;
}
@@ -766,31 +792,149 @@ out:
return rc;
}
+static size_t mock_viommu_get_hw_queue_size(struct iommufd_viommu *viommu,
+ enum iommu_hw_queue_type queue_type)
+{
+ if (queue_type != IOMMU_HW_QUEUE_TYPE_SELFTEST)
+ return 0;
+ return HW_QUEUE_STRUCT_SIZE(struct mock_hw_queue, core);
+}
+
+static void mock_hw_queue_destroy(struct iommufd_hw_queue *hw_queue)
+{
+ struct mock_hw_queue *mock_hw_queue = to_mock_hw_queue(hw_queue);
+ struct mock_viommu *mock_viommu = mock_hw_queue->mock_viommu;
+
+ mutex_lock(&mock_viommu->queue_mutex);
+ mock_viommu->hw_queue[mock_hw_queue->index] = NULL;
+ if (mock_hw_queue->prev)
+ iommufd_hw_queue_undepend(mock_hw_queue, mock_hw_queue->prev,
+ core);
+ mutex_unlock(&mock_viommu->queue_mutex);
+}
+
+/* Test iommufd_hw_queue_depend/undepend() */
+static int mock_hw_queue_init_phys(struct iommufd_hw_queue *hw_queue, u32 index,
+ phys_addr_t base_addr_pa)
+{
+ struct mock_viommu *mock_viommu = to_mock_viommu(hw_queue->viommu);
+ struct mock_hw_queue *mock_hw_queue = to_mock_hw_queue(hw_queue);
+ struct mock_hw_queue *prev = NULL;
+ int rc = 0;
+
+ if (index >= IOMMU_TEST_HW_QUEUE_MAX)
+ return -EINVAL;
+
+ mutex_lock(&mock_viommu->queue_mutex);
+
+ if (mock_viommu->hw_queue[index]) {
+ rc = -EEXIST;
+ goto unlock;
+ }
+
+ if (index) {
+ prev = mock_viommu->hw_queue[index - 1];
+ if (!prev) {
+ rc = -EIO;
+ goto unlock;
+ }
+ }
+
+ /*
+ * Test to catch a kernel bug if the core converted the physical address
+ * incorrectly. Let mock_domain_iova_to_phys() WARN_ON if it fails.
+ */
+ if (base_addr_pa != iommu_iova_to_phys(&mock_viommu->s2_parent->domain,
+ hw_queue->base_addr)) {
+ rc = -EFAULT;
+ goto unlock;
+ }
+
+ if (prev) {
+ rc = iommufd_hw_queue_depend(mock_hw_queue, prev, core);
+ if (rc)
+ goto unlock;
+ }
+
+ mock_hw_queue->prev = prev;
+ mock_hw_queue->mock_viommu = mock_viommu;
+ mock_viommu->hw_queue[index] = mock_hw_queue;
+
+ hw_queue->destroy = &mock_hw_queue_destroy;
+unlock:
+ mutex_unlock(&mock_viommu->queue_mutex);
+ return rc;
+}
+
static struct iommufd_viommu_ops mock_viommu_ops = {
.destroy = mock_viommu_destroy,
.alloc_domain_nested = mock_viommu_alloc_domain_nested,
.cache_invalidate = mock_viommu_cache_invalidate,
+ .get_hw_queue_size = mock_viommu_get_hw_queue_size,
+ .hw_queue_init_phys = mock_hw_queue_init_phys,
};
-static struct iommufd_viommu *mock_viommu_alloc(struct device *dev,
- struct iommu_domain *domain,
- struct iommufd_ctx *ictx,
- unsigned int viommu_type)
+static size_t mock_get_viommu_size(struct device *dev,
+ enum iommu_viommu_type viommu_type)
{
- struct mock_iommu_device *mock_iommu =
- iommu_get_iommu_dev(dev, struct mock_iommu_device, iommu_dev);
- struct mock_viommu *mock_viommu;
-
if (viommu_type != IOMMU_VIOMMU_TYPE_SELFTEST)
- return ERR_PTR(-EOPNOTSUPP);
+ return 0;
+ return VIOMMU_STRUCT_SIZE(struct mock_viommu, core);
+}
+
+static int mock_viommu_init(struct iommufd_viommu *viommu,
+ struct iommu_domain *parent_domain,
+ const struct iommu_user_data *user_data)
+{
+ struct mock_iommu_device *mock_iommu = container_of(
+ viommu->iommu_dev, struct mock_iommu_device, iommu_dev);
+ struct mock_viommu *mock_viommu = to_mock_viommu(viommu);
+ struct iommu_viommu_selftest data;
+ int rc;
+
+ if (user_data) {
+ rc = iommu_copy_struct_from_user(
+ &data, user_data, IOMMU_VIOMMU_TYPE_SELFTEST, out_data);
+ if (rc)
+ return rc;
+
+ /* Allocate two pages */
+ mock_viommu->page =
+ (u32 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
+ if (!mock_viommu->page)
+ return -ENOMEM;
- mock_viommu = iommufd_viommu_alloc(ictx, struct mock_viommu, core,
- &mock_viommu_ops);
- if (IS_ERR(mock_viommu))
- return ERR_CAST(mock_viommu);
+ rc = iommufd_viommu_alloc_mmap(&mock_viommu->core,
+ __pa(mock_viommu->page),
+ PAGE_SIZE * 2,
+ &mock_viommu->mmap_offset);
+ if (rc)
+ goto err_free_page;
+
+ /* For loopback tests on both the page and out_data */
+ *mock_viommu->page = data.in_data;
+ data.out_data = data.in_data;
+ data.out_mmap_length = PAGE_SIZE * 2;
+ data.out_mmap_offset = mock_viommu->mmap_offset;
+ rc = iommu_copy_struct_to_user(
+ user_data, &data, IOMMU_VIOMMU_TYPE_SELFTEST, out_data);
+ if (rc)
+ goto err_destroy_mmap;
+ }
refcount_inc(&mock_iommu->users);
- return &mock_viommu->core;
+ mutex_init(&mock_viommu->queue_mutex);
+ mock_viommu->s2_parent = to_mock_domain(parent_domain);
+
+ viommu->ops = &mock_viommu_ops;
+ return 0;
+
+err_destroy_mmap:
+ iommufd_viommu_destroy_mmap(&mock_viommu->core,
+ mock_viommu->mmap_offset);
+err_free_page:
+ free_page((unsigned long)mock_viommu->page);
+ return rc;
}
static const struct iommu_ops mock_ops = {
@@ -801,7 +945,6 @@ static const struct iommu_ops mock_ops = {
.default_domain = &mock_blocking_domain,
.blocked_domain = &mock_blocking_domain,
.owner = THIS_MODULE,
- .pgsize_bitmap = MOCK_IO_PAGE_SIZE,
.hw_info = mock_domain_hw_info,
.domain_alloc_paging_flags = mock_domain_alloc_paging_flags,
.domain_alloc_nested = mock_domain_alloc_nested,
@@ -810,7 +953,8 @@ static const struct iommu_ops mock_ops = {
.probe_device = mock_probe_device,
.page_response = mock_domain_page_response,
.user_pasid_table = true,
- .viommu_alloc = mock_viommu_alloc,
+ .get_viommu_size = mock_get_viommu_size,
+ .viommu_init = mock_viommu_init,
.default_domain_ops =
&(struct iommu_domain_ops){
.free = mock_domain_free,
@@ -1216,9 +1360,8 @@ static int iommufd_test_md_check_refs(struct iommufd_ucmd *ucmd,
return 0;
}
-static int iommufd_test_md_check_iotlb(struct iommufd_ucmd *ucmd,
- u32 mockpt_id, unsigned int iotlb_id,
- u32 iotlb)
+static int iommufd_test_md_check_iotlb(struct iommufd_ucmd *ucmd, u32 mockpt_id,
+ unsigned int iotlb_id, u32 iotlb)
{
struct mock_iommu_domain_nested *mock_nested;
struct iommufd_hw_pagetable *hwpt;
@@ -1491,7 +1634,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
int rc;
/* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */
- if (length > 16*1024*1024)
+ if (length > 16 * 1024 * 1024)
return -ENOMEM;
if (flags & ~(MOCK_FLAGS_ACCESS_WRITE | MOCK_FLAGS_ACCESS_SYZ))
@@ -1508,7 +1651,7 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
if (flags & MOCK_FLAGS_ACCESS_SYZ)
iova = iommufd_test_syz_conv_iova(staccess->access,
- &cmd->access_pages.iova);
+ &cmd->access_pages.iova);
npages = (ALIGN(iova + length, PAGE_SIZE) -
ALIGN_DOWN(iova, PAGE_SIZE)) /
@@ -1584,7 +1727,7 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd,
int rc;
/* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */
- if (length > 16*1024*1024)
+ if (length > 16 * 1024 * 1024)
return -ENOMEM;
if (flags & ~(MOCK_ACCESS_RW_WRITE | MOCK_ACCESS_RW_SLOW_PATH |
@@ -1610,7 +1753,7 @@ static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd,
if (flags & MOCK_FLAGS_ACCESS_SYZ)
iova = iommufd_test_syz_conv_iova(staccess->access,
- &cmd->access_rw.iova);
+ &cmd->access_rw.iova);
rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags);
if (rc)
@@ -1665,7 +1808,7 @@ static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id,
goto out_put;
}
- if (copy_from_user(tmp, uptr,DIV_ROUND_UP(max, BITS_PER_BYTE))) {
+ if (copy_from_user(tmp, uptr, DIV_ROUND_UP(max, BITS_PER_BYTE))) {
rc = -EFAULT;
goto out_free;
}
@@ -1701,7 +1844,7 @@ out_put:
static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd,
struct iommu_test_cmd *cmd)
{
- struct iopf_fault event = { };
+ struct iopf_fault event = {};
struct iommufd_device *idev;
idev = iommufd_get_device(ucmd, cmd->trigger_iopf.dev_id);
@@ -1832,8 +1975,7 @@ static int iommufd_test_pasid_attach(struct iommufd_ucmd *ucmd,
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
- iommufd_device_detach(sobj->idev.idev,
- cmd->pasid_attach.pasid);
+ iommufd_device_detach(sobj->idev.idev, cmd->pasid_attach.pasid);
out_sobj:
iommufd_put_object(ucmd->ictx, &sobj->obj);
@@ -2004,8 +2146,8 @@ int __init iommufd_test_init(void)
goto err_bus;
rc = iommu_device_register_bus(&mock_iommu.iommu_dev, &mock_ops,
- &iommufd_mock_bus_type.bus,
- &iommufd_mock_bus_type.nb);
+ &iommufd_mock_bus_type.bus,
+ &iommufd_mock_bus_type.nb);
if (rc)
goto err_sysfs;
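
The selftest conversion above follows the new two-step vIOMMU contract: the driver reports its structure size via get_viommu_size() and then initializes the core-allocated object in viommu_init(), filling viommu->ops before returning. A stripped-down sketch of that contract for a hypothetical driver; struct my_viommu, MY_VIOMMU_TYPE and my_viommu_ops are made up.

struct my_viommu {
	struct iommufd_viommu core;
	struct mutex lock;		/* driver state would live here */
};

static struct iommufd_viommu_ops my_viommu_ops = {
	/* .destroy, .alloc_domain_nested, .cache_invalidate, ... */
};

static size_t my_get_viommu_size(struct device *dev,
				 enum iommu_viommu_type viommu_type)
{
	if (viommu_type != MY_VIOMMU_TYPE)
		return 0;	/* unsupported type: the core returns -EOPNOTSUPP */
	return VIOMMU_STRUCT_SIZE(struct my_viommu, core);
}

static int my_viommu_init(struct iommufd_viommu *viommu,
			  struct iommu_domain *parent_domain,
			  const struct iommu_user_data *user_data)
{
	struct my_viommu *my = container_of(viommu, struct my_viommu, core);

	mutex_init(&my->lock);
	/* ... optionally copy user_data, set up against parent_domain ... */

	/* The core treats a missing ops pointer as a driver bug */
	viommu->ops = &my_viommu_ops;
	return 0;
}

These are wired up through .get_viommu_size and .viommu_init in the driver's iommu_ops, replacing the old .viommu_alloc callback.
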
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 01df2b985f02..2ca5809b238b 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -17,10 +17,16 @@ void iommufd_viommu_destroy(struct iommufd_object *obj)
int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
struct iommu_viommu_alloc *cmd = ucmd->cmd;
+ const struct iommu_user_data user_data = {
+ .type = cmd->type,
+ .uptr = u64_to_user_ptr(cmd->data_uptr),
+ .len = cmd->data_len,
+ };
struct iommufd_hwpt_paging *hwpt_paging;
struct iommufd_viommu *viommu;
struct iommufd_device *idev;
const struct iommu_ops *ops;
+ size_t viommu_size;
int rc;
if (cmd->flags || cmd->type == IOMMU_VIOMMU_TYPE_DEFAULT)
@@ -31,7 +37,22 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
return PTR_ERR(idev);
ops = dev_iommu_ops(idev->dev);
- if (!ops->viommu_alloc) {
+ if (!ops->get_viommu_size || !ops->viommu_init) {
+ rc = -EOPNOTSUPP;
+ goto out_put_idev;
+ }
+
+ viommu_size = ops->get_viommu_size(idev->dev, cmd->type);
+ if (!viommu_size) {
+ rc = -EOPNOTSUPP;
+ goto out_put_idev;
+ }
+
+ /*
+ * It is a driver bug for providing a viommu_size smaller than the core
+ * vIOMMU structure size
+ */
+ if (WARN_ON_ONCE(viommu_size < sizeof(*viommu))) {
rc = -EOPNOTSUPP;
goto out_put_idev;
}
@@ -47,8 +68,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
goto out_put_hwpt;
}
- viommu = ops->viommu_alloc(idev->dev, hwpt_paging->common.domain,
- ucmd->ictx, cmd->type);
+ viommu = (struct iommufd_viommu *)_iommufd_object_alloc_ucmd(
+ ucmd, viommu_size, IOMMUFD_OBJ_VIOMMU);
if (IS_ERR(viommu)) {
rc = PTR_ERR(viommu);
goto out_put_hwpt;
@@ -68,15 +89,20 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
*/
viommu->iommu_dev = __iommu_get_iommu_dev(idev->dev);
+ rc = ops->viommu_init(viommu, hwpt_paging->common.domain,
+ user_data.len ? &user_data : NULL);
+ if (rc)
+ goto out_put_hwpt;
+
+ /* It is a driver bug that viommu->ops isn't filled */
+ if (WARN_ON_ONCE(!viommu->ops)) {
+ rc = -EOPNOTSUPP;
+ goto out_put_hwpt;
+ }
+
cmd->out_viommu_id = viommu->obj.id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
- if (rc)
- goto out_abort;
- iommufd_object_finalize(ucmd->ictx, &viommu->obj);
- goto out_put_hwpt;
-out_abort:
- iommufd_object_abort_and_destroy(ucmd->ictx, &viommu->obj);
out_put_hwpt:
iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
out_put_idev:
@@ -84,22 +110,41 @@ out_put_idev:
return rc;
}
-void iommufd_vdevice_destroy(struct iommufd_object *obj)
+void iommufd_vdevice_abort(struct iommufd_object *obj)
{
struct iommufd_vdevice *vdev =
container_of(obj, struct iommufd_vdevice, obj);
struct iommufd_viommu *viommu = vdev->viommu;
+ struct iommufd_device *idev = vdev->idev;
+
+ lockdep_assert_held(&idev->igroup->lock);
+ if (vdev->destroy)
+ vdev->destroy(vdev);
/* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */
- xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL);
+ xa_cmpxchg(&viommu->vdevs, vdev->virt_id, vdev, NULL, GFP_KERNEL);
refcount_dec(&viommu->obj.users);
- put_device(vdev->dev);
+ idev->vdev = NULL;
+}
+
+void iommufd_vdevice_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_vdevice *vdev =
+ container_of(obj, struct iommufd_vdevice, obj);
+ struct iommufd_device *idev = vdev->idev;
+ struct iommufd_ctx *ictx = idev->ictx;
+
+ mutex_lock(&idev->igroup->lock);
+ iommufd_vdevice_abort(obj);
+ mutex_unlock(&idev->igroup->lock);
+ iommufd_put_object(ictx, &idev->obj);
}
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
{
struct iommu_vdevice_alloc *cmd = ucmd->cmd;
struct iommufd_vdevice *vdev, *curr;
+ size_t vdev_size = sizeof(*vdev);
struct iommufd_viommu *viommu;
struct iommufd_device *idev;
u64 virt_id = cmd->virt_id;
@@ -124,17 +169,54 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
goto out_put_idev;
}
- vdev = iommufd_object_alloc(ucmd->ictx, vdev, IOMMUFD_OBJ_VDEVICE);
+ mutex_lock(&idev->igroup->lock);
+ if (idev->destroying) {
+ rc = -ENOENT;
+ goto out_unlock_igroup;
+ }
+
+ if (idev->vdev) {
+ rc = -EEXIST;
+ goto out_unlock_igroup;
+ }
+
+ if (viommu->ops && viommu->ops->vdevice_size) {
+ /*
+ * It is a driver bug for:
+ * - ops->vdevice_size smaller than the core structure size
+ * - not implementing a pairing ops->vdevice_init op
+ */
+ if (WARN_ON_ONCE(viommu->ops->vdevice_size < vdev_size ||
+ !viommu->ops->vdevice_init)) {
+ rc = -EOPNOTSUPP;
+ goto out_put_idev;
+ }
+ vdev_size = viommu->ops->vdevice_size;
+ }
+
+ vdev = (struct iommufd_vdevice *)_iommufd_object_alloc(
+ ucmd->ictx, vdev_size, IOMMUFD_OBJ_VDEVICE);
if (IS_ERR(vdev)) {
rc = PTR_ERR(vdev);
- goto out_put_idev;
+ goto out_unlock_igroup;
}
- vdev->id = virt_id;
- vdev->dev = idev->dev;
- get_device(idev->dev);
+ vdev->virt_id = virt_id;
vdev->viommu = viommu;
refcount_inc(&viommu->obj.users);
+ /*
+ * A wait_cnt reference is held on the idev so long as we have the
+ * pointer. iommufd_device_pre_destroy() will revoke it before the
+ * idev real destruction.
+ */
+ vdev->idev = idev;
+
+ /*
+ * iommufd_device_destroy() delays until idev->vdev is NULL before
+ * freeing the idev, which only happens once the vdev is finished
+ * destruction.
+ */
+ idev->vdev = vdev;
curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL);
if (curr) {
@@ -142,17 +224,206 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
goto out_abort;
}
+ if (viommu->ops && viommu->ops->vdevice_init) {
+ rc = viommu->ops->vdevice_init(vdev);
+ if (rc)
+ goto out_abort;
+ }
+
cmd->out_vdevice_id = vdev->obj.id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
goto out_abort;
iommufd_object_finalize(ucmd->ictx, &vdev->obj);
- goto out_put_idev;
+ goto out_unlock_igroup;
out_abort:
iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj);
+out_unlock_igroup:
+ mutex_unlock(&idev->igroup->lock);
out_put_idev:
- iommufd_put_object(ucmd->ictx, &idev->obj);
+ if (rc)
+ iommufd_put_object(ucmd->ictx, &idev->obj);
+out_put_viommu:
+ iommufd_put_object(ucmd->ictx, &viommu->obj);
+ return rc;
+}
+
+static void iommufd_hw_queue_destroy_access(struct iommufd_ctx *ictx,
+ struct iommufd_access *access,
+ u64 base_iova, size_t length)
+{
+ u64 aligned_iova = PAGE_ALIGN_DOWN(base_iova);
+ u64 offset = base_iova - aligned_iova;
+
+ iommufd_access_unpin_pages(access, aligned_iova,
+ PAGE_ALIGN(length + offset));
+ iommufd_access_detach_internal(access);
+ iommufd_access_destroy_internal(ictx, access);
+}
+
+void iommufd_hw_queue_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_hw_queue *hw_queue =
+ container_of(obj, struct iommufd_hw_queue, obj);
+
+ if (hw_queue->destroy)
+ hw_queue->destroy(hw_queue);
+ if (hw_queue->access)
+ iommufd_hw_queue_destroy_access(hw_queue->viommu->ictx,
+ hw_queue->access,
+ hw_queue->base_addr,
+ hw_queue->length);
+ if (hw_queue->viommu)
+ refcount_dec(&hw_queue->viommu->obj.users);
+}
+
+/*
+ * When the HW accesses the guest queue via physical addresses, the underlying
+ * physical pages of the guest queue must be contiguous. Also, because
+ * IOMMUFD_CMD_IOAS_UNMAP could otherwise remove the guest queue's mappings
+ * from the nesting parent iopt while the HW is still accessing the memory
+ * physically, such a HW queue must hold an access that pins the underlying
+ * pages and prevents that from happening.
+ */
+static struct iommufd_access *
+iommufd_hw_queue_alloc_phys(struct iommu_hw_queue_alloc *cmd,
+ struct iommufd_viommu *viommu, phys_addr_t *base_pa)
+{
+ u64 aligned_iova = PAGE_ALIGN_DOWN(cmd->nesting_parent_iova);
+ u64 offset = cmd->nesting_parent_iova - aligned_iova;
+ struct iommufd_access *access;
+ struct page **pages;
+ size_t max_npages;
+ size_t length;
+ size_t i;
+ int rc;
+
+ /* max_npages = DIV_ROUND_UP(offset + cmd->length, PAGE_SIZE) */
+ if (check_add_overflow(offset, cmd->length, &length))
+ return ERR_PTR(-ERANGE);
+ if (check_add_overflow(length, PAGE_SIZE - 1, &length))
+ return ERR_PTR(-ERANGE);
+ max_npages = length / PAGE_SIZE;
+ /* length needs to be page aligned too */
+ length = max_npages * PAGE_SIZE;
+
+ /*
+ * Use kvcalloc() to avoid memory fragmentation for a large page array.
+ * Set __GFP_NOWARN to avoid syzkaller blowups
+ */
+ pages = kvcalloc(max_npages, sizeof(*pages), GFP_KERNEL | __GFP_NOWARN);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
+
+ access = iommufd_access_create_internal(viommu->ictx);
+ if (IS_ERR(access)) {
+ rc = PTR_ERR(access);
+ goto out_free;
+ }
+
+ rc = iommufd_access_attach_internal(access, viommu->hwpt->ioas);
+ if (rc)
+ goto out_destroy;
+
+ rc = iommufd_access_pin_pages(access, aligned_iova, length, pages, 0);
+ if (rc)
+ goto out_detach;
+
+ /* Validate if the underlying physical pages are contiguous */
+ for (i = 1; i < max_npages; i++) {
+ if (page_to_pfn(pages[i]) == page_to_pfn(pages[i - 1]) + 1)
+ continue;
+ rc = -EFAULT;
+ goto out_unpin;
+ }
+
+ *base_pa = (page_to_pfn(pages[0]) << PAGE_SHIFT) + offset;
+ kfree(pages);
+ return access;
+
+out_unpin:
+ iommufd_access_unpin_pages(access, aligned_iova, length);
+out_detach:
+ iommufd_access_detach_internal(access);
+out_destroy:
+ iommufd_access_destroy_internal(viommu->ictx, access);
+out_free:
+ kfree(pages);
+ return ERR_PTR(rc);
+}
+
+int iommufd_hw_queue_alloc_ioctl(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_hw_queue_alloc *cmd = ucmd->cmd;
+ struct iommufd_hw_queue *hw_queue;
+ struct iommufd_viommu *viommu;
+ struct iommufd_access *access;
+ size_t hw_queue_size;
+ phys_addr_t base_pa;
+ u64 last;
+ int rc;
+
+ if (cmd->flags || cmd->type == IOMMU_HW_QUEUE_TYPE_DEFAULT)
+ return -EOPNOTSUPP;
+ if (!cmd->length)
+ return -EINVAL;
+ if (check_add_overflow(cmd->nesting_parent_iova, cmd->length - 1,
+ &last))
+ return -EOVERFLOW;
+
+ viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
+ if (IS_ERR(viommu))
+ return PTR_ERR(viommu);
+
+ if (!viommu->ops || !viommu->ops->get_hw_queue_size ||
+ !viommu->ops->hw_queue_init_phys) {
+ rc = -EOPNOTSUPP;
+ goto out_put_viommu;
+ }
+
+ hw_queue_size = viommu->ops->get_hw_queue_size(viommu, cmd->type);
+ if (!hw_queue_size) {
+ rc = -EOPNOTSUPP;
+ goto out_put_viommu;
+ }
+
+ /*
+ * It is a driver bug for providing a hw_queue_size smaller than the
+ * core HW queue structure size
+ */
+ if (WARN_ON_ONCE(hw_queue_size < sizeof(*hw_queue))) {
+ rc = -EOPNOTSUPP;
+ goto out_put_viommu;
+ }
+
+ hw_queue = (struct iommufd_hw_queue *)_iommufd_object_alloc_ucmd(
+ ucmd, hw_queue_size, IOMMUFD_OBJ_HW_QUEUE);
+ if (IS_ERR(hw_queue)) {
+ rc = PTR_ERR(hw_queue);
+ goto out_put_viommu;
+ }
+
+ access = iommufd_hw_queue_alloc_phys(cmd, viommu, &base_pa);
+ if (IS_ERR(access)) {
+ rc = PTR_ERR(access);
+ goto out_put_viommu;
+ }
+
+ hw_queue->viommu = viommu;
+ refcount_inc(&viommu->obj.users);
+ hw_queue->access = access;
+ hw_queue->type = cmd->type;
+ hw_queue->length = cmd->length;
+ hw_queue->base_addr = cmd->nesting_parent_iova;
+
+ rc = viommu->ops->hw_queue_init_phys(hw_queue, cmd->index, base_pa);
+ if (rc)
+ goto out_put_viommu;
+
+ cmd->out_hw_queue_id = hw_queue->obj.id;
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+
out_put_viommu:
iommufd_put_object(ucmd->ictx, &viommu->obj);
return rc;
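
From user space, IOMMU_HW_QUEUE_ALLOC above takes the vIOMMU ID, a driver-specific queue type, a queue index, and the guest queue's IOVA/length in the nesting parent IOAS. A hedged user-space sketch: the field names follow the kernel code above and the usual iommufd size-field convention, but the exact struct iommu_hw_queue_alloc layout should be taken from include/uapi/linux/iommufd.h, and alloc_hw_queue() itself is made up.

#include <linux/iommufd.h>
#include <stdint.h>
#include <sys/ioctl.h>

/*
 * Sketch: allocate HW queue index 0 for an existing vIOMMU. The queue
 * memory must already be mapped in the nesting parent IOAS at queue_iova
 * and be physically contiguous, or the kernel fails with -EFAULT.
 */
static int alloc_hw_queue(int iommufd, uint32_t viommu_id, uint32_t queue_type,
			  uint64_t queue_iova, uint64_t queue_len,
			  uint32_t *out_hw_queue_id)
{
	struct iommu_hw_queue_alloc cmd = {
		.size = sizeof(cmd),
		.viommu_id = viommu_id,
		.type = queue_type,		/* e.g. a vendor queue type */
		.index = 0,
		.nesting_parent_iova = queue_iova,
		.length = queue_len,
	};

	if (ioctl(iommufd, IOMMU_HW_QUEUE_ALLOC, &cmd))
		return -1;
	*out_hw_queue_id = cmd.out_hw_queue_id;
	return 0;
}
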
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 90341b24a811..ffa892f65714 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -430,7 +430,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
* non-secure mode.
*/
domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS;
- domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K;
+ domain->cfg.pgsize_bitmap = domain->io_domain.pgsize_bitmap;
domain->cfg.ias = 32;
domain->cfg.oas = 40;
domain->cfg.tlb = &ipmmu_flush_ops;
@@ -571,6 +571,7 @@ static struct iommu_domain *ipmmu_domain_alloc_paging(struct device *dev)
return NULL;
mutex_init(&domain->mutex);
+ domain->io_domain.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K;
return &domain->io_domain;
}
@@ -882,7 +883,6 @@ static const struct iommu_ops ipmmu_ops = {
*/
.device_group = IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)
? generic_device_group : generic_single_device_group,
- .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
.of_xlate = ipmmu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = ipmmu_attach_device,
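
The ipmmu hunks above (and the msm/mtk ones below) all follow the same conversion: iommu_ops::pgsize_bitmap goes away, the bitmap is set on the domain when the paging domain is allocated, and the io-pgtable config is seeded from the domain. A generic sketch of the pattern; struct my_domain, MY_PGSIZES and my_domain_alloc_paging() are hypothetical.

#define MY_PGSIZES	(SZ_4K | SZ_2M | SZ_1G)

struct my_domain {
	struct iommu_domain domain;
	struct io_pgtable_cfg cfg;
};

static struct iommu_domain *my_domain_alloc_paging(struct device *dev)
{
	struct my_domain *priv = kzalloc(sizeof(*priv), GFP_KERNEL);

	if (!priv)
		return NULL;

	/* Per-domain page sizes replace the old iommu_ops::pgsize_bitmap */
	priv->domain.pgsize_bitmap = MY_PGSIZES;
	/* The io-pgtable config inherits the bitmap from the domain */
	priv->cfg.pgsize_bitmap = priv->domain.pgsize_bitmap;
	return &priv->domain;
}
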
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 2769e4544038..43a61ba021a5 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -312,6 +312,8 @@ static struct iommu_domain *msm_iommu_domain_alloc_paging(struct device *dev)
INIT_LIST_HEAD(&priv->list_attached);
+ priv->domain.pgsize_bitmap = MSM_IOMMU_PGSIZES;
+
priv->domain.geometry.aperture_start = 0;
priv->domain.geometry.aperture_end = (1ULL << 32) - 1;
priv->domain.geometry.force_aperture = true;
@@ -339,7 +341,7 @@ static int msm_iommu_domain_config(struct msm_priv *priv)
spin_lock_init(&priv->pgtlock);
priv->cfg = (struct io_pgtable_cfg) {
- .pgsize_bitmap = msm_iommu_ops.pgsize_bitmap,
+ .pgsize_bitmap = priv->domain.pgsize_bitmap,
.ias = 32,
.oas = 32,
.tlb = &msm_iommu_flush_ops,
@@ -352,8 +354,6 @@ static int msm_iommu_domain_config(struct msm_priv *priv)
return -EINVAL;
}
- msm_iommu_ops.pgsize_bitmap = priv->cfg.pgsize_bitmap;
-
return 0;
}
@@ -692,7 +692,6 @@ static struct iommu_ops msm_iommu_ops = {
.domain_alloc_paging = msm_iommu_domain_alloc_paging,
.probe_device = msm_iommu_probe_device,
.device_group = generic_device_group,
- .pgsize_bitmap = MSM_IOMMU_PGSIZES,
.of_xlate = qcom_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = msm_iommu_attach_dev,
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index cb95fecf6016..0e0285348d2b 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -648,7 +648,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
if (share_dom) {
dom->iop = share_dom->iop;
dom->cfg = share_dom->cfg;
- dom->domain.pgsize_bitmap = share_dom->cfg.pgsize_bitmap;
+ dom->domain.pgsize_bitmap = share_dom->domain.pgsize_bitmap;
goto update_iova_region;
}
@@ -656,7 +656,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
.quirks = IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_NO_PERMS |
IO_PGTABLE_QUIRK_ARM_MTK_EXT,
- .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
+ .pgsize_bitmap = dom->domain.pgsize_bitmap,
.ias = MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN) ? 34 : 32,
.iommu_dev = data->dev,
};
@@ -675,9 +675,6 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
return -ENOMEM;
}
- /* Update our support page sizes bitmap */
- dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap;
-
data->share_dom = dom;
update_iova_region:
@@ -697,6 +694,7 @@ static struct iommu_domain *mtk_iommu_domain_alloc_paging(struct device *dev)
if (!dom)
return NULL;
mutex_init(&dom->mutex);
+ dom->domain.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M;
return &dom->domain;
}
@@ -1019,7 +1017,6 @@ static const struct iommu_ops mtk_iommu_ops = {
.device_group = mtk_iommu_device_group,
.of_xlate = mtk_iommu_of_xlate,
.get_resv_regions = mtk_iommu_get_resv_regions,
- .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = mtk_iommu_attach_device,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 66824982e05f..10cc0b1197e8 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -288,6 +288,8 @@ static struct iommu_domain *mtk_iommu_v1_domain_alloc_paging(struct device *dev)
if (!dom)
return NULL;
+ dom->domain.pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE;
+
return &dom->domain;
}
@@ -509,14 +511,10 @@ static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev)
static void mtk_iommu_v1_probe_finalize(struct device *dev)
{
- struct dma_iommu_mapping *mtk_mapping;
- struct mtk_iommu_v1_data *data;
+ __maybe_unused struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev);
int err;
- data = dev_iommu_priv_get(dev);
- mtk_mapping = data->mapping;
-
- err = arm_iommu_attach_device(dev, mtk_mapping);
+ err = arm_iommu_attach_device(dev, data->mapping);
if (err)
dev_err(dev, "Can't create IOMMU mapping - DMA-OPS will not work\n");
}
@@ -582,7 +580,6 @@ static const struct iommu_ops mtk_iommu_v1_ops = {
.probe_finalize = mtk_iommu_v1_probe_finalize,
.release_device = mtk_iommu_v1_release_device,
.device_group = generic_device_group,
- .pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE,
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = mtk_iommu_v1_attach_device,
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 3c62337f43c6..6fb93927bdb9 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1123,29 +1123,15 @@ static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev,
struct omap_iommu *obj)
{
struct device_node *np = pdev->dev.of_node;
- int ret;
if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu"))
return 0;
- if (!of_property_read_bool(np, "ti,syscon-mmuconfig")) {
- dev_err(&pdev->dev, "ti,syscon-mmuconfig property is missing\n");
- return -EINVAL;
- }
-
- obj->syscfg =
- syscon_regmap_lookup_by_phandle(np, "ti,syscon-mmuconfig");
- if (IS_ERR(obj->syscfg)) {
- /* can fail with -EPROBE_DEFER */
- ret = PTR_ERR(obj->syscfg);
- return ret;
- }
-
- if (of_property_read_u32_index(np, "ti,syscon-mmuconfig", 1,
- &obj->id)) {
- dev_err(&pdev->dev, "couldn't get the IOMMU instance id within subsystem\n");
- return -EINVAL;
- }
+ obj->syscfg = syscon_regmap_lookup_by_phandle_args(np, "ti,syscon-mmuconfig",
+ 1, &obj->id);
+ if (IS_ERR(obj->syscfg))
+ return dev_err_probe(&pdev->dev, PTR_ERR(obj->syscfg),
+ "ti,syscon-mmuconfig property is missing\n");
if (obj->id != 0 && obj->id != 1) {
dev_err(&pdev->dev, "invalid IOMMU instance id\n");
@@ -1584,6 +1570,8 @@ static struct iommu_domain *omap_iommu_domain_alloc_paging(struct device *dev)
spin_lock_init(&omap_domain->lock);
+ omap_domain->domain.pgsize_bitmap = OMAP_IOMMU_PGSIZES;
+
omap_domain->domain.geometry.aperture_start = 0;
omap_domain->domain.geometry.aperture_end = (1ULL << 32) - 1;
omap_domain->domain.geometry.force_aperture = true;
@@ -1735,7 +1723,6 @@ static const struct iommu_ops omap_iommu_ops = {
.release_device = omap_iommu_release_device,
.device_group = generic_single_device_group,
.of_xlate = omap_iommu_of_xlate,
- .pgsize_bitmap = OMAP_IOMMU_PGSIZES,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = omap_iommu_attach_dev,
.map_pages = omap_iommu_map,
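
The omap hunk above also collapses the separate property check, regmap lookup, and index read into a single syscon_regmap_lookup_by_phandle_args() call plus dev_err_probe(). A hedged sketch of that consolidated pattern, for a hypothetical "vendor,example-syscfg" binding of the form <&syscon instance>:

static int example_get_syscfg(struct platform_device *pdev,
			      struct regmap **syscfg, u32 *instance)
{
	struct device_node *np = pdev->dev.of_node;
	struct regmap *map;

	/* One call resolves the phandle and returns its argument cell */
	map = syscon_regmap_lookup_by_phandle_args(np, "vendor,example-syscfg",
						   1, instance);
	if (IS_ERR(map))
		/* Keeps -EPROBE_DEFER quiet, logs real errors */
		return dev_err_probe(&pdev->dev, PTR_ERR(map),
				     "vendor,example-syscfg lookup failed\n");

	*syscfg = map;
	return 0;
}
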
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index bb57092ca901..2d0d31ba2886 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -1533,7 +1533,6 @@ static void riscv_iommu_release_device(struct device *dev)
}
static const struct iommu_ops riscv_iommu_ops = {
- .pgsize_bitmap = SZ_4K,
.of_xlate = riscv_iommu_of_xlate,
.identity_domain = &riscv_iommu_identity_domain,
.blocked_domain = &riscv_iommu_blocking_domain,
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index e6bb3c784017..0861dd469bd8 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1081,6 +1081,8 @@ static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev)
spin_lock_init(&rk_domain->dt_lock);
INIT_LIST_HEAD(&rk_domain->iommus);
+ rk_domain->domain.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP;
+
rk_domain->domain.geometry.aperture_start = 0;
rk_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32);
rk_domain->domain.geometry.force_aperture = true;
@@ -1170,7 +1172,6 @@ static const struct iommu_ops rk_iommu_ops = {
.probe_device = rk_iommu_probe_device,
.release_device = rk_iommu_release_device,
.device_group = generic_single_device_group,
- .pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
.of_xlate = rk_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = rk_iommu_attach_device,
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 433b59f43530..9c80d61deb2c 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -557,6 +557,7 @@ static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
}
zdev->end_dma = zdev->start_dma + aperture_size - 1;
+ s390_domain->domain.pgsize_bitmap = SZ_4K;
s390_domain->domain.geometry.force_aperture = true;
s390_domain->domain.geometry.aperture_start = 0;
s390_domain->domain.geometry.aperture_end = max_tbl_size(s390_domain);
@@ -1158,7 +1159,6 @@ static struct iommu_domain blocking_domain = {
.domain_alloc_paging = s390_domain_alloc_paging, \
.probe_device = s390_iommu_probe_device, \
.device_group = generic_device_group, \
- .pgsize_bitmap = SZ_4K, \
.get_resv_regions = s390_iommu_get_resv_regions, \
.default_domain_ops = &(const struct iommu_domain_ops) { \
.attach_dev = s390_iommu_attach_device, \
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index 941d1f361c8c..c7ca1d8a0b15 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -143,6 +143,8 @@ static struct iommu_domain *sprd_iommu_domain_alloc_paging(struct device *dev)
spin_lock_init(&dom->pgtlock);
+ dom->domain.pgsize_bitmap = SPRD_IOMMU_PAGE_SIZE;
+
dom->domain.geometry.aperture_start = 0;
dom->domain.geometry.aperture_end = SZ_256M - 1;
dom->domain.geometry.force_aperture = true;
@@ -410,7 +412,6 @@ static const struct iommu_ops sprd_iommu_ops = {
.probe_device = sprd_iommu_probe_device,
.device_group = generic_single_device_group,
.of_xlate = sprd_iommu_of_xlate,
- .pgsize_bitmap = SPRD_IOMMU_PAGE_SIZE,
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = sprd_iommu_attach_device,
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 76c9620af4bb..de10b569d9a9 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -697,6 +697,8 @@ sun50i_iommu_domain_alloc_paging(struct device *dev)
refcount_set(&sun50i_domain->refcnt, 1);
+ sun50i_domain->domain.pgsize_bitmap = SZ_4K;
+
sun50i_domain->domain.geometry.aperture_start = 0;
sun50i_domain->domain.geometry.aperture_end = DMA_BIT_MASK(32);
sun50i_domain->domain.geometry.force_aperture = true;
@@ -842,7 +844,6 @@ static int sun50i_iommu_of_xlate(struct device *dev,
static const struct iommu_ops sun50i_iommu_ops = {
.identity_domain = &sun50i_iommu_identity_domain,
- .pgsize_bitmap = SZ_4K,
.device_group = generic_single_device_group,
.domain_alloc_paging = sun50i_iommu_domain_alloc_paging,
.of_xlate = sun50i_iommu_of_xlate,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index e58fe9d8b9e7..36cdd5fbab07 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -318,6 +318,8 @@ static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev)
spin_lock_init(&as->lock);
+ as->domain.pgsize_bitmap = SZ_4K;
+
/* setup aperture */
as->domain.geometry.aperture_start = 0;
as->domain.geometry.aperture_end = 0xffffffff;
@@ -1002,7 +1004,6 @@ static const struct iommu_ops tegra_smmu_ops = {
.probe_device = tegra_smmu_probe_device,
.device_group = tegra_smmu_device_group,
.of_xlate = tegra_smmu_of_xlate,
- .pgsize_bitmap = SZ_4K,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = tegra_smmu_attach_dev,
.map_pages = tegra_smmu_map,
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index ecd41fb03e5a..532db1de201b 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -998,7 +998,7 @@ static void viommu_get_resv_regions(struct device *dev, struct list_head *head)
iommu_dma_get_resv_regions(dev, head);
}
-static struct iommu_ops viommu_ops;
+static const struct iommu_ops viommu_ops;
static struct virtio_driver virtio_iommu_drv;
static int viommu_match_node(struct device *dev, const void *data)
@@ -1086,7 +1086,7 @@ static bool viommu_capable(struct device *dev, enum iommu_cap cap)
}
}
-static struct iommu_ops viommu_ops = {
+static const struct iommu_ops viommu_ops = {
.capable = viommu_capable,
.domain_alloc_identity = viommu_domain_alloc_identity,
.domain_alloc_paging = viommu_domain_alloc_paging,
@@ -1217,8 +1217,6 @@ static int viommu_probe(struct virtio_device *vdev)
viommu->first_domain++;
}
- viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap;
-
virtio_device_ready(vdev);
/* Populate the event queue with buffers */