summaryrefslogtreecommitdiff
path: root/drivers/iommu/intel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-11 14:13:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-11 14:13:24 -0700
commit952ace797c17d06e50ad2a738babd27159b8d7cc (patch)
treed423ac0094dbed0f4421c6f7b7abab76bdcffe74 /drivers/iommu/intel
parent96f970feeb47003a8eba967f188bba4e75875c7a (diff)
parente46b3c0d011eab9933c183d5b47569db8e377281 (diff)
Merge tag 'iommu-updates-v5.9' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel: - Remove of the dev->archdata.iommu (or similar) pointers from most architectures. Only Sparc is left, but this is private to Sparc as their drivers don't use the IOMMU-API. - ARM-SMMU updates from Will Deacon: - Support for SMMU-500 implementation in Marvell Armada-AP806 SoC - Support for SMMU-500 implementation in NVIDIA Tegra194 SoC - DT compatible string updates - Remove unused IOMMU_SYS_CACHE_ONLY flag - Move ARM-SMMU drivers into their own subdirectory - Intel VT-d updates from Lu Baolu: - Misc tweaks and fixes for vSVA - Report/response page request events - Cleanups - Move the Kconfig and Makefile bits for the AMD and Intel drivers into their respective subdirectory. - MT6779 IOMMU Support - Support for new chipsets in the Renesas IOMMU driver - Other misc cleanups and fixes (e.g. to improve compile test coverage) * tag 'iommu-updates-v5.9' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (77 commits) iommu/amd: Move Kconfig and Makefile bits down into amd directory iommu/vt-d: Move Kconfig and Makefile bits down into intel directory iommu/arm-smmu: Move Arm SMMU drivers into their own subdirectory iommu/vt-d: Skip TE disabling on quirky gfx dedicated iommu iommu: Add gfp parameter to io_pgtable_ops->map() iommu: Mark __iommu_map_sg() as static iommu/vt-d: Rename intel-pasid.h to pasid.h iommu/vt-d: Add page response ops support iommu/vt-d: Report page request faults for guest SVA iommu/vt-d: Add a helper to get svm and sdev for pasid iommu/vt-d: Refactor device_to_iommu() helper iommu/vt-d: Disable multiple GPASID-dev bind iommu/vt-d: Warn on out-of-range invalidation address iommu/vt-d: Fix devTLB flush for vSVA iommu/vt-d: Handle non-page aligned address iommu/vt-d: Fix PASID devTLB invalidation iommu/vt-d: Remove global page support in devTLB flush iommu/vt-d: Enforce PASID devTLB field mask iommu: Make some functions static iommu/amd: Remove double zero check ...
Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r--drivers/iommu/intel/Kconfig87
-rw-r--r--drivers/iommu/intel/Makefile7
-rw-r--r--drivers/iommu/intel/debugfs.c2
-rw-r--r--drivers/iommu/intel/dmar.c26
-rw-r--r--drivers/iommu/intel/iommu.c141
-rw-r--r--drivers/iommu/intel/pasid.c13
-rw-r--r--drivers/iommu/intel/pasid.h (renamed from drivers/iommu/intel/intel-pasid.h)2
-rw-r--r--drivers/iommu/intel/svm.c335
8 files changed, 462 insertions, 151 deletions
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
new file mode 100644
index 000000000000..5337ee1584b0
--- /dev/null
+++ b/drivers/iommu/intel/Kconfig
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Intel IOMMU support
+config DMAR_TABLE
+ bool
+
+config INTEL_IOMMU
+ bool "Support for Intel IOMMU using DMA Remapping Devices"
+ depends on PCI_MSI && ACPI && (X86 || IA64)
+ select DMA_OPS
+ select IOMMU_API
+ select IOMMU_IOVA
+ select NEED_DMA_MAP_STATE
+ select DMAR_TABLE
+ select SWIOTLB
+ select IOASID
+ help
+ DMA remapping (DMAR) devices support enables independent address
+ translations for Direct Memory Access (DMA) from devices.
+ These DMA remapping devices are reported via ACPI tables
+ and include PCI device scope covered by these DMA
+ remapping devices.
+
+config INTEL_IOMMU_DEBUGFS
+ bool "Export Intel IOMMU internals in Debugfs"
+ depends on INTEL_IOMMU && IOMMU_DEBUGFS
+ help
+ !!!WARNING!!!
+
+ DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!!
+
+ Expose Intel IOMMU internals in Debugfs.
+
+ This option is -NOT- intended for production environments, and should
+ only be enabled for debugging Intel IOMMU.
+
+config INTEL_IOMMU_SVM
+ bool "Support for Shared Virtual Memory with Intel IOMMU"
+ depends on INTEL_IOMMU && X86_64
+ select PCI_PASID
+ select PCI_PRI
+ select MMU_NOTIFIER
+ select IOASID
+ help
+ Shared Virtual Memory (SVM) provides a facility for devices
+ to access DMA resources through process address space by
+ means of a Process Address Space ID (PASID).
+
+config INTEL_IOMMU_DEFAULT_ON
+ def_bool y
+ prompt "Enable Intel DMA Remapping Devices by default"
+ depends on INTEL_IOMMU
+ help
+ Selecting this option will enable a DMAR device at boot time if
+ one is found. If this option is not selected, DMAR support can
+ be enabled by passing intel_iommu=on to the kernel.
+
+config INTEL_IOMMU_BROKEN_GFX_WA
+ bool "Workaround broken graphics drivers (going away soon)"
+ depends on INTEL_IOMMU && BROKEN && X86
+ help
+ Current Graphics drivers tend to use physical address
+ for DMA and avoid using DMA APIs. Setting this config
+ option permits the IOMMU driver to set a unity map for
+ all the OS-visible memory. Hence the driver can continue
+ to use physical addresses for DMA, at least until this
+ option is removed in the 2.6.32 kernel.
+
+config INTEL_IOMMU_FLOPPY_WA
+ def_bool y
+ depends on INTEL_IOMMU && X86
+ help
+ Floppy disk drivers are known to bypass DMA API calls
+ thereby failing to work when IOMMU is enabled. This
+ workaround will setup a 1:1 mapping for the first
+ 16MiB to make floppy (an ISA device) work.
+
+config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON
+ bool "Enable Intel IOMMU scalable mode by default"
+ depends on INTEL_IOMMU
+ help
+ Selecting this option will enable by default the scalable mode if
+ hardware presents the capability. The scalable mode is defined in
+ VT-d 3.0. The scalable mode capability could be checked by reading
+ /sys/devices/virtual/iommu/dmar*/intel-iommu/ecap. If this option
+ is not selected, scalable mode support could also be enabled by
+ passing intel_iommu=sm_on to the kernel. If not sure, please use
+ the default value.
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
new file mode 100644
index 000000000000..fb8e1e8c8029
--- /dev/null
+++ b/drivers/iommu/intel/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_DMAR_TABLE) += dmar.o
+obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
+obj-$(CONFIG_INTEL_IOMMU) += trace.o
+obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
+obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
+obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index cf1ebb98e418..efea7f02abd9 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -15,7 +15,7 @@
#include <asm/irq_remapping.h>
-#include "intel-pasid.h"
+#include "pasid.h"
struct tbl_walk {
u16 bus;
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 683b812c5c47..93e6345f3414 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1102,6 +1102,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
}
drhd->iommu = iommu;
+ iommu->drhd = drhd;
return 0;
@@ -1438,8 +1439,7 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
/* PASID-based device IOTLB Invalidate */
void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
- u32 pasid, u16 qdep, u64 addr,
- unsigned int size_order, u64 granu)
+ u32 pasid, u16 qdep, u64 addr, unsigned int size_order)
{
unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
@@ -1447,7 +1447,6 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
QI_DEV_IOTLB_PFSID(pfsid);
- desc.qw1 = QI_DEV_EIOTLB_GLOB(granu);
/*
* If S bit is 0, we only flush a single page. If S bit is set,
@@ -1458,9 +1457,26 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
* Max Invs Pending (MIP) is set to 0 for now until we have DIT in
* ECAP.
*/
- desc.qw1 |= addr & ~mask;
- if (size_order)
+ if (addr & GENMASK_ULL(size_order + VTD_PAGE_SHIFT, 0))
+ pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
+ addr, size_order);
+
+ /* Take page address */
+ desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
+
+ if (size_order) {
+ /*
+ * Existing 0s in address below size_order may be the least
+ * significant bit, we must set them to 1s to avoid having
+ * smaller size than desired.
+ */
+ desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
+ VTD_PAGE_SHIFT);
+ /* Clear size_order bit to indicate size */
+ desc.qw1 &= ~mask;
+ /* Set the S bit to indicate flushing more than 1 page */
desc.qw1 |= QI_DEV_EIOTLB_SIZE;
+ }
qi_submit_sync(iommu, &desc, 1, 0);
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 237a470e1e9c..e9864e52b0e9 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -48,7 +48,7 @@
#include <trace/events/intel_iommu.h>
#include "../irq_remapping.h"
-#include "intel-pasid.h"
+#include "pasid.h"
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
@@ -356,6 +356,7 @@ static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int intel_no_bounce;
+static int iommu_skip_te_disable;
#define IDENTMAP_GFX 2
#define IDENTMAP_AZALIA 4
@@ -372,7 +373,7 @@ struct device_domain_info *get_domain_info(struct device *dev)
if (!dev)
return NULL;
- info = dev->archdata.iommu;
+ info = dev_iommu_priv_get(dev);
if (unlikely(info == DUMMY_DEVICE_DOMAIN_INFO ||
info == DEFER_DEVICE_DOMAIN_INFO))
return NULL;
@@ -743,12 +744,12 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
static int iommu_dummy(struct device *dev)
{
- return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
+ return dev_iommu_priv_get(dev) == DUMMY_DEVICE_DOMAIN_INFO;
}
static bool attach_deferred(struct device *dev)
{
- return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO;
+ return dev_iommu_priv_get(dev) == DEFER_DEVICE_DOMAIN_INFO;
}
/**
@@ -778,16 +779,16 @@ is_downstream_to_pci_bridge(struct device *dev, struct device *bridge)
return false;
}
-static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
struct dmar_drhd_unit *drhd = NULL;
+ struct pci_dev *pdev = NULL;
struct intel_iommu *iommu;
struct device *tmp;
- struct pci_dev *pdev = NULL;
u16 segment = 0;
int i;
- if (iommu_dummy(dev))
+ if (!dev || iommu_dummy(dev))
return NULL;
if (dev_is_pci(dev)) {
@@ -818,8 +819,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
if (pdev && pdev->is_virtfn)
goto got_pdev;
- *bus = drhd->devices[i].bus;
- *devfn = drhd->devices[i].devfn;
+ if (bus && devfn) {
+ *bus = drhd->devices[i].bus;
+ *devfn = drhd->devices[i].devfn;
+ }
goto out;
}
@@ -829,8 +832,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
if (pdev && drhd->include_all) {
got_pdev:
- *bus = pdev->bus->number;
- *devfn = pdev->devfn;
+ if (bus && devfn) {
+ *bus = pdev->bus->number;
+ *devfn = pdev->devfn;
+ }
goto out;
}
}
@@ -1629,6 +1634,10 @@ static void iommu_disable_translation(struct intel_iommu *iommu)
u32 sts;
unsigned long flag;
+ if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated &&
+ (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap)))
+ return;
+
raw_spin_lock_irqsave(&iommu->register_lock, flag);
iommu->gcmd &= ~DMA_GCMD_TE;
writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
@@ -2420,7 +2429,7 @@ static inline void unlink_domain_info(struct device_domain_info *info)
list_del(&info->link);
list_del(&info->global);
if (info->dev)
- info->dev->archdata.iommu = NULL;
+ dev_iommu_priv_set(info->dev, NULL);
}
static void domain_remove_dev_info(struct dmar_domain *domain)
@@ -2453,7 +2462,7 @@ static void do_deferred_attach(struct device *dev)
{
struct iommu_domain *domain;
- dev->archdata.iommu = NULL;
+ dev_iommu_priv_set(dev, NULL);
domain = iommu_get_domain_for_dev(dev);
if (domain)
intel_iommu_attach_device(domain, dev);
@@ -2599,7 +2608,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
list_add(&info->link, &domain->devices);
list_add(&info->global, &device_domain_list);
if (dev)
- dev->archdata.iommu = info;
+ dev_iommu_priv_set(dev, info);
spin_unlock_irqrestore(&device_domain_lock, flags);
/* PASID table is mandatory for a PCI device in scalable mode. */
@@ -4004,7 +4013,7 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) {
pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
- pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+ dev_iommu_priv_set(&pdev->dev, DUMMY_DEVICE_DOMAIN_INFO);
}
}
DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
@@ -4039,11 +4048,12 @@ static void __init init_no_remapping_devices(void)
/* This IOMMU has *only* gfx devices. Either bypass it or
set the gfx_mapped flag, as appropriate */
+ drhd->gfx_dedicated = 1;
if (!dmar_map_gfx) {
drhd->ignored = 1;
for_each_active_dev_scope(drhd->devices,
drhd->devices_cnt, i, dev)
- dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
+ dev_iommu_priv_set(dev, DUMMY_DEVICE_DOMAIN_INFO);
}
}
}
@@ -5146,11 +5156,10 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
struct device *dev)
{
int ret;
- u8 bus, devfn;
unsigned long flags;
struct intel_iommu *iommu;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
@@ -5236,9 +5245,8 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu;
int addr_width;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return -ENODEV;
@@ -5416,7 +5424,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
sid = PCI_DEVID(bus, devfn);
/* Size is only valid in address selective invalidation */
- if (inv_info->granularity != IOMMU_INV_GRANU_PASID)
+ if (inv_info->granularity == IOMMU_INV_GRANU_ADDR)
size = to_vtd_size(inv_info->addr_info.granule_size,
inv_info->addr_info.nb_granules);
@@ -5425,6 +5433,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
IOMMU_CACHE_INV_TYPE_NR) {
int granu = 0;
u64 pasid = 0;
+ u64 addr = 0;
granu = to_vtd_granularity(cache_type, inv_info->granularity);
if (granu == -EINVAL) {
@@ -5446,13 +5455,12 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
switch (BIT(cache_type)) {
case IOMMU_CACHE_INV_TYPE_IOTLB:
+ /* HW will ignore LSB bits based on address mask */
if (inv_info->granularity == IOMMU_INV_GRANU_ADDR &&
size &&
(inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) {
- pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n",
+ pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n",
inv_info->addr_info.addr, size);
- ret = -ERANGE;
- goto out_unlock;
}
/*
@@ -5464,25 +5472,35 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev,
(granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size,
inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF);
+ if (!info->ats_enabled)
+ break;
/*
* Always flush device IOTLB if ATS is enabled. vIOMMU
* in the guest may assume IOTLB flush is inclusive,
* which is more efficient.
*/
- if (info->ats_enabled)
- qi_flush_dev_iotlb_pasid(iommu, sid,
- info->pfsid, pasid,
- info->ats_qdep,
- inv_info->addr_info.addr,
- size, granu);
- break;
+ fallthrough;
case IOMMU_CACHE_INV_TYPE_DEV_IOTLB:
+ /*
+ * PASID based device TLB invalidation does not support
+ * IOMMU_INV_GRANU_PASID granularity but only supports
+ * IOMMU_INV_GRANU_ADDR.
+ * The equivalent of that is we set the size to be the
+ * entire range of 64 bit. User only provides PASID info
+ * without address info. So we set addr to 0.
+ */
+ if (inv_info->granularity == IOMMU_INV_GRANU_PASID) {
+ size = 64 - VTD_PAGE_SHIFT;
+ addr = 0;
+ } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) {
+ addr = inv_info->addr_info.addr;
+ }
+
if (info->ats_enabled)
qi_flush_dev_iotlb_pasid(iommu, sid,
info->pfsid, pasid,
- info->ats_qdep,
- inv_info->addr_info.addr,
- size, granu);
+ info->ats_qdep, addr,
+ size);
else
pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n");
break;
@@ -5658,14 +5676,13 @@ static bool intel_iommu_capable(enum iommu_cap cap)
static struct iommu_device *intel_iommu_probe_device(struct device *dev)
{
struct intel_iommu *iommu;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return ERR_PTR(-ENODEV);
if (translation_pre_enabled(iommu))
- dev->archdata.iommu = DEFER_DEVICE_DOMAIN_INFO;
+ dev_iommu_priv_set(dev, DEFER_DEVICE_DOMAIN_INFO);
return &iommu->iommu;
}
@@ -5673,9 +5690,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
static void intel_iommu_release_device(struct device *dev)
{
struct intel_iommu *iommu;
- u8 bus, devfn;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
return;
@@ -5825,37 +5841,14 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev)
return generic_device_group(dev);
}
-#ifdef CONFIG_INTEL_IOMMU_SVM
-struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
-{
- struct intel_iommu *iommu;
- u8 bus, devfn;
-
- if (iommu_dummy(dev)) {
- dev_warn(dev,
- "No IOMMU translation for device; cannot enable SVM\n");
- return NULL;
- }
-
- iommu = device_to_iommu(dev, &bus, &devfn);
- if ((!iommu)) {
- dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
- return NULL;
- }
-
- return iommu;
-}
-#endif /* CONFIG_INTEL_IOMMU_SVM */
-
static int intel_iommu_enable_auxd(struct device *dev)
{
struct device_domain_info *info;
struct intel_iommu *iommu;
unsigned long flags;
- u8 bus, devfn;
int ret;
- iommu = device_to_iommu(dev, &bus, &devfn);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu || dmar_disabled)
return -EINVAL;
@@ -6080,6 +6073,7 @@ const struct iommu_ops intel_iommu_ops = {
.sva_bind = intel_svm_bind,
.sva_unbind = intel_svm_unbind,
.sva_get_pasid = intel_svm_get_pasid,
+ .page_response = intel_svm_page_response,
#endif
};
@@ -6182,6 +6176,27 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_g
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
+static void quirk_igfx_skip_te_disable(struct pci_dev *dev)
+{
+ unsigned short ver;
+
+ if (!IS_GFX_DEVICE(dev))
+ return;
+
+ ver = (dev->device >> 8) & 0xff;
+ if (ver != 0x45 && ver != 0x46 && ver != 0x4c &&
+ ver != 0x4e && ver != 0x8a && ver != 0x98 &&
+ ver != 0x9a)
+ return;
+
+ if (risky_device(dev))
+ return;
+
+ pci_info(dev, "Skip IOMMU disabling for graphics\n");
+ iommu_skip_te_disable = 1;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable);
+
/* On Tylersburg chipsets, some BIOSes have been known to enable the
ISOCH DMAR unit for the Azalia sound device, but not give it any
TLB entries, which causes it to deadlock. Check for that. We do
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index c81f0f17c6ba..e6faedf42fd4 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -19,7 +19,7 @@
#include <linux/pci-ats.h>
#include <linux/spinlock.h>
-#include "intel-pasid.h"
+#include "pasid.h"
/*
* Intel IOMMU system wide PASID name space:
@@ -486,7 +486,16 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
qdep = info->ats_qdep;
pfsid = info->pfsid;
- qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
+ /*
+ * When PASID 0 is used, it indicates RID2PASID(DMA request w/o PASID),
+ * devTLB flush w/o PASID should be used. For non-zero PASID under
+ * SVA usage, device could do DMA with multiple PASIDs. It is more
+ * efficient to flush devTLB specific to the PASID.
+ */
+ if (pasid == PASID_RID2PASID)
+ qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
+ else
+ qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}
void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
diff --git a/drivers/iommu/intel/intel-pasid.h b/drivers/iommu/intel/pasid.h
index c5318d40e0fa..c9850766c3a9 100644
--- a/drivers/iommu/intel/intel-pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * intel-pasid.h - PASID idr, table and entry header
+ * pasid.h - PASID idr, table and entry header
*
* Copyright (C) 2018 Intel Corporation
*
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 6c87c807a0ab..442623ac4b47 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -20,7 +20,7 @@
#include <linux/ioasid.h>
#include <asm/page.h>
-#include "intel-pasid.h"
+#include "pasid.h"
static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, int pasid);
@@ -228,13 +228,57 @@ static LIST_HEAD(global_svm_list);
list_for_each_entry((sdev), &(svm)->devs, list) \
if ((d) != (sdev)->dev) {} else
+static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
+ struct intel_svm **rsvm,
+ struct intel_svm_dev **rsdev)
+{
+ struct intel_svm_dev *d, *sdev = NULL;
+ struct intel_svm *svm;
+
+ /* The caller should hold the pasid_mutex lock */
+ if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
+ return -EINVAL;
+
+ if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
+ return -EINVAL;
+
+ svm = ioasid_find(NULL, pasid, NULL);
+ if (IS_ERR(svm))
+ return PTR_ERR(svm);
+
+ if (!svm)
+ goto out;
+
+ /*
+ * If we found svm for the PASID, there must be at least one device
+ * bond.
+ */
+ if (WARN_ON(list_empty(&svm->devs)))
+ return -EINVAL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(d, &svm->devs, list) {
+ if (d->dev == dev) {
+ sdev = d;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+out:
+ *rsvm = svm;
+ *rsdev = sdev;
+
+ return 0;
+}
+
int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
struct iommu_gpasid_bind_data *data)
{
- struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+ struct intel_svm_dev *sdev = NULL;
struct dmar_domain *dmar_domain;
- struct intel_svm_dev *sdev;
- struct intel_svm *svm;
+ struct intel_svm *svm = NULL;
int ret = 0;
if (WARN_ON(!iommu) || !data)
@@ -261,39 +305,23 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
dmar_domain = to_dmar_domain(domain);
mutex_lock(&pasid_mutex);
- svm = ioasid_find(NULL, data->hpasid, NULL);
- if (IS_ERR(svm)) {
- ret = PTR_ERR(svm);
+ ret = pasid_to_svm_sdev(dev, data->hpasid, &svm, &sdev);
+ if (ret)
goto out;
- }
- if (svm) {
+ if (sdev) {
/*
- * If we found svm for the PASID, there must be at
- * least one device bond, otherwise svm should be freed.
+ * Do not allow multiple bindings of the same device-PASID since
+ * there is only one SL page tables per PASID. We may revisit
+ * once sharing PGD across domains are supported.
*/
- if (WARN_ON(list_empty(&svm->devs))) {
- ret = -EINVAL;
- goto out;
- }
+ dev_warn_ratelimited(dev, "Already bound with PASID %u\n",
+ svm->pasid);
+ ret = -EBUSY;
+ goto out;
+ }
- for_each_svm_dev(sdev, svm, dev) {
- /*
- * For devices with aux domains, we should allow
- * multiple bind calls with the same PASID and pdev.
- */
- if (iommu_dev_feature_enabled(dev,
- IOMMU_DEV_FEAT_AUX)) {
- sdev->users++;
- } else {
- dev_warn_ratelimited(dev,
- "Already bound with PASID %u\n",
- svm->pasid);
- ret = -EBUSY;
- }
- goto out;
- }
- } else {
+ if (!svm) {
/* We come here when PASID has never been bond to a device. */
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm) {
@@ -373,28 +401,20 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
int intel_svm_unbind_gpasid(struct device *dev, int pasid)
{
- struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct intel_svm_dev *sdev;
struct intel_svm *svm;
- int ret = -EINVAL;
+ int ret;
if (WARN_ON(!iommu))
return -EINVAL;
mutex_lock(&pasid_mutex);
- svm = ioasid_find(NULL, pasid, NULL);
- if (!svm) {
- ret = -EINVAL;
- goto out;
- }
-
- if (IS_ERR(svm)) {
- ret = PTR_ERR(svm);
+ ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
+ if (ret)
goto out;
- }
- for_each_svm_dev(sdev, svm, dev) {
- ret = 0;
+ if (sdev) {
if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
sdev->users--;
if (!sdev->users) {
@@ -418,7 +438,6 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid)
kfree(svm);
}
}
- break;
}
out:
mutex_unlock(&pasid_mutex);
@@ -430,7 +449,7 @@ static int
intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
struct mm_struct *mm, struct intel_svm_dev **sd)
{
- struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
+ struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct device_domain_info *info;
struct intel_svm_dev *sdev;
struct intel_svm *svm = NULL;
@@ -596,7 +615,7 @@ success:
if (sd)
*sd = sdev;
ret = 0;
- out:
+out:
return ret;
}
@@ -608,21 +627,15 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid)
struct intel_svm *svm;
int ret = -EINVAL;
- iommu = intel_svm_device_to_iommu(dev);
+ iommu = device_to_iommu(dev, NULL, NULL);
if (!iommu)
goto out;
- svm = ioasid_find(NULL, pasid, NULL);
- if (!svm)
- goto out;
-
- if (IS_ERR(svm)) {
- ret = PTR_ERR(svm);
+ ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
+ if (ret)
goto out;
- }
- for_each_svm_dev(sdev, svm, dev) {
- ret = 0;
+ if (sdev) {
sdev->users--;
if (!sdev->users) {
list_del_rcu(&sdev->list);
@@ -651,10 +664,8 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid)
kfree(svm);
}
}
- break;
}
- out:
-
+out:
return ret;
}
@@ -800,8 +811,63 @@ qi_retry:
}
}
+static int prq_to_iommu_prot(struct page_req_dsc *req)
+{
+ int prot = 0;
+
+ if (req->rd_req)
+ prot |= IOMMU_FAULT_PERM_READ;
+ if (req->wr_req)
+ prot |= IOMMU_FAULT_PERM_WRITE;
+ if (req->exe_req)
+ prot |= IOMMU_FAULT_PERM_EXEC;
+ if (req->pm_req)
+ prot |= IOMMU_FAULT_PERM_PRIV;
+
+ return prot;
+}
+
+static int
+intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
+{
+ struct iommu_fault_event event;
+
+ if (!dev || !dev_is_pci(dev))
+ return -ENODEV;
+
+ /* Fill in event data for device specific processing */
+ memset(&event, 0, sizeof(struct iommu_fault_event));
+ event.fault.type = IOMMU_FAULT_PAGE_REQ;
+ event.fault.prm.addr = desc->addr;
+ event.fault.prm.pasid = desc->pasid;
+ event.fault.prm.grpid = desc->prg_index;
+ event.fault.prm.perm = prq_to_iommu_prot(desc);
+
+ if (desc->lpig)
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+ if (desc->pasid_present) {
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
+ }
+ if (desc->priv_data_present) {
+ /*
+ * Set last page in group bit if private data is present,
+ * page response is required as it does for LPIG.
+ * iommu_report_device_fault() doesn't understand this vendor
+ * specific requirement thus we set last_page as a workaround.
+ */
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+ event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+ memcpy(event.fault.prm.private_data, desc->priv_data,
+ sizeof(desc->priv_data));
+ }
+
+ return iommu_report_device_fault(dev, &event);
+}
+
static irqreturn_t prq_event_thread(int irq, void *d)
{
+ struct intel_svm_dev *sdev = NULL;
struct intel_iommu *iommu = d;
struct intel_svm *svm = NULL;
int head, tail, handled = 0;
@@ -813,7 +879,6 @@ static irqreturn_t prq_event_thread(int irq, void *d)
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
while (head != tail) {
- struct intel_svm_dev *sdev;
struct vm_area_struct *vma;
struct page_req_dsc *req;
struct qi_desc resp;
@@ -849,6 +914,20 @@ static irqreturn_t prq_event_thread(int irq, void *d)
}
}
+ if (!sdev || sdev->sid != req->rid) {
+ struct intel_svm_dev *t;
+
+ sdev = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(t, &svm->devs, list) {
+ if (t->sid == req->rid) {
+ sdev = t;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ }
+
result = QI_RESP_INVALID;
/* Since we're using init_mm.pgd directly, we should never take
* any faults on kernel addresses. */
@@ -859,6 +938,17 @@ static irqreturn_t prq_event_thread(int irq, void *d)
if (!is_canonical_address(address))
goto bad_req;
+ /*
+ * If prq is to be handled outside iommu driver via receiver of
+ * the fault notifiers, we skip the page response here.
+ */
+ if (svm->flags & SVM_FLAG_GUEST_MODE) {
+ if (sdev && !intel_svm_prq_report(sdev->dev, req))
+ goto prq_advance;
+ else
+ goto bad_req;
+ }
+
/* If the mm is already defunct, don't handle faults. */
if (!mmget_not_zero(svm->mm))
goto bad_req;
@@ -877,24 +967,11 @@ static irqreturn_t prq_event_thread(int irq, void *d)
goto invalid;
result = QI_RESP_SUCCESS;
- invalid:
+invalid:
mmap_read_unlock(svm->mm);
mmput(svm->mm);
- bad_req:
- /* Accounting for major/minor faults? */
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list) {
- if (sdev->sid == req->rid)
- break;
- }
- /* Other devices can go away, but the drivers are not permitted
- * to unbind while any page faults might be in flight. So it's
- * OK to drop the 'lock' here now we have it. */
- rcu_read_unlock();
-
- if (WARN_ON(&sdev->list == &svm->devs))
- sdev = NULL;
-
+bad_req:
+ WARN_ON(!sdev);
if (sdev && sdev->ops && sdev->ops->fault_cb) {
int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
(req->exe_req << 1) | (req->pm_req);
@@ -905,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
and these can be NULL. Do not use them below this point! */
sdev = NULL;
svm = NULL;
- no_pasid:
+no_pasid:
if (req->lpig || req->priv_data_present) {
/*
* Per VT-d spec. v3.0 ch7.7, system software must
@@ -930,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
resp.qw3 = 0;
qi_submit_sync(iommu, &resp, 1, 0);
}
+prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
@@ -1000,3 +1078,102 @@ int intel_svm_get_pasid(struct iommu_sva *sva)
return pasid;
}
+
+int intel_svm_page_response(struct device *dev,
+ struct iommu_fault_event *evt,
+ struct iommu_page_response *msg)
+{
+ struct iommu_fault_page_request *prm;
+ struct intel_svm_dev *sdev = NULL;
+ struct intel_svm *svm = NULL;
+ struct intel_iommu *iommu;
+ bool private_present;
+ bool pasid_present;
+ bool last_page;
+ u8 bus, devfn;
+ int ret = 0;
+ u16 sid;
+
+ if (!dev || !dev_is_pci(dev))
+ return -ENODEV;
+
+ iommu = device_to_iommu(dev, &bus, &devfn);
+ if (!iommu)
+ return -ENODEV;
+
+ if (!msg || !evt)
+ return -EINVAL;
+
+ mutex_lock(&pasid_mutex);
+
+ prm = &evt->fault.prm;
+ sid = PCI_DEVID(bus, devfn);
+ pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
+ last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
+
+ if (!pasid_present) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
+ if (ret || !sdev) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /*
+ * For responses from userspace, need to make sure that the
+ * pasid has been bound to its mm.
+ */
+ if (svm->flags & SVM_FLAG_GUEST_MODE) {
+ struct mm_struct *mm;
+
+ mm = get_task_mm(current);
+ if (!mm) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (mm != svm->mm) {
+ ret = -ENODEV;
+ mmput(mm);
+ goto out;
+ }
+
+ mmput(mm);
+ }
+
+ /*
+ * Per VT-d spec. v3.0 ch7.7, system software must respond
+ * with page group response if private data is present (PDP)
+ * or last page in group (LPIG) bit is set. This is an
+ * additional VT-d requirement beyond PCI ATS spec.
+ */
+ if (last_page || private_present) {
+ struct qi_desc desc;
+
+ desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
+ QI_PGRP_PASID_P(pasid_present) |
+ QI_PGRP_PDP(private_present) |
+ QI_PGRP_RESP_CODE(msg->code) |
+ QI_PGRP_RESP_TYPE;
+ desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
+ desc.qw2 = 0;
+ desc.qw3 = 0;
+ if (private_present)
+ memcpy(&desc.qw2, prm->private_data,
+ sizeof(prm->private_data));
+
+ qi_submit_sync(iommu, &desc, 1, 0);
+ }
+out:
+ mutex_unlock(&pasid_mutex);
+ return ret;
+}