diff options
Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r-- | drivers/iommu/intel/debugfs.c | 2 | ||||
-rw-r--r-- | drivers/iommu/intel/dmar.c | 26 | ||||
-rw-r--r-- | drivers/iommu/intel/iommu.c | 123 | ||||
-rw-r--r-- | drivers/iommu/intel/irq_remapping.c | 2 | ||||
-rw-r--r-- | drivers/iommu/intel/pasid.c | 13 | ||||
-rw-r--r-- | drivers/iommu/intel/pasid.h (renamed from drivers/iommu/intel/intel-pasid.h) | 2 | ||||
-rw-r--r-- | drivers/iommu/intel/svm.c | 335 |
7 files changed, 360 insertions, 143 deletions
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index cf1ebb98e418..efea7f02abd9 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -15,7 +15,7 @@ #include <asm/irq_remapping.h> -#include "intel-pasid.h" +#include "pasid.h" struct tbl_walk { u16 bus; diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 683b812c5c47..93e6345f3414 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1102,6 +1102,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) } drhd->iommu = iommu; + iommu->drhd = drhd; return 0; @@ -1438,8 +1439,7 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, /* PASID-based device IOTLB Invalidate */ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, - u32 pasid, u16 qdep, u64 addr, - unsigned int size_order, u64 granu) + u32 pasid, u16 qdep, u64 addr, unsigned int size_order) { unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; @@ -1447,7 +1447,6 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); - desc.qw1 = QI_DEV_EIOTLB_GLOB(granu); /* * If S bit is 0, we only flush a single page. If S bit is set, @@ -1458,9 +1457,26 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, * Max Invs Pending (MIP) is set to 0 for now until we have DIT in * ECAP. */ - desc.qw1 |= addr & ~mask; - if (size_order) + if (addr & GENMASK_ULL(size_order + VTD_PAGE_SHIFT, 0)) + pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n", + addr, size_order); + + /* Take page address */ + desc.qw1 = QI_DEV_EIOTLB_ADDR(addr); + + if (size_order) { + /* + * Existing 0s in address below size_order may be the least + * significant bit, we must set them to 1s to avoid having + * smaller size than desired. + */ + desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1, + VTD_PAGE_SHIFT); + /* Clear size_order bit to indicate size */ + desc.qw1 &= ~mask; + /* Set the S bit to indicate flushing more than 1 page */ desc.qw1 |= QI_DEV_EIOTLB_SIZE; + } qi_submit_sync(iommu, &desc, 1, 0); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2ce490c2eab8..ca557d351518 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -48,7 +48,7 @@ #include <trace/events/intel_iommu.h> #include "../irq_remapping.h" -#include "intel-pasid.h" +#include "pasid.h" #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE @@ -356,6 +356,7 @@ static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int iommu_identity_mapping; static int intel_no_bounce; +static int iommu_skip_te_disable; #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 @@ -778,16 +779,16 @@ is_downstream_to_pci_bridge(struct device *dev, struct device *bridge) return false; } -static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) +struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) { struct dmar_drhd_unit *drhd = NULL; + struct pci_dev *pdev = NULL; struct intel_iommu *iommu; struct device *tmp; - struct pci_dev *pdev = NULL; u16 segment = 0; int i; - if (iommu_dummy(dev)) + if (!dev || iommu_dummy(dev)) return NULL; if (dev_is_pci(dev)) { @@ -818,8 +819,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf if (pdev && pdev->is_virtfn) goto got_pdev; - *bus = drhd->devices[i].bus; - *devfn = drhd->devices[i].devfn; + if (bus && devfn) { + *bus = drhd->devices[i].bus; + *devfn = drhd->devices[i].devfn; + } goto out; } @@ -829,8 +832,10 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf if (pdev && drhd->include_all) { got_pdev: - *bus = pdev->bus->number; - *devfn = pdev->devfn; + if (bus && devfn) { + *bus = pdev->bus->number; + *devfn = pdev->devfn; + } goto out; } } @@ -1629,6 +1634,10 @@ static void iommu_disable_translation(struct intel_iommu *iommu) u32 sts; unsigned long flag; + if (iommu_skip_te_disable && iommu->drhd->gfx_dedicated && + (cap_read_drain(iommu->cap) || cap_write_drain(iommu->cap))) + return; + raw_spin_lock_irqsave(&iommu->register_lock, flag); iommu->gcmd &= ~DMA_GCMD_TE; writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); @@ -4039,6 +4048,7 @@ static void __init init_no_remapping_devices(void) /* This IOMMU has *only* gfx devices. Either bypass it or set the gfx_mapped flag, as appropriate */ + drhd->gfx_dedicated = 1; if (!dmar_map_gfx) { drhd->ignored = 1; for_each_active_dev_scope(drhd->devices, @@ -5146,11 +5156,10 @@ static int aux_domain_add_dev(struct dmar_domain *domain, struct device *dev) { int ret; - u8 bus, devfn; unsigned long flags; struct intel_iommu *iommu; - iommu = device_to_iommu(dev, &bus, &devfn); + iommu = device_to_iommu(dev, NULL, NULL); if (!iommu) return -ENODEV; @@ -5236,9 +5245,8 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, struct dmar_domain *dmar_domain = to_dmar_domain(domain); struct intel_iommu *iommu; int addr_width; - u8 bus, devfn; - iommu = device_to_iommu(dev, &bus, &devfn); + iommu = device_to_iommu(dev, NULL, NULL); if (!iommu) return -ENODEV; @@ -5416,7 +5424,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, sid = PCI_DEVID(bus, devfn); /* Size is only valid in address selective invalidation */ - if (inv_info->granularity != IOMMU_INV_GRANU_PASID) + if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) size = to_vtd_size(inv_info->addr_info.granule_size, inv_info->addr_info.nb_granules); @@ -5425,6 +5433,7 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, IOMMU_CACHE_INV_TYPE_NR) { int granu = 0; u64 pasid = 0; + u64 addr = 0; granu = to_vtd_granularity(cache_type, inv_info->granularity); if (granu == -EINVAL) { @@ -5446,13 +5455,12 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, switch (BIT(cache_type)) { case IOMMU_CACHE_INV_TYPE_IOTLB: + /* HW will ignore LSB bits based on address mask */ if (inv_info->granularity == IOMMU_INV_GRANU_ADDR && size && (inv_info->addr_info.addr & ((BIT(VTD_PAGE_SHIFT + size)) - 1))) { - pr_err_ratelimited("Address out of range, 0x%llx, size order %llu\n", + pr_err_ratelimited("User address not aligned, 0x%llx, size order %llu\n", inv_info->addr_info.addr, size); - ret = -ERANGE; - goto out_unlock; } /* @@ -5464,25 +5472,35 @@ intel_iommu_sva_invalidate(struct iommu_domain *domain, struct device *dev, (granu == QI_GRAN_NONG_PASID) ? -1 : 1 << size, inv_info->addr_info.flags & IOMMU_INV_ADDR_FLAGS_LEAF); + if (!info->ats_enabled) + break; /* * Always flush device IOTLB if ATS is enabled. vIOMMU * in the guest may assume IOTLB flush is inclusive, * which is more efficient. */ - if (info->ats_enabled) - qi_flush_dev_iotlb_pasid(iommu, sid, - info->pfsid, pasid, - info->ats_qdep, - inv_info->addr_info.addr, - size, granu); - break; + fallthrough; case IOMMU_CACHE_INV_TYPE_DEV_IOTLB: + /* + * PASID based device TLB invalidation does not support + * IOMMU_INV_GRANU_PASID granularity but only supports + * IOMMU_INV_GRANU_ADDR. + * The equivalent of that is we set the size to be the + * entire range of 64 bit. User only provides PASID info + * without address info. So we set addr to 0. + */ + if (inv_info->granularity == IOMMU_INV_GRANU_PASID) { + size = 64 - VTD_PAGE_SHIFT; + addr = 0; + } else if (inv_info->granularity == IOMMU_INV_GRANU_ADDR) { + addr = inv_info->addr_info.addr; + } + if (info->ats_enabled) qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid, pasid, - info->ats_qdep, - inv_info->addr_info.addr, - size, granu); + info->ats_qdep, addr, + size); else pr_warn_ratelimited("Passdown device IOTLB flush w/o ATS!\n"); break; @@ -5658,9 +5676,8 @@ static bool intel_iommu_capable(enum iommu_cap cap) static struct iommu_device *intel_iommu_probe_device(struct device *dev) { struct intel_iommu *iommu; - u8 bus, devfn; - iommu = device_to_iommu(dev, &bus, &devfn); + iommu = device_to_iommu(dev, NULL, NULL); if (!iommu) return ERR_PTR(-ENODEV); @@ -5673,9 +5690,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) static void intel_iommu_release_device(struct device *dev) { struct intel_iommu *iommu; - u8 bus, devfn; - iommu = device_to_iommu(dev, &bus, &devfn); + iommu = device_to_iommu(dev, NULL, NULL); if (!iommu) return; @@ -5825,37 +5841,14 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev) return generic_device_group(dev); } -#ifdef CONFIG_INTEL_IOMMU_SVM -struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) -{ - struct intel_iommu *iommu; - u8 bus, devfn; - - if (iommu_dummy(dev)) { - dev_warn(dev, - "No IOMMU translation for device; cannot enable SVM\n"); - return NULL; - } - - iommu = device_to_iommu(dev, &bus, &devfn); - if ((!iommu)) { - dev_err(dev, "No IOMMU for device; cannot enable SVM\n"); - return NULL; - } - - return iommu; -} -#endif /* CONFIG_INTEL_IOMMU_SVM */ - static int intel_iommu_enable_auxd(struct device *dev) { struct device_domain_info *info; struct intel_iommu *iommu; unsigned long flags; - u8 bus, devfn; int ret; - iommu = device_to_iommu(dev, &bus, &devfn); + iommu = device_to_iommu(dev, NULL, NULL); if (!iommu || dmar_disabled) return -EINVAL; @@ -6080,6 +6073,7 @@ const struct iommu_ops intel_iommu_ops = { .sva_bind = intel_svm_bind, .sva_unbind = intel_svm_unbind, .sva_get_pasid = intel_svm_get_pasid, + .page_response = intel_svm_page_response, #endif }; @@ -6182,6 +6176,27 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_g DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); +static void quirk_igfx_skip_te_disable(struct pci_dev *dev) +{ + unsigned short ver; + + if (!IS_GFX_DEVICE(dev)) + return; + + ver = (dev->device >> 8) & 0xff; + if (ver != 0x45 && ver != 0x46 && ver != 0x4c && + ver != 0x4e && ver != 0x8a && ver != 0x98 && + ver != 0x9a) + return; + + if (risky_device(dev)) + return; + + pci_info(dev, "Skip IOMMU disabling for graphics\n"); + iommu_skip_te_disable = 1; +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_igfx_skip_te_disable); + /* On Tylersburg chipsets, some BIOSes have been known to enable the ISOCH DMAR unit for the Azalia sound device, but not give it any TLB entries, which causes it to deadlock. Check for that. We do diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 7f8769800815..9564d23d094f 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -563,8 +563,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) 0, INTR_REMAP_TABLE_ENTRIES, fn, &intel_ir_domain_ops, iommu); - irq_domain_free_fwnode(fn); if (!iommu->ir_domain) { + irq_domain_free_fwnode(fn); pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); goto out_free_bitmap; } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index c81f0f17c6ba..e6faedf42fd4 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -19,7 +19,7 @@ #include <linux/pci-ats.h> #include <linux/spinlock.h> -#include "intel-pasid.h" +#include "pasid.h" /* * Intel IOMMU system wide PASID name space: @@ -486,7 +486,16 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, qdep = info->ats_qdep; pfsid = info->pfsid; - qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); + /* + * When PASID 0 is used, it indicates RID2PASID(DMA request w/o PASID), + * devTLB flush w/o PASID should be used. For non-zero PASID under + * SVA usage, device could do DMA with multiple PASIDs. It is more + * efficient to flush devTLB specific to the PASID. + */ + if (pasid == PASID_RID2PASID) + qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); + else + qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT); } void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, diff --git a/drivers/iommu/intel/intel-pasid.h b/drivers/iommu/intel/pasid.h index c5318d40e0fa..c9850766c3a9 100644 --- a/drivers/iommu/intel/intel-pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * intel-pasid.h - PASID idr, table and entry header + * pasid.h - PASID idr, table and entry header * * Copyright (C) 2018 Intel Corporation * diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 6c87c807a0ab..442623ac4b47 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -20,7 +20,7 @@ #include <linux/ioasid.h> #include <asm/page.h> -#include "intel-pasid.h" +#include "pasid.h" static irqreturn_t prq_event_thread(int irq, void *d); static void intel_svm_drain_prq(struct device *dev, int pasid); @@ -228,13 +228,57 @@ static LIST_HEAD(global_svm_list); list_for_each_entry((sdev), &(svm)->devs, list) \ if ((d) != (sdev)->dev) {} else +static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, + struct intel_svm **rsvm, + struct intel_svm_dev **rsdev) +{ + struct intel_svm_dev *d, *sdev = NULL; + struct intel_svm *svm; + + /* The caller should hold the pasid_mutex lock */ + if (WARN_ON(!mutex_is_locked(&pasid_mutex))) + return -EINVAL; + + if (pasid == INVALID_IOASID || pasid >= PASID_MAX) + return -EINVAL; + + svm = ioasid_find(NULL, pasid, NULL); + if (IS_ERR(svm)) + return PTR_ERR(svm); + + if (!svm) + goto out; + + /* + * If we found svm for the PASID, there must be at least one device + * bond. + */ + if (WARN_ON(list_empty(&svm->devs))) + return -EINVAL; + + rcu_read_lock(); + list_for_each_entry_rcu(d, &svm->devs, list) { + if (d->dev == dev) { + sdev = d; + break; + } + } + rcu_read_unlock(); + +out: + *rsvm = svm; + *rsdev = sdev; + + return 0; +} + int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, struct iommu_gpasid_bind_data *data) { - struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct intel_svm_dev *sdev = NULL; struct dmar_domain *dmar_domain; - struct intel_svm_dev *sdev; - struct intel_svm *svm; + struct intel_svm *svm = NULL; int ret = 0; if (WARN_ON(!iommu) || !data) @@ -261,39 +305,23 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, dmar_domain = to_dmar_domain(domain); mutex_lock(&pasid_mutex); - svm = ioasid_find(NULL, data->hpasid, NULL); - if (IS_ERR(svm)) { - ret = PTR_ERR(svm); + ret = pasid_to_svm_sdev(dev, data->hpasid, &svm, &sdev); + if (ret) goto out; - } - if (svm) { + if (sdev) { /* - * If we found svm for the PASID, there must be at - * least one device bond, otherwise svm should be freed. + * Do not allow multiple bindings of the same device-PASID since + * there is only one SL page tables per PASID. We may revisit + * once sharing PGD across domains are supported. */ - if (WARN_ON(list_empty(&svm->devs))) { - ret = -EINVAL; - goto out; - } + dev_warn_ratelimited(dev, "Already bound with PASID %u\n", + svm->pasid); + ret = -EBUSY; + goto out; + } - for_each_svm_dev(sdev, svm, dev) { - /* - * For devices with aux domains, we should allow - * multiple bind calls with the same PASID and pdev. - */ - if (iommu_dev_feature_enabled(dev, - IOMMU_DEV_FEAT_AUX)) { - sdev->users++; - } else { - dev_warn_ratelimited(dev, - "Already bound with PASID %u\n", - svm->pasid); - ret = -EBUSY; - } - goto out; - } - } else { + if (!svm) { /* We come here when PASID has never been bond to a device. */ svm = kzalloc(sizeof(*svm), GFP_KERNEL); if (!svm) { @@ -373,28 +401,20 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, int intel_svm_unbind_gpasid(struct device *dev, int pasid) { - struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); struct intel_svm_dev *sdev; struct intel_svm *svm; - int ret = -EINVAL; + int ret; if (WARN_ON(!iommu)) return -EINVAL; mutex_lock(&pasid_mutex); - svm = ioasid_find(NULL, pasid, NULL); - if (!svm) { - ret = -EINVAL; - goto out; - } - - if (IS_ERR(svm)) { - ret = PTR_ERR(svm); + ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); + if (ret) goto out; - } - for_each_svm_dev(sdev, svm, dev) { - ret = 0; + if (sdev) { if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) sdev->users--; if (!sdev->users) { @@ -418,7 +438,6 @@ int intel_svm_unbind_gpasid(struct device *dev, int pasid) kfree(svm); } } - break; } out: mutex_unlock(&pasid_mutex); @@ -430,7 +449,7 @@ static int intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops, struct mm_struct *mm, struct intel_svm_dev **sd) { - struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); struct device_domain_info *info; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; @@ -596,7 +615,7 @@ success: if (sd) *sd = sdev; ret = 0; - out: +out: return ret; } @@ -608,21 +627,15 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid) struct intel_svm *svm; int ret = -EINVAL; - iommu = intel_svm_device_to_iommu(dev); + iommu = device_to_iommu(dev, NULL, NULL); if (!iommu) goto out; - svm = ioasid_find(NULL, pasid, NULL); - if (!svm) - goto out; - - if (IS_ERR(svm)) { - ret = PTR_ERR(svm); + ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); + if (ret) goto out; - } - for_each_svm_dev(sdev, svm, dev) { - ret = 0; + if (sdev) { sdev->users--; if (!sdev->users) { list_del_rcu(&sdev->list); @@ -651,10 +664,8 @@ static int intel_svm_unbind_mm(struct device *dev, int pasid) kfree(svm); } } - break; } - out: - +out: return ret; } @@ -800,8 +811,63 @@ qi_retry: } } +static int prq_to_iommu_prot(struct page_req_dsc *req) +{ + int prot = 0; + + if (req->rd_req) + prot |= IOMMU_FAULT_PERM_READ; + if (req->wr_req) + prot |= IOMMU_FAULT_PERM_WRITE; + if (req->exe_req) + prot |= IOMMU_FAULT_PERM_EXEC; + if (req->pm_req) + prot |= IOMMU_FAULT_PERM_PRIV; + + return prot; +} + +static int +intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc) +{ + struct iommu_fault_event event; + + if (!dev || !dev_is_pci(dev)) + return -ENODEV; + + /* Fill in event data for device specific processing */ + memset(&event, 0, sizeof(struct iommu_fault_event)); + event.fault.type = IOMMU_FAULT_PAGE_REQ; + event.fault.prm.addr = desc->addr; + event.fault.prm.pasid = desc->pasid; + event.fault.prm.grpid = desc->prg_index; + event.fault.prm.perm = prq_to_iommu_prot(desc); + + if (desc->lpig) + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; + if (desc->pasid_present) { + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; + } + if (desc->priv_data_present) { + /* + * Set last page in group bit if private data is present, + * page response is required as it does for LPIG. + * iommu_report_device_fault() doesn't understand this vendor + * specific requirement thus we set last_page as a workaround. + */ + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; + memcpy(event.fault.prm.private_data, desc->priv_data, + sizeof(desc->priv_data)); + } + + return iommu_report_device_fault(dev, &event); +} + static irqreturn_t prq_event_thread(int irq, void *d) { + struct intel_svm_dev *sdev = NULL; struct intel_iommu *iommu = d; struct intel_svm *svm = NULL; int head, tail, handled = 0; @@ -813,7 +879,6 @@ static irqreturn_t prq_event_thread(int irq, void *d) tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; while (head != tail) { - struct intel_svm_dev *sdev; struct vm_area_struct *vma; struct page_req_dsc *req; struct qi_desc resp; @@ -849,6 +914,20 @@ static irqreturn_t prq_event_thread(int irq, void *d) } } + if (!sdev || sdev->sid != req->rid) { + struct intel_svm_dev *t; + + sdev = NULL; + rcu_read_lock(); + list_for_each_entry_rcu(t, &svm->devs, list) { + if (t->sid == req->rid) { + sdev = t; + break; + } + } + rcu_read_unlock(); + } + result = QI_RESP_INVALID; /* Since we're using init_mm.pgd directly, we should never take * any faults on kernel addresses. */ @@ -859,6 +938,17 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (!is_canonical_address(address)) goto bad_req; + /* + * If prq is to be handled outside iommu driver via receiver of + * the fault notifiers, we skip the page response here. + */ + if (svm->flags & SVM_FLAG_GUEST_MODE) { + if (sdev && !intel_svm_prq_report(sdev->dev, req)) + goto prq_advance; + else + goto bad_req; + } + /* If the mm is already defunct, don't handle faults. */ if (!mmget_not_zero(svm->mm)) goto bad_req; @@ -877,24 +967,11 @@ static irqreturn_t prq_event_thread(int irq, void *d) goto invalid; result = QI_RESP_SUCCESS; - invalid: +invalid: mmap_read_unlock(svm->mm); mmput(svm->mm); - bad_req: - /* Accounting for major/minor faults? */ - rcu_read_lock(); - list_for_each_entry_rcu(sdev, &svm->devs, list) { - if (sdev->sid == req->rid) - break; - } - /* Other devices can go away, but the drivers are not permitted - * to unbind while any page faults might be in flight. So it's - * OK to drop the 'lock' here now we have it. */ - rcu_read_unlock(); - - if (WARN_ON(&sdev->list == &svm->devs)) - sdev = NULL; - +bad_req: + WARN_ON(!sdev); if (sdev && sdev->ops && sdev->ops->fault_cb) { int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | (req->exe_req << 1) | (req->pm_req); @@ -905,7 +982,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) and these can be NULL. Do not use them below this point! */ sdev = NULL; svm = NULL; - no_pasid: +no_pasid: if (req->lpig || req->priv_data_present) { /* * Per VT-d spec. v3.0 ch7.7, system software must @@ -930,6 +1007,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) resp.qw3 = 0; qi_submit_sync(iommu, &resp, 1, 0); } +prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } @@ -1000,3 +1078,102 @@ int intel_svm_get_pasid(struct iommu_sva *sva) return pasid; } + +int intel_svm_page_response(struct device *dev, + struct iommu_fault_event *evt, + struct iommu_page_response *msg) +{ + struct iommu_fault_page_request *prm; + struct intel_svm_dev *sdev = NULL; + struct intel_svm *svm = NULL; + struct intel_iommu *iommu; + bool private_present; + bool pasid_present; + bool last_page; + u8 bus, devfn; + int ret = 0; + u16 sid; + + if (!dev || !dev_is_pci(dev)) + return -ENODEV; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return -ENODEV; + + if (!msg || !evt) + return -EINVAL; + + mutex_lock(&pasid_mutex); + + prm = &evt->fault.prm; + sid = PCI_DEVID(bus, devfn); + pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; + last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; + + if (!pasid_present) { + ret = -EINVAL; + goto out; + } + + if (prm->pasid == 0 || prm->pasid >= PASID_MAX) { + ret = -EINVAL; + goto out; + } + + ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev); + if (ret || !sdev) { + ret = -ENODEV; + goto out; + } + + /* + * For responses from userspace, need to make sure that the + * pasid has been bound to its mm. + */ + if (svm->flags & SVM_FLAG_GUEST_MODE) { + struct mm_struct *mm; + + mm = get_task_mm(current); + if (!mm) { + ret = -EINVAL; + goto out; + } + + if (mm != svm->mm) { + ret = -ENODEV; + mmput(mm); + goto out; + } + + mmput(mm); + } + + /* + * Per VT-d spec. v3.0 ch7.7, system software must respond + * with page group response if private data is present (PDP) + * or last page in group (LPIG) bit is set. This is an + * additional VT-d requirement beyond PCI ATS spec. + */ + if (last_page || private_present) { + struct qi_desc desc; + + desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) | + QI_PGRP_PASID_P(pasid_present) | + QI_PGRP_PDP(private_present) | + QI_PGRP_RESP_CODE(msg->code) | + QI_PGRP_RESP_TYPE; + desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page); + desc.qw2 = 0; + desc.qw3 = 0; + if (private_present) + memcpy(&desc.qw2, prm->private_data, + sizeof(prm->private_data)); + + qi_submit_sync(iommu, &desc, 1, 0); + } +out: + mutex_unlock(&pasid_mutex); + return ret; +} |