summaryrefslogtreecommitdiff
path: root/drivers/iommu/intel/svm.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-05-18 10:55:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-05-18 10:55:13 -0700
commit0cc6f45cecb46cefe89c17ec816dc8cd58a2229a (patch)
tree8996380ed4473b25607175aafa79756a74c2acf5 /drivers/iommu/intel/svm.c
parentf0cd69b8cca6a5096463644d6dacc9f991bfa521 (diff)
parent2bd5059c6cc04b02073d4d9f57137ab74e1d8e7a (diff)
Merge tag 'iommu-updates-v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull iommu updates from Joerg Roedel: "Core: - IOMMU memory usage observability - This will make the memory used for IO page tables explicitly visible. - Simplify arch_setup_dma_ops() Intel VT-d: - Consolidate domain cache invalidation - Remove private data from page fault message - Allocate DMAR fault interrupts locally - Cleanup and refactoring ARM-SMMUv2: - Support for fault debugging hardware on Qualcomm implementations - Re-land support for the ->domain_alloc_paging() callback ARM-SMMUv3: - Improve handling of MSI allocation failure - Drop support for the "disable_bypass" cmdline option - Major rework of the CD creation code, following on directly from the STE rework merged last time around. - Add unit tests for the new STE/CD manipulation logic AMD-Vi: - Final part of SVA changes with generic IO page fault handling Renesas IPMMU: - Add support for R8A779H0 hardware ... and a couple smaller fixes and updates across the sub-tree" * tag 'iommu-updates-v6.10' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (80 commits) iommu/arm-smmu-v3: Make the kunit into a module arm64: Properly clean up iommu-dma remnants iommu/amd: Enable Guest Translation after reading IOMMU feature register iommu/vt-d: Decouple igfx_off from graphic identity mapping iommu/amd: Fix compilation error iommu/arm-smmu-v3: Add unit tests for arm_smmu_write_entry iommu/arm-smmu-v3: Build the whole CD in arm_smmu_make_s1_cd() iommu/arm-smmu-v3: Move the CD generation for SVA into a function iommu/arm-smmu-v3: Allocate the CD table entry in advance iommu/arm-smmu-v3: Make arm_smmu_alloc_cd_ptr() iommu/arm-smmu-v3: Consolidate clearing a CD table entry iommu/arm-smmu-v3: Move the CD generation for S1 domains into a function iommu/arm-smmu-v3: Make CD programming use arm_smmu_write_entry() iommu/arm-smmu-v3: Add an ops indirection to the STE code iommu/arm-smmu-qcom: Don't build debug features as a kernel module iommu/amd: Add SVA domain support iommu: Add ops->domain_alloc_sva() iommu/amd: Initial SVA support for AMD IOMMU iommu/amd: Add support for enable/disable IOPF iommu/amd: Add IO page fault notifier handler ...
Diffstat (limited to 'drivers/iommu/intel/svm.c')
-rw-r--r--drivers/iommu/intel/svm.c383
1 files changed, 86 insertions, 297 deletions
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index ee3b469e2da1..0e3a9b38bef2 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -22,57 +22,22 @@
#include "iommu.h"
#include "pasid.h"
#include "perf.h"
+#include "../iommu-pages.h"
#include "trace.h"
static irqreturn_t prq_event_thread(int irq, void *d);
-static DEFINE_XARRAY_ALLOC(pasid_private_array);
-static int pasid_private_add(ioasid_t pasid, void *priv)
-{
- return xa_alloc(&pasid_private_array, &pasid, priv,
- XA_LIMIT(pasid, pasid), GFP_ATOMIC);
-}
-
-static void pasid_private_remove(ioasid_t pasid)
-{
- xa_erase(&pasid_private_array, pasid);
-}
-
-static void *pasid_private_find(ioasid_t pasid)
-{
- return xa_load(&pasid_private_array, pasid);
-}
-
-static struct intel_svm_dev *
-svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
-{
- struct intel_svm_dev *sdev = NULL, *t;
-
- rcu_read_lock();
- list_for_each_entry_rcu(t, &svm->devs, list) {
- if (t->dev == dev) {
- sdev = t;
- break;
- }
- }
- rcu_read_unlock();
-
- return sdev;
-}
-
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
struct iopf_queue *iopfq;
- struct page *pages;
int irq, ret;
- pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
- if (!pages) {
+ iommu->prq = iommu_alloc_pages_node(iommu->node, GFP_KERNEL, PRQ_ORDER);
+ if (!iommu->prq) {
pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
iommu->name);
return -ENOMEM;
}
- iommu->prq = page_address(pages);
irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
if (irq <= 0) {
@@ -117,7 +82,7 @@ free_hwirq:
dmar_free_hwirq(irq);
iommu->pr_irq = 0;
free_prq:
- free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu_free_pages(iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return ret;
@@ -140,7 +105,7 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
iommu->iopf_queue = NULL;
}
- free_pages((unsigned long)iommu->prq, PRQ_ORDER);
+ iommu_free_pages(iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return 0;
@@ -168,94 +133,32 @@ void intel_svm_check(struct intel_iommu *iommu)
iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}
-static void __flush_svm_range_dev(struct intel_svm *svm,
- struct intel_svm_dev *sdev,
- unsigned long address,
- unsigned long pages, int ih)
-{
- struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);
-
- if (WARN_ON(!pages))
- return;
-
- qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
- if (info->ats_enabled) {
- qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
- svm->pasid, sdev->qdep, address,
- order_base_2(pages));
- quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
- svm->pasid, sdev->qdep);
- }
-}
-
-static void intel_flush_svm_range_dev(struct intel_svm *svm,
- struct intel_svm_dev *sdev,
- unsigned long address,
- unsigned long pages, int ih)
-{
- unsigned long shift = ilog2(__roundup_pow_of_two(pages));
- unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
- unsigned long start = ALIGN_DOWN(address, align);
- unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);
-
- while (start < end) {
- __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
- start += align;
- }
-}
-
-static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
- unsigned long pages, int ih)
-{
- struct intel_svm_dev *sdev;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list)
- intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
- rcu_read_unlock();
-}
-
-static void intel_flush_svm_all(struct intel_svm *svm)
-{
- struct device_domain_info *info;
- struct intel_svm_dev *sdev;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list) {
- info = dev_iommu_priv_get(sdev->dev);
-
- qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
- if (info->ats_enabled) {
- qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
- svm->pasid, sdev->qdep,
- 0, 64 - VTD_PAGE_SHIFT);
- quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
- svm->pasid, sdev->qdep);
- }
- }
- rcu_read_unlock();
-}
-
/* Pages have been freed at this point */
static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
{
- struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
+ struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
- if (start == 0 && end == -1UL) {
- intel_flush_svm_all(svm);
+ if (start == 0 && end == ULONG_MAX) {
+ cache_tag_flush_all(domain);
return;
}
- intel_flush_svm_range(svm, start,
- (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
+ /*
+ * The mm_types defines vm_end as the first byte after the end address,
+ * different from IOMMU subsystem using the last address of an address
+ * range.
+ */
+ cache_tag_flush_range(domain, start, end - 1, 0);
}
static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
- struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
- struct intel_svm_dev *sdev;
+ struct dmar_domain *domain = container_of(mn, struct dmar_domain, notifier);
+ struct dev_pasid_info *dev_pasid;
+ struct device_domain_info *info;
+ unsigned long flags;
/* This might end up being called from exit_mmap(), *before* the page
* tables are cleared. And __mmu_notifier_release() will delete us from
@@ -269,157 +172,78 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
* page) so that we end up taking a fault that the hardware really
* *has* to handle gracefully without affecting other processes.
*/
- rcu_read_lock();
- list_for_each_entry_rcu(sdev, &svm->devs, list)
- intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
- svm->pasid, true);
- rcu_read_unlock();
+ spin_lock_irqsave(&domain->lock, flags);
+ list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
+ info = dev_iommu_priv_get(dev_pasid->dev);
+ intel_pasid_tear_down_entry(info->iommu, dev_pasid->dev,
+ dev_pasid->pasid, true);
+ }
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+}
+static void intel_mm_free_notifier(struct mmu_notifier *mn)
+{
+ kfree(container_of(mn, struct dmar_domain, notifier));
}
static const struct mmu_notifier_ops intel_mmuops = {
.release = intel_mm_release,
.arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
+ .free_notifier = intel_mm_free_notifier,
};
-static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
- struct intel_svm **rsvm,
- struct intel_svm_dev **rsdev)
-{
- struct intel_svm_dev *sdev = NULL;
- struct intel_svm *svm;
-
- if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
- return -EINVAL;
-
- svm = pasid_private_find(pasid);
- if (IS_ERR(svm))
- return PTR_ERR(svm);
-
- if (!svm)
- goto out;
-
- /*
- * If we found svm for the PASID, there must be at least one device
- * bond.
- */
- if (WARN_ON(list_empty(&svm->devs)))
- return -EINVAL;
- sdev = svm_lookup_device_by_dev(svm, dev);
-
-out:
- *rsvm = svm;
- *rsdev = sdev;
-
- return 0;
-}
-
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
struct mm_struct *mm = domain->mm;
- struct intel_svm_dev *sdev;
- struct intel_svm *svm;
+ struct dev_pasid_info *dev_pasid;
unsigned long sflags;
+ unsigned long flags;
int ret = 0;
- svm = pasid_private_find(pasid);
- if (!svm) {
- svm = kzalloc(sizeof(*svm), GFP_KERNEL);
- if (!svm)
- return -ENOMEM;
-
- svm->pasid = pasid;
- svm->mm = mm;
- INIT_LIST_HEAD_RCU(&svm->devs);
-
- svm->notifier.ops = &intel_mmuops;
- ret = mmu_notifier_register(&svm->notifier, mm);
- if (ret) {
- kfree(svm);
- return ret;
- }
-
- ret = pasid_private_add(svm->pasid, svm);
- if (ret) {
- mmu_notifier_unregister(&svm->notifier, mm);
- kfree(svm);
- return ret;
- }
- }
+ dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
+ if (!dev_pasid)
+ return -ENOMEM;
- sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
- if (!sdev) {
- ret = -ENOMEM;
- goto free_svm;
- }
+ dev_pasid->dev = dev;
+ dev_pasid->pasid = pasid;
- sdev->dev = dev;
- sdev->iommu = iommu;
- sdev->did = FLPT_DEFAULT_DID;
- sdev->sid = PCI_DEVID(info->bus, info->devfn);
- if (info->ats_enabled) {
- sdev->qdep = info->ats_qdep;
- if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
- sdev->qdep = 0;
- }
+ ret = cache_tag_assign_domain(to_dmar_domain(domain), dev, pasid);
+ if (ret)
+ goto free_dev_pasid;
/* Setup the pasid table: */
sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
FLPT_DEFAULT_DID, sflags);
if (ret)
- goto free_sdev;
+ goto unassign_tag;
- list_add_rcu(&sdev->list, &svm->devs);
+ spin_lock_irqsave(&dmar_domain->lock, flags);
+ list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
+ spin_unlock_irqrestore(&dmar_domain->lock, flags);
return 0;
-free_sdev:
- kfree(sdev);
-free_svm:
- if (list_empty(&svm->devs)) {
- mmu_notifier_unregister(&svm->notifier, mm);
- pasid_private_remove(pasid);
- kfree(svm);
- }
+unassign_tag:
+ cache_tag_unassign_domain(to_dmar_domain(domain), dev, pasid);
+free_dev_pasid:
+ kfree(dev_pasid);
return ret;
}
-void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
-{
- struct intel_svm_dev *sdev;
- struct intel_svm *svm;
- struct mm_struct *mm;
-
- if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev))
- return;
- mm = svm->mm;
-
- if (sdev) {
- list_del_rcu(&sdev->list);
- kfree_rcu(sdev, rcu);
-
- if (list_empty(&svm->devs)) {
- if (svm->notifier.ops)
- mmu_notifier_unregister(&svm->notifier, mm);
- pasid_private_remove(svm->pasid);
- kfree(svm);
- }
- }
-}
-
/* Page request queue descriptor */
struct page_req_dsc {
union {
struct {
u64 type:8;
u64 pasid_present:1;
- u64 priv_data_present:1;
- u64 rsvd:6;
+ u64 rsvd:7;
u64 rid:16;
u64 pasid:20;
u64 exe_req:1;
@@ -438,7 +262,8 @@ struct page_req_dsc {
};
u64 qw_1;
};
- u64 priv_data[2];
+ u64 qw_2;
+ u64 qw_3;
};
static bool is_canonical_address(u64 addr)
@@ -572,24 +397,6 @@ static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
}
- if (desc->priv_data_present) {
- /*
- * Set last page in group bit if private data is present,
- * page response is required as it does for LPIG.
- * iommu_report_device_fault() doesn't understand this vendor
- * specific requirement thus we set last_page as a workaround.
- */
- event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
- event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
- event.fault.prm.private_data[0] = desc->priv_data[0];
- event.fault.prm.private_data[1] = desc->priv_data[1];
- } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
- /*
- * If the private data fields are not used by hardware, use it
- * to monitor the prq handle latency.
- */
- event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
- }
iommu_report_device_fault(dev, &event);
}
@@ -597,39 +404,23 @@ static void intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
static void handle_bad_prq_event(struct intel_iommu *iommu,
struct page_req_dsc *req, int result)
{
- struct qi_desc desc;
+ struct qi_desc desc = { };
pr_err("%s: Invalid page request: %08llx %08llx\n",
iommu->name, ((unsigned long long *)req)[0],
((unsigned long long *)req)[1]);
- /*
- * Per VT-d spec. v3.0 ch7.7, system software must
- * respond with page group response if private data
- * is present (PDP) or last page in group (LPIG) bit
- * is set. This is an additional VT-d feature beyond
- * PCI ATS spec.
- */
- if (!req->lpig && !req->priv_data_present)
+ if (!req->lpig)
return;
desc.qw0 = QI_PGRP_PASID(req->pasid) |
QI_PGRP_DID(req->rid) |
QI_PGRP_PASID_P(req->pasid_present) |
- QI_PGRP_PDP(req->priv_data_present) |
QI_PGRP_RESP_CODE(result) |
QI_PGRP_RESP_TYPE;
desc.qw1 = QI_PGRP_IDX(req->prg_index) |
QI_PGRP_LPIG(req->lpig);
- if (req->priv_data_present) {
- desc.qw2 = req->priv_data[0];
- desc.qw3 = req->priv_data[1];
- } else {
- desc.qw2 = 0;
- desc.qw3 = 0;
- }
-
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -697,7 +488,7 @@ bad_req:
intel_svm_prq_report(iommu, dev, req);
trace_prq_report(iommu, dev, req->qw_0, req->qw_1,
- req->priv_data[0], req->priv_data[1],
+ req->qw_2, req->qw_3,
iommu->prq_seq_number++);
mutex_unlock(&iommu->iopf_lock);
prq_advance:
@@ -736,7 +527,7 @@ void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
struct intel_iommu *iommu = info->iommu;
u8 bus = info->bus, devfn = info->devfn;
struct iommu_fault_page_request *prm;
- bool private_present;
+ struct qi_desc desc;
bool pasid_present;
bool last_page;
u16 sid;
@@ -744,42 +535,25 @@ void intel_svm_page_response(struct device *dev, struct iopf_fault *evt,
prm = &evt->fault.prm;
sid = PCI_DEVID(bus, devfn);
pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
- /*
- * Per VT-d spec. v3.0 ch7.7, system software must respond
- * with page group response if private data is present (PDP)
- * or last page in group (LPIG) bit is set. This is an
- * additional VT-d requirement beyond PCI ATS spec.
- */
- if (last_page || private_present) {
- struct qi_desc desc;
-
- desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
- QI_PGRP_PASID_P(pasid_present) |
- QI_PGRP_PDP(private_present) |
- QI_PGRP_RESP_CODE(msg->code) |
- QI_PGRP_RESP_TYPE;
- desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
- desc.qw2 = 0;
- desc.qw3 = 0;
-
- if (private_present) {
- desc.qw2 = prm->private_data[0];
- desc.qw3 = prm->private_data[1];
- } else if (prm->private_data[0]) {
- dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
- ktime_to_ns(ktime_get()) - prm->private_data[0]);
- }
+ desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
+ QI_PGRP_PASID_P(pasid_present) |
+ QI_PGRP_RESP_CODE(msg->code) |
+ QI_PGRP_RESP_TYPE;
+ desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
+ desc.qw2 = 0;
+ desc.qw3 = 0;
- qi_submit_sync(iommu, &desc, 1, 0);
- }
+ qi_submit_sync(iommu, &desc, 1, 0);
}
static void intel_svm_domain_free(struct iommu_domain *domain)
{
- kfree(to_dmar_domain(domain));
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+
+ /* dmar_domain free is deferred to the mmu free_notifier callback. */
+ mmu_notifier_put(&dmar_domain->notifier);
}
static const struct iommu_domain_ops intel_svm_domain_ops = {
@@ -787,14 +561,29 @@ static const struct iommu_domain_ops intel_svm_domain_ops = {
.free = intel_svm_domain_free
};
-struct iommu_domain *intel_svm_domain_alloc(void)
+struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
+ struct mm_struct *mm)
{
struct dmar_domain *domain;
+ int ret;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
- return NULL;
+ return ERR_PTR(-ENOMEM);
+
domain->domain.ops = &intel_svm_domain_ops;
+ domain->use_first_level = true;
+ INIT_LIST_HEAD(&domain->dev_pasids);
+ INIT_LIST_HEAD(&domain->cache_tags);
+ spin_lock_init(&domain->cache_lock);
+ spin_lock_init(&domain->lock);
+
+ domain->notifier.ops = &intel_mmuops;
+ ret = mmu_notifier_register(&domain->notifier, mm);
+ if (ret) {
+ kfree(domain);
+ return ERR_PTR(ret);
+ }
return &domain->domain;
}