summaryrefslogtreecommitdiff
path: root/drivers/iommu/intel/iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r--drivers/iommu/intel/iommu.c1894
1 files changed, 704 insertions, 1190 deletions
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index a7ecd90303dc..7aa3932251b2 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -27,8 +27,8 @@
#include "iommu.h"
#include "../dma-iommu.h"
#include "../irq_remapping.h"
+#include "../iommu-pages.h"
#include "pasid.h"
-#include "cap_audit.h"
#include "perfmon.h"
#define ROOT_SIZE VTD_PAGE_SIZE
@@ -54,11 +54,6 @@
__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
-/* IO virtual address start page frame number */
-#define IOVA_START_PFN (1)
-
-#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
-
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
@@ -171,15 +166,6 @@ static void device_rbtree_remove(struct device_domain_info *info)
spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
}
-/*
- * This domain is a statically identity mapping domain.
- * 1. This domain creats a static 1:1 mapping to all usable memory.
- * 2. It maps to each iommu if successful.
- * 3. Each iommu mapps to this domain if successful.
- */
-static struct dmar_domain *si_domain;
-static int hw_pass_through = 1;
-
struct dmar_rmrr_unit {
struct list_head list; /* list of rmrr units */
struct acpi_dmar_header *hdr; /* ACPI header */
@@ -221,12 +207,11 @@ int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);
-static int dmar_map_gfx = 1;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
static int iommu_skip_te_disable;
+static int disable_igfx_iommu;
-#define IDENTMAP_GFX 2
#define IDENTMAP_AZALIA 4
const struct iommu_ops intel_iommu_ops;
@@ -265,7 +250,7 @@ static int __init intel_iommu_setup(char *str)
no_platform_optin = 1;
pr_info("IOMMU disabled\n");
} else if (!strncmp(str, "igfx_off", 8)) {
- dmar_map_gfx = 0;
+ disable_igfx_iommu = 1;
pr_info("Disable GFX device mapping\n");
} else if (!strncmp(str, "forcedac", 8)) {
pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
@@ -298,27 +283,6 @@ static int __init intel_iommu_setup(char *str)
}
__setup("intel_iommu=", intel_iommu_setup);
-void *alloc_pgtable_page(int node, gfp_t gfp)
-{
- struct page *page;
- void *vaddr = NULL;
-
- page = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
- if (page)
- vaddr = page_address(page);
- return vaddr;
-}
-
-void free_pgtable_page(void *vaddr)
-{
- free_page((unsigned long)vaddr);
-}
-
-static int domain_type_is_si(struct dmar_domain *domain)
-{
- return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
-}
-
static int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn)
{
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
@@ -387,89 +351,6 @@ static bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
ecap_smpwc(iommu->ecap) : ecap_coherent(iommu->ecap);
}
-static void domain_update_iommu_coherency(struct dmar_domain *domain)
-{
- struct iommu_domain_info *info;
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- bool found = false;
- unsigned long i;
-
- domain->iommu_coherency = true;
- xa_for_each(&domain->iommu_array, i, info) {
- found = true;
- if (!iommu_paging_structure_coherency(info->iommu)) {
- domain->iommu_coherency = false;
- break;
- }
- }
- if (found)
- return;
-
- /* No hardware attached; use lowest common denominator */
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
- if (!iommu_paging_structure_coherency(iommu)) {
- domain->iommu_coherency = false;
- break;
- }
- }
- rcu_read_unlock();
-}
-
-static int domain_update_iommu_superpage(struct dmar_domain *domain,
- struct intel_iommu *skip)
-{
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- int mask = 0x3;
-
- if (!intel_iommu_superpage)
- return 0;
-
- /* set iommu_superpage to the smallest common denominator */
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
- if (iommu != skip) {
- if (domain && domain->use_first_level) {
- if (!cap_fl1gp_support(iommu->cap))
- mask = 0x1;
- } else {
- mask &= cap_super_page_val(iommu->cap);
- }
-
- if (!mask)
- break;
- }
- }
- rcu_read_unlock();
-
- return fls(mask);
-}
-
-static int domain_update_device_node(struct dmar_domain *domain)
-{
- struct device_domain_info *info;
- int nid = NUMA_NO_NODE;
- unsigned long flags;
-
- spin_lock_irqsave(&domain->lock, flags);
- list_for_each_entry(info, &domain->devices, link) {
- /*
- * There could possibly be multiple device numa nodes as devices
- * within the same domain may sit behind different IOMMUs. There
- * isn't perfect answer in such situation, so we select first
- * come first served policy.
- */
- nid = dev_to_node(info->dev);
- if (nid != NUMA_NO_NODE)
- break;
- }
- spin_unlock_irqrestore(&domain->lock, flags);
-
- return nid;
-}
-
/* Return the super pagesize bitmap if supported. */
static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
{
@@ -487,35 +368,6 @@ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
return bitmap;
}
-/* Some capabilities may be different across iommus */
-void domain_update_iommu_cap(struct dmar_domain *domain)
-{
- domain_update_iommu_coherency(domain);
- domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
-
- /*
- * If RHSA is missing, we should default to the device numa domain
- * as fall back.
- */
- if (domain->nid == NUMA_NO_NODE)
- domain->nid = domain_update_device_node(domain);
-
- /*
- * First-level translation restricts the input-address to a
- * canonical address (i.e., address bits 63:N have the same
- * value as address bit [N-1], where N is 48-bits with 4-level
- * paging and 57-bits with 5-level paging). Hence, skip bit
- * [N-1].
- */
- if (domain->use_first_level)
- domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
- else
- domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
-
- domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
- domain_update_iotlb(domain);
-}
-
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
u8 devfn, int alloc)
{
@@ -545,7 +397,8 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
if (!alloc)
return NULL;
- context = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
+ context = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC,
+ SZ_4K);
if (!context)
return NULL;
@@ -719,17 +572,17 @@ static void free_context_table(struct intel_iommu *iommu)
for (i = 0; i < ROOT_ENTRY_NR; i++) {
context = iommu_context_addr(iommu, i, 0, 0);
if (context)
- free_pgtable_page(context);
+ iommu_free_pages(context);
if (!sm_supported(iommu))
continue;
context = iommu_context_addr(iommu, i, 0x80, 0);
if (context)
- free_pgtable_page(context);
+ iommu_free_pages(context);
}
- free_pgtable_page(iommu->root_entry);
+ iommu_free_pages(iommu->root_entry);
iommu->root_entry = NULL;
}
@@ -743,14 +596,15 @@ static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
while (1) {
offset = pfn_level_offset(pfn, level);
pte = &parent[offset];
- if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
- pr_info("PTE not present at level %d\n", level);
- break;
- }
pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
- if (level == 1)
+ if (!dma_pte_present(pte)) {
+ pr_info("page table not present at level %d\n", level - 1);
+ break;
+ }
+
+ if (level == 1 || dma_pte_superpage(pte))
break;
parent = phys_to_virt(dma_pte_addr(pte));
@@ -773,11 +627,11 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
/* root entry dump */
- rt_entry = &iommu->root_entry[bus];
- if (!rt_entry) {
- pr_info("root table entry is not present\n");
+ if (!iommu->root_entry) {
+ pr_info("root table is not present\n");
return;
}
+ rt_entry = &iommu->root_entry[bus];
if (sm_supported(iommu))
pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
@@ -788,7 +642,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
/* context entry dump */
ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
if (!ctx_entry) {
- pr_info("context table entry is not present\n");
+ pr_info("context table is not present\n");
return;
}
@@ -797,17 +651,23 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
/* legacy mode does not require PASID entries */
if (!sm_supported(iommu)) {
+ if (!context_present(ctx_entry)) {
+ pr_info("legacy mode page table is not present\n");
+ return;
+ }
level = agaw_to_level(ctx_entry->hi & 7);
pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
goto pgtable_walk;
}
- /* get the pointer to pasid directory entry */
- dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
- if (!dir) {
- pr_info("pasid directory entry is not present\n");
+ if (!context_present(ctx_entry)) {
+ pr_info("pasid directory table is not present\n");
return;
}
+
+ /* get the pointer to pasid directory entry */
+ dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
+
/* For request-without-pasid, get the pasid from context entry */
if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
pasid = IOMMU_NO_PASID;
@@ -819,7 +679,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
/* get the pointer to the pasid table entry */
entries = get_pasid_table_from_pde(pde);
if (!entries) {
- pr_info("pasid table entry is not present\n");
+ pr_info("pasid table is not present\n");
return;
}
index = pasid & PASID_PTE_MASK;
@@ -827,6 +687,11 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
for (i = 0; i < ARRAY_SIZE(pte->val); i++)
pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
+ if (!pasid_pte_is_present(pte)) {
+ pr_info("scalable mode page table is not present\n");
+ return;
+ }
+
if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
@@ -865,21 +730,24 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
break;
if (!dma_pte_present(pte)) {
- uint64_t pteval;
+ uint64_t pteval, tmp;
- tmp_page = alloc_pgtable_page(domain->nid, gfp);
+ tmp_page = iommu_alloc_pages_node_sz(domain->nid, gfp,
+ SZ_4K);
if (!tmp_page)
return NULL;
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
- pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+ pteval = virt_to_phys(tmp_page) | DMA_PTE_READ |
+ DMA_PTE_WRITE;
if (domain->use_first_level)
- pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
+ pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
- if (cmpxchg64(&pte->val, 0ULL, pteval))
+ tmp = 0ULL;
+ if (!try_cmpxchg64(&pte->val, &tmp, pteval))
/* Someone else set it while we were thinking; use theirs. */
- free_pgtable_page(tmp_page);
+ iommu_free_pages(tmp_page);
else
domain_flush_cache(domain, pte, sizeof(*pte));
}
@@ -992,7 +860,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,
last_pfn < level_pfn + level_size(level) - 1)) {
dma_clear_pte(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
- free_pgtable_page(level_pte);
+ iommu_free_pages(level_pte);
}
next:
pfn += level_size(level);
@@ -1016,7 +884,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
/* free pgd */
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
- free_pgtable_page(domain->pgd);
+ iommu_free_pages(domain->pgd);
domain->pgd = NULL;
}
}
@@ -1028,18 +896,16 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
The 'pte' argument is the *parent* PTE, pointing to the page that is to
be freed. */
static void dma_pte_list_pagetables(struct dmar_domain *domain,
- int level, struct dma_pte *pte,
- struct list_head *freelist)
+ int level, struct dma_pte *parent_pte,
+ struct iommu_pages_list *freelist)
{
- struct page *pg;
+ struct dma_pte *pte = phys_to_virt(dma_pte_addr(parent_pte));
- pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
- list_add_tail(&pg->lru, freelist);
+ iommu_pages_list_add(freelist, pte);
if (level == 1)
return;
- pte = page_address(pg);
do {
if (dma_pte_present(pte) && !dma_pte_superpage(pte))
dma_pte_list_pagetables(domain, level - 1, pte, freelist);
@@ -1050,7 +916,7 @@ static void dma_pte_list_pagetables(struct dmar_domain *domain,
static void dma_pte_clear_level(struct dmar_domain *domain, int level,
struct dma_pte *pte, unsigned long pfn,
unsigned long start_pfn, unsigned long last_pfn,
- struct list_head *freelist)
+ struct iommu_pages_list *freelist)
{
struct dma_pte *first_pte = NULL, *last_pte = NULL;
@@ -1095,7 +961,8 @@ next:
the page tables, and may have cached the intermediate levels. The
pages can only be freed after the IOTLB flush has been done. */
static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
- unsigned long last_pfn, struct list_head *freelist)
+ unsigned long last_pfn,
+ struct iommu_pages_list *freelist)
{
if (WARN_ON(!domain_pfn_supported(domain, last_pfn)) ||
WARN_ON(start_pfn > last_pfn))
@@ -1107,8 +974,7 @@ static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
/* free pgd */
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
- struct page *pgd_page = virt_to_page(domain->pgd);
- list_add_tail(&pgd_page->lru, freelist);
+ iommu_pages_list_add(freelist, domain->pgd);
domain->pgd = NULL;
}
}
@@ -1118,7 +984,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
struct root_entry *root;
- root = alloc_pgtable_page(iommu->node, GFP_ATOMIC);
+ root = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, SZ_4K);
if (!root) {
pr_err("Allocating root entry for %s failed\n",
iommu->name);
@@ -1219,9 +1085,8 @@ static void __iommu_flush_context(struct intel_iommu *iommu,
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-/* return value determine if we need a write buffer flush */
-static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
- u64 addr, unsigned int size_order, u64 type)
+void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type)
{
int tlb_offset = ecap_iotlb_offset(iommu->ecap);
u64 val = 0, val_iva = 0;
@@ -1290,32 +1155,6 @@ domain_lookup_dev_info(struct dmar_domain *domain,
return NULL;
}
-void domain_update_iotlb(struct dmar_domain *domain)
-{
- struct dev_pasid_info *dev_pasid;
- struct device_domain_info *info;
- bool has_iotlb_device = false;
- unsigned long flags;
-
- spin_lock_irqsave(&domain->lock, flags);
- list_for_each_entry(info, &domain->devices, link) {
- if (info->ats_enabled) {
- has_iotlb_device = true;
- break;
- }
- }
-
- list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
- info = dev_iommu_priv_get(dev_pasid->dev);
- if (info->ats_enabled) {
- has_iotlb_device = true;
- break;
- }
- }
- domain->has_iotlb_device = has_iotlb_device;
- spin_unlock_irqrestore(&domain->lock, flags);
-}
-
/*
* The extra devTLB flush quirk impacts those QAT devices with PCI device
* IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device()
@@ -1334,257 +1173,64 @@ static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
return true;
}
-static void iommu_enable_pci_caps(struct device_domain_info *info)
+static void iommu_enable_pci_ats(struct device_domain_info *info)
{
struct pci_dev *pdev;
- if (!dev_is_pci(info->dev))
+ if (!info->ats_supported)
return;
pdev = to_pci_dev(info->dev);
+ if (!pci_ats_page_aligned(pdev))
+ return;
- /* The PCIe spec, in its wisdom, declares that the behaviour of
- the device if you enable PASID support after ATS support is
- undefined. So always enable PASID support on devices which
- have it, even if we can't yet know if we're ever going to
- use it. */
- if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
- info->pasid_enabled = 1;
-
- if (info->ats_supported && pci_ats_page_aligned(pdev) &&
- !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
+ if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT))
info->ats_enabled = 1;
- domain_update_iotlb(info->domain);
- }
}
-static void iommu_disable_pci_caps(struct device_domain_info *info)
+static void iommu_disable_pci_ats(struct device_domain_info *info)
{
- struct pci_dev *pdev;
-
- if (!dev_is_pci(info->dev))
+ if (!info->ats_enabled)
return;
- pdev = to_pci_dev(info->dev);
-
- if (info->ats_enabled) {
- pci_disable_ats(pdev);
- info->ats_enabled = 0;
- domain_update_iotlb(info->domain);
- }
-
- if (info->pasid_enabled) {
- pci_disable_pasid(pdev);
- info->pasid_enabled = 0;
- }
+ pci_disable_ats(to_pci_dev(info->dev));
+ info->ats_enabled = 0;
}
-static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
- u64 addr, unsigned int mask)
+static void iommu_enable_pci_pri(struct device_domain_info *info)
{
- u16 sid, qdep;
+ struct pci_dev *pdev;
- if (!info || !info->ats_enabled)
+ if (!info->ats_enabled || !info->pri_supported)
return;
- sid = info->bus << 8 | info->devfn;
- qdep = info->ats_qdep;
- qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
- qdep, addr, mask);
- quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep);
-}
-
-static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
- u64 addr, unsigned mask)
-{
- struct dev_pasid_info *dev_pasid;
- struct device_domain_info *info;
- unsigned long flags;
-
- if (!domain->has_iotlb_device)
+ pdev = to_pci_dev(info->dev);
+ /* PASID is required in PRG Response Message. */
+ if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
return;
- spin_lock_irqsave(&domain->lock, flags);
- list_for_each_entry(info, &domain->devices, link)
- __iommu_flush_dev_iotlb(info, addr, mask);
-
- list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) {
- info = dev_iommu_priv_get(dev_pasid->dev);
-
- if (!info->ats_enabled)
- continue;
-
- qi_flush_dev_iotlb_pasid(info->iommu,
- PCI_DEVID(info->bus, info->devfn),
- info->pfsid, dev_pasid->pasid,
- info->ats_qdep, addr,
- mask);
- }
- spin_unlock_irqrestore(&domain->lock, flags);
-}
-
-static void domain_flush_pasid_iotlb(struct intel_iommu *iommu,
- struct dmar_domain *domain, u64 addr,
- unsigned long npages, bool ih)
-{
- u16 did = domain_id_iommu(domain, iommu);
- struct dev_pasid_info *dev_pasid;
- unsigned long flags;
-
- spin_lock_irqsave(&domain->lock, flags);
- list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain)
- qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih);
-
- if (!list_empty(&domain->devices))
- qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih);
- spin_unlock_irqrestore(&domain->lock, flags);
-}
-
-static void __iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
- unsigned long pfn, unsigned int pages,
- int ih)
-{
- unsigned int aligned_pages = __roundup_pow_of_two(pages);
- unsigned long bitmask = aligned_pages - 1;
- unsigned int mask = ilog2(aligned_pages);
- u64 addr = (u64)pfn << VTD_PAGE_SHIFT;
-
- /*
- * PSI masks the low order bits of the base address. If the
- * address isn't aligned to the mask, then compute a mask value
- * needed to ensure the target range is flushed.
- */
- if (unlikely(bitmask & pfn)) {
- unsigned long end_pfn = pfn + pages - 1, shared_bits;
-
- /*
- * Since end_pfn <= pfn + bitmask, the only way bits
- * higher than bitmask can differ in pfn and end_pfn is
- * by carrying. This means after masking out bitmask,
- * high bits starting with the first set bit in
- * shared_bits are all equal in both pfn and end_pfn.
- */
- shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
- mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG;
- }
-
- /*
- * Fallback to domain selective flush if no PSI support or
- * the size is too big.
- */
- if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
- iommu->flush.flush_iotlb(iommu, did, 0, 0,
- DMA_TLB_DSI_FLUSH);
- else
- iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
- DMA_TLB_PSI_FLUSH);
-}
-
-static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
- struct dmar_domain *domain,
- unsigned long pfn, unsigned int pages,
- int ih, int map)
-{
- unsigned int aligned_pages = __roundup_pow_of_two(pages);
- unsigned int mask = ilog2(aligned_pages);
- uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
- u16 did = domain_id_iommu(domain, iommu);
-
- if (WARN_ON(!pages))
+ if (pci_reset_pri(pdev))
return;
- if (ih)
- ih = 1 << 6;
-
- if (domain->use_first_level)
- domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih);
- else
- __iommu_flush_iotlb_psi(iommu, did, pfn, pages, ih);
-
- /*
- * In caching mode, changes of pages from non-present to present require
- * flush. However, device IOTLB doesn't need to be flushed in this case.
- */
- if (!cap_caching_mode(iommu->cap) || !map)
- iommu_flush_dev_iotlb(domain, addr, mask);
-}
-
-/* Notification for newly created mappings */
-static void __mapping_notify_one(struct intel_iommu *iommu, struct dmar_domain *domain,
- unsigned long pfn, unsigned int pages)
-{
- /*
- * It's a non-present to present mapping. Only flush if caching mode
- * and second level.
- */
- if (cap_caching_mode(iommu->cap) && !domain->use_first_level)
- iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
- else
- iommu_flush_write_buffer(iommu);
+ if (!pci_enable_pri(pdev, PRQ_DEPTH))
+ info->pri_enabled = 1;
}
-/*
- * Flush the relevant caches in nested translation if the domain
- * also serves as a parent
- */
-static void parent_domain_flush(struct dmar_domain *domain,
- unsigned long pfn,
- unsigned long pages, int ih)
+static void iommu_disable_pci_pri(struct device_domain_info *info)
{
- struct dmar_domain *s1_domain;
-
- spin_lock(&domain->s1_lock);
- list_for_each_entry(s1_domain, &domain->s1_domains, s2_link) {
- struct device_domain_info *device_info;
- struct iommu_domain_info *info;
- unsigned long flags;
- unsigned long i;
+ if (!info->pri_enabled)
+ return;
- xa_for_each(&s1_domain->iommu_array, i, info)
- __iommu_flush_iotlb_psi(info->iommu, info->did,
- pfn, pages, ih);
+ if (WARN_ON(info->iopf_refcount))
+ iopf_queue_remove_device(info->iommu->iopf_queue, info->dev);
- if (!s1_domain->has_iotlb_device)
- continue;
-
- spin_lock_irqsave(&s1_domain->lock, flags);
- list_for_each_entry(device_info, &s1_domain->devices, link)
- /*
- * Address translation cache in device side caches the
- * result of nested translation. There is no easy way
- * to identify the exact set of nested translations
- * affected by a change in S2. So just flush the entire
- * device cache.
- */
- __iommu_flush_dev_iotlb(device_info, 0,
- MAX_AGAW_PFN_WIDTH);
- spin_unlock_irqrestore(&s1_domain->lock, flags);
- }
- spin_unlock(&domain->s1_lock);
+ pci_disable_pri(to_pci_dev(info->dev));
+ info->pri_enabled = 0;
}
static void intel_flush_iotlb_all(struct iommu_domain *domain)
{
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- struct iommu_domain_info *info;
- unsigned long idx;
-
- xa_for_each(&dmar_domain->iommu_array, idx, info) {
- struct intel_iommu *iommu = info->iommu;
- u16 did = domain_id_iommu(dmar_domain, iommu);
-
- if (dmar_domain->use_first_level)
- domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0);
- else
- iommu->flush.flush_iotlb(iommu, did, 0, 0,
- DMA_TLB_DSI_FLUSH);
-
- if (!cap_caching_mode(iommu->cap))
- iommu_flush_dev_iotlb(dmar_domain, 0, MAX_AGAW_PFN_WIDTH);
- }
-
- if (dmar_domain->nested_parent)
- parent_domain_flush(dmar_domain, 0, -1, 0);
+ cache_tag_flush_all(to_dmar_domain(domain));
}
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
@@ -1643,52 +1289,13 @@ static void iommu_disable_translation(struct intel_iommu *iommu)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-static int iommu_init_domains(struct intel_iommu *iommu)
-{
- u32 ndomains;
-
- ndomains = cap_ndoms(iommu->cap);
- pr_debug("%s: Number of Domains supported <%d>\n",
- iommu->name, ndomains);
-
- spin_lock_init(&iommu->lock);
-
- iommu->domain_ids = bitmap_zalloc(ndomains, GFP_KERNEL);
- if (!iommu->domain_ids)
- return -ENOMEM;
-
- /*
- * If Caching mode is set, then invalid translations are tagged
- * with domain-id 0, hence we need to pre-allocate it. We also
- * use domain-id 0 as a marker for non-allocated domain-id, so
- * make sure it is not used for a real domain.
- */
- set_bit(0, iommu->domain_ids);
-
- /*
- * Vt-d spec rev3.0 (section 6.2.3.1) requires that each pasid
- * entry for first-level or pass-through translation modes should
- * be programmed with a domain id different from those used for
- * second-level or nested translation. We reserve a domain id for
- * this purpose.
- */
- if (sm_supported(iommu))
- set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
-
- return 0;
-}
-
static void disable_dmar_iommu(struct intel_iommu *iommu)
{
- if (!iommu->domain_ids)
- return;
-
/*
* All iommu domains must have been detached from the devices,
* hence there should be no domain IDs in use.
*/
- if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))
- > NUM_RESERVED_DID))
+ if (WARN_ON(!ida_is_empty(&iommu->domain_ida)))
return;
if (iommu->gcmd & DMA_GCMD_TE)
@@ -1697,11 +1304,6 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
static void free_dmar_iommu(struct intel_iommu *iommu)
{
- if (iommu->domain_ids) {
- bitmap_free(iommu->domain_ids);
- iommu->domain_ids = NULL;
- }
-
if (iommu->copied_tables) {
bitmap_free(iommu->copied_tables);
iommu->copied_tables = NULL;
@@ -1710,97 +1312,69 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
/* free context mapping */
free_context_table(iommu);
-#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_supported(iommu)) {
- if (ecap_prs(iommu->ecap))
- intel_svm_finish_prq(iommu);
- }
-#endif
+ if (ecap_prs(iommu->ecap))
+ intel_iommu_finish_prq(iommu);
}
/*
* Check and return whether first level is used by default for
* DMA translation.
*/
-static bool first_level_by_default(unsigned int type)
+static bool first_level_by_default(struct intel_iommu *iommu)
{
/* Only SL is available in legacy mode */
- if (!scalable_mode_support())
+ if (!sm_supported(iommu))
return false;
/* Only level (either FL or SL) is available, just use it */
- if (intel_cap_flts_sanity() ^ intel_cap_slts_sanity())
- return intel_cap_flts_sanity();
-
- /* Both levels are available, decide it based on domain type */
- return type != IOMMU_DOMAIN_UNMANAGED;
-}
-
-static struct dmar_domain *alloc_domain(unsigned int type)
-{
- struct dmar_domain *domain;
-
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
- if (!domain)
- return NULL;
+ if (ecap_flts(iommu->ecap) ^ ecap_slts(iommu->ecap))
+ return ecap_flts(iommu->ecap);
- domain->nid = NUMA_NO_NODE;
- if (first_level_by_default(type))
- domain->use_first_level = true;
- domain->has_iotlb_device = false;
- INIT_LIST_HEAD(&domain->devices);
- INIT_LIST_HEAD(&domain->dev_pasids);
- spin_lock_init(&domain->lock);
- xa_init(&domain->iommu_array);
-
- return domain;
+ return true;
}
int domain_attach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info, *curr;
- unsigned long ndomains;
int num, ret = -ENOSPC;
+ if (domain->domain.type == IOMMU_DOMAIN_SVA)
+ return 0;
+
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
- spin_lock(&iommu->lock);
+ guard(mutex)(&iommu->did_lock);
curr = xa_load(&domain->iommu_array, iommu->seq_id);
if (curr) {
curr->refcnt++;
- spin_unlock(&iommu->lock);
kfree(info);
return 0;
}
- ndomains = cap_ndoms(iommu->cap);
- num = find_first_zero_bit(iommu->domain_ids, ndomains);
- if (num >= ndomains) {
+ num = ida_alloc_range(&iommu->domain_ida, IDA_START_DID,
+ cap_ndoms(iommu->cap) - 1, GFP_KERNEL);
+ if (num < 0) {
pr_err("%s: No free domain ids\n", iommu->name);
goto err_unlock;
}
- set_bit(num, iommu->domain_ids);
info->refcnt = 1;
info->did = num;
info->iommu = iommu;
curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
- NULL, info, GFP_ATOMIC);
+ NULL, info, GFP_KERNEL);
if (curr) {
ret = xa_err(curr) ? : -EBUSY;
goto err_clear;
}
- domain_update_iommu_cap(domain);
- spin_unlock(&iommu->lock);
return 0;
err_clear:
- clear_bit(info->did, iommu->domain_ids);
+ ida_free(&iommu->domain_ida, info->did);
err_unlock:
- spin_unlock(&iommu->lock);
kfree(info);
return ret;
}
@@ -1809,47 +1383,91 @@ void domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
{
struct iommu_domain_info *info;
- spin_lock(&iommu->lock);
+ if (domain->domain.type == IOMMU_DOMAIN_SVA)
+ return;
+
+ guard(mutex)(&iommu->did_lock);
info = xa_load(&domain->iommu_array, iommu->seq_id);
if (--info->refcnt == 0) {
- clear_bit(info->did, iommu->domain_ids);
+ ida_free(&iommu->domain_ida, info->did);
xa_erase(&domain->iommu_array, iommu->seq_id);
domain->nid = NUMA_NO_NODE;
- domain_update_iommu_cap(domain);
kfree(info);
}
- spin_unlock(&iommu->lock);
-}
-
-static int guestwidth_to_adjustwidth(int gaw)
-{
- int agaw;
- int r = (gaw - 12) % 9;
-
- if (r == 0)
- agaw = gaw;
- else
- agaw = gaw + 9 - r;
- if (agaw > 64)
- agaw = 64;
- return agaw;
}
static void domain_exit(struct dmar_domain *domain)
{
if (domain->pgd) {
- LIST_HEAD(freelist);
+ struct iommu_pages_list freelist =
+ IOMMU_PAGES_LIST_INIT(freelist);
domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist);
- put_pages_list(&freelist);
+ iommu_put_pages_list(&freelist);
}
if (WARN_ON(!list_empty(&domain->devices)))
return;
+ kfree(domain->qi_batch);
kfree(domain);
}
+/*
+ * For kdump cases, old valid entries may be cached due to the
+ * in-flight DMA and copied pgtable, but there is no unmapping
+ * behaviour for them, thus we need an explicit cache flush for
+ * the newly-mapped device. For kdump, at this point, the device
+ * is supposed to finish reset at its driver probe stage, so no
+ * in-flight DMA will exist, and we don't need to worry anymore
+ * hereafter.
+ */
+static void copied_context_tear_down(struct intel_iommu *iommu,
+ struct context_entry *context,
+ u8 bus, u8 devfn)
+{
+ u16 did_old;
+
+ if (!context_copied(iommu, bus, devfn))
+ return;
+
+ assert_spin_locked(&iommu->lock);
+
+ did_old = context_domain_id(context);
+ context_clear_entry(context);
+
+ if (did_old < cap_ndoms(iommu->cap)) {
+ iommu->flush.flush_context(iommu, did_old,
+ PCI_DEVID(bus, devfn),
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
+ DMA_TLB_DSI_FLUSH);
+ }
+
+ clear_context_copied(iommu, bus, devfn);
+}
+
+/*
+ * It's a non-present to present mapping. If hardware doesn't cache
+ * non-present entry we only need to flush the write-buffer. If the
+ * _does_ cache non-present entries, then it does so in the special
+ * domain #0, which we have to flush:
+ */
+static void context_present_cache_flush(struct intel_iommu *iommu, u16 did,
+ u8 bus, u8 devfn)
+{
+ if (cap_caching_mode(iommu->cap)) {
+ iommu->flush.flush_context(iommu, 0,
+ PCI_DEVID(bus, devfn),
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+}
+
static int domain_context_mapping_one(struct dmar_domain *domain,
struct intel_iommu *iommu,
u8 bus, u8 devfn)
@@ -1860,10 +1478,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
int translation = CONTEXT_TT_MULTI_LEVEL;
struct dma_pte *pgd = domain->pgd;
struct context_entry *context;
- int agaw, ret;
-
- if (hw_pass_through && domain_type_is_si(domain))
- translation = CONTEXT_TT_PASS_THROUGH;
+ int ret;
pr_debug("Set context mapping for %02x:%02x.%d\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1878,83 +1493,23 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
if (context_present(context) && !context_copied(iommu, bus, devfn))
goto out_unlock;
- /*
- * For kdump cases, old valid entries may be cached due to the
- * in-flight DMA and copied pgtable, but there is no unmapping
- * behaviour for them, thus we need an explicit cache flush for
- * the newly-mapped device. For kdump, at this point, the device
- * is supposed to finish reset at its driver probe stage, so no
- * in-flight DMA will exist, and we don't need to worry anymore
- * hereafter.
- */
- if (context_copied(iommu, bus, devfn)) {
- u16 did_old = context_domain_id(context);
-
- if (did_old < cap_ndoms(iommu->cap)) {
- iommu->flush.flush_context(iommu, did_old,
- (((u16)bus) << 8) | devfn,
- DMA_CCMD_MASK_NOBIT,
- DMA_CCMD_DEVICE_INVL);
- iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
- DMA_TLB_DSI_FLUSH);
- }
-
- clear_context_copied(iommu, bus, devfn);
- }
-
+ copied_context_tear_down(iommu, context, bus, devfn);
context_clear_entry(context);
context_set_domain_id(context, did);
- if (translation != CONTEXT_TT_PASS_THROUGH) {
- /*
- * Skip top levels of page tables for iommu which has
- * less agaw than default. Unnecessary for PT mode.
- */
- for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
- ret = -ENOMEM;
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd))
- goto out_unlock;
- }
-
- if (info && info->ats_supported)
- translation = CONTEXT_TT_DEV_IOTLB;
- else
- translation = CONTEXT_TT_MULTI_LEVEL;
-
- context_set_address_root(context, virt_to_phys(pgd));
- context_set_address_width(context, agaw);
- } else {
- /*
- * In pass through mode, AW must be programmed to
- * indicate the largest AGAW value supported by
- * hardware. And ASR is ignored by hardware.
- */
- context_set_address_width(context, iommu->msagaw);
- }
+ if (info && info->ats_supported)
+ translation = CONTEXT_TT_DEV_IOTLB;
+ else
+ translation = CONTEXT_TT_MULTI_LEVEL;
+ context_set_address_root(context, virt_to_phys(pgd));
+ context_set_address_width(context, domain->agaw);
context_set_translation_type(context, translation);
context_set_fault_enable(context);
context_set_present(context);
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(context, sizeof(*context));
-
- /*
- * It's a non-present to present mapping. If hardware doesn't cache
- * non-present entry we only need to flush the write-buffer. If the
- * _does_ cache non-present entries, then it does so in the special
- * domain #0, which we have to flush:
- */
- if (cap_caching_mode(iommu->cap)) {
- iommu->flush.flush_context(iommu, 0,
- (((u16)bus) << 8) | devfn,
- DMA_CCMD_MASK_NOBIT,
- DMA_CCMD_DEVICE_INVL);
- iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
- } else {
- iommu_flush_write_buffer(iommu);
- }
-
+ context_present_cache_flush(iommu, did, bus, devfn);
ret = 0;
out_unlock:
@@ -1980,19 +1535,19 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev)
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
u8 bus = info->bus, devfn = info->devfn;
+ int ret;
if (!dev_is_pci(dev))
return domain_context_mapping_one(domain, iommu, bus, devfn);
- return pci_for_each_dma_alias(to_pci_dev(dev),
- domain_context_mapping_cb, domain);
-}
+ ret = pci_for_each_dma_alias(to_pci_dev(dev),
+ domain_context_mapping_cb, domain);
+ if (ret)
+ return ret;
-/* Returns a number of VTD pages, but aligned to MM page size */
-static unsigned long aligned_nrpages(unsigned long host_addr, size_t size)
-{
- host_addr &= ~PAGE_MASK;
- return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
+ iommu_enable_pci_ats(info);
+
+ return 0;
}
/* Return largest possible superpage level for a given mapping */
@@ -2031,9 +1586,7 @@ static void switch_to_super_page(struct dmar_domain *domain,
unsigned long end_pfn, int level)
{
unsigned long lvl_pages = lvl_to_nr_pages(level);
- struct iommu_domain_info *info;
struct dma_pte *pte = NULL;
- unsigned long i;
while (start_pfn <= end_pfn) {
if (!pte)
@@ -2045,13 +1598,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
start_pfn + lvl_pages - 1,
level + 1);
- xa_for_each(&domain->iommu_array, i, info)
- iommu_flush_iotlb_psi(info->iommu, domain,
- start_pfn, lvl_pages,
- 0, 0);
- if (domain->nested_parent)
- parent_domain_flush(domain, start_pfn,
- lvl_pages, 0);
+ cache_tag_flush_range(domain, start_pfn << VTD_PAGE_SHIFT,
+ end_pfn << VTD_PAGE_SHIFT, 0);
}
pte++;
@@ -2084,9 +1632,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
}
attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
- attr |= DMA_FL_PTE_PRESENT;
if (domain->use_first_level) {
- attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
+ attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
if (prot & DMA_PTE_WRITE)
attr |= DMA_FL_PTE_DIRTY;
}
@@ -2128,8 +1675,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
/* We don't need lock here, nobody else
* touches the iova range
*/
- tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
- if (tmp) {
+ tmp = 0ULL;
+ if (!try_cmpxchg64_local(&pte->val, &tmp, pteval)) {
static int dumps = 5;
pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
iov_pfn, tmp, (unsigned long long)pteval);
@@ -2173,7 +1720,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
{
struct intel_iommu *iommu = info->iommu;
struct context_entry *context;
- u16 did_old;
+ u16 did;
spin_lock(&iommu->lock);
context = iommu_context_addr(iommu, bus, devfn, 0);
@@ -2182,46 +1729,59 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
return;
}
- did_old = context_domain_id(context);
-
+ did = context_domain_id(context);
context_clear_entry(context);
__iommu_flush_cache(iommu, context, sizeof(*context));
spin_unlock(&iommu->lock);
- iommu->flush.flush_context(iommu,
- did_old,
- (((u16)bus) << 8) | devfn,
- DMA_CCMD_MASK_NOBIT,
- DMA_CCMD_DEVICE_INVL);
+ intel_context_flush_no_pasid(info, context, did);
+}
+
+int __domain_setup_first_level(struct intel_iommu *iommu,
+ struct device *dev, ioasid_t pasid,
+ u16 did, pgd_t *pgd, int flags,
+ struct iommu_domain *old)
+{
+ if (!old)
+ return intel_pasid_setup_first_level(iommu, dev, pgd,
+ pasid, did, flags);
+ return intel_pasid_replace_first_level(iommu, dev, pgd, pasid, did,
+ iommu_domain_did(old, iommu),
+ flags);
+}
- iommu->flush.flush_iotlb(iommu,
- did_old,
- 0,
- 0,
- DMA_TLB_DSI_FLUSH);
+static int domain_setup_second_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ if (!old)
+ return intel_pasid_setup_second_level(iommu, domain,
+ dev, pasid);
+ return intel_pasid_replace_second_level(iommu, domain, dev,
+ iommu_domain_did(old, iommu),
+ pasid);
+}
- __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
+static int domain_setup_passthrough(struct intel_iommu *iommu,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ if (!old)
+ return intel_pasid_setup_pass_through(iommu, dev, pasid);
+ return intel_pasid_replace_pass_through(iommu, dev,
+ iommu_domain_did(old, iommu),
+ pasid);
}
static int domain_setup_first_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev,
- u32 pasid)
+ u32 pasid, struct iommu_domain *old)
{
struct dma_pte *pgd = domain->pgd;
- int agaw, level;
- int flags = 0;
-
- /*
- * Skip top levels of page tables for iommu which has
- * less agaw than default. Unnecessary for PT mode.
- */
- for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd))
- return -ENOMEM;
- }
+ int level, flags = 0;
- level = agaw_to_level(agaw);
+ level = agaw_to_level(domain->agaw);
if (level != 4 && level != 5)
return -EINVAL;
@@ -2231,89 +1791,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
if (domain->force_snooping)
flags |= PASID_FLAG_PAGE_SNOOP;
- return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
- domain_id_iommu(domain, iommu),
- flags);
-}
-
-static bool dev_is_real_dma_subdevice(struct device *dev)
-{
- return dev && dev_is_pci(dev) &&
- pci_real_dma_dev(to_pci_dev(dev)) != to_pci_dev(dev);
-}
-
-static int iommu_domain_identity_map(struct dmar_domain *domain,
- unsigned long first_vpfn,
- unsigned long last_vpfn)
-{
- /*
- * RMRR range might have overlap with physical memory range,
- * clear it first
- */
- dma_pte_clear_range(domain, first_vpfn, last_vpfn);
-
- return __domain_mapping(domain, first_vpfn,
- first_vpfn, last_vpfn - first_vpfn + 1,
- DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL);
-}
-
-static int md_domain_init(struct dmar_domain *domain, int guest_width);
-
-static int __init si_domain_init(int hw)
-{
- struct dmar_rmrr_unit *rmrr;
- struct device *dev;
- int i, nid, ret;
-
- si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
- if (!si_domain)
- return -EFAULT;
-
- if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
- domain_exit(si_domain);
- si_domain = NULL;
- return -EFAULT;
- }
-
- if (hw)
- return 0;
-
- for_each_online_node(nid) {
- unsigned long start_pfn, end_pfn;
- int i;
-
- for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
- ret = iommu_domain_identity_map(si_domain,
- mm_to_dma_pfn_start(start_pfn),
- mm_to_dma_pfn_end(end_pfn));
- if (ret)
- return ret;
- }
- }
-
- /*
- * Identity map the RMRRs so that devices with RMRRs could also use
- * the si_domain.
- */
- for_each_rmrr_units(rmrr) {
- for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
- i, dev) {
- unsigned long long start = rmrr->base_address;
- unsigned long long end = rmrr->end_address;
-
- if (WARN_ON(end < start ||
- end >> agaw_to_width(si_domain->agaw)))
- continue;
-
- ret = iommu_domain_identity_map(si_domain,
- mm_to_dma_pfn_start(start >> PAGE_SHIFT),
- mm_to_dma_pfn_end(end >> PAGE_SHIFT));
- if (ret)
- return ret;
- }
- }
-
- return 0;
+ return __domain_setup_first_level(iommu, dev, pasid,
+ domain_id_iommu(domain, iommu),
+ (pgd_t *)pgd, flags, old);
}
static int dmar_domain_attach_device(struct dmar_domain *domain,
@@ -2327,7 +1807,9 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
ret = domain_attach_iommu(domain, iommu);
if (ret)
return ret;
+
info->domain = domain;
+ info->domain_attached = true;
spin_lock_irqsave(&domain->lock, flags);
list_add(&info->link, &domain->devices);
spin_unlock_irqrestore(&domain->lock, flags);
@@ -2337,22 +1819,25 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (!sm_supported(iommu))
ret = domain_context_mapping(domain, dev);
- else if (hw_pass_through && domain_type_is_si(domain))
- ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
else if (domain->use_first_level)
- ret = domain_setup_first_level(iommu, domain, dev, IOMMU_NO_PASID);
+ ret = domain_setup_first_level(iommu, domain, dev,
+ IOMMU_NO_PASID, NULL);
else
- ret = intel_pasid_setup_second_level(iommu, domain, dev, IOMMU_NO_PASID);
+ ret = domain_setup_second_level(iommu, domain, dev,
+ IOMMU_NO_PASID, NULL);
- if (ret) {
- device_block_translation(dev);
- return ret;
- }
+ if (ret)
+ goto out_block_translation;
- if (sm_supported(info->iommu) || !domain_type_is_si(info->domain))
- iommu_enable_pci_caps(info);
+ ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
+ if (ret)
+ goto out_block_translation;
return 0;
+
+out_block_translation:
+ device_block_translation(dev);
+ return ret;
}
/**
@@ -2384,27 +1869,23 @@ static bool device_rmrr_is_relaxable(struct device *dev)
return false;
}
-/*
- * Return the required default domain type for a specific device.
- *
- * @dev: the device in query
- * @startup: true if this is during early boot
- *
- * Returns:
- * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain
- * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain
- * - 0: both identity and dynamic domains work for this device
- */
static int device_def_domain_type(struct device *dev)
{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
+ /*
+ * Hardware does not support the passthrough translation mode.
+ * Always use a dynamaic mapping domain.
+ */
+ if (!ecap_pass_through(iommu->ecap))
+ return IOMMU_DOMAIN_DMA;
+
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
return IOMMU_DOMAIN_IDENTITY;
-
- if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
- return IOMMU_DOMAIN_IDENTITY;
}
return 0;
@@ -2497,7 +1978,8 @@ static int copy_context_table(struct intel_iommu *iommu,
if (!old_ce)
goto out;
- new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL);
+ new_ce = iommu_alloc_pages_node_sz(iommu->node,
+ GFP_KERNEL, SZ_4K);
if (!new_ce)
goto out_unmap;
@@ -2512,7 +1994,7 @@ static int copy_context_table(struct intel_iommu *iommu,
did = context_domain_id(&ce);
if (did >= 0 && did < cap_ndoms(iommu->cap))
- set_bit(did, iommu->domain_ids);
+ ida_alloc_range(&iommu->domain_ida, did, did, GFP_KERNEL);
set_context_copied(iommu, bus, devfn);
new_ce[idx] = ce;
@@ -2620,10 +2102,6 @@ static int __init init_dmars(void)
struct intel_iommu *iommu;
int ret;
- ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
- if (ret)
- goto free_iommu;
-
for_each_iommu(iommu, drhd) {
if (drhd->ignored) {
iommu_disable_translation(iommu);
@@ -2643,11 +2121,6 @@ static int __init init_dmars(void)
}
intel_iommu_init_qi(iommu);
-
- ret = iommu_init_domains(iommu);
- if (ret)
- goto free_iommu;
-
init_translation_status(iommu);
if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
@@ -2690,8 +2163,6 @@ static int __init init_dmars(void)
}
}
- if (!ecap_pass_through(iommu->ecap))
- hw_pass_through = 0;
intel_svm_check(iommu);
}
@@ -2705,15 +2176,8 @@ static int __init init_dmars(void)
iommu_set_root_entry(iommu);
}
- if (!dmar_map_gfx)
- iommu_identity_mapping |= IDENTMAP_GFX;
-
check_tylersburg_isoch();
- ret = si_domain_init(hw_pass_through);
- if (ret)
- goto free_iommu;
-
/*
* for each drhd
* enable fault log
@@ -2734,19 +2198,18 @@ static int __init init_dmars(void)
iommu_flush_write_buffer(iommu);
-#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
+ if (ecap_prs(iommu->ecap)) {
/*
* Call dmar_alloc_hwirq() with dmar_global_lock held,
* could cause possible lock race condition.
*/
up_write(&dmar_global_lock);
- ret = intel_svm_enable_prq(iommu);
+ ret = intel_iommu_enable_prq(iommu);
down_write(&dmar_global_lock);
if (ret)
goto free_iommu;
}
-#endif
+
ret = dmar_set_interrupt(iommu);
if (ret)
goto free_iommu;
@@ -2759,10 +2222,6 @@ free_iommu:
disable_dmar_iommu(iommu);
free_dmar_iommu(iommu);
}
- if (si_domain) {
- domain_exit(si_domain);
- si_domain = NULL;
- }
return ret;
}
@@ -2798,7 +2257,7 @@ static void __init init_no_remapping_devices(void)
/* This IOMMU has *only* gfx devices. Either bypass it or
set the gfx_mapped flag, as appropriate */
drhd->gfx_dedicated = 1;
- if (!dmar_map_gfx)
+ if (disable_igfx_iommu)
drhd->ignored = 1;
}
}
@@ -3130,25 +2589,8 @@ int dmar_parse_one_satc(struct acpi_dmar_header *hdr, void *arg)
static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
{
- int sp, ret;
struct intel_iommu *iommu = dmaru->iommu;
-
- ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
- if (ret)
- goto out;
-
- if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
- pr_warn("%s: Doesn't support hardware pass through.\n",
- iommu->name);
- return -ENXIO;
- }
-
- sp = domain_update_iommu_superpage(NULL, iommu) - 1;
- if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
- pr_warn("%s: Doesn't support large page.\n",
- iommu->name);
- return -ENXIO;
- }
+ int ret;
/*
* Disable translation if already enabled prior to OS handover.
@@ -3156,9 +2598,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
if (iommu->gcmd & DMA_GCMD_TE)
iommu_disable_translation(iommu);
- ret = iommu_init_domains(iommu);
- if (ret == 0)
- ret = iommu_alloc_root_entry(iommu);
+ ret = iommu_alloc_root_entry(iommu);
if (ret)
goto out;
@@ -3176,13 +2616,12 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
intel_iommu_init_qi(iommu);
iommu_flush_write_buffer(iommu);
-#ifdef CONFIG_INTEL_IOMMU_SVM
- if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) {
- ret = intel_svm_enable_prq(iommu);
+ if (ecap_prs(iommu->ecap)) {
+ ret = intel_iommu_enable_prq(iommu);
if (ret)
goto disable_iommu;
}
-#endif
+
ret = dmar_set_interrupt(iommu);
if (ret)
goto disable_iommu;
@@ -3250,7 +2689,6 @@ static struct dmar_satc_unit *dmar_find_matched_satc_unit(struct pci_dev *dev)
struct device *tmp;
int i;
- dev = pci_physfn(dev);
rcu_read_lock();
list_for_each_entry_rcu(satcu, &dmar_satc_units, list) {
@@ -3267,15 +2705,16 @@ out:
return satcu;
}
-static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
+static bool dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
{
- int i, ret = 1;
- struct pci_bus *bus;
struct pci_dev *bridge = NULL;
- struct device *tmp;
- struct acpi_dmar_atsr *atsr;
struct dmar_atsr_unit *atsru;
struct dmar_satc_unit *satcu;
+ struct acpi_dmar_atsr *atsr;
+ bool supported = true;
+ struct pci_bus *bus;
+ struct device *tmp;
+ int i;
dev = pci_physfn(dev);
satcu = dmar_find_matched_satc_unit(dev);
@@ -3293,11 +2732,11 @@ static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
bridge = bus->self;
/* If it's an integrated device, allow ATS */
if (!bridge)
- return 1;
+ return true;
/* Connected via non-PCIe: no ATS */
if (!pci_is_pcie(bridge) ||
pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
- return 0;
+ return false;
/* If we found the root port, look it up in the ATSR */
if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
break;
@@ -3316,11 +2755,11 @@ static int dmar_ats_supported(struct pci_dev *dev, struct intel_iommu *iommu)
if (atsru->include_all)
goto out;
}
- ret = 0;
+ supported = false;
out:
rcu_read_unlock();
- return ret;
+ return supported;
}
int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
@@ -3393,52 +2832,6 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
return 0;
}
-static int intel_iommu_memory_notifier(struct notifier_block *nb,
- unsigned long val, void *v)
-{
- struct memory_notify *mhp = v;
- unsigned long start_vpfn = mm_to_dma_pfn_start(mhp->start_pfn);
- unsigned long last_vpfn = mm_to_dma_pfn_end(mhp->start_pfn +
- mhp->nr_pages - 1);
-
- switch (val) {
- case MEM_GOING_ONLINE:
- if (iommu_domain_identity_map(si_domain,
- start_vpfn, last_vpfn)) {
- pr_warn("Failed to build identity map for [%lx-%lx]\n",
- start_vpfn, last_vpfn);
- return NOTIFY_BAD;
- }
- break;
-
- case MEM_OFFLINE:
- case MEM_CANCEL_ONLINE:
- {
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- LIST_HEAD(freelist);
-
- domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist);
-
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd)
- iommu_flush_iotlb_psi(iommu, si_domain,
- start_vpfn, mhp->nr_pages,
- list_empty(&freelist), 0);
- rcu_read_unlock();
- put_pages_list(&freelist);
- }
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block intel_iommu_memory_nb = {
- .notifier_call = intel_iommu_memory_notifier,
- .priority = 0
-};
-
static void intel_disable_iommus(void)
{
struct intel_iommu *iommu = NULL;
@@ -3456,16 +2849,19 @@ void intel_iommu_shutdown(void)
if (no_iommu || dmar_disabled)
return;
- down_write(&dmar_global_lock);
+ /*
+ * All other CPUs were brought down, hotplug interrupts were disabled,
+ * no lock and RCU checking needed anymore
+ */
+ list_for_each_entry(drhd, &dmar_drhd_units, list) {
+ iommu = drhd->iommu;
- /* Disable PMRs explicitly here. */
- for_each_iommu(iommu, drhd)
+ /* Disable PMRs explicitly here. */
iommu_disable_protect_mem_regions(iommu);
- /* Make sure the IOMMUs are switched off */
- intel_disable_iommus();
-
- up_write(&dmar_global_lock);
+ /* Make sure the IOMMUs are switched off */
+ iommu_disable_translation(iommu);
+ }
}
static struct intel_iommu *dev_to_intel_iommu(struct device *dev)
@@ -3521,9 +2917,14 @@ static ssize_t domains_used_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
- return sysfs_emit(buf, "%d\n",
- bitmap_weight(iommu->domain_ids,
- cap_ndoms(iommu->cap)));
+ unsigned int count = 0;
+ int id;
+
+ for (id = 0; id < cap_ndoms(iommu->cap); id++)
+ if (ida_exists(&iommu->domain_ida, id))
+ count++;
+
+ return sysfs_emit(buf, "%d\n", count);
}
static DEVICE_ATTR_RO(domains_used);
@@ -3598,6 +2999,7 @@ static int __init probe_acpi_namespace_devices(void)
if (dev->bus != &acpi_bus_type)
continue;
+ up_read(&dmar_global_lock);
adev = to_acpi_device(dev);
mutex_lock(&adev->physical_node_lock);
list_for_each_entry(pn,
@@ -3607,6 +3009,7 @@ static int __init probe_acpi_namespace_devices(void)
break;
}
mutex_unlock(&adev->physical_node_lock);
+ down_read(&dmar_global_lock);
if (ret)
return ret;
@@ -3724,23 +3127,25 @@ int __init intel_iommu_init(void)
* the virtual and physical IOMMU page-tables.
*/
if (cap_caching_mode(iommu->cap) &&
- !first_level_by_default(IOMMU_DOMAIN_DMA)) {
+ !first_level_by_default(iommu)) {
pr_info_once("IOMMU batching disallowed due to virtualization\n");
iommu_set_dma_strict();
}
iommu_device_sysfs_add(&iommu->iommu, NULL,
intel_iommu_groups,
"%s", iommu->name);
+ /*
+ * The iommu device probe is protected by the iommu_probe_device_lock.
+ * Release the dmar_global_lock before entering the device probe path
+ * to avoid unnecessary lock order splat.
+ */
+ up_read(&dmar_global_lock);
iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
+ down_read(&dmar_global_lock);
iommu_pmu_register(iommu);
}
- up_read(&dmar_global_lock);
-
- if (si_domain && !hw_pass_through)
- register_memory_notifier(&intel_iommu_memory_nb);
- down_read(&dmar_global_lock);
if (probe_acpi_namespace_devices())
pr_warn("ACPI name space devices didn't probe correctly\n");
@@ -3781,11 +3186,14 @@ static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *op
*/
static void domain_context_clear(struct device_domain_info *info)
{
- if (!dev_is_pci(info->dev))
+ if (!dev_is_pci(info->dev)) {
domain_context_clear_one(info, info->bus, info->devfn);
+ return;
+ }
pci_for_each_dma_alias(to_pci_dev(info->dev),
&domain_context_clear_one_cb, info);
+ iommu_disable_pci_ats(info);
}
/*
@@ -3799,7 +3207,13 @@ void device_block_translation(struct device *dev)
struct intel_iommu *iommu = info->iommu;
unsigned long flags;
- iommu_disable_pci_caps(info);
+ /* Device in DMA blocking state. Noting to do. */
+ if (!info->domain_attached)
+ return;
+
+ if (info->domain)
+ cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
+
if (!dev_is_real_dma_subdevice(dev)) {
if (sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, dev,
@@ -3808,6 +3222,9 @@ void device_block_translation(struct device *dev)
domain_context_clear(info);
}
+ /* Device now in DMA blocking state. */
+ info->domain_attached = false;
+
if (!info->domain)
return;
@@ -3819,82 +3236,103 @@ void device_block_translation(struct device *dev)
info->domain = NULL;
}
-static int md_domain_init(struct dmar_domain *domain, int guest_width)
-{
- int adjust_width;
-
- /* calculate AGAW */
- domain->gaw = guest_width;
- adjust_width = guestwidth_to_adjustwidth(guest_width);
- domain->agaw = width_to_agaw(adjust_width);
-
- domain->iommu_coherency = false;
- domain->iommu_superpage = 0;
- domain->max_addr = 0;
-
- /* always allocate the top pgd */
- domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
- if (!domain->pgd)
- return -ENOMEM;
- domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
- return 0;
-}
-
static int blocking_domain_attach_dev(struct iommu_domain *domain,
struct device *dev)
{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+ iopf_for_domain_remove(info->domain ? &info->domain->domain : NULL, dev);
device_block_translation(dev);
return 0;
}
+static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old);
+
static struct iommu_domain blocking_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocking_domain_attach_dev,
+ .set_dev_pasid = blocking_domain_set_dev_pasid,
}
};
-static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
+static int iommu_superpage_capability(struct intel_iommu *iommu, bool first_stage)
{
- struct dmar_domain *dmar_domain;
- struct iommu_domain *domain;
+ if (!intel_iommu_superpage)
+ return 0;
- switch (type) {
- case IOMMU_DOMAIN_DMA:
- case IOMMU_DOMAIN_UNMANAGED:
- dmar_domain = alloc_domain(type);
- if (!dmar_domain) {
- pr_err("Can't allocate dmar_domain\n");
- return NULL;
- }
- if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
- pr_err("Domain initialization failed\n");
- domain_exit(dmar_domain);
- return NULL;
- }
+ if (first_stage)
+ return cap_fl1gp_support(iommu->cap) ? 2 : 1;
- domain = &dmar_domain->domain;
- domain->geometry.aperture_start = 0;
- domain->geometry.aperture_end =
- __DOMAIN_MAX_ADDR(dmar_domain->gaw);
- domain->geometry.force_aperture = true;
-
- return domain;
- case IOMMU_DOMAIN_IDENTITY:
- return &si_domain->domain;
- case IOMMU_DOMAIN_SVA:
- return intel_svm_domain_alloc();
- default:
- return NULL;
+ return fls(cap_super_page_val(iommu->cap));
+}
+
+static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_stage)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct dmar_domain *domain;
+ int addr_width;
+
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ if (!domain)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&domain->devices);
+ INIT_LIST_HEAD(&domain->dev_pasids);
+ INIT_LIST_HEAD(&domain->cache_tags);
+ spin_lock_init(&domain->lock);
+ spin_lock_init(&domain->cache_lock);
+ xa_init(&domain->iommu_array);
+
+ domain->nid = dev_to_node(dev);
+ domain->use_first_level = first_stage;
+
+ /* calculate the address width */
+ addr_width = agaw_to_width(iommu->agaw);
+ if (addr_width > cap_mgaw(iommu->cap))
+ addr_width = cap_mgaw(iommu->cap);
+ domain->gaw = addr_width;
+ domain->agaw = iommu->agaw;
+ domain->max_addr = __DOMAIN_MAX_ADDR(addr_width);
+
+ /* iommu memory access coherency */
+ domain->iommu_coherency = iommu_paging_structure_coherency(iommu);
+
+ /* pagesize bitmap */
+ domain->domain.pgsize_bitmap = SZ_4K;
+ domain->iommu_superpage = iommu_superpage_capability(iommu, first_stage);
+ domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain);
+
+ /*
+ * IOVA aperture: First-level translation restricts the input-address
+ * to a canonical address (i.e., address bits 63:N have the same value
+ * as address bit [N-1], where N is 48-bits with 4-level paging and
+ * 57-bits with 5-level paging). Hence, skip bit [N-1].
+ */
+ domain->domain.geometry.force_aperture = true;
+ domain->domain.geometry.aperture_start = 0;
+ if (first_stage)
+ domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
+ else
+ domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
+
+ /* always allocate the top pgd */
+ domain->pgd = iommu_alloc_pages_node_sz(domain->nid, GFP_KERNEL, SZ_4K);
+ if (!domain->pgd) {
+ kfree(domain);
+ return ERR_PTR(-ENOMEM);
}
+ domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
- return NULL;
+ return domain;
}
static struct iommu_domain *
-intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
- struct iommu_domain *parent,
- const struct iommu_user_data *user_data)
+intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
+ const struct iommu_user_data *user_data)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
@@ -3902,16 +3340,11 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
struct intel_iommu *iommu = info->iommu;
struct dmar_domain *dmar_domain;
struct iommu_domain *domain;
-
- /* Must be NESTING domain */
- if (parent) {
- if (!nested_supported(iommu) || flags)
- return ERR_PTR(-EOPNOTSUPP);
- return intel_nested_domain_alloc(parent, user_data);
- }
+ bool first_stage;
if (flags &
- (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
+ (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
+ IOMMU_HWPT_ALLOC_PASID)))
return ERR_PTR(-EOPNOTSUPP);
if (nested_parent && !nested_supported(iommu))
return ERR_PTR(-EOPNOTSUPP);
@@ -3919,14 +3352,25 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
return ERR_PTR(-EOPNOTSUPP);
/*
- * domain_alloc_user op needs to fully initialize a domain before
- * return, so uses iommu_domain_alloc() here for simple.
+ * Always allocate the guest compatible page table unless
+ * IOMMU_HWPT_ALLOC_NEST_PARENT or IOMMU_HWPT_ALLOC_DIRTY_TRACKING
+ * is specified.
*/
- domain = iommu_domain_alloc(dev->bus);
- if (!domain)
- return ERR_PTR(-ENOMEM);
+ if (nested_parent || dirty_tracking) {
+ if (!sm_supported(iommu) || !ecap_slts(iommu->ecap))
+ return ERR_PTR(-EOPNOTSUPP);
+ first_stage = false;
+ } else {
+ first_stage = first_level_by_default(iommu);
+ }
- dmar_domain = to_dmar_domain(domain);
+ dmar_domain = paging_domain_alloc(dev, first_stage);
+ if (IS_ERR(dmar_domain))
+ return ERR_CAST(dmar_domain);
+ domain = &dmar_domain->domain;
+ domain->type = IOMMU_DOMAIN_UNMANAGED;
+ domain->owner = &intel_iommu_ops;
+ domain->ops = intel_iommu_ops.default_domain_ops;
if (nested_parent) {
dmar_domain->nested_parent = true;
@@ -3951,46 +3395,44 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
WARN_ON(dmar_domain->nested_parent &&
!list_empty(&dmar_domain->s1_domains));
- if (domain != &si_domain->domain)
- domain_exit(dmar_domain);
+ domain_exit(dmar_domain);
}
-int prepare_domain_attach_device(struct iommu_domain *domain,
- struct device *dev)
+int paging_domain_compatible(struct iommu_domain *domain, struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
int addr_width;
+ if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
+ return -EPERM;
+
if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
return -EINVAL;
if (domain->dirty_ops && !ssads_supported(iommu))
return -EINVAL;
+ if (dmar_domain->iommu_coherency !=
+ iommu_paging_structure_coherency(iommu))
+ return -EINVAL;
+
+ if (dmar_domain->iommu_superpage !=
+ iommu_superpage_capability(iommu, dmar_domain->use_first_level))
+ return -EINVAL;
+
+ if (dmar_domain->use_first_level &&
+ (!sm_supported(iommu) || !ecap_flts(iommu->ecap)))
+ return -EINVAL;
+
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
addr_width = cap_mgaw(iommu->cap);
- if (dmar_domain->max_addr > (1LL << addr_width))
+ if (dmar_domain->gaw > addr_width || dmar_domain->agaw > iommu->agaw)
return -EINVAL;
- dmar_domain->gaw = addr_width;
-
- /*
- * Knock out extra levels of page tables if necessary
- */
- while (iommu->agaw < dmar_domain->agaw) {
- struct dma_pte *pte;
-
- pte = dmar_domain->pgd;
- if (dma_pte_present(pte)) {
- dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
- free_pgtable_page(pte);
- }
- dmar_domain->agaw--;
- }
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
context_copied(iommu, info->bus, info->devfn))
@@ -4002,17 +3444,23 @@ int prepare_domain_attach_device(struct iommu_domain *domain,
static int intel_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
int ret;
- if (info->domain)
- device_block_translation(dev);
+ device_block_translation(dev);
- ret = prepare_domain_attach_device(domain, dev);
+ ret = paging_domain_compatible(domain, dev);
if (ret)
return ret;
- return dmar_domain_attach_device(to_dmar_domain(domain), dev);
+ ret = iopf_for_domain_set(domain, dev);
+ if (ret)
+ return ret;
+
+ ret = dmar_domain_attach_device(to_dmar_domain(domain), dev);
+ if (ret)
+ iopf_for_domain_remove(domain, dev);
+
+ return ret;
}
static int intel_iommu_map(struct iommu_domain *domain,
@@ -4122,26 +3570,10 @@ static size_t intel_iommu_unmap_pages(struct iommu_domain *domain,
static void intel_iommu_tlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- unsigned long iova_pfn = IOVA_PFN(gather->start);
- size_t size = gather->end - gather->start;
- struct iommu_domain_info *info;
- unsigned long start_pfn;
- unsigned long nrpages;
- unsigned long i;
-
- nrpages = aligned_nrpages(gather->start, size);
- start_pfn = mm_to_dma_pfn_start(iova_pfn);
-
- xa_for_each(&dmar_domain->iommu_array, i, info)
- iommu_flush_iotlb_psi(info->iommu, dmar_domain,
- start_pfn, nrpages,
- list_empty(&gather->freelist), 0);
-
- if (dmar_domain->nested_parent)
- parent_domain_flush(dmar_domain, start_pfn, nrpages,
- list_empty(&gather->freelist));
- put_pages_list(&gather->freelist);
+ cache_tag_flush_range(to_dmar_domain(domain), gather->start,
+ gather->end,
+ iommu_pages_list_empty(&gather->freelist));
+ iommu_put_pages_list(&gather->freelist);
}
static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -4300,6 +3732,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
dev_iommu_priv_set(dev, info);
if (pdev && pci_ats_supported(pdev)) {
+ pci_prepare_ats(pdev, VTD_PAGE_SHIFT);
ret = device_rbtree_insert(iommu, info);
if (ret)
goto free;
@@ -4332,11 +3765,39 @@ free:
return ERR_PTR(ret);
}
+static void intel_iommu_probe_finalize(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
+ /*
+ * The PCIe spec, in its wisdom, declares that the behaviour of the
+ * device is undefined if you enable PASID support after ATS support.
+ * So always enable PASID support on devices which have it, even if
+ * we can't yet know if we're ever going to use it.
+ */
+ if (info->pasid_supported &&
+ !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1))
+ info->pasid_enabled = 1;
+
+ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev))
+ iommu_enable_pci_ats(info);
+ iommu_enable_pci_pri(info);
+}
+
static void intel_iommu_release_device(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
+ iommu_disable_pci_pri(info);
+ iommu_disable_pci_ats(info);
+
+ if (info->pasid_enabled) {
+ pci_disable_pasid(to_pci_dev(dev));
+ info->pasid_enabled = 0;
+ }
+
mutex_lock(&iommu->iopf_lock);
if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
device_rbtree_remove(info);
@@ -4349,13 +3810,6 @@ static void intel_iommu_release_device(struct device *dev)
intel_pasid_free_table(dev);
intel_iommu_debugfs_remove_dev(info);
kfree(info);
- set_dma_ops(dev, NULL);
-}
-
-static void intel_iommu_probe_finalize(struct device *dev)
-{
- set_dma_ops(dev, NULL);
- iommu_setup_dma_ops(dev, 0, U64_MAX);
}
static void intel_iommu_get_resv_regions(struct device *device,
@@ -4424,132 +3878,44 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev)
return generic_device_group(dev);
}
-static int intel_iommu_enable_sva(struct device *dev)
+int intel_iommu_enable_iopf(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu;
-
- if (!info || dmar_disabled)
- return -EINVAL;
-
- iommu = info->iommu;
- if (!iommu)
- return -EINVAL;
-
- if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
- return -ENODEV;
-
- if (!info->pasid_enabled || !info->ats_enabled)
- return -EINVAL;
-
- /*
- * Devices having device-specific I/O fault handling should not
- * support PCI/PRI. The IOMMU side has no means to check the
- * capability of device-specific IOPF. Therefore, IOMMU can only
- * default that if the device driver enables SVA on a non-PRI
- * device, it will handle IOPF in its own way.
- */
- if (!info->pri_supported)
- return 0;
-
- /* Devices supporting PRI should have it enabled. */
- if (!info->pri_enabled)
- return -EINVAL;
-
- return 0;
-}
-
-static int intel_iommu_enable_iopf(struct device *dev)
-{
- struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu;
+ struct intel_iommu *iommu = info->iommu;
int ret;
- if (!pdev || !info || !info->ats_enabled || !info->pri_supported)
+ if (!info->pri_enabled)
return -ENODEV;
- if (info->pri_enabled)
- return -EBUSY;
-
- iommu = info->iommu;
- if (!iommu)
- return -EINVAL;
-
- /* PASID is required in PRG Response Message. */
- if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
- return -EINVAL;
-
- ret = pci_reset_pri(pdev);
- if (ret)
- return ret;
+ /* pri_enabled is protected by the group mutex. */
+ iommu_group_mutex_assert(dev);
+ if (info->iopf_refcount) {
+ info->iopf_refcount++;
+ return 0;
+ }
ret = iopf_queue_add_device(iommu->iopf_queue, dev);
if (ret)
return ret;
- ret = pci_enable_pri(pdev, PRQ_DEPTH);
- if (ret) {
- iopf_queue_remove_device(iommu->iopf_queue, dev);
- return ret;
- }
-
- info->pri_enabled = 1;
+ info->iopf_refcount = 1;
return 0;
}
-static int intel_iommu_disable_iopf(struct device *dev)
+void intel_iommu_disable_iopf(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
- if (!info->pri_enabled)
- return -EINVAL;
-
- /*
- * PCIe spec states that by clearing PRI enable bit, the Page
- * Request Interface will not issue new page requests, but has
- * outstanding page requests that have been transmitted or are
- * queued for transmission. This is supposed to be called after
- * the device driver has stopped DMA, all PASIDs have been
- * unbound and the outstanding PRQs have been drained.
- */
- pci_disable_pri(to_pci_dev(dev));
- info->pri_enabled = 0;
- iopf_queue_remove_device(iommu->iopf_queue, dev);
-
- return 0;
-}
-
-static int
-intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
-{
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- return intel_iommu_enable_iopf(dev);
-
- case IOMMU_DEV_FEAT_SVA:
- return intel_iommu_enable_sva(dev);
-
- default:
- return -ENODEV;
- }
-}
-
-static int
-intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
-{
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- return intel_iommu_disable_iopf(dev);
+ if (WARN_ON(!info->pri_enabled || !info->iopf_refcount))
+ return;
- case IOMMU_DEV_FEAT_SVA:
- return 0;
+ iommu_group_mutex_assert(dev);
+ if (--info->iopf_refcount)
+ return;
- default:
- return -ENODEV;
- }
+ iopf_queue_remove_device(iommu->iopf_queue, dev);
}
static bool intel_iommu_is_attach_deferred(struct device *dev)
@@ -4579,39 +3945,26 @@ static bool risky_device(struct pci_dev *pdev)
static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
- struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- unsigned long pages = aligned_nrpages(iova, size);
- unsigned long pfn = iova >> VTD_PAGE_SHIFT;
- struct iommu_domain_info *info;
- unsigned long i;
+ cache_tag_flush_range_np(to_dmar_domain(domain), iova, iova + size - 1);
- xa_for_each(&dmar_domain->iommu_array, i, info)
- __mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
return 0;
}
-static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
+void domain_remove_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dev_pasid_info *curr, *dev_pasid = NULL;
struct intel_iommu *iommu = info->iommu;
struct dmar_domain *dmar_domain;
- struct iommu_domain *domain;
unsigned long flags;
- domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
- if (WARN_ON_ONCE(!domain))
- goto out_tear_down;
+ if (!domain)
+ return;
- /*
- * The SVA implementation needs to handle its own stuffs like the mm
- * notification. Before consolidating that code into iommu core, let
- * the intel sva code handle it.
- */
- if (domain->type == IOMMU_DOMAIN_SVA) {
- intel_svm_remove_dev_pasid(dev, pasid);
- goto out_tear_down;
- }
+ /* Identity domain has no meta data for pasid. */
+ if (domain->type == IOMMU_DOMAIN_IDENTITY)
+ return;
dmar_domain = to_dmar_domain(domain);
spin_lock_irqsave(&dmar_domain->lock, flags);
@@ -4622,27 +3975,79 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
break;
}
}
- WARN_ON_ONCE(!dev_pasid);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ cache_tag_unassign_domain(dmar_domain, dev, pasid);
+ domain_detach_iommu(dmar_domain, iommu);
+ if (!WARN_ON_ONCE(!dev_pasid)) {
+ intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
+ kfree(dev_pasid);
+ }
+}
+
+static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+ iopf_for_domain_remove(old, dev);
+ intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
+ domain_remove_dev_pasid(old, dev, pasid);
+
+ return 0;
+}
+
+struct dev_pasid_info *
+domain_add_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+ struct intel_iommu *iommu = info->iommu;
+ struct dev_pasid_info *dev_pasid;
+ unsigned long flags;
+ int ret;
+
+ dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
+ if (!dev_pasid)
+ return ERR_PTR(-ENOMEM);
+
+ ret = domain_attach_iommu(dmar_domain, iommu);
+ if (ret)
+ goto out_free;
+
+ ret = cache_tag_assign_domain(dmar_domain, dev, pasid);
+ if (ret)
+ goto out_detach_iommu;
+
+ dev_pasid->dev = dev;
+ dev_pasid->pasid = pasid;
+ spin_lock_irqsave(&dmar_domain->lock, flags);
+ list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
+ spin_unlock_irqrestore(&dmar_domain->lock, flags);
+
+ return dev_pasid;
+out_detach_iommu:
domain_detach_iommu(dmar_domain, iommu);
- intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
+out_free:
kfree(dev_pasid);
-out_tear_down:
- intel_pasid_tear_down_entry(iommu, dev, pasid, false);
- intel_drain_pasid_prq(dev, pasid);
+ return ERR_PTR(ret);
}
static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
struct intel_iommu *iommu = info->iommu;
struct dev_pasid_info *dev_pasid;
- unsigned long flags;
int ret;
+ if (WARN_ON_ONCE(!(domain->type & __IOMMU_DOMAIN_PAGING)))
+ return -EINVAL;
+
if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
return -EOPNOTSUPP;
@@ -4652,43 +4057,37 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
if (context_copied(iommu, info->bus, info->devfn))
return -EBUSY;
- ret = prepare_domain_attach_device(domain, dev);
+ ret = paging_domain_compatible(domain, dev);
if (ret)
return ret;
- dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL);
- if (!dev_pasid)
- return -ENOMEM;
+ dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
+ if (IS_ERR(dev_pasid))
+ return PTR_ERR(dev_pasid);
- ret = domain_attach_iommu(dmar_domain, iommu);
+ ret = iopf_for_domain_replace(domain, old, dev);
if (ret)
- goto out_free;
+ goto out_remove_dev_pasid;
- if (domain_type_is_si(dmar_domain))
- ret = intel_pasid_setup_pass_through(iommu, dev, pasid);
- else if (dmar_domain->use_first_level)
+ if (dmar_domain->use_first_level)
ret = domain_setup_first_level(iommu, dmar_domain,
- dev, pasid);
+ dev, pasid, old);
else
- ret = intel_pasid_setup_second_level(iommu, dmar_domain,
- dev, pasid);
+ ret = domain_setup_second_level(iommu, dmar_domain,
+ dev, pasid, old);
if (ret)
- goto out_detach_iommu;
+ goto out_unwind_iopf;
- dev_pasid->dev = dev;
- dev_pasid->pasid = pasid;
- spin_lock_irqsave(&dmar_domain->lock, flags);
- list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
- spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ domain_remove_dev_pasid(old, dev, pasid);
- if (domain->type & __IOMMU_DOMAIN_PAGING)
- intel_iommu_debugfs_create_dev_pasid(dev_pasid);
+ intel_iommu_debugfs_create_dev_pasid(dev_pasid);
return 0;
-out_detach_iommu:
- domain_detach_iommu(dmar_domain, iommu);
-out_free:
- kfree(dev_pasid);
+
+out_unwind_iopf:
+ iopf_for_domain_replace(old, domain, dev);
+out_remove_dev_pasid:
+ domain_remove_dev_pasid(domain, dev, pasid);
return ret;
}
@@ -4834,27 +4233,140 @@ static const struct iommu_dirty_ops intel_dirty_ops = {
.read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
};
+static int context_setup_pass_through(struct device *dev, u8 bus, u8 devfn)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ struct context_entry *context;
+
+ spin_lock(&iommu->lock);
+ context = iommu_context_addr(iommu, bus, devfn, 1);
+ if (!context) {
+ spin_unlock(&iommu->lock);
+ return -ENOMEM;
+ }
+
+ if (context_present(context) && !context_copied(iommu, bus, devfn)) {
+ spin_unlock(&iommu->lock);
+ return 0;
+ }
+
+ copied_context_tear_down(iommu, context, bus, devfn);
+ context_clear_entry(context);
+ context_set_domain_id(context, FLPT_DEFAULT_DID);
+
+ /*
+ * In pass through mode, AW must be programmed to indicate the largest
+ * AGAW value supported by hardware. And ASR is ignored by hardware.
+ */
+ context_set_address_width(context, iommu->msagaw);
+ context_set_translation_type(context, CONTEXT_TT_PASS_THROUGH);
+ context_set_fault_enable(context);
+ context_set_present(context);
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(context, sizeof(*context));
+ context_present_cache_flush(iommu, FLPT_DEFAULT_DID, bus, devfn);
+ spin_unlock(&iommu->lock);
+
+ return 0;
+}
+
+static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct device *dev = data;
+
+ return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff);
+}
+
+static int device_setup_pass_through(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+ if (!dev_is_pci(dev))
+ return context_setup_pass_through(dev, info->bus, info->devfn);
+
+ return pci_for_each_dma_alias(to_pci_dev(dev),
+ context_setup_pass_through_cb, dev);
+}
+
+static int identity_domain_attach_dev(struct iommu_domain *domain, struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ int ret;
+
+ device_block_translation(dev);
+
+ if (dev_is_real_dma_subdevice(dev))
+ return 0;
+
+ /*
+ * No PRI support with the global identity domain. No need to enable or
+ * disable PRI in this path as the iommu has been put in the blocking
+ * state.
+ */
+ if (sm_supported(iommu))
+ ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
+ else
+ ret = device_setup_pass_through(dev);
+
+ if (!ret)
+ info->domain_attached = true;
+
+ return ret;
+}
+
+static int identity_domain_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+ int ret;
+
+ if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
+ return -EOPNOTSUPP;
+
+ ret = iopf_for_domain_replace(domain, old, dev);
+ if (ret)
+ return ret;
+
+ ret = domain_setup_passthrough(iommu, dev, pasid, old);
+ if (ret) {
+ iopf_for_domain_replace(old, domain, dev);
+ return ret;
+ }
+
+ domain_remove_dev_pasid(old, dev, pasid);
+ return 0;
+}
+
+static struct iommu_domain identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &(const struct iommu_domain_ops) {
+ .attach_dev = identity_domain_attach_dev,
+ .set_dev_pasid = identity_domain_set_dev_pasid,
+ },
+};
+
const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
.release_domain = &blocking_domain,
+ .identity_domain = &identity_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
- .domain_alloc = intel_iommu_domain_alloc,
- .domain_alloc_user = intel_iommu_domain_alloc_user,
+ .domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags,
+ .domain_alloc_sva = intel_svm_domain_alloc,
+ .domain_alloc_nested = intel_iommu_domain_alloc_nested,
.probe_device = intel_iommu_probe_device,
.probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
.device_group = intel_iommu_device_group,
- .dev_enable_feat = intel_iommu_dev_enable_feat,
- .dev_disable_feat = intel_iommu_dev_disable_feat,
.is_attach_deferred = intel_iommu_is_attach_deferred,
.def_domain_type = device_def_domain_type,
- .remove_dev_pasid = intel_iommu_remove_dev_pasid,
.pgsize_bitmap = SZ_4K,
-#ifdef CONFIG_INTEL_IOMMU_SVM
- .page_response = intel_svm_page_response,
-#endif
+ .page_response = intel_iommu_page_response,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = intel_iommu_attach_device,
.set_dev_pasid = intel_iommu_set_dev_pasid,
@@ -4875,7 +4387,7 @@ static void quirk_iommu_igfx(struct pci_dev *dev)
return;
pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
- dmar_map_gfx = 0;
+ disable_igfx_iommu = 1;
}
/* G4x/GM45 integrated gfx dmar support is totally busted. */
@@ -4887,6 +4399,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
+/* QM57/QS57 integrated gfx malfunctions with dmar */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx);
+
/* Broadwell igfx malfunctions with dmar */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
@@ -4956,15 +4471,14 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
- dmar_map_gfx = 0;
- } else if (dmar_map_gfx) {
+ disable_igfx_iommu = 1;
+ } else if (!disable_igfx_iommu) {
/* we have to ensure the gfx device is idle before we flush */
pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n");
iommu_set_dma_strict();
}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);