summaryrefslogtreecommitdiff
path: root/drivers/iommu/intel/iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r--drivers/iommu/intel/iommu.c230
1 files changed, 94 insertions, 136 deletions
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ee0932307d64..0e04d450c38a 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -360,7 +360,6 @@ int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);
static int dmar_map_gfx = 1;
-static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int iommu_identity_mapping;
@@ -451,8 +450,8 @@ static int __init intel_iommu_setup(char *str)
dmar_map_gfx = 0;
pr_info("Disable GFX device mapping\n");
} else if (!strncmp(str, "forcedac", 8)) {
- pr_info("Forcing DAC for PCI devices\n");
- dmar_forcedac = 1;
+ pr_warn("intel_iommu=forcedac deprecated; use iommu.forcedac instead\n");
+ iommu_dma_forcedac = true;
} else if (!strncmp(str, "strict", 6)) {
pr_info("Disable batched IOTLB flush\n");
intel_iommu_strict = 1;
@@ -658,7 +657,14 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip)
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
if (iommu != skip) {
- if (!ecap_sc_support(iommu->ecap)) {
+ /*
+ * If the hardware is operating in the scalable mode,
+ * the snooping control is always supported since we
+ * always set PASID-table-entry.PGSNP bit if the domain
+ * is managed outside (UNMANAGED).
+ */
+ if (!sm_supported(iommu) &&
+ !ecap_sc_support(iommu->ecap)) {
ret = 0;
break;
}
@@ -1340,6 +1346,11 @@ static void iommu_set_root_entry(struct intel_iommu *iommu)
readl, (sts & DMA_GSTS_RTPS), sts);
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
+
+ iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+ if (sm_supported(iommu))
+ qi_flush_pasid_cache(iommu, 0, QI_PC_GLOBAL, 0);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}
void iommu_flush_write_buffer(struct intel_iommu *iommu)
@@ -2289,6 +2300,41 @@ static inline int hardware_largepage_caps(struct dmar_domain *domain,
return level;
}
+/*
+ * Ensure that old small page tables are removed to make room for superpage(s).
+ * We're going to add new large pages, so make sure we don't remove their parent
+ * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
+ */
+static void switch_to_super_page(struct dmar_domain *domain,
+ unsigned long start_pfn,
+ unsigned long end_pfn, int level)
+{
+ unsigned long lvl_pages = lvl_to_nr_pages(level);
+ struct dma_pte *pte = NULL;
+ int i;
+
+ while (start_pfn <= end_pfn) {
+ if (!pte)
+ pte = pfn_to_dma_pte(domain, start_pfn, &level);
+
+ if (dma_pte_present(pte)) {
+ dma_pte_free_pagetable(domain, start_pfn,
+ start_pfn + lvl_pages - 1,
+ level + 1);
+
+ for_each_domain_iommu(i, domain)
+ iommu_flush_iotlb_psi(g_iommus[i], domain,
+ start_pfn, lvl_pages,
+ 0, 0);
+ }
+
+ pte++;
+ start_pfn += lvl_pages;
+ if (first_pte_in_page(pte))
+ pte = NULL;
+ }
+}
+
static int
__domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long phys_pfn, unsigned long nr_pages, int prot)
@@ -2305,8 +2351,9 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
return -EINVAL;
attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
+ attr |= DMA_FL_PTE_PRESENT;
if (domain_use_first_level(domain)) {
- attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD | DMA_FL_PTE_US;
+ attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US;
if (domain->domain.type == IOMMU_DOMAIN_DMA) {
attr |= DMA_FL_PTE_ACCESS;
@@ -2329,22 +2376,11 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
return -ENOMEM;
/* It is large page*/
if (largepage_lvl > 1) {
- unsigned long nr_superpages, end_pfn;
+ unsigned long end_pfn;
pteval |= DMA_PTE_LARGE_PAGE;
- lvl_pages = lvl_to_nr_pages(largepage_lvl);
-
- nr_superpages = nr_pages / lvl_pages;
- end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
-
- /*
- * Ensure that old small page tables are
- * removed to make room for superpage(s).
- * We're adding new large pages, so make sure
- * we don't remove their parent tables.
- */
- dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
- largepage_lvl + 1);
+ end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1;
+ switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
} else {
pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
}
@@ -2422,6 +2458,10 @@ static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn
(((u16)bus) << 8) | devfn,
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
+
+ if (sm_supported(iommu))
+ qi_flush_pasid_cache(iommu, did_old, QI_PC_ALL_PASIDS, 0);
+
iommu->flush.flush_iotlb(iommu,
did_old,
0,
@@ -2505,6 +2545,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
+ if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
+ flags |= PASID_FLAG_PAGE_SNOOP;
+
return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
domain->iommu_did[iommu->seq_id],
flags);
@@ -3267,8 +3310,6 @@ static int __init init_dmars(void)
register_pasid_allocator(iommu);
#endif
iommu_set_root_entry(iommu);
- iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}
#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
@@ -3458,12 +3499,7 @@ static int init_iommu_hw(void)
}
iommu_flush_write_buffer(iommu);
-
iommu_set_root_entry(iommu);
-
- iommu->flush.flush_context(iommu, 0, 0, 0,
- DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
iommu_enable_translation(iommu);
iommu_disable_protect_mem_regions(iommu);
}
@@ -3846,8 +3882,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
goto disable_iommu;
iommu_set_root_entry(iommu);
- iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
iommu_enable_translation(iommu);
iommu_disable_protect_mem_regions(iommu);
@@ -4065,35 +4099,6 @@ static struct notifier_block intel_iommu_memory_nb = {
.priority = 0
};
-static void free_all_cpu_cached_iovas(unsigned int cpu)
-{
- int i;
-
- for (i = 0; i < g_num_of_iommus; i++) {
- struct intel_iommu *iommu = g_iommus[i];
- struct dmar_domain *domain;
- int did;
-
- if (!iommu)
- continue;
-
- for (did = 0; did < cap_ndoms(iommu->cap); did++) {
- domain = get_iommu_domain(iommu, (u16)did);
-
- if (!domain || domain->domain.type != IOMMU_DOMAIN_DMA)
- continue;
-
- iommu_dma_free_cpu_cached_iovas(cpu, &domain->domain);
- }
- }
-}
-
-static int intel_iommu_cpu_dead(unsigned int cpu)
-{
- free_all_cpu_cached_iovas(cpu);
- return 0;
-}
-
static void intel_disable_iommus(void)
{
struct intel_iommu *iommu = NULL;
@@ -4377,6 +4382,17 @@ int __init intel_iommu_init(void)
down_read(&dmar_global_lock);
for_each_active_iommu(iommu, drhd) {
+ /*
+ * The flush queue implementation does not perform
+ * page-selective invalidations that are required for efficient
+ * TLB flushes in virtual environments. The benefit of batching
+ * is likely to be much lower than the overhead of synchronizing
+ * the virtual and physical IOMMU page-tables.
+ */
+ if (!intel_iommu_strict && cap_caching_mode(iommu->cap)) {
+ pr_warn("IOMMU batching is disabled due to virtualization");
+ intel_iommu_strict = 1;
+ }
iommu_device_sysfs_add(&iommu->iommu, NULL,
intel_iommu_groups,
"%s", iommu->name);
@@ -4385,11 +4401,10 @@ int __init intel_iommu_init(void)
}
up_read(&dmar_global_lock);
+ iommu_set_dma_strict(intel_iommu_strict);
bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
if (si_domain && !hw_pass_through)
register_memory_notifier(&intel_iommu_memory_nb);
- cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
- intel_iommu_cpu_dead);
down_read(&dmar_global_lock);
if (probe_acpi_namespace_devices())
@@ -5343,6 +5358,8 @@ static int siov_find_pci_dvsec(struct pci_dev *pdev)
static bool
intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
{
+ struct device_domain_info *info = get_domain_info(dev);
+
if (feat == IOMMU_DEV_FEAT_AUX) {
int ret;
@@ -5357,13 +5374,13 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
return !!siov_find_pci_dvsec(to_pci_dev(dev));
}
- if (feat == IOMMU_DEV_FEAT_SVA) {
- struct device_domain_info *info = get_domain_info(dev);
+ if (feat == IOMMU_DEV_FEAT_IOPF)
+ return info && info->pri_supported;
+ if (feat == IOMMU_DEV_FEAT_SVA)
return info && (info->iommu->flags & VTD_FLAG_SVM_CAPABLE) &&
info->pasid_supported && info->pri_supported &&
info->ats_supported;
- }
return false;
}
@@ -5374,12 +5391,18 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
if (feat == IOMMU_DEV_FEAT_AUX)
return intel_iommu_enable_auxd(dev);
+ if (feat == IOMMU_DEV_FEAT_IOPF)
+ return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
+
if (feat == IOMMU_DEV_FEAT_SVA) {
struct device_domain_info *info = get_domain_info(dev);
if (!info)
return -EINVAL;
+ if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
+ return -EINVAL;
+
if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
return 0;
}
@@ -5423,87 +5446,23 @@ static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain,
}
static int
-intel_iommu_domain_set_attr(struct iommu_domain *domain,
- enum iommu_attr attr, void *data)
+intel_iommu_enable_nesting(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long flags;
- int ret = 0;
-
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
+ int ret = -ENODEV;
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- spin_lock_irqsave(&device_domain_lock, flags);
- if (nested_mode_support() &&
- list_empty(&dmar_domain->devices)) {
- dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
- dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
- } else {
- ret = -ENODEV;
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
- break;
- default:
- ret = -EINVAL;
- break;
+ spin_lock_irqsave(&device_domain_lock, flags);
+ if (nested_mode_support() && list_empty(&dmar_domain->devices)) {
+ dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
+ dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
+ ret = 0;
}
+ spin_unlock_irqrestore(&device_domain_lock, flags);
return ret;
}
-static bool domain_use_flush_queue(void)
-{
- struct dmar_drhd_unit *drhd;
- struct intel_iommu *iommu;
- bool r = true;
-
- if (intel_iommu_strict)
- return false;
-
- /*
- * The flush queue implementation does not perform page-selective
- * invalidations that are required for efficient TLB flushes in virtual
- * environments. The benefit of batching is likely to be much lower than
- * the overhead of synchronizing the virtual and physical IOMMU
- * page-tables.
- */
- rcu_read_lock();
- for_each_active_iommu(iommu, drhd) {
- if (!cap_caching_mode(iommu->cap))
- continue;
-
- pr_warn_once("IOMMU batching is disabled due to virtualization");
- r = false;
- break;
- }
- rcu_read_unlock();
-
- return r;
-}
-
-static int
-intel_iommu_domain_get_attr(struct iommu_domain *domain,
- enum iommu_attr attr, void *data)
-{
- switch (domain->type) {
- case IOMMU_DOMAIN_UNMANAGED:
- return -ENODEV;
- case IOMMU_DOMAIN_DMA:
- switch (attr) {
- case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
- *(int *)data = domain_use_flush_queue();
- return 0;
- default:
- return -ENODEV;
- }
- break;
- default:
- return -EINVAL;
- }
-}
-
/*
* Check that the device does not live on an external facing PCI port that is
* marked as untrusted. Such devices should not be able to apply quirks and
@@ -5576,8 +5535,7 @@ const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,
.domain_free = intel_iommu_domain_free,
- .domain_get_attr = intel_iommu_domain_get_attr,
- .domain_set_attr = intel_iommu_domain_set_attr,
+ .enable_nesting = intel_iommu_enable_nesting,
.attach_dev = intel_iommu_attach_device,
.detach_dev = intel_iommu_detach_device,
.aux_attach_dev = intel_iommu_aux_attach_device,