summaryrefslogtreecommitdiff
path: root/drivers/iommu/intel-iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel-iommu.c')
-rw-r--r--drivers/iommu/intel-iommu.c209
1 files changed, 160 insertions, 49 deletions
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a4407eabf0e6..f5e02f8e7371 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -440,6 +440,7 @@ struct dmar_rmrr_unit {
u64 end_address; /* reserved end address */
struct dmar_dev_scope *devices; /* target devices */
int devices_cnt; /* target device count */
+ struct iommu_resv_region *resv; /* reserved region handle */
};
struct dmar_atsr_unit {
@@ -547,7 +548,7 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
-static const struct iommu_ops intel_iommu_ops;
+const struct iommu_ops intel_iommu_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
{
@@ -892,7 +893,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
return NULL;
if (dev_is_pci(dev)) {
+ struct pci_dev *pf_pdev;
+
pdev = to_pci_dev(dev);
+ /* VFs aren't listed in scope tables; we need to look up
+ * the PF instead to find the IOMMU. */
+ pf_pdev = pci_physfn(pdev);
+ dev = &pf_pdev->dev;
segment = pci_domain_nr(pdev->bus);
} else if (has_acpi_companion(dev))
dev = &ACPI_COMPANION(dev)->dev;
@@ -905,6 +912,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
for_each_active_dev_scope(drhd->devices,
drhd->devices_cnt, i, tmp) {
if (tmp == dev) {
+ /* For a VF use its original BDF# not that of the PF
+ * which we used for the IOMMU lookup. Strictly speaking
+ * we could do this for all PCI devices; we only need to
+ * get the BDF# from the scope table for ACPI matches. */
+ if (pdev->is_virtfn)
+ goto got_pdev;
+
*bus = drhd->devices[i].bus;
*devfn = drhd->devices[i].devfn;
goto out;
@@ -1131,7 +1145,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,
if (!dma_pte_present(pte) || dma_pte_superpage(pte))
goto next;
- level_pfn = pfn & level_mask(level - 1);
+ level_pfn = pfn & level_mask(level);
level_pte = phys_to_virt(dma_pte_addr(pte));
if (level > 2)
@@ -1711,6 +1725,7 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
if (!iommu->domains || !iommu->domain_ids)
return;
+again:
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
struct dmar_domain *domain;
@@ -1723,10 +1738,19 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
domain = info->domain;
- dmar_remove_one_dev_info(domain, info->dev);
+ __dmar_remove_one_dev_info(info);
- if (!domain_type_is_vm_or_si(domain))
+ if (!domain_type_is_vm_or_si(domain)) {
+ /*
+ * The domain_exit() function can't be called under
+ * device_domain_lock, as it takes this lock itself.
+ * So release the lock here and re-run the loop
+ * afterwards.
+ */
+ spin_unlock_irqrestore(&device_domain_lock, flags);
domain_exit(domain);
+ goto again;
+ }
}
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -2014,6 +2038,25 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
if (context_present(context))
goto out_unlock;
+ /*
+ * For kdump cases, old valid entries may be cached due to the
+ * in-flight DMA and copied pgtable, but there is no unmapping
+ * behaviour for them, thus we need an explicit cache flush for
+ * the newly-mapped device. For kdump, at this point, the device
+ * is supposed to finish reset at its driver probe stage, so no
+ * in-flight DMA will exist, and we don't need to worry anymore
+ * hereafter.
+ */
+ if (context_copied(context)) {
+ u16 did_old = context_domain_id(context);
+
+ if (did_old >= 0 && did_old < cap_ndoms(iommu->cap))
+ iommu->flush.flush_context(iommu, did_old,
+ (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ }
+
pgd = domain->pgd;
context_clear_entry(context);
@@ -3283,13 +3326,14 @@ static int __init init_dmars(void)
iommu_identity_mapping |= IDENTMAP_GFX;
#endif
+ check_tylersburg_isoch();
+
if (iommu_identity_mapping) {
ret = si_domain_init(hw_pass_through);
if (ret)
goto free_iommu;
}
- check_tylersburg_isoch();
/*
* If we copied translations from a previous kernel in the kdump
@@ -4204,27 +4248,40 @@ static inline void init_iommu_pm_ops(void) {}
int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
{
struct acpi_dmar_reserved_memory *rmrr;
+ int prot = DMA_PTE_READ|DMA_PTE_WRITE;
struct dmar_rmrr_unit *rmrru;
+ size_t length;
rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
if (!rmrru)
- return -ENOMEM;
+ goto out;
rmrru->hdr = header;
rmrr = (struct acpi_dmar_reserved_memory *)header;
rmrru->base_address = rmrr->base_address;
rmrru->end_address = rmrr->end_address;
+
+ length = rmrr->end_address - rmrr->base_address + 1;
+ rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot,
+ IOMMU_RESV_DIRECT);
+ if (!rmrru->resv)
+ goto free_rmrru;
+
rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
((void *)rmrr) + rmrr->header.length,
&rmrru->devices_cnt);
- if (rmrru->devices_cnt && rmrru->devices == NULL) {
- kfree(rmrru);
- return -ENOMEM;
- }
+ if (rmrru->devices_cnt && rmrru->devices == NULL)
+ goto free_all;
list_add(&rmrru->list, &dmar_rmrr_units);
return 0;
+free_all:
+ kfree(rmrru->resv);
+free_rmrru:
+ kfree(rmrru);
+out:
+ return -ENOMEM;
}
static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
@@ -4438,6 +4495,7 @@ static void intel_iommu_free_dmars(void)
list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
list_del(&rmrru->list);
dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
+ kfree(rmrru->resv);
kfree(rmrru);
}
@@ -4665,25 +4723,13 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
}
}
-static int intel_iommu_cpu_notifier(struct notifier_block *nfb,
- unsigned long action, void *v)
+static int intel_iommu_cpu_dead(unsigned int cpu)
{
- unsigned int cpu = (unsigned long)v;
-
- switch (action) {
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- free_all_cpu_cached_iovas(cpu);
- flush_unmaps_timeout(cpu);
- break;
- }
- return NOTIFY_OK;
+ free_all_cpu_cached_iovas(cpu);
+ flush_unmaps_timeout(cpu);
+ return 0;
}
-static struct notifier_block intel_iommu_cpu_nb = {
- .notifier_call = intel_iommu_cpu_notifier,
-};
-
static ssize_t intel_iommu_show_version(struct device *dev,
struct device_attribute *attr,
char *buf)
@@ -4823,17 +4869,20 @@ int __init intel_iommu_init(void)
init_iommu_pm_ops();
- for_each_active_iommu(iommu, drhd)
- iommu->iommu_dev = iommu_device_create(NULL, iommu,
- intel_iommu_groups,
- "%s", iommu->name);
+ for_each_active_iommu(iommu, drhd) {
+ iommu_device_sysfs_add(&iommu->iommu, NULL,
+ intel_iommu_groups,
+ "%s", iommu->name);
+ iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
+ iommu_device_register(&iommu->iommu);
+ }
bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
bus_register_notifier(&pci_bus_type, &device_nb);
if (si_domain && !hw_pass_through)
register_memory_notifier(&intel_iommu_memory_nb);
- register_hotcpu_notifier(&intel_iommu_cpu_nb);
-
+ cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
+ intel_iommu_cpu_dead);
intel_iommu_enabled = 1;
return 0;
@@ -5148,7 +5197,7 @@ static int intel_iommu_add_device(struct device *dev)
if (!iommu)
return -ENODEV;
- iommu_device_link(iommu->iommu_dev, dev);
+ iommu_device_link(&iommu->iommu, dev);
group = iommu_group_get_for_dev(dev);
@@ -5170,10 +5219,68 @@ static void intel_iommu_remove_device(struct device *dev)
iommu_group_remove_device(dev);
- iommu_device_unlink(iommu->iommu_dev, dev);
+ iommu_device_unlink(&iommu->iommu, dev);
+}
+
+static void intel_iommu_get_resv_regions(struct device *device,
+ struct list_head *head)
+{
+ struct iommu_resv_region *reg;
+ struct dmar_rmrr_unit *rmrr;
+ struct device *i_dev;
+ int i;
+
+ rcu_read_lock();
+ for_each_rmrr_units(rmrr) {
+ for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
+ i, i_dev) {
+ if (i_dev != device)
+ continue;
+
+ list_add_tail(&rmrr->resv->list, head);
+ }
+ }
+ rcu_read_unlock();
+
+ reg = iommu_alloc_resv_region(IOAPIC_RANGE_START,
+ IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1,
+ 0, IOMMU_RESV_RESERVED);
+ if (!reg)
+ return;
+ list_add_tail(&reg->list, head);
+}
+
+static void intel_iommu_put_resv_regions(struct device *dev,
+ struct list_head *head)
+{
+ struct iommu_resv_region *entry, *next;
+
+ list_for_each_entry_safe(entry, next, head, list) {
+ if (entry->type == IOMMU_RESV_RESERVED)
+ kfree(entry);
+ }
}
#ifdef CONFIG_INTEL_IOMMU_SVM
+#define MAX_NR_PASID_BITS (20)
+static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
+{
+ /*
+ * Convert ecap_pss to extend context entry pts encoding, also
+ * respect the soft pasid_max value set by the iommu.
+ * - number of PASID bits = ecap_pss + 1
+ * - number of PASID table entries = 2^(pts + 5)
+ * Therefore, pts = ecap_pss - 4
+ * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
+ */
+ if (ecap_pss(iommu->ecap) < 5)
+ return 0;
+
+ /* pasid_max is encoded as actual number of entries not the bits */
+ return find_first_bit((unsigned long *)&iommu->pasid_max,
+ MAX_NR_PASID_BITS) - 5;
+}
+
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
{
struct device_domain_info *info;
@@ -5206,7 +5313,9 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
if (!(ctx_lo & CONTEXT_PASIDE)) {
context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
- context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
+ context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
+ intel_iommu_get_pts(iommu);
+
wmb();
/* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
* extended to permit requests-with-PASID if the PASIDE bit
@@ -5281,20 +5390,22 @@ struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
}
#endif /* CONFIG_INTEL_IOMMU_SVM */
-static const struct iommu_ops intel_iommu_ops = {
- .capable = intel_iommu_capable,
- .domain_alloc = intel_iommu_domain_alloc,
- .domain_free = intel_iommu_domain_free,
- .attach_dev = intel_iommu_attach_device,
- .detach_dev = intel_iommu_detach_device,
- .map = intel_iommu_map,
- .unmap = intel_iommu_unmap,
- .map_sg = default_iommu_map_sg,
- .iova_to_phys = intel_iommu_iova_to_phys,
- .add_device = intel_iommu_add_device,
- .remove_device = intel_iommu_remove_device,
- .device_group = pci_device_group,
- .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
+const struct iommu_ops intel_iommu_ops = {
+ .capable = intel_iommu_capable,
+ .domain_alloc = intel_iommu_domain_alloc,
+ .domain_free = intel_iommu_domain_free,
+ .attach_dev = intel_iommu_attach_device,
+ .detach_dev = intel_iommu_detach_device,
+ .map = intel_iommu_map,
+ .unmap = intel_iommu_unmap,
+ .map_sg = default_iommu_map_sg,
+ .iova_to_phys = intel_iommu_iova_to_phys,
+ .add_device = intel_iommu_add_device,
+ .remove_device = intel_iommu_remove_device,
+ .get_resv_regions = intel_iommu_get_resv_regions,
+ .put_resv_regions = intel_iommu_put_resv_regions,
+ .device_group = pci_device_group,
+ .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
};
static void quirk_iommu_g4x_gfx(struct pci_dev *dev)