summaryrefslogtreecommitdiff
path: root/drivers/iommu/intel
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel')
-rw-r--r--drivers/iommu/intel/cap_audit.c2
-rw-r--r--drivers/iommu/intel/debugfs.c51
-rw-r--r--drivers/iommu/intel/dmar.c41
-rw-r--r--drivers/iommu/intel/iommu.c471
-rw-r--r--drivers/iommu/intel/iommu.h839
-rw-r--r--drivers/iommu/intel/irq_remapping.c2
-rw-r--r--drivers/iommu/intel/pasid.c176
-rw-r--r--drivers/iommu/intel/pasid.h2
-rw-r--r--drivers/iommu/intel/perf.c2
-rw-r--r--drivers/iommu/intel/svm.c11
-rw-r--r--drivers/iommu/intel/trace.c2
-rw-r--r--drivers/iommu/intel/trace.h99
12 files changed, 1204 insertions, 494 deletions
diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c
index 71596fc62822..3ee68393122f 100644
--- a/drivers/iommu/intel/cap_audit.c
+++ b/drivers/iommu/intel/cap_audit.c
@@ -10,7 +10,7 @@
#define pr_fmt(fmt) "DMAR: " fmt
-#include <linux/intel-iommu.h>
+#include "iommu.h"
#include "cap_audit.h"
static u64 intel_iommu_cap_sanity;
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index ed796eea4581..1f925285104e 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -10,11 +10,11 @@
#include <linux/debugfs.h>
#include <linux/dmar.h>
-#include <linux/intel-iommu.h>
#include <linux/pci.h>
#include <asm/irq_remapping.h>
+#include "iommu.h"
#include "pasid.h"
#include "perf.h"
@@ -263,10 +263,9 @@ static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 bus)
static void root_tbl_walk(struct seq_file *m, struct intel_iommu *iommu)
{
- unsigned long flags;
u16 bus;
- spin_lock_irqsave(&iommu->lock, flags);
+ spin_lock(&iommu->lock);
seq_printf(m, "IOMMU %s: Root Table Address: 0x%llx\n", iommu->name,
(u64)virt_to_phys(iommu->root_entry));
seq_puts(m, "B.D.F\tRoot_entry\t\t\t\tContext_entry\t\t\t\tPASID\tPASID_table_entry\n");
@@ -278,8 +277,7 @@ static void root_tbl_walk(struct seq_file *m, struct intel_iommu *iommu)
*/
for (bus = 0; bus < 256; bus++)
ctx_tbl_walk(m, iommu, bus);
-
- spin_unlock_irqrestore(&iommu->lock, flags);
+ spin_unlock(&iommu->lock);
}
static int dmar_translation_struct_show(struct seq_file *m, void *unused)
@@ -342,13 +340,13 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
}
}
-static int show_device_domain_translation(struct device *dev, void *data)
+static int __show_device_domain_translation(struct device *dev, void *data)
{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct dmar_domain *domain = info->domain;
+ struct dmar_domain *domain;
struct seq_file *m = data;
u64 path[6] = { 0 };
+ domain = to_dmar_domain(iommu_get_domain_for_dev(dev));
if (!domain)
return 0;
@@ -359,20 +357,39 @@ static int show_device_domain_translation(struct device *dev, void *data)
pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
seq_putc(m, '\n');
- return 0;
+ /* Don't iterate */
+ return 1;
}
-static int domain_translation_struct_show(struct seq_file *m, void *unused)
+static int show_device_domain_translation(struct device *dev, void *data)
{
- unsigned long flags;
- int ret;
+ struct iommu_group *group;
- spin_lock_irqsave(&device_domain_lock, flags);
- ret = bus_for_each_dev(&pci_bus_type, NULL, m,
- show_device_domain_translation);
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ group = iommu_group_get(dev);
+ if (group) {
+ /*
+ * The group->mutex is held across the callback, which will
+ * block calls to iommu_attach/detach_group/device. Hence,
+ * the domain of the device will not change during traversal.
+ *
+ * All devices in an iommu group share a single domain, hence
+ * we only dump the domain of the first device. Even though,
+ * this code still possibly races with the iommu_unmap()
+ * interface. This could be solved by RCU-freeing the page
+ * table pages in the iommu_unmap() path.
+ */
+ iommu_group_for_each_dev(group, data,
+ __show_device_domain_translation);
+ iommu_group_put(group);
+ }
- return ret;
+ return 0;
+}
+
+static int domain_translation_struct_show(struct seq_file *m, void *unused)
+{
+ return bus_for_each_dev(&pci_bus_type, NULL, m,
+ show_device_domain_translation);
}
DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 592c1e1a5d4b..6327b34f5aa7 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -19,7 +19,6 @@
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iova.h>
-#include <linux/intel-iommu.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
@@ -30,10 +29,11 @@
#include <linux/numa.h>
#include <linux/limits.h>
#include <asm/irq_remapping.h>
-#include <trace/events/intel_iommu.h>
+#include "iommu.h"
#include "../irq_remapping.h"
#include "perf.h"
+#include "trace.h"
typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
struct dmar_res_callback {
@@ -60,7 +60,7 @@ LIST_HEAD(dmar_drhd_units);
struct acpi_table_header * __initdata dmar_tbl;
static int dmar_dev_scope_status = 1;
-static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
+static DEFINE_IDA(dmar_seq_ids);
static int alloc_iommu(struct dmar_drhd_unit *drhd);
static void free_iommu(struct intel_iommu *iommu);
@@ -382,7 +382,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb,
static struct notifier_block dmar_pci_bus_nb = {
.notifier_call = dmar_pci_bus_notifier,
- .priority = INT_MIN,
+ .priority = 1,
};
static struct dmar_drhd_unit *
@@ -1023,28 +1023,6 @@ out:
return err;
}
-static int dmar_alloc_seq_id(struct intel_iommu *iommu)
-{
- iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
- DMAR_UNITS_SUPPORTED);
- if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
- iommu->seq_id = -1;
- } else {
- set_bit(iommu->seq_id, dmar_seq_ids);
- sprintf(iommu->name, "dmar%d", iommu->seq_id);
- }
-
- return iommu->seq_id;
-}
-
-static void dmar_free_seq_id(struct intel_iommu *iommu)
-{
- if (iommu->seq_id >= 0) {
- clear_bit(iommu->seq_id, dmar_seq_ids);
- iommu->seq_id = -1;
- }
-}
-
static int alloc_iommu(struct dmar_drhd_unit *drhd)
{
struct intel_iommu *iommu;
@@ -1062,11 +1040,14 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
if (!iommu)
return -ENOMEM;
- if (dmar_alloc_seq_id(iommu) < 0) {
+ iommu->seq_id = ida_alloc_range(&dmar_seq_ids, 0,
+ DMAR_UNITS_SUPPORTED - 1, GFP_KERNEL);
+ if (iommu->seq_id < 0) {
pr_err("Failed to allocate seq_id\n");
- err = -ENOSPC;
+ err = iommu->seq_id;
goto error;
}
+ sprintf(iommu->name, "dmar%d", iommu->seq_id);
err = map_iommu(iommu, drhd->reg_base_addr);
if (err) {
@@ -1150,7 +1131,7 @@ err_sysfs:
err_unmap:
unmap_iommu(iommu);
error_free_seq_id:
- dmar_free_seq_id(iommu);
+ ida_free(&dmar_seq_ids, iommu->seq_id);
error:
kfree(iommu);
return err;
@@ -1183,7 +1164,7 @@ static void free_iommu(struct intel_iommu *iommu)
if (iommu->reg)
unmap_iommu(iommu);
- dmar_free_seq_id(iommu);
+ ida_free(&dmar_seq_ids, iommu->seq_id);
kfree(iommu);
}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 44016594831d..7cca030a508e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -17,7 +17,6 @@
#include <linux/dma-direct.h>
#include <linux/dma-iommu.h>
#include <linux/dmi.h>
-#include <linux/intel-iommu.h>
#include <linux/intel-svm.h>
#include <linux/memory.h>
#include <linux/pci.h>
@@ -26,6 +25,7 @@
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
+#include "iommu.h"
#include "../irq_remapping.h"
#include "../iommu-sva-lib.h"
#include "pasid.h"
@@ -126,13 +126,8 @@ static inline unsigned long virt_to_dma_pfn(void *p)
return page_to_dma_pfn(virt_to_page(p));
}
-/* global iommu list, set NULL for ignored DMAR units */
-static struct intel_iommu **g_iommus;
-
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
-static inline struct device_domain_info *
-dmar_search_domain_by_dev_info(int segment, int bus, int devfn);
/*
* set to 1 to panic kernel if can't successfully enable VT-d
@@ -256,10 +251,6 @@ static inline void context_clear_entry(struct context_entry *context)
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;
-#define for_each_domain_iommu(idx, domain) \
- for (idx = 0; idx < g_num_of_iommus; idx++) \
- if (domain->iommu_refcnt[idx])
-
struct dmar_rmrr_unit {
struct list_head list; /* list of rmrr units */
struct acpi_dmar_header *hdr; /* ACPI header */
@@ -293,12 +284,7 @@ static LIST_HEAD(dmar_satc_units);
#define for_each_rmrr_units(rmrr) \
list_for_each_entry(rmrr, &dmar_rmrr_units, list)
-/* bitmap for indexing intel_iommus */
-static int g_num_of_iommus;
-
-static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
-static void __dmar_remove_one_dev_info(struct device_domain_info *info);
int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
@@ -314,36 +300,6 @@ static int iommu_skip_te_disable;
#define IDENTMAP_GFX 2
#define IDENTMAP_AZALIA 4
-int intel_iommu_gfx_mapped;
-EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
-
-DEFINE_SPINLOCK(device_domain_lock);
-static LIST_HEAD(device_domain_list);
-
-/*
- * Iterate over elements in device_domain_list and call the specified
- * callback @fn against each element.
- */
-int for_each_device_domain(int (*fn)(struct device_domain_info *info,
- void *data), void *data)
-{
- int ret = 0;
- unsigned long flags;
- struct device_domain_info *info;
-
- spin_lock_irqsave(&device_domain_lock, flags);
- list_for_each_entry(info, &device_domain_list, global) {
- ret = fn(info, data);
- if (ret) {
- spin_unlock_irqrestore(&device_domain_lock, flags);
- return ret;
- }
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
-
- return 0;
-}
-
const struct iommu_ops intel_iommu_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
@@ -479,24 +435,6 @@ int iommu_calculate_agaw(struct intel_iommu *iommu)
return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}
-/* This functionin only returns single iommu in a domain */
-struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
-{
- int iommu_id;
-
- /* si_domain and vm domain should not get here. */
- if (WARN_ON(!iommu_is_dma_domain(&domain->domain)))
- return NULL;
-
- for_each_domain_iommu(iommu_id, domain)
- break;
-
- if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
- return NULL;
-
- return g_iommus[iommu_id];
-}
-
static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
{
return sm_supported(iommu) ?
@@ -505,16 +443,16 @@ static inline bool iommu_paging_structure_coherency(struct intel_iommu *iommu)
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
+ struct iommu_domain_info *info;
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
bool found = false;
- int i;
+ unsigned long i;
domain->iommu_coherency = true;
-
- for_each_domain_iommu(i, domain) {
+ xa_for_each(&domain->iommu_array, i, info) {
found = true;
- if (!iommu_paging_structure_coherency(g_iommus[i])) {
+ if (!iommu_paging_structure_coherency(info->iommu)) {
domain->iommu_coherency = false;
break;
}
@@ -568,15 +506,8 @@ static int domain_update_device_node(struct dmar_domain *domain)
struct device_domain_info *info;
int nid = NUMA_NO_NODE;
- assert_spin_locked(&device_domain_lock);
-
- if (list_empty(&domain->devices))
- return NUMA_NO_NODE;
-
+ spin_lock(&domain->lock);
list_for_each_entry(info, &domain->devices, link) {
- if (!info->dev)
- continue;
-
/*
* There could possibly be multiple device numa nodes as devices
* within the same domain may sit behind different IOMMUs. There
@@ -587,6 +518,7 @@ static int domain_update_device_node(struct dmar_domain *domain)
if (nid != NUMA_NO_NODE)
break;
}
+ spin_unlock(&domain->lock);
return nid;
}
@@ -828,26 +760,23 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
struct context_entry *context;
int ret = 0;
- unsigned long flags;
- spin_lock_irqsave(&iommu->lock, flags);
+ spin_lock(&iommu->lock);
context = iommu_context_addr(iommu, bus, devfn, 0);
if (context)
ret = context_present(context);
- spin_unlock_irqrestore(&iommu->lock, flags);
+ spin_unlock(&iommu->lock);
return ret;
}
static void free_context_table(struct intel_iommu *iommu)
{
- int i;
- unsigned long flags;
struct context_entry *context;
+ int i;
+
+ if (!iommu->root_entry)
+ return;
- spin_lock_irqsave(&iommu->lock, flags);
- if (!iommu->root_entry) {
- goto out;
- }
for (i = 0; i < ROOT_ENTRY_NR; i++) {
context = iommu_context_addr(iommu, i, 0, 0);
if (context)
@@ -859,12 +788,10 @@ static void free_context_table(struct intel_iommu *iommu)
context = iommu_context_addr(iommu, i, 0x80, 0);
if (context)
free_pgtable_page(context);
-
}
+
free_pgtable_page(iommu->root_entry);
iommu->root_entry = NULL;
-out:
- spin_unlock_irqrestore(&iommu->lock, flags);
}
#ifdef CONFIG_DMAR_DEBUG
@@ -873,9 +800,14 @@ static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u
struct device_domain_info *info;
struct dma_pte *parent, *pte;
struct dmar_domain *domain;
+ struct pci_dev *pdev;
int offset, level;
- info = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
+ pdev = pci_get_domain_bus_and_slot(iommu->segment, bus, devfn);
+ if (!pdev)
+ return;
+
+ info = dev_iommu_priv_get(&pdev->dev);
if (!info || !info->domain) {
pr_info("device [%02x:%02x.%d] not probed\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1258,7 +1190,6 @@ static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
struct root_entry *root;
- unsigned long flags;
root = (struct root_entry *)alloc_pgtable_page(iommu->node);
if (!root) {
@@ -1268,10 +1199,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
}
__iommu_flush_cache(iommu, root, ROOT_SIZE);
-
- spin_lock_irqsave(&iommu->lock, flags);
iommu->root_entry = root;
- spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
}
@@ -1413,23 +1341,23 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
}
static struct device_domain_info *
-iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
- u8 bus, u8 devfn)
+iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
+ u8 bus, u8 devfn)
{
struct device_domain_info *info;
- assert_spin_locked(&device_domain_lock);
-
if (!iommu->qi)
return NULL;
- list_for_each_entry(info, &domain->devices, link)
+ spin_lock(&domain->lock);
+ list_for_each_entry(info, &domain->devices, link) {
if (info->iommu == iommu && info->bus == bus &&
info->devfn == devfn) {
- if (info->ats_supported && info->dev)
- return info;
- break;
+ spin_unlock(&domain->lock);
+ return info->ats_supported ? info : NULL;
}
+ }
+ spin_unlock(&domain->lock);
return NULL;
}
@@ -1439,23 +1367,21 @@ static void domain_update_iotlb(struct dmar_domain *domain)
struct device_domain_info *info;
bool has_iotlb_device = false;
- assert_spin_locked(&device_domain_lock);
-
- list_for_each_entry(info, &domain->devices, link)
+ spin_lock(&domain->lock);
+ list_for_each_entry(info, &domain->devices, link) {
if (info->ats_enabled) {
has_iotlb_device = true;
break;
}
-
+ }
domain->has_iotlb_device = has_iotlb_device;
+ spin_unlock(&domain->lock);
}
static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
struct pci_dev *pdev;
- assert_spin_locked(&device_domain_lock);
-
if (!info || !dev_is_pci(info->dev))
return;
@@ -1501,8 +1427,6 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
struct pci_dev *pdev;
- assert_spin_locked(&device_domain_lock);
-
if (!dev_is_pci(info->dev))
return;
@@ -1542,17 +1466,15 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info,
static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
u64 addr, unsigned mask)
{
- unsigned long flags;
struct device_domain_info *info;
if (!domain->has_iotlb_device)
return;
- spin_lock_irqsave(&device_domain_lock, flags);
+ spin_lock(&domain->lock);
list_for_each_entry(info, &domain->devices, link)
__iommu_flush_dev_iotlb(info, addr, mask);
-
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ spin_unlock(&domain->lock);
}
static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
@@ -1563,7 +1485,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
unsigned int aligned_pages = __roundup_pow_of_two(pages);
unsigned int mask = ilog2(aligned_pages);
uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
- u16 did = domain->iommu_did[iommu->seq_id];
+ u16 did = domain_id_iommu(domain, iommu);
BUG_ON(pages == 0);
@@ -1633,11 +1555,12 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu,
static void intel_flush_iotlb_all(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- int idx;
+ struct iommu_domain_info *info;
+ unsigned long idx;
- for_each_domain_iommu(idx, dmar_domain) {
- struct intel_iommu *iommu = g_iommus[idx];
- u16 did = dmar_domain->iommu_did[iommu->seq_id];
+ xa_for_each(&dmar_domain->iommu_array, idx, info) {
+ struct intel_iommu *iommu = info->iommu;
+ u16 did = domain_id_iommu(dmar_domain, iommu);
if (domain_use_first_level(dmar_domain))
qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0);
@@ -1743,23 +1666,16 @@ static int iommu_init_domains(struct intel_iommu *iommu)
static void disable_dmar_iommu(struct intel_iommu *iommu)
{
- struct device_domain_info *info, *tmp;
- unsigned long flags;
-
if (!iommu->domain_ids)
return;
- spin_lock_irqsave(&device_domain_lock, flags);
- list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
- if (info->iommu != iommu)
- continue;
-
- if (!info->dev || !info->domain)
- continue;
-
- __dmar_remove_one_dev_info(info);
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ /*
+ * All iommu domains must have been detached from the devices,
+ * hence there should be no domain IDs in use.
+ */
+ if (WARN_ON(bitmap_weight(iommu->domain_ids, cap_ndoms(iommu->cap))
+ > NUM_RESERVED_DID))
+ return;
if (iommu->gcmd & DMA_GCMD_TE)
iommu_disable_translation(iommu);
@@ -1772,8 +1688,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
iommu->domain_ids = NULL;
}
- g_iommus[iommu->seq_id] = NULL;
-
/* free context mapping */
free_context_table(iommu);
@@ -1819,55 +1733,77 @@ static struct dmar_domain *alloc_domain(unsigned int type)
domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
domain->has_iotlb_device = false;
INIT_LIST_HEAD(&domain->devices);
+ spin_lock_init(&domain->lock);
+ xa_init(&domain->iommu_array);
return domain;
}
-/* Must be called with iommu->lock */
static int domain_attach_iommu(struct dmar_domain *domain,
struct intel_iommu *iommu)
{
+ struct iommu_domain_info *info, *curr;
unsigned long ndomains;
- int num;
+ int num, ret = -ENOSPC;
- assert_spin_locked(&device_domain_lock);
- assert_spin_locked(&iommu->lock);
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
- domain->iommu_refcnt[iommu->seq_id] += 1;
- if (domain->iommu_refcnt[iommu->seq_id] == 1) {
- ndomains = cap_ndoms(iommu->cap);
- num = find_first_zero_bit(iommu->domain_ids, ndomains);
+ spin_lock(&iommu->lock);
+ curr = xa_load(&domain->iommu_array, iommu->seq_id);
+ if (curr) {
+ curr->refcnt++;
+ spin_unlock(&iommu->lock);
+ kfree(info);
+ return 0;
+ }
- if (num >= ndomains) {
- pr_err("%s: No free domain ids\n", iommu->name);
- domain->iommu_refcnt[iommu->seq_id] -= 1;
- return -ENOSPC;
- }
+ ndomains = cap_ndoms(iommu->cap);
+ num = find_first_zero_bit(iommu->domain_ids, ndomains);
+ if (num >= ndomains) {
+ pr_err("%s: No free domain ids\n", iommu->name);
+ goto err_unlock;
+ }
- set_bit(num, iommu->domain_ids);
- domain->iommu_did[iommu->seq_id] = num;
- domain->nid = iommu->node;
- domain_update_iommu_cap(domain);
+ set_bit(num, iommu->domain_ids);
+ info->refcnt = 1;
+ info->did = num;
+ info->iommu = iommu;
+ curr = xa_cmpxchg(&domain->iommu_array, iommu->seq_id,
+ NULL, info, GFP_ATOMIC);
+ if (curr) {
+ ret = xa_err(curr) ? : -EBUSY;
+ goto err_clear;
}
+ domain_update_iommu_cap(domain);
+ spin_unlock(&iommu->lock);
return 0;
+
+err_clear:
+ clear_bit(info->did, iommu->domain_ids);
+err_unlock:
+ spin_unlock(&iommu->lock);
+ kfree(info);
+ return ret;
}
static void domain_detach_iommu(struct dmar_domain *domain,
struct intel_iommu *iommu)
{
- int num;
+ struct iommu_domain_info *info;
- assert_spin_locked(&device_domain_lock);
- assert_spin_locked(&iommu->lock);
-
- domain->iommu_refcnt[iommu->seq_id] -= 1;
- if (domain->iommu_refcnt[iommu->seq_id] == 0) {
- num = domain->iommu_did[iommu->seq_id];
- clear_bit(num, iommu->domain_ids);
+ spin_lock(&iommu->lock);
+ info = xa_load(&domain->iommu_array, iommu->seq_id);
+ if (--info->refcnt == 0) {
+ clear_bit(info->did, iommu->domain_ids);
+ xa_erase(&domain->iommu_array, iommu->seq_id);
+ domain->nid = NUMA_NO_NODE;
domain_update_iommu_cap(domain);
- domain->iommu_did[iommu->seq_id] = 0;
+ kfree(info);
}
+ spin_unlock(&iommu->lock);
}
static inline int guestwidth_to_adjustwidth(int gaw)
@@ -1886,10 +1822,6 @@ static inline int guestwidth_to_adjustwidth(int gaw)
static void domain_exit(struct dmar_domain *domain)
{
-
- /* Remove associated devices and clear attached or cached domains */
- domain_remove_dev_info(domain);
-
if (domain->pgd) {
LIST_HEAD(freelist);
@@ -1897,6 +1829,9 @@ static void domain_exit(struct dmar_domain *domain)
put_pages_list(&freelist);
}
+ if (WARN_ON(!list_empty(&domain->devices)))
+ return;
+
kfree(domain);
}
@@ -1954,11 +1889,11 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
struct pasid_table *table,
u8 bus, u8 devfn)
{
- u16 did = domain->iommu_did[iommu->seq_id];
+ struct device_domain_info *info =
+ iommu_support_dev_iotlb(domain, iommu, bus, devfn);
+ u16 did = domain_id_iommu(domain, iommu);
int translation = CONTEXT_TT_MULTI_LEVEL;
- struct device_domain_info *info = NULL;
struct context_entry *context;
- unsigned long flags;
int ret;
WARN_ON(did == 0);
@@ -1971,9 +1906,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
BUG_ON(!domain->pgd);
- spin_lock_irqsave(&device_domain_lock, flags);
spin_lock(&iommu->lock);
-
ret = -ENOMEM;
context = iommu_context_addr(iommu, bus, devfn, 1);
if (!context)
@@ -2024,7 +1957,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
* Setup the Device-TLB enable bit and Page request
* Enable bit:
*/
- info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
if (info && info->ats_supported)
context_set_sm_dte(context);
if (info && info->pri_supported)
@@ -2047,7 +1979,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
goto out_unlock;
}
- info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
if (info && info->ats_supported)
translation = CONTEXT_TT_DEV_IOTLB;
else
@@ -2093,7 +2024,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
out_unlock:
spin_unlock(&iommu->lock);
- spin_unlock_irqrestore(&device_domain_lock, flags);
return ret;
}
@@ -2210,8 +2140,9 @@ static void switch_to_super_page(struct dmar_domain *domain,
unsigned long end_pfn, int level)
{
unsigned long lvl_pages = lvl_to_nr_pages(level);
+ struct iommu_domain_info *info;
struct dma_pte *pte = NULL;
- int i;
+ unsigned long i;
while (start_pfn <= end_pfn) {
if (!pte)
@@ -2222,8 +2153,8 @@ static void switch_to_super_page(struct dmar_domain *domain,
start_pfn + lvl_pages - 1,
level + 1);
- for_each_domain_iommu(i, domain)
- iommu_flush_iotlb_psi(g_iommus[i], domain,
+ xa_for_each(&domain->iommu_array, i, info)
+ iommu_flush_iotlb_psi(info->iommu, domain,
start_pfn, lvl_pages,
0, 0);
}
@@ -2337,16 +2268,15 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
{
struct intel_iommu *iommu = info->iommu;
struct context_entry *context;
- unsigned long flags;
u16 did_old;
if (!iommu)
return;
- spin_lock_irqsave(&iommu->lock, flags);
+ spin_lock(&iommu->lock);
context = iommu_context_addr(iommu, bus, devfn, 0);
if (!context) {
- spin_unlock_irqrestore(&iommu->lock, flags);
+ spin_unlock(&iommu->lock);
return;
}
@@ -2354,14 +2284,14 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
if (hw_pass_through && domain_type_is_si(info->domain))
did_old = FLPT_DEFAULT_DID;
else
- did_old = info->domain->iommu_did[iommu->seq_id];
+ did_old = domain_id_iommu(info->domain, iommu);
} else {
did_old = context_domain_id(context);
}
context_clear_entry(context);
__iommu_flush_cache(iommu, context, sizeof(*context));
- spin_unlock_irqrestore(&iommu->lock, flags);
+ spin_unlock(&iommu->lock);
iommu->flush.flush_context(iommu,
did_old,
(((u16)bus) << 8) | devfn,
@@ -2380,30 +2310,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
__iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH);
}
-static void domain_remove_dev_info(struct dmar_domain *domain)
-{
- struct device_domain_info *info, *tmp;
- unsigned long flags;
-
- spin_lock_irqsave(&device_domain_lock, flags);
- list_for_each_entry_safe(info, tmp, &domain->devices, link)
- __dmar_remove_one_dev_info(info);
- spin_unlock_irqrestore(&device_domain_lock, flags);
-}
-
-static inline struct device_domain_info *
-dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
-{
- struct device_domain_info *info;
-
- list_for_each_entry(info, &device_domain_list, global)
- if (info->segment == segment && info->bus == bus &&
- info->devfn == devfn)
- return info;
-
- return NULL;
-}
-
static int domain_setup_first_level(struct intel_iommu *iommu,
struct dmar_domain *domain,
struct device *dev,
@@ -2436,7 +2342,7 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
flags |= PASID_FLAG_PAGE_SNOOP;
return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
- domain->iommu_did[iommu->seq_id],
+ domain_id_iommu(domain, iommu),
flags);
}
@@ -2523,7 +2429,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu;
- unsigned long flags;
u8 bus, devfn;
int ret;
@@ -2531,17 +2436,13 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
if (!iommu)
return -ENODEV;
- spin_lock_irqsave(&device_domain_lock, flags);
- info->domain = domain;
- spin_lock(&iommu->lock);
ret = domain_attach_iommu(domain, iommu);
- spin_unlock(&iommu->lock);
- if (ret) {
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ if (ret)
return ret;
- }
+ info->domain = domain;
+ spin_lock(&domain->lock);
list_add(&info->link, &domain->devices);
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ spin_unlock(&domain->lock);
/* PASID table is mandatory for a PCI device in scalable mode. */
if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
@@ -2553,7 +2454,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
}
/* Setup the PASID entry for requests without PASID: */
- spin_lock_irqsave(&iommu->lock, flags);
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
@@ -2563,7 +2463,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
- spin_unlock_irqrestore(&iommu->lock, flags);
if (ret) {
dev_err(dev, "Setup RID2PASID failed\n");
dmar_remove_one_dev_info(dev);
@@ -2831,7 +2730,6 @@ static int copy_translation_tables(struct intel_iommu *iommu)
struct root_entry *old_rt;
phys_addr_t old_rt_phys;
int ctxt_table_entries;
- unsigned long flags;
u64 rtaddr_reg;
int bus, ret;
bool new_ext, ext;
@@ -2874,7 +2772,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
}
}
- spin_lock_irqsave(&iommu->lock, flags);
+ spin_lock(&iommu->lock);
/* Context tables are copied, now write them to the root_entry table */
for (bus = 0; bus < 256; bus++) {
@@ -2893,7 +2791,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
iommu->root_entry[bus].hi = val;
}
- spin_unlock_irqrestore(&iommu->lock, flags);
+ spin_unlock(&iommu->lock);
kfree(ctxt_tbls);
@@ -2992,36 +2890,6 @@ static int __init init_dmars(void)
struct intel_iommu *iommu;
int ret;
- /*
- * for each drhd
- * allocate root
- * initialize and program root entry to not present
- * endfor
- */
- for_each_drhd_unit(drhd) {
- /*
- * lock not needed as this is only incremented in the single
- * threaded kernel __init code path all other access are read
- * only
- */
- if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
- g_num_of_iommus++;
- continue;
- }
- pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
- }
-
- /* Preallocate enough resources for IOMMU hot-addition */
- if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
- g_num_of_iommus = DMAR_UNITS_SUPPORTED;
-
- g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
- GFP_KERNEL);
- if (!g_iommus) {
- ret = -ENOMEM;
- goto error;
- }
-
ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
if (ret)
goto free_iommu;
@@ -3044,8 +2912,6 @@ static int __init init_dmars(void)
intel_pasid_max_id);
}
- g_iommus[iommu->seq_id] = iommu;
-
intel_iommu_init_qi(iommu);
ret = iommu_init_domains(iommu);
@@ -3171,9 +3037,6 @@ free_iommu:
free_dmar_iommu(iommu);
}
- kfree(g_iommus);
-
-error:
return ret;
}
@@ -3554,9 +3417,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
int sp, ret;
struct intel_iommu *iommu = dmaru->iommu;
- if (g_iommus[iommu->seq_id])
- return 0;
-
ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
if (ret)
goto out;
@@ -3580,7 +3440,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
if (iommu->gcmd & DMA_GCMD_TE)
iommu_disable_translation(iommu);
- g_iommus[iommu->seq_id] = iommu;
ret = iommu_init_domains(iommu);
if (ret == 0)
ret = iommu_alloc_root_entry(iommu);
@@ -4046,6 +3905,20 @@ static int __init probe_acpi_namespace_devices(void)
return 0;
}
+static __init int tboot_force_iommu(void)
+{
+ if (!tboot_enabled())
+ return 0;
+
+ if (no_iommu || dmar_disabled)
+ pr_warn("Forcing Intel-IOMMU to enabled\n");
+
+ dmar_disabled = 0;
+ no_iommu = 0;
+
+ return 1;
+}
+
int __init intel_iommu_init(void)
{
int ret = -ENODEV;
@@ -4117,9 +3990,6 @@ int __init intel_iommu_init(void)
if (list_empty(&dmar_satc_units))
pr_info("No SATC found\n");
- if (dmar_map_gfx)
- intel_iommu_gfx_mapped = 1;
-
init_no_remapping_devices();
ret = init_dmars();
@@ -4205,21 +4075,13 @@ static void domain_context_clear(struct device_domain_info *info)
&domain_context_clear_one_cb, info);
}
-static void __dmar_remove_one_dev_info(struct device_domain_info *info)
+static void dmar_remove_one_dev_info(struct device *dev)
{
- struct dmar_domain *domain;
- struct intel_iommu *iommu;
- unsigned long flags;
-
- assert_spin_locked(&device_domain_lock);
-
- if (WARN_ON(!info))
- return;
-
- iommu = info->iommu;
- domain = info->domain;
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct dmar_domain *domain = info->domain;
+ struct intel_iommu *iommu = info->iommu;
- if (info->dev && !dev_is_real_dma_subdevice(info->dev)) {
+ if (!dev_is_real_dma_subdevice(info->dev)) {
if (dev_is_pci(info->dev) && sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, info->dev,
PASID_RID2PASID, false);
@@ -4229,23 +4091,12 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
intel_pasid_free_table(info->dev);
}
+ spin_lock(&domain->lock);
list_del(&info->link);
+ spin_unlock(&domain->lock);
- spin_lock_irqsave(&iommu->lock, flags);
domain_detach_iommu(domain, iommu);
- spin_unlock_irqrestore(&iommu->lock, flags);
-}
-
-static void dmar_remove_one_dev_info(struct device *dev)
-{
- struct device_domain_info *info;
- unsigned long flags;
-
- spin_lock_irqsave(&device_domain_lock, flags);
- info = dev_iommu_priv_get(dev);
- if (info)
- __dmar_remove_one_dev_info(info);
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ info->domain = NULL;
}
static int md_domain_init(struct dmar_domain *domain, int guest_width)
@@ -4490,15 +4341,16 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long iova_pfn = IOVA_PFN(gather->start);
size_t size = gather->end - gather->start;
+ struct iommu_domain_info *info;
unsigned long start_pfn;
unsigned long nrpages;
- int iommu_id;
+ unsigned long i;
nrpages = aligned_nrpages(gather->start, size);
start_pfn = mm_to_dma_pfn(iova_pfn);
- for_each_domain_iommu(iommu_id, dmar_domain)
- iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
+ xa_for_each(&dmar_domain->iommu_array, i, info)
+ iommu_flush_iotlb_psi(info->iommu, dmar_domain,
start_pfn, nrpages,
list_empty(&gather->freelist), 0);
@@ -4527,7 +4379,7 @@ static bool domain_support_force_snooping(struct dmar_domain *domain)
struct device_domain_info *info;
bool support = true;
- assert_spin_locked(&device_domain_lock);
+ assert_spin_locked(&domain->lock);
list_for_each_entry(info, &domain->devices, link) {
if (!ecap_sc_support(info->iommu->ecap)) {
support = false;
@@ -4542,8 +4394,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain)
{
struct device_domain_info *info;
- assert_spin_locked(&device_domain_lock);
-
+ assert_spin_locked(&domain->lock);
/*
* Second level page table supports per-PTE snoop control. The
* iommu_map() interface will handle this by setting SNP bit.
@@ -4561,20 +4412,19 @@ static void domain_set_force_snooping(struct dmar_domain *domain)
static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
- unsigned long flags;
if (dmar_domain->force_snooping)
return true;
- spin_lock_irqsave(&device_domain_lock, flags);
+ spin_lock(&dmar_domain->lock);
if (!domain_support_force_snooping(dmar_domain)) {
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ spin_unlock(&dmar_domain->lock);
return false;
}
domain_set_force_snooping(dmar_domain);
dmar_domain->force_snooping = true;
- spin_unlock_irqrestore(&device_domain_lock, flags);
+ spin_unlock(&dmar_domain->lock);
return true;
}
@@ -4596,7 +4446,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
struct device_domain_info *info;
struct intel_iommu *iommu;
- unsigned long flags;
u8 bus, devfn;
iommu = device_to_iommu(dev, &bus, &devfn);
@@ -4639,10 +4488,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
}
- spin_lock_irqsave(&device_domain_lock, flags);
- list_add(&info->global, &device_domain_list);
dev_iommu_priv_set(dev, info);
- spin_unlock_irqrestore(&device_domain_lock, flags);
return &iommu->iommu;
}
@@ -4650,15 +4496,9 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
static void intel_iommu_release_device(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- unsigned long flags;
dmar_remove_one_dev_info(dev);
-
- spin_lock_irqsave(&device_domain_lock, flags);
dev_iommu_priv_set(dev, NULL);
- list_del(&info->global);
- spin_unlock_irqrestore(&device_domain_lock, flags);
-
kfree(info);
set_dma_ops(dev, NULL);
}
@@ -4731,7 +4571,6 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct context_entry *context;
struct dmar_domain *domain;
- unsigned long flags;
u64 ctx_lo;
int ret;
@@ -4739,9 +4578,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
if (!domain)
return -EINVAL;
- spin_lock_irqsave(&device_domain_lock, flags);
spin_lock(&iommu->lock);
-
ret = -EINVAL;
if (!info->pasid_supported)
goto out;
@@ -4757,7 +4594,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
context[0].lo = ctx_lo;
wmb();
iommu->flush.flush_context(iommu,
- domain->iommu_did[iommu->seq_id],
+ domain_id_iommu(domain, iommu),
PCI_DEVID(info->bus, info->devfn),
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
@@ -4771,7 +4608,6 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev)
out:
spin_unlock(&iommu->lock);
- spin_unlock_irqrestore(&device_domain_lock, flags);
return ret;
}
@@ -4895,13 +4731,11 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long pages = aligned_nrpages(iova, size);
unsigned long pfn = iova >> VTD_PAGE_SHIFT;
- struct intel_iommu *iommu;
- int iommu_id;
+ struct iommu_domain_info *info;
+ unsigned long i;
- for_each_domain_iommu(iommu_id, dmar_domain) {
- iommu = g_iommus[iommu_id];
- __mapping_notify_one(iommu, dmar_domain, pfn, pages);
- }
+ xa_for_each(&dmar_domain->iommu_array, i, info)
+ __mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
}
const struct iommu_ops intel_iommu_ops = {
@@ -4911,7 +4745,6 @@ const struct iommu_ops intel_iommu_ops = {
.probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
- .put_resv_regions = generic_iommu_put_resv_regions,
.device_group = intel_iommu_device_group,
.dev_enable_feat = intel_iommu_dev_enable_feat,
.dev_disable_feat = intel_iommu_dev_disable_feat,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
new file mode 100644
index 000000000000..fae45bbb0c7f
--- /dev/null
+++ b/drivers/iommu/intel/iommu.h
@@ -0,0 +1,839 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2006-2015, Intel Corporation.
+ *
+ * Authors: Ashok Raj <ashok.raj@intel.com>
+ * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ * David Woodhouse <David.Woodhouse@intel.com>
+ */
+
+#ifndef _INTEL_IOMMU_H_
+#define _INTEL_IOMMU_H_
+
+#include <linux/types.h>
+#include <linux/iova.h>
+#include <linux/io.h>
+#include <linux/idr.h>
+#include <linux/mmu_notifier.h>
+#include <linux/list.h>
+#include <linux/iommu.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/dmar.h>
+#include <linux/ioasid.h>
+#include <linux/bitfield.h>
+#include <linux/xarray.h>
+
+#include <asm/cacheflush.h>
+#include <asm/iommu.h>
+
+/*
+ * VT-d hardware uses 4KiB page size regardless of host page size.
+ */
+#define VTD_PAGE_SHIFT (12)
+#define VTD_PAGE_SIZE (1UL << VTD_PAGE_SHIFT)
+#define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT)
+#define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK)
+
+#define VTD_STRIDE_SHIFT (9)
+#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT)
+
+#define DMA_PTE_READ BIT_ULL(0)
+#define DMA_PTE_WRITE BIT_ULL(1)
+#define DMA_PTE_LARGE_PAGE BIT_ULL(7)
+#define DMA_PTE_SNP BIT_ULL(11)
+
+#define DMA_FL_PTE_PRESENT BIT_ULL(0)
+#define DMA_FL_PTE_US BIT_ULL(2)
+#define DMA_FL_PTE_ACCESS BIT_ULL(5)
+#define DMA_FL_PTE_DIRTY BIT_ULL(6)
+#define DMA_FL_PTE_XD BIT_ULL(63)
+
+#define ADDR_WIDTH_5LEVEL (57)
+#define ADDR_WIDTH_4LEVEL (48)
+
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define CONTEXT_TT_DEV_IOTLB 1
+#define CONTEXT_TT_PASS_THROUGH 2
+#define CONTEXT_PASIDE BIT_ULL(3)
+
+/*
+ * Intel IOMMU register specification per version 1.0 public spec.
+ */
+#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */
+#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */
+#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */
+#define DMAR_GCMD_REG 0x18 /* Global command register */
+#define DMAR_GSTS_REG 0x1c /* Global status register */
+#define DMAR_RTADDR_REG 0x20 /* Root entry table */
+#define DMAR_CCMD_REG 0x28 /* Context command reg */
+#define DMAR_FSTS_REG 0x34 /* Fault Status register */
+#define DMAR_FECTL_REG 0x38 /* Fault control register */
+#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */
+#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */
+#define DMAR_FEUADDR_REG 0x44 /* Upper address register */
+#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */
+#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */
+#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */
+#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
+#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
+#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
+#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
+#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
+#define DMAR_IQ_SHIFT 4 /* Invalidation queue head/tail shift */
+#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
+#define DMAR_ICS_REG 0x9c /* Invalidation complete status register */
+#define DMAR_IQER_REG 0xb0 /* Invalidation queue error record register */
+#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */
+#define DMAR_PQH_REG 0xc0 /* Page request queue head register */
+#define DMAR_PQT_REG 0xc8 /* Page request queue tail register */
+#define DMAR_PQA_REG 0xd0 /* Page request queue address register */
+#define DMAR_PRS_REG 0xdc /* Page request status register */
+#define DMAR_PECTL_REG 0xe0 /* Page request event control register */
+#define DMAR_PEDATA_REG 0xe4 /* Page request event interrupt data register */
+#define DMAR_PEADDR_REG 0xe8 /* Page request event interrupt addr register */
+#define DMAR_PEUADDR_REG 0xec /* Page request event Upper address register */
+#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability register */
+#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type register */
+#define DMAR_MTRR_FIX64K_00000_REG 0x120 /* MTRR Fixed range registers */
+#define DMAR_MTRR_FIX16K_80000_REG 0x128
+#define DMAR_MTRR_FIX16K_A0000_REG 0x130
+#define DMAR_MTRR_FIX4K_C0000_REG 0x138
+#define DMAR_MTRR_FIX4K_C8000_REG 0x140
+#define DMAR_MTRR_FIX4K_D0000_REG 0x148
+#define DMAR_MTRR_FIX4K_D8000_REG 0x150
+#define DMAR_MTRR_FIX4K_E0000_REG 0x158
+#define DMAR_MTRR_FIX4K_E8000_REG 0x160
+#define DMAR_MTRR_FIX4K_F0000_REG 0x168
+#define DMAR_MTRR_FIX4K_F8000_REG 0x170
+#define DMAR_MTRR_PHYSBASE0_REG 0x180 /* MTRR Variable range registers */
+#define DMAR_MTRR_PHYSMASK0_REG 0x188
+#define DMAR_MTRR_PHYSBASE1_REG 0x190
+#define DMAR_MTRR_PHYSMASK1_REG 0x198
+#define DMAR_MTRR_PHYSBASE2_REG 0x1a0
+#define DMAR_MTRR_PHYSMASK2_REG 0x1a8
+#define DMAR_MTRR_PHYSBASE3_REG 0x1b0
+#define DMAR_MTRR_PHYSMASK3_REG 0x1b8
+#define DMAR_MTRR_PHYSBASE4_REG 0x1c0
+#define DMAR_MTRR_PHYSMASK4_REG 0x1c8
+#define DMAR_MTRR_PHYSBASE5_REG 0x1d0
+#define DMAR_MTRR_PHYSMASK5_REG 0x1d8
+#define DMAR_MTRR_PHYSBASE6_REG 0x1e0
+#define DMAR_MTRR_PHYSMASK6_REG 0x1e8
+#define DMAR_MTRR_PHYSBASE7_REG 0x1f0
+#define DMAR_MTRR_PHYSMASK7_REG 0x1f8
+#define DMAR_MTRR_PHYSBASE8_REG 0x200
+#define DMAR_MTRR_PHYSMASK8_REG 0x208
+#define DMAR_MTRR_PHYSBASE9_REG 0x210
+#define DMAR_MTRR_PHYSMASK9_REG 0x218
+#define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */
+#define DMAR_VCMD_REG 0xe00 /* Virtual command register */
+#define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */
+
+#define DMAR_IQER_REG_IQEI(reg) FIELD_GET(GENMASK_ULL(3, 0), reg)
+#define DMAR_IQER_REG_ITESID(reg) FIELD_GET(GENMASK_ULL(47, 32), reg)
+#define DMAR_IQER_REG_ICESID(reg) FIELD_GET(GENMASK_ULL(63, 48), reg)
+
+#define OFFSET_STRIDE (9)
+
+#define dmar_readq(a) readq(a)
+#define dmar_writeq(a,v) writeq(v,a)
+#define dmar_readl(a) readl(a)
+#define dmar_writel(a, v) writel(v, a)
+
+#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4)
+#define DMAR_VER_MINOR(v) ((v) & 0x0f)
+
+/*
+ * Decoding Capability Register
+ */
+#define cap_5lp_support(c) (((c) >> 60) & 1)
+#define cap_pi_support(c) (((c) >> 59) & 1)
+#define cap_fl1gp_support(c) (((c) >> 56) & 1)
+#define cap_read_drain(c) (((c) >> 55) & 1)
+#define cap_write_drain(c) (((c) >> 54) & 1)
+#define cap_max_amask_val(c) (((c) >> 48) & 0x3f)
+#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1)
+#define cap_pgsel_inv(c) (((c) >> 39) & 1)
+
+#define cap_super_page_val(c) (((c) >> 34) & 0xf)
+#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \
+ * OFFSET_STRIDE) + 21)
+
+#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16)
+#define cap_max_fault_reg_offset(c) \
+ (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)
+
+#define cap_zlr(c) (((c) >> 22) & 1)
+#define cap_isoch(c) (((c) >> 23) & 1)
+#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1)
+#define cap_sagaw(c) (((c) >> 8) & 0x1f)
+#define cap_caching_mode(c) (((c) >> 7) & 1)
+#define cap_phmr(c) (((c) >> 6) & 1)
+#define cap_plmr(c) (((c) >> 5) & 1)
+#define cap_rwbf(c) (((c) >> 4) & 1)
+#define cap_afl(c) (((c) >> 3) & 1)
+#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
+/*
+ * Extended Capability Register
+ */
+
+#define ecap_rps(e) (((e) >> 49) & 0x1)
+#define ecap_smpwc(e) (((e) >> 48) & 0x1)
+#define ecap_flts(e) (((e) >> 47) & 0x1)
+#define ecap_slts(e) (((e) >> 46) & 0x1)
+#define ecap_slads(e) (((e) >> 45) & 0x1)
+#define ecap_vcs(e) (((e) >> 44) & 0x1)
+#define ecap_smts(e) (((e) >> 43) & 0x1)
+#define ecap_dit(e) (((e) >> 41) & 0x1)
+#define ecap_pds(e) (((e) >> 42) & 0x1)
+#define ecap_pasid(e) (((e) >> 40) & 0x1)
+#define ecap_pss(e) (((e) >> 35) & 0x1f)
+#define ecap_eafs(e) (((e) >> 34) & 0x1)
+#define ecap_nwfs(e) (((e) >> 33) & 0x1)
+#define ecap_srs(e) (((e) >> 31) & 0x1)
+#define ecap_ers(e) (((e) >> 30) & 0x1)
+#define ecap_prs(e) (((e) >> 29) & 0x1)
+#define ecap_broken_pasid(e) (((e) >> 28) & 0x1)
+#define ecap_dis(e) (((e) >> 27) & 0x1)
+#define ecap_nest(e) (((e) >> 26) & 0x1)
+#define ecap_mts(e) (((e) >> 25) & 0x1)
+#define ecap_ecs(e) (((e) >> 24) & 0x1)
+#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
+#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16)
+#define ecap_coherent(e) ((e) & 0x1)
+#define ecap_qis(e) ((e) & 0x2)
+#define ecap_pass_through(e) (((e) >> 6) & 0x1)
+#define ecap_eim_support(e) (((e) >> 4) & 0x1)
+#define ecap_ir_support(e) (((e) >> 3) & 0x1)
+#define ecap_dev_iotlb_support(e) (((e) >> 2) & 0x1)
+#define ecap_max_handle_mask(e) (((e) >> 20) & 0xf)
+#define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */
+
+/* Virtual command interface capability */
+#define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */
+
+/* IOTLB_REG */
+#define DMA_TLB_FLUSH_GRANU_OFFSET 60
+#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
+#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
+#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
+#define DMA_TLB_IIRG(type) ((type >> 60) & 3)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 3)
+#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
+#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
+#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32)
+#define DMA_TLB_IVT (((u64)1) << 63)
+#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
+#define DMA_TLB_MAX_SIZE (0x3f)
+
+/* INVALID_DESC */
+#define DMA_CCMD_INVL_GRANU_OFFSET 61
+#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 4)
+#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 4)
+#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 4)
+#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7)
+#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
+#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16)))
+#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6)
+#define DMA_ID_TLB_ADDR(addr) (addr)
+#define DMA_ID_TLB_ADDR_MASK(mask) (mask)
+
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)<<31)
+#define DMA_PMEN_PRS (((u32)1)<<0)
+
+/* GCMD_REG */
+#define DMA_GCMD_TE (((u32)1) << 31)
+#define DMA_GCMD_SRTP (((u32)1) << 30)
+#define DMA_GCMD_SFL (((u32)1) << 29)
+#define DMA_GCMD_EAFL (((u32)1) << 28)
+#define DMA_GCMD_WBF (((u32)1) << 27)
+#define DMA_GCMD_QIE (((u32)1) << 26)
+#define DMA_GCMD_SIRTP (((u32)1) << 24)
+#define DMA_GCMD_IRE (((u32) 1) << 25)
+#define DMA_GCMD_CFI (((u32) 1) << 23)
+
+/* GSTS_REG */
+#define DMA_GSTS_TES (((u32)1) << 31)
+#define DMA_GSTS_RTPS (((u32)1) << 30)
+#define DMA_GSTS_FLS (((u32)1) << 29)
+#define DMA_GSTS_AFLS (((u32)1) << 28)
+#define DMA_GSTS_WBFS (((u32)1) << 27)
+#define DMA_GSTS_QIES (((u32)1) << 26)
+#define DMA_GSTS_IRTPS (((u32)1) << 24)
+#define DMA_GSTS_IRES (((u32)1) << 25)
+#define DMA_GSTS_CFIS (((u32)1) << 23)
+
+/* DMA_RTADDR_REG */
+#define DMA_RTADDR_RTT (((u64)1) << 11)
+#define DMA_RTADDR_SMT (((u64)1) << 10)
+
+/* CCMD_REG */
+#define DMA_CCMD_ICC (((u64)1) << 63)
+#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
+#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
+#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
+#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
+#define DMA_CCMD_MASK_NOBIT 0
+#define DMA_CCMD_MASK_1BIT 1
+#define DMA_CCMD_MASK_2BIT 2
+#define DMA_CCMD_MASK_3BIT 3
+#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
+#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
+
+/* FECTL_REG */
+#define DMA_FECTL_IM (((u32)1) << 31)
+
+/* FSTS_REG */
+#define DMA_FSTS_PFO (1 << 0) /* Primary Fault Overflow */
+#define DMA_FSTS_PPF (1 << 1) /* Primary Pending Fault */
+#define DMA_FSTS_IQE (1 << 4) /* Invalidation Queue Error */
+#define DMA_FSTS_ICE (1 << 5) /* Invalidation Completion Error */
+#define DMA_FSTS_ITE (1 << 6) /* Invalidation Time-out Error */
+#define DMA_FSTS_PRO (1 << 7) /* Page Request Overflow */
+#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
+
+/* FRCD_REG, 32 bits access */
+#define DMA_FRCD_F (((u32)1) << 31)
+#define dma_frcd_type(d) ((d >> 30) & 1)
+#define dma_frcd_fault_reason(c) (c & 0xff)
+#define dma_frcd_source_id(c) (c & 0xffff)
+#define dma_frcd_pasid_value(c) (((c) >> 8) & 0xfffff)
+#define dma_frcd_pasid_present(c) (((c) >> 31) & 1)
+/* low 64 bit */
+#define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT))
+
+/* PRS_REG */
+#define DMA_PRS_PPR ((u32)1)
+#define DMA_PRS_PRO ((u32)2)
+
+#define DMA_VCS_PAS ((u64)1)
+
+#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
+do { \
+ cycles_t start_time = get_cycles(); \
+ while (1) { \
+ sts = op(iommu->reg + offset); \
+ if (cond) \
+ break; \
+ if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
+ panic("DMAR hardware is malfunctioning\n"); \
+ cpu_relax(); \
+ } \
+} while (0)
+
+#define QI_LENGTH 256 /* queue length */
+
+enum {
+ QI_FREE,
+ QI_IN_USE,
+ QI_DONE,
+ QI_ABORT
+};
+
+#define QI_CC_TYPE 0x1
+#define QI_IOTLB_TYPE 0x2
+#define QI_DIOTLB_TYPE 0x3
+#define QI_IEC_TYPE 0x4
+#define QI_IWD_TYPE 0x5
+#define QI_EIOTLB_TYPE 0x6
+#define QI_PC_TYPE 0x7
+#define QI_DEIOTLB_TYPE 0x8
+#define QI_PGRP_RESP_TYPE 0x9
+#define QI_PSTRM_RESP_TYPE 0xa
+
+#define QI_IEC_SELECTIVE (((u64)1) << 4)
+#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
+#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27))
+
+#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
+#define QI_IWD_STATUS_WRITE (((u64)1) << 5)
+#define QI_IWD_FENCE (((u64)1) << 6)
+#define QI_IWD_PRQ_DRAIN (((u64)1) << 7)
+
+#define QI_IOTLB_DID(did) (((u64)did) << 16)
+#define QI_IOTLB_DR(dr) (((u64)dr) << 7)
+#define QI_IOTLB_DW(dw) (((u64)dw) << 6)
+#define QI_IOTLB_GRAN(gran) (((u64)gran) >> (DMA_TLB_FLUSH_GRANU_OFFSET-4))
+#define QI_IOTLB_ADDR(addr) (((u64)addr) & VTD_PAGE_MASK)
+#define QI_IOTLB_IH(ih) (((u64)ih) << 6)
+#define QI_IOTLB_AM(am) (((u8)am) & 0x3f)
+
+#define QI_CC_FM(fm) (((u64)fm) << 48)
+#define QI_CC_SID(sid) (((u64)sid) << 32)
+#define QI_CC_DID(did) (((u64)did) << 16)
+#define QI_CC_GRAN(gran) (((u64)gran) >> (DMA_CCMD_INVL_GRANU_OFFSET-4))
+
+#define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32)
+#define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16)
+#define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK)
+#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \
+ ((u64)((pfsid >> 4) & 0xfff) << 52))
+#define QI_DEV_IOTLB_SIZE 1
+#define QI_DEV_IOTLB_MAX_INVS 32
+
+#define QI_PC_PASID(pasid) (((u64)pasid) << 32)
+#define QI_PC_DID(did) (((u64)did) << 16)
+#define QI_PC_GRAN(gran) (((u64)gran) << 4)
+
+/* PASID cache invalidation granu */
+#define QI_PC_ALL_PASIDS 0
+#define QI_PC_PASID_SEL 1
+#define QI_PC_GLOBAL 3
+
+#define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK)
+#define QI_EIOTLB_IH(ih) (((u64)ih) << 6)
+#define QI_EIOTLB_AM(am) (((u64)am) & 0x3f)
+#define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32)
+#define QI_EIOTLB_DID(did) (((u64)did) << 16)
+#define QI_EIOTLB_GRAN(gran) (((u64)gran) << 4)
+
+/* QI Dev-IOTLB inv granu */
+#define QI_DEV_IOTLB_GRAN_ALL 1
+#define QI_DEV_IOTLB_GRAN_PASID_SEL 0
+
+#define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK)
+#define QI_DEV_EIOTLB_SIZE (((u64)1) << 11)
+#define QI_DEV_EIOTLB_PASID(p) ((u64)((p) & 0xfffff) << 32)
+#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16)
+#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4)
+#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \
+ ((u64)((pfsid >> 4) & 0xfff) << 52))
+#define QI_DEV_EIOTLB_MAX_INVS 32
+
+/* Page group response descriptor QW0 */
+#define QI_PGRP_PASID_P(p) (((u64)(p)) << 4)
+#define QI_PGRP_PDP(p) (((u64)(p)) << 5)
+#define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12)
+#define QI_PGRP_DID(rid) (((u64)(rid)) << 16)
+#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32)
+
+/* Page group response descriptor QW1 */
+#define QI_PGRP_LPIG(x) (((u64)(x)) << 2)
+#define QI_PGRP_IDX(idx) (((u64)(idx)) << 3)
+
+
+#define QI_RESP_SUCCESS 0x0
+#define QI_RESP_INVALID 0x1
+#define QI_RESP_FAILURE 0xf
+
+#define QI_GRAN_NONG_PASID 2
+#define QI_GRAN_PSI_PASID 3
+
+#define qi_shift(iommu) (DMAR_IQ_SHIFT + !!ecap_smts((iommu)->ecap))
+
+struct qi_desc {
+ u64 qw0;
+ u64 qw1;
+ u64 qw2;
+ u64 qw3;
+};
+
+struct q_inval {
+ raw_spinlock_t q_lock;
+ void *desc; /* invalidation queue */
+ int *desc_status; /* desc status */
+ int free_head; /* first free entry */
+ int free_tail; /* last free entry */
+ int free_cnt;
+};
+
+struct dmar_pci_notify_info;
+
+#ifdef CONFIG_IRQ_REMAP
+/* 1MB - maximum possible interrupt remapping table size */
+#define INTR_REMAP_PAGE_ORDER 8
+#define INTR_REMAP_TABLE_REG_SIZE 0xf
+#define INTR_REMAP_TABLE_REG_SIZE_MASK 0xf
+
+#define INTR_REMAP_TABLE_ENTRIES 65536
+
+struct irq_domain;
+
+struct ir_table {
+ struct irte *base;
+ unsigned long *bitmap;
+};
+
+void intel_irq_remap_add_device(struct dmar_pci_notify_info *info);
+#else
+static inline void
+intel_irq_remap_add_device(struct dmar_pci_notify_info *info) { }
+#endif
+
+struct iommu_flush {
+ void (*flush_context)(struct intel_iommu *iommu, u16 did, u16 sid,
+ u8 fm, u64 type);
+ void (*flush_iotlb)(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type);
+};
+
+enum {
+ SR_DMAR_FECTL_REG,
+ SR_DMAR_FEDATA_REG,
+ SR_DMAR_FEADDR_REG,
+ SR_DMAR_FEUADDR_REG,
+ MAX_SR_DMAR_REGS
+};
+
+#define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0)
+#define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1)
+#define VTD_FLAG_SVM_CAPABLE (1 << 2)
+
+extern int intel_iommu_sm;
+
+#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
+#define pasid_supported(iommu) (sm_supported(iommu) && \
+ ecap_pasid((iommu)->ecap))
+
+struct pasid_entry;
+struct pasid_state_entry;
+struct page_req_dsc;
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+ u64 lo;
+ u64 hi;
+};
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+ u64 lo;
+ u64 hi;
+};
+
+/*
+ * When VT-d works in the scalable mode, it allows DMA translation to
+ * happen through either first level or second level page table. This
+ * bit marks that the DMA translation for the domain goes through the
+ * first level page table, otherwise, it goes through the second level.
+ */
+#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(1)
+
+struct iommu_domain_info {
+ struct intel_iommu *iommu;
+ unsigned int refcnt; /* Refcount of devices per iommu */
+ u16 did; /* Domain ids per IOMMU. Use u16 since
+ * domain ids are 16 bit wide according
+ * to VT-d spec, section 9.3 */
+};
+
+struct dmar_domain {
+ int nid; /* node id */
+ struct xarray iommu_array; /* Attached IOMMU array */
+
+ u8 has_iotlb_device: 1;
+ u8 iommu_coherency: 1; /* indicate coherency of iommu access */
+ u8 force_snooping : 1; /* Create IOPTEs with snoop control */
+ u8 set_pte_snp:1;
+
+ spinlock_t lock; /* Protect device tracking lists */
+ struct list_head devices; /* all devices' list */
+
+ struct dma_pte *pgd; /* virtual address */
+ int gaw; /* max guest address width */
+
+ /* adjusted guest address width, 0 is level 2 30-bit */
+ int agaw;
+
+ int flags; /* flags to find out type of domain */
+ int iommu_superpage;/* Level of superpages supported:
+ 0 == 4KiB (no superpages), 1 == 2MiB,
+ 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
+ u64 max_addr; /* maximum mapped address */
+
+ struct iommu_domain domain; /* generic domain data structure for
+ iommu core */
+};
+
+struct intel_iommu {
+ void __iomem *reg; /* Pointer to hardware regs, virtual addr */
+ u64 reg_phys; /* physical address of hw register set */
+ u64 reg_size; /* size of hw register set */
+ u64 cap;
+ u64 ecap;
+ u64 vccap;
+ u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+ raw_spinlock_t register_lock; /* protect register handling */
+ int seq_id; /* sequence id of the iommu */
+ int agaw; /* agaw of this iommu */
+ int msagaw; /* max sagaw of this iommu */
+ unsigned int irq, pr_irq;
+ u16 segment; /* PCI segment# */
+ unsigned char name[13]; /* Device Name */
+
+#ifdef CONFIG_INTEL_IOMMU
+ unsigned long *domain_ids; /* bitmap of domains */
+ spinlock_t lock; /* protect context, domain ids */
+ struct root_entry *root_entry; /* virtual address */
+
+ struct iommu_flush flush;
+#endif
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ struct page_req_dsc *prq;
+ unsigned char prq_name[16]; /* Name for PRQ interrupt */
+ struct completion prq_complete;
+ struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */
+#endif
+ struct iopf_queue *iopf_queue;
+ unsigned char iopfq_name[16];
+ struct q_inval *qi; /* Queued invalidation info */
+ u32 *iommu_state; /* Store iommu states between suspend and resume.*/
+
+#ifdef CONFIG_IRQ_REMAP
+ struct ir_table *ir_table; /* Interrupt remapping info */
+ struct irq_domain *ir_domain;
+ struct irq_domain *ir_msi_domain;
+#endif
+ struct iommu_device iommu; /* IOMMU core code handle */
+ int node;
+ u32 flags; /* Software defined flags */
+
+ struct dmar_drhd_unit *drhd;
+ void *perf_statistic;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+ struct list_head link; /* link to domain siblings */
+ u32 segment; /* PCI segment number */
+ u8 bus; /* PCI bus number */
+ u8 devfn; /* PCI devfn number */
+ u16 pfsid; /* SRIOV physical function source ID */
+ u8 pasid_supported:3;
+ u8 pasid_enabled:1;
+ u8 pri_supported:1;
+ u8 pri_enabled:1;
+ u8 ats_supported:1;
+ u8 ats_enabled:1;
+ u8 ats_qdep;
+ struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
+ struct intel_iommu *iommu; /* IOMMU used by this device */
+ struct dmar_domain *domain; /* pointer to domain */
+ struct pasid_table *pasid_table; /* pasid table */
+};
+
+static inline void __iommu_flush_cache(
+ struct intel_iommu *iommu, void *addr, int size)
+{
+ if (!ecap_coherent(iommu->ecap))
+ clflush_cache_range(addr, size);
+}
+
+/* Convert generic struct iommu_domain to private struct dmar_domain */
+static inline struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
+{
+ return container_of(dom, struct dmar_domain, domain);
+}
+
+/* Retrieve the domain ID which has allocated to the domain */
+static inline u16
+domain_id_iommu(struct dmar_domain *domain, struct intel_iommu *iommu)
+{
+ struct iommu_domain_info *info =
+ xa_load(&domain->iommu_array, iommu->seq_id);
+
+ return info->did;
+}
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-10: available
+ * 11: snoop behavior
+ * 12-63: Host physical address
+ */
+struct dma_pte {
+ u64 val;
+};
+
+static inline void dma_clear_pte(struct dma_pte *pte)
+{
+ pte->val = 0;
+}
+
+static inline u64 dma_pte_addr(struct dma_pte *pte)
+{
+#ifdef CONFIG_64BIT
+ return pte->val & VTD_PAGE_MASK & (~DMA_FL_PTE_XD);
+#else
+ /* Must have a full atomic 64-bit read */
+ return __cmpxchg64(&pte->val, 0ULL, 0ULL) &
+ VTD_PAGE_MASK & (~DMA_FL_PTE_XD);
+#endif
+}
+
+static inline bool dma_pte_present(struct dma_pte *pte)
+{
+ return (pte->val & 3) != 0;
+}
+
+static inline bool dma_pte_superpage(struct dma_pte *pte)
+{
+ return (pte->val & DMA_PTE_LARGE_PAGE);
+}
+
+static inline bool first_pte_in_page(struct dma_pte *pte)
+{
+ return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
+}
+
+static inline int nr_pte_to_next_page(struct dma_pte *pte)
+{
+ return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) :
+ (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
+}
+
+extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+
+extern int dmar_enable_qi(struct intel_iommu *iommu);
+extern void dmar_disable_qi(struct intel_iommu *iommu);
+extern int dmar_reenable_qi(struct intel_iommu *iommu);
+extern void qi_global_iec(struct intel_iommu *iommu);
+
+extern void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,
+ u8 fm, u64 type);
+extern void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
+ unsigned int size_order, u64 type);
+extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u16 qdep, u64 addr, unsigned mask);
+
+void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
+ unsigned long npages, bool ih);
+
+void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
+ u32 pasid, u16 qdep, u64 addr,
+ unsigned int size_order);
+void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
+ u32 pasid);
+
+int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
+ unsigned int count, unsigned long options);
+/*
+ * Options used in qi_submit_sync:
+ * QI_OPT_WAIT_DRAIN - Wait for PRQ drain completion, spec 6.5.2.8.
+ */
+#define QI_OPT_WAIT_DRAIN BIT(0)
+
+extern int dmar_ir_support(void);
+
+void *alloc_pgtable_page(int node);
+void free_pgtable_page(void *vaddr);
+void iommu_flush_write_buffer(struct intel_iommu *iommu);
+int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev);
+struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
+extern void intel_svm_check(struct intel_iommu *iommu);
+extern int intel_svm_enable_prq(struct intel_iommu *iommu);
+extern int intel_svm_finish_prq(struct intel_iommu *iommu);
+struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
+ void *drvdata);
+void intel_svm_unbind(struct iommu_sva *handle);
+u32 intel_svm_get_pasid(struct iommu_sva *handle);
+int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
+ struct iommu_page_response *msg);
+
+struct intel_svm_dev {
+ struct list_head list;
+ struct rcu_head rcu;
+ struct device *dev;
+ struct intel_iommu *iommu;
+ struct iommu_sva sva;
+ unsigned long prq_seq_number;
+ u32 pasid;
+ int users;
+ u16 did;
+ u16 dev_iotlb:1;
+ u16 sid, qdep;
+};
+
+struct intel_svm {
+ struct mmu_notifier notifier;
+ struct mm_struct *mm;
+
+ unsigned int flags;
+ u32 pasid;
+ struct list_head devs;
+};
+#else
+static inline void intel_svm_check(struct intel_iommu *iommu) {}
+#endif
+
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+void intel_iommu_debugfs_init(void);
+#else
+static inline void intel_iommu_debugfs_init(void) {}
+#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
+
+extern const struct attribute_group *intel_iommu_groups[];
+bool context_present(struct context_entry *context);
+struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+ u8 devfn, int alloc);
+
+extern const struct iommu_ops intel_iommu_ops;
+
+#ifdef CONFIG_INTEL_IOMMU
+extern int iommu_calculate_agaw(struct intel_iommu *iommu);
+extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
+extern int dmar_disabled;
+extern int intel_iommu_enabled;
+#else
+static inline int iommu_calculate_agaw(struct intel_iommu *iommu)
+{
+ return 0;
+}
+static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
+{
+ return 0;
+}
+#define dmar_disabled (1)
+#define intel_iommu_enabled (0)
+#endif
+
+static inline const char *decode_prq_descriptor(char *str, size_t size,
+ u64 dw0, u64 dw1, u64 dw2, u64 dw3)
+{
+ char *buf = str;
+ int bytes;
+
+ bytes = snprintf(buf, size,
+ "rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx",
+ FIELD_GET(GENMASK_ULL(31, 16), dw0),
+ FIELD_GET(GENMASK_ULL(63, 12), dw1),
+ dw1 & BIT_ULL(0) ? 'r' : '-',
+ dw1 & BIT_ULL(1) ? 'w' : '-',
+ dw0 & BIT_ULL(52) ? 'x' : '-',
+ dw0 & BIT_ULL(53) ? 'p' : '-',
+ dw1 & BIT_ULL(2) ? 'l' : '-',
+ FIELD_GET(GENMASK_ULL(51, 32), dw0),
+ FIELD_GET(GENMASK_ULL(11, 3), dw1));
+
+ /* Private Data */
+ if (dw0 & BIT_ULL(9)) {
+ size -= bytes;
+ buf += bytes;
+ snprintf(buf, size, " private=0x%llx/0x%llx\n", dw2, dw3);
+ }
+
+ return str;
+}
+
+#endif
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index a67319597884..2e9683e970f8 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -10,7 +10,6 @@
#include <linux/hpet.h>
#include <linux/pci.h>
#include <linux/irq.h>
-#include <linux/intel-iommu.h>
#include <linux/acpi.h>
#include <linux/irqdomain.h>
#include <linux/crash_dump.h>
@@ -21,6 +20,7 @@
#include <asm/irq_remapping.h>
#include <asm/pci-direct.h>
+#include "iommu.h"
#include "../irq_remapping.h"
#include "cap_audit.h"
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index cb4c1d0cf25c..c5e7e8b020a5 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -12,13 +12,13 @@
#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
-#include <linux/intel-iommu.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>
+#include "iommu.h"
#include "pasid.h"
/*
@@ -86,54 +86,6 @@ void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid)
/*
* Per device pasid table management:
*/
-static inline void
-device_attach_pasid_table(struct device_domain_info *info,
- struct pasid_table *pasid_table)
-{
- info->pasid_table = pasid_table;
- list_add(&info->table, &pasid_table->dev);
-}
-
-static inline void
-device_detach_pasid_table(struct device_domain_info *info,
- struct pasid_table *pasid_table)
-{
- info->pasid_table = NULL;
- list_del(&info->table);
-}
-
-struct pasid_table_opaque {
- struct pasid_table **pasid_table;
- int segment;
- int bus;
- int devfn;
-};
-
-static int search_pasid_table(struct device_domain_info *info, void *opaque)
-{
- struct pasid_table_opaque *data = opaque;
-
- if (info->iommu->segment == data->segment &&
- info->bus == data->bus &&
- info->devfn == data->devfn &&
- info->pasid_table) {
- *data->pasid_table = info->pasid_table;
- return 1;
- }
-
- return 0;
-}
-
-static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
-{
- struct pasid_table_opaque *data = opaque;
-
- data->segment = pci_domain_nr(pdev->bus);
- data->bus = PCI_BUS_NUM(alias);
- data->devfn = alias & 0xff;
-
- return for_each_device_domain(&search_pasid_table, data);
-}
/*
* Allocate a pasid table for @dev. It should be called in a
@@ -143,28 +95,18 @@ int intel_pasid_alloc_table(struct device *dev)
{
struct device_domain_info *info;
struct pasid_table *pasid_table;
- struct pasid_table_opaque data;
struct page *pages;
u32 max_pasid = 0;
- int ret, order;
- int size;
+ int order, size;
might_sleep();
info = dev_iommu_priv_get(dev);
if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
return -EINVAL;
- /* DMA alias device already has a pasid table, use it: */
- data.pasid_table = &pasid_table;
- ret = pci_for_each_dma_alias(to_pci_dev(dev),
- &get_alias_pasid_table, &data);
- if (ret)
- goto attach_out;
-
pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
if (!pasid_table)
return -ENOMEM;
- INIT_LIST_HEAD(&pasid_table->dev);
if (info->pasid_supported)
max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
@@ -182,9 +124,7 @@ int intel_pasid_alloc_table(struct device *dev)
pasid_table->table = page_address(pages);
pasid_table->order = order;
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
-
-attach_out:
- device_attach_pasid_table(info, pasid_table);
+ info->pasid_table = pasid_table;
return 0;
}
@@ -202,10 +142,7 @@ void intel_pasid_free_table(struct device *dev)
return;
pasid_table = info->pasid_table;
- device_detach_pasid_table(info, pasid_table);
-
- if (!list_empty(&pasid_table->dev))
- return;
+ info->pasid_table = NULL;
/* Free scalable mode PASID directory tables: */
dir = pasid_table->table;
@@ -513,17 +450,17 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
struct pasid_entry *pte;
u16 did, pgtt;
+ spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
- if (WARN_ON(!pte))
- return;
-
- if (!pasid_pte_is_present(pte))
+ if (WARN_ON(!pte) || !pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
return;
+ }
did = pasid_get_domain_id(pte);
pgtt = pasid_pte_get_pgtt(pte);
-
intel_pasid_clear_entry(dev, pasid, fault_ignore);
+ spin_unlock(&iommu->lock);
if (!ecap_coherent(iommu->ecap))
clflush_cache_range(pte, sizeof(*pte));
@@ -559,22 +496,6 @@ static void pasid_flush_caches(struct intel_iommu *iommu,
}
}
-static inline int pasid_enable_wpe(struct pasid_entry *pte)
-{
-#ifdef CONFIG_X86
- unsigned long cr0 = read_cr0();
-
- /* CR0.WP is normally set but just to be sure */
- if (unlikely(!(cr0 & X86_CR0_WP))) {
- pr_err_ratelimited("No CPU write protect!\n");
- return -EINVAL;
- }
-#endif
- pasid_set_wpe(pte);
-
- return 0;
-};
-
/*
* Set up the scalable mode pasid table entry for first only
* translation type.
@@ -591,39 +512,52 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
return -EINVAL;
}
- pte = intel_pasid_get_entry(dev, pasid);
- if (WARN_ON(!pte))
+ if (flags & PASID_FLAG_SUPERVISOR_MODE) {
+#ifdef CONFIG_X86
+ unsigned long cr0 = read_cr0();
+
+ /* CR0.WP is normally set but just to be sure */
+ if (unlikely(!(cr0 & X86_CR0_WP))) {
+ pr_err("No CPU write protect!\n");
+ return -EINVAL;
+ }
+#endif
+ if (!ecap_srs(iommu->ecap)) {
+ pr_err("No supervisor request support on %s\n",
+ iommu->name);
+ return -EINVAL;
+ }
+ }
+
+ if ((flags & PASID_FLAG_FL5LP) && !cap_5lp_support(iommu->cap)) {
+ pr_err("No 5-level paging support for first-level on %s\n",
+ iommu->name);
return -EINVAL;
+ }
- /* Caller must ensure PASID entry is not in use. */
- if (pasid_pte_is_present(pte))
+ spin_lock(&iommu->lock);
+ pte = intel_pasid_get_entry(dev, pasid);
+ if (!pte) {
+ spin_unlock(&iommu->lock);
+ return -ENODEV;
+ }
+
+ if (pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
return -EBUSY;
+ }
pasid_clear_entry(pte);
/* Setup the first level page table pointer: */
pasid_set_flptr(pte, (u64)__pa(pgd));
if (flags & PASID_FLAG_SUPERVISOR_MODE) {
- if (!ecap_srs(iommu->ecap)) {
- pr_err("No supervisor request support on %s\n",
- iommu->name);
- return -EINVAL;
- }
pasid_set_sre(pte);
- if (pasid_enable_wpe(pte))
- return -EINVAL;
-
+ pasid_set_wpe(pte);
}
- if (flags & PASID_FLAG_FL5LP) {
- if (cap_5lp_support(iommu->cap)) {
- pasid_set_flpm(pte, 1);
- } else {
- pr_err("No 5-level paging support for first-level\n");
- pasid_clear_entry(pte);
- return -EINVAL;
- }
- }
+ if (flags & PASID_FLAG_FL5LP)
+ pasid_set_flpm(pte, 1);
if (flags & PASID_FLAG_PAGE_SNOOP)
pasid_set_pgsnp(pte);
@@ -635,6 +569,8 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
/* Setup Present and PASID Granular Transfer Type: */
pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
pasid_set_present(pte);
+ spin_unlock(&iommu->lock);
+
pasid_flush_caches(iommu, pte, pasid, did);
return 0;
@@ -690,17 +626,19 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
}
pgd_val = virt_to_phys(pgd);
- did = domain->iommu_did[iommu->seq_id];
+ did = domain_id_iommu(domain, iommu);
+ spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
- dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+ spin_unlock(&iommu->lock);
return -ENODEV;
}
- /* Caller must ensure PASID entry is not in use. */
- if (pasid_pte_is_present(pte))
+ if (pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
return -EBUSY;
+ }
pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
@@ -717,6 +655,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
if (pasid != PASID_RID2PASID)
pasid_set_sre(pte);
pasid_set_present(pte);
+ spin_unlock(&iommu->lock);
+
pasid_flush_caches(iommu, pte, pasid, did);
return 0;
@@ -732,15 +672,17 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
u16 did = FLPT_DEFAULT_DID;
struct pasid_entry *pte;
+ spin_lock(&iommu->lock);
pte = intel_pasid_get_entry(dev, pasid);
if (!pte) {
- dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+ spin_unlock(&iommu->lock);
return -ENODEV;
}
- /* Caller must ensure PASID entry is not in use. */
- if (pasid_pte_is_present(pte))
+ if (pasid_pte_is_present(pte)) {
+ spin_unlock(&iommu->lock);
return -EBUSY;
+ }
pasid_clear_entry(pte);
pasid_set_domain_id(pte, did);
@@ -755,6 +697,8 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
*/
pasid_set_sre(pte);
pasid_set_present(pte);
+ spin_unlock(&iommu->lock);
+
pasid_flush_caches(iommu, pte, pasid, did);
return 0;
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 583ea67fc783..20c54e50f533 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -39,6 +39,7 @@
* only and pass-through transfer modes.
*/
#define FLPT_DEFAULT_DID 1
+#define NUM_RESERVED_DID 2
/*
* The SUPERVISOR_MODE flag indicates a first level translation which
@@ -74,7 +75,6 @@ struct pasid_table {
void *table; /* pasid table pointer */
int order; /* page order of pasid table */
u32 max_pasid; /* max pasid */
- struct list_head dev; /* device list */
};
/* Get PRESENT bit of a PASID directory entry. */
diff --git a/drivers/iommu/intel/perf.c b/drivers/iommu/intel/perf.c
index 0e8e03252d92..94ee70ac38e3 100644
--- a/drivers/iommu/intel/perf.c
+++ b/drivers/iommu/intel/perf.c
@@ -9,8 +9,8 @@
*/
#include <linux/spinlock.h>
-#include <linux/intel-iommu.h>
+#include "iommu.h"
#include "perf.h"
static DEFINE_SPINLOCK(latency_lock);
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 7ee37d996e15..8bcfb93dda56 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -5,7 +5,6 @@
* Authors: David Woodhouse <dwmw2@infradead.org>
*/
-#include <linux/intel-iommu.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
@@ -21,11 +20,12 @@
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>
-#include <trace/events/intel_iommu.h>
+#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "../iommu-sva-lib.h"
+#include "trace.h"
static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
@@ -328,9 +328,9 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
unsigned int flags)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- unsigned long iflags, sflags;
struct intel_svm_dev *sdev;
struct intel_svm *svm;
+ unsigned long sflags;
int ret = 0;
svm = pasid_private_find(mm->pasid);
@@ -394,11 +394,8 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
PASID_FLAG_SUPERVISOR_MODE : 0;
sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
- spin_lock_irqsave(&iommu->lock, iflags);
ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
FLPT_DEFAULT_DID, sflags);
- spin_unlock_irqrestore(&iommu->lock, iflags);
-
if (ret)
goto free_sdev;
@@ -544,7 +541,7 @@ static void intel_svm_drain_prq(struct device *dev, u32 pasid)
domain = info->domain;
pdev = to_pci_dev(dev);
sid = PCI_DEVID(info->bus, info->devfn);
- did = domain->iommu_did[iommu->seq_id];
+ did = domain_id_iommu(domain, iommu);
qdep = pci_ats_queue_depth(pdev);
/*
diff --git a/drivers/iommu/intel/trace.c b/drivers/iommu/intel/trace.c
index bfb6a6e37a88..117e626e3ea9 100644
--- a/drivers/iommu/intel/trace.c
+++ b/drivers/iommu/intel/trace.c
@@ -11,4 +11,4 @@
#include <linux/types.h>
#define CREATE_TRACE_POINTS
-#include <trace/events/intel_iommu.h>
+#include "trace.h"
diff --git a/drivers/iommu/intel/trace.h b/drivers/iommu/intel/trace.h
new file mode 100644
index 000000000000..93d96f93a89b
--- /dev/null
+++ b/drivers/iommu/intel/trace.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel IOMMU trace support
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM intel_iommu
+
+#if !defined(_TRACE_INTEL_IOMMU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_INTEL_IOMMU_H
+
+#include <linux/tracepoint.h>
+
+#include "iommu.h"
+
+#define MSG_MAX 256
+
+TRACE_EVENT(qi_submit,
+ TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3),
+
+ TP_ARGS(iommu, qw0, qw1, qw2, qw3),
+
+ TP_STRUCT__entry(
+ __field(u64, qw0)
+ __field(u64, qw1)
+ __field(u64, qw2)
+ __field(u64, qw3)
+ __string(iommu, iommu->name)
+ ),
+
+ TP_fast_assign(
+ __assign_str(iommu, iommu->name);
+ __entry->qw0 = qw0;
+ __entry->qw1 = qw1;
+ __entry->qw2 = qw2;
+ __entry->qw3 = qw3;
+ ),
+
+ TP_printk("%s %s: 0x%llx 0x%llx 0x%llx 0x%llx",
+ __print_symbolic(__entry->qw0 & 0xf,
+ { QI_CC_TYPE, "cc_inv" },
+ { QI_IOTLB_TYPE, "iotlb_inv" },
+ { QI_DIOTLB_TYPE, "dev_tlb_inv" },
+ { QI_IEC_TYPE, "iec_inv" },
+ { QI_IWD_TYPE, "inv_wait" },
+ { QI_EIOTLB_TYPE, "p_iotlb_inv" },
+ { QI_PC_TYPE, "pc_inv" },
+ { QI_DEIOTLB_TYPE, "p_dev_tlb_inv" },
+ { QI_PGRP_RESP_TYPE, "page_grp_resp" }),
+ __get_str(iommu),
+ __entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3
+ )
+);
+
+TRACE_EVENT(prq_report,
+ TP_PROTO(struct intel_iommu *iommu, struct device *dev,
+ u64 dw0, u64 dw1, u64 dw2, u64 dw3,
+ unsigned long seq),
+
+ TP_ARGS(iommu, dev, dw0, dw1, dw2, dw3, seq),
+
+ TP_STRUCT__entry(
+ __field(u64, dw0)
+ __field(u64, dw1)
+ __field(u64, dw2)
+ __field(u64, dw3)
+ __field(unsigned long, seq)
+ __string(iommu, iommu->name)
+ __string(dev, dev_name(dev))
+ __dynamic_array(char, buff, MSG_MAX)
+ ),
+
+ TP_fast_assign(
+ __entry->dw0 = dw0;
+ __entry->dw1 = dw1;
+ __entry->dw2 = dw2;
+ __entry->dw3 = dw3;
+ __entry->seq = seq;
+ __assign_str(iommu, iommu->name);
+ __assign_str(dev, dev_name(dev));
+ ),
+
+ TP_printk("%s/%s seq# %ld: %s",
+ __get_str(iommu), __get_str(dev), __entry->seq,
+ decode_prq_descriptor(__get_str(buff), MSG_MAX, __entry->dw0,
+ __entry->dw1, __entry->dw2, __entry->dw3)
+ )
+);
+#endif /* _TRACE_INTEL_IOMMU_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../drivers/iommu/intel/
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>