summaryrefslogtreecommitdiff
path: root/drivers/iommu/intel/dmar.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel/dmar.c')
-rw-r--r--drivers/iommu/intel/dmar.c517
1 files changed, 328 insertions, 189 deletions
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index cc46dff98fa0..ec975c73cfe6 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -19,7 +19,6 @@
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iova.h>
-#include <linux/intel-iommu.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
@@ -30,9 +29,13 @@
#include <linux/numa.h>
#include <linux/limits.h>
#include <asm/irq_remapping.h>
-#include <asm/iommu_table.h>
+#include "iommu.h"
#include "../irq_remapping.h"
+#include "../iommu-pages.h"
+#include "perf.h"
+#include "trace.h"
+#include "perfmon.h"
typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
struct dmar_res_callback {
@@ -59,13 +62,11 @@ LIST_HEAD(dmar_drhd_units);
struct acpi_table_header * __initdata dmar_tbl;
static int dmar_dev_scope_status = 1;
-static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
+static DEFINE_IDA(dmar_seq_ids);
static int alloc_iommu(struct dmar_drhd_unit *drhd);
static void free_iommu(struct intel_iommu *iommu);
-extern const struct iommu_ops intel_iommu_ops;
-
static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
{
/*
@@ -127,8 +128,6 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
struct pci_dev *tmp;
struct dmar_pci_notify_info *info;
- BUG_ON(dev->is_virtfn);
-
/*
* Ignore devices that have a domain number higher than what can
* be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
@@ -147,8 +146,6 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
} else {
info = kzalloc(size, GFP_KERNEL);
if (!info) {
- pr_warn("Out of memory when allocating notify_info "
- "for %s.\n", pci_name(dev));
if (dmar_dev_scope_status == 0)
dmar_dev_scope_status = -ENOMEM;
return NULL;
@@ -266,7 +263,8 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
get_device(dev));
return 1;
}
- BUG_ON(i >= devices_cnt);
+ if (WARN_ON(i >= devices_cnt))
+ return -EINVAL;
}
return 0;
@@ -316,6 +314,9 @@ static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
if (ret < 0 && dmar_dev_scope_status == 0)
dmar_dev_scope_status = ret;
+ if (ret >= 0)
+ intel_irq_remap_add_device(info);
+
return ret;
}
@@ -330,6 +331,13 @@ static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
dmar_iommu_notify_scope_dev(info);
}
+static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
+{
+ struct pci_dev *physfn = pci_physfn(pdev);
+
+ dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
+}
+
static int dmar_pci_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -339,8 +347,20 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb,
/* Only care about add/remove events for physical functions.
* For VFs we actually do the lookup based on the corresponding
* PF in device_to_iommu() anyway. */
- if (pdev->is_virtfn)
+ if (pdev->is_virtfn) {
+ /*
+ * Ensure that the VF device inherits the irq domain of the
+ * PF device. Ideally the device would inherit the domain
+ * from the bus, but DMAR can have multiple units per bus
+ * which makes this impossible. The VF 'bus' could inherit
+ * from the PF device, but that's yet another x86'sism to
+ * inflict on everybody else.
+ */
+ if (action == BUS_NOTIFY_ADD_DEVICE)
+ vf_inherit_msi_domain(pdev);
return NOTIFY_DONE;
+ }
+
if (action != BUS_NOTIFY_ADD_DEVICE &&
action != BUS_NOTIFY_REMOVED_DEVICE)
return NOTIFY_DONE;
@@ -363,7 +383,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb,
static struct notifier_block dmar_pci_bus_nb = {
.notifier_call = dmar_pci_bus_notifier,
- .priority = INT_MIN,
+ .priority = 1,
};
static struct dmar_drhd_unit *
@@ -380,7 +400,7 @@ dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
return NULL;
}
-/**
+/*
* dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
* structure which uniquely represent one DMA remapping hardware unit
* present in the platform
@@ -408,6 +428,8 @@ static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
memcpy(dmaru->hdr, header, header->length);
dmaru->reg_base_addr = drhd->address;
dmaru->segment = drhd->segment;
+ /* The size of the register set is 2 ^ N 4 KB pages. */
+ dmaru->reg_size = 1UL << (drhd->size + 12);
dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
((void *)drhd) + drhd->header.length,
@@ -473,9 +495,9 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
rhsa = (struct acpi_dmar_rhsa *)header;
for_each_drhd_unit(drhd) {
if (drhd->reg_base_addr == rhsa->base_address) {
- int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
+ int node = pxm_to_node(rhsa->proximity_domain);
- if (!node_online(node))
+ if (node != NUMA_NO_NODE && !node_online(node))
node = NUMA_NO_NODE;
drhd->iommu->node = node;
return 0;
@@ -503,6 +525,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
struct acpi_dmar_reserved_memory *rmrr;
struct acpi_dmar_atsr *atsr;
struct acpi_dmar_rhsa *rhsa;
+ struct acpi_dmar_satc *satc;
switch (header->type) {
case ACPI_DMAR_TYPE_HARDWARE_UNIT:
@@ -532,6 +555,10 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
/* We don't print this here because we need to sanity-check
it first. So print it in dmar_parse_one_andd() instead. */
break;
+ case ACPI_DMAR_TYPE_SATC:
+ satc = container_of(header, struct acpi_dmar_satc, header);
+ pr_info("SATC flags: 0x%x\n", satc->flags);
+ break;
}
}
@@ -619,6 +646,7 @@ parse_dmar_table(void)
.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
+ .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc,
};
/*
@@ -761,7 +789,8 @@ static int __init dmar_acpi_dev_scope_init(void)
andd->device_name);
continue;
}
- if (acpi_bus_get_device(h, &adev)) {
+ adev = acpi_fetch_acpi_dev(h);
+ if (!adev) {
pr_err("Failed to get device for ACPI object %s\n",
andd->device_name);
continue;
@@ -794,6 +823,7 @@ int __init dmar_dev_scope_init(void)
info = dmar_alloc_pci_notify_info(dev,
BUS_NOTIFY_ADD_DEVICE);
if (!info) {
+ pci_dev_put(dev);
return dmar_dev_scope_status;
} else {
dmar_pci_bus_add_dev(info);
@@ -885,7 +915,7 @@ dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
return 0;
}
-int __init detect_intel_iommu(void)
+void __init detect_intel_iommu(void)
{
int ret;
struct dmar_res_callback validate_drhd_cb = {
@@ -898,27 +928,23 @@ int __init detect_intel_iommu(void)
if (!ret)
ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
&validate_drhd_cb);
- if (!ret && !no_iommu && !iommu_detected && !dmar_disabled) {
+ if (!ret && !no_iommu && !iommu_detected &&
+ (!dmar_disabled || dmar_platform_optin())) {
iommu_detected = 1;
/* Make sure ACS will be enabled */
pci_request_acs();
}
-#ifdef CONFIG_X86
if (!ret) {
x86_init.iommu.iommu_init = intel_iommu_init;
x86_platform.iommu_shutdown = intel_iommu_shutdown;
}
-#endif
-
if (dmar_tbl) {
acpi_put_table(dmar_tbl);
dmar_tbl = NULL;
}
up_write(&dmar_global_lock);
-
- return ret ? ret : 1;
}
static void unmap_iommu(struct intel_iommu *iommu)
@@ -930,17 +956,18 @@ static void unmap_iommu(struct intel_iommu *iommu)
/**
* map_iommu: map the iommu's registers
* @iommu: the iommu to map
- * @phys_addr: the physical address of the base resgister
+ * @drhd: DMA remapping hardware definition structure
*
* Memory map the iommu's registers. Start w/ a single page, and
* possibly expand if that turns out to be insufficent.
*/
-static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
+static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd)
{
+ u64 phys_addr = drhd->reg_base_addr;
int map_size, err=0;
iommu->reg_phys = phys_addr;
- iommu->reg_size = VTD_PAGE_SIZE;
+ iommu->reg_size = drhd->reg_size;
if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
pr_err("Can't reserve memory\n");
@@ -963,7 +990,6 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
warn_invalid_dmar(phys_addr, " returns all ones");
goto unmap;
}
- iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
/* the registers might be more than one page */
map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
@@ -986,6 +1012,16 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
goto release;
}
}
+
+ if (cap_ecmds(iommu->cap)) {
+ int i;
+
+ for (i = 0; i < DMA_MAX_NUM_ECMDCAP; i++) {
+ iommu->ecmdcap[i] = dmar_readq(iommu->reg + DMAR_ECCAP_REG +
+ i * DMA_ECMD_REG_STEP);
+ }
+ }
+
err = 0;
goto out;
@@ -997,34 +1033,12 @@ out:
return err;
}
-static int dmar_alloc_seq_id(struct intel_iommu *iommu)
-{
- iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
- DMAR_UNITS_SUPPORTED);
- if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
- iommu->seq_id = -1;
- } else {
- set_bit(iommu->seq_id, dmar_seq_ids);
- sprintf(iommu->name, "dmar%d", iommu->seq_id);
- }
-
- return iommu->seq_id;
-}
-
-static void dmar_free_seq_id(struct intel_iommu *iommu)
-{
- if (iommu->seq_id >= 0) {
- clear_bit(iommu->seq_id, dmar_seq_ids);
- iommu->seq_id = -1;
- }
-}
-
static int alloc_iommu(struct dmar_drhd_unit *drhd)
{
struct intel_iommu *iommu;
u32 ver, sts;
- int agaw = 0;
- int msagaw = 0;
+ int agaw = -1;
+ int msagaw = -1;
int err;
if (!drhd->reg_base_addr) {
@@ -1036,36 +1050,55 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
if (!iommu)
return -ENOMEM;
- if (dmar_alloc_seq_id(iommu) < 0) {
+ iommu->seq_id = ida_alloc_range(&dmar_seq_ids, 0,
+ DMAR_UNITS_SUPPORTED - 1, GFP_KERNEL);
+ if (iommu->seq_id < 0) {
pr_err("Failed to allocate seq_id\n");
- err = -ENOSPC;
+ err = iommu->seq_id;
goto error;
}
+ snprintf(iommu->name, sizeof(iommu->name), "dmar%d", iommu->seq_id);
- err = map_iommu(iommu, drhd->reg_base_addr);
+ err = map_iommu(iommu, drhd);
if (err) {
pr_err("Failed to map %s\n", iommu->name);
goto error_free_seq_id;
}
- err = -EINVAL;
- agaw = iommu_calculate_agaw(iommu);
- if (agaw < 0) {
- pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
- iommu->seq_id);
- goto err_unmap;
+ if (!cap_sagaw(iommu->cap) &&
+ (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) {
+ pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
+ iommu->name);
+ drhd->ignored = 1;
}
- msagaw = iommu_calculate_max_sagaw(iommu);
- if (msagaw < 0) {
- pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
- iommu->seq_id);
- goto err_unmap;
+
+ if (!drhd->ignored) {
+ agaw = iommu_calculate_agaw(iommu);
+ if (agaw < 0) {
+ pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
+ iommu->seq_id);
+ drhd->ignored = 1;
+ }
+ }
+ if (!drhd->ignored) {
+ msagaw = iommu_calculate_max_sagaw(iommu);
+ if (msagaw < 0) {
+ pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
+ iommu->seq_id);
+ drhd->ignored = 1;
+ agaw = -1;
+ }
}
iommu->agaw = agaw;
iommu->msagaw = msagaw;
iommu->segment = drhd->segment;
-
+ iommu->device_rbtree = RB_ROOT;
+ spin_lock_init(&iommu->device_rbtree_lock);
+ mutex_init(&iommu->iopf_lock);
iommu->node = NUMA_NO_NODE;
+ spin_lock_init(&iommu->lock);
+ ida_init(&iommu->domain_ida);
+ mutex_init(&iommu->did_lock);
ver = readl(iommu->reg + DMAR_VER_REG);
pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
@@ -1084,30 +1117,49 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
if (sts & DMA_GSTS_QIES)
iommu->gcmd |= DMA_GCMD_QIE;
+ if (alloc_iommu_pmu(iommu))
+ pr_debug("Cannot alloc PMU for iommu (seq_id = %d)\n", iommu->seq_id);
+
raw_spin_lock_init(&iommu->register_lock);
- if (intel_iommu_enabled) {
+ /*
+ * A value of N in PSS field of eCap register indicates hardware
+ * supports PASID field of N+1 bits.
+ */
+ if (pasid_supported(iommu))
+ iommu->iommu.max_pasids = 2UL << ecap_pss(iommu->ecap);
+
+ /*
+ * This is only for hotplug; at boot time intel_iommu_enabled won't
+ * be set yet. When intel_iommu_init() runs, it registers the units
+ * present at boot time, then sets intel_iommu_enabled.
+ */
+ if (intel_iommu_enabled && !drhd->ignored) {
err = iommu_device_sysfs_add(&iommu->iommu, NULL,
intel_iommu_groups,
"%s", iommu->name);
if (err)
goto err_unmap;
- iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops);
-
- err = iommu_device_register(&iommu->iommu);
+ err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
if (err)
- goto err_unmap;
+ goto err_sysfs;
+
+ iommu_pmu_register(iommu);
}
drhd->iommu = iommu;
+ iommu->drhd = drhd;
return 0;
+err_sysfs:
+ iommu_device_sysfs_remove(&iommu->iommu);
err_unmap:
+ free_iommu_pmu(iommu);
unmap_iommu(iommu);
error_free_seq_id:
- dmar_free_seq_id(iommu);
+ ida_free(&dmar_seq_ids, iommu->seq_id);
error:
kfree(iommu);
return err;
@@ -1115,11 +1167,14 @@ error:
static void free_iommu(struct intel_iommu *iommu)
{
- if (intel_iommu_enabled) {
+ if (intel_iommu_enabled && !iommu->drhd->ignored) {
+ iommu_pmu_unregister(iommu);
iommu_device_unregister(&iommu->iommu);
iommu_device_sysfs_remove(&iommu->iommu);
}
+ free_iommu_pmu(iommu);
+
if (iommu->irq) {
if (iommu->pr_irq) {
free_irq(iommu->pr_irq, iommu);
@@ -1132,7 +1187,7 @@ static void free_iommu(struct intel_iommu *iommu)
}
if (iommu->qi) {
- free_page((unsigned long)iommu->qi->desc);
+ iommu_free_pages(iommu->qi->desc);
kfree(iommu->qi->desc_status);
kfree(iommu->qi);
}
@@ -1140,7 +1195,8 @@ static void free_iommu(struct intel_iommu *iommu)
if (iommu->reg)
unmap_iommu(iommu);
- dmar_free_seq_id(iommu);
+ ida_destroy(&iommu->domain_ida);
+ ida_free(&dmar_seq_ids, iommu->seq_id);
kfree(iommu);
}
@@ -1149,18 +1205,75 @@ static void free_iommu(struct intel_iommu *iommu)
*/
static inline void reclaim_free_desc(struct q_inval *qi)
{
- while (qi->desc_status[qi->free_tail] == QI_DONE ||
- qi->desc_status[qi->free_tail] == QI_ABORT) {
- qi->desc_status[qi->free_tail] = QI_FREE;
+ while (qi->desc_status[qi->free_tail] == QI_FREE && qi->free_tail != qi->free_head) {
qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
qi->free_cnt++;
}
}
+static const char *qi_type_string(u8 type)
+{
+ switch (type) {
+ case QI_CC_TYPE:
+ return "Context-cache Invalidation";
+ case QI_IOTLB_TYPE:
+ return "IOTLB Invalidation";
+ case QI_DIOTLB_TYPE:
+ return "Device-TLB Invalidation";
+ case QI_IEC_TYPE:
+ return "Interrupt Entry Cache Invalidation";
+ case QI_IWD_TYPE:
+ return "Invalidation Wait";
+ case QI_EIOTLB_TYPE:
+ return "PASID-based IOTLB Invalidation";
+ case QI_PC_TYPE:
+ return "PASID-cache Invalidation";
+ case QI_DEIOTLB_TYPE:
+ return "PASID-based Device-TLB Invalidation";
+ case QI_PGRP_RESP_TYPE:
+ return "Page Group Response";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
+{
+ unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
+ u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
+ struct qi_desc *desc = iommu->qi->desc + head;
+
+ if (fault & DMA_FSTS_IQE)
+ pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
+ DMAR_IQER_REG_IQEI(iqe_err));
+ if (fault & DMA_FSTS_ITE)
+ pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
+ DMAR_IQER_REG_ITESID(iqe_err));
+ if (fault & DMA_FSTS_ICE)
+ pr_err("VT-d detected Invalidation Completion Error: SID %llx",
+ DMAR_IQER_REG_ICESID(iqe_err));
+
+ pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
+ qi_type_string(desc->qw0 & 0xf),
+ (unsigned long long)desc->qw0,
+ (unsigned long long)desc->qw1);
+
+ head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
+ head <<= qi_shift(iommu);
+ desc = iommu->qi->desc + head;
+
+ pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
+ qi_type_string(desc->qw0 & 0xf),
+ (unsigned long long)desc->qw0,
+ (unsigned long long)desc->qw1);
+}
+
static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
{
u32 fault;
int head, tail;
+ struct device *dev;
+ u64 iqe_err, ite_sid;
struct q_inval *qi = iommu->qi;
int shift = qi_shift(iommu);
@@ -1168,6 +1281,8 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
return -EAGAIN;
fault = readl(iommu->reg + DMAR_FSTS_REG);
+ if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
+ qi_dump_fault(iommu, fault);
/*
* If IQE happens, the head points to the descriptor associated
@@ -1184,12 +1299,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
* used by software as private data. We won't print
* out these two qw's for security consideration.
*/
- pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n",
- (unsigned long long)desc->qw0,
- (unsigned long long)desc->qw1);
memcpy(desc, qi->desc + (wait_index << shift),
1 << shift);
writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
+ pr_info("Invalidation Queue Error (IQE) cleared\n");
return -EINVAL;
}
}
@@ -1205,7 +1318,15 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
tail = readl(iommu->reg + DMAR_IQT_REG);
tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
+ /*
+ * SID field is valid only when the ITE field is Set in FSTS_REG
+ * see Intel VT-d spec r4.1, section 11.4.9.9
+ */
+ iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
+ ite_sid = DMAR_IQER_REG_ITESID(iqe_err);
+
writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
+ pr_info("Invalidation Time-out Error (ITE) cleared\n");
do {
if (qi->desc_status[head] == QI_IN_USE)
@@ -1213,12 +1334,27 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
head = (head - 2 + QI_LENGTH) % QI_LENGTH;
} while (head != tail);
+ /*
+ * If device was released or isn't present, no need to retry
+ * the ATS invalidate request anymore.
+ *
+ * 0 value of ite_sid means old VT-d device, no ite_sid value.
+ * see Intel VT-d spec r4.1, section 11.4.9.9
+ */
+ if (ite_sid) {
+ dev = device_rbtree_find(iommu, ite_sid);
+ if (!dev || !dev_is_pci(dev) ||
+ !pci_device_is_present(to_pci_dev(dev)))
+ return -ETIMEDOUT;
+ }
if (qi->desc_status[wait_index] == QI_ABORT)
return -EAGAIN;
}
- if (fault & DMA_FSTS_ICE)
+ if (fault & DMA_FSTS_ICE) {
writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
+ pr_info("Invalidation Completion Error (ICE) cleared\n");
+ }
return 0;
}
@@ -1234,15 +1370,33 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
unsigned int count, unsigned long options)
{
struct q_inval *qi = iommu->qi;
+ s64 devtlb_start_ktime = 0;
+ s64 iotlb_start_ktime = 0;
+ s64 iec_start_ktime = 0;
struct qi_desc wait_desc;
int wait_index, index;
unsigned long flags;
int offset, shift;
int rc, i;
+ u64 type;
if (!qi)
return 0;
+ type = desc->qw0 & GENMASK_ULL(3, 0);
+
+ if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
+ dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
+ iotlb_start_ktime = ktime_to_ns(ktime_get());
+
+ if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
+ dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
+ devtlb_start_ktime = ktime_to_ns(ktime_get());
+
+ if (type == QI_IEC_TYPE &&
+ dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
+ iec_start_ktime = ktime_to_ns(ktime_get());
+
restart:
rc = 0;
@@ -1266,6 +1420,8 @@ restart:
offset = ((index + i) % QI_LENGTH) << shift;
memcpy(qi->desc + offset, &desc[i], 1 << shift);
qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
+ trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1,
+ desc[i].qw2, desc[i].qw3);
}
qi->desc_status[wait_index] = QI_IN_USE;
@@ -1289,7 +1445,7 @@ restart:
*/
writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
- while (qi->desc_status[wait_index] != QI_DONE) {
+ while (READ_ONCE(qi->desc_status[wait_index]) != QI_DONE) {
/*
* We will leave the interrupts disabled, to prevent interrupt
* context to queue another cmd while a cmd is already submitted
@@ -1306,8 +1462,16 @@ restart:
raw_spin_lock(&qi->q_lock);
}
- for (i = 0; i < count; i++)
- qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
+ /*
+ * The reclaim code can free descriptors from multiple submissions
+ * starting from the tail of the queue. When count == 0, the
+ * status of the standalone wait descriptor at the tail of the queue
+ * must be set to QI_FREE to allow the reclaim code to proceed.
+ * It is also possible that descriptors from one of the previous
+ * submissions has to be reclaimed by a subsequent submission.
+ */
+ for (i = 0; i <= count; i++)
+ qi->desc_status[(index + i) % QI_LENGTH] = QI_FREE;
reclaim_free_desc(qi);
raw_spin_unlock_irqrestore(&qi->q_lock, flags);
@@ -1315,6 +1479,18 @@ restart:
if (rc == -EAGAIN)
goto restart;
+ if (iotlb_start_ktime)
+ dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
+ ktime_to_ns(ktime_get()) - iotlb_start_ktime);
+
+ if (devtlb_start_ktime)
+ dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
+ ktime_to_ns(ktime_get()) - devtlb_start_ktime);
+
+ if (iec_start_ktime)
+ dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
+ ktime_to_ns(ktime_get()) - iec_start_ktime);
+
return rc;
}
@@ -1351,24 +1527,9 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
unsigned int size_order, u64 type)
{
- u8 dw = 0, dr = 0;
-
struct qi_desc desc;
- int ih = 0;
-
- if (cap_write_drain(iommu->cap))
- dw = 1;
-
- if (cap_read_drain(iommu->cap))
- dr = 1;
-
- desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
- | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
- desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
- | QI_IOTLB_AM(size_order);
- desc.qw2 = 0;
- desc.qw3 = 0;
+ qi_desc_iotlb(iommu, did, addr, size_order, type, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1377,20 +1538,16 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
{
struct qi_desc desc;
- if (mask) {
- addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
- desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
- } else
- desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
-
- if (qdep >= QI_DEV_IOTLB_MAX_INVS)
- qdep = 0;
-
- desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
- QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
- desc.qw2 = 0;
- desc.qw3 = 0;
+ /*
+ * VT-d spec, section 4.3:
+ *
+ * Software is recommended to not submit any Device-TLB invalidation
+ * requests while address remapping hardware is disabled.
+ */
+ if (!(iommu->gcmd & DMA_GCMD_TE))
+ return;
+ qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1410,62 +1567,33 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
return;
}
- if (npages == -1) {
- desc.qw0 = QI_EIOTLB_PASID(pasid) |
- QI_EIOTLB_DID(did) |
- QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = 0;
- } else {
- int mask = ilog2(__roundup_pow_of_two(npages));
- unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
-
- if (WARN_ON_ONCE(!ALIGN(addr, align)))
- addr &= ~(align - 1);
-
- desc.qw0 = QI_EIOTLB_PASID(pasid) |
- QI_EIOTLB_DID(did) |
- QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = QI_EIOTLB_ADDR(addr) |
- QI_EIOTLB_IH(ih) |
- QI_EIOTLB_AM(mask);
- }
-
+ qi_desc_piotlb(did, pasid, addr, npages, ih, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
/* PASID-based device IOTLB Invalidate */
void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
- u32 pasid, u16 qdep, u64 addr,
- unsigned int size_order, u64 granu)
+ u32 pasid, u16 qdep, u64 addr, unsigned int size_order)
{
- unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
- desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
- QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
- QI_DEV_IOTLB_PFSID(pfsid);
- desc.qw1 = QI_DEV_EIOTLB_GLOB(granu);
-
/*
- * If S bit is 0, we only flush a single page. If S bit is set,
- * The least significant zero bit indicates the invalidation address
- * range. VT-d spec 6.5.2.6.
- * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
- * size order = 0 is PAGE_SIZE 4KB
- * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
- * ECAP.
+ * VT-d spec, section 4.3:
+ *
+ * Software is recommended to not submit any Device-TLB invalidation
+ * requests while address remapping hardware is disabled.
*/
- desc.qw1 |= addr & ~mask;
- if (size_order)
- desc.qw1 |= QI_DEV_EIOTLB_SIZE;
+ if (!(iommu->gcmd & DMA_GCMD_TE))
+ return;
+ qi_desc_dev_iotlb_pasid(sid, pfsid, pasid,
+ qdep, addr, size_order,
+ &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
- u64 granu, int pasid)
+ u64 granu, u32 pasid)
{
struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
@@ -1527,7 +1655,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
* is present.
*/
if (ecap_smts(iommu->ecap))
- val |= (1 << 11) | 1;
+ val |= BIT_ULL(11) | BIT_ULL(0);
raw_spin_lock_irqsave(&iommu->register_lock, flags);
@@ -1553,7 +1681,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
int dmar_enable_qi(struct intel_iommu *iommu)
{
struct q_inval *qi;
- struct page *desc_page;
+ void *desc;
if (!ecap_qis(iommu->ecap))
return -ENOENT;
@@ -1574,19 +1702,20 @@ int dmar_enable_qi(struct intel_iommu *iommu)
* Need two pages to accommodate 256 descriptors of 256 bits each
* if the remapping hardware supports scalable mode translation.
*/
- desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
- !!ecap_smts(iommu->ecap));
- if (!desc_page) {
+ desc = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC,
+ ecap_smts(iommu->ecap) ? SZ_8K :
+ SZ_4K);
+ if (!desc) {
kfree(qi);
iommu->qi = NULL;
return -ENOMEM;
}
- qi->desc = page_address(desc_page);
+ qi->desc = desc;
qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
if (!qi->desc_status) {
- free_page((unsigned long) qi->desc);
+ iommu_free_pages(qi->desc);
kfree(qi);
iommu->qi = NULL;
return -ENOMEM;
@@ -1720,6 +1849,8 @@ static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
return DMAR_FECTL_REG;
else if (iommu->pr_irq == irq)
return DMAR_PECTL_REG;
+ else if (iommu->perf_irq == irq)
+ return DMAR_PERFINTRCTL_REG;
else
BUG();
}
@@ -1765,21 +1896,8 @@ void dmar_msi_write(int irq, struct msi_msg *msg)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-void dmar_msi_read(int irq, struct msi_msg *msg)
-{
- struct intel_iommu *iommu = irq_get_handler_data(irq);
- int reg = dmar_msi_reg(iommu, irq);
- unsigned long flag;
-
- raw_spin_lock_irqsave(&iommu->register_lock, flag);
- msg->data = readl(iommu->reg + reg + 4);
- msg->address_lo = readl(iommu->reg + reg + 8);
- msg->address_hi = readl(iommu->reg + reg + 12);
- raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
- u8 fault_reason, int pasid, u16 source_id,
+ u8 fault_reason, u32 pasid, u16 source_id,
unsigned long long addr)
{
const char *reason;
@@ -1787,17 +1905,30 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
reason = dmar_get_fault_reason(fault_reason, &fault_type);
- if (fault_type == INTR_REMAP)
- pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
- source_id >> 8, PCI_SLOT(source_id & 0xFF),
- PCI_FUNC(source_id & 0xFF), addr >> 48,
- fault_reason, reason);
- else
- pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
+ if (fault_type == INTR_REMAP) {
+ pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
+ source_id >> 8, PCI_SLOT(source_id & 0xFF),
+ PCI_FUNC(source_id & 0xFF), addr >> 48,
+ fault_reason, reason);
+
+ return 0;
+ }
+
+ if (pasid == IOMMU_PASID_INVALID)
+ pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
type ? "DMA Read" : "DMA Write",
source_id >> 8, PCI_SLOT(source_id & 0xFF),
- PCI_FUNC(source_id & 0xFF), pasid, addr,
+ PCI_FUNC(source_id & 0xFF), addr,
+ fault_reason, reason);
+ else
+ pr_err("[%s PASID 0x%x] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
+ type ? "DMA Read" : "DMA Write", pasid,
+ source_id >> 8, PCI_SLOT(source_id & 0xFF),
+ PCI_FUNC(source_id & 0xFF), addr,
fault_reason, reason);
+
+ dmar_fault_dump_ptes(iommu, source_id, addr, pasid);
+
return 0;
}
@@ -1829,7 +1960,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
u8 fault_reason;
u16 source_id;
u64 guest_addr;
- int type, pasid;
+ u32 pasid;
+ int type;
u32 data;
bool pasid_present;
@@ -1863,7 +1995,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
if (!ratelimited)
/* Using pasid -1 if pasid is not present */
dmar_fault_do_one(iommu, type, fault_reason,
- pasid_present ? pasid : -1,
+ pasid_present ? pasid : IOMMU_PASID_INVALID,
source_id, guest_addr);
fault_index++;
@@ -1904,7 +2036,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
return ret;
}
-int __init enable_drhd_fault_handling(void)
+int enable_drhd_fault_handling(unsigned int cpu)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
@@ -1912,9 +2044,15 @@ int __init enable_drhd_fault_handling(void)
/*
* Enable fault control interrupt.
*/
+ guard(rwsem_read)(&dmar_global_lock);
for_each_iommu(iommu, drhd) {
u32 fault_status;
- int ret = dmar_set_interrupt(iommu);
+ int ret;
+
+ if (iommu->irq || iommu->node != cpu_to_node(cpu))
+ continue;
+
+ ret = dmar_set_interrupt(iommu);
if (ret) {
pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
@@ -1997,7 +2135,6 @@ static int __init dmar_free_unused_resources(void)
}
late_initcall(dmar_free_unused_resources);
-IOMMU_INIT_POST(detect_intel_iommu);
/*
* DMAR Hotplug Support
@@ -2017,6 +2154,7 @@ static guid_t dmar_hp_guid =
#define DMAR_DSM_FUNC_DRHD 1
#define DMAR_DSM_FUNC_ATSR 2
#define DMAR_DSM_FUNC_RHSA 3
+#define DMAR_DSM_FUNC_SATC 4
static inline bool dmar_detect_dsm(acpi_handle handle, int func)
{
@@ -2034,6 +2172,7 @@ static int dmar_walk_dsm_resource(acpi_handle handle, int func,
[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
+ [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
};
if (!dmar_detect_dsm(handle, func))