diff options
Diffstat (limited to 'drivers/iommu/intel/dmar.c')
| -rw-r--r-- | drivers/iommu/intel/dmar.c | 517 |
1 files changed, 328 insertions, 189 deletions
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index cc46dff98fa0..ec975c73cfe6 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -19,7 +19,6 @@ #include <linux/pci.h> #include <linux/dmar.h> #include <linux/iova.h> -#include <linux/intel-iommu.h> #include <linux/timer.h> #include <linux/irq.h> #include <linux/interrupt.h> @@ -30,9 +29,13 @@ #include <linux/numa.h> #include <linux/limits.h> #include <asm/irq_remapping.h> -#include <asm/iommu_table.h> +#include "iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" +#include "perf.h" +#include "trace.h" +#include "perfmon.h" typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *); struct dmar_res_callback { @@ -59,13 +62,11 @@ LIST_HEAD(dmar_drhd_units); struct acpi_table_header * __initdata dmar_tbl; static int dmar_dev_scope_status = 1; -static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)]; +static DEFINE_IDA(dmar_seq_ids); static int alloc_iommu(struct dmar_drhd_unit *drhd); static void free_iommu(struct intel_iommu *iommu); -extern const struct iommu_ops intel_iommu_ops; - static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) { /* @@ -127,8 +128,6 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) struct pci_dev *tmp; struct dmar_pci_notify_info *info; - BUG_ON(dev->is_virtfn); - /* * Ignore devices that have a domain number higher than what can * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000 @@ -147,8 +146,6 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) } else { info = kzalloc(size, GFP_KERNEL); if (!info) { - pr_warn("Out of memory when allocating notify_info " - "for %s.\n", pci_name(dev)); if (dmar_dev_scope_status == 0) dmar_dev_scope_status = -ENOMEM; return NULL; @@ -266,7 +263,8 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, get_device(dev)); return 1; } - BUG_ON(i >= devices_cnt); + if (WARN_ON(i >= devices_cnt)) + return -EINVAL; } return 0; @@ -316,6 +314,9 @@ static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info) if (ret < 0 && dmar_dev_scope_status == 0) dmar_dev_scope_status = ret; + if (ret >= 0) + intel_irq_remap_add_device(info); + return ret; } @@ -330,6 +331,13 @@ static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info) dmar_iommu_notify_scope_dev(info); } +static inline void vf_inherit_msi_domain(struct pci_dev *pdev) +{ + struct pci_dev *physfn = pci_physfn(pdev); + + dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev)); +} + static int dmar_pci_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -339,8 +347,20 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, /* Only care about add/remove events for physical functions. * For VFs we actually do the lookup based on the corresponding * PF in device_to_iommu() anyway. */ - if (pdev->is_virtfn) + if (pdev->is_virtfn) { + /* + * Ensure that the VF device inherits the irq domain of the + * PF device. Ideally the device would inherit the domain + * from the bus, but DMAR can have multiple units per bus + * which makes this impossible. The VF 'bus' could inherit + * from the PF device, but that's yet another x86'sism to + * inflict on everybody else. + */ + if (action == BUS_NOTIFY_ADD_DEVICE) + vf_inherit_msi_domain(pdev); return NOTIFY_DONE; + } + if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_REMOVED_DEVICE) return NOTIFY_DONE; @@ -363,7 +383,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, static struct notifier_block dmar_pci_bus_nb = { .notifier_call = dmar_pci_bus_notifier, - .priority = INT_MIN, + .priority = 1, }; static struct dmar_drhd_unit * @@ -380,7 +400,7 @@ dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd) return NULL; } -/** +/* * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition * structure which uniquely represent one DMA remapping hardware unit * present in the platform @@ -408,6 +428,8 @@ static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg) memcpy(dmaru->hdr, header, header->length); dmaru->reg_base_addr = drhd->address; dmaru->segment = drhd->segment; + /* The size of the register set is 2 ^ N 4 KB pages. */ + dmaru->reg_size = 1UL << (drhd->size + 12); dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1), ((void *)drhd) + drhd->header.length, @@ -473,9 +495,9 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) rhsa = (struct acpi_dmar_rhsa *)header; for_each_drhd_unit(drhd) { if (drhd->reg_base_addr == rhsa->base_address) { - int node = acpi_map_pxm_to_node(rhsa->proximity_domain); + int node = pxm_to_node(rhsa->proximity_domain); - if (!node_online(node)) + if (node != NUMA_NO_NODE && !node_online(node)) node = NUMA_NO_NODE; drhd->iommu->node = node; return 0; @@ -503,6 +525,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) struct acpi_dmar_reserved_memory *rmrr; struct acpi_dmar_atsr *atsr; struct acpi_dmar_rhsa *rhsa; + struct acpi_dmar_satc *satc; switch (header->type) { case ACPI_DMAR_TYPE_HARDWARE_UNIT: @@ -532,6 +555,10 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) /* We don't print this here because we need to sanity-check it first. So print it in dmar_parse_one_andd() instead. */ break; + case ACPI_DMAR_TYPE_SATC: + satc = container_of(header, struct acpi_dmar_satc, header); + pr_info("SATC flags: 0x%x\n", satc->flags); + break; } } @@ -619,6 +646,7 @@ parse_dmar_table(void) .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr, .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa, .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd, + .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc, }; /* @@ -761,7 +789,8 @@ static int __init dmar_acpi_dev_scope_init(void) andd->device_name); continue; } - if (acpi_bus_get_device(h, &adev)) { + adev = acpi_fetch_acpi_dev(h); + if (!adev) { pr_err("Failed to get device for ACPI object %s\n", andd->device_name); continue; @@ -794,6 +823,7 @@ int __init dmar_dev_scope_init(void) info = dmar_alloc_pci_notify_info(dev, BUS_NOTIFY_ADD_DEVICE); if (!info) { + pci_dev_put(dev); return dmar_dev_scope_status; } else { dmar_pci_bus_add_dev(info); @@ -885,7 +915,7 @@ dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg) return 0; } -int __init detect_intel_iommu(void) +void __init detect_intel_iommu(void) { int ret; struct dmar_res_callback validate_drhd_cb = { @@ -898,27 +928,23 @@ int __init detect_intel_iommu(void) if (!ret) ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl, &validate_drhd_cb); - if (!ret && !no_iommu && !iommu_detected && !dmar_disabled) { + if (!ret && !no_iommu && !iommu_detected && + (!dmar_disabled || dmar_platform_optin())) { iommu_detected = 1; /* Make sure ACS will be enabled */ pci_request_acs(); } -#ifdef CONFIG_X86 if (!ret) { x86_init.iommu.iommu_init = intel_iommu_init; x86_platform.iommu_shutdown = intel_iommu_shutdown; } -#endif - if (dmar_tbl) { acpi_put_table(dmar_tbl); dmar_tbl = NULL; } up_write(&dmar_global_lock); - - return ret ? ret : 1; } static void unmap_iommu(struct intel_iommu *iommu) @@ -930,17 +956,18 @@ static void unmap_iommu(struct intel_iommu *iommu) /** * map_iommu: map the iommu's registers * @iommu: the iommu to map - * @phys_addr: the physical address of the base resgister + * @drhd: DMA remapping hardware definition structure * * Memory map the iommu's registers. Start w/ a single page, and * possibly expand if that turns out to be insufficent. */ -static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) +static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd) { + u64 phys_addr = drhd->reg_base_addr; int map_size, err=0; iommu->reg_phys = phys_addr; - iommu->reg_size = VTD_PAGE_SIZE; + iommu->reg_size = drhd->reg_size; if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) { pr_err("Can't reserve memory\n"); @@ -963,7 +990,6 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) warn_invalid_dmar(phys_addr, " returns all ones"); goto unmap; } - iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG); /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap), @@ -986,6 +1012,16 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) goto release; } } + + if (cap_ecmds(iommu->cap)) { + int i; + + for (i = 0; i < DMA_MAX_NUM_ECMDCAP; i++) { + iommu->ecmdcap[i] = dmar_readq(iommu->reg + DMAR_ECCAP_REG + + i * DMA_ECMD_REG_STEP); + } + } + err = 0; goto out; @@ -997,34 +1033,12 @@ out: return err; } -static int dmar_alloc_seq_id(struct intel_iommu *iommu) -{ - iommu->seq_id = find_first_zero_bit(dmar_seq_ids, - DMAR_UNITS_SUPPORTED); - if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) { - iommu->seq_id = -1; - } else { - set_bit(iommu->seq_id, dmar_seq_ids); - sprintf(iommu->name, "dmar%d", iommu->seq_id); - } - - return iommu->seq_id; -} - -static void dmar_free_seq_id(struct intel_iommu *iommu) -{ - if (iommu->seq_id >= 0) { - clear_bit(iommu->seq_id, dmar_seq_ids); - iommu->seq_id = -1; - } -} - static int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; u32 ver, sts; - int agaw = 0; - int msagaw = 0; + int agaw = -1; + int msagaw = -1; int err; if (!drhd->reg_base_addr) { @@ -1036,36 +1050,55 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) if (!iommu) return -ENOMEM; - if (dmar_alloc_seq_id(iommu) < 0) { + iommu->seq_id = ida_alloc_range(&dmar_seq_ids, 0, + DMAR_UNITS_SUPPORTED - 1, GFP_KERNEL); + if (iommu->seq_id < 0) { pr_err("Failed to allocate seq_id\n"); - err = -ENOSPC; + err = iommu->seq_id; goto error; } + snprintf(iommu->name, sizeof(iommu->name), "dmar%d", iommu->seq_id); - err = map_iommu(iommu, drhd->reg_base_addr); + err = map_iommu(iommu, drhd); if (err) { pr_err("Failed to map %s\n", iommu->name); goto error_free_seq_id; } - err = -EINVAL; - agaw = iommu_calculate_agaw(iommu); - if (agaw < 0) { - pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n", - iommu->seq_id); - goto err_unmap; + if (!cap_sagaw(iommu->cap) && + (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) { + pr_info("%s: No supported address widths. Not attempting DMA translation.\n", + iommu->name); + drhd->ignored = 1; } - msagaw = iommu_calculate_max_sagaw(iommu); - if (msagaw < 0) { - pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n", - iommu->seq_id); - goto err_unmap; + + if (!drhd->ignored) { + agaw = iommu_calculate_agaw(iommu); + if (agaw < 0) { + pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n", + iommu->seq_id); + drhd->ignored = 1; + } + } + if (!drhd->ignored) { + msagaw = iommu_calculate_max_sagaw(iommu); + if (msagaw < 0) { + pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n", + iommu->seq_id); + drhd->ignored = 1; + agaw = -1; + } } iommu->agaw = agaw; iommu->msagaw = msagaw; iommu->segment = drhd->segment; - + iommu->device_rbtree = RB_ROOT; + spin_lock_init(&iommu->device_rbtree_lock); + mutex_init(&iommu->iopf_lock); iommu->node = NUMA_NO_NODE; + spin_lock_init(&iommu->lock); + ida_init(&iommu->domain_ida); + mutex_init(&iommu->did_lock); ver = readl(iommu->reg + DMAR_VER_REG); pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n", @@ -1084,30 +1117,49 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) if (sts & DMA_GSTS_QIES) iommu->gcmd |= DMA_GCMD_QIE; + if (alloc_iommu_pmu(iommu)) + pr_debug("Cannot alloc PMU for iommu (seq_id = %d)\n", iommu->seq_id); + raw_spin_lock_init(&iommu->register_lock); - if (intel_iommu_enabled) { + /* + * A value of N in PSS field of eCap register indicates hardware + * supports PASID field of N+1 bits. + */ + if (pasid_supported(iommu)) + iommu->iommu.max_pasids = 2UL << ecap_pss(iommu->ecap); + + /* + * This is only for hotplug; at boot time intel_iommu_enabled won't + * be set yet. When intel_iommu_init() runs, it registers the units + * present at boot time, then sets intel_iommu_enabled. + */ + if (intel_iommu_enabled && !drhd->ignored) { err = iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, "%s", iommu->name); if (err) goto err_unmap; - iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops); - - err = iommu_device_register(&iommu->iommu); + err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); if (err) - goto err_unmap; + goto err_sysfs; + + iommu_pmu_register(iommu); } drhd->iommu = iommu; + iommu->drhd = drhd; return 0; +err_sysfs: + iommu_device_sysfs_remove(&iommu->iommu); err_unmap: + free_iommu_pmu(iommu); unmap_iommu(iommu); error_free_seq_id: - dmar_free_seq_id(iommu); + ida_free(&dmar_seq_ids, iommu->seq_id); error: kfree(iommu); return err; @@ -1115,11 +1167,14 @@ error: static void free_iommu(struct intel_iommu *iommu) { - if (intel_iommu_enabled) { + if (intel_iommu_enabled && !iommu->drhd->ignored) { + iommu_pmu_unregister(iommu); iommu_device_unregister(&iommu->iommu); iommu_device_sysfs_remove(&iommu->iommu); } + free_iommu_pmu(iommu); + if (iommu->irq) { if (iommu->pr_irq) { free_irq(iommu->pr_irq, iommu); @@ -1132,7 +1187,7 @@ static void free_iommu(struct intel_iommu *iommu) } if (iommu->qi) { - free_page((unsigned long)iommu->qi->desc); + iommu_free_pages(iommu->qi->desc); kfree(iommu->qi->desc_status); kfree(iommu->qi); } @@ -1140,7 +1195,8 @@ static void free_iommu(struct intel_iommu *iommu) if (iommu->reg) unmap_iommu(iommu); - dmar_free_seq_id(iommu); + ida_destroy(&iommu->domain_ida); + ida_free(&dmar_seq_ids, iommu->seq_id); kfree(iommu); } @@ -1149,18 +1205,75 @@ static void free_iommu(struct intel_iommu *iommu) */ static inline void reclaim_free_desc(struct q_inval *qi) { - while (qi->desc_status[qi->free_tail] == QI_DONE || - qi->desc_status[qi->free_tail] == QI_ABORT) { - qi->desc_status[qi->free_tail] = QI_FREE; + while (qi->desc_status[qi->free_tail] == QI_FREE && qi->free_tail != qi->free_head) { qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; qi->free_cnt++; } } +static const char *qi_type_string(u8 type) +{ + switch (type) { + case QI_CC_TYPE: + return "Context-cache Invalidation"; + case QI_IOTLB_TYPE: + return "IOTLB Invalidation"; + case QI_DIOTLB_TYPE: + return "Device-TLB Invalidation"; + case QI_IEC_TYPE: + return "Interrupt Entry Cache Invalidation"; + case QI_IWD_TYPE: + return "Invalidation Wait"; + case QI_EIOTLB_TYPE: + return "PASID-based IOTLB Invalidation"; + case QI_PC_TYPE: + return "PASID-cache Invalidation"; + case QI_DEIOTLB_TYPE: + return "PASID-based Device-TLB Invalidation"; + case QI_PGRP_RESP_TYPE: + return "Page Group Response"; + default: + return "UNKNOWN"; + } +} + +static void qi_dump_fault(struct intel_iommu *iommu, u32 fault) +{ + unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG); + u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG); + struct qi_desc *desc = iommu->qi->desc + head; + + if (fault & DMA_FSTS_IQE) + pr_err("VT-d detected Invalidation Queue Error: Reason %llx", + DMAR_IQER_REG_IQEI(iqe_err)); + if (fault & DMA_FSTS_ITE) + pr_err("VT-d detected Invalidation Time-out Error: SID %llx", + DMAR_IQER_REG_ITESID(iqe_err)); + if (fault & DMA_FSTS_ICE) + pr_err("VT-d detected Invalidation Completion Error: SID %llx", + DMAR_IQER_REG_ICESID(iqe_err)); + + pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n", + qi_type_string(desc->qw0 & 0xf), + (unsigned long long)desc->qw0, + (unsigned long long)desc->qw1); + + head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH; + head <<= qi_shift(iommu); + desc = iommu->qi->desc + head; + + pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n", + qi_type_string(desc->qw0 & 0xf), + (unsigned long long)desc->qw0, + (unsigned long long)desc->qw1); +} + static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) { u32 fault; int head, tail; + struct device *dev; + u64 iqe_err, ite_sid; struct q_inval *qi = iommu->qi; int shift = qi_shift(iommu); @@ -1168,6 +1281,8 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) return -EAGAIN; fault = readl(iommu->reg + DMAR_FSTS_REG); + if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE)) + qi_dump_fault(iommu, fault); /* * If IQE happens, the head points to the descriptor associated @@ -1184,12 +1299,10 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) * used by software as private data. We won't print * out these two qw's for security consideration. */ - pr_err("VT-d detected invalid descriptor: qw0 = %llx, qw1 = %llx\n", - (unsigned long long)desc->qw0, - (unsigned long long)desc->qw1); memcpy(desc, qi->desc + (wait_index << shift), 1 << shift); writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); + pr_info("Invalidation Queue Error (IQE) cleared\n"); return -EINVAL; } } @@ -1205,7 +1318,15 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) tail = readl(iommu->reg + DMAR_IQT_REG); tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; + /* + * SID field is valid only when the ITE field is Set in FSTS_REG + * see Intel VT-d spec r4.1, section 11.4.9.9 + */ + iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG); + ite_sid = DMAR_IQER_REG_ITESID(iqe_err); + writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); + pr_info("Invalidation Time-out Error (ITE) cleared\n"); do { if (qi->desc_status[head] == QI_IN_USE) @@ -1213,12 +1334,27 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) head = (head - 2 + QI_LENGTH) % QI_LENGTH; } while (head != tail); + /* + * If device was released or isn't present, no need to retry + * the ATS invalidate request anymore. + * + * 0 value of ite_sid means old VT-d device, no ite_sid value. + * see Intel VT-d spec r4.1, section 11.4.9.9 + */ + if (ite_sid) { + dev = device_rbtree_find(iommu, ite_sid); + if (!dev || !dev_is_pci(dev) || + !pci_device_is_present(to_pci_dev(dev))) + return -ETIMEDOUT; + } if (qi->desc_status[wait_index] == QI_ABORT) return -EAGAIN; } - if (fault & DMA_FSTS_ICE) + if (fault & DMA_FSTS_ICE) { writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG); + pr_info("Invalidation Completion Error (ICE) cleared\n"); + } return 0; } @@ -1234,15 +1370,33 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, unsigned int count, unsigned long options) { struct q_inval *qi = iommu->qi; + s64 devtlb_start_ktime = 0; + s64 iotlb_start_ktime = 0; + s64 iec_start_ktime = 0; struct qi_desc wait_desc; int wait_index, index; unsigned long flags; int offset, shift; int rc, i; + u64 type; if (!qi) return 0; + type = desc->qw0 & GENMASK_ULL(3, 0); + + if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) && + dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB)) + iotlb_start_ktime = ktime_to_ns(ktime_get()); + + if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) && + dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB)) + devtlb_start_ktime = ktime_to_ns(ktime_get()); + + if (type == QI_IEC_TYPE && + dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC)) + iec_start_ktime = ktime_to_ns(ktime_get()); + restart: rc = 0; @@ -1266,6 +1420,8 @@ restart: offset = ((index + i) % QI_LENGTH) << shift; memcpy(qi->desc + offset, &desc[i], 1 << shift); qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE; + trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1, + desc[i].qw2, desc[i].qw3); } qi->desc_status[wait_index] = QI_IN_USE; @@ -1289,7 +1445,7 @@ restart: */ writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG); - while (qi->desc_status[wait_index] != QI_DONE) { + while (READ_ONCE(qi->desc_status[wait_index]) != QI_DONE) { /* * We will leave the interrupts disabled, to prevent interrupt * context to queue another cmd while a cmd is already submitted @@ -1306,8 +1462,16 @@ restart: raw_spin_lock(&qi->q_lock); } - for (i = 0; i < count; i++) - qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE; + /* + * The reclaim code can free descriptors from multiple submissions + * starting from the tail of the queue. When count == 0, the + * status of the standalone wait descriptor at the tail of the queue + * must be set to QI_FREE to allow the reclaim code to proceed. + * It is also possible that descriptors from one of the previous + * submissions has to be reclaimed by a subsequent submission. + */ + for (i = 0; i <= count; i++) + qi->desc_status[(index + i) % QI_LENGTH] = QI_FREE; reclaim_free_desc(qi); raw_spin_unlock_irqrestore(&qi->q_lock, flags); @@ -1315,6 +1479,18 @@ restart: if (rc == -EAGAIN) goto restart; + if (iotlb_start_ktime) + dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB, + ktime_to_ns(ktime_get()) - iotlb_start_ktime); + + if (devtlb_start_ktime) + dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB, + ktime_to_ns(ktime_get()) - devtlb_start_ktime); + + if (iec_start_ktime) + dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC, + ktime_to_ns(ktime_get()) - iec_start_ktime); + return rc; } @@ -1351,24 +1527,9 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type) { - u8 dw = 0, dr = 0; - struct qi_desc desc; - int ih = 0; - - if (cap_write_drain(iommu->cap)) - dw = 1; - - if (cap_read_drain(iommu->cap)) - dr = 1; - - desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) - | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; - desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) - | QI_IOTLB_AM(size_order); - desc.qw2 = 0; - desc.qw3 = 0; + qi_desc_iotlb(iommu, did, addr, size_order, type, &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1377,20 +1538,16 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, { struct qi_desc desc; - if (mask) { - addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; - desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; - } else - desc.qw1 = QI_DEV_IOTLB_ADDR(addr); - - if (qdep >= QI_DEV_IOTLB_MAX_INVS) - qdep = 0; - - desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | - QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); - desc.qw2 = 0; - desc.qw3 = 0; + /* + * VT-d spec, section 4.3: + * + * Software is recommended to not submit any Device-TLB invalidation + * requests while address remapping hardware is disabled. + */ + if (!(iommu->gcmd & DMA_GCMD_TE)) + return; + qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1410,62 +1567,33 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, return; } - if (npages == -1) { - desc.qw0 = QI_EIOTLB_PASID(pasid) | - QI_EIOTLB_DID(did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 = 0; - } else { - int mask = ilog2(__roundup_pow_of_two(npages)); - unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); - - if (WARN_ON_ONCE(!ALIGN(addr, align))) - addr &= ~(align - 1); - - desc.qw0 = QI_EIOTLB_PASID(pasid) | - QI_EIOTLB_DID(did) | - QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 = QI_EIOTLB_ADDR(addr) | - QI_EIOTLB_IH(ih) | - QI_EIOTLB_AM(mask); - } - + qi_desc_piotlb(did, pasid, addr, npages, ih, &desc); qi_submit_sync(iommu, &desc, 1, 0); } /* PASID-based device IOTLB Invalidate */ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, - u32 pasid, u16 qdep, u64 addr, - unsigned int size_order, u64 granu) + u32 pasid, u16 qdep, u64 addr, unsigned int size_order) { - unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; - desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | - QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | - QI_DEV_IOTLB_PFSID(pfsid); - desc.qw1 = QI_DEV_EIOTLB_GLOB(granu); - /* - * If S bit is 0, we only flush a single page. If S bit is set, - * The least significant zero bit indicates the invalidation address - * range. VT-d spec 6.5.2.6. - * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB. - * size order = 0 is PAGE_SIZE 4KB - * Max Invs Pending (MIP) is set to 0 for now until we have DIT in - * ECAP. + * VT-d spec, section 4.3: + * + * Software is recommended to not submit any Device-TLB invalidation + * requests while address remapping hardware is disabled. */ - desc.qw1 |= addr & ~mask; - if (size_order) - desc.qw1 |= QI_DEV_EIOTLB_SIZE; + if (!(iommu->gcmd & DMA_GCMD_TE)) + return; + qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, + qdep, addr, size_order, + &desc); qi_submit_sync(iommu, &desc, 1, 0); } void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, - u64 granu, int pasid) + u64 granu, u32 pasid) { struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; @@ -1527,7 +1655,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) * is present. */ if (ecap_smts(iommu->ecap)) - val |= (1 << 11) | 1; + val |= BIT_ULL(11) | BIT_ULL(0); raw_spin_lock_irqsave(&iommu->register_lock, flags); @@ -1553,7 +1681,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) int dmar_enable_qi(struct intel_iommu *iommu) { struct q_inval *qi; - struct page *desc_page; + void *desc; if (!ecap_qis(iommu->ecap)) return -ENOENT; @@ -1574,19 +1702,20 @@ int dmar_enable_qi(struct intel_iommu *iommu) * Need two pages to accommodate 256 descriptors of 256 bits each * if the remapping hardware supports scalable mode translation. */ - desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, - !!ecap_smts(iommu->ecap)); - if (!desc_page) { + desc = iommu_alloc_pages_node_sz(iommu->node, GFP_ATOMIC, + ecap_smts(iommu->ecap) ? SZ_8K : + SZ_4K); + if (!desc) { kfree(qi); iommu->qi = NULL; return -ENOMEM; } - qi->desc = page_address(desc_page); + qi->desc = desc; qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC); if (!qi->desc_status) { - free_page((unsigned long) qi->desc); + iommu_free_pages(qi->desc); kfree(qi); iommu->qi = NULL; return -ENOMEM; @@ -1720,6 +1849,8 @@ static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq) return DMAR_FECTL_REG; else if (iommu->pr_irq == irq) return DMAR_PECTL_REG; + else if (iommu->perf_irq == irq) + return DMAR_PERFINTRCTL_REG; else BUG(); } @@ -1765,21 +1896,8 @@ void dmar_msi_write(int irq, struct msi_msg *msg) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -void dmar_msi_read(int irq, struct msi_msg *msg) -{ - struct intel_iommu *iommu = irq_get_handler_data(irq); - int reg = dmar_msi_reg(iommu, irq); - unsigned long flag; - - raw_spin_lock_irqsave(&iommu->register_lock, flag); - msg->data = readl(iommu->reg + reg + 4); - msg->address_lo = readl(iommu->reg + reg + 8); - msg->address_hi = readl(iommu->reg + reg + 12); - raw_spin_unlock_irqrestore(&iommu->register_lock, flag); -} - static int dmar_fault_do_one(struct intel_iommu *iommu, int type, - u8 fault_reason, int pasid, u16 source_id, + u8 fault_reason, u32 pasid, u16 source_id, unsigned long long addr) { const char *reason; @@ -1787,17 +1905,30 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, reason = dmar_get_fault_reason(fault_reason, &fault_type); - if (fault_type == INTR_REMAP) - pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n", - source_id >> 8, PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), addr >> 48, - fault_reason, reason); - else - pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n", + if (fault_type == INTR_REMAP) { + pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n", + source_id >> 8, PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr >> 48, + fault_reason, reason); + + return 0; + } + + if (pasid == IOMMU_PASID_INVALID) + pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n", type ? "DMA Read" : "DMA Write", source_id >> 8, PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), pasid, addr, + PCI_FUNC(source_id & 0xFF), addr, + fault_reason, reason); + else + pr_err("[%s PASID 0x%x] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n", + type ? "DMA Read" : "DMA Write", pasid, + source_id >> 8, PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); + + dmar_fault_dump_ptes(iommu, source_id, addr, pasid); + return 0; } @@ -1829,7 +1960,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id) u8 fault_reason; u16 source_id; u64 guest_addr; - int type, pasid; + u32 pasid; + int type; u32 data; bool pasid_present; @@ -1863,7 +1995,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) if (!ratelimited) /* Using pasid -1 if pasid is not present */ dmar_fault_do_one(iommu, type, fault_reason, - pasid_present ? pasid : -1, + pasid_present ? pasid : IOMMU_PASID_INVALID, source_id, guest_addr); fault_index++; @@ -1904,7 +2036,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu) return ret; } -int __init enable_drhd_fault_handling(void) +int enable_drhd_fault_handling(unsigned int cpu) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; @@ -1912,9 +2044,15 @@ int __init enable_drhd_fault_handling(void) /* * Enable fault control interrupt. */ + guard(rwsem_read)(&dmar_global_lock); for_each_iommu(iommu, drhd) { u32 fault_status; - int ret = dmar_set_interrupt(iommu); + int ret; + + if (iommu->irq || iommu->node != cpu_to_node(cpu)) + continue; + + ret = dmar_set_interrupt(iommu); if (ret) { pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n", @@ -1997,7 +2135,6 @@ static int __init dmar_free_unused_resources(void) } late_initcall(dmar_free_unused_resources); -IOMMU_INIT_POST(detect_intel_iommu); /* * DMAR Hotplug Support @@ -2017,6 +2154,7 @@ static guid_t dmar_hp_guid = #define DMAR_DSM_FUNC_DRHD 1 #define DMAR_DSM_FUNC_ATSR 2 #define DMAR_DSM_FUNC_RHSA 3 +#define DMAR_DSM_FUNC_SATC 4 static inline bool dmar_detect_dsm(acpi_handle handle, int func) { @@ -2034,6 +2172,7 @@ static int dmar_walk_dsm_resource(acpi_handle handle, int func, [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT, [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS, [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY, + [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC, }; if (!dmar_detect_dsm(handle, func)) |
