summaryrefslogtreecommitdiff
path: root/drivers/iommu/intel/dmar.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel/dmar.c')
-rw-r--r--drivers/iommu/intel/dmar.c179
1 files changed, 61 insertions, 118 deletions
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 23cb80d62a9a..e540092d664d 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -32,6 +32,7 @@
#include "iommu.h"
#include "../irq_remapping.h"
+#include "../iommu-pages.h"
#include "perf.h"
#include "trace.h"
#include "perfmon.h"
@@ -1059,7 +1060,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
err = iommu->seq_id;
goto error;
}
- sprintf(iommu->name, "dmar%d", iommu->seq_id);
+ snprintf(iommu->name, sizeof(iommu->name), "dmar%d", iommu->seq_id);
err = map_iommu(iommu, drhd);
if (err) {
@@ -1067,7 +1068,6 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
goto error_free_seq_id;
}
- err = -EINVAL;
if (!cap_sagaw(iommu->cap) &&
(!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) {
pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
@@ -1095,7 +1095,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
iommu->agaw = agaw;
iommu->msagaw = msagaw;
iommu->segment = drhd->segment;
-
+ iommu->device_rbtree = RB_ROOT;
+ spin_lock_init(&iommu->device_rbtree_lock);
+ mutex_init(&iommu->iopf_lock);
iommu->node = NUMA_NO_NODE;
ver = readl(iommu->reg + DMAR_VER_REG);
@@ -1185,7 +1187,7 @@ static void free_iommu(struct intel_iommu *iommu)
}
if (iommu->qi) {
- free_page((unsigned long)iommu->qi->desc);
+ iommu_free_page(iommu->qi->desc);
kfree(iommu->qi->desc_status);
kfree(iommu->qi);
}
@@ -1202,9 +1204,7 @@ static void free_iommu(struct intel_iommu *iommu)
*/
static inline void reclaim_free_desc(struct q_inval *qi)
{
- while (qi->desc_status[qi->free_tail] == QI_DONE ||
- qi->desc_status[qi->free_tail] == QI_ABORT) {
- qi->desc_status[qi->free_tail] = QI_FREE;
+ while (qi->desc_status[qi->free_tail] == QI_FREE && qi->free_tail != qi->free_head) {
qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
qi->free_cnt++;
}
@@ -1271,6 +1271,8 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
{
u32 fault;
int head, tail;
+ struct device *dev;
+ u64 iqe_err, ite_sid;
struct q_inval *qi = iommu->qi;
int shift = qi_shift(iommu);
@@ -1315,6 +1317,13 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
tail = readl(iommu->reg + DMAR_IQT_REG);
tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
+ /*
+ * SID field is valid only when the ITE field is Set in FSTS_REG
+ * see Intel VT-d spec r4.1, section 11.4.9.9
+ */
+ iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
+ ite_sid = DMAR_IQER_REG_ITESID(iqe_err);
+
writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
pr_info("Invalidation Time-out Error (ITE) cleared\n");
@@ -1324,6 +1333,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
head = (head - 2 + QI_LENGTH) % QI_LENGTH;
} while (head != tail);
+ /*
+ * If device was released or isn't present, no need to retry
+ * the ATS invalidate request anymore.
+ *
+ * 0 value of ite_sid means old VT-d device, no ite_sid value.
+ * see Intel VT-d spec r4.1, section 11.4.9.9
+ */
+ if (ite_sid) {
+ dev = device_rbtree_find(iommu, ite_sid);
+ if (!dev || !dev_is_pci(dev) ||
+ !pci_device_is_present(to_pci_dev(dev)))
+ return -ETIMEDOUT;
+ }
if (qi->desc_status[wait_index] == QI_ABORT)
return -EAGAIN;
}
@@ -1422,7 +1444,7 @@ restart:
*/
writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
- while (qi->desc_status[wait_index] != QI_DONE) {
+ while (READ_ONCE(qi->desc_status[wait_index]) != QI_DONE) {
/*
* We will leave the interrupts disabled, to prevent interrupt
* context to queue another cmd while a cmd is already submitted
@@ -1439,8 +1461,16 @@ restart:
raw_spin_lock(&qi->q_lock);
}
- for (i = 0; i < count; i++)
- qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
+ /*
+ * The reclaim code can free descriptors from multiple submissions
+ * starting from the tail of the queue. When count == 0, the
+ * status of the standalone wait descriptor at the tail of the queue
+ * must be set to QI_FREE to allow the reclaim code to proceed.
+ * It is also possible that descriptors from one of the previous
+ * submissions has to be reclaimed by a subsequent submission.
+ */
+ for (i = 0; i <= count; i++)
+ qi->desc_status[(index + i) % QI_LENGTH] = QI_FREE;
reclaim_free_desc(qi);
raw_spin_unlock_irqrestore(&qi->q_lock, flags);
@@ -1496,24 +1526,9 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
unsigned int size_order, u64 type)
{
- u8 dw = 0, dr = 0;
-
struct qi_desc desc;
- int ih = 0;
-
- if (cap_write_drain(iommu->cap))
- dw = 1;
-
- if (cap_read_drain(iommu->cap))
- dr = 1;
-
- desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
- | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
- desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
- | QI_IOTLB_AM(size_order);
- desc.qw2 = 0;
- desc.qw3 = 0;
+ qi_desc_iotlb(iommu, did, addr, size_order, type, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1531,20 +1546,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
if (!(iommu->gcmd & DMA_GCMD_TE))
return;
- if (mask) {
- addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
- desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
- } else
- desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
-
- if (qdep >= QI_DEV_IOTLB_MAX_INVS)
- qdep = 0;
-
- desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
- QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
- desc.qw2 = 0;
- desc.qw3 = 0;
-
+ qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1564,28 +1566,7 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
return;
}
- if (npages == -1) {
- desc.qw0 = QI_EIOTLB_PASID(pasid) |
- QI_EIOTLB_DID(did) |
- QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = 0;
- } else {
- int mask = ilog2(__roundup_pow_of_two(npages));
- unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
-
- if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
- addr = ALIGN_DOWN(addr, align);
-
- desc.qw0 = QI_EIOTLB_PASID(pasid) |
- QI_EIOTLB_DID(did) |
- QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
- QI_EIOTLB_TYPE;
- desc.qw1 = QI_EIOTLB_ADDR(addr) |
- QI_EIOTLB_IH(ih) |
- QI_EIOTLB_AM(mask);
- }
-
+ qi_desc_piotlb(did, pasid, addr, npages, ih, &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1593,7 +1574,6 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
u32 pasid, u16 qdep, u64 addr, unsigned int size_order)
{
- unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
/*
@@ -1605,40 +1585,9 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
if (!(iommu->gcmd & DMA_GCMD_TE))
return;
- desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
- QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
- QI_DEV_IOTLB_PFSID(pfsid);
-
- /*
- * If S bit is 0, we only flush a single page. If S bit is set,
- * The least significant zero bit indicates the invalidation address
- * range. VT-d spec 6.5.2.6.
- * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB.
- * size order = 0 is PAGE_SIZE 4KB
- * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
- * ECAP.
- */
- if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
- pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
- addr, size_order);
-
- /* Take page address */
- desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
-
- if (size_order) {
- /*
- * Existing 0s in address below size_order may be the least
- * significant bit, we must set them to 1s to avoid having
- * smaller size than desired.
- */
- desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
- VTD_PAGE_SHIFT);
- /* Clear size_order bit to indicate size */
- desc.qw1 &= ~mask;
- /* Set the S bit to indicate flushing more than 1 page */
- desc.qw1 |= QI_DEV_EIOTLB_SIZE;
- }
-
+ qi_desc_dev_iotlb_pasid(sid, pfsid, pasid,
+ qdep, addr, size_order,
+ &desc);
qi_submit_sync(iommu, &desc, 1, 0);
}
@@ -1731,7 +1680,8 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
int dmar_enable_qi(struct intel_iommu *iommu)
{
struct q_inval *qi;
- struct page *desc_page;
+ void *desc;
+ int order;
if (!ecap_qis(iommu->ecap))
return -ENOENT;
@@ -1752,19 +1702,19 @@ int dmar_enable_qi(struct intel_iommu *iommu)
* Need two pages to accommodate 256 descriptors of 256 bits each
* if the remapping hardware supports scalable mode translation.
*/
- desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
- !!ecap_smts(iommu->ecap));
- if (!desc_page) {
+ order = ecap_smts(iommu->ecap) ? 1 : 0;
+ desc = iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order);
+ if (!desc) {
kfree(qi);
iommu->qi = NULL;
return -ENOMEM;
}
- qi->desc = page_address(desc_page);
+ qi->desc = desc;
qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
if (!qi->desc_status) {
- free_page((unsigned long) qi->desc);
+ iommu_free_page(qi->desc);
kfree(qi);
iommu->qi = NULL;
return -ENOMEM;
@@ -1945,19 +1895,6 @@ void dmar_msi_write(int irq, struct msi_msg *msg)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-void dmar_msi_read(int irq, struct msi_msg *msg)
-{
- struct intel_iommu *iommu = irq_get_handler_data(irq);
- int reg = dmar_msi_reg(iommu, irq);
- unsigned long flag;
-
- raw_spin_lock_irqsave(&iommu->register_lock, flag);
- msg->data = readl(iommu->reg + reg + 4);
- msg->address_lo = readl(iommu->reg + reg + 8);
- msg->address_hi = readl(iommu->reg + reg + 12);
- raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
-}
-
static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
u8 fault_reason, u32 pasid, u16 source_id,
unsigned long long addr)
@@ -2098,7 +2035,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
return ret;
}
-int __init enable_drhd_fault_handling(void)
+int enable_drhd_fault_handling(unsigned int cpu)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
@@ -2106,9 +2043,15 @@ int __init enable_drhd_fault_handling(void)
/*
* Enable fault control interrupt.
*/
+ guard(rwsem_read)(&dmar_global_lock);
for_each_iommu(iommu, drhd) {
u32 fault_status;
- int ret = dmar_set_interrupt(iommu);
+ int ret;
+
+ if (iommu->irq || iommu->node != cpu_to_node(cpu))
+ continue;
+
+ ret = dmar_set_interrupt(iommu);
if (ret) {
pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",