diff options
Diffstat (limited to 'drivers/iommu/intel/dmar.c')
-rw-r--r-- | drivers/iommu/intel/dmar.c | 179 |
1 files changed, 61 insertions, 118 deletions
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 23cb80d62a9a..e540092d664d 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -32,6 +32,7 @@ #include "iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" #include "perf.h" #include "trace.h" #include "perfmon.h" @@ -1059,7 +1060,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) err = iommu->seq_id; goto error; } - sprintf(iommu->name, "dmar%d", iommu->seq_id); + snprintf(iommu->name, sizeof(iommu->name), "dmar%d", iommu->seq_id); err = map_iommu(iommu, drhd); if (err) { @@ -1067,7 +1068,6 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) goto error_free_seq_id; } - err = -EINVAL; if (!cap_sagaw(iommu->cap) && (!ecap_smts(iommu->ecap) || ecap_slts(iommu->ecap))) { pr_info("%s: No supported address widths. Not attempting DMA translation.\n", @@ -1095,7 +1095,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->agaw = agaw; iommu->msagaw = msagaw; iommu->segment = drhd->segment; - + iommu->device_rbtree = RB_ROOT; + spin_lock_init(&iommu->device_rbtree_lock); + mutex_init(&iommu->iopf_lock); iommu->node = NUMA_NO_NODE; ver = readl(iommu->reg + DMAR_VER_REG); @@ -1185,7 +1187,7 @@ static void free_iommu(struct intel_iommu *iommu) } if (iommu->qi) { - free_page((unsigned long)iommu->qi->desc); + iommu_free_page(iommu->qi->desc); kfree(iommu->qi->desc_status); kfree(iommu->qi); } @@ -1202,9 +1204,7 @@ static void free_iommu(struct intel_iommu *iommu) */ static inline void reclaim_free_desc(struct q_inval *qi) { - while (qi->desc_status[qi->free_tail] == QI_DONE || - qi->desc_status[qi->free_tail] == QI_ABORT) { - qi->desc_status[qi->free_tail] = QI_FREE; + while (qi->desc_status[qi->free_tail] == QI_FREE && qi->free_tail != qi->free_head) { qi->free_tail = (qi->free_tail + 1) % QI_LENGTH; qi->free_cnt++; } @@ -1271,6 +1271,8 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) { u32 fault; int head, tail; + struct device *dev; + u64 iqe_err, ite_sid; struct q_inval *qi = iommu->qi; int shift = qi_shift(iommu); @@ -1315,6 +1317,13 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) tail = readl(iommu->reg + DMAR_IQT_REG); tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH; + /* + * SID field is valid only when the ITE field is Set in FSTS_REG + * see Intel VT-d spec r4.1, section 11.4.9.9 + */ + iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG); + ite_sid = DMAR_IQER_REG_ITESID(iqe_err); + writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG); pr_info("Invalidation Time-out Error (ITE) cleared\n"); @@ -1324,6 +1333,19 @@ static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index) head = (head - 2 + QI_LENGTH) % QI_LENGTH; } while (head != tail); + /* + * If device was released or isn't present, no need to retry + * the ATS invalidate request anymore. + * + * 0 value of ite_sid means old VT-d device, no ite_sid value. + * see Intel VT-d spec r4.1, section 11.4.9.9 + */ + if (ite_sid) { + dev = device_rbtree_find(iommu, ite_sid); + if (!dev || !dev_is_pci(dev) || + !pci_device_is_present(to_pci_dev(dev))) + return -ETIMEDOUT; + } if (qi->desc_status[wait_index] == QI_ABORT) return -EAGAIN; } @@ -1422,7 +1444,7 @@ restart: */ writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG); - while (qi->desc_status[wait_index] != QI_DONE) { + while (READ_ONCE(qi->desc_status[wait_index]) != QI_DONE) { /* * We will leave the interrupts disabled, to prevent interrupt * context to queue another cmd while a cmd is already submitted @@ -1439,8 +1461,16 @@ restart: raw_spin_lock(&qi->q_lock); } - for (i = 0; i < count; i++) - qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE; + /* + * The reclaim code can free descriptors from multiple submissions + * starting from the tail of the queue. When count == 0, the + * status of the standalone wait descriptor at the tail of the queue + * must be set to QI_FREE to allow the reclaim code to proceed. + * It is also possible that descriptors from one of the previous + * submissions has to be reclaimed by a subsequent submission. + */ + for (i = 0; i <= count; i++) + qi->desc_status[(index + i) % QI_LENGTH] = QI_FREE; reclaim_free_desc(qi); raw_spin_unlock_irqrestore(&qi->q_lock, flags); @@ -1496,24 +1526,9 @@ void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type) { - u8 dw = 0, dr = 0; - struct qi_desc desc; - int ih = 0; - - if (cap_write_drain(iommu->cap)) - dw = 1; - - if (cap_read_drain(iommu->cap)) - dr = 1; - - desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw) - | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE; - desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) - | QI_IOTLB_AM(size_order); - desc.qw2 = 0; - desc.qw3 = 0; + qi_desc_iotlb(iommu, did, addr, size_order, type, &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1531,20 +1546,7 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, if (!(iommu->gcmd & DMA_GCMD_TE)) return; - if (mask) { - addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; - desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; - } else - desc.qw1 = QI_DEV_IOTLB_ADDR(addr); - - if (qdep >= QI_DEV_IOTLB_MAX_INVS) - qdep = 0; - - desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) | - QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid); - desc.qw2 = 0; - desc.qw3 = 0; - + qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1564,28 +1566,7 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, return; } - if (npages == -1) { - desc.qw0 = QI_EIOTLB_PASID(pasid) | - QI_EIOTLB_DID(did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 = 0; - } else { - int mask = ilog2(__roundup_pow_of_two(npages)); - unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); - - if (WARN_ON_ONCE(!IS_ALIGNED(addr, align))) - addr = ALIGN_DOWN(addr, align); - - desc.qw0 = QI_EIOTLB_PASID(pasid) | - QI_EIOTLB_DID(did) | - QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | - QI_EIOTLB_TYPE; - desc.qw1 = QI_EIOTLB_ADDR(addr) | - QI_EIOTLB_IH(ih) | - QI_EIOTLB_AM(mask); - } - + qi_desc_piotlb(did, pasid, addr, npages, ih, &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1593,7 +1574,6 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, u32 pasid, u16 qdep, u64 addr, unsigned int size_order) { - unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1); struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0}; /* @@ -1605,40 +1585,9 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, if (!(iommu->gcmd & DMA_GCMD_TE)) return; - desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) | - QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE | - QI_DEV_IOTLB_PFSID(pfsid); - - /* - * If S bit is 0, we only flush a single page. If S bit is set, - * The least significant zero bit indicates the invalidation address - * range. VT-d spec 6.5.2.6. - * e.g. address bit 12[0] indicates 8KB, 13[0] indicates 16KB. - * size order = 0 is PAGE_SIZE 4KB - * Max Invs Pending (MIP) is set to 0 for now until we have DIT in - * ECAP. - */ - if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)) - pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n", - addr, size_order); - - /* Take page address */ - desc.qw1 = QI_DEV_EIOTLB_ADDR(addr); - - if (size_order) { - /* - * Existing 0s in address below size_order may be the least - * significant bit, we must set them to 1s to avoid having - * smaller size than desired. - */ - desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1, - VTD_PAGE_SHIFT); - /* Clear size_order bit to indicate size */ - desc.qw1 &= ~mask; - /* Set the S bit to indicate flushing more than 1 page */ - desc.qw1 |= QI_DEV_EIOTLB_SIZE; - } - + qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, + qdep, addr, size_order, + &desc); qi_submit_sync(iommu, &desc, 1, 0); } @@ -1731,7 +1680,8 @@ static void __dmar_enable_qi(struct intel_iommu *iommu) int dmar_enable_qi(struct intel_iommu *iommu) { struct q_inval *qi; - struct page *desc_page; + void *desc; + int order; if (!ecap_qis(iommu->ecap)) return -ENOENT; @@ -1752,19 +1702,19 @@ int dmar_enable_qi(struct intel_iommu *iommu) * Need two pages to accommodate 256 descriptors of 256 bits each * if the remapping hardware supports scalable mode translation. */ - desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, - !!ecap_smts(iommu->ecap)); - if (!desc_page) { + order = ecap_smts(iommu->ecap) ? 1 : 0; + desc = iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order); + if (!desc) { kfree(qi); iommu->qi = NULL; return -ENOMEM; } - qi->desc = page_address(desc_page); + qi->desc = desc; qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC); if (!qi->desc_status) { - free_page((unsigned long) qi->desc); + iommu_free_page(qi->desc); kfree(qi); iommu->qi = NULL; return -ENOMEM; @@ -1945,19 +1895,6 @@ void dmar_msi_write(int irq, struct msi_msg *msg) raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -void dmar_msi_read(int irq, struct msi_msg *msg) -{ - struct intel_iommu *iommu = irq_get_handler_data(irq); - int reg = dmar_msi_reg(iommu, irq); - unsigned long flag; - - raw_spin_lock_irqsave(&iommu->register_lock, flag); - msg->data = readl(iommu->reg + reg + 4); - msg->address_lo = readl(iommu->reg + reg + 8); - msg->address_hi = readl(iommu->reg + reg + 12); - raw_spin_unlock_irqrestore(&iommu->register_lock, flag); -} - static int dmar_fault_do_one(struct intel_iommu *iommu, int type, u8 fault_reason, u32 pasid, u16 source_id, unsigned long long addr) @@ -2098,7 +2035,7 @@ int dmar_set_interrupt(struct intel_iommu *iommu) return ret; } -int __init enable_drhd_fault_handling(void) +int enable_drhd_fault_handling(unsigned int cpu) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; @@ -2106,9 +2043,15 @@ int __init enable_drhd_fault_handling(void) /* * Enable fault control interrupt. */ + guard(rwsem_read)(&dmar_global_lock); for_each_iommu(iommu, drhd) { u32 fault_status; - int ret = dmar_set_interrupt(iommu); + int ret; + + if (iommu->irq || iommu->node != cpu_to_node(cpu)) + continue; + + ret = dmar_set_interrupt(iommu); if (ret) { pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n", |