Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r--  drivers/iommu/amd/init.c      |   5
-rw-r--r--  drivers/iommu/amd/iommu.c     | 124
-rw-r--r--  drivers/iommu/amd/iommu_v2.c  |  13
3 files changed, 115 insertions(+), 27 deletions(-)
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 6e12a615117b..bdcf167b4afe 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1849,8 +1849,11 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
if (ret)
return ret;
- if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
+ if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
+ pr_info("Using strict mode due to virtualization\n");
+ iommu_set_dma_strict();
amd_iommu_np_cache = true;
+ }
init_iommu_perf_ctr(iommu);
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 02f9b4fffe90..1722bb161841 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -425,9 +425,11 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
- if (dev_data && __ratelimit(&dev_data->rs)) {
- pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
- vmg_tag, spa, flags);
+ if (dev_data) {
+ if (__ratelimit(&dev_data->rs)) {
+ pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
+ vmg_tag, spa, flags);
+ }
} else {
pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
@@ -456,9 +458,11 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event)
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
- if (dev_data && __ratelimit(&dev_data->rs)) {
- pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
- vmg_tag, gpa, flags_rmp, flags);
+ if (dev_data) {
+ if (__ratelimit(&dev_data->rs)) {
+ pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
+ vmg_tag, gpa, flags_rmp, flags);
+ }
} else {
pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
@@ -480,11 +484,13 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
- if (dev_data && __ratelimit(&dev_data->rs)) {
- pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
- domain_id, address, flags);
- } else if (printk_ratelimit()) {
- pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
+ if (dev_data) {
+ if (__ratelimit(&dev_data->rs)) {
+ pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
+ domain_id, address, flags);
+ }
+ } else {
+ pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
domain_id, address, flags);
}
@@ -1261,15 +1267,52 @@ static void __domain_flush_pages(struct protection_domain *domain,
}
static void domain_flush_pages(struct protection_domain *domain,
- u64 address, size_t size)
+ u64 address, size_t size, int pde)
{
- __domain_flush_pages(domain, address, size, 0);
+ if (likely(!amd_iommu_np_cache)) {
+ __domain_flush_pages(domain, address, size, pde);
+ return;
+ }
+
+ /*
+ * When NpCache is on, we infer that we run in a VM and use a vIOMMU.
+ * In such setups it is best to avoid flushes of ranges which are not
+ * naturally aligned, since it would lead to flushes of unmodified
+ * PTEs. Such flushes would require the hypervisor to do more work than
+ * necessary. Therefore, perform repeated flushes of aligned ranges
+ * until you cover the range. Each iteration flushes the smaller
+ * between the natural alignment of the address that we flush and the
+ * greatest naturally aligned region that fits in the range.
+ */
+ while (size != 0) {
+ int addr_alignment = __ffs(address);
+ int size_alignment = __fls(size);
+ int min_alignment;
+ size_t flush_size;
+
+ /*
+ * size is always non-zero, but address might be zero, causing
+ * addr_alignment to be negative. As the casting of the
+ * argument in __ffs(address) to long might trim the high bits
+ * of the address on x86-32, cast to long when doing the check.
+ */
+ if (likely((unsigned long)address != 0))
+ min_alignment = min(addr_alignment, size_alignment);
+ else
+ min_alignment = size_alignment;
+
+ flush_size = 1ul << min_alignment;
+
+ __domain_flush_pages(domain, address, flush_size, pde);
+ address += flush_size;
+ size -= flush_size;
+ }
}
/* Flush the whole IO/TLB for a given protection domain - including PDE */
void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
{
- __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
+ domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
}
void amd_iommu_domain_flush_complete(struct protection_domain *domain)
@@ -1296,7 +1339,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
- domain_flush_pages(domain, iova, size);
+ domain_flush_pages(domain, iova, size, 1);
amd_iommu_domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -1999,6 +2042,16 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
return ret;
}
+static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
+ unsigned long iova, size_t size)
+{
+ struct protection_domain *domain = to_pdomain(dom);
+ struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+
+ if (ops->map)
+ domain_flush_np_cache(domain, iova, size);
+}
+
static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
phys_addr_t paddr, size_t page_size, int iommu_prot,
gfp_t gfp)
@@ -2017,26 +2070,50 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
- if (ops->map) {
+ if (ops->map)
ret = ops->map(ops, iova, paddr, page_size, prot, gfp);
- domain_flush_np_cache(domain, iova, page_size);
- }
return ret;
}
+static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t size)
+{
+ /*
+ * AMD's IOMMU can flush as many pages as necessary in a single flush.
+ * Unless we run in a virtual machine, which can be inferred according
+ * to whether "non-present cache" is on, it is probably best to prefer
+ * (potentially) too extensive TLB flushing (i.e., more misses) over
+ * multiple TLB flushes (i.e., more flushes). For virtual machines the
+ * hypervisor needs to synchronize the host IOMMU PTEs with those of
+ * the guest, and the trade-off is different: unnecessary TLB flushes
+ * should be avoided.
+ */
+ if (amd_iommu_np_cache &&
+ iommu_iotlb_gather_is_disjoint(gather, iova, size))
+ iommu_iotlb_sync(domain, gather);
+
+ iommu_iotlb_gather_add_range(gather, iova, size);
+}
+
static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
size_t page_size,
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+ size_t r;
if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
(domain->iop.mode == PAGE_MODE_NONE))
return 0;
- return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
+ r = (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
+
+ amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
+
+ return r;
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -2139,7 +2216,13 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
- amd_iommu_flush_iotlb_all(domain);
+ struct protection_domain *dom = to_pdomain(domain);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dom->lock, flags);
+ domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
+ amd_iommu_domain_flush_complete(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
}
static int amd_iommu_def_domain_type(struct device *dev)
@@ -2168,6 +2251,7 @@ const struct iommu_ops amd_iommu_ops = {
.attach_dev = amd_iommu_attach_device,
.detach_dev = amd_iommu_detach_device,
.map = amd_iommu_map,
+ .iotlb_sync_map = amd_iommu_iotlb_sync_map,
.unmap = amd_iommu_unmap,
.iova_to_phys = amd_iommu_iova_to_phys,
.probe_device = amd_iommu_probe_device,
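
A quick stand-alone sketch of the splitting logic added to domain_flush_pages() above, for reference. It is a user-space model, not kernel code: flush() is a hypothetical stand-in for __domain_flush_pages(), and the GCC builtins approximate the kernel's __ffs()/__fls() bit helpers.

/*
 * Model of the naturally aligned range decomposition in domain_flush_pages().
 * flush() is a hypothetical stand-in for __domain_flush_pages().
 */
#include <stdio.h>
#include <stddef.h>

static void flush(unsigned long long address, size_t size)
{
        printf("flush addr=0x%llx size=0x%zx\n", address, size);
}

static void flush_aligned_ranges(unsigned long long address, size_t size)
{
        while (size != 0) {
                /* index of lowest set bit, like __ffs() */
                int addr_alignment = __builtin_ctzll(address);
                /* index of highest set bit, like __fls() */
                int size_alignment = 63 - __builtin_clzll(size);
                int min_alignment;
                size_t flush_size;

                /* __builtin_ctzll(0) is undefined, just as __ffs(0) is */
                if (address != 0)
                        min_alignment = addr_alignment < size_alignment ?
                                        addr_alignment : size_alignment;
                else
                        min_alignment = size_alignment;

                flush_size = 1ull << min_alignment;
                flush(address, flush_size);
                address += flush_size;
                size -= flush_size;
        }
}

int main(void)
{
        /* 0x3000 bytes at 0x5000: a 0x1000 chunk, then a 0x2000 chunk */
        flush_aligned_ranges(0x5000, 0x3000);
        return 0;
}

For 0x3000 bytes starting at 0x5000 this issues a 0x1000 flush at 0x5000 followed by a 0x2000 flush at 0x6000, instead of one over-wide flush that would also invalidate untouched PTEs.
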
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index f8d4ad421e07..a9e568276c99 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -6,6 +6,7 @@
#define pr_fmt(fmt) "AMD-Vi: " fmt
+#include <linux/refcount.h>
#include <linux/mmu_notifier.h>
#include <linux/amd-iommu.h>
#include <linux/mm_types.h>
@@ -33,7 +34,7 @@ struct pri_queue {
struct pasid_state {
struct list_head list; /* For global state-list */
- atomic_t count; /* Reference count */
+ refcount_t count; /* Reference count */
unsigned mmu_notifier_count; /* Counting nested mmu_notifier
calls */
struct mm_struct *mm; /* mm_struct for the faults */
@@ -242,7 +243,7 @@ static struct pasid_state *get_pasid_state(struct device_state *dev_state,
ret = *ptr;
if (ret)
- atomic_inc(&ret->count);
+ refcount_inc(&ret->count);
out_unlock:
spin_unlock_irqrestore(&dev_state->lock, flags);
@@ -257,14 +258,14 @@ static void free_pasid_state(struct pasid_state *pasid_state)
static void put_pasid_state(struct pasid_state *pasid_state)
{
- if (atomic_dec_and_test(&pasid_state->count))
+ if (refcount_dec_and_test(&pasid_state->count))
wake_up(&pasid_state->wq);
}
static void put_pasid_state_wait(struct pasid_state *pasid_state)
{
- atomic_dec(&pasid_state->count);
- wait_event(pasid_state->wq, !atomic_read(&pasid_state->count));
+ refcount_dec(&pasid_state->count);
+ wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
free_pasid_state(pasid_state);
}
@@ -624,7 +625,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
goto out;
- atomic_set(&pasid_state->count, 1);
+ refcount_set(&pasid_state->count, 1);
init_waitqueue_head(&pasid_state->wq);
spin_lock_init(&pasid_state->lock);
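
Similarly, a user-space model of the gather policy that amd_iommu_iotlb_gather_add_page() introduces in the iommu.c hunk above: when the non-present cache suggests a vIOMMU, a page that is disjoint from the window gathered so far triggers a sync first, so the deferred flush never spans unmodified PTEs. The struct and helpers here (gather_add_page(), gather_sync()) are illustrative only; the disjoint test mirrors iommu_iotlb_gather_is_disjoint().

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Models iommu_iotlb_gather: an inclusive [start, end] window of IOVAs. */
struct gather {
        unsigned long start;
        unsigned long end;
};

static const struct gather GATHER_INIT = { .start = ~0UL, .end = 0 };
static bool np_cache = true;    /* pretend we run behind a vIOMMU */

/* Mirrors iommu_iotlb_gather_is_disjoint(): no overlap and not adjacent. */
static bool gather_is_disjoint(const struct gather *g,
                               unsigned long iova, size_t size)
{
        unsigned long end = iova + size - 1;

        return g->end != 0 && (end + 1 < g->start || iova > g->end + 1);
}

static void gather_sync(struct gather *g)
{
        if (g->end == 0)        /* nothing gathered yet */
                return;
        printf("flush [0x%lx - 0x%lx]\n", g->start, g->end);
        *g = GATHER_INIT;
}

static void gather_add_page(struct gather *g, unsigned long iova, size_t size)
{
        /* Disjoint page while behind a vIOMMU: flush what we have first. */
        if (np_cache && gather_is_disjoint(g, iova, size))
                gather_sync(g);

        /* Grow the window, as iommu_iotlb_gather_add_range() does. */
        if (iova < g->start)
                g->start = iova;
        if (iova + size - 1 > g->end)
                g->end = iova + size - 1;
}

int main(void)
{
        struct gather g = GATHER_INIT;

        gather_add_page(&g, 0x1000, 0x1000);    /* starts the window             */
        gather_add_page(&g, 0x2000, 0x1000);    /* adjacent: keep gathering      */
        gather_add_page(&g, 0x9000, 0x1000);    /* disjoint: flush 0x1000-0x2fff */
        gather_sync(&g);                        /* final flush of 0x9000-0x9fff  */
        return 0;
}
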