summaryrefslogtreecommitdiff
path: root/drivers/vfio/vfio_iommu_type1.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio/vfio_iommu_type1.c')
-rw-r--r--drivers/vfio/vfio_iommu_type1.c197
1 files changed, 111 insertions, 86 deletions
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c13b9290e357..db516c90a977 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -67,7 +67,8 @@ struct vfio_iommu {
struct list_head iova_list;
struct mutex lock;
struct rb_root dma_list;
- struct blocking_notifier_head notifier;
+ struct list_head device_list;
+ struct mutex device_list_lock;
unsigned int dma_avail;
unsigned int vaddr_invalid_count;
uint64_t pgsize_bitmap;
@@ -828,9 +829,9 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
static int vfio_iommu_type1_pin_pages(void *iommu_data,
struct iommu_group *iommu_group,
- unsigned long *user_pfn,
+ dma_addr_t user_iova,
int npage, int prot,
- unsigned long *phys_pfn)
+ struct page **pages)
{
struct vfio_iommu *iommu = iommu_data;
struct vfio_iommu_group *group;
@@ -840,7 +841,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
bool do_accounting;
dma_addr_t iova;
- if (!iommu || !user_pfn || !phys_pfn)
+ if (!iommu || !pages)
return -EINVAL;
/* Supported for v2 version only */
@@ -856,7 +857,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
again:
if (iommu->vaddr_invalid_count) {
for (i = 0; i < npage; i++) {
- iova = user_pfn[i] << PAGE_SHIFT;
+ iova = user_iova + PAGE_SIZE * i;
ret = vfio_find_dma_valid(iommu, iova, PAGE_SIZE, &dma);
if (ret < 0)
goto pin_done;
@@ -865,8 +866,8 @@ again:
}
}
- /* Fail if notifier list is empty */
- if (!iommu->notifier.head) {
+ /* Fail if no dma_umap notifier is registered */
+ if (list_empty(&iommu->device_list)) {
ret = -EINVAL;
goto pin_done;
}
@@ -879,9 +880,10 @@ again:
do_accounting = list_empty(&iommu->domain_list);
for (i = 0; i < npage; i++) {
+ unsigned long phys_pfn;
struct vfio_pfn *vpfn;
- iova = user_pfn[i] << PAGE_SHIFT;
+ iova = user_iova + PAGE_SIZE * i;
dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
if (!dma) {
ret = -EINVAL;
@@ -895,23 +897,25 @@ again:
vpfn = vfio_iova_get_vfio_pfn(dma, iova);
if (vpfn) {
- phys_pfn[i] = vpfn->pfn;
+ pages[i] = pfn_to_page(vpfn->pfn);
continue;
}
remote_vaddr = dma->vaddr + (iova - dma->iova);
- ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn[i],
+ ret = vfio_pin_page_external(dma, remote_vaddr, &phys_pfn,
do_accounting);
if (ret)
goto pin_unwind;
- ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]);
+ ret = vfio_add_to_pfn_list(dma, iova, phys_pfn);
if (ret) {
- if (put_pfn(phys_pfn[i], dma->prot) && do_accounting)
+ if (put_pfn(phys_pfn, dma->prot) && do_accounting)
vfio_lock_acct(dma, -1, true);
goto pin_unwind;
}
+ pages[i] = pfn_to_page(phys_pfn);
+
if (iommu->dirty_page_tracking) {
unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
@@ -934,43 +938,38 @@ again:
goto pin_done;
pin_unwind:
- phys_pfn[i] = 0;
+ pages[i] = NULL;
for (j = 0; j < i; j++) {
dma_addr_t iova;
- iova = user_pfn[j] << PAGE_SHIFT;
+ iova = user_iova + PAGE_SIZE * j;
dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
vfio_unpin_page_external(dma, iova, do_accounting);
- phys_pfn[j] = 0;
+ pages[j] = NULL;
}
pin_done:
mutex_unlock(&iommu->lock);
return ret;
}
-static int vfio_iommu_type1_unpin_pages(void *iommu_data,
- unsigned long *user_pfn,
- int npage)
+static void vfio_iommu_type1_unpin_pages(void *iommu_data,
+ dma_addr_t user_iova, int npage)
{
struct vfio_iommu *iommu = iommu_data;
bool do_accounting;
int i;
- if (!iommu || !user_pfn || npage <= 0)
- return -EINVAL;
-
/* Supported for v2 version only */
- if (!iommu->v2)
- return -EACCES;
+ if (WARN_ON(!iommu->v2))
+ return;
mutex_lock(&iommu->lock);
do_accounting = list_empty(&iommu->domain_list);
for (i = 0; i < npage; i++) {
+ dma_addr_t iova = user_iova + PAGE_SIZE * i;
struct vfio_dma *dma;
- dma_addr_t iova;
- iova = user_pfn[i] << PAGE_SHIFT;
dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
if (!dma)
break;
@@ -979,7 +978,8 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data,
}
mutex_unlock(&iommu->lock);
- return i > 0 ? i : -EINVAL;
+
+ WARN_ON(i != npage);
}
static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
@@ -1287,6 +1287,35 @@ static int verify_bitmap_size(uint64_t npages, uint64_t bitmap_size)
return 0;
}
+/*
+ * Notify VFIO drivers using vfio_register_emulated_iommu_dev() to invalidate
+ * and unmap iovas within the range we're about to unmap. Drivers MUST unpin
+ * pages in response to an invalidation.
+ */
+static void vfio_notify_dma_unmap(struct vfio_iommu *iommu,
+ struct vfio_dma *dma)
+{
+ struct vfio_device *device;
+
+ if (list_empty(&iommu->device_list))
+ return;
+
+ /*
+ * The device is expected to call vfio_unpin_pages() for any IOVA it has
+ * pinned within the range. Since vfio_unpin_pages() will eventually
+ * call back down to this code and try to obtain the iommu->lock we must
+ * drop it.
+ */
+ mutex_lock(&iommu->device_list_lock);
+ mutex_unlock(&iommu->lock);
+
+ list_for_each_entry(device, &iommu->device_list, iommu_entry)
+ device->ops->dma_unmap(device, dma->iova, dma->size);
+
+ mutex_unlock(&iommu->device_list_lock);
+ mutex_lock(&iommu->lock);
+}
+
static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
struct vfio_iommu_type1_dma_unmap *unmap,
struct vfio_bitmap *bitmap)
@@ -1377,12 +1406,6 @@ again:
if (!iommu->v2 && iova > dma->iova)
break;
- /*
- * Task with same address space who mapped this iova range is
- * allowed to unmap the iova range.
- */
- if (dma->task->mm != current->mm)
- break;
if (invalidate_vaddr) {
if (dma->vaddr_invalid) {
@@ -1406,8 +1429,6 @@ again:
}
if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
- struct vfio_iommu_type1_dma_unmap nb_unmap;
-
if (dma_last == dma) {
BUG_ON(++retries > 10);
} else {
@@ -1415,20 +1436,7 @@ again:
retries = 0;
}
- nb_unmap.iova = dma->iova;
- nb_unmap.size = dma->size;
-
- /*
- * Notify anyone (mdev vendor drivers) to invalidate and
- * unmap iovas within the range we're about to unmap.
- * Vendor drivers MUST unpin pages in response to an
- * invalidation.
- */
- mutex_unlock(&iommu->lock);
- blocking_notifier_call_chain(&iommu->notifier,
- VFIO_IOMMU_NOTIFY_DMA_UNMAP,
- &nb_unmap);
- mutex_lock(&iommu->lock);
+ vfio_notify_dma_unmap(iommu, dma);
goto again;
}
@@ -1679,18 +1687,6 @@ out_unlock:
return ret;
}
-static int vfio_bus_type(struct device *dev, void *data)
-{
- struct bus_type **bus = data;
-
- if (*bus && *bus != dev->bus)
- return -EINVAL;
-
- *bus = dev->bus;
-
- return 0;
-}
-
static int vfio_iommu_replay(struct vfio_iommu *iommu,
struct vfio_domain *domain)
{
@@ -2153,13 +2149,26 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
list_splice_tail(iova_copy, iova);
}
+/* Redundantly walks non-present capabilities to simplify caller */
+static int vfio_iommu_device_capable(struct device *dev, void *data)
+{
+ return device_iommu_capable(dev, (enum iommu_cap)data);
+}
+
+static int vfio_iommu_domain_alloc(struct device *dev, void *data)
+{
+ struct iommu_domain **domain = data;
+
+ *domain = iommu_domain_alloc(dev->bus);
+ return 1; /* Don't iterate */
+}
+
static int vfio_iommu_type1_attach_group(void *iommu_data,
struct iommu_group *iommu_group, enum vfio_group_type type)
{
struct vfio_iommu *iommu = iommu_data;
struct vfio_iommu_group *group;
struct vfio_domain *domain, *d;
- struct bus_type *bus = NULL;
bool resv_msi, msi_remap;
phys_addr_t resv_msi_base = 0;
struct iommu_domain_geometry *geo;
@@ -2192,18 +2201,19 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
goto out_unlock;
}
- /* Determine bus_type in order to allocate a domain */
- ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
- if (ret)
- goto out_free_group;
-
ret = -ENOMEM;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
goto out_free_group;
+ /*
+ * Going via the iommu_group iterator avoids races, and trivially gives
+ * us a representative device for the IOMMU API call. We don't actually
+ * want to iterate beyond the first device (if any).
+ */
ret = -EIO;
- domain->domain = iommu_domain_alloc(bus);
+ iommu_group_for_each_dev(iommu_group, &domain->domain,
+ vfio_iommu_domain_alloc);
if (!domain->domain)
goto out_free_domain;
@@ -2258,7 +2268,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
list_add(&group->next, &domain->group_list);
msi_remap = irq_domain_check_msi_remap() ||
- iommu_capable(bus, IOMMU_CAP_INTR_REMAP);
+ iommu_group_for_each_dev(iommu_group, (void *)IOMMU_CAP_INTR_REMAP,
+ vfio_iommu_device_capable);
if (!allow_unsafe_interrupts && !msi_remap) {
pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
@@ -2478,7 +2489,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
if (list_empty(&iommu->emulated_iommu_groups) &&
list_empty(&iommu->domain_list)) {
- WARN_ON(iommu->notifier.head);
+ WARN_ON(!list_empty(&iommu->device_list));
vfio_iommu_unmap_unpin_all(iommu);
}
goto detach_group_done;
@@ -2510,7 +2521,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
if (list_empty(&domain->group_list)) {
if (list_is_singular(&iommu->domain_list)) {
if (list_empty(&iommu->emulated_iommu_groups)) {
- WARN_ON(iommu->notifier.head);
+ WARN_ON(!list_empty(
+ &iommu->device_list));
vfio_iommu_unmap_unpin_all(iommu);
} else {
vfio_iommu_unmap_unpin_reaccount(iommu);
@@ -2571,7 +2583,8 @@ static void *vfio_iommu_type1_open(unsigned long arg)
iommu->dma_avail = dma_entry_limit;
iommu->container_open = true;
mutex_init(&iommu->lock);
- BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
+ mutex_init(&iommu->device_list_lock);
+ INIT_LIST_HEAD(&iommu->device_list);
init_waitqueue_head(&iommu->vaddr_wait);
iommu->pgsize_bitmap = PAGE_MASK;
INIT_LIST_HEAD(&iommu->emulated_iommu_groups);
@@ -3008,28 +3021,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
}
}
-static int vfio_iommu_type1_register_notifier(void *iommu_data,
- unsigned long *events,
- struct notifier_block *nb)
+static void vfio_iommu_type1_register_device(void *iommu_data,
+ struct vfio_device *vdev)
{
struct vfio_iommu *iommu = iommu_data;
- /* clear known events */
- *events &= ~VFIO_IOMMU_NOTIFY_DMA_UNMAP;
-
- /* refuse to register if still events remaining */
- if (*events)
- return -EINVAL;
+ if (!vdev->ops->dma_unmap)
+ return;
- return blocking_notifier_chain_register(&iommu->notifier, nb);
+ /*
+ * list_empty(&iommu->device_list) is tested under the iommu->lock while
+ * iteration for dma_unmap must be done under the device_list_lock.
+ * Holding both locks here allows avoiding the device_list_lock in
+ * several fast paths. See vfio_notify_dma_unmap()
+ */
+ mutex_lock(&iommu->lock);
+ mutex_lock(&iommu->device_list_lock);
+ list_add(&vdev->iommu_entry, &iommu->device_list);
+ mutex_unlock(&iommu->device_list_lock);
+ mutex_unlock(&iommu->lock);
}
-static int vfio_iommu_type1_unregister_notifier(void *iommu_data,
- struct notifier_block *nb)
+static void vfio_iommu_type1_unregister_device(void *iommu_data,
+ struct vfio_device *vdev)
{
struct vfio_iommu *iommu = iommu_data;
- return blocking_notifier_chain_unregister(&iommu->notifier, nb);
+ if (!vdev->ops->dma_unmap)
+ return;
+
+ mutex_lock(&iommu->lock);
+ mutex_lock(&iommu->device_list_lock);
+ list_del(&vdev->iommu_entry);
+ mutex_unlock(&iommu->device_list_lock);
+ mutex_unlock(&iommu->lock);
}
static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
@@ -3163,8 +3188,8 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
.detach_group = vfio_iommu_type1_detach_group,
.pin_pages = vfio_iommu_type1_pin_pages,
.unpin_pages = vfio_iommu_type1_unpin_pages,
- .register_notifier = vfio_iommu_type1_register_notifier,
- .unregister_notifier = vfio_iommu_type1_unregister_notifier,
+ .register_device = vfio_iommu_type1_register_device,
+ .unregister_device = vfio_iommu_type1_unregister_device,
.dma_rw = vfio_iommu_type1_dma_rw,
.group_iommu_domain = vfio_iommu_type1_group_iommu_domain,
.notify = vfio_iommu_type1_notify,