Diffstat (limited to 'drivers/iommu/iommufd/pages.c')
-rw-r--r--	drivers/iommu/iommufd/pages.c	729
1 file changed, 629 insertions(+), 100 deletions(-)
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 3c47846cc5ef..dbe51ecb9a20 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -45,16 +45,20 @@
* last_iova + 1 can overflow. An iopt_pages index will always be much less than
* ULONG_MAX so last_index + 1 cannot overflow.
*/
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#include <linux/file.h>
+#include <linux/highmem.h>
+#include <linux/iommu.h>
+#include <linux/iommufd.h>
+#include <linux/kthread.h>
#include <linux/overflow.h>
#include <linux/slab.h>
-#include <linux/iommu.h>
#include <linux/sched/mm.h>
-#include <linux/highmem.h>
-#include <linux/kthread.h>
-#include <linux/iommufd.h>
+#include <linux/vfio_pci_core.h>
-#include "io_pagetable.h"
#include "double_span.h"
+#include "io_pagetable.h"
#ifndef CONFIG_IOMMUFD_TEST
#define TEMP_MEMORY_LIMIT 65536
@@ -257,6 +261,11 @@ static struct iopt_area *iopt_pages_find_domain_area(struct iopt_pages *pages,
return container_of(node, struct iopt_area, pages_node);
}
+enum batch_kind {
+ BATCH_CPU_MEMORY = 0,
+ BATCH_MMIO,
+};
+
/*
* A simple datastructure to hold a vector of PFNs, optimized for contiguous
* PFNs. This is used as a temporary holding memory for shuttling pfns from one
@@ -270,7 +279,9 @@ struct pfn_batch {
unsigned int array_size;
unsigned int end;
unsigned int total_pfns;
+ enum batch_kind kind;
};
+enum { MAX_NPFNS = type_max(typeof(((struct pfn_batch *)0)->npfns[0])) };
static void batch_clear(struct pfn_batch *batch)
{
@@ -297,7 +308,7 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns)
batch->pfns[0] = batch->pfns[batch->end - 1] +
(batch->npfns[batch->end - 1] - keep_pfns);
batch->npfns[0] = keep_pfns;
- batch->end = 0;
+ batch->end = 1;
}
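
[Editor's note: a worked example of the end = 1 fix above, using made-up values. If the trailing entry was { pfn = 100, npfns = 5 } and keep_pfns = 2, the carry leaves exactly one valid entry in the batch, so end must become 1 rather than 0:

	pfns[0]  = 100 + (5 - 2) = 103
	npfns[0] = 2
	end      = 1	/* the old end = 0 silently discarded the carried entry */
]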
static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns)
@@ -346,27 +357,47 @@ static void batch_destroy(struct pfn_batch *batch, void *backup)
kfree(batch->pfns);
}
-/* true if the pfn was added, false otherwise */
-static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn)
+static bool batch_add_pfn_num(struct pfn_batch *batch, unsigned long pfn,
+ u32 nr, enum batch_kind kind)
{
- const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns));
+ unsigned int end = batch->end;
- if (batch->end &&
- pfn == batch->pfns[batch->end - 1] + batch->npfns[batch->end - 1] &&
- batch->npfns[batch->end - 1] != MAX_NPFNS) {
- batch->npfns[batch->end - 1]++;
- batch->total_pfns++;
- return true;
+ if (batch->kind != kind) {
+ /* One kind per batch */
+ if (batch->end != 0)
+ return false;
+ batch->kind = kind;
}
- if (batch->end == batch->array_size)
+
+ if (end && pfn == batch->pfns[end - 1] + batch->npfns[end - 1] &&
+ nr <= MAX_NPFNS - batch->npfns[end - 1]) {
+ batch->npfns[end - 1] += nr;
+ } else if (end < batch->array_size) {
+ batch->pfns[end] = pfn;
+ batch->npfns[end] = nr;
+ batch->end++;
+ } else {
return false;
- batch->total_pfns++;
- batch->pfns[batch->end] = pfn;
- batch->npfns[batch->end] = 1;
- batch->end++;
+ }
+
+ batch->total_pfns += nr;
return true;
}
+static void batch_remove_pfn_num(struct pfn_batch *batch, unsigned long nr)
+{
+ batch->npfns[batch->end - 1] -= nr;
+ if (batch->npfns[batch->end - 1] == 0)
+ batch->end--;
+ batch->total_pfns -= nr;
+}
+
+/* true if the pfn was added, false otherwise */
+static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn)
+{
+ return batch_add_pfn_num(batch, pfn, 1, BATCH_CPU_MEMORY);
+}
+
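
[Editor's note: a sketch of how batch_add_pfn_num() coalesces entries, with invented numbers and assuming b was just cleared and array_size is large enough. Contiguous runs of the same kind merge into the previous slot, a discontiguous pfn opens a new slot, and kinds cannot be mixed once the batch is non-empty:

	batch_add_pfn_num(b, 10, 3, BATCH_CPU_MEMORY);	/* entry 0: { 10, 3 } */
	batch_add_pfn_num(b, 13, 2, BATCH_CPU_MEMORY);	/* merges:  { 10, 5 } */
	batch_add_pfn_num(b, 50, 1, BATCH_CPU_MEMORY);	/* entry 1: { 50, 1 } */
	batch_add_pfn_num(b, 60, 1, BATCH_MMIO);	/* false: one kind per batch */
]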
/*
* Fill the batch with pfns from the domain. When the batch is full, or it
* reaches last_index, the function will return. The caller should use
@@ -477,6 +508,7 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain,
{
bool disable_large_pages = area->iopt->disable_large_pages;
unsigned long last_iova = iopt_area_last_iova(area);
+ int iommu_prot = area->iommu_prot;
unsigned int page_offset = 0;
unsigned long start_iova;
unsigned long next_iova;
@@ -484,6 +516,11 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain,
unsigned long iova;
int rc;
+ if (batch->kind == BATCH_MMIO) {
+ iommu_prot &= ~IOMMU_CACHE;
+ iommu_prot |= IOMMU_MMIO;
+ }
+
/* The first index might be a partial page */
if (start_index == iopt_area_index(area))
page_offset = area->page_offset;
@@ -497,11 +534,11 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain,
rc = batch_iommu_map_small(
domain, iova,
PFN_PHYS(batch->pfns[cur]) + page_offset,
- next_iova - iova, area->iommu_prot);
+ next_iova - iova, iommu_prot);
else
rc = iommu_map(domain, iova,
PFN_PHYS(batch->pfns[cur]) + page_offset,
- next_iova - iova, area->iommu_prot,
+ next_iova - iova, iommu_prot,
GFP_KERNEL_ACCOUNT);
if (rc)
goto err_unmap;
@@ -622,6 +659,41 @@ static void batch_from_pages(struct pfn_batch *batch, struct page **pages,
break;
}
+static int batch_from_folios(struct pfn_batch *batch, struct folio ***folios_p,
+ unsigned long *offset_p, unsigned long npages)
+{
+ int rc = 0;
+ struct folio **folios = *folios_p;
+ unsigned long offset = *offset_p;
+
+ while (npages) {
+ struct folio *folio = *folios;
+ unsigned long nr = folio_nr_pages(folio) - offset;
+ unsigned long pfn = page_to_pfn(folio_page(folio, offset));
+
+ nr = min(nr, npages);
+ npages -= nr;
+
+ if (!batch_add_pfn_num(batch, pfn, nr, BATCH_CPU_MEMORY))
+ break;
+ if (nr > 1) {
+ rc = folio_add_pins(folio, nr - 1);
+ if (rc) {
+ batch_remove_pfn_num(batch, nr);
+ goto out;
+ }
+ }
+
+ folios++;
+ offset = 0;
+ }
+
+out:
+ *folios_p = folios;
+ *offset_p = offset;
+ return rc;
+}
+
static void batch_unpin(struct pfn_batch *batch, struct iopt_pages *pages,
unsigned int first_page_off, size_t npages)
{
@@ -703,19 +775,32 @@ struct pfn_reader_user {
* neither
*/
int locked;
+
+ /* The following are only valid if file != NULL. */
+ struct file *file;
+ struct folio **ufolios;
+ size_t ufolios_len;
+ unsigned long ufolios_offset;
+ struct folio **ufolios_next;
};
static void pfn_reader_user_init(struct pfn_reader_user *user,
struct iopt_pages *pages)
{
user->upages = NULL;
+ user->upages_len = 0;
user->upages_start = 0;
user->upages_end = 0;
user->locked = -1;
-
user->gup_flags = FOLL_LONGTERM;
if (pages->writable)
user->gup_flags |= FOLL_WRITE;
+
+ user->file = (pages->type == IOPT_ADDRESS_FILE) ? pages->file : NULL;
+ user->ufolios = NULL;
+ user->ufolios_len = 0;
+ user->ufolios_next = NULL;
+ user->ufolios_offset = 0;
}
static void pfn_reader_user_destroy(struct pfn_reader_user *user,
@@ -724,13 +809,67 @@ static void pfn_reader_user_destroy(struct pfn_reader_user *user,
if (user->locked != -1) {
if (user->locked)
mmap_read_unlock(pages->source_mm);
- if (pages->source_mm != current->mm)
+ if (!user->file && pages->source_mm != current->mm)
mmput(pages->source_mm);
user->locked = -1;
}
kfree(user->upages);
user->upages = NULL;
+ kfree(user->ufolios);
+ user->ufolios = NULL;
+}
+
+static long pin_memfd_pages(struct pfn_reader_user *user, unsigned long start,
+ unsigned long npages)
+{
+ unsigned long i;
+ unsigned long offset;
+ unsigned long npages_out = 0;
+ struct page **upages = user->upages;
+ unsigned long end = start + (npages << PAGE_SHIFT) - 1;
+ long nfolios = user->ufolios_len / sizeof(*user->ufolios);
+
+ /*
+ * TODO: memfd_pin_folios() should return the last pinned offset so
+ * that we could compute the number of pages pinned here and avoid
+ * looping over the folios when upages == NULL.
+ */
+ nfolios = memfd_pin_folios(user->file, start, end, user->ufolios,
+ nfolios, &offset);
+ if (nfolios <= 0)
+ return nfolios;
+
+ offset >>= PAGE_SHIFT;
+ user->ufolios_next = user->ufolios;
+ user->ufolios_offset = offset;
+
+ for (i = 0; i < nfolios; i++) {
+ struct folio *folio = user->ufolios[i];
+ unsigned long nr = folio_nr_pages(folio);
+ unsigned long npin = min(nr - offset, npages);
+
+ npages -= npin;
+ npages_out += npin;
+
+ if (upages) {
+ if (npin == 1) {
+ *upages++ = folio_page(folio, offset);
+ } else {
+ int rc = folio_add_pins(folio, npin - 1);
+
+ if (rc)
+ return rc;
+
+ while (npin--)
+ *upages++ = folio_page(folio, offset++);
+ }
+ }
+
+ offset = 0;
+ }
+
+ return npages_out;
}
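
[Editor's note: a worked pass through the folio loop above, with invented geometry. Suppose npages = 5 and memfd_pin_folios() returned two 4-page folios, with the requested start landing at page offset 2 of the first folio:

	folio 0: npin = min(4 - 2, 5) = 2	-> npages = 3, npages_out = 2
	folio 1: npin = min(4 - 0, 3) = 3	-> npages = 0, npages_out = 5

When upages is supplied, each folio also gains npin - 1 extra pins via folio_add_pins(), so every page placed in upages carries its own pin and can later be released with unpin_user_pages().]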
static int pfn_reader_user_pin(struct pfn_reader_user *user,
@@ -739,7 +878,9 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user,
unsigned long last_index)
{
bool remote_mm = pages->source_mm != current->mm;
- unsigned long npages;
+ unsigned long npages = last_index - start_index + 1;
+ unsigned long start;
+ unsigned long unum;
uintptr_t uptr;
long rc;
@@ -747,46 +888,56 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user,
WARN_ON(last_index < start_index))
return -EINVAL;
- if (!user->upages) {
+ if (!user->file && !user->upages) {
/* All undone in pfn_reader_destroy() */
- user->upages_len =
- (last_index - start_index + 1) * sizeof(*user->upages);
+ user->upages_len = npages * sizeof(*user->upages);
user->upages = temp_kmalloc(&user->upages_len, NULL, 0);
if (!user->upages)
return -ENOMEM;
}
+ if (user->file && !user->ufolios) {
+ user->ufolios_len = npages * sizeof(*user->ufolios);
+ user->ufolios = temp_kmalloc(&user->ufolios_len, NULL, 0);
+ if (!user->ufolios)
+ return -ENOMEM;
+ }
+
if (user->locked == -1) {
/*
* The majority of usages will run the map task within the mm
* providing the pages, so we can optimize into
* get_user_pages_fast()
*/
- if (remote_mm) {
+ if (!user->file && remote_mm) {
if (!mmget_not_zero(pages->source_mm))
return -EFAULT;
}
user->locked = 0;
}
- npages = min_t(unsigned long, last_index - start_index + 1,
- user->upages_len / sizeof(*user->upages));
-
+ unum = user->file ? user->ufolios_len / sizeof(*user->ufolios) :
+ user->upages_len / sizeof(*user->upages);
+ npages = min_t(unsigned long, npages, unum);
if (iommufd_should_fail())
return -EFAULT;
- uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE);
- if (!remote_mm)
+ if (user->file) {
+ start = pages->start + (start_index * PAGE_SIZE);
+ rc = pin_memfd_pages(user, start, npages);
+ } else if (!remote_mm) {
+ uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE);
rc = pin_user_pages_fast(uptr, npages, user->gup_flags,
user->upages);
- else {
+ } else {
+ uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE);
if (!user->locked) {
mmap_read_lock(pages->source_mm);
user->locked = 1;
}
rc = pin_user_pages_remote(pages->source_mm, uptr, npages,
- user->gup_flags, user->upages, NULL,
+ user->gup_flags, user->upages,
&user->locked);
}
if (rc <= 0) {
@@ -809,13 +960,14 @@ static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages)
lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >>
PAGE_SHIFT;
+
+ cur_pages = atomic_long_read(&pages->source_user->locked_vm);
do {
- cur_pages = atomic_long_read(&pages->source_user->locked_vm);
new_pages = cur_pages + npages;
if (new_pages > lock_limit)
return -ENOMEM;
- } while (atomic_long_cmpxchg(&pages->source_user->locked_vm, cur_pages,
- new_pages) != cur_pages);
+ } while (!atomic_long_try_cmpxchg(&pages->source_user->locked_vm,
+ &cur_pages, new_pages));
return 0;
}
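
[Editor's note: the atomic_long_try_cmpxchg() form above avoids re-reading locked_vm on every retry because a failed exchange writes the freshly observed value back into cur_pages. A minimal standalone sketch of the same pattern; the helper and its names are invented for illustration:

	static int charge_locked_vm(atomic_long_t *locked_vm, long npages, long limit)
	{
		long cur = atomic_long_read(locked_vm);

		do {
			if (cur + npages > limit)
				return -ENOMEM;
			/* on failure, cur is refreshed with the observed value */
		} while (!atomic_long_try_cmpxchg(locked_vm, &cur, cur + npages));

		return 0;
	}
]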
@@ -837,7 +989,8 @@ static int update_mm_locked_vm(struct iopt_pages *pages, unsigned long npages,
mmap_read_unlock(pages->source_mm);
user->locked = 0;
/* If we had the lock then we also have a get */
- } else if ((!user || !user->upages) &&
+
+ } else if ((!user || (!user->upages && !user->ufolios)) &&
pages->source_mm != current->mm) {
if (!mmget_not_zero(pages->source_mm))
return -EINVAL;
@@ -854,8 +1007,8 @@ static int update_mm_locked_vm(struct iopt_pages *pages, unsigned long npages,
return rc;
}
-static int do_update_pinned(struct iopt_pages *pages, unsigned long npages,
- bool inc, struct pfn_reader_user *user)
+int iopt_pages_update_pinned(struct iopt_pages *pages, unsigned long npages,
+ bool inc, struct pfn_reader_user *user)
{
int rc = 0;
@@ -889,8 +1042,8 @@ static void update_unpinned(struct iopt_pages *pages)
return;
if (pages->npinned == pages->last_npinned)
return;
- do_update_pinned(pages, pages->last_npinned - pages->npinned, false,
- NULL);
+ iopt_pages_update_pinned(pages, pages->last_npinned - pages->npinned,
+ false, NULL);
}
/*
@@ -920,7 +1073,42 @@ static int pfn_reader_user_update_pinned(struct pfn_reader_user *user,
npages = pages->npinned - pages->last_npinned;
inc = true;
}
- return do_update_pinned(pages, npages, inc, user);
+ return iopt_pages_update_pinned(pages, npages, inc, user);
+}
+
+struct pfn_reader_dmabuf {
+ struct dma_buf_phys_vec phys;
+ unsigned long start_offset;
+};
+
+static int pfn_reader_dmabuf_init(struct pfn_reader_dmabuf *dmabuf,
+ struct iopt_pages *pages)
+{
+ /* Callers must not get here if the dmabuf was already revoked */
+ if (WARN_ON(iopt_dmabuf_revoked(pages)))
+ return -EINVAL;
+
+ dmabuf->phys = pages->dmabuf.phys;
+ dmabuf->start_offset = pages->dmabuf.start;
+ return 0;
+}
+
+static int pfn_reader_fill_dmabuf(struct pfn_reader_dmabuf *dmabuf,
+ struct pfn_batch *batch,
+ unsigned long start_index,
+ unsigned long last_index)
+{
+ unsigned long start = dmabuf->start_offset + start_index * PAGE_SIZE;
+
+ /*
+ * start_index/last_index and start are all PAGE_SIZE aligned; the
+ * batch is always filled with page size aligned PFNs just like the
+ * other types. If the dmabuf has been sliced at a sub page offset
+ * then the common batch to domain code will adjust it before mapping
+ * to the domain.
+ */
+ batch_add_pfn_num(batch, PHYS_PFN(dmabuf->phys.paddr + start),
+ last_index - start_index + 1, BATCH_MMIO);
+ return 0;
}
/*
@@ -941,7 +1129,10 @@ struct pfn_reader {
unsigned long batch_end_index;
unsigned long last_index;
- struct pfn_reader_user user;
+ union {
+ struct pfn_reader_user user;
+ struct pfn_reader_dmabuf dmabuf;
+ };
};
static int pfn_reader_update_pinned(struct pfn_reader *pfns)
@@ -977,6 +1168,8 @@ static int pfn_reader_fill_span(struct pfn_reader *pfns)
{
struct interval_tree_double_span_iter *span = &pfns->span;
unsigned long start_index = pfns->batch_end_index;
+ struct pfn_reader_user *user;
+ unsigned long npages;
struct iopt_area *area;
int rc;
@@ -1007,18 +1200,29 @@ static int pfn_reader_fill_span(struct pfn_reader *pfns)
return 0;
}
- if (start_index >= pfns->user.upages_end) {
- rc = pfn_reader_user_pin(&pfns->user, pfns->pages, start_index,
+ if (iopt_is_dmabuf(pfns->pages))
+ return pfn_reader_fill_dmabuf(&pfns->dmabuf, &pfns->batch,
+ start_index, span->last_hole);
+
+ user = &pfns->user;
+ if (start_index >= user->upages_end) {
+ rc = pfn_reader_user_pin(user, pfns->pages, start_index,
span->last_hole);
if (rc)
return rc;
}
- batch_from_pages(&pfns->batch,
- pfns->user.upages +
- (start_index - pfns->user.upages_start),
- pfns->user.upages_end - start_index);
- return 0;
+ npages = user->upages_end - start_index;
+ start_index -= user->upages_start;
+ rc = 0;
+
+ if (!user->file)
+ batch_from_pages(&pfns->batch, user->upages + start_index,
+ npages);
+ else
+ rc = batch_from_folios(&pfns->batch, &user->ufolios_next,
+ &user->ufolios_offset, npages);
+ return rc;
}
static bool pfn_reader_done(struct pfn_reader *pfns)
@@ -1070,7 +1274,10 @@ static int pfn_reader_init(struct pfn_reader *pfns, struct iopt_pages *pages,
pfns->batch_start_index = start_index;
pfns->batch_end_index = start_index;
pfns->last_index = last_index;
- pfn_reader_user_init(&pfns->user, pages);
+ if (iopt_is_dmabuf(pages))
+ pfn_reader_dmabuf_init(&pfns->dmabuf, pages);
+ else
+ pfn_reader_user_init(&pfns->user, pages);
rc = batch_init(&pfns->batch, last_index - start_index + 1);
if (rc)
return rc;
@@ -1091,16 +1298,29 @@ static int pfn_reader_init(struct pfn_reader *pfns, struct iopt_pages *pages,
static void pfn_reader_release_pins(struct pfn_reader *pfns)
{
struct iopt_pages *pages = pfns->pages;
+ struct pfn_reader_user *user;
- if (pfns->user.upages_end > pfns->batch_end_index) {
- size_t npages = pfns->user.upages_end - pfns->batch_end_index;
+ if (iopt_is_dmabuf(pages))
+ return;
+ user = &pfns->user;
+ if (user->upages_end > pfns->batch_end_index) {
/* Any pages not transferred to the batch are just unpinned */
- unpin_user_pages(pfns->user.upages + (pfns->batch_end_index -
- pfns->user.upages_start),
- npages);
+
+ unsigned long npages = user->upages_end - pfns->batch_end_index;
+ unsigned long start_index = pfns->batch_end_index -
+ user->upages_start;
+
+ if (!user->file) {
+ unpin_user_pages(user->upages + start_index, npages);
+ } else {
+ long n = user->ufolios_len / sizeof(*user->ufolios);
+
+ unpin_folios(user->ufolios_next,
+ user->ufolios + n - user->ufolios_next);
+ }
iopt_pages_sub_npinned(pages, npages);
- pfns->user.upages_end = pfns->batch_end_index;
+ user->upages_end = pfns->batch_end_index;
}
if (pfns->batch_start_index != pfns->batch_end_index) {
pfn_reader_unpin(pfns);
@@ -1113,7 +1333,8 @@ static void pfn_reader_destroy(struct pfn_reader *pfns)
struct iopt_pages *pages = pfns->pages;
pfn_reader_release_pins(pfns);
- pfn_reader_user_destroy(&pfns->user, pfns->pages);
+ if (!iopt_is_dmabuf(pfns->pages))
+ pfn_reader_user_destroy(&pfns->user, pfns->pages);
batch_destroy(&pfns->batch, NULL);
WARN_ON(pages->last_npinned != pages->npinned);
}
@@ -1138,11 +1359,10 @@ static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages,
return 0;
}
-struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
- bool writable)
+static struct iopt_pages *iopt_alloc_pages(unsigned long start_byte,
+ unsigned long length, bool writable)
{
struct iopt_pages *pages;
- unsigned long end;
/*
* The iommu API uses size_t as the length, and protect the DIV_ROUND_UP
@@ -1151,9 +1371,6 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
if (length > SIZE_MAX - PAGE_SIZE || length == 0)
return ERR_PTR(-EINVAL);
- if (check_add_overflow((unsigned long)uptr, length, &end))
- return ERR_PTR(-EOVERFLOW);
-
pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT);
if (!pages)
return ERR_PTR(-ENOMEM);
@@ -1163,8 +1380,7 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
mutex_init(&pages->mutex);
pages->source_mm = current->mm;
mmgrab(pages->source_mm);
- pages->uptr = (void __user *)ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE);
- pages->npages = DIV_ROUND_UP(length + (uptr - pages->uptr), PAGE_SIZE);
+ pages->npages = DIV_ROUND_UP(length + start_byte, PAGE_SIZE);
pages->access_itree = RB_ROOT_CACHED;
pages->domains_itree = RB_ROOT_CACHED;
pages->writable = writable;
@@ -1178,6 +1394,253 @@ struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
return pages;
}
+struct iopt_pages *iopt_alloc_user_pages(void __user *uptr,
+ unsigned long length, bool writable)
+{
+ struct iopt_pages *pages;
+ unsigned long end;
+ void __user *uptr_down =
+ (void __user *)ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE);
+
+ if (check_add_overflow((unsigned long)uptr, length, &end))
+ return ERR_PTR(-EOVERFLOW);
+
+ pages = iopt_alloc_pages(uptr - uptr_down, length, writable);
+ if (IS_ERR(pages))
+ return pages;
+ pages->uptr = uptr_down;
+ pages->type = IOPT_ADDRESS_USER;
+ return pages;
+}
+
+struct iopt_pages *iopt_alloc_file_pages(struct file *file,
+ unsigned long start_byte,
+ unsigned long start,
+ unsigned long length, bool writable)
+
+{
+ struct iopt_pages *pages;
+
+ pages = iopt_alloc_pages(start_byte, length, writable);
+ if (IS_ERR(pages))
+ return pages;
+ pages->file = get_file(file);
+ pages->start = start - start_byte;
+ pages->type = IOPT_ADDRESS_FILE;
+ return pages;
+}
+
+static void iopt_revoke_notify(struct dma_buf_attachment *attach)
+{
+ struct iopt_pages *pages = attach->importer_priv;
+ struct iopt_pages_dmabuf_track *track;
+
+ guard(mutex)(&pages->mutex);
+ if (iopt_dmabuf_revoked(pages))
+ return;
+
+ list_for_each_entry(track, &pages->dmabuf.tracker, elm) {
+ struct iopt_area *area = track->area;
+
+ iopt_area_unmap_domain_range(area, track->domain,
+ iopt_area_index(area),
+ iopt_area_last_index(area));
+ }
+ pages->dmabuf.phys.len = 0;
+}
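
[Editor's note: iopt_revoke_notify() relies on guard(mutex)() from <linux/cleanup.h>: the mutex is released automatically when the function returns, including at the early return for an already revoked dmabuf. A tiny standalone sketch of the idiom, with invented names:

	#include <linux/cleanup.h>
	#include <linux/mutex.h>

	static DEFINE_MUTEX(example_lock);
	static bool example_ready;

	static int example_check(void)
	{
		guard(mutex)(&example_lock);	/* dropped automatically at any return */

		if (!example_ready)
			return -EAGAIN;		/* no explicit mutex_unlock() needed */
		return 0;
	}
]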
+
+static struct dma_buf_attach_ops iopt_dmabuf_attach_revoke_ops = {
+ .allow_peer2peer = true,
+ .move_notify = iopt_revoke_notify,
+};
+
+/*
+ * iommufd and vfio have a circular dependency. Future work for a phys
+ * based private interconnect will remove this.
+ */
+static int
+sym_vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment *attachment,
+ struct dma_buf_phys_vec *phys)
+{
+ typeof(&vfio_pci_dma_buf_iommufd_map) fn;
+ int rc;
+
+ rc = iommufd_test_dma_buf_iommufd_map(attachment, phys);
+ if (rc != -EOPNOTSUPP)
+ return rc;
+
+ if (!IS_ENABLED(CONFIG_VFIO_PCI_DMABUF))
+ return -EOPNOTSUPP;
+
+ fn = symbol_get(vfio_pci_dma_buf_iommufd_map);
+ if (!fn)
+ return -EOPNOTSUPP;
+ rc = fn(attachment, phys);
+ symbol_put(vfio_pci_dma_buf_iommufd_map);
+ return rc;
+}
+
+static int iopt_map_dmabuf(struct iommufd_ctx *ictx, struct iopt_pages *pages,
+ struct dma_buf *dmabuf)
+{
+ struct dma_buf_attachment *attach;
+ int rc;
+
+ attach = dma_buf_dynamic_attach(dmabuf, iommufd_global_device(),
+ &iopt_dmabuf_attach_revoke_ops, pages);
+ if (IS_ERR(attach))
+ return PTR_ERR(attach);
+
+ dma_resv_lock(dmabuf->resv, NULL);
+ /*
+ * Lock ordering requires the mutex to be taken inside the reservation
+ * lock; make sure lockdep sees this.
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ mutex_lock(&pages->mutex);
+ mutex_unlock(&pages->mutex);
+ }
+
+ rc = sym_vfio_pci_dma_buf_iommufd_map(attach, &pages->dmabuf.phys);
+ if (rc)
+ goto err_detach;
+
+ dma_resv_unlock(dmabuf->resv);
+
+ /* On success iopt_release_pages() will detach and put the dmabuf. */
+ pages->dmabuf.attach = attach;
+ return 0;
+
+err_detach:
+ dma_resv_unlock(dmabuf->resv);
+ dma_buf_detach(dmabuf, attach);
+ return rc;
+}
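
[Editor's note: the mutex_lock()/mutex_unlock() pair above does no real work; it only records the "pages->mutex nests inside the dmabuf reservation lock" ordering with lockdep so a violation elsewhere is reported. The same trick in isolation, using two made-up plain mutexes:

	static DEFINE_MUTEX(lock_a);
	static DEFINE_MUTEX(lock_b);

	/* Teach lockdep once that lock_b nests inside lock_a */
	static void prime_lock_ordering(void)
	{
		mutex_lock(&lock_a);
		mutex_lock(&lock_b);
		mutex_unlock(&lock_b);
		mutex_unlock(&lock_a);
	}
]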
+
+struct iopt_pages *iopt_alloc_dmabuf_pages(struct iommufd_ctx *ictx,
+ struct dma_buf *dmabuf,
+ unsigned long start_byte,
+ unsigned long start,
+ unsigned long length, bool writable)
+{
+ static struct lock_class_key pages_dmabuf_mutex_key;
+ struct iopt_pages *pages;
+ int rc;
+
+ if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (dmabuf->size <= (start + length - 1) ||
+ length / PAGE_SIZE >= MAX_NPFNS)
+ return ERR_PTR(-EINVAL);
+
+ pages = iopt_alloc_pages(start_byte, length, writable);
+ if (IS_ERR(pages))
+ return pages;
+
+ /*
+ * The mmap_lock can be held when obtaining the dmabuf reservation lock,
+ * which creates a locking cycle with the pages mutex that is held while
+ * obtaining the mmap_lock. This locking path is not present for
+ * IOPT_ADDRESS_DMABUF, so split the lock class.
+ */
+ lockdep_set_class(&pages->mutex, &pages_dmabuf_mutex_key);
+
+ /* dmabuf does not use pinned page accounting. */
+ pages->account_mode = IOPT_PAGES_ACCOUNT_NONE;
+ pages->type = IOPT_ADDRESS_DMABUF;
+ pages->dmabuf.start = start - start_byte;
+ INIT_LIST_HEAD(&pages->dmabuf.tracker);
+
+ rc = iopt_map_dmabuf(ictx, pages, dmabuf);
+ if (rc) {
+ iopt_put_pages(pages);
+ return ERR_PTR(rc);
+ }
+
+ return pages;
+}
+
+int iopt_dmabuf_track_domain(struct iopt_pages *pages, struct iopt_area *area,
+ struct iommu_domain *domain)
+{
+ struct iopt_pages_dmabuf_track *track;
+
+ lockdep_assert_held(&pages->mutex);
+ if (WARN_ON(!iopt_is_dmabuf(pages)))
+ return -EINVAL;
+
+ list_for_each_entry(track, &pages->dmabuf.tracker, elm)
+ if (WARN_ON(track->domain == domain && track->area == area))
+ return -EINVAL;
+
+ track = kzalloc(sizeof(*track), GFP_KERNEL);
+ if (!track)
+ return -ENOMEM;
+ track->domain = domain;
+ track->area = area;
+ list_add_tail(&track->elm, &pages->dmabuf.tracker);
+
+ return 0;
+}
+
+void iopt_dmabuf_untrack_domain(struct iopt_pages *pages,
+ struct iopt_area *area,
+ struct iommu_domain *domain)
+{
+ struct iopt_pages_dmabuf_track *track;
+
+ lockdep_assert_held(&pages->mutex);
+ WARN_ON(!iopt_is_dmabuf(pages));
+
+ list_for_each_entry(track, &pages->dmabuf.tracker, elm) {
+ if (track->domain == domain && track->area == area) {
+ list_del(&track->elm);
+ kfree(track);
+ return;
+ }
+ }
+ WARN_ON(true);
+}
+
+int iopt_dmabuf_track_all_domains(struct iopt_area *area,
+ struct iopt_pages *pages)
+{
+ struct iopt_pages_dmabuf_track *track;
+ struct iommu_domain *domain;
+ unsigned long index;
+ int rc;
+
+ list_for_each_entry(track, &pages->dmabuf.tracker, elm)
+ if (WARN_ON(track->area == area))
+ return -EINVAL;
+
+ xa_for_each(&area->iopt->domains, index, domain) {
+ rc = iopt_dmabuf_track_domain(pages, area, domain);
+ if (rc)
+ goto err_untrack;
+ }
+ return 0;
+err_untrack:
+ iopt_dmabuf_untrack_all_domains(area, pages);
+ return rc;
+}
+
+void iopt_dmabuf_untrack_all_domains(struct iopt_area *area,
+ struct iopt_pages *pages)
+{
+ struct iopt_pages_dmabuf_track *track;
+ struct iopt_pages_dmabuf_track *tmp;
+
+ list_for_each_entry_safe(track, tmp, &pages->dmabuf.tracker,
+ elm) {
+ if (track->area == area) {
+ list_del(&track->elm);
+ kfree(track);
+ }
+ }
+}
+
void iopt_release_pages(struct kref *kref)
{
struct iopt_pages *pages = container_of(kref, struct iopt_pages, kref);
@@ -1190,6 +1653,15 @@ void iopt_release_pages(struct kref *kref)
mutex_destroy(&pages->mutex);
put_task_struct(pages->source_task);
free_uid(pages->source_user);
+ if (iopt_is_dmabuf(pages) && pages->dmabuf.attach) {
+ struct dma_buf *dmabuf = pages->dmabuf.attach->dmabuf;
+
+ dma_buf_detach(dmabuf, pages->dmabuf.attach);
+ dma_buf_put(dmabuf);
+ WARN_ON(!list_empty(&pages->dmabuf.tracker));
+ } else if (pages->type == IOPT_ADDRESS_FILE) {
+ fput(pages->file);
+ }
kfree(pages);
}
@@ -1267,6 +1739,14 @@ static void __iopt_area_unfill_domain(struct iopt_area *area,
lockdep_assert_held(&pages->mutex);
+ if (iopt_is_dmabuf(pages)) {
+ if (WARN_ON(iopt_dmabuf_revoked(pages)))
+ return;
+ iopt_area_unmap_domain_range(area, domain, start_index,
+ last_index);
+ return;
+ }
+
/*
* For security we must not unpin something that is still DMA mapped,
* so this must unmap any IOVA before we go ahead and unpin the pages.
@@ -1342,6 +1822,9 @@ void iopt_area_unmap_domain(struct iopt_area *area, struct iommu_domain *domain)
void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages,
struct iommu_domain *domain)
{
+ if (iopt_dmabuf_revoked(pages))
+ return;
+
__iopt_area_unfill_domain(area, pages, domain,
iopt_area_last_index(area));
}
@@ -1362,6 +1845,9 @@ int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain)
lockdep_assert_held(&area->pages->mutex);
+ if (iopt_dmabuf_revoked(area->pages))
+ return 0;
+
rc = pfn_reader_first(&pfns, area->pages, iopt_area_index(area),
iopt_area_last_index(area));
if (rc)
@@ -1421,33 +1907,44 @@ int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages)
return 0;
mutex_lock(&pages->mutex);
- rc = pfn_reader_first(&pfns, pages, iopt_area_index(area),
- iopt_area_last_index(area));
- if (rc)
- goto out_unlock;
+ if (iopt_is_dmabuf(pages)) {
+ rc = iopt_dmabuf_track_all_domains(area, pages);
+ if (rc)
+ goto out_unlock;
+ }
- while (!pfn_reader_done(&pfns)) {
- done_first_end_index = pfns.batch_end_index;
- done_all_end_index = pfns.batch_start_index;
- xa_for_each(&area->iopt->domains, index, domain) {
- rc = batch_to_domain(&pfns.batch, domain, area,
- pfns.batch_start_index);
+ if (!iopt_dmabuf_revoked(pages)) {
+ rc = pfn_reader_first(&pfns, pages, iopt_area_index(area),
+ iopt_area_last_index(area));
+ if (rc)
+ goto out_untrack;
+
+ while (!pfn_reader_done(&pfns)) {
+ done_first_end_index = pfns.batch_end_index;
+ done_all_end_index = pfns.batch_start_index;
+ xa_for_each(&area->iopt->domains, index, domain) {
+ rc = batch_to_domain(&pfns.batch, domain, area,
+ pfns.batch_start_index);
+ if (rc)
+ goto out_unmap;
+ }
+ done_all_end_index = done_first_end_index;
+
+ rc = pfn_reader_next(&pfns);
if (rc)
goto out_unmap;
}
- done_all_end_index = done_first_end_index;
-
- rc = pfn_reader_next(&pfns);
+ rc = pfn_reader_update_pinned(&pfns);
if (rc)
goto out_unmap;
+
+ pfn_reader_destroy(&pfns);
}
- rc = pfn_reader_update_pinned(&pfns);
- if (rc)
- goto out_unmap;
area->storage_domain = xa_load(&area->iopt->domains, 0);
interval_tree_insert(&area->pages_node, &pages->domains_itree);
- goto out_destroy;
+ mutex_unlock(&pages->mutex);
+ return 0;
out_unmap:
pfn_reader_release_pins(&pfns);
@@ -1474,8 +1971,10 @@ out_unmap:
end_index);
}
}
-out_destroy:
pfn_reader_destroy(&pfns);
+out_untrack:
+ if (iopt_is_dmabuf(pages))
+ iopt_dmabuf_untrack_all_domains(area, pages);
out_unlock:
mutex_unlock(&pages->mutex);
return rc;
@@ -1501,14 +2000,22 @@ void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages)
if (!area->storage_domain)
goto out_unlock;
- xa_for_each(&iopt->domains, index, domain)
- if (domain != area->storage_domain)
+ xa_for_each(&iopt->domains, index, domain) {
+ if (domain == area->storage_domain)
+ continue;
+
+ if (!iopt_dmabuf_revoked(pages))
iopt_area_unmap_domain_range(
area, domain, iopt_area_index(area),
iopt_area_last_index(area));
+ }
+ if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+ WARN_ON(RB_EMPTY_NODE(&area->pages_node.rb));
interval_tree_remove(&area->pages_node, &pages->domains_itree);
iopt_area_unfill_domain(area, pages, area->storage_domain);
+ if (iopt_is_dmabuf(pages))
+ iopt_dmabuf_untrack_all_domains(area, pages);
area->storage_domain = NULL;
out_unlock:
mutex_unlock(&pages->mutex);
@@ -1627,11 +2134,11 @@ static int iopt_pages_fill_from_domain(struct iopt_pages *pages,
return 0;
}
-static int iopt_pages_fill_from_mm(struct iopt_pages *pages,
- struct pfn_reader_user *user,
- unsigned long start_index,
- unsigned long last_index,
- struct page **out_pages)
+static int iopt_pages_fill(struct iopt_pages *pages,
+ struct pfn_reader_user *user,
+ unsigned long start_index,
+ unsigned long last_index,
+ struct page **out_pages)
{
unsigned long cur_index = start_index;
int rc;
@@ -1705,8 +2212,8 @@ int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start_index,
/* hole */
cur_pages = out_pages + (span.start_hole - start_index);
- rc = iopt_pages_fill_from_mm(pages, &user, span.start_hole,
- span.last_hole, cur_pages);
+ rc = iopt_pages_fill(pages, &user, span.start_hole,
+ span.last_hole, cur_pages);
if (rc)
goto out_clean_xa;
rc = pages_to_xarray(&pages->pinned_pfns, span.start_hole,
@@ -1786,6 +2293,10 @@ static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index,
struct page *page = NULL;
int rc;
+ if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+ WARN_ON(pages->type != IOPT_ADDRESS_USER))
+ return -EINVAL;
+
if (!mmget_not_zero(pages->source_mm))
return iopt_pages_rw_slow(pages, index, index, offset, data,
length, flags);
@@ -1799,7 +2310,7 @@ static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index,
rc = pin_user_pages_remote(
pages->source_mm, (uintptr_t)(pages->uptr + index * PAGE_SIZE),
1, (flags & IOMMUFD_ACCESS_RW_WRITE) ? FOLL_WRITE : 0, &page,
- NULL, NULL);
+ NULL);
mmap_read_unlock(pages->source_mm);
if (rc != 1) {
if (WARN_ON(rc >= 0))
@@ -1841,6 +2352,14 @@ int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte,
if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable)
return -EPERM;
+ if (iopt_is_dmabuf(pages))
+ return -EINVAL;
+
+ if (pages->type != IOPT_ADDRESS_USER)
+ return iopt_pages_rw_slow(pages, start_index, last_index,
+ start_byte % PAGE_SIZE, data, length,
+ flags);
+
if (!(flags & IOMMUFD_ACCESS_RW_KTHREAD) && change_mm) {
if (start_index == last_index)
return iopt_pages_rw_page(pages, start_index,
@@ -1904,6 +2423,7 @@ iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index,
* @last_index: Inclusive last page index
* @out_pages: Output list of struct page's representing the PFNs
* @flags: IOMMUFD_ACCESS_RW_* flags
+ * @lock_area: Fail userspace munmap on this area
*
* Record that an in-kernel access will be accessing the pages, ensure they are
* pinned, and return the PFNs as a simple list of 'struct page *'.
@@ -1911,8 +2431,8 @@ iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index,
* This should be undone through a matching call to iopt_area_remove_access()
*/
int iopt_area_add_access(struct iopt_area *area, unsigned long start_index,
- unsigned long last_index, struct page **out_pages,
- unsigned int flags)
+ unsigned long last_index, struct page **out_pages,
+ unsigned int flags, bool lock_area)
{
struct iopt_pages *pages = area->pages;
struct iopt_pages_access *access;
@@ -1925,6 +2445,8 @@ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index,
access = iopt_pages_get_exact_access(pages, start_index, last_index);
if (access) {
area->num_accesses++;
+ if (lock_area)
+ area->num_locks++;
access->users++;
iopt_pages_fill_from_xarray(pages, start_index, last_index,
out_pages);
@@ -1946,6 +2468,8 @@ int iopt_area_add_access(struct iopt_area *area, unsigned long start_index,
access->node.last = last_index;
access->users = 1;
area->num_accesses++;
+ if (lock_area)
+ area->num_locks++;
interval_tree_insert(&access->node, &pages->access_itree);
mutex_unlock(&pages->mutex);
return 0;
@@ -1962,12 +2486,13 @@ err_unlock:
* @area: The source of PFNs
* @start_index: First page index
* @last_index: Inclusive last page index
+ * @unlock_area: Must match the lock_area passed to the paired iopt_area_add_access()
*
* Undo iopt_area_add_access() and unpin the pages if necessary. The caller
* must stop using the PFNs before calling this.
*/
void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index,
- unsigned long last_index)
+ unsigned long last_index, bool unlock_area)
{
struct iopt_pages *pages = area->pages;
struct iopt_pages_access *access;
@@ -1978,6 +2503,10 @@ void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index,
goto out_unlock;
WARN_ON(area->num_accesses == 0 || access->users == 0);
+ if (unlock_area) {
+ WARN_ON(area->num_locks == 0);
+ area->num_locks--;
+ }
area->num_accesses--;
access->users--;
if (access->users)