summary refs log tree commit diff
path: root/io_uring/zcrx.c
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring/zcrx.c')
-rw-r--r-- io_uring/zcrx.c 274
1 files changed, 156 insertions, 118 deletions
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 085eeed8cd50..e5ff49f3425e 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -44,9 +44,40 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
{
struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+ lockdep_assert(!area->mem.is_dmabuf);
+
return area->mem.pages[net_iov_idx(niov)];
}
+static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
+ struct io_zcrx_area *area,
+ struct sg_table *sgt, unsigned long off)
+{ /*
+ * Walk the DMA segments of @sgt and give consecutive entries of
+ * area->nia.niovs one DMA address each, PAGE_SIZE apart, after
+ * skipping the first @off bytes of the table. @ifq is not referenced
+ * in this body.
+ *
+ * Returns 0 after the walk, or -EFAULT as soon as
+ * net_mp_niov_set_dma_addr() reports failure. Nothing is rolled back
+ * here on failure, and a 0 return does not by itself show that all
+ * num_niovs entries were filled.
+ *
+ * NOTE(review): io_zcrx_map_area_umem() forwards this result right
+ * after dma_map_sgtable() and is declared to return unsigned; confirm
+ * that its error path unmaps the table and keeps the sign.
+ */
+ struct scatterlist *sg;
+ unsigned i, niov_idx = 0;
+
+ for_each_sgtable_dma_sg(sgt, sg, i) {
+ dma_addr_t dma = sg_dma_address(sg);
+ unsigned long sg_len = sg_dma_len(sg);
+ unsigned long sg_off = min(sg_len, off); /* share of @off that falls in this segment */
+
+ off -= sg_off;
+ sg_len -= sg_off;
+ dma += sg_off;
+
+ while (sg_len && niov_idx < area->nia.num_niovs) { /* one niov per PAGE_SIZE step */
+ struct net_iov *niov = &area->nia.niovs[niov_idx];
+
+ if (net_mp_niov_set_dma_addr(niov, dma))
+ return -EFAULT;
+ sg_len -= PAGE_SIZE; /* NOTE(review): sg_len is unsigned and would wrap
+ * if fewer than PAGE_SIZE bytes remained; presumably
+ * the remaining length is a page multiple - confirm */
+ dma += PAGE_SIZE;
+ niov_idx++;
+ }
+ }
+ return 0;
+}
+
static void io_release_dmabuf(struct io_zcrx_mem *mem)
{
if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
@@ -76,6 +107,8 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
int dmabuf_fd = area_reg->dmabuf_fd;
int i, ret;
+ if (off)
+ return -EINVAL;
if (WARN_ON_ONCE(!ifq->dev))
return -EFAULT;
if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
@@ -106,7 +139,7 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
for_each_sgtable_dma_sg(mem->sgt, sg, i)
total_size += sg_dma_len(sg);
- if (total_size < off + len) {
+ if (total_size != len) {
ret = -EINVAL;
goto err;
}
@@ -121,33 +154,27 @@ err:
static int io_zcrx_map_area_dmabuf(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
{
- unsigned long off = area->mem.dmabuf_offset;
- struct scatterlist *sg;
- unsigned i, niov_idx = 0;
-
if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
return -EINVAL;
+ return io_populate_area_dma(ifq, area, area->mem.sgt,
+ area->mem.dmabuf_offset);
+}
- for_each_sgtable_dma_sg(area->mem.sgt, sg, i) {
- dma_addr_t dma = sg_dma_address(sg);
- unsigned long sg_len = sg_dma_len(sg);
- unsigned long sg_off = min(sg_len, off);
-
- off -= sg_off;
- sg_len -= sg_off;
- dma += sg_off;
+static unsigned long io_count_account_pages(struct page **pages, unsigned nr_pages)
+{
+ struct folio *last_folio = NULL;
+ unsigned long res = 0;
+ int i;
- while (sg_len && niov_idx < area->nia.num_niovs) {
- struct net_iov *niov = &area->nia.niovs[niov_idx];
+ for (i = 0; i < nr_pages; i++) {
+ struct folio *folio = page_folio(pages[i]);
- if (net_mp_niov_set_dma_addr(niov, dma))
- return 0;
- sg_len -= PAGE_SIZE;
- dma += PAGE_SIZE;
- niov_idx++;
- }
+ if (folio == last_folio)
+ continue;
+ last_folio = folio;
+ res += 1UL << folio_order(folio);
}
- return niov_idx;
+ return res;
}
static int io_import_umem(struct io_zcrx_ifq *ifq,
@@ -155,7 +182,7 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
struct io_uring_zcrx_area_reg *area_reg)
{
struct page **pages;
- int nr_pages;
+ int nr_pages, ret;
if (area_reg->dmabuf_fd)
return -EINVAL;
@@ -166,10 +193,23 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
if (IS_ERR(pages))
return PTR_ERR(pages);
+ ret = sg_alloc_table_from_pages(&mem->page_sg_table, pages, nr_pages,
+ 0, nr_pages << PAGE_SHIFT,
+ GFP_KERNEL_ACCOUNT);
+ if (ret) {
+ unpin_user_pages(pages, nr_pages);
+ return ret;
+ }
+
+ mem->account_pages = io_count_account_pages(pages, nr_pages);
+ ret = io_account_mem(ifq->ctx, mem->account_pages);
+ if (ret < 0)
+ mem->account_pages = 0;
+
mem->pages = pages;
mem->nr_folios = nr_pages;
mem->size = area_reg->len;
- return 0;
+ return ret;
}
static void io_release_area_mem(struct io_zcrx_mem *mem)
@@ -180,6 +220,7 @@ static void io_release_area_mem(struct io_zcrx_mem *mem)
}
if (mem->pages) {
unpin_user_pages(mem->pages, mem->nr_folios);
+ sg_free_table(&mem->page_sg_table);
kvfree(mem->pages);
}
}
@@ -201,84 +242,54 @@ static int io_import_area(struct io_zcrx_ifq *ifq,
return io_import_umem(ifq, mem, area_reg);
}
-static void io_zcrx_unmap_umem(struct io_zcrx_ifq *ifq,
- struct io_zcrx_area *area, int nr_mapped)
-{
- int i;
-
- for (i = 0; i < nr_mapped; i++) {
- netmem_ref netmem = net_iov_to_netmem(&area->nia.niovs[i]);
- dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem);
-
- dma_unmap_page_attrs(ifq->dev, dma, PAGE_SIZE,
- DMA_FROM_DEVICE, IO_DMA_ATTR);
- }
-}
-
-static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
- struct io_zcrx_area *area, int nr_mapped)
+static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
+ struct io_zcrx_area *area)
{
int i;
- if (area->mem.is_dmabuf)
- io_release_dmabuf(&area->mem);
- else
- io_zcrx_unmap_umem(ifq, area, nr_mapped);
+ guard(mutex)(&ifq->dma_lock);
+ if (!area->is_mapped)
+ return;
+ area->is_mapped = false;
for (i = 0; i < area->nia.num_niovs; i++)
net_mp_niov_set_dma_addr(&area->nia.niovs[i], 0);
-}
-static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
-{
- guard(mutex)(&ifq->dma_lock);
-
- if (area->is_mapped)
- __io_zcrx_unmap_area(ifq, area, area->nia.num_niovs);
- area->is_mapped = false;
+ if (area->mem.is_dmabuf) {
+ io_release_dmabuf(&area->mem);
+ } else {
+ dma_unmap_sgtable(ifq->dev, &area->mem.page_sg_table,
+ DMA_FROM_DEVICE, IO_DMA_ATTR);
+ }
}
-static int io_zcrx_map_area_umem(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
+static unsigned io_zcrx_map_area_umem(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
{
- int i;
-
- for (i = 0; i < area->nia.num_niovs; i++) {
- struct net_iov *niov = &area->nia.niovs[i];
- dma_addr_t dma;
+ int ret;
- dma = dma_map_page_attrs(ifq->dev, area->mem.pages[i], 0,
- PAGE_SIZE, DMA_FROM_DEVICE, IO_DMA_ATTR);
- if (dma_mapping_error(ifq->dev, dma))
- break;
- if (net_mp_niov_set_dma_addr(niov, dma)) {
- dma_unmap_page_attrs(ifq->dev, dma, PAGE_SIZE,
- DMA_FROM_DEVICE, IO_DMA_ATTR);
- break;
- }
- }
- return i;
+ ret = dma_map_sgtable(ifq->dev, &area->mem.page_sg_table,
+ DMA_FROM_DEVICE, IO_DMA_ATTR);
+ if (ret < 0)
+ return ret;
+ return io_populate_area_dma(ifq, area, &area->mem.page_sg_table, 0);
}
static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
{
- unsigned nr;
+ int ret;
guard(mutex)(&ifq->dma_lock);
if (area->is_mapped)
return 0;
if (area->mem.is_dmabuf)
- nr = io_zcrx_map_area_dmabuf(ifq, area);
+ ret = io_zcrx_map_area_dmabuf(ifq, area);
else
- nr = io_zcrx_map_area_umem(ifq, area);
-
- if (nr != area->nia.num_niovs) {
- __io_zcrx_unmap_area(ifq, area, nr);
- return -EINVAL;
- }
+ ret = io_zcrx_map_area_umem(ifq, area);
- area->is_mapped = true;
- return 0;
+ if (ret == 0)
+ area->is_mapped = true;
+ return ret;
}
static void io_zcrx_sync_for_device(const struct page_pool *pool,
@@ -368,10 +379,12 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
static void io_zcrx_free_area(struct io_zcrx_area *area)
{
- if (area->ifq)
- io_zcrx_unmap_area(area->ifq, area);
+ io_zcrx_unmap_area(area->ifq, area);
io_release_area_mem(&area->mem);
+ if (area->mem.account_pages)
+ io_unaccount_mem(area->ifq->ctx, area->mem.account_pages);
+
kvfree(area->freelist);
kvfree(area->nia.niovs);
kvfree(area->user_refs);
@@ -399,6 +412,7 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
area = kzalloc(sizeof(*area), GFP_KERNEL);
if (!area)
goto err;
+ area->ifq = ifq;
ret = io_import_area(ifq, &area->mem, area_reg);
if (ret)
@@ -433,7 +447,6 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
}
area->free_count = nr_iovs;
- area->ifq = ifq;
/* we're only supporting one area per ifq for now */
area->area_id = 0;
area_reg->rq_area_token = (u64)area->area_id << IORING_ZCRX_AREA_SHIFT;
@@ -863,10 +876,7 @@ static int io_pp_zc_init(struct page_pool *pp)
static void io_pp_zc_destroy(struct page_pool *pp)
{
struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp);
- struct io_zcrx_area *area = ifq->area;
- if (WARN_ON_ONCE(area->free_count != area->nia.num_niovs))
- return;
percpu_ref_put(&ifq->ctx->refs);
}
@@ -944,9 +954,54 @@ static struct net_iov *io_zcrx_alloc_fallback(struct io_zcrx_area *area)
return niov;
}
+struct io_copy_cache { /* destination cursor consumed by io_copy_page() */
+ struct page *page; /* page that io_copy_page() writes into */
+ unsigned long offset; /* byte offset of the next write, relative to @page */
+ size_t size; /* bytes that may still be written; caps each copy */
+};
+
+static ssize_t io_copy_page(struct io_copy_cache *cc, struct page *src_page,
+ unsigned int src_offset, size_t len)
+{ /*
+ * Copy up to @len bytes from @src_page at @src_offset into the
+ * destination described by @cc, never more than cc->size. Advances
+ * cc->offset and shrinks cc->size by the amount copied.
+ *
+ * Returns the number of bytes copied, which is less than the
+ * requested @len when cc->size is smaller.
+ */
+ size_t copied = 0;
+
+ len = min(len, cc->size); /* clamp to the space left in the destination */
+
+ while (len) {
+ void *src_addr, *dst_addr;
+ struct page *dst_page = cc->page;
+ unsigned dst_offset = cc->offset;
+ size_t n = len;
+
+ if (folio_test_partial_kmap(page_folio(dst_page)) ||
+ folio_test_partial_kmap(page_folio(src_page))) { /*
+ * Either folio reports partial kmap: step both sides to the
+ * page holding the current offset and limit this pass so it
+ * stays inside a single page on each side.
+ */
+ dst_page = nth_page(dst_page, dst_offset / PAGE_SIZE);
+ dst_offset = offset_in_page(dst_offset);
+ src_page = nth_page(src_page, src_offset / PAGE_SIZE);
+ src_offset = offset_in_page(src_offset);
+ n = min(PAGE_SIZE - src_offset, PAGE_SIZE - dst_offset);
+ n = min(n, len);
+ }
+
+ dst_addr = kmap_local_page(dst_page) + dst_offset;
+ src_addr = kmap_local_page(src_page) + src_offset;
+
+ memcpy(dst_addr, src_addr, n);
+
+ kunmap_local(src_addr); /* unmapped in the reverse order of mapping */
+ kunmap_local(dst_addr);
+
+ cc->size -= n;
+ cc->offset += n; /* NOTE(review): src_offset is not advanced by n here, so
+ * a further pass (possible only when the branch above
+ * made n smaller than len) starts from the same source
+ * offset again - confirm whether that is intended */
+ len -= n;
+ copied += n;
+ }
+ return copied;
+}
+
static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
- void *src_base, struct page *src_page,
- unsigned int src_offset, size_t len)
+ struct page *src_page, unsigned int src_offset,
+ size_t len)
{
struct io_zcrx_area *area = ifq->area;
size_t copied = 0;
@@ -956,11 +1011,9 @@ static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
return -EFAULT;
while (len) {
- size_t copy_size = min_t(size_t, PAGE_SIZE, len);
- const int dst_off = 0;
+ struct io_copy_cache cc;
struct net_iov *niov;
- struct page *dst_page;
- void *dst_addr;
+ size_t n;
niov = io_zcrx_alloc_fallback(area);
if (!niov) {
@@ -968,27 +1021,22 @@ static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
break;
}
- dst_page = io_zcrx_iov_page(niov);
- dst_addr = kmap_local_page(dst_page);
- if (src_page)
- src_base = kmap_local_page(src_page);
-
- memcpy(dst_addr, src_base + src_offset, copy_size);
+ cc.page = io_zcrx_iov_page(niov);
+ cc.offset = 0;
+ cc.size = PAGE_SIZE;
- if (src_page)
- kunmap_local(src_base);
- kunmap_local(dst_addr);
+ n = io_copy_page(&cc, src_page, src_offset, len);
- if (!io_zcrx_queue_cqe(req, niov, ifq, dst_off, copy_size)) {
+ if (!io_zcrx_queue_cqe(req, niov, ifq, 0, n)) {
io_zcrx_return_niov(niov);
ret = -ENOSPC;
break;
}
io_zcrx_get_niov_uref(niov);
- src_offset += copy_size;
- len -= copy_size;
- copied += copy_size;
+ src_offset += n;
+ len -= n;
+ copied += n;
}
return copied ? copied : ret;
@@ -998,19 +1046,8 @@ static int io_zcrx_copy_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
const skb_frag_t *frag, int off, int len)
{
struct page *page = skb_frag_page(frag);
- u32 p_off, p_len, t, copied = 0;
- int ret = 0;
- off += skb_frag_off(frag);
-
- skb_frag_foreach_page(frag, off, len,
- page, p_off, p_len, t) {
- ret = io_zcrx_copy_chunk(req, ifq, NULL, page, p_off, p_len);
- if (ret < 0)
- return copied ? copied : ret;
- copied += ret;
- }
- return copied;
+ return io_zcrx_copy_chunk(req, ifq, page, off + skb_frag_off(frag), len);
}
static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
@@ -1067,8 +1104,9 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
size_t to_copy;
to_copy = min_t(size_t, skb_headlen(skb) - offset, len);
- copied = io_zcrx_copy_chunk(req, ifq, skb->data, NULL,
- offset, to_copy);
+ copied = io_zcrx_copy_chunk(req, ifq, virt_to_page(skb->data),
+ offset_in_page(skb->data) + offset,
+ to_copy);
if (copied < 0) {
ret = copied;
goto out;