summaryrefslogtreecommitdiff
path: root/lib/iov_iter.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/iov_iter.c')
-rw-r--r--lib/iov_iter.c388
1 files changed, 330 insertions, 58 deletions
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index cf2eb2b2f983..8c7fdb7d8c8f 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -461,6 +461,8 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
size_t bytes, struct iov_iter *i)
{
size_t n, copied = 0;
+ bool uses_kmap = IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) ||
+ PageHighMem(page);
if (!page_copy_sane(page, offset, bytes))
return 0;
@@ -471,7 +473,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
char *p;
n = bytes - copied;
- if (PageHighMem(page)) {
+ if (uses_kmap) {
page += offset / PAGE_SIZE;
offset %= PAGE_SIZE;
n = min_t(size_t, n, PAGE_SIZE - offset);
@@ -482,7 +484,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
kunmap_atomic(p);
copied += n;
offset += n;
- } while (PageHighMem(page) && copied != bytes && n > 0);
+ } while (uses_kmap && copied != bytes && n > 0);
return copied;
}
@@ -527,6 +529,39 @@ static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
i->__iov = iov;
}
+static void iov_iter_folioq_advance(struct iov_iter *i, size_t size)
+{
+ const struct folio_queue *folioq = i->folioq;
+ unsigned int slot = i->folioq_slot;
+
+ if (!i->count)
+ return;
+ i->count -= size;
+
+ if (slot >= folioq_nr_slots(folioq)) {
+ folioq = folioq->next;
+ slot = 0;
+ }
+
+ size += i->iov_offset; /* From beginning of current segment. */
+ do {
+ size_t fsize = folioq_folio_size(folioq, slot);
+
+ if (likely(size < fsize))
+ break;
+ size -= fsize;
+ slot++;
+ if (slot >= folioq_nr_slots(folioq) && folioq->next) {
+ folioq = folioq->next;
+ slot = 0;
+ }
+ } while (size);
+
+ i->iov_offset = size;
+ i->folioq_slot = slot;
+ i->folioq = folioq;
+}
+
void iov_iter_advance(struct iov_iter *i, size_t size)
{
if (unlikely(i->count < size))
@@ -539,12 +574,40 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
iov_iter_iovec_advance(i, size);
} else if (iov_iter_is_bvec(i)) {
iov_iter_bvec_advance(i, size);
+ } else if (iov_iter_is_folioq(i)) {
+ iov_iter_folioq_advance(i, size);
} else if (iov_iter_is_discard(i)) {
i->count -= size;
}
}
EXPORT_SYMBOL(iov_iter_advance);
+static void iov_iter_folioq_revert(struct iov_iter *i, size_t unroll)
+{
+ const struct folio_queue *folioq = i->folioq;
+ unsigned int slot = i->folioq_slot;
+
+ for (;;) {
+ size_t fsize;
+
+ if (slot == 0) {
+ folioq = folioq->prev;
+ slot = folioq_nr_slots(folioq);
+ }
+ slot--;
+
+ fsize = folioq_folio_size(folioq, slot);
+ if (unroll <= fsize) {
+ i->iov_offset = fsize - unroll;
+ break;
+ }
+ unroll -= fsize;
+ }
+
+ i->folioq_slot = slot;
+ i->folioq = folioq;
+}
+
void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
if (!unroll)
@@ -576,6 +639,9 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
}
unroll -= n;
}
+ } else if (iov_iter_is_folioq(i)) {
+ i->iov_offset = 0;
+ iov_iter_folioq_revert(i, unroll);
} else { /* same logics for iovec and kvec */
const struct iovec *iov = iter_iov(i);
while (1) {
@@ -603,6 +669,9 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
if (iov_iter_is_bvec(i))
return min(i->count, i->bvec->bv_len - i->iov_offset);
}
+ if (unlikely(iov_iter_is_folioq(i)))
+ return !i->count ? 0 :
+ umin(folioq_folio_size(i->folioq, i->folioq_slot), i->count);
return i->count;
}
EXPORT_SYMBOL(iov_iter_single_seg_count);
@@ -640,6 +709,36 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
EXPORT_SYMBOL(iov_iter_bvec);
/**
+ * iov_iter_folio_queue - Initialise an I/O iterator to use the folios in a folio queue
+ * @i: The iterator to initialise.
+ * @direction: The direction of the transfer.
+ * @folioq: The starting point in the folio queue.
+ * @first_slot: The first slot in the folio queue to use
+ * @offset: The offset into the folio in the first slot to start at
+ * @count: The size of the I/O buffer in bytes.
+ *
+ * Set up an I/O iterator to either draw data out of the pages attached to an
+ * inode or to inject data into those pages. The pages *must* be prevented
+ * from evaporation, either by taking a ref on them or locking them by the
+ * caller.
+ */
+void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction,
+ const struct folio_queue *folioq, unsigned int first_slot,
+ unsigned int offset, size_t count)
+{
+ BUG_ON(direction & ~1);
+ *i = (struct iov_iter) {
+ .iter_type = ITER_FOLIOQ,
+ .data_source = direction,
+ .folioq = folioq,
+ .folioq_slot = first_slot,
+ .count = count,
+ .iov_offset = offset,
+ };
+}
+EXPORT_SYMBOL(iov_iter_folio_queue);
+
+/**
* iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
* @i: The iterator to initialise.
* @direction: The direction of the transfer.
@@ -691,12 +790,11 @@ EXPORT_SYMBOL(iov_iter_discard);
static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
unsigned len_mask)
{
+ const struct iovec *iov = iter_iov(i);
size_t size = i->count;
size_t skip = i->iov_offset;
- unsigned k;
- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- const struct iovec *iov = iter_iov(i) + k;
+ do {
size_t len = iov->iov_len - skip;
if (len > size)
@@ -706,34 +804,36 @@ static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
if ((unsigned long)(iov->iov_base + skip) & addr_mask)
return false;
+ iov++;
size -= len;
- if (!size)
- break;
- }
+ skip = 0;
+ } while (size);
+
return true;
}
static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
unsigned len_mask)
{
- size_t size = i->count;
+ const struct bio_vec *bvec = i->bvec;
unsigned skip = i->iov_offset;
- unsigned k;
+ size_t size = i->count;
- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- size_t len = i->bvec[k].bv_len - skip;
+ do {
+ size_t len = bvec->bv_len;
if (len > size)
len = size;
if (len & len_mask)
return false;
- if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask)
+ if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
return false;
+ bvec++;
size -= len;
- if (!size)
- break;
- }
+ skip = 0;
+ } while (size);
+
return true;
}
@@ -764,12 +864,19 @@ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
if (iov_iter_is_bvec(i))
return iov_iter_aligned_bvec(i, addr_mask, len_mask);
+ /* With both xarray and folioq types, we're dealing with whole folios. */
if (iov_iter_is_xarray(i)) {
if (i->count & len_mask)
return false;
if ((i->xarray_start + i->iov_offset) & addr_mask)
return false;
}
+ if (iov_iter_is_folioq(i)) {
+ if (i->count & len_mask)
+ return false;
+ if (i->iov_offset & addr_mask)
+ return false;
+ }
return true;
}
@@ -777,13 +884,12 @@ EXPORT_SYMBOL_GPL(iov_iter_is_aligned);
static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
{
+ const struct iovec *iov = iter_iov(i);
unsigned long res = 0;
size_t size = i->count;
size_t skip = i->iov_offset;
- unsigned k;
- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- const struct iovec *iov = iter_iov(i) + k;
+ do {
size_t len = iov->iov_len - skip;
if (len) {
res |= (unsigned long)iov->iov_base + skip;
@@ -791,30 +897,31 @@ static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
len = size;
res |= len;
size -= len;
- if (!size)
- break;
}
- }
+ iov++;
+ skip = 0;
+ } while (size);
return res;
}
static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
{
+ const struct bio_vec *bvec = i->bvec;
unsigned res = 0;
size_t size = i->count;
unsigned skip = i->iov_offset;
- unsigned k;
- for (k = 0; k < i->nr_segs; k++, skip = 0) {
- size_t len = i->bvec[k].bv_len - skip;
- res |= (unsigned long)i->bvec[k].bv_offset + skip;
+ do {
+ size_t len = bvec->bv_len - skip;
+ res |= (unsigned long)bvec->bv_offset + skip;
if (len > size)
len = size;
res |= len;
+ bvec++;
size -= len;
- if (!size)
- break;
- }
+ skip = 0;
+ } while (size);
+
return res;
}
@@ -834,6 +941,9 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
if (iov_iter_is_bvec(i))
return iov_iter_alignment_bvec(i);
+ /* With both xarray and folioq types, we're dealing with whole folios. */
+ if (iov_iter_is_folioq(i))
+ return i->iov_offset | i->count;
if (iov_iter_is_xarray(i))
return (i->xarray_start + i->iov_offset) | i->count;
@@ -886,6 +996,65 @@ static int want_pages_array(struct page ***res, size_t size,
return count;
}
+static ssize_t iter_folioq_get_pages(struct iov_iter *iter,
+ struct page ***ppages, size_t maxsize,
+ unsigned maxpages, size_t *_start_offset)
+{
+ const struct folio_queue *folioq = iter->folioq;
+ struct page **pages;
+ unsigned int slot = iter->folioq_slot;
+ size_t extracted = 0, count = iter->count, iov_offset = iter->iov_offset;
+
+ if (slot >= folioq_nr_slots(folioq)) {
+ folioq = folioq->next;
+ slot = 0;
+ if (WARN_ON(iov_offset != 0))
+ return -EIO;
+ }
+
+ maxpages = want_pages_array(ppages, maxsize, iov_offset & ~PAGE_MASK, maxpages);
+ if (!maxpages)
+ return -ENOMEM;
+ *_start_offset = iov_offset & ~PAGE_MASK;
+ pages = *ppages;
+
+ for (;;) {
+ struct folio *folio = folioq_folio(folioq, slot);
+ size_t offset = iov_offset, fsize = folioq_folio_size(folioq, slot);
+ size_t part = PAGE_SIZE - offset % PAGE_SIZE;
+
+ if (offset < fsize) {
+ part = umin(part, umin(maxsize - extracted, fsize - offset));
+ count -= part;
+ iov_offset += part;
+ extracted += part;
+
+ *pages = folio_page(folio, offset / PAGE_SIZE);
+ get_page(*pages);
+ pages++;
+ maxpages--;
+ }
+
+ if (maxpages == 0 || extracted >= maxsize)
+ break;
+
+ if (iov_offset >= fsize) {
+ iov_offset = 0;
+ slot++;
+ if (slot == folioq_nr_slots(folioq) && folioq->next) {
+ folioq = folioq->next;
+ slot = 0;
+ }
+ }
+ }
+
+ iter->count = count;
+ iter->iov_offset = iov_offset;
+ iter->folioq = folioq;
+ iter->folioq_slot = slot;
+ return extracted;
+}
+
static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
pgoff_t index, unsigned int nr_pages)
{
@@ -1021,8 +1190,12 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
if (!n)
return -ENOMEM;
p = *pages;
- for (int k = 0; k < n; k++)
- get_page(p[k] = page + k);
+ for (int k = 0; k < n; k++) {
+ struct folio *folio = page_folio(page);
+ p[k] = page + k;
+ if (!folio_test_slab(folio))
+ folio_get(folio);
+ }
maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start);
i->count -= maxsize;
i->iov_offset += maxsize;
@@ -1033,6 +1206,8 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
}
return maxsize;
}
+ if (iov_iter_is_folioq(i))
+ return iter_folioq_get_pages(i, pages, maxsize, maxpages, start);
if (iov_iter_is_xarray(i))
return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
return -EFAULT;
@@ -1117,6 +1292,11 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
return iov_npages(i, maxpages);
if (iov_iter_is_bvec(i))
return bvec_npages(i, maxpages);
+ if (iov_iter_is_folioq(i)) {
+ unsigned offset = i->iov_offset % PAGE_SIZE;
+ int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
+ return min(npages, maxpages);
+ }
if (iov_iter_is_xarray(i)) {
unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
@@ -1143,11 +1323,12 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
EXPORT_SYMBOL(dup_iter);
static __noclone int copy_compat_iovec_from_user(struct iovec *iov,
- const struct iovec __user *uvec, unsigned long nr_segs)
+ const struct iovec __user *uvec, u32 nr_segs)
{
const struct compat_iovec __user *uiov =
(const struct compat_iovec __user *)uvec;
- int ret = -EFAULT, i;
+ int ret = -EFAULT;
+ u32 i;
if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
return -EFAULT;
@@ -1251,6 +1432,8 @@ static ssize_t __import_iovec_ubuf(int type, const struct iovec __user *uvec,
struct iovec *iov = *iovp;
ssize_t ret;
+ *iovp = NULL;
+
if (compat)
ret = copy_compat_iovec_from_user(iov, uvec, 1);
else
@@ -1261,7 +1444,6 @@ static ssize_t __import_iovec_ubuf(int type, const struct iovec __user *uvec,
ret = import_ubuf(type, iov->iov_base, iov->iov_len, i);
if (unlikely(ret))
return ret;
- *iovp = NULL;
return i->count;
}
@@ -1397,6 +1579,68 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
}
/*
+ * Extract a list of contiguous pages from an ITER_FOLIOQ iterator. This does
+ * not get references on the pages, nor does it get a pin on them.
+ */
+static ssize_t iov_iter_extract_folioq_pages(struct iov_iter *i,
+ struct page ***pages, size_t maxsize,
+ unsigned int maxpages,
+ iov_iter_extraction_t extraction_flags,
+ size_t *offset0)
+{
+ const struct folio_queue *folioq = i->folioq;
+ struct page **p;
+ unsigned int nr = 0;
+ size_t extracted = 0, offset, slot = i->folioq_slot;
+
+ if (slot >= folioq_nr_slots(folioq)) {
+ folioq = folioq->next;
+ slot = 0;
+ if (WARN_ON(i->iov_offset != 0))
+ return -EIO;
+ }
+
+ offset = i->iov_offset & ~PAGE_MASK;
+ *offset0 = offset;
+
+ maxpages = want_pages_array(pages, maxsize, offset, maxpages);
+ if (!maxpages)
+ return -ENOMEM;
+ p = *pages;
+
+ for (;;) {
+ struct folio *folio = folioq_folio(folioq, slot);
+ size_t offset = i->iov_offset, fsize = folioq_folio_size(folioq, slot);
+ size_t part = PAGE_SIZE - offset % PAGE_SIZE;
+
+ if (offset < fsize) {
+ part = umin(part, umin(maxsize - extracted, fsize - offset));
+ i->count -= part;
+ i->iov_offset += part;
+ extracted += part;
+
+ p[nr++] = folio_page(folio, offset / PAGE_SIZE);
+ }
+
+ if (nr >= maxpages || extracted >= maxsize)
+ break;
+
+ if (i->iov_offset >= fsize) {
+ i->iov_offset = 0;
+ slot++;
+ if (slot == folioq_nr_slots(folioq) && folioq->next) {
+ folioq = folioq->next;
+ slot = 0;
+ }
+ }
+ }
+
+ i->folioq = folioq;
+ i->folioq_slot = slot;
+ return extracted;
+}
+
+/*
* Extract a list of contiguous pages from an ITER_XARRAY iterator. This does not
* get references on the pages, nor does it get a pin on them.
*/
@@ -1443,8 +1687,8 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
}
/*
- * Extract a list of contiguous pages from an ITER_BVEC iterator. This does
- * not get references on the pages, nor does it get a pin on them.
+ * Extract a list of virtually contiguous pages from an ITER_BVEC iterator.
+ * This does not get references on the pages, nor does it get a pin on them.
*/
static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
struct page ***pages, size_t maxsize,
@@ -1452,35 +1696,59 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
iov_iter_extraction_t extraction_flags,
size_t *offset0)
{
- struct page **p, *page;
- size_t skip = i->iov_offset, offset, size;
- int k;
+ size_t skip = i->iov_offset, size = 0;
+ struct bvec_iter bi;
+ int k = 0;
- for (;;) {
- if (i->nr_segs == 0)
- return 0;
- size = min(maxsize, i->bvec->bv_len - skip);
- if (size)
- break;
+ if (i->nr_segs == 0)
+ return 0;
+
+ if (i->iov_offset == i->bvec->bv_len) {
i->iov_offset = 0;
i->nr_segs--;
i->bvec++;
skip = 0;
}
+ bi.bi_idx = 0;
+ bi.bi_size = maxsize;
+ bi.bi_bvec_done = skip;
+
+ maxpages = want_pages_array(pages, maxsize, skip, maxpages);
+
+ while (bi.bi_size && bi.bi_idx < i->nr_segs) {
+ struct bio_vec bv = bvec_iter_bvec(i->bvec, bi);
+
+ /*
+ * The iov_iter_extract_pages interface only allows an offset
+ * into the first page. Break out of the loop if we see an
+ * offset into subsequent pages, the caller will have to call
+ * iov_iter_extract_pages again for the reminder.
+ */
+ if (k) {
+ if (bv.bv_offset)
+ break;
+ } else {
+ *offset0 = bv.bv_offset;
+ }
- skip += i->bvec->bv_offset;
- page = i->bvec->bv_page + skip / PAGE_SIZE;
- offset = skip % PAGE_SIZE;
- *offset0 = offset;
+ (*pages)[k++] = bv.bv_page;
+ size += bv.bv_len;
- maxpages = want_pages_array(pages, size, offset, maxpages);
- if (!maxpages)
- return -ENOMEM;
- p = *pages;
- for (k = 0; k < maxpages; k++)
- p[k] = page + k;
+ if (k >= maxpages)
+ break;
+
+ /*
+ * We are done when the end of the bvec doesn't align to a page
+ * boundary as that would create a hole in the returned space.
+ * The caller will handle this with another call to
+ * iov_iter_extract_pages.
+ */
+ if (bv.bv_offset + bv.bv_len != PAGE_SIZE)
+ break;
+
+ bvec_iter_advance_single(i->bvec, &bi, bv.bv_len);
+ }
- size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
iov_iter_advance(i, size);
return size;
}
@@ -1616,8 +1884,8 @@ static ssize_t iov_iter_extract_user_pages(struct iov_iter *i,
* added to the pages, but refs will not be taken.
* iov_iter_extract_will_pin() will return true.
*
- * (*) If the iterator is ITER_KVEC, ITER_BVEC or ITER_XARRAY, the pages are
- * merely listed; no extra refs or pins are obtained.
+ * (*) If the iterator is ITER_KVEC, ITER_BVEC, ITER_FOLIOQ or ITER_XARRAY, the
+ * pages are merely listed; no extra refs or pins are obtained.
* iov_iter_extract_will_pin() will return 0.
*
* Note also:
@@ -1652,6 +1920,10 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i,
return iov_iter_extract_bvec_pages(i, pages, maxsize,
maxpages, extraction_flags,
offset0);
+ if (iov_iter_is_folioq(i))
+ return iov_iter_extract_folioq_pages(i, pages, maxsize,
+ maxpages, extraction_flags,
+ offset0);
if (iov_iter_is_xarray(i))
return iov_iter_extract_xarray_pages(i, pages, maxsize,
maxpages, extraction_flags,