From c191bc070eba9cbbd166322cad293dd09c48c78e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 8 Feb 2023 14:56:10 +0000 Subject: cifs: Use a folio in cifs_page_mkwrite() Avoids many calls to compound_head() and removes calls to various compat functions. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells Signed-off-by: Steve French --- fs/cifs/file.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs/cifs/file.c') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index b8d1cbadb689..7eb476a23701 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4489,23 +4489,22 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) * If the page is mmap'ed into a process' page tables, then we need to make * sure that it doesn't change while being written back. */ -static vm_fault_t -cifs_page_mkwrite(struct vm_fault *vmf) +static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); - /* Wait for the page to be written to the cache before we allow it to - * be modified. We then assume the entire page will need writing back. + /* Wait for the folio to be written to the cache before we allow it to + * be modified. We then assume the entire folio will need writing back. */ #ifdef CONFIG_CIFS_FSCACHE - if (PageFsCache(page) && - wait_on_page_fscache_killable(page) < 0) + if (folio_test_fscache(folio) && + folio_wait_fscache_killable(folio) < 0) return VM_FAULT_RETRY; #endif - wait_on_page_writeback(page); + folio_wait_writeback(folio); - if (lock_page_killable(page) < 0) + if (folio_lock_killable(folio) < 0) return VM_FAULT_RETRY; return VM_FAULT_LOCKED; } -- cgit From de036dcaca65cf94bf7ff09c571c077f02bc92b4 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 11 Jan 2023 12:37:58 +0100 Subject: cifs: Fix uninitialized memory reads for oparms.mode Use a struct assignment with implicit member initialization Signed-off-by: Volker Lendecke Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/cifs/file.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'fs/cifs/file.c') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7eb476a23701..e216bc9b7abf 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -260,14 +260,15 @@ static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_ if (f_flags & O_DIRECT) create_options |= CREATE_NO_BUFFER; - oparms.tcon = tcon; - oparms.cifs_sb = cifs_sb; - oparms.desired_access = desired_access; - oparms.create_options = cifs_create_options(cifs_sb, create_options); - oparms.disposition = disposition; - oparms.path = full_path; - oparms.fid = fid; - oparms.reconnect = false; + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, + .desired_access = desired_access, + .create_options = cifs_create_options(cifs_sb, create_options), + .disposition = disposition, + .path = full_path, + .fid = fid, + }; rc = server->ops->open(xid, &oparms, oplock, buf); if (rc) @@ -848,14 +849,16 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &cfile->fid); - oparms.tcon = tcon; - oparms.cifs_sb = cifs_sb; - oparms.desired_access = desired_access; - oparms.create_options = cifs_create_options(cifs_sb, create_options); - oparms.disposition = disposition; - oparms.path = full_path; - oparms.fid = &cfile->fid; - oparms.reconnect = true; + oparms = 
(struct cifs_open_parms) {
+		.tcon = tcon,
+		.cifs_sb = cifs_sb,
+		.desired_access = desired_access,
+		.create_options = cifs_create_options(cifs_sb, create_options),
+		.disposition = disposition,
+		.path = full_path,
+		.fid = &cfile->fid,
+		.reconnect = true,
+	};
 
 	/*
 	 * Can not refresh inode by passing in file_info buf to be returned by
-- cgit

From 4e260a8fd740aa0dcecafe79c4f9d3013a21f1ca Mon Sep 17 00:00:00 2001
From: David Howells
Date: Tue, 1 Nov 2022 14:52:47 +0000
Subject: cifs: Implement splice_read to pass down ITER_BVEC not ITER_PIPE

Provide cifs_splice_read() to use a bvec rather than a pipe iterator as
the latter cannot so easily be split and advanced, which is necessary to
pass an iterator down to the bottom levels.  Upstream cifs gets around
this problem by using iov_iter_get_pages() to prefill the pipe and then
passing the list of pages down.

This is done by:

 (1) Bulk-allocating a bunch of pages to carry as much of the requested
     amount of data as possible, but without overrunning the available
     slots in the pipe, and adding them to an ITER_BVEC.

 (2) Synchronously calling ->read_iter() to read into the buffer.

 (3) Discarding any unused pages.

 (4) Loading the remaining pages into the pipe in order and advancing the
     head pointer.

Signed-off-by: David Howells
cc: Steve French
cc: Shyam Prasad N
cc: Rohith Surabattula
cc: Jeff Layton
cc: Al Viro
cc: linux-cifs@vger.kernel.org
Link: https://lore.kernel.org/r/166732028113.3186319.1793644937097301358.stgit@warthog.procyon.org.uk/ # rfc
Signed-off-by: Steve French
---
 fs/cifs/file.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'fs/cifs/file.c')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e216bc9b7abf..ddf6f572af81 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -5275,3 +5275,19 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
 	.launder_folio = cifs_launder_folio,
 	.migrate_folio = filemap_migrate_folio,
 };
+
+/*
+ * Splice data from a file into a pipe.
+ */
+ssize_t cifs_splice_read(struct file *in, loff_t *ppos,
+			 struct pipe_inode_info *pipe, size_t len,
+			 unsigned int flags)
+{
+	if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes))
+		return 0;
+	if (unlikely(!len))
+		return 0;
+	if (in->f_flags & O_DIRECT)
+		return direct_splice_read(in, ppos, pipe, len, flags);
+	return filemap_splice_read(in, ppos, pipe, len, flags);
+}
-- cgit

From b8713c4dbfa3ebb86c492bc2a836e3ebb748e063 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Wed, 6 Apr 2022 19:41:28 +0100
Subject: cifs: Add some helper functions

Add some helper functions to manipulate the folio marks by iterating
through a list of folios held in an xarray rather than using a page
list.
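
For illustration only (this sketch is not part of the patch): the three
helpers added below, cifs_pages_written_back(), cifs_pages_write_failed()
and cifs_pages_write_redirty(), all share one shape - an RCU-protected walk
of the inode's pagecache xarray across the affected byte span, applying a
per-folio action before ending writeback.  A condensed sketch, where the
function name and the end_op() callback are hypothetical:

	/* Condensed sketch of the pattern shared by the helpers below;
	 * end_op() stands in for the per-helper action (detaching private
	 * data, setting the error flag, or redirtying the folio).
	 */
	static void cifs_pages_walk_span(struct address_space *mapping,
					 loff_t start, unsigned int len,
					 void (*end_op)(struct folio *folio))
	{
		struct folio *folio;
		pgoff_t end = (start + len - 1) / PAGE_SIZE;
		XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

		if (!len)
			return;

		rcu_read_lock();
		xas_for_each(&xas, folio, end) {
			if (!folio_test_writeback(folio))
				continue; /* the real helpers WARN_ONCE() here */
			end_op(folio);
			folio_end_writeback(folio);
		}
		rcu_read_unlock();
	}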
Signed-off-by: David Howells cc: Steve French cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org Link: https://lore.kernel.org/r/164928616583.457102.15157033997163988344.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/165211418840.3154751.3090684430628501879.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/165348878940.2106726.204291614267188735.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/165364825674.3334034.3356201708659748648.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/166126394799.708021.10637797063862600488.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/166697258147.61150.9940790486999562110.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/166732030314.3186319.9209944805565413627.stgit@warthog.procyon.org.uk/ # rfc Signed-off-by: Steve French --- fs/cifs/file.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'fs/cifs/file.c') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ddf6f572af81..09240b8b018a 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -36,6 +36,99 @@ #include "cifs_ioctl.h" #include "cached_dir.h" +/* + * Completion of write to server. + */ +void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len) +{ + struct address_space *mapping = inode->i_mapping; + struct folio *folio; + pgoff_t end; + + XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); + + if (!len) + return; + + rcu_read_lock(); + + end = (start + len - 1) / PAGE_SIZE; + xas_for_each(&xas, folio, end) { + if (!folio_test_writeback(folio)) { + WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", + len, start, folio_index(folio), end); + continue; + } + + folio_detach_private(folio); + folio_end_writeback(folio); + } + + rcu_read_unlock(); +} + +/* + * Failure of write to server. + */ +void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len) +{ + struct address_space *mapping = inode->i_mapping; + struct folio *folio; + pgoff_t end; + + XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); + + if (!len) + return; + + rcu_read_lock(); + + end = (start + len - 1) / PAGE_SIZE; + xas_for_each(&xas, folio, end) { + if (!folio_test_writeback(folio)) { + WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", + len, start, folio_index(folio), end); + continue; + } + + folio_set_error(folio); + folio_end_writeback(folio); + } + + rcu_read_unlock(); +} + +/* + * Redirty pages after a temporary failure. + */ +void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len) +{ + struct address_space *mapping = inode->i_mapping; + struct folio *folio; + pgoff_t end; + + XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); + + if (!len) + return; + + rcu_read_lock(); + + end = (start + len - 1) / PAGE_SIZE; + xas_for_each(&xas, folio, end) { + if (!folio_test_writeback(folio)) { + WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", + len, start, folio_index(folio), end); + continue; + } + + filemap_dirty_folio(folio->mapping, folio); + folio_end_writeback(folio); + } + + rcu_read_unlock(); +} + /* * Mark as invalid, all open files on tree connections since they * were closed when session to server was lost. 
-- cgit

From d08089f649a0cfb2099c8551ac47eef0cc23fdf2 Mon Sep 17 00:00:00 2001
From: David Howells
Date: Mon, 24 Jan 2022 21:13:24 +0000
Subject: cifs: Change the I/O paths to use an iterator rather than a page list

Currently, the cifs I/O paths hand lists of pages from the VM interface
routines at the top all the way through the intervening layers to the
socket interface at the bottom.

This is a problem, however, for interfacing with netfslib, which passes an
iterator through to the ->issue_read() method (and will pass an iterator
through to the ->issue_write() method in future).  Netfslib takes over
bounce buffering for direct I/O, async I/O and encrypted content, so cifs
doesn't need to do that.  Netfslib also converts IOVEC-type iterators into
BVEC-type iterators if necessary.

Further, cifs needs to be converted to use folios - and folios may come in
a variety of sizes, so a page list pointing to an array of heterogeneous
pages may cause problems in places such as where crypto is done.

Change the cifs I/O paths to hand iov_iter iterators all the way through
instead.

Notes:

 (1) Some old routines are #if'd out to be removed in a follow-up patch so
     as to avoid confusing the diff, thereby making the diff output easier
     to follow.  I've removed functions that don't overlap with anything
     added.

 (2) struct smb_rqst loses rq_pages, rq_offset, rq_npages, rq_pagesz and
     rq_tailsz, which describe the pages forming the buffer; instead
     there's an rq_iter describing the source buffer and an rq_buffer
     which is used to hold the buffer for encryption.

 (3) struct cifs_readdata and cifs_writedata are modified similarly to
     smb_rqst.  Their ->read_into_pages() and ->copy_into_pages() hooks
     are then replaced with passing the iterator directly to the socket.

     The iterators are stored in these structs so that they are persistent
     and don't get deallocated when the function returns (unlike if they
     were stack variables).

 (4) Buffered writeback is overhauled, borrowing the code from the afs
     filesystem to gather up contiguous runs of folios.  The XARRAY-type
     iterator is then used to refer directly to the pagecache and can be
     passed to the socket to transmit data directly from there.

     This includes:

	cifs_extend_writeback()
	cifs_write_back_from_locked_folio()
	cifs_writepages_region()
	cifs_writepages()

 (5) Pages are converted to folios.

 (6) Direct I/O uses netfs_extract_user_iter() to create a BVEC-type
     iterator from an IOBUF/UBUF-type source iterator.

 (7) smb2_get_aead_req() uses netfs_extract_iter_to_sg() to extract page
     fragments from the iterator into the scatterlists that the crypto
     layer prefers.

 (8) smb2_init_transform_rq() attaches pages to smb_rqst::rq_buffer, an
     xarray, to use as a bounce buffer for encryption.  An XARRAY-type
     iterator can then be used to pass the bounce buffer to lower layers.
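
As a compact sketch of the central change (illustrative only; the function
and the resend_* names are made up for this example): a span of pagecache
is described by an ITER_XARRAY iterator, and a retry carves out a sub-span
by copying and trimming the iterator rather than re-collecting struct page
pointers.  cifs_writev_requeue() in the diff below uses exactly this
advance-and-truncate pattern:

	/* Illustrative only: describe a pagecache span with an iterator
	 * and derive a sub-span from it without touching any page lists.
	 */
	static void example_describe_span(struct address_space *mapping,
					  loff_t start, size_t len,
					  loff_t resend_pos, size_t resend_len)
	{
		struct iov_iter iter, sub;

		iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages,
				start, len);

		/* Trim to [resend_pos, resend_pos + resend_len). */
		sub = iter;
		iov_iter_advance(&sub, resend_pos - start);
		iov_iter_truncate(&sub, resend_len);

		/* sub can now be handed down (e.g. to ->async_writev())
		 * in place of a page array.
		 */
	}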
Signed-off-by: David Howells cc: Steve French cc: Shyam Prasad N cc: Rohith Surabattula cc: Paulo Alcantara cc: Jeff Layton cc: linux-cifs@vger.kernel.org Link: https://lore.kernel.org/r/164311907995.2806745.400147335497304099.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/164928620163.457102.11602306234438271112.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/165211420279.3154751.15923591172438186144.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/165348880385.2106726.3220789453472800240.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/165364827111.3334034.934805882842932881.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/166126396180.708021.271013668175370826.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/166697259595.61150.5982032408321852414.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/166732031756.3186319.12528413619888902872.stgit@warthog.procyon.org.uk/ # rfc Signed-off-by: Steve French --- fs/cifs/file.c | 1195 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 748 insertions(+), 447 deletions(-) (limited to 'fs/cifs/file.c') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 09240b8b018a..599578f7e961 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -36,6 +36,32 @@ #include "cifs_ioctl.h" #include "cached_dir.h" +/* + * Remove the dirty flags from a span of pages. + */ +static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len) +{ + struct address_space *mapping = inode->i_mapping; + struct folio *folio; + pgoff_t end; + + XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); + + rcu_read_lock(); + + end = (start + len - 1) / PAGE_SIZE; + xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) { + xas_pause(&xas); + rcu_read_unlock(); + folio_lock(folio); + folio_clear_dirty_for_io(folio); + folio_unlock(folio); + rcu_read_lock(); + } + + rcu_read_unlock(); +} + /* * Completion of write to server. 
*/ @@ -2391,7 +2417,6 @@ cifs_writedata_release(struct kref *refcount) if (wdata->cfile) cifsFileInfo_put(wdata->cfile); - kvfree(wdata->pages); kfree(wdata); } @@ -2402,51 +2427,49 @@ cifs_writedata_release(struct kref *refcount) static void cifs_writev_requeue(struct cifs_writedata *wdata) { - int i, rc = 0; + int rc = 0; struct inode *inode = d_inode(wdata->cfile->dentry); struct TCP_Server_Info *server; - unsigned int rest_len; + unsigned int rest_len = wdata->bytes; + loff_t fpos = wdata->offset; server = tlink_tcon(wdata->cfile->tlink)->ses->server; - i = 0; - rest_len = wdata->bytes; do { struct cifs_writedata *wdata2; - unsigned int j, nr_pages, wsize, tailsz, cur_len; + unsigned int wsize, cur_len; wsize = server->ops->wp_retry_size(inode); if (wsize < rest_len) { - nr_pages = wsize / PAGE_SIZE; - if (!nr_pages) { + if (wsize < PAGE_SIZE) { rc = -EOPNOTSUPP; break; } - cur_len = nr_pages * PAGE_SIZE; - tailsz = PAGE_SIZE; + cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); } else { - nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE); cur_len = rest_len; - tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE; } - wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete); + wdata2 = cifs_writedata_alloc(cifs_writev_complete); if (!wdata2) { rc = -ENOMEM; break; } - for (j = 0; j < nr_pages; j++) { - wdata2->pages[j] = wdata->pages[i + j]; - lock_page(wdata2->pages[j]); - clear_page_dirty_for_io(wdata2->pages[j]); - } - wdata2->sync_mode = wdata->sync_mode; - wdata2->nr_pages = nr_pages; - wdata2->offset = page_offset(wdata2->pages[0]); - wdata2->pagesz = PAGE_SIZE; - wdata2->tailsz = tailsz; - wdata2->bytes = cur_len; + wdata2->offset = fpos; + wdata2->bytes = cur_len; + wdata2->iter = wdata->iter; + + iov_iter_advance(&wdata2->iter, fpos - wdata->offset); + iov_iter_truncate(&wdata2->iter, wdata2->bytes); + + if (iov_iter_is_xarray(&wdata2->iter)) + /* Check for pages having been redirtied and clean + * them. We can do this by walking the xarray. If + * it's not an xarray, then it's a DIO and we shouldn't + * be mucking around with the page bits. 
+ */ + cifs_undirty_folios(inode, fpos, cur_len); rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &wdata2->cfile); @@ -2461,33 +2484,22 @@ cifs_writev_requeue(struct cifs_writedata *wdata) cifs_writedata_release); } - for (j = 0; j < nr_pages; j++) { - unlock_page(wdata2->pages[j]); - if (rc != 0 && !is_retryable_error(rc)) { - SetPageError(wdata2->pages[j]); - end_page_writeback(wdata2->pages[j]); - put_page(wdata2->pages[j]); - } - } - kref_put(&wdata2->refcount, cifs_writedata_release); if (rc) { if (is_retryable_error(rc)) continue; - i += nr_pages; + fpos += cur_len; + rest_len -= cur_len; break; } + fpos += cur_len; rest_len -= cur_len; - i += nr_pages; - } while (i < wdata->nr_pages); + } while (rest_len > 0); - /* cleanup remaining pages from the original wdata */ - for (; i < wdata->nr_pages; i++) { - SetPageError(wdata->pages[i]); - end_page_writeback(wdata->pages[i]); - put_page(wdata->pages[i]); - } + /* Clean up remaining pages from the original wdata */ + if (iov_iter_is_xarray(&wdata->iter)) + cifs_pages_write_failed(inode, fpos, rest_len); if (rc != 0 && !is_retryable_error(rc)) mapping_set_error(inode->i_mapping, rc); @@ -2500,7 +2512,6 @@ cifs_writev_complete(struct work_struct *work) struct cifs_writedata *wdata = container_of(work, struct cifs_writedata, work); struct inode *inode = d_inode(wdata->cfile->dentry); - int i = 0; if (wdata->result == 0) { spin_lock(&inode->i_lock); @@ -2511,45 +2522,24 @@ cifs_writev_complete(struct work_struct *work) } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) return cifs_writev_requeue(wdata); - for (i = 0; i < wdata->nr_pages; i++) { - struct page *page = wdata->pages[i]; + if (wdata->result == -EAGAIN) + cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); + else if (wdata->result < 0) + cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); + else + cifs_pages_written_back(inode, wdata->offset, wdata->bytes); - if (wdata->result == -EAGAIN) - __set_page_dirty_nobuffers(page); - else if (wdata->result < 0) - SetPageError(page); - end_page_writeback(page); - cifs_readpage_to_fscache(inode, page); - put_page(page); - } if (wdata->result != -EAGAIN) mapping_set_error(inode->i_mapping, wdata->result); kref_put(&wdata->refcount, cifs_writedata_release); } -struct cifs_writedata * -cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete) -{ - struct cifs_writedata *writedata = NULL; - struct page **pages = - kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); - if (pages) { - writedata = cifs_writedata_direct_alloc(pages, complete); - if (!writedata) - kvfree(pages); - } - - return writedata; -} - -struct cifs_writedata * -cifs_writedata_direct_alloc(struct page **pages, work_func_t complete) +struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) { struct cifs_writedata *wdata; wdata = kzalloc(sizeof(*wdata), GFP_NOFS); if (wdata != NULL) { - wdata->pages = pages; kref_init(&wdata->refcount); INIT_LIST_HEAD(&wdata->list); init_completion(&wdata->done); @@ -2558,7 +2548,6 @@ cifs_writedata_direct_alloc(struct page **pages, work_func_t complete) return wdata; } - static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) { struct address_space *mapping = page->mapping; @@ -2617,6 +2606,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) return rc; } +#if 0 // TODO: Remove for iov_iter support static struct cifs_writedata * wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping, pgoff_t end, pgoff_t 
*index, @@ -2922,6 +2912,375 @@ retry: set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); return rc; } +#endif + +/* + * Extend the region to be written back to include subsequent contiguously + * dirty pages if possible, but don't sleep while doing so. + */ +static void cifs_extend_writeback(struct address_space *mapping, + long *_count, + loff_t start, + int max_pages, + size_t max_len, + unsigned int *_len) +{ + struct folio_batch batch; + struct folio *folio; + unsigned int psize, nr_pages; + size_t len = *_len; + pgoff_t index = (start + len) / PAGE_SIZE; + bool stop = true; + unsigned int i; + XA_STATE(xas, &mapping->i_pages, index); + + folio_batch_init(&batch); + + do { + /* Firstly, we gather up a batch of contiguous dirty pages + * under the RCU read lock - but we can't clear the dirty flags + * there if any of those pages are mapped. + */ + rcu_read_lock(); + + xas_for_each(&xas, folio, ULONG_MAX) { + stop = true; + if (xas_retry(&xas, folio)) + continue; + if (xa_is_value(folio)) + break; + if (folio_index(folio) != index) + break; + if (!folio_try_get_rcu(folio)) { + xas_reset(&xas); + continue; + } + nr_pages = folio_nr_pages(folio); + if (nr_pages > max_pages) + break; + + /* Has the page moved or been split? */ + if (unlikely(folio != xas_reload(&xas))) { + folio_put(folio); + break; + } + + if (!folio_trylock(folio)) { + folio_put(folio); + break; + } + if (!folio_test_dirty(folio) || folio_test_writeback(folio)) { + folio_unlock(folio); + folio_put(folio); + break; + } + + max_pages -= nr_pages; + psize = folio_size(folio); + len += psize; + stop = false; + if (max_pages <= 0 || len >= max_len || *_count <= 0) + stop = true; + + index += nr_pages; + if (!folio_batch_add(&batch, folio)) + break; + if (stop) + break; + } + + if (!stop) + xas_pause(&xas); + rcu_read_unlock(); + + /* Now, if we obtained any pages, we can shift them to being + * writable and mark them for caching. + */ + if (!folio_batch_count(&batch)) + break; + + for (i = 0; i < folio_batch_count(&batch); i++) { + folio = batch.folios[i]; + /* The folio should be locked, dirty and not undergoing + * writeback from the loop above. + */ + if (!folio_clear_dirty_for_io(folio)) + WARN_ON(1); + if (folio_start_writeback(folio)) + WARN_ON(1); + + *_count -= folio_nr_pages(folio); + folio_unlock(folio); + } + + folio_batch_release(&batch); + cond_resched(); + } while (!stop); + + *_len = len; +} + +/* + * Write back the locked page and any subsequent non-locked dirty pages. + */ +static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, + struct writeback_control *wbc, + struct folio *folio, + loff_t start, loff_t end) +{ + struct inode *inode = mapping->host; + struct TCP_Server_Info *server; + struct cifs_writedata *wdata; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_credits credits_on_stack; + struct cifs_credits *credits = &credits_on_stack; + struct cifsFileInfo *cfile = NULL; + unsigned int xid, wsize, len; + loff_t i_size = i_size_read(inode); + size_t max_len; + long count = wbc->nr_to_write; + int rc; + + /* The folio should be locked, dirty and not undergoing writeback. 
*/ + if (folio_start_writeback(folio)) + WARN_ON(1); + + count -= folio_nr_pages(folio); + len = folio_size(folio); + + xid = get_xid(); + server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); + + rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); + if (rc) { + cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); + goto err_xid; + } + + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, + &wsize, credits); + if (rc != 0) + goto err_close; + + wdata = cifs_writedata_alloc(cifs_writev_complete); + if (!wdata) { + rc = -ENOMEM; + goto err_uncredit; + } + + wdata->sync_mode = wbc->sync_mode; + wdata->offset = folio_pos(folio); + wdata->pid = cfile->pid; + wdata->credits = credits_on_stack; + wdata->cfile = cfile; + wdata->server = server; + cfile = NULL; + + /* Find all consecutive lockable dirty pages, stopping when we find a + * page that is not immediately lockable, is not dirty or is missing, + * or we reach the end of the range. + */ + if (start < i_size) { + /* Trim the write to the EOF; the extra data is ignored. Also + * put an upper limit on the size of a single storedata op. + */ + max_len = wsize; + max_len = min_t(unsigned long long, max_len, end - start + 1); + max_len = min_t(unsigned long long, max_len, i_size - start); + + if (len < max_len) { + int max_pages = INT_MAX; + +#ifdef CONFIG_CIFS_SMB_DIRECT + if (server->smbd_conn) + max_pages = server->smbd_conn->max_frmr_depth; +#endif + max_pages -= folio_nr_pages(folio); + + if (max_pages > 0) + cifs_extend_writeback(mapping, &count, start, + max_pages, max_len, &len); + } + len = min_t(loff_t, len, max_len); + } + + wdata->bytes = len; + + /* We now have a contiguous set of dirty pages, each with writeback + * set; the first page is still locked at this point, but all the rest + * have been unlocked. + */ + folio_unlock(folio); + + if (start < i_size) { + iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, + start, len); + + rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); + if (rc) + goto err_wdata; + + if (wdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = wdata->server->ops->async_writev(wdata, + cifs_writedata_release); + if (rc >= 0) { + kref_put(&wdata->refcount, cifs_writedata_release); + goto err_close; + } + } else { + /* The dirty region was entirely beyond the EOF. 
*/ + cifs_pages_written_back(inode, start, len); + rc = 0; + } + +err_wdata: + kref_put(&wdata->refcount, cifs_writedata_release); +err_uncredit: + add_credits_and_wake_if(server, credits, 0); +err_close: + if (cfile) + cifsFileInfo_put(cfile); +err_xid: + free_xid(xid); + if (rc == 0) { + wbc->nr_to_write = count; + } else if (is_retryable_error(rc)) { + cifs_pages_write_redirty(inode, start, len); + } else { + cifs_pages_write_failed(inode, start, len); + mapping_set_error(mapping, rc); + } + /* Indication to update ctime and mtime as close is deferred */ + set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); + return rc; +} + +/* + * write a region of pages back to the server + */ +static int cifs_writepages_region(struct address_space *mapping, + struct writeback_control *wbc, + loff_t start, loff_t end, loff_t *_next) +{ + struct folio *folio; + struct page *head_page; + ssize_t ret; + int n, skips = 0; + + do { + pgoff_t index = start / PAGE_SIZE; + + n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE, + PAGECACHE_TAG_DIRTY, 1, &head_page); + if (!n) + break; + + folio = page_folio(head_page); + start = folio_pos(folio); /* May regress with THPs */ + + /* At this point we hold neither the i_pages lock nor the + * page lock: the page may be truncated or invalidated + * (changing page->mapping to NULL), or even swizzled + * back from swapper_space to tmpfs file mapping + */ + if (wbc->sync_mode != WB_SYNC_NONE) { + ret = folio_lock_killable(folio); + if (ret < 0) { + folio_put(folio); + return ret; + } + } else { + if (!folio_trylock(folio)) { + folio_put(folio); + return 0; + } + } + + if (folio_mapping(folio) != mapping || + !folio_test_dirty(folio)) { + start += folio_size(folio); + folio_unlock(folio); + folio_put(folio); + continue; + } + + if (folio_test_writeback(folio) || + folio_test_fscache(folio)) { + folio_unlock(folio); + if (wbc->sync_mode != WB_SYNC_NONE) { + folio_wait_writeback(folio); +#ifdef CONFIG_CIFS_FSCACHE + folio_wait_fscache(folio); +#endif + } else { + start += folio_size(folio); + } + folio_put(folio); + if (wbc->sync_mode == WB_SYNC_NONE) { + if (skips >= 5 || need_resched()) + break; + skips++; + } + continue; + } + + if (!folio_clear_dirty_for_io(folio)) + /* We hold the page lock - it should've been dirty. */ + WARN_ON(1); + + ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end); + folio_put(folio); + if (ret < 0) + return ret; + + start += ret; + cond_resched(); + } while (wbc->nr_to_write > 0); + + *_next = start; + return 0; +} + +/* + * Write some of the pending data back to the server + */ +static int cifs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + loff_t start, next; + int ret; + + /* We have to be careful as we can end up racing with setattr() + * truncating the pagecache since the caller doesn't take a lock here + * to prevent it. 
+ */ + + if (wbc->range_cyclic) { + start = mapping->writeback_index * PAGE_SIZE; + ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next); + if (ret == 0) { + mapping->writeback_index = next / PAGE_SIZE; + if (start > 0 && wbc->nr_to_write > 0) { + ret = cifs_writepages_region(mapping, wbc, 0, + start, &next); + if (ret == 0) + mapping->writeback_index = + next / PAGE_SIZE; + } + } + } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { + ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next); + if (wbc->nr_to_write > 0 && ret == 0) + mapping->writeback_index = next / PAGE_SIZE; + } else { + ret = cifs_writepages_region(mapping, wbc, + wbc->range_start, wbc->range_end, &next); + } + + return ret; +} static int cifs_writepage_locked(struct page *page, struct writeback_control *wbc) @@ -2972,6 +3331,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct cifsFileInfo *cfile = file->private_data; struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); + struct folio *folio = page_folio(page); __u32 pid; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) @@ -2982,14 +3342,14 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", page, pos, copied); - if (PageChecked(page)) { + if (folio_test_checked(folio)) { if (copied == len) - SetPageUptodate(page); - ClearPageChecked(page); - } else if (!PageUptodate(page) && copied == PAGE_SIZE) - SetPageUptodate(page); + folio_mark_uptodate(folio); + folio_clear_checked(folio); + } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) + folio_mark_uptodate(folio); - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { char *page_data; unsigned offset = pos & (PAGE_SIZE - 1); unsigned int xid; @@ -3149,6 +3509,7 @@ int cifs_flush(struct file *file, fl_owner_t id) return rc; } +#if 0 // TODO: Remove for iov_iter support static int cifs_write_allocate_pages(struct page **pages, unsigned long num_pages) { @@ -3189,17 +3550,15 @@ size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len) return num_pages; } +#endif static void cifs_uncached_writedata_release(struct kref *refcount) { - int i; struct cifs_writedata *wdata = container_of(refcount, struct cifs_writedata, refcount); kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); - for (i = 0; i < wdata->nr_pages; i++) - put_page(wdata->pages[i]); cifs_writedata_release(refcount); } @@ -3225,6 +3584,7 @@ cifs_uncached_writev_complete(struct work_struct *work) kref_put(&wdata->refcount, cifs_uncached_writedata_release); } +#if 0 // TODO: Remove for iov_iter support static int wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, size_t *len, unsigned long *num_pages) @@ -3266,6 +3626,7 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, *num_pages = i + 1; return 0; } +#endif static int cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, @@ -3337,23 +3698,57 @@ fail: return rc; } +/* + * Select span of a bvec iterator we're going to use. Limit it by both maximum + * size and maximum number of segments. 
+ */ +static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, + size_t max_segs, unsigned int *_nsegs) +{ + const struct bio_vec *bvecs = iter->bvec; + unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; + size_t len, span = 0, n = iter->count; + size_t skip = iter->iov_offset; + + if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) + return 0; + + while (n && ix < nbv && skip) { + len = bvecs[ix].bv_len; + if (skip < len) + break; + skip -= len; + n -= len; + ix++; + } + + while (n && ix < nbv) { + len = min3(n, bvecs[ix].bv_len - skip, max_size); + span += len; + nsegs++; + ix++; + if (span >= max_size || nsegs >= max_segs) + break; + skip = 0; + n -= len; + } + + *_nsegs = nsegs; + return span; +} + static int -cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, +cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, struct cifsFileInfo *open_file, struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, struct cifs_aio_ctx *ctx) { int rc = 0; - size_t cur_len; - unsigned long nr_pages, num_pages, i; + size_t cur_len, max_len; struct cifs_writedata *wdata; - struct iov_iter saved_from = *from; - loff_t saved_offset = offset; pid_t pid; struct TCP_Server_Info *server; - struct page **pagevec; - size_t start; - unsigned int xid; + unsigned int xid, max_segs = INT_MAX; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; @@ -3363,10 +3758,20 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); xid = get_xid(); +#ifdef CONFIG_CIFS_SMB_DIRECT + if (server->smbd_conn) + max_segs = server->smbd_conn->max_frmr_depth; +#endif + do { - unsigned int wsize; struct cifs_credits credits_on_stack; struct cifs_credits *credits = &credits_on_stack; + unsigned int wsize, nsegs = 0; + + if (signal_pending(current)) { + rc = -EINTR; + break; + } if (open_file->invalidHandle) { rc = cifs_reopen_file(open_file, false); @@ -3381,99 +3786,42 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, if (rc) break; - cur_len = min_t(const size_t, len, wsize); - - if (ctx->direct_io) { - ssize_t result; - - result = iov_iter_get_pages_alloc2( - from, &pagevec, cur_len, &start); - if (result < 0) { - cifs_dbg(VFS, - "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n", - result, iov_iter_type(from), - from->iov_offset, from->count); - dump_stack(); - - rc = result; - add_credits_and_wake_if(server, credits, 0); - break; - } - cur_len = (size_t)result; - - nr_pages = - (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE; - - wdata = cifs_writedata_direct_alloc(pagevec, - cifs_uncached_writev_complete); - if (!wdata) { - rc = -ENOMEM; - for (i = 0; i < nr_pages; i++) - put_page(pagevec[i]); - kvfree(pagevec); - add_credits_and_wake_if(server, credits, 0); - break; - } - - - wdata->page_offset = start; - wdata->tailsz = - nr_pages > 1 ? 
- cur_len - (PAGE_SIZE - start) - - (nr_pages - 2) * PAGE_SIZE : - cur_len; - } else { - nr_pages = get_numpages(wsize, len, &cur_len); - wdata = cifs_writedata_alloc(nr_pages, - cifs_uncached_writev_complete); - if (!wdata) { - rc = -ENOMEM; - add_credits_and_wake_if(server, credits, 0); - break; - } - - rc = cifs_write_allocate_pages(wdata->pages, nr_pages); - if (rc) { - kvfree(wdata->pages); - kfree(wdata); - add_credits_and_wake_if(server, credits, 0); - break; - } - - num_pages = nr_pages; - rc = wdata_fill_from_iovec( - wdata, from, &cur_len, &num_pages); - if (rc) { - for (i = 0; i < nr_pages; i++) - put_page(wdata->pages[i]); - kvfree(wdata->pages); - kfree(wdata); - add_credits_and_wake_if(server, credits, 0); - break; - } + max_len = min_t(const size_t, len, wsize); + if (!max_len) { + rc = -EAGAIN; + add_credits_and_wake_if(server, credits, 0); + break; + } - /* - * Bring nr_pages down to the number of pages we - * actually used, and free any pages that we didn't use. - */ - for ( ; nr_pages > num_pages; nr_pages--) - put_page(wdata->pages[nr_pages - 1]); + cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); + cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", + cur_len, max_len, nsegs, from->nr_segs, max_segs); + if (cur_len == 0) { + rc = -EIO; + add_credits_and_wake_if(server, credits, 0); + break; + } - wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE); + wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); + if (!wdata) { + rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); + break; } wdata->sync_mode = WB_SYNC_ALL; - wdata->nr_pages = nr_pages; - wdata->offset = (__u64)offset; - wdata->cfile = cifsFileInfo_get(open_file); - wdata->server = server; - wdata->pid = pid; - wdata->bytes = cur_len; - wdata->pagesz = PAGE_SIZE; - wdata->credits = credits_on_stack; - wdata->ctx = ctx; + wdata->offset = (__u64)fpos; + wdata->cfile = cifsFileInfo_get(open_file); + wdata->server = server; + wdata->pid = pid; + wdata->bytes = cur_len; + wdata->credits = credits_on_stack; + wdata->iter = *from; + wdata->ctx = ctx; kref_get(&ctx->refcount); + iov_iter_truncate(&wdata->iter, cur_len); + rc = adjust_credits(server, &wdata->credits, wdata->bytes); if (!rc) { @@ -3488,16 +3836,14 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, add_credits_and_wake_if(server, &wdata->credits, 0); kref_put(&wdata->refcount, cifs_uncached_writedata_release); - if (rc == -EAGAIN) { - *from = saved_from; - iov_iter_advance(from, offset - saved_offset); + if (rc == -EAGAIN) continue; - } break; } list_add_tail(&wdata->list, wdata_list); - offset += cur_len; + iov_iter_advance(from, cur_len); + fpos += cur_len; len -= cur_len; } while (len > 0); @@ -3596,8 +3942,6 @@ static ssize_t __cifs_writev( struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; struct cifs_aio_ctx *ctx; - struct iov_iter saved_from = *from; - size_t len = iov_iter_count(from); int rc; /* @@ -3631,23 +3975,54 @@ static ssize_t __cifs_writev( ctx->iocb = iocb; ctx->pos = iocb->ki_pos; + ctx->direct_io = direct; + ctx->nr_pinned_pages = 0; - if (direct) { - ctx->direct_io = true; - ctx->iter = *from; - ctx->len = len; - } else { - rc = setup_aio_ctx_iter(ctx, from, ITER_SOURCE); - if (rc) { + if (user_backed_iter(from)) { + /* + * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as + * they contain references to the calling process's virtual + * memory layout which won't be available in an async worker + * thread. This also takes a pin on every folio involved. 
+ */ + rc = netfs_extract_user_iter(from, iov_iter_count(from), + &ctx->iter, 0); + if (rc < 0) { kref_put(&ctx->refcount, cifs_aio_ctx_release); return rc; } + + ctx->nr_pinned_pages = rc; + ctx->bv = (void *)ctx->iter.bvec; + ctx->bv_need_unpin = iov_iter_extract_will_pin(&ctx->iter); + } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && + !is_sync_kiocb(iocb)) { + /* + * If the op is asynchronous, we need to copy the list attached + * to a BVEC/KVEC-type iterator, but we assume that the storage + * will be pinned by the caller; in any case, we may or may not + * be able to pin the pages, so we don't try. + */ + ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); + if (!ctx->bv) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return -ENOMEM; + } + } else { + /* + * Otherwise, we just pass the iterator down as-is and rely on + * the caller to make sure the pages referred to by the + * iterator don't evaporate. + */ + ctx->iter = *from; } + ctx->len = iov_iter_count(&ctx->iter); + /* grab a lock here due to read response handlers can access ctx */ mutex_lock(&ctx->aio_mutex); - rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from, + rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, cfile, cifs_sb, &ctx->list, ctx); /* @@ -3790,14 +4165,12 @@ out: return written; } -static struct cifs_readdata * -cifs_readdata_direct_alloc(struct page **pages, work_func_t complete) +static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) { struct cifs_readdata *rdata; rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); - if (rdata != NULL) { - rdata->pages = pages; + if (rdata) { kref_init(&rdata->refcount); INIT_LIST_HEAD(&rdata->list); init_completion(&rdata->done); @@ -3807,27 +4180,14 @@ cifs_readdata_direct_alloc(struct page **pages, work_func_t complete) return rdata; } -static struct cifs_readdata * -cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete) -{ - struct page **pages = - kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); - struct cifs_readdata *ret = NULL; - - if (pages) { - ret = cifs_readdata_direct_alloc(pages, complete); - if (!ret) - kfree(pages); - } - - return ret; -} - void cifs_readdata_release(struct kref *refcount) { struct cifs_readdata *rdata = container_of(refcount, struct cifs_readdata, refcount); + + if (rdata->ctx) + kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); #ifdef CONFIG_CIFS_SMB_DIRECT if (rdata->mr) { smbd_deregister_mr(rdata->mr); @@ -3837,85 +4197,9 @@ cifs_readdata_release(struct kref *refcount) if (rdata->cfile) cifsFileInfo_put(rdata->cfile); - kvfree(rdata->pages); kfree(rdata); } -static int -cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages) -{ - int rc = 0; - struct page *page; - unsigned int i; - - for (i = 0; i < nr_pages; i++) { - page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM); - if (!page) { - rc = -ENOMEM; - break; - } - rdata->pages[i] = page; - } - - if (rc) { - unsigned int nr_page_failed = i; - - for (i = 0; i < nr_page_failed; i++) { - put_page(rdata->pages[i]); - rdata->pages[i] = NULL; - } - } - return rc; -} - -static void -cifs_uncached_readdata_release(struct kref *refcount) -{ - struct cifs_readdata *rdata = container_of(refcount, - struct cifs_readdata, refcount); - unsigned int i; - - kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); - for (i = 0; i < rdata->nr_pages; i++) { - put_page(rdata->pages[i]); - } - cifs_readdata_release(refcount); -} - -/** - * cifs_readdata_to_iov - copy data from pages in response to an iovec - * @rdata: 
the readdata response with list of pages holding data - * @iter: destination for our data - * - * This function copies data from a list of pages in a readdata response into - * an array of iovecs. It will first calculate where the data should go - * based on the info in the readdata and then copy the data into that spot. - */ -static int -cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter) -{ - size_t remaining = rdata->got_bytes; - unsigned int i; - - for (i = 0; i < rdata->nr_pages; i++) { - struct page *page = rdata->pages[i]; - size_t copy = min_t(size_t, remaining, PAGE_SIZE); - size_t written; - - if (unlikely(iov_iter_is_pipe(iter))) { - void *addr = kmap_atomic(page); - - written = copy_to_iter(addr, copy, iter); - kunmap_atomic(addr); - } else - written = copy_page_to_iter(page, 0, copy, iter); - remaining -= written; - if (written < copy && iov_iter_count(iter) > 0) - break; - } - return remaining ? -EFAULT : 0; -} - static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); static void @@ -3927,9 +4211,11 @@ cifs_uncached_readv_complete(struct work_struct *work) complete(&rdata->done); collect_uncached_read_data(rdata->ctx); /* the below call can possibly free the last ref to aio ctx */ - kref_put(&rdata->refcount, cifs_uncached_readdata_release); + kref_put(&rdata->refcount, cifs_readdata_release); } +#if 0 // TODO: Remove for iov_iter support + static int uncached_fill_pages(struct TCP_Server_Info *server, struct cifs_readdata *rdata, struct iov_iter *iter, @@ -4003,6 +4289,7 @@ cifs_uncached_copy_into_pages(struct TCP_Server_Info *server, { return uncached_fill_pages(server, rdata, iter, iter->count); } +#endif static int cifs_resend_rdata(struct cifs_readdata *rdata, struct list_head *rdata_list, @@ -4072,37 +4359,36 @@ static int cifs_resend_rdata(struct cifs_readdata *rdata, } while (rc == -EAGAIN); fail: - kref_put(&rdata->refcount, cifs_uncached_readdata_release); + kref_put(&rdata->refcount, cifs_readdata_release); return rc; } static int -cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, +cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file, struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, struct cifs_aio_ctx *ctx) { struct cifs_readdata *rdata; - unsigned int npages, rsize; + unsigned int rsize, nsegs, max_segs = INT_MAX; struct cifs_credits credits_on_stack; struct cifs_credits *credits = &credits_on_stack; - size_t cur_len; + size_t cur_len, max_len; int rc; pid_t pid; struct TCP_Server_Info *server; - struct page **pagevec; - size_t start; - struct iov_iter direct_iov = ctx->iter; server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); +#ifdef CONFIG_CIFS_SMB_DIRECT + if (server->smbd_conn) + max_segs = server->smbd_conn->max_frmr_depth; +#endif + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; else pid = current->tgid; - if (ctx->direct_io) - iov_iter_advance(&direct_iov, offset - ctx->pos); - do { if (open_file->invalidHandle) { rc = cifs_reopen_file(open_file, true); @@ -4122,78 +4408,37 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, if (rc) break; - cur_len = min_t(const size_t, len, rsize); - - if (ctx->direct_io) { - ssize_t result; - - result = iov_iter_get_pages_alloc2( - &direct_iov, &pagevec, - cur_len, &start); - if (result < 0) { - cifs_dbg(VFS, - "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n", - result, iov_iter_type(&direct_iov), - direct_iov.iov_offset, - 
direct_iov.count); - dump_stack(); - - rc = result; - add_credits_and_wake_if(server, credits, 0); - break; - } - cur_len = (size_t)result; - - rdata = cifs_readdata_direct_alloc( - pagevec, cifs_uncached_readv_complete); - if (!rdata) { - add_credits_and_wake_if(server, credits, 0); - rc = -ENOMEM; - break; - } - - npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE; - rdata->page_offset = start; - rdata->tailsz = npages > 1 ? - cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE : - cur_len; - - } else { - - npages = DIV_ROUND_UP(cur_len, PAGE_SIZE); - /* allocate a readdata struct */ - rdata = cifs_readdata_alloc(npages, - cifs_uncached_readv_complete); - if (!rdata) { - add_credits_and_wake_if(server, credits, 0); - rc = -ENOMEM; - break; - } + max_len = min_t(size_t, len, rsize); - rc = cifs_read_allocate_pages(rdata, npages); - if (rc) { - kvfree(rdata->pages); - kfree(rdata); - add_credits_and_wake_if(server, credits, 0); - break; - } + cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, + max_segs, &nsegs); + cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", + cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); + if (cur_len == 0) { + rc = -EIO; + add_credits_and_wake_if(server, credits, 0); + break; + } - rdata->tailsz = PAGE_SIZE; + rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); + if (!rdata) { + add_credits_and_wake_if(server, credits, 0); + rc = -ENOMEM; + break; } - rdata->server = server; - rdata->cfile = cifsFileInfo_get(open_file); - rdata->nr_pages = npages; - rdata->offset = offset; - rdata->bytes = cur_len; - rdata->pid = pid; - rdata->pagesz = PAGE_SIZE; - rdata->read_into_pages = cifs_uncached_read_into_pages; - rdata->copy_into_pages = cifs_uncached_copy_into_pages; - rdata->credits = credits_on_stack; - rdata->ctx = ctx; + rdata->server = server; + rdata->cfile = cifsFileInfo_get(open_file); + rdata->offset = fpos; + rdata->bytes = cur_len; + rdata->pid = pid; + rdata->credits = credits_on_stack; + rdata->ctx = ctx; kref_get(&ctx->refcount); + rdata->iter = ctx->iter; + iov_iter_truncate(&rdata->iter, cur_len); + rc = adjust_credits(server, &rdata->credits, rdata->bytes); if (!rc) { @@ -4205,17 +4450,15 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file, if (rc) { add_credits_and_wake_if(server, &rdata->credits, 0); - kref_put(&rdata->refcount, - cifs_uncached_readdata_release); - if (rc == -EAGAIN) { - iov_iter_revert(&direct_iov, cur_len); + kref_put(&rdata->refcount, cifs_readdata_release); + if (rc == -EAGAIN) continue; - } break; } list_add_tail(&rdata->list, rdata_list); - offset += cur_len; + iov_iter_advance(&ctx->iter, cur_len); + fpos += cur_len; len -= cur_len; } while (len > 0); @@ -4257,22 +4500,6 @@ again: list_del_init(&rdata->list); INIT_LIST_HEAD(&tmp_list); - /* - * Got a part of data and then reconnect has - * happened -- fill the buffer and continue - * reading. 
- */ - if (got_bytes && got_bytes < rdata->bytes) { - rc = 0; - if (!ctx->direct_io) - rc = cifs_readdata_to_iov(rdata, to); - if (rc) { - kref_put(&rdata->refcount, - cifs_uncached_readdata_release); - continue; - } - } - if (ctx->direct_io) { /* * Re-use rdata as this is a @@ -4289,7 +4516,7 @@ again: &tmp_list, ctx); kref_put(&rdata->refcount, - cifs_uncached_readdata_release); + cifs_readdata_release); } list_splice(&tmp_list, &ctx->list); @@ -4297,8 +4524,6 @@ again: goto again; } else if (rdata->result) rc = rdata->result; - else if (!ctx->direct_io) - rc = cifs_readdata_to_iov(rdata, to); /* if there was a short read -- discard anything left */ if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) @@ -4307,7 +4532,7 @@ again: ctx->total_len += rdata->got_bytes; } list_del_init(&rdata->list); - kref_put(&rdata->refcount, cifs_uncached_readdata_release); + kref_put(&rdata->refcount, cifs_readdata_release); } if (!ctx->direct_io) @@ -4367,26 +4592,53 @@ static ssize_t __cifs_readv( if (!ctx) return -ENOMEM; - ctx->cfile = cifsFileInfo_get(cfile); + ctx->pos = offset; + ctx->direct_io = direct; + ctx->len = len; + ctx->cfile = cifsFileInfo_get(cfile); + ctx->nr_pinned_pages = 0; if (!is_sync_kiocb(iocb)) ctx->iocb = iocb; - if (user_backed_iter(to)) - ctx->should_dirty = true; - - if (direct) { - ctx->pos = offset; - ctx->direct_io = true; - ctx->iter = *to; - ctx->len = len; - } else { - rc = setup_aio_ctx_iter(ctx, to, ITER_DEST); - if (rc) { + if (user_backed_iter(to)) { + /* + * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as + * they contain references to the calling process's virtual + * memory layout which won't be available in an async worker + * thread. This also takes a pin on every folio involved. + */ + rc = netfs_extract_user_iter(to, iov_iter_count(to), + &ctx->iter, 0); + if (rc < 0) { kref_put(&ctx->refcount, cifs_aio_ctx_release); return rc; } - len = ctx->len; + + ctx->nr_pinned_pages = rc; + ctx->bv = (void *)ctx->iter.bvec; + ctx->bv_need_unpin = iov_iter_extract_will_pin(&ctx->iter); + ctx->should_dirty = true; + } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && + !is_sync_kiocb(iocb)) { + /* + * If the op is asynchronous, we need to copy the list attached + * to a BVEC/KVEC-type iterator, but we assume that the storage + * will be retained by the caller; in any case, we may or may + * not be able to pin the pages, so we don't try. + */ + ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); + if (!ctx->bv) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return -ENOMEM; + } + } else { + /* + * Otherwise, we just pass the iterator down as-is and rely on + * the caller to make sure the pages referred to by the + * iterator don't evaporate. + */ + ctx->iter = *to; } if (direct) { @@ -4648,6 +4900,8 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) return rc; } +#if 0 // TODO: Remove for iov_iter support + static void cifs_readv_complete(struct work_struct *work) { @@ -4778,19 +5032,74 @@ cifs_readpages_copy_into_pages(struct TCP_Server_Info *server, { return readpages_fill_pages(server, rdata, iter, iter->count); } +#endif + +/* + * Unlock a bunch of folios in the pagecache. 
+ */ +static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last) +{ + struct folio *folio; + XA_STATE(xas, &mapping->i_pages, first); + + rcu_read_lock(); + xas_for_each(&xas, folio, last) { + folio_unlock(folio); + } + rcu_read_unlock(); +} + +static void cifs_readahead_complete(struct work_struct *work) +{ + struct cifs_readdata *rdata = container_of(work, + struct cifs_readdata, work); + struct folio *folio; + pgoff_t last; + bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes); + + XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE); + + if (good) + cifs_readahead_to_fscache(rdata->mapping->host, + rdata->offset, rdata->bytes); + + if (iov_iter_count(&rdata->iter) > 0) + iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter); + + last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE; + + rcu_read_lock(); + xas_for_each(&xas, folio, last) { + if (good) { + flush_dcache_folio(folio); + folio_mark_uptodate(folio); + } + folio_unlock(folio); + } + rcu_read_unlock(); + + kref_put(&rdata->refcount, cifs_readdata_release); +} static void cifs_readahead(struct readahead_control *ractl) { - int rc; struct cifsFileInfo *open_file = ractl->file->private_data; struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file); struct TCP_Server_Info *server; - pid_t pid; - unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0; - pgoff_t next_cached = ULONG_MAX; + unsigned int xid, nr_pages, cache_nr_pages = 0; + unsigned int ra_pages; + pgoff_t next_cached = ULONG_MAX, ra_index; bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) && cifs_inode_cookie(ractl->mapping->host)->cache_priv; bool check_cache = caching; + pid_t pid; + int rc = 0; + + /* Note that readahead_count() lags behind our dequeuing of pages from + * the ractl, wo we have to keep track for ourselves. + */ + ra_pages = readahead_count(ractl); + ra_index = readahead_index(ractl); xid = get_xid(); @@ -4799,22 +5108,21 @@ static void cifs_readahead(struct readahead_control *ractl) else pid = current->tgid; - rc = 0; server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", - __func__, ractl->file, ractl->mapping, readahead_count(ractl)); + __func__, ractl->file, ractl->mapping, ra_pages); /* * Chop the readahead request up into rsize-sized read requests. */ - while ((nr_pages = readahead_count(ractl) - last_batch_size)) { - unsigned int i, got, rsize; - struct page *page; + while ((nr_pages = ra_pages)) { + unsigned int i, rsize; struct cifs_readdata *rdata; struct cifs_credits credits_on_stack; struct cifs_credits *credits = &credits_on_stack; - pgoff_t index = readahead_index(ractl) + last_batch_size; + struct folio *folio; + pgoff_t fsize; /* * Find out if we have anything cached in the range of @@ -4823,21 +5131,22 @@ static void cifs_readahead(struct readahead_control *ractl) if (caching) { if (check_cache) { rc = cifs_fscache_query_occupancy( - ractl->mapping->host, index, nr_pages, + ractl->mapping->host, ra_index, nr_pages, &next_cached, &cache_nr_pages); if (rc < 0) caching = false; check_cache = false; } - if (index == next_cached) { + if (ra_index == next_cached) { /* * TODO: Send a whole batch of pages to be read * by the cache. 
*/ - struct folio *folio = readahead_folio(ractl); - - last_batch_size = folio_nr_pages(folio); + folio = readahead_folio(ractl); + fsize = folio_nr_pages(folio); + ra_pages -= fsize; + ra_index += fsize; if (cifs_readpage_from_fscache(ractl->mapping->host, &folio->page) < 0) { /* @@ -4848,8 +5157,8 @@ static void cifs_readahead(struct readahead_control *ractl) caching = false; } folio_unlock(folio); - next_cached++; - cache_nr_pages--; + next_cached += fsize; + cache_nr_pages -= fsize; if (cache_nr_pages == 0) check_cache = true; continue; @@ -4874,8 +5183,9 @@ static void cifs_readahead(struct readahead_control *ractl) &rsize, credits); if (rc) break; - nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl)); - nr_pages = min_t(size_t, nr_pages, next_cached - index); + nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); + if (next_cached != ULONG_MAX) + nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); /* * Give up immediately if rsize is too small to read an entire @@ -4888,33 +5198,31 @@ static void cifs_readahead(struct readahead_control *ractl) break; } - rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete); + rdata = cifs_readdata_alloc(cifs_readahead_complete); if (!rdata) { /* best to give up if we're out of mem */ add_credits_and_wake_if(server, credits, 0); break; } - got = __readahead_batch(ractl, rdata->pages, nr_pages); - if (got != nr_pages) { - pr_warn("__readahead_batch() returned %u/%u\n", - got, nr_pages); - nr_pages = got; - } - - rdata->nr_pages = nr_pages; - rdata->bytes = readahead_batch_length(ractl); + rdata->offset = ra_index * PAGE_SIZE; + rdata->bytes = nr_pages * PAGE_SIZE; rdata->cfile = cifsFileInfo_get(open_file); rdata->server = server; rdata->mapping = ractl->mapping; - rdata->offset = readahead_pos(ractl); rdata->pid = pid; - rdata->pagesz = PAGE_SIZE; - rdata->tailsz = PAGE_SIZE; - rdata->read_into_pages = cifs_readpages_read_into_pages; - rdata->copy_into_pages = cifs_readpages_copy_into_pages; rdata->credits = credits_on_stack; + for (i = 0; i < nr_pages; i++) { + if (!readahead_folio(ractl)) + WARN_ON(1); + } + ra_pages -= nr_pages; + ra_index += nr_pages; + + iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, + rdata->offset, rdata->bytes); + rc = adjust_credits(server, &rdata->credits, rdata->bytes); if (!rc) { if (rdata->cfile->invalidHandle) @@ -4925,18 +5233,15 @@ static void cifs_readahead(struct readahead_control *ractl) if (rc) { add_credits_and_wake_if(server, &rdata->credits, 0); - for (i = 0; i < rdata->nr_pages; i++) { - page = rdata->pages[i]; - unlock_page(page); - put_page(page); - } + cifs_unlock_folios(rdata->mapping, + rdata->offset / PAGE_SIZE, + (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); /* Fallback to the readpage in error/reconnect cases */ kref_put(&rdata->refcount, cifs_readdata_release); break; } kref_put(&rdata->refcount, cifs_readdata_release); - last_batch_size = nr_pages; } free_xid(xid); @@ -4978,10 +5283,6 @@ static int cifs_readpage_worker(struct file *file, struct page *page, flush_dcache_page(page); SetPageUptodate(page); - - /* send this page to the cache */ - cifs_readpage_to_fscache(file_inode(file), page); - rc = 0; io_error: -- cgit From 607aea3cc2a8f46d24c78eb4efcc511af1c2f74d Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 24 Jan 2022 21:13:24 +0000 Subject: cifs: Remove unused code Remove a bunch of functions that are no longer used and are commented out after the conversion to use iterators throughout the I/O path. 
Signed-off-by: David Howells cc: Steve French cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org Link: https://lore.kernel.org/r/164928621823.457102.8777804402615654773.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/165211421039.3154751.15199634443157779005.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/165348881165.2106726.2993852968344861224.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/165364827876.3334034.9331465096417303889.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/166126396915.708021.2010212654244139442.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/166697261080.61150.17513116912567922274.stgit@warthog.procyon.org.uk/ # rfc Link: https://lore.kernel.org/r/166732033255.3186319.5527423437137895940.stgit@warthog.procyon.org.uk/ # rfc Signed-off-by: Steve French --- fs/cifs/file.c | 606 --------------------------------------------------------- 1 file changed, 606 deletions(-) (limited to 'fs/cifs/file.c') diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 599578f7e961..bd4ee2cdf3d4 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2606,314 +2606,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) return rc; } -#if 0 // TODO: Remove for iov_iter support -static struct cifs_writedata * -wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping, - pgoff_t end, pgoff_t *index, - unsigned int *found_pages) -{ - struct cifs_writedata *wdata; - - wdata = cifs_writedata_alloc((unsigned int)tofind, - cifs_writev_complete); - if (!wdata) - return NULL; - - *found_pages = find_get_pages_range_tag(mapping, index, end, - PAGECACHE_TAG_DIRTY, tofind, wdata->pages); - return wdata; -} - -static unsigned int -wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages, - struct address_space *mapping, - struct writeback_control *wbc, - pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done) -{ - unsigned int nr_pages = 0, i; - struct page *page; - - for (i = 0; i < found_pages; i++) { - page = wdata->pages[i]; - /* - * At this point we hold neither the i_pages lock nor the - * page lock: the page may be truncated or invalidated - * (changing page->mapping to NULL), or even swizzled - * back from swapper_space to tmpfs file mapping - */ - - if (nr_pages == 0) - lock_page(page); - else if (!trylock_page(page)) - break; - - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - break; - } - - if (!wbc->range_cyclic && page->index > end) { - *done = true; - unlock_page(page); - break; - } - - if (*next && (page->index != *next)) { - /* Not next consecutive page */ - unlock_page(page); - break; - } - - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - break; - } - - /* - * This actually clears the dirty bit in the radix tree. - * See cifs_writepage() for more commentary. 
- */ - set_page_writeback(page); - if (page_offset(page) >= i_size_read(mapping->host)) { - *done = true; - unlock_page(page); - end_page_writeback(page); - break; - } - - wdata->pages[i] = page; - *next = page->index + 1; - ++nr_pages; - } - - /* reset index to refind any pages skipped */ - if (nr_pages == 0) - *index = wdata->pages[0]->index + 1; - - /* put any pages we aren't going to use */ - for (i = nr_pages; i < found_pages; i++) { - put_page(wdata->pages[i]); - wdata->pages[i] = NULL; - } - - return nr_pages; -} - -static int -wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages, - struct address_space *mapping, struct writeback_control *wbc) -{ - int rc; - - wdata->sync_mode = wbc->sync_mode; - wdata->nr_pages = nr_pages; - wdata->offset = page_offset(wdata->pages[0]); - wdata->pagesz = PAGE_SIZE; - wdata->tailsz = min(i_size_read(mapping->host) - - page_offset(wdata->pages[nr_pages - 1]), - (loff_t)PAGE_SIZE); - wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz; - wdata->pid = wdata->cfile->pid; - - rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); - if (rc) - return rc; - - if (wdata->cfile->invalidHandle) - rc = -EAGAIN; - else - rc = wdata->server->ops->async_writev(wdata, - cifs_writedata_release); - - return rc; -} - -static int -cifs_writepage_locked(struct page *page, struct writeback_control *wbc); - -static int cifs_write_one_page(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct address_space *mapping = data; - int ret; - - ret = cifs_writepage_locked(page, wbc); - unlock_page(page); - mapping_set_error(mapping, ret); - return ret; -} - -static int cifs_writepages(struct address_space *mapping, - struct writeback_control *wbc) -{ - struct inode *inode = mapping->host; - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - struct TCP_Server_Info *server; - bool done = false, scanned = false, range_whole = false; - pgoff_t end, index; - struct cifs_writedata *wdata; - struct cifsFileInfo *cfile = NULL; - int rc = 0; - int saved_rc = 0; - unsigned int xid; - - /* - * If wsize is smaller than the page cache size, default to writing - * one page at a time. 
- */ - if (cifs_sb->ctx->wsize < PAGE_SIZE) - return write_cache_pages(mapping, wbc, cifs_write_one_page, - mapping); - - xid = get_xid(); - if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ - end = -1; - } else { - index = wbc->range_start >> PAGE_SHIFT; - end = wbc->range_end >> PAGE_SHIFT; - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = true; - scanned = true; - } - server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); - -retry: - while (!done && index <= end) { - unsigned int i, nr_pages, found_pages, wsize; - pgoff_t next = 0, tofind, saved_index = index; - struct cifs_credits credits_on_stack; - struct cifs_credits *credits = &credits_on_stack; - int get_file_rc = 0; - - if (cfile) - cifsFileInfo_put(cfile); - - rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); - - /* in case of an error store it to return later */ - if (rc) - get_file_rc = rc; - - rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, - &wsize, credits); - if (rc != 0) { - done = true; - break; - } - - tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1; - - wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index, - &found_pages); - if (!wdata) { - rc = -ENOMEM; - done = true; - add_credits_and_wake_if(server, credits, 0); - break; - } - - if (found_pages == 0) { - kref_put(&wdata->refcount, cifs_writedata_release); - add_credits_and_wake_if(server, credits, 0); - break; - } - - nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc, - end, &index, &next, &done); - - /* nothing to write? */ - if (nr_pages == 0) { - kref_put(&wdata->refcount, cifs_writedata_release); - add_credits_and_wake_if(server, credits, 0); - continue; - } - - wdata->credits = credits_on_stack; - wdata->cfile = cfile; - wdata->server = server; - cfile = NULL; - - if (!wdata->cfile) { - cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", - get_file_rc); - if (is_retryable_error(get_file_rc)) - rc = get_file_rc; - else - rc = -EBADF; - } else - rc = wdata_send_pages(wdata, nr_pages, mapping, wbc); - - for (i = 0; i < nr_pages; ++i) - unlock_page(wdata->pages[i]); - - /* send failure -- clean up the mess */ - if (rc != 0) { - add_credits_and_wake_if(server, &wdata->credits, 0); - for (i = 0; i < nr_pages; ++i) { - if (is_retryable_error(rc)) - redirty_page_for_writepage(wbc, - wdata->pages[i]); - else - SetPageError(wdata->pages[i]); - end_page_writeback(wdata->pages[i]); - put_page(wdata->pages[i]); - } - if (!is_retryable_error(rc)) - mapping_set_error(mapping, rc); - } - kref_put(&wdata->refcount, cifs_writedata_release); - - if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) { - index = saved_index; - continue; - } - - /* Return immediately if we received a signal during writing */ - if (is_interrupt_error(rc)) { - done = true; - break; - } - - if (rc != 0 && saved_rc == 0) - saved_rc = rc; - - wbc->nr_to_write -= nr_pages; - if (wbc->nr_to_write <= 0) - done = true; - - index = next; - } - - if (!scanned && !done) { - /* - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - scanned = true; - index = 0; - goto retry; - } - - if (saved_rc != 0) - rc = saved_rc; - - if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; - - if (cfile) - cifsFileInfo_put(cfile); - free_xid(xid); - /* Indication to update ctime and mtime as close is deferred */ - set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); - return rc; -} -#endif - 
/* * Extend the region to be written back to include subsequent contiguously * dirty pages if possible, but don't sleep while doing so. @@ -3509,49 +3201,6 @@ int cifs_flush(struct file *file, fl_owner_t id) return rc; } -#if 0 // TODO: Remove for iov_iter support -static int -cifs_write_allocate_pages(struct page **pages, unsigned long num_pages) -{ - int rc = 0; - unsigned long i; - - for (i = 0; i < num_pages; i++) { - pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM); - if (!pages[i]) { - /* - * save number of pages we have already allocated and - * return with ENOMEM error - */ - num_pages = i; - rc = -ENOMEM; - break; - } - } - - if (rc) { - for (i = 0; i < num_pages; i++) - put_page(pages[i]); - } - return rc; -} - -static inline -size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len) -{ - size_t num_pages; - size_t clen; - - clen = min_t(const size_t, len, wsize); - num_pages = DIV_ROUND_UP(clen, PAGE_SIZE); - - if (cur_len) - *cur_len = clen; - - return num_pages; -} -#endif - static void cifs_uncached_writedata_release(struct kref *refcount) { @@ -3584,50 +3233,6 @@ cifs_uncached_writev_complete(struct work_struct *work) kref_put(&wdata->refcount, cifs_uncached_writedata_release); } -#if 0 // TODO: Remove for iov_iter support -static int -wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, - size_t *len, unsigned long *num_pages) -{ - size_t save_len, copied, bytes, cur_len = *len; - unsigned long i, nr_pages = *num_pages; - - save_len = cur_len; - for (i = 0; i < nr_pages; i++) { - bytes = min_t(const size_t, cur_len, PAGE_SIZE); - copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from); - cur_len -= copied; - /* - * If we didn't copy as much as we expected, then that - * may mean we trod into an unmapped area. Stop copying - * at that point. On the next pass through the big - * loop, we'll likely end up getting a zero-length - * write and bailing out of it. - */ - if (copied < bytes) - break; - } - cur_len = save_len - cur_len; - *len = cur_len; - - /* - * If we have no data to send, then that probably means that - * the copy above failed altogether. That's most likely because - * the address in the iovec was bogus. Return -EFAULT and let - * the caller free anything we allocated and bail out. - */ - if (!cur_len) - return -EFAULT; - - /* - * i + 1 now represents the number of pages we actually used in - * the copy phase above. 
- */ - *num_pages = i + 1; - return 0; -} -#endif - static int cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, struct cifs_aio_ctx *ctx) @@ -4214,83 +3819,6 @@ cifs_uncached_readv_complete(struct work_struct *work) kref_put(&rdata->refcount, cifs_readdata_release); } -#if 0 // TODO: Remove for iov_iter support - -static int -uncached_fill_pages(struct TCP_Server_Info *server, - struct cifs_readdata *rdata, struct iov_iter *iter, - unsigned int len) -{ - int result = 0; - unsigned int i; - unsigned int nr_pages = rdata->nr_pages; - unsigned int page_offset = rdata->page_offset; - - rdata->got_bytes = 0; - rdata->tailsz = PAGE_SIZE; - for (i = 0; i < nr_pages; i++) { - struct page *page = rdata->pages[i]; - size_t n; - unsigned int segment_size = rdata->pagesz; - - if (i == 0) - segment_size -= page_offset; - else - page_offset = 0; - - - if (len <= 0) { - /* no need to hold page hostage */ - rdata->pages[i] = NULL; - rdata->nr_pages--; - put_page(page); - continue; - } - - n = len; - if (len >= segment_size) - /* enough data to fill the page */ - n = segment_size; - else - rdata->tailsz = len; - len -= n; - - if (iter) - result = copy_page_from_iter( - page, page_offset, n, iter); -#ifdef CONFIG_CIFS_SMB_DIRECT - else if (rdata->mr) - result = n; -#endif - else - result = cifs_read_page_from_socket( - server, page, page_offset, n); - if (result < 0) - break; - - rdata->got_bytes += result; - } - - return result != -ECONNABORTED && rdata->got_bytes > 0 ? - rdata->got_bytes : result; -} - -static int -cifs_uncached_read_into_pages(struct TCP_Server_Info *server, - struct cifs_readdata *rdata, unsigned int len) -{ - return uncached_fill_pages(server, rdata, NULL, len); -} - -static int -cifs_uncached_copy_into_pages(struct TCP_Server_Info *server, - struct cifs_readdata *rdata, - struct iov_iter *iter) -{ - return uncached_fill_pages(server, rdata, iter, iter->count); -} -#endif - static int cifs_resend_rdata(struct cifs_readdata *rdata, struct list_head *rdata_list, struct cifs_aio_ctx *ctx) @@ -4900,140 +4428,6 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) return rc; } -#if 0 // TODO: Remove for iov_iter support - -static void -cifs_readv_complete(struct work_struct *work) -{ - unsigned int i, got_bytes; - struct cifs_readdata *rdata = container_of(work, - struct cifs_readdata, work); - - got_bytes = rdata->got_bytes; - for (i = 0; i < rdata->nr_pages; i++) { - struct page *page = rdata->pages[i]; - - if (rdata->result == 0 || - (rdata->result == -EAGAIN && got_bytes)) { - flush_dcache_page(page); - SetPageUptodate(page); - } else - SetPageError(page); - - if (rdata->result == 0 || - (rdata->result == -EAGAIN && got_bytes)) - cifs_readpage_to_fscache(rdata->mapping->host, page); - - unlock_page(page); - - got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes); - - put_page(page); - rdata->pages[i] = NULL; - } - kref_put(&rdata->refcount, cifs_readdata_release); -} - -static int -readpages_fill_pages(struct TCP_Server_Info *server, - struct cifs_readdata *rdata, struct iov_iter *iter, - unsigned int len) -{ - int result = 0; - unsigned int i; - u64 eof; - pgoff_t eof_index; - unsigned int nr_pages = rdata->nr_pages; - unsigned int page_offset = rdata->page_offset; - - /* determine the eof that the server (probably) has */ - eof = CIFS_I(rdata->mapping->host)->server_eof; - eof_index = eof ? 
(eof - 1) >> PAGE_SHIFT : 0;
-	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
-
-	rdata->got_bytes = 0;
-	rdata->tailsz = PAGE_SIZE;
-	for (i = 0; i < nr_pages; i++) {
-		struct page *page = rdata->pages[i];
-		unsigned int to_read = rdata->pagesz;
-		size_t n;
-
-		if (i == 0)
-			to_read -= page_offset;
-		else
-			page_offset = 0;
-
-		n = to_read;
-
-		if (len >= to_read) {
-			len -= to_read;
-		} else if (len > 0) {
-			/* enough for partial page, fill and zero the rest */
-			zero_user(page, len + page_offset, to_read - len);
-			n = rdata->tailsz = len;
-			len = 0;
-		} else if (page->index > eof_index) {
-			/*
-			 * The VFS will not try to do readahead past the
-			 * i_size, but it's possible that we have outstanding
-			 * writes with gaps in the middle and the i_size hasn't
-			 * caught up yet. Populate those with zeroed out pages
-			 * to prevent the VFS from repeatedly attempting to
-			 * fill them until the writes are flushed.
-			 */
-			zero_user(page, 0, PAGE_SIZE);
-			flush_dcache_page(page);
-			SetPageUptodate(page);
-			unlock_page(page);
-			put_page(page);
-			rdata->pages[i] = NULL;
-			rdata->nr_pages--;
-			continue;
-		} else {
-			/* no need to hold page hostage */
-			unlock_page(page);
-			put_page(page);
-			rdata->pages[i] = NULL;
-			rdata->nr_pages--;
-			continue;
-		}
-
-		if (iter)
-			result = copy_page_from_iter(
-					page, page_offset, n, iter);
-#ifdef CONFIG_CIFS_SMB_DIRECT
-		else if (rdata->mr)
-			result = n;
-#endif
-		else
-			result = cifs_read_page_from_socket(
-					server, page, page_offset, n);
-		if (result < 0)
-			break;
-
-		rdata->got_bytes += result;
-	}
-
-	return result != -ECONNABORTED && rdata->got_bytes > 0 ?
-						rdata->got_bytes : result;
-}
-
-static int
-cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
-			       struct cifs_readdata *rdata, unsigned int len)
-{
-	return readpages_fill_pages(server, rdata, NULL, len);
-}
-
-static int
-cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
-			       struct cifs_readdata *rdata,
-			       struct iov_iter *iter)
-{
-	return readpages_fill_pages(server, rdata, iter, iter->count);
-}
-#endif
-
 /*
  * Unlock a bunch of folios in the pagecache.
  */
-- cgit 

From e7388b8a1a5bdf47a9a46803a5686387c1ce060d Mon Sep 17 00:00:00 2001
From: David Howells 
Date: Mon, 16 Jan 2023 14:19:29 +0000
Subject: cifs: DIO to/from KVEC-type iterators should now work

DIO to/from KVEC-type iterators should now work: in non-RDMA/non-crypto
mode the iterator is passed down to the socket, and in RDMA or crypto mode
care is taken to handle vmap/vmalloc correctly and not to take page refs
when building a scatterlist.

Signed-off-by: David Howells 
cc: Steve French 
cc: Shyam Prasad N 
cc: Rohith Surabattula 
cc: Tom Talpey 
cc: Jeff Layton 
cc: linux-cifs@vger.kernel.org
Signed-off-by: Steve French 
---
 fs/cifs/file.c | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'fs/cifs/file.c')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index bd4ee2cdf3d4..58801d39213a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3549,16 +3549,6 @@ static ssize_t __cifs_writev(
 	struct cifs_aio_ctx *ctx;
 	int rc;
 
-	/*
-	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
-	 * In this case, fall back to non-direct write function.
- * this could be improved by getting pages directly in ITER_KVEC - */ - if (direct && iov_iter_is_kvec(from)) { - cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n"); - direct = false; - } - rc = generic_write_checks(iocb, from); if (rc <= 0) return rc; @@ -4092,16 +4082,6 @@ static ssize_t __cifs_readv( loff_t offset = iocb->ki_pos; struct cifs_aio_ctx *ctx; - /* - * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC, - * fall back to data copy read path - * this could be improved by getting pages directly in ITER_KVEC - */ - if (direct && iov_iter_is_kvec(to)) { - cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n"); - direct = false; - } - len = iov_iter_count(to); if (!len) return 0; -- cgit
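A minimal sketch of what the removal above enables, not code from this series: with the two KVEC fallbacks gone, a kernel-space caller can hand cifs a KVEC iterator under IOCB_DIRECT and have it passed all the way down to the transport. The helper name example_kvec_direct_read() is hypothetical, and it assumes an already-open struct file on a cifs mount.

#include <linux/fs.h>
#include <linux/uio.h>

/* Hypothetical illustration: synchronous direct read into a kernel buffer
 * through the file's ->read_iter().  Before this change, cifs downgraded
 * KVEC direct I/O to the buffered path ("use non-direct cifs_user_readv
 * for kvec I/O").
 */
static ssize_t example_kvec_direct_read(struct file *file, void *buf,
					size_t len, loff_t pos)
{
	struct kvec vec = { .iov_base = buf, .iov_len = len };
	struct iov_iter iter;
	struct kiocb kiocb;

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = pos;
	kiocb.ki_flags |= IOCB_DIRECT;

	iov_iter_kvec(&iter, ITER_DEST, &vec, 1, len);
	return call_read_iter(file, &kiocb, &iter);
}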
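The same iterator-first design underlies the readahead conversion earlier in this section: the read destination is described by the mapping's xarray plus a byte range rather than a page array, and any shortfall is zeroed so the covered folios can still be marked uptodate. Below is a minimal sketch of that pattern under stated assumptions: example_transport_read() is an invented stand-in for the server's async read op, and the caller is assumed to hold locked folios covering the range, as cifs_readahead() arranges.

#include <linux/pagemap.h>
#include <linux/uio.h>

/* Invented stand-in for the transport: pretend only half the requested
 * bytes arrived, advancing the iterator by the amount "read". */
static ssize_t example_transport_read(struct iov_iter *iter, size_t len)
{
	size_t got = len / 2;

	iov_iter_advance(iter, got);
	return got;
}

/* Sketch of the cifs_readahead()/cifs_readahead_complete() pattern:
 * describe a pagecache span with iov_iter_xarray(), let the transport
 * consume the iterator, then zero whatever was left unfilled. */
static int example_read_into_pagecache(struct address_space *mapping,
				       loff_t start, size_t len)
{
	struct iov_iter iter;
	ssize_t got;

	iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, start, len);

	got = example_transport_read(&iter, len);
	if (got < 0)
		return got;

	/* Pad out a short read so every folio in the span can be marked
	 * uptodate by the completion handler. */
	if (iov_iter_count(&iter) > 0)
		iov_iter_zero(iov_iter_count(&iter), &iter);
	return 0;
}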