diff options
Diffstat (limited to 'fs/nfs/file.c')
| -rw-r--r-- | fs/nfs/file.c | 878 |
1 files changed, 438 insertions, 440 deletions
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 94e94bd11aae..d020aab40c64 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/nfs/file.c * @@ -26,26 +27,26 @@ #include <linux/nfs_mount.h> #include <linux/mm.h> #include <linux/pagemap.h> -#include <linux/aio.h> #include <linux/gfp.h> +#include <linux/rmap.h> #include <linux/swap.h> +#include <linux/compaction.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> +#include <linux/filelock.h> #include "delegation.h" #include "internal.h" #include "iostat.h" #include "fscache.h" +#include "pnfs.h" + +#include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_FILE static const struct vm_operations_struct nfs_file_vm_ops; -/* Hack for future NFS swap support */ -#ifndef IS_SWAPFILE -# define IS_SWAPFILE(inode) (0) -#endif - int nfs_check_flags(int flags) { if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT)) @@ -63,9 +64,7 @@ nfs_file_open(struct inode *inode, struct file *filp) { int res; - dprintk("NFS: open file(%s/%s)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name); + dprintk("NFS: open file(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSOPEN); res = nfs_check_flags(filp->f_flags); @@ -73,25 +72,27 @@ nfs_file_open(struct inode *inode, struct file *filp) return res; res = nfs_open(inode, filp); + if (res == 0) + filp->f_mode |= FMODE_CAN_ODIRECT; return res; } int nfs_file_release(struct inode *inode, struct file *filp) { - dprintk("NFS: release(%s/%s)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name); + dprintk("NFS: release(%pD2)\n", filp); nfs_inc_stats(inode, NFSIOS_VFSRELEASE); - return nfs_release(inode, filp); + nfs_file_clear_open_context(filp); + nfs_fscache_release_file(inode, filp); + return 0; } EXPORT_SYMBOL_GPL(nfs_file_release); /** - * nfs_revalidate_size - Revalidate the file size - * @inode - pointer to inode struct - * @file - pointer to struct file + * nfs_revalidate_file_size - Revalidate the file size + * @inode: pointer to inode struct + * @filp: pointer to struct file * * Revalidates the file length. This is basically a wrapper around * nfs_revalidate_inode() that takes into account the fact that we may @@ -102,18 +103,11 @@ EXPORT_SYMBOL_GPL(nfs_file_release); static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs_inode *nfsi = NFS_I(inode); - - if (nfs_have_delegated_attributes(inode)) - goto out_noreval; if (filp->f_flags & O_DIRECT) goto force_reval; - if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) - goto force_reval; - if (nfs_attribute_timeout(inode)) + if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_SIZE)) goto force_reval; -out_noreval: return 0; force_reval: return __nfs_revalidate_inode(server, inode); @@ -121,10 +115,8 @@ force_reval: loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence) { - dprintk("NFS: llseek file(%s/%s, %lld, %d)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - offset, whence); + dprintk("NFS: llseek file(%pD2, %lld, %d)\n", + filp, offset, whence); /* * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate @@ -145,179 +137,194 @@ EXPORT_SYMBOL_GPL(nfs_file_llseek); /* * Flush all dirty pages, and check for write errors. */ -int +static int nfs_file_flush(struct file *file, fl_owner_t id) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(file); + errseq_t since; - dprintk("NFS: flush(%s/%s)\n", - dentry->d_parent->d_name.name, - dentry->d_name.name); + dprintk("NFS: flush(%pD2)\n", file); nfs_inc_stats(inode, NFSIOS_VFSFLUSH); if ((file->f_mode & FMODE_WRITE) == 0) return 0; - /* - * If we're holding a write delegation, then just start the i/o - * but don't wait for completion (or send a commit). - */ - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) - return filemap_fdatawrite(file->f_mapping); - /* Flush writes to the server and return any errors */ - return vfs_fsync(file, 0); + since = filemap_sample_wb_err(file->f_mapping); + nfs_wb_all(inode); + return filemap_check_wb_err(file->f_mapping, since); } -EXPORT_SYMBOL_GPL(nfs_file_flush); ssize_t -nfs_file_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +nfs_file_read(struct kiocb *iocb, struct iov_iter *to) { - struct dentry * dentry = iocb->ki_filp->f_path.dentry; - struct inode * inode = dentry->d_inode; + struct inode *inode = file_inode(iocb->ki_filp); ssize_t result; - if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); + trace_nfs_file_read(iocb, to); - dprintk("NFS: read(%s/%s, %lu@%lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); + if (iocb->ki_flags & IOCB_DIRECT) + return nfs_file_direct_read(iocb, to, false); + + dprintk("NFS: read(%pD2, %zu@%lu)\n", + iocb->ki_filp, + iov_iter_count(to), (unsigned long) iocb->ki_pos); + + result = nfs_start_io_read(inode); + if (result) + return result; result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); if (!result) { - result = generic_file_aio_read(iocb, iov, nr_segs, pos); + result = generic_file_read_iter(iocb, to); if (result > 0) nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); } + nfs_end_io_read(inode); return result; } EXPORT_SYMBOL_GPL(nfs_file_read); ssize_t -nfs_file_splice_read(struct file *filp, loff_t *ppos, - struct pipe_inode_info *pipe, size_t count, - unsigned int flags) +nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, + size_t len, unsigned int flags) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; - ssize_t res; - - dprintk("NFS: splice_read(%s/%s, %lu@%Lu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long long) *ppos); - - res = nfs_revalidate_mapping(inode, filp->f_mapping); - if (!res) { - res = generic_file_splice_read(filp, ppos, pipe, count, flags); - if (res > 0) - nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, res); + struct inode *inode = file_inode(in); + ssize_t result; + + dprintk("NFS: splice_read(%pD2, %zu@%llu)\n", in, len, *ppos); + + result = nfs_start_io_read(inode); + if (result) + return result; + + result = nfs_revalidate_mapping(inode, in->f_mapping); + if (!result) { + result = filemap_splice_read(in, ppos, pipe, len, flags); + if (result > 0) + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); } - return res; + nfs_end_io_read(inode); + return result; } EXPORT_SYMBOL_GPL(nfs_file_splice_read); int -nfs_file_mmap(struct file * file, struct vm_area_struct * vma) +nfs_file_mmap_prepare(struct vm_area_desc *desc) { - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct file *file = desc->file; + struct inode *inode = file_inode(file); int status; - dprintk("NFS: mmap(%s/%s)\n", - dentry->d_parent->d_name.name, dentry->d_name.name); + dprintk("NFS: mmap(%pD2)\n", file); - /* Note: generic_file_mmap() returns ENOSYS on nommu systems + /* Note: generic_file_mmap_prepare() returns ENOSYS on nommu systems * so we call that before revalidating the mapping */ - status = generic_file_mmap(file, vma); + status = generic_file_mmap_prepare(desc); if (!status) { - vma->vm_ops = &nfs_file_vm_ops; + desc->vm_ops = &nfs_file_vm_ops; status = nfs_revalidate_mapping(inode, file->f_mapping); } return status; } -EXPORT_SYMBOL_GPL(nfs_file_mmap); +EXPORT_SYMBOL_GPL(nfs_file_mmap_prepare); /* * Flush any dirty pages for this process, and check for write errors. * The return status from this call provides a reliable indication of * whether any write errors occurred for this process. - * - * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to - * disk, but it retrieves and clears ctx->error after synching, despite - * the two being set at the same time in nfs_context_set_write_error(). - * This is because the former is used to notify the _next_ call to - * nfs_file_write() that a write error occurred, and hence cause it to - * fall back to doing a synchronous write. */ -int -nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) +static int +nfs_file_fsync_commit(struct file *file, int datasync) { - struct dentry *dentry = file->f_path.dentry; - struct nfs_open_context *ctx = nfs_file_open_context(file); - struct inode *inode = dentry->d_inode; - int have_error, do_resend, status; - int ret = 0; + struct inode *inode = file_inode(file); + int ret, ret2; - dprintk("NFS: fsync file(%s/%s) datasync %d\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - datasync); + dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); - have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); - status = nfs_commit_inode(inode, FLUSH_SYNC); - have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); - if (have_error) { - ret = xchg(&ctx->error, 0); - if (ret) - goto out; - } - if (status < 0) { - ret = status; - goto out; - } - do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); - if (do_resend) - ret = -EAGAIN; -out: + ret = nfs_commit_inode(inode, FLUSH_SYNC); + ret2 = file_check_and_advance_wb_err(file); + if (ret2 < 0) + return ret2; return ret; } -EXPORT_SYMBOL_GPL(nfs_file_fsync_commit); -static int +int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - int ret; struct inode *inode = file_inode(file); + struct nfs_inode *nfsi = NFS_I(inode); + long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages); + long nredirtied; + int ret; + + trace_nfs_fsync_enter(inode); - do { - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + for (;;) { + ret = file_write_and_wait_range(file, start, end); if (ret != 0) break; - mutex_lock(&inode->i_mutex); - ret = nfs_file_fsync_commit(file, start, end, datasync); - mutex_unlock(&inode->i_mutex); - /* - * If nfs_file_fsync_commit detected a server reboot, then - * resend all dirty pages that might have been covered by - * the NFS_CONTEXT_RESEND_WRITES flag - */ - start = 0; - end = LLONG_MAX; - } while (ret == -EAGAIN); + ret = nfs_file_fsync_commit(file, datasync); + if (ret != 0) + break; + ret = pnfs_sync_inode(inode, !!datasync); + if (ret != 0) + break; + nredirtied = atomic_long_read(&nfsi->redirtied_pages); + if (nredirtied == save_nredirtied) + break; + save_nredirtied = nredirtied; + } + trace_nfs_fsync_exit(inode, ret); return ret; } +EXPORT_SYMBOL_GPL(nfs_file_fsync); + +void nfs_truncate_last_folio(struct address_space *mapping, loff_t from, + loff_t to) +{ + struct folio *folio; + + if (from >= to) + return; + + folio = filemap_lock_folio(mapping, from >> PAGE_SHIFT); + if (IS_ERR(folio)) + return; + + if (folio_mkclean(folio)) + folio_mark_dirty(folio); + + if (folio_test_uptodate(folio)) { + loff_t fpos = folio_pos(folio); + size_t offset = from - fpos; + size_t end = folio_size(folio); + + if (to - fpos < end) + end = to - fpos; + folio_zero_segment(folio, offset, end); + trace_nfs_size_truncate_folio(mapping->host, to); + } + + folio_unlock(folio); + folio_put(folio); +} +EXPORT_SYMBOL_GPL(nfs_truncate_last_folio); /* * Decide whether a read/modify/write cycle may be more efficient * then a modify/write/read cycle when writing to a page in the * page cache. * + * Some pNFS layout drivers can only read/write at a certain block + * granularity like all block devices and therefore we must perform + * read/modify/write whenever a page hasn't read yet and the data + * to be written there is not aligned to a block boundary and/or + * smaller than the block size. + * * The modify/write/read cycle may occur if a page is read before * being completely filled by the writer. In this situation, the * page must be completely written to stable storage on the server @@ -333,20 +340,35 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) * and that the new data won't completely replace the old data in * that range of the file. */ -static int nfs_want_read_modify_write(struct file *file, struct page *page, - loff_t pos, unsigned len) +static bool nfs_folio_is_full_write(struct folio *folio, loff_t pos, + unsigned int len) { - unsigned int pglen = nfs_page_length(page); - unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); + unsigned int pglen = nfs_folio_length(folio); + unsigned int offset = offset_in_folio(folio, pos); unsigned int end = offset + len; - if ((file->f_mode & FMODE_READ) && /* open for read? */ - !PageUptodate(page) && /* Uptodate? */ - !PagePrivate(page) && /* i/o request already? */ - pglen && /* valid bytes of file? */ - (end < pglen || offset)) /* replace all valid bytes? */ - return 1; - return 0; + return !pglen || (end >= pglen && !offset); +} + +static bool nfs_want_read_modify_write(struct file *file, struct folio *folio, + loff_t pos, unsigned int len) +{ + /* + * Up-to-date pages, those with ongoing or full-page write + * don't need read/modify/write + */ + if (folio_test_uptodate(folio) || folio_test_private(folio) || + nfs_folio_is_full_write(folio, pos, len)) + return false; + + if (pnfs_ld_read_whole_page(file_inode(file))) + return true; + if (folio_test_dropbehind(folio)) + return false; + /* Open for reading too? */ + if (file->f_mode & FMODE_READ) + return true; + return false; } /* @@ -357,90 +379,97 @@ static int nfs_want_read_modify_write(struct file *file, struct page *page, * If the writer ends up delaying the write, the writer needs to * increment the page use counts until he is done with the page. */ -static int nfs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) +static int nfs_write_begin(const struct kiocb *iocb, + struct address_space *mapping, + loff_t pos, unsigned len, struct folio **foliop, + void **fsdata) { - int ret; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - struct page *page; + struct folio *folio; + struct file *file = iocb->ki_filp; int once_thru = 0; + int ret; - dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - mapping->host->i_ino, len, (long long) pos); + trace_nfs_write_begin(file_inode(file), pos, len); -start: - /* - * Prevent starvation issues if someone is doing a consistency - * sync-to-disk - */ - ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, - nfs_wait_bit_killable, TASK_KILLABLE); - if (ret) - return ret; + dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n", + file, mapping->host->i_ino, len, (long long) pos); + nfs_truncate_last_folio(mapping, i_size_read(mapping->host), pos); - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) - return -ENOMEM; - *pagep = page; +start: + folio = write_begin_get_folio(iocb, mapping, pos >> PAGE_SHIFT, len); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); + goto out; + } + *foliop = folio; - ret = nfs_flush_incompatible(file, page); + ret = nfs_flush_incompatible(file, folio); if (ret) { - unlock_page(page); - page_cache_release(page); + folio_unlock(folio); + folio_put(folio); } else if (!once_thru && - nfs_want_read_modify_write(file, page, pos, len)) { + nfs_want_read_modify_write(file, folio, pos, len)) { once_thru = 1; - ret = nfs_readpage(file, page); - page_cache_release(page); + folio_clear_dropbehind(folio); + ret = nfs_read_folio(file, folio); + folio_put(folio); if (!ret) goto start; } +out: + trace_nfs_write_begin_done(file_inode(file), pos, len, ret); return ret; } -static int nfs_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) +static int nfs_write_end(const struct kiocb *iocb, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct folio *folio, void *fsdata) { - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + struct file *file = iocb->ki_filp; + struct nfs_open_context *ctx = nfs_file_open_context(file); + unsigned offset = offset_in_folio(folio, pos); int status; - dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, - mapping->host->i_ino, len, (long long) pos); + trace_nfs_write_end(file_inode(file), pos, len); + dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n", + file, mapping->host->i_ino, len, (long long) pos); /* * Zero any uninitialised parts of the page, and then mark the page * as up to date if it turns out that we're extending the file. */ - if (!PageUptodate(page)) { - unsigned pglen = nfs_page_length(page); - unsigned end = offset + len; + if (!folio_test_uptodate(folio)) { + size_t fsize = folio_size(folio); + unsigned pglen = nfs_folio_length(folio); + unsigned end = offset + copied; if (pglen == 0) { - zero_user_segments(page, 0, offset, - end, PAGE_CACHE_SIZE); - SetPageUptodate(page); + folio_zero_segments(folio, 0, offset, end, fsize); + folio_mark_uptodate(folio); } else if (end >= pglen) { - zero_user_segment(page, end, PAGE_CACHE_SIZE); + folio_zero_segment(folio, end, fsize); if (offset == 0) - SetPageUptodate(page); + folio_mark_uptodate(folio); } else - zero_user_segment(page, pglen, PAGE_CACHE_SIZE); + folio_zero_segment(folio, pglen, fsize); } - status = nfs_updatepage(file, page, offset, copied); + status = nfs_update_folio(file, folio, offset, copied); - unlock_page(page); - page_cache_release(page); + folio_unlock(folio); + folio_put(folio); - if (status < 0) + if (status < 0) { + trace_nfs_write_end_done(file_inode(file), pos, len, status); return status; + } NFS_I(mapping->host)->write_io += copied; + + if (nfs_ctx_key_to_expire(ctx, mapping->host)) + nfs_wb_all(mapping->host); + + trace_nfs_write_end_done(file_inode(file), pos, len, copied); return copied; } @@ -451,76 +480,66 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, * - Called if either PG_private or PG_fscache is set on the page * - Caller holds page lock */ -static void nfs_invalidate_page(struct page *page, unsigned int offset, - unsigned int length) +static void nfs_invalidate_folio(struct folio *folio, size_t offset, + size_t length) { - dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n", - page, offset, length); + struct inode *inode = folio->mapping->host; + dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n", + folio->index, offset, length); - if (offset != 0 || length < PAGE_CACHE_SIZE) - return; /* Cancel any unstarted writes on this page */ - nfs_wb_page_cancel(page_file_mapping(page)->host, page); - - nfs_fscache_invalidate_page(page, page->mapping->host); + if (offset != 0 || length < folio_size(folio)) + nfs_wb_folio(inode, folio); + else + nfs_wb_folio_cancel(inode, folio); + folio_wait_private_2(folio); /* [DEPRECATED] */ + trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length); } /* - * Attempt to release the private state associated with a page - * - Called if either PG_private or PG_fscache is set on the page - * - Caller holds page lock - * - Return true (may release page) or false (may not) + * Attempt to release the private state associated with a folio + * - Called if either private or fscache flags are set on the folio + * - Caller holds folio lock + * - Return true (may release folio) or false (may not) */ -static int nfs_release_page(struct page *page, gfp_t gfp) +static bool nfs_release_folio(struct folio *folio, gfp_t gfp) { - struct address_space *mapping = page->mapping; - - dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); - - /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not - * doing this memory reclaim for a fs-related allocation. - */ - if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL && - !(current->flags & PF_FSTRANS)) { - int how = FLUSH_SYNC; - - /* Don't let kswapd deadlock waiting for OOM RPC calls */ - if (current_is_kswapd()) - how = 0; - nfs_commit_inode(mapping->host, how); + dfprintk(PAGECACHE, "NFS: release_folio(%p)\n", folio); + + /* If the private flag is set, then the folio is not freeable */ + if (folio_test_private(folio)) { + if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL || + current_is_kswapd() || current_is_kcompactd()) + return false; + if (nfs_wb_folio(folio->mapping->host, folio) < 0) + return false; } - /* If PagePrivate() is set, then the page is not freeable */ - if (PagePrivate(page)) - return 0; - return nfs_fscache_release_page(page, gfp); + return nfs_fscache_release_folio(folio, gfp); } -static void nfs_check_dirty_writeback(struct page *page, +static void nfs_check_dirty_writeback(struct folio *folio, bool *dirty, bool *writeback) { struct nfs_inode *nfsi; - struct address_space *mapping = page_file_mapping(page); - - if (!mapping || PageSwapCache(page)) - return; + struct address_space *mapping = folio->mapping; /* - * Check if an unstable page is currently being committed and - * if so, have the VM treat it as if the page is under writeback - * so it will not block due to pages that will shortly be freeable. + * Check if an unstable folio is currently being committed and + * if so, have the VM treat it as if the folio is under writeback + * so it will not block due to folios that will shortly be freeable. */ nfsi = NFS_I(mapping->host); - if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { + if (atomic_read(&nfsi->commit_info.rpcs_out)) { *writeback = true; return; } /* - * If PagePrivate() is set, then the page is not freeable and as the - * inode is not being committed, it's not going to be cleaned in the - * near future so treat it as dirty + * If the private flag is set, then the folio is not freeable + * and as the inode is not being committed, it's not going to + * be cleaned in the near future so treat it as dirty */ - if (PagePrivate(page)) + if (folio_test_private(folio)) *dirty = true; } @@ -532,51 +551,85 @@ static void nfs_check_dirty_writeback(struct page *page, * - Caller holds page lock * - Return 0 if successful, -error otherwise */ -static int nfs_launder_page(struct page *page) +static int nfs_launder_folio(struct folio *folio) { - struct inode *inode = page_file_mapping(page)->host; - struct nfs_inode *nfsi = NFS_I(inode); + struct inode *inode = folio->mapping->host; + int ret; - dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", - inode->i_ino, (long long)page_offset(page)); + dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n", + inode->i_ino, folio_pos(folio)); - nfs_fscache_wait_on_page_write(nfsi, page); - return nfs_wb_page(inode, page); + folio_wait_private_2(folio); /* [DEPRECATED] */ + ret = nfs_wb_folio(inode, folio); + trace_nfs_launder_folio_done(inode, folio_pos(folio), + folio_size(folio), ret); + return ret; } -#ifdef CONFIG_NFS_SWAP static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { + unsigned long blocks; + long long isize; + int ret; + struct inode *inode = file_inode(file); + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_client *cl = NFS_SERVER(inode)->nfs_client; + + spin_lock(&inode->i_lock); + blocks = inode->i_blocks; + isize = inode->i_size; + spin_unlock(&inode->i_lock); + if (blocks*512 < isize) { + pr_warn("swap activate: swapfile has holes\n"); + return -EINVAL; + } + + ret = rpc_clnt_swap_activate(clnt); + if (ret) + return ret; + ret = add_swap_extent(sis, 0, sis->max, 0); + if (ret < 0) { + rpc_clnt_swap_deactivate(clnt); + return ret; + } + *span = sis->pages; - return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1); + + if (cl->rpc_ops->enable_swap) + cl->rpc_ops->enable_swap(inode); + + sis->flags |= SWP_FS_OPS; + return ret; } static void nfs_swap_deactivate(struct file *file) { - xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0); + struct inode *inode = file_inode(file); + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_client *cl = NFS_SERVER(inode)->nfs_client; + + rpc_clnt_swap_deactivate(clnt); + if (cl->rpc_ops->disable_swap) + cl->rpc_ops->disable_swap(file_inode(file)); } -#endif const struct address_space_operations nfs_file_aops = { - .readpage = nfs_readpage, - .readpages = nfs_readpages, - .set_page_dirty = __set_page_dirty_nobuffers, - .writepage = nfs_writepage, + .read_folio = nfs_read_folio, + .readahead = nfs_readahead, + .dirty_folio = filemap_dirty_folio, .writepages = nfs_writepages, .write_begin = nfs_write_begin, .write_end = nfs_write_end, - .invalidatepage = nfs_invalidate_page, - .releasepage = nfs_release_page, - .direct_IO = nfs_direct_IO, - .migratepage = nfs_migrate_page, - .launder_page = nfs_launder_page, + .invalidate_folio = nfs_invalidate_folio, + .release_folio = nfs_release_folio, + .migrate_folio = nfs_migrate_folio, + .launder_folio = nfs_launder_folio, .is_dirty_writeback = nfs_check_dirty_writeback, - .error_remove_page = generic_error_remove_page, -#ifdef CONFIG_NFS_SWAP + .error_remove_folio = generic_error_remove_folio, .swap_activate = nfs_swap_activate, .swap_deactivate = nfs_swap_deactivate, -#endif + .swap_rw = nfs_swap_rw, }; /* @@ -584,165 +637,164 @@ const struct address_space_operations nfs_file_aops = { * writable, implying that someone is about to modify the page through a * shared-writable mapping */ -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; - struct file *filp = vma->vm_file; - struct dentry *dentry = filp->f_path.dentry; + struct file *filp = vmf->vma->vm_file; + struct inode *inode = file_inode(filp); unsigned pagelen; - int ret = VM_FAULT_NOPAGE; + vm_fault_t ret = VM_FAULT_NOPAGE; struct address_space *mapping; + struct folio *folio = page_folio(vmf->page); - dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - filp->f_mapping->host->i_ino, - (long long)page_offset(page)); + dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n", + filp, filp->f_mapping->host->i_ino, + (long long)folio_pos(folio)); + + sb_start_pagefault(inode->i_sb); /* make sure the cache has finished storing the page */ - nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); + if (folio_test_private_2(folio) && /* [DEPRECATED] */ + folio_wait_private_2_killable(folio) < 0) { + ret = VM_FAULT_RETRY; + goto out; + } + + wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING, + nfs_wait_bit_killable, + TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); - lock_page(page); - mapping = page_file_mapping(page); - if (mapping != dentry->d_inode->i_mapping) + folio_lock(folio); + mapping = folio->mapping; + if (mapping != inode->i_mapping) goto out_unlock; - wait_on_page_writeback(page); + folio_wait_writeback(folio); - pagelen = nfs_page_length(page); + pagelen = nfs_folio_length(folio); if (pagelen == 0) goto out_unlock; ret = VM_FAULT_LOCKED; - if (nfs_flush_incompatible(filp, page) == 0 && - nfs_updatepage(filp, page, 0, pagelen) == 0) + if (nfs_flush_incompatible(filp, folio) == 0 && + nfs_update_folio(filp, folio, 0, pagelen) == 0) goto out; ret = VM_FAULT_SIGBUS; out_unlock: - unlock_page(page); + folio_unlock(folio); out: + sb_end_pagefault(inode->i_sb); return ret; } static const struct vm_operations_struct nfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = nfs_vm_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; -static int nfs_need_sync_write(struct file *filp, struct inode *inode) +ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) { - struct nfs_open_context *ctx; + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + unsigned int mntflags = NFS_SERVER(inode)->flags; + ssize_t result, written; + errseq_t since; + int error; - if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC)) - return 1; - ctx = nfs_file_open_context(filp); - if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) - return 1; - return 0; -} + trace_nfs_file_write(iocb, from); -ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct dentry * dentry = iocb->ki_filp->f_path.dentry; - struct inode * inode = dentry->d_inode; - unsigned long written = 0; - ssize_t result; - size_t count = iov_length(iov, nr_segs); + result = nfs_key_timeout_notify(file, inode); + if (result) + return result; - if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); + if (iocb->ki_flags & IOCB_DIRECT) + return nfs_file_direct_write(iocb, from, false); - dprintk("NFS: write(%s/%s, %lu@%Ld)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (long long) pos); + dprintk("NFS: write(%pD2, %zu@%Ld)\n", + file, iov_iter_count(from), (long long) iocb->ki_pos); - result = -EBUSY; if (IS_SWAPFILE(inode)) goto out_swapfile; /* * O_APPEND implies that we must revalidate the file length. */ - if (iocb->ki_filp->f_flags & O_APPEND) { - result = nfs_revalidate_file_size(inode, iocb->ki_filp); + if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) { + result = nfs_revalidate_file_size(inode, file); if (result) - goto out; + return result; } - result = count; - if (!count) - goto out; + nfs_clear_invalid_mapping(file->f_mapping); - result = generic_file_aio_write(iocb, iov, nr_segs, pos); + since = filemap_sample_wb_err(file->f_mapping); + error = nfs_start_io_write(inode); + if (error) + return error; + result = generic_write_checks(iocb, from); if (result > 0) - written = result; + result = generic_perform_write(iocb, from); + nfs_end_io_write(inode); + if (result <= 0) + goto out; - /* Return error values for O_DSYNC and IS_SYNC() */ - if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { - int err = vfs_fsync(iocb->ki_filp, 0); - if (err < 0) - result = err; + written = result; + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); + + if (mntflags & NFS_MOUNT_WRITE_EAGER) { + result = filemap_fdatawrite_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); + if (result < 0) + goto out; } - if (result > 0) - nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); + if (mntflags & NFS_MOUNT_WRITE_WAIT) { + filemap_fdatawait_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); + } + result = generic_write_sync(iocb, written); + if (result < 0) + return result; + out: + /* Return error values */ + error = filemap_check_wb_err(file->f_mapping, since); + switch (error) { + default: + break; + case -EDQUOT: + case -EFBIG: + case -ENOSPC: + nfs_wb_all(inode); + error = file_check_and_advance_wb_err(file); + if (error < 0) + result = error; + } return result; out_swapfile: printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); - goto out; + return -ETXTBSY; } EXPORT_SYMBOL_GPL(nfs_file_write); -ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, - struct file *filp, loff_t *ppos, - size_t count, unsigned int flags) -{ - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; - unsigned long written = 0; - ssize_t ret; - - dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", - dentry->d_parent->d_name.name, dentry->d_name.name, - (unsigned long) count, (unsigned long long) *ppos); - - /* - * The combination of splice and an O_APPEND destination is disallowed. - */ - - ret = generic_file_splice_write(pipe, filp, ppos, count, flags); - if (ret > 0) - written = ret; - - if (ret >= 0 && nfs_need_sync_write(filp, inode)) { - int err = vfs_fsync(filp, 0); - if (err < 0) - ret = err; - } - if (ret > 0) - nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); - return ret; -} -EXPORT_SYMBOL_GPL(nfs_file_splice_write); - static int do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; int status = 0; - unsigned int saved_type = fl->fl_type; + unsigned int saved_type = fl->c.flc_type; /* Try local locking first */ posix_test_lock(filp, fl); - if (fl->fl_type != F_UNLCK) { + if (fl->c.flc_type != F_UNLCK) { /* found a conflict */ goto out; } - fl->fl_type = saved_type; + fl->c.flc_type = saved_type; - if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) + if (nfs_have_read_or_write_delegation(inode)) goto out_noconflict; if (is_local) @@ -752,26 +804,10 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) out: return status; out_noconflict: - fl->fl_type = F_UNLCK; + fl->c.flc_type = F_UNLCK; goto out; } -static int do_vfs_lock(struct file *file, struct file_lock *fl) -{ - int res = 0; - switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { - case FL_POSIX: - res = posix_lock_file_wait(file, fl); - break; - case FL_FLOCK: - res = flock_lock_file_wait(file, fl); - break; - default: - BUG(); - } - return res; -} - static int do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { @@ -783,20 +819,20 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) * Flush all pending writes before doing anything * with locks.. */ - nfs_sync_mapping(filp->f_mapping); + nfs_wb_all(inode); l_ctx = nfs_get_lock_context(nfs_file_open_context(filp)); if (!IS_ERR(l_ctx)) { - status = nfs_iocounter_wait(&l_ctx->io_count); + status = nfs_iocounter_wait(l_ctx); nfs_put_lock_context(l_ctx); - if (status < 0) + /* NOTE: special case + * If we're signalled while cleaning up locks on process exit, we + * still need to complete the unlock. + */ + if (status < 0 && !(fl->c.flc_flags & FL_CLOSE)) return status; } - /* NOTE: special case - * If we're signalled while cleaning up locks on process exit, we - * still need to complete the unlock. - */ /* * Use local locking if mounted with "-onolock" or with appropriate * "-olocal_lock=" @@ -804,16 +840,11 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else - status = do_vfs_lock(filp, fl); + status = locks_lock_file_wait(filp, fl); return status; } static int -is_time_granular(struct timespec *ts) { - return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000)); -} - -static int do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { struct inode *inode = filp->f_mapping->host; @@ -834,23 +865,22 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) if (!is_local) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else - status = do_vfs_lock(filp, fl); + status = locks_lock_file_wait(filp, fl); if (status < 0) goto out; /* - * Revalidate the cache if the server has time stamps granular - * enough to detect subsecond changes. Otherwise, clear the - * cache to prevent missing any changes. + * Invalidate cache to prevent missing any changes. If + * the file is mapped, clear the page cache as well so + * those mappings will be loaded. * * This makes locking act as a cache coherency point. */ nfs_sync_mapping(filp->f_mapping); - if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { - if (is_time_granular(&NFS_SERVER(inode)->time_delta)) - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - else - nfs_zap_caches(inode); + if (!nfs_have_read_or_write_delegation(inode)) { + nfs_zap_caches(inode); + if (mapping_mapped(filp->f_mapping)) + nfs_revalidate_mapping(inode, filp->f_mapping); } out: return status; @@ -865,17 +895,14 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) int ret = -ENOLCK; int is_local = 0; - dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - fl->fl_type, fl->fl_flags, + dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n", + filp, fl->c.flc_type, fl->c.flc_flags, (long long)fl->fl_start, (long long)fl->fl_end); nfs_inc_stats(inode, NFSIOS_VFSLOCK); - /* No mandatory locks over NFS */ - if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) - goto out_err; + if (fl->c.flc_flags & FL_RECLAIM) + return -ENOGRACE; if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL) is_local = 1; @@ -888,7 +915,7 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) if (IS_GETLK(cmd)) ret = do_getlk(filp, cmd, fl, is_local); - else if (fl->fl_type == F_UNLCK) + else if (lock_is_unlock(fl)) ret = do_unlk(filp, cmd, fl, is_local); else ret = do_setlk(filp, cmd, fl, is_local); @@ -905,57 +932,27 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) struct inode *inode = filp->f_mapping->host; int is_local = 0; - dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", - filp->f_path.dentry->d_parent->d_name.name, - filp->f_path.dentry->d_name.name, - fl->fl_type, fl->fl_flags); + dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n", + filp, fl->c.flc_type, fl->c.flc_flags); - if (!(fl->fl_flags & FL_FLOCK)) + if (!(fl->c.flc_flags & FL_FLOCK)) return -ENOLCK; - /* - * The NFSv4 protocol doesn't support LOCK_MAND, which is not part of - * any standard. In principle we might be able to support LOCK_MAND - * on NFSv2/3 since NLMv3/4 support DOS share modes, but for now the - * NFS code is not set up for it. - */ - if (fl->fl_type & LOCK_MAND) - return -EINVAL; - if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) is_local = 1; /* We're simulating flock() locks using posix locks on the server */ - fl->fl_owner = (fl_owner_t)filp; - fl->fl_start = 0; - fl->fl_end = OFFSET_MAX; - - if (fl->fl_type == F_UNLCK) + if (lock_is_unlock(fl)) return do_unlk(filp, cmd, fl, is_local); return do_setlk(filp, cmd, fl, is_local); } EXPORT_SYMBOL_GPL(nfs_flock); -/* - * There is no protocol support for leases, so we have no way to implement - * them correctly in the face of opens by other clients. - */ -int nfs_setlease(struct file *file, long arg, struct file_lock **fl) -{ - dprintk("NFS: setlease(%s/%s, arg=%ld)\n", - file->f_path.dentry->d_parent->d_name.name, - file->f_path.dentry->d_name.name, arg); - return -EINVAL; -} -EXPORT_SYMBOL_GPL(nfs_setlease); - const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = nfs_file_read, - .aio_write = nfs_file_write, - .mmap = nfs_file_mmap, + .read_iter = nfs_file_read, + .write_iter = nfs_file_write, + .mmap_prepare = nfs_file_mmap_prepare, .open = nfs_file_open, .flush = nfs_file_flush, .release = nfs_file_release, @@ -963,8 +960,9 @@ const struct file_operations nfs_file_operations = { .lock = nfs_lock, .flock = nfs_flock, .splice_read = nfs_file_splice_read, - .splice_write = nfs_file_splice_write, + .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, - .setlease = nfs_setlease, + .setlease = simple_nosetlease, + .fop_flags = FOP_DONTCACHE, }; EXPORT_SYMBOL_GPL(nfs_file_operations); |
