From 6b56a89833fa7903595c8d138bb4927187315cba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Jun 2016 18:23:01 -0400 Subject: NFS: Kill NFS_INO_NFS_INO_FLUSHING: it is a performance killer filemap_datawrite() and friends already deal just fine with livelock. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e1c74d3db64d..980d44f3a84c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -657,16 +657,9 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control * int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { struct inode *inode = mapping->host; - unsigned long *bitlock = &NFS_I(inode)->flags; struct nfs_pageio_descriptor pgio; int err; - /* Stop dirtying of new pages while we sync */ - err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING, - nfs_wait_bit_killable, TASK_KILLABLE); - if (err) - goto out_err; - nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, @@ -674,10 +667,6 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); - clear_bit_unlock(NFS_INO_FLUSHING, bitlock); - smp_mb__after_atomic(); - wake_up_bit(bitlock, NFS_INO_FLUSHING); - if (err < 0) goto out_err; err = pgio.pg_error; -- cgit From 811ed92ecc9f47eee90beabcf5c2133f2a6d2440 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Jun 2016 18:25:56 -0400 Subject: NFS: writepage of a single page should not be synchronous It is almost always better to wait for more so that we can issue a bulk commit. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 980d44f3a84c..b13d48881d3a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -625,7 +625,7 @@ static int nfs_writepage_locked(struct page *page, int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); - nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), + nfs_pageio_init_write(&pgio, inode, 0, false, &nfs_async_write_completion_ops); err = nfs_do_writepage(page, wbc, &pgio, launder); nfs_pageio_complete(&pgio); -- cgit From 8fc3c3862728373e0d0f5abccc6afc56c69e0c63 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Jun 2016 21:32:24 -0400 Subject: NFS: Fix O_DIRECT verifier problems We should not be interested in looking at the value of the stable field, since that could take any value. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b13d48881d3a..3087fb6f1983 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1789,7 +1789,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. */ - if (!memcmp(&req->wb_verf, &data->verf.verifier, sizeof(req->wb_verf))) { + if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { /* We have a match */ nfs_inode_remove_request(req); dprintk(" OK\n"); -- cgit From 837bb1d752d92ea4d870877ffbd6ec5cf76624b3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 25 Jun 2016 18:12:03 -0400 Subject: NFSv4.2: Fix writeback races in nfs4_copy_file_range We need to ensure that any writes to the destination file are serialised with the copy, meaning that the writeback has to occur under the inode lock. Also relax the writeback requirement on the source, and rely on the stateid checking to tell us if the source rebooted. Add the helper nfs_filemap_write_and_wait_range() to call pnfs_sync_inode() as is appropriate for pNFS servers that may need a layoutcommit. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3087fb6f1983..538a473b324b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1912,6 +1912,24 @@ out_mark_dirty: } EXPORT_SYMBOL_GPL(nfs_write_inode); +/* + * Wrapper for filemap_write_and_wait_range() + * + * Needed for pNFS in order to ensure data becomes visible to the + * client. + */ +int nfs_filemap_write_and_wait_range(struct address_space *mapping, + loff_t lstart, loff_t lend) +{ + int ret; + + ret = filemap_write_and_wait_range(mapping, lstart, lend); + if (ret == 0) + ret = pnfs_sync_inode(mapping->host, true); + return ret; +} +EXPORT_SYMBOL_GPL(nfs_filemap_write_and_wait_range); + /* * flush the inode to disk. */ -- cgit From ce52914eb76efd62aa48d738cf845b37852bf920 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 7 Jun 2016 15:14:48 -0400 Subject: sunrpc: move NO_CRKEY_TIMEOUT to the auth->au_flags A generic_cred can be used to look up a unx_cred or a gss_cred, so it's not really safe to use the the generic_cred->acred->ac_flags to store the NO_CRKEY_TIMEOUT flag. A lookup for a unx_cred triggered while the KEY_EXPIRE_SOON flag is already set will cause both NO_CRKEY_TIMEOUT and KEY_EXPIRE_SOON to be set in the ac_flags, leaving the user associated with the auth_cred to be in a state where they're perpetually doing 4K NFS_FILE_SYNC writes. This can be reproduced as follows: 1. Mount two NFS filesystems, one with sec=krb5 and one with sec=sys. They do not need to be the same export, nor do they even need to be from the same NFS server. Also, v3 is fine. $ sudo mount -o v3,sec=krb5 server1:/export /mnt/krb5 $ sudo mount -o v3,sec=sys server2:/export /mnt/sys 2. As the normal user, before accessing the kerberized mount, kinit with a short lifetime (but not so short that renewing the ticket would leave you within the 4-minute window again by the time the original ticket expires), e.g. $ kinit -l 10m -r 60m 3. Do some I/O to the kerberized mount and verify that the writes are wsize, UNSTABLE: $ dd if=/dev/zero of=/mnt/krb5/file bs=1M count=1 4. Wait until you're within 4 minutes of key expiry, then do some more I/O to the kerberized mount to ensure that RPC_CRED_KEY_EXPIRE_SOON gets set. Verify that the writes are 4K, FILE_SYNC: $ dd if=/dev/zero of=/mnt/krb5/file bs=1M count=1 5. Now do some I/O to the sec=sys mount. This will cause RPC_CRED_NO_CRKEY_TIMEOUT to be set: $ dd if=/dev/zero of=/mnt/sys/file bs=1M count=1 6. Writes for that user will now be permanently 4K, FILE_SYNC for that user, regardless of which mount is being written to, until you reboot the client. Renewing the kerberos ticket (assuming it hasn't already expired) will have no effect. Grabbing a new kerberos ticket at this point will have no effect either. Move the flag to the auth->au_flags field (which is currently unused) and rename it slightly to reflect that it's no longer associated with the auth_cred->ac_flags. Add the rpc_auth to the arg list of rpcauth_cred_key_to_expire and check the au_flags there too. Finally, add the inode to the arg list of nfs_ctx_key_to_expire so we can determine the rpc_auth to pass to rpcauth_cred_key_to_expire. Signed-off-by: Scott Mayhew Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e1c74d3db64d..0b949a06b297 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1195,9 +1195,11 @@ nfs_key_timeout_notify(struct file *filp, struct inode *inode) /* * Test if the open context credential key is marked to expire soon. */ -bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx) +bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode) { - return rpcauth_cred_key_to_expire(ctx->cred); + struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth; + + return rpcauth_cred_key_to_expire(auth, ctx->cred); } /* -- cgit From 149a4fddd0a72d526abbeac0c8deaab03559836a Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Mon, 18 Jul 2016 10:41:57 -0400 Subject: nfs: don't create zero-length requests NFS doesn't expect requests with wb_bytes set to zero and may make unexpected decisions about how to handle that request at the page IO layer. Skip request creation if we won't have any wb_bytes in the request. Signed-off-by: Benjamin Coddington Signed-off-by: Alexey Dobriyan Reviewed-by: Weston Andros Adamson Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/nfs/write.c') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0b949a06b297..b5f3da346f1d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1291,6 +1291,9 @@ int nfs_updatepage(struct file *file, struct page *page, dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n", file, count, (long long)(page_file_offset(page) + offset)); + if (!count) + goto out; + if (nfs_can_extend_write(file, page, inode)) { count = max(count + offset, nfs_page_length(page)); offset = 0; @@ -1301,7 +1304,7 @@ int nfs_updatepage(struct file *file, struct page *page, nfs_set_pageerror(page); else __set_page_dirty_nobuffers(page); - +out: dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", status, (long long)i_size_read(inode)); return status; -- cgit