From e8d975e73e5fa05f983fbf2723120edcf68e0b38 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 15 May 2015 11:45:31 -0400 Subject: fixing infinite OPEN loop in 4.0 stateid recovery Problem: When an operation like WRITE receives a BAD_STATEID, even though recovery code clears the RECLAIM_NOGRACE recovery flag before recovering the open state, because of clearing delegation state for the associated inode, nfs_inode_find_state_and_recover() gets called and it makes the same state with RECLAIM_NOGRACE flag again. As a results, when we restart looking over the open states, we end up in the infinite loop instead of breaking out in the next test of state flags. Solution: unset the RECLAIM_NOGRACE set because of calling of nfs_inode_find_state_and_recover() after returning from calling recover_open() function. Signed-off-by: Olga Kornievskaia Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2782cfca2265..ddef1dc80cf7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1482,6 +1482,8 @@ restart: spin_unlock(&state->state_lock); } nfs4_put_open_state(state); + clear_bit(NFS4CLNT_RECLAIM_NOGRACE, + &state->flags); spin_lock(&sp->so_lock); goto restart; } -- cgit From 5cae02f42793130e1387f4ec09c4d07056ce9fa5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Jun 2015 10:30:11 -0400 Subject: NFSv4: Always drain the slot table before re-establishing the lease While the NFSv4.1 code has always drained the slot tables in order to stop non-recovery related RPC calls when doing lease recovery, the NFSv4 code did not. The reason for the difference in behaviour is that NFSv4 does not have session state, and so RPC calls can in theory proceed while recovery is happening. In practice, however, anything I/O or state related needs to wait until recovery is over. This patch changes the behaviour of NFSv4 to match that of NFSv4.1 so that we can simplify the state recovery code by assuming that we do not have to deal with races between recovery and ordinary I/O. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ddef1dc80cf7..605840dc89cf 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -309,7 +309,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) goto do_confirm; - nfs4_begin_drain_session(clp); status = nfs4_proc_exchange_id(clp, cred); if (status != 0) goto out; @@ -1832,6 +1831,7 @@ static int nfs4_establish_lease(struct nfs_client *clp) clp->cl_mvops->reboot_recovery_ops; int status; + nfs4_begin_drain_session(clp); cred = nfs4_get_clid_cred(clp); if (cred == NULL) return -ENOENT; -- cgit From 7ef5ca4fe41e6a51d9bdcbbf7c66f203675e8500 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 8 May 2015 13:10:40 +1000 Subject: NFS: report more appropriate block size for directories. In glibc 2.21 (and several previous), a call to opendir() will result in a 32K (BUFSIZ*4) buffer being allocated and passed to getdents. However a call to fdopendir() results in an 'fstat' request to determine block size and a matching buffer allocated for subsequent use with getdents. This will typically be 1M. The first getdents call on an NFS directory will always use READDIR_PLUS (or NFSv4 equivalent) if available. Subsequent getdents calls only use this more expensive version if some 'stat' requests are made between the getdents calls. For this reason it is good to keep at least that first getdents call relatively short. When fdopendir() and readdir() is used on a large directory, it takes approximately 32 times as long to complete as using "opendir". Current versions of 'find' use fdopendir() and demonstrate this slowness. 'stat' on a directory currently returns the 'wsize'. This number has no meaning on directories. Actual READDIR requests are limited to ->dtsize, which itself is capped at 4 pages, coincidently the same as BUFSIZ*4. So this is a meaningful number to use as the blocksize on directories, and has the effect of making 'find' on large directories go a lot faster. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f734562c6d24..14b4709b9242 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -678,6 +678,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) if (!err) { generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); + if (S_ISDIR(inode->i_mode)) + stat->blksize = NFS_SERVER(inode)->dtsize; } out: trace_nfs_getattr_exit(inode, err); -- cgit From d683cc49daf7c5afca8cd9654aaa1bf63cdf2ad9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 26 May 2015 11:53:52 -0400 Subject: NFS: Fix size of NFSACL SETACL operations When encoding the NFSACL SETACL operation, reserve just the estimated size of the ACL rather than a fixed maximum. This eliminates needless zero padding on the wire that the server ignores. Fixes: ee5dc7732bd5 ('NFS: Fix "kernel BUG at fs/nfs/nfs3xdr.c:1338!"') Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 53852a4bd88b..9b04c2e6fffc 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, if (args->npages != 0) xdr_write_pages(xdr, args->pages, 0, args->len); else - xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); + xdr_reserve_space(xdr, args->len); error = nfsacl_encode(xdr->buf, base, args->inode, (args->mask & NFS_ACL) ? -- cgit From 13985b1f77d09623f4114ce2d57e69a027ed9a09 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 28 May 2015 23:02:18 +0200 Subject: NFS: drop unneeded goto Delete jump to a label on the next line, when that label is not used elsewhere. A simplified version of the semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @r@ identifier l; @@ -if (...) goto l; -l: // Also drop the unnecessary ret variable. Signed-off-by: Julia Lawall Signed-off-by: Trond Myklebust --- fs/nfs/nfs4idmap.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 2e1737c40a29..535dfc69c628 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -494,12 +494,7 @@ nfs_idmap_delete(struct nfs_client *clp) int nfs_idmap_init(void) { - int ret; - ret = nfs_idmap_init_keyring(); - if (ret != 0) - goto out; -out: - return ret; + return nfs_idmap_init_keyring(); } void nfs_idmap_quit(void) -- cgit From 8eee52af27b96047894bd18649102a0d2de3c3bb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Jun 2015 13:51:13 -0400 Subject: NFSv4: nfs4_handle_delegation_recall_error should ignore EAGAIN EAGAIN is a valid return code from nfs4_open_recover(), and should be handled by nfs4_handle_delegation_recall_error by simply passing it through. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 55e1e3af23a3..d25de2ab12fb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1684,6 +1684,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct "%d.\n", __func__, err); case 0: case -ENOENT: + case -EAGAIN: case -ESTALE: break; case -NFS4ERR_BADSESSION: -- cgit From 3c87ef6efb40f0e339d705c194b2224f854ec38e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:25 -0400 Subject: sunrpc: keep a count of swapfiles associated with the rpc_clnt Jerome reported seeing a warning pop when working with a swapfile on NFS. The nfs_swap_activate can end up calling sk_set_memalloc while holding the rcu_read_lock and that function can sleep. To fix that, we need to take a reference to the xprt while holding the rcu_read_lock, set the socket up for swapping and then drop that reference. But, xprt_put is not exported and having NFS deal with the underlying xprt is a bit of layering violation anyway. Fix this by adding a set of activate/deactivate functions that take a rpc_clnt pointer instead of an rpc_xprt, and have nfs_swap_activate and nfs_swap_deactivate call those. Also, add a per-rpc_clnt atomic counter to keep track of the number of active swapfiles associated with it. When the counter does a 0->1 transition, we enable swapping on the xprt, when we do a 1->0 transition we disable swapping on it. This also allows us to be a bit more selective with the RPC_TASK_SWAPPER flag. If non-swapper and swapper clnts are sharing a xprt, then we only need to flag the tasks from the swapper clnt with that flag. Acked-by: Mel Gorman Reported-by: Jerome Marchand Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8b8d83a526ce..cc4fa1ed61fc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -555,31 +555,22 @@ static int nfs_launder_page(struct page *page) return nfs_wb_page(inode, page); } -#ifdef CONFIG_NFS_SWAP static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { - int ret; struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); *span = sis->pages; - rcu_read_lock(); - ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1); - rcu_read_unlock(); - - return ret; + return rpc_clnt_swap_activate(clnt); } static void nfs_swap_deactivate(struct file *file) { struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); - rcu_read_lock(); - xs_swapper(rcu_dereference(clnt->cl_xprt), 0); - rcu_read_unlock(); + rpc_clnt_swap_deactivate(clnt); } -#endif const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, @@ -596,10 +587,8 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, -#ifdef CONFIG_NFS_SWAP .swap_activate = nfs_swap_activate, .swap_deactivate = nfs_swap_deactivate, -#endif }; /* -- cgit From c86c90c656fa2c94c2491dd86cf09501193f0dad Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Thu, 4 Jun 2015 17:04:17 +0200 Subject: NFSv4: handle nfs4_get_referral failure nfs4_proc_lookup_common is supposed to return a posix error, we have to handle any error returned that isn't errno Reported-by: Olga Kornievskaia Signed-off-by: Frank S. Filz Signed-off-by: Dominique Martinet Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 55e1e3af23a3..d689ea37be84 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3355,6 +3355,8 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, goto out; case -NFS4ERR_MOVED: err = nfs4_get_referral(client, dir, name, fattr, fhandle); + if (err == -NFS4ERR_MOVED) + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); goto out; case -NFS4ERR_WRONGSEC: err = -EPERM; -- cgit From 11598b8ff2b97cf034d0c025cf125c92b574bafc Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 10 Jun 2015 16:54:28 -0400 Subject: NFS: Remove unused nfs_rw_ops->rw_release() function This was only ever set to nfs_writeback_release_common(), a function which is completely empty. Let's just drop this function pointer and simplify the code a bit. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 -- fs/nfs/write.c | 6 ------ 2 files changed, 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 282b39369510..e9953ab1ac40 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -690,8 +690,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, static void nfs_pgio_release(void *calldata) { struct nfs_pgio_header *hdr = calldata; - if (hdr->rw_ops->rw_release) - hdr->rw_ops->rw_release(hdr); nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dfc19f1575a1..d0f1f210342e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1347,11 +1347,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) -{ - /* do nothing! */ -} - /* * Special version of should_remove_suid() that ignores capabilities. */ @@ -2012,7 +2007,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { .rw_mode = FMODE_WRITE, .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, - .rw_release = nfs_writeback_release_common, .rw_done = nfs_writeback_done, .rw_result = nfs_writeback_result, .rw_initiate = nfs_initiate_write, -- cgit From 4ed0d83d057c16efeca4210ba8fb2b5029333fb3 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Wed, 10 Jun 2015 20:15:29 +0530 Subject: NFS: Convert use of __constant_htonl to htonl In little endian cases, the macro htonl unfolds to __swab32 which provides special case for constants. In big endian cases, __constant_htonl and htonl expand directly to the same expression. So, replace __constant_htonl with htonl with the goal of getting rid of the definition of __constant_htonl completely. The semantic patch that performs this transformation is as follows: @@expression x;@@ - __constant_htonl(x) + htonl(x) Signed-off-by: Vaishali Thakkar Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 19ca95cdfd9b..6b1697a01dde 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -909,7 +909,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r xdr_init_encode(&xdr_out, &rqstp->rq_res, p); status = decode_compound_hdr_arg(&xdr_in, &hdr_arg); - if (status == __constant_htonl(NFS4ERR_RESOURCE)) + if (status == htonl(NFS4ERR_RESOURCE)) return rpc_garbage_args; if (hdr_arg.minorversion == 0) { -- cgit From e937ee714b2df638275a61a99e0d033682f764d2 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 2 Jun 2015 18:58:46 +0800 Subject: nfs: Only update callback sequnce id when CB_SEQUENCE success When testing pnfs layout, nfsd got error NFS4ERR_SEQ_MISORDERED. It is caused by nfs return NFS4ERR_DELAY before validate_seqid(), don't update the sequnce id, but nfsd updates the sequnce id !!! According to RFC5661 20.9.3, " If CB_SEQUENCE returns an error, then the state of the slot (sequence ID, cached reply) MUST NOT change. " Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 197806fb87ff..7e9653aae5d8 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -327,10 +327,8 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr); /* Normal */ - if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { - slot->seq_nr++; + if (likely(args->csa_sequenceid == slot->seq_nr + 1)) goto out_ok; - } /* Replay */ if (args->csa_sequenceid == slot->seq_nr) { @@ -418,6 +416,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_process_state *cps) { struct nfs4_slot_table *tbl; + struct nfs4_slot *slot; struct nfs_client *clp; int i; __be32 status = htonl(NFS4ERR_BADSESSION); @@ -429,7 +428,9 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, if (!(clp->cl_session->flags & SESSION4_BACK_CHAN)) goto out; + tbl = &clp->cl_session->bc_slot_table; + slot = tbl->slots + args->csa_slotid; spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ @@ -444,7 +445,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, goto out; } - status = validate_seqid(&clp->cl_session->bc_slot_table, args); + status = validate_seqid(tbl, args); spin_unlock(&tbl->slot_tbl_lock); if (status) goto out; @@ -468,6 +469,13 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + /* + * RFC5661 20.9.3 + * If CB_SEQUENCE returns an error, then the state of the slot + * (sequence ID, cached reply) MUST NOT change. + */ + slot->seq_nr++; + out: cps->clp = clp; /* put in nfs4_callback_compound */ for (i = 0; i < args->csa_nrclists; i++) -- cgit From 6f02dc88be1d5ecfcc2b708250f4e5d49295326c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 4 Jun 2015 18:40:13 -0400 Subject: nfs: deny backchannel RPCs with an incorrect authflavor instead of dropping them A drop should really only be done when the frame is malformed or we have reason to think that there is some sort of DoS going on. When we get an RPC with bad auth, we should send back an error instead. Cc: Andy Adamson Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 8d129bb7355a..682529c00996 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -458,7 +458,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) * pg_authenticate method for nfsv4 callback threads. * * The authflavor has been negotiated, so an incorrect flavor is a server - * bug. Drop packets with incorrect authflavor. + * bug. Deny packets with incorrect authflavor. * * All other checking done after NFS decoding where the nfs_client can be * found in nfs4_callback_compound @@ -468,12 +468,12 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) switch (rqstp->rq_authop->flavour) { case RPC_AUTH_NULL: if (rqstp->rq_proc != CB_NULL) - return SVC_DROP; + return SVC_DENIED; break; case RPC_AUTH_GSS: /* No RPC_AUTH_GSS support yet in NFSv4.1 */ if (svc_is_backchannel(rqstp)) - return SVC_DROP; + return SVC_DENIED; } return SVC_OK; } -- cgit From 0579c8d2084adf932f09fe7edb5ee9328795d89f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 2 Jun 2015 18:59:01 +0800 Subject: nfs: Initialize cb_sequenceres information before validate_seqid() For a cb_layoutrecall replay, nfsd got CB_SEQUENCE status of zero, but all informations of cb_sequenceres are zero too !!! validate_seqid() return NFS4ERR_RETRY_UNCACHED_REP for a replay, and skip the initlize cb_sequenceres. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 7e9653aae5d8..17a5c571a006 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -445,6 +445,13 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, goto out; } + memcpy(&res->csr_sessionid, &args->csa_sessionid, + sizeof(res->csr_sessionid)); + res->csr_sequenceid = args->csa_sequenceid; + res->csr_slotid = args->csa_slotid; + res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + status = validate_seqid(tbl, args); spin_unlock(&tbl->slot_tbl_lock); if (status) @@ -462,13 +469,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, goto out; } - memcpy(&res->csr_sessionid, &args->csa_sessionid, - sizeof(res->csr_sessionid)); - res->csr_sequenceid = args->csa_sequenceid; - res->csr_slotid = args->csa_slotid; - res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; - res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; - /* * RFC5661 20.9.3 * If CB_SEQUENCE returns an error, then the state of the slot -- cgit From 4e54ab8d8c744f977ba13404342aeb260e353330 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 11 Jun 2015 21:13:58 -0400 Subject: NFS: Ensure that we update the sequence id under the slot table lock Fix a callback slot table regression. Fixes: e937ee714b2d ("nfs: Only update callback sequnce id when CB_SEQUENCE success") Cc: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 17a5c571a006..29e3c1b011b7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -435,14 +435,13 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { - spin_unlock(&tbl->slot_tbl_lock); status = htonl(NFS4ERR_DELAY); /* Return NFS4ERR_BADSESSION if we're draining the session * in order to reset it. */ if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) status = htonl(NFS4ERR_BADSESSION); - goto out; + goto out_unlock; } memcpy(&res->csr_sessionid, &args->csa_sessionid, @@ -453,9 +452,8 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; status = validate_seqid(tbl, args); - spin_unlock(&tbl->slot_tbl_lock); if (status) - goto out; + goto out_unlock; cps->slotid = args->csa_slotid; @@ -466,7 +464,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, */ if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) { status = htonl(NFS4ERR_DELAY); - goto out; + goto out_unlock; } /* @@ -475,6 +473,8 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, * (sequence ID, cached reply) MUST NOT change. */ slot->seq_nr++; +out_unlock: + spin_unlock(&tbl->slot_tbl_lock); out: cps->clp = clp; /* put in nfs4_callback_compound */ -- cgit From 455b6ee6452751dc471b5a434f9398fb825d5149 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 12 Jun 2015 18:58:50 +0200 Subject: pnfs/flexfiles: use swap() in ff_layout_sort_mirrors() Use kernel.h macro definition. Thanks to Julia Lawall for Coccinelle scripting support. Signed-off-by: Fabian Frederick Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7d05089e52d6..0f1410c94827 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -182,17 +182,14 @@ static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls) static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) { - struct nfs4_ff_layout_mirror *tmp; int i, j; for (i = 0; i < fls->mirror_array_cnt - 1; i++) { for (j = i + 1; j < fls->mirror_array_cnt; j++) if (fls->mirror_array[i]->efficiency < - fls->mirror_array[j]->efficiency) { - tmp = fls->mirror_array[i]; - fls->mirror_array[i] = fls->mirror_array[j]; - fls->mirror_array[j] = tmp; - } + fls->mirror_array[j]->efficiency) + swap(fls->mirror_array[i], + fls->mirror_array[j]); } } -- cgit From 3a6bb738792500e8b4534c0350c13a132bac0492 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:57 -0400 Subject: nfs: convert setclientid and exchange_id encoders to use clp->cl_owner_id ...instead of buffers that are part of their arg structs. We already hold a reference to the client, so we might as well use the allocated buffer. In the event that we can't allocate the clp->cl_owner_id, then just return -ENOMEM. Note too that we switch from a GFP_KERNEL allocation here to GFP_NOFS. It's possible we could end up trying to do a SETCLIENTID or EXCHANGE_ID in order to reclaim some memory, and the GFP_KERNEL allocations in the existing code could cause recursion back into NFS reclaim. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 21 ++++++++++++++++----- fs/nfs/nfs4xdr.c | 8 +++++--- 2 files changed, 21 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d689ea37be84..b2afe9556147 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5046,7 +5046,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, struct nfs4_setclientid setclientid = { .sc_verifier = &sc_verifier, .sc_prog = program, - .sc_cb_ident = clp->cl_cb_ident, + .sc_clnt = clp, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], @@ -5076,6 +5076,12 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, nfs4_init_nonuniform_client_string(clp, setclientid.sc_name, sizeof(setclientid.sc_name)); + + if (!clp->cl_owner_id) { + status = -ENOMEM; + goto out; + } + /* cb_client4 */ setclientid.sc_netid_len = nfs4_init_callback_netid(clp, @@ -5085,9 +5091,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - dprintk("NFS call setclientid auth=%s, '%.*s'\n", + dprintk("NFS call setclientid auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, - setclientid.sc_name_len, setclientid.sc_name); + clp->cl_owner_id); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) { status = PTR_ERR(task); @@ -6850,9 +6856,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, nfs4_init_boot_verifier(clp, &verifier); args.id_len = nfs4_init_uniform_client_string(clp, args.id, sizeof(args.id)); - dprintk("NFS call exchange_id auth=%s, '%.*s'\n", + if (!clp->cl_owner_id) { + status = -ENOMEM; + goto out; + } + + dprintk("NFS call exchange_id auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, - args.id_len, args.id); + clp->cl_owner_id); res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), GFP_NOFS); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0aea97841d30..f58e8a979397 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1667,13 +1667,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr); encode_nfs4_verifier(xdr, setclientid->sc_verifier); - encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); + encode_string(xdr, strlen(setclientid->sc_clnt->cl_owner_id), + setclientid->sc_clnt->cl_owner_id); p = reserve_space(xdr, 4); *p = cpu_to_be32(setclientid->sc_prog); encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); p = reserve_space(xdr, 4); - *p = cpu_to_be32(setclientid->sc_cb_ident); + *p = cpu_to_be32(setclientid->sc_clnt->cl_cb_ident); } static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) @@ -1747,7 +1748,8 @@ static void encode_exchange_id(struct xdr_stream *xdr, encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); encode_nfs4_verifier(xdr, args->verifier); - encode_string(xdr, args->id_len, args->id); + encode_string(xdr, strlen(args->client->cl_owner_id), + args->client->cl_owner_id); encode_uint32(xdr, args->flags); encode_uint32(xdr, args->state_protect.how); -- cgit From b8fb2f595e8009ad245d564b7a443d108a2334cd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:58 -0400 Subject: nfs: update maxsz values for SETCLIENTID and EXCHANGE_ID The spec allows for up to NFS4_OPAQUE_LIMIT (1k). While we'll almost certainly never use that much, these ops are generally the only ones in the compound so we might as well allow for them to be that large. Also, the existing code didn't add in a word for the opaque length field for either name string. Fix that while we're in there. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f58e8a979397..a07555d6968c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -139,7 +139,8 @@ static int nfs4_stat_to_errno(int); #define encode_setclientid_maxsz \ (op_encode_hdr_maxsz + \ XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ - XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ + /* client name */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ 1 /* sc_prog */ + \ 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ @@ -288,7 +289,8 @@ static int nfs4_stat_to_errno(int); #define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \ encode_verifier_maxsz + \ 1 /* co_ownerid.len */ + \ - XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \ + /* eia_clientowner */ \ + 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ 1 /* flags */ + \ 1 /* spa_how */ + \ /* max is SP4_MACH_CRED (for now) */ + \ -- cgit From a319268891089386bd303bb90ace89d75e47102f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:43:59 -0400 Subject: nfs: make nfs4_init_nonuniform_client_string use a dynamically allocated buffer The way the *_client_string functions work is a little goofy. They build the string in an on-stack buffer and then use kstrdup to copy it. This is not only stack-heavy but artificially limits the size of the client name string. Change it so that we determine the length of the string, allocate it and then scnprintf into it. Since the contents of the nonuniform string depend on rcu-managed data structures, it's possible that they'll change between when we allocate the string and when we go to fill it. If that happens, free the string, recalculate the length and try again. If it the mismatch isn't resolved on the second try then just give up and return -EINVAL. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 74 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2afe9556147..c20322636a68 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4957,25 +4957,54 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, memcpy(bootverf->data, verf, sizeof(bootverf->data)); } -static unsigned int -nfs4_init_nonuniform_client_string(struct nfs_client *clp, - char *buf, size_t len) +static int +nfs4_init_nonuniform_client_string(struct nfs_client *clp) { - unsigned int result; + int result; + size_t len; + char *str; + bool retried = false; if (clp->cl_owner_id != NULL) - return strlcpy(buf, clp->cl_owner_id, len); + return 0; +retry: + rcu_read_lock(); + len = 10 + strlen(clp->cl_ipaddr) + 1 + + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + + 1 + + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) + + 1; + rcu_read_unlock(); + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; rcu_read_lock(); - result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s", - clp->cl_ipaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_PROTO)); + result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); rcu_read_unlock(); - clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); - return result; + + /* Did something change? */ + if (result >= len) { + kfree(str); + if (retried) + return -EINVAL; + retried = true; + goto retry; + } + clp->cl_owner_id = str; + return 0; } static unsigned int @@ -5066,20 +5095,19 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); - if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) + if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) { setclientid.sc_name_len = nfs4_init_uniform_client_string(clp, setclientid.sc_name, sizeof(setclientid.sc_name)); - else - setclientid.sc_name_len = - nfs4_init_nonuniform_client_string(clp, - setclientid.sc_name, - sizeof(setclientid.sc_name)); - - if (!clp->cl_owner_id) { - status = -ENOMEM; - goto out; + if (!clp->cl_owner_id) { + status = -ENOMEM; + goto out; + } + } else { + status = nfs4_init_nonuniform_client_string(clp); + if (status) + goto out; } /* cb_client4 */ -- cgit From 873e385116b2cc5c7daca8f51881371fadb90970 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 9 Jun 2015 19:44:00 -0400 Subject: nfs: make nfs4_init_uniform_client_string use a dynamically allocated buffer Change the uniform client string generator to dynamically allocate the NFSv4 client name string buffer. With this patch, we can eliminate the buffers that are embedded within the "args" structs and simply use the name string that is hanging off the client. This uniform string case is a little simpler than the nonuniform since we don't need to deal with RCU, but we do have two different cases, depending on whether there is a uniquifier or not. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 112 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 77 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c20322636a68..d181090124db 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5007,28 +5007,78 @@ retry: return 0; } -static unsigned int -nfs4_init_uniform_client_string(struct nfs_client *clp, - char *buf, size_t len) +static int +nfs4_init_uniquifier_client_string(struct nfs_client *clp) { - const char *nodename = clp->cl_rpcclient->cl_nodename; - unsigned int result; + int result; + size_t len; + char *str; + + len = 10 + 10 + 1 + 10 + 1 + + strlen(nfs4_client_id_uniquifier) + 1 + + strlen(clp->cl_rpcclient->cl_nodename) + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s", + clp->rpc_ops->version, clp->cl_minorversion, + nfs4_client_id_uniquifier, + clp->cl_rpcclient->cl_nodename); + if (result >= len) { + kfree(str); + return -EINVAL; + } + clp->cl_owner_id = str; + return 0; +} + +static int +nfs4_init_uniform_client_string(struct nfs_client *clp) +{ + int result; + size_t len; + char *str; if (clp->cl_owner_id != NULL) - return strlcpy(buf, clp->cl_owner_id, len); + return 0; if (nfs4_client_id_uniquifier[0] != '\0') - result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s", - clp->rpc_ops->version, - clp->cl_minorversion, - nfs4_client_id_uniquifier, - nodename); - else - result = scnprintf(buf, len, "Linux NFSv%u.%u %s", - clp->rpc_ops->version, clp->cl_minorversion, - nodename); - clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); - return result; + return nfs4_init_uniquifier_client_string(clp); + + len = 10 + 10 + 1 + 10 + 1 + + strlen(clp->cl_rpcclient->cl_nodename) + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + result = scnprintf(str, len, "Linux NFSv%u.%u %s", + clp->rpc_ops->version, clp->cl_minorversion, + clp->cl_rpcclient->cl_nodename); + if (result >= len) { + kfree(str); + return -EINVAL; + } + clp->cl_owner_id = str; + return 0; } /* @@ -5095,20 +5145,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); - if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) { - setclientid.sc_name_len = - nfs4_init_uniform_client_string(clp, - setclientid.sc_name, - sizeof(setclientid.sc_name)); - if (!clp->cl_owner_id) { - status = -ENOMEM; - goto out; - } - } else { + + if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) + status = nfs4_init_uniform_client_string(clp); + else status = nfs4_init_nonuniform_client_string(clp); - if (status) - goto out; - } + + if (status) + goto out; /* cb_client4 */ setclientid.sc_netid_len = @@ -6882,12 +6926,10 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, }; nfs4_init_boot_verifier(clp, &verifier); - args.id_len = nfs4_init_uniform_client_string(clp, args.id, - sizeof(args.id)); - if (!clp->cl_owner_id) { - status = -ENOMEM; + + status = nfs4_init_uniform_client_string(clp); + if (status) goto out; - } dprintk("NFS call exchange_id auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, -- cgit From df05a49f7204d81ba0e29fda3d56ccb730f05d87 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 13 Jun 2015 10:07:00 +0800 Subject: nfs: Fix showing truncated fsid/dev in, /proc/net/nfsfs/volumes A truncated fsid showing from /proc/fs/nfsfs/volumes as, NV SERVER PORT DEV FSID FSC v4 c0a80881 801 0:43 34931f044c2a439b no It should be as, NV SERVER PORT DEV FSID FSC v4 c0a80881 801 0:43 34931f044c2a439b:954c5d830fa4be8c no The max buffer length for storing "%llx:%llx" format should be 16 + 1 + 16 + 1 = 34 (16 for %llx, 1 for ':', 1 for '\0'). Also, for storing "%u:%u" of MAJOR() and MINOR() should be 8 + 1 + 3 + 1 = 13 (8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0'). v2, add comments for dev/fsid buffer and use sizeof in snprintf. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 892aefff3630..1e37491904ee 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1364,27 +1364,29 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) { struct nfs_server *server; struct nfs_client *clp; - char dev[8], fsid[17]; + char dev[13]; // 8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0' + char fsid[34]; // 2 * 16 for %llx, 1 for ':', 1 for '\0' struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* display header on line 1 */ if (v == &nn->nfs_volume_list) { - seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); + seq_puts(m, "NV SERVER PORT DEV FSID" + " FSC\n"); return 0; } /* display one transport per line on subsequent lines */ server = list_entry(v, struct nfs_server, master_link); clp = server->nfs_client; - snprintf(dev, 8, "%u:%u", + snprintf(dev, sizeof(dev), "%u:%u", MAJOR(server->s_dev), MINOR(server->s_dev)); - snprintf(fsid, 17, "%llx:%llx", + snprintf(fsid, sizeof(fsid), "%llx:%llx", (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); rcu_read_lock(); - seq_printf(m, "v%u %s %s %-7s %-17s %s\n", + seq_printf(m, "v%u %s %s %-12s %-33s %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), -- cgit From ae2ffef383fad723690a4c5bad70f7035fd7d71a Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 12 Jun 2015 16:53:30 -0400 Subject: Recover from stateid-type error on SETATTR Client can receives stateid-type error (eg., BAD_STATEID) on SETATTR when delegation stateid was used. When no open state exists, in case of application calling truncate() on the file, client has no state to recover and fails with EIO. Instead, upon such error, return the bad delegation and then resend the SETATTR with a zero stateid. Signed-off: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d181090124db..6bcdecd61125 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -370,8 +370,14 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (state == NULL) + if (state == NULL) { + if (inode && nfs4_have_delegation(inode, + FMODE_READ)) { + nfs4_inode_return_delegation(inode); + exception->retry = 1; + } break; + } ret = nfs4_schedule_stateid_recovery(server, state); if (ret < 0) break; -- cgit From 5ba12443a132420be45d089e4c4ac8a1e26b7da8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Jun 2015 11:26:35 -0400 Subject: NFSv4: Fix stateid recovery on revoked delegations Ensure that we fix the non-NULL stateid case as well. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6bcdecd61125..0fc1b0cdda98 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -356,6 +356,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ case 0: return 0; case -NFS4ERR_OPENMODE: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: if (inode && nfs4_have_delegation(inode, FMODE_READ)) { nfs4_inode_return_delegation(inode); exception->retry = 1; @@ -367,21 +370,6 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ if (ret < 0) break; goto wait_on_recovery; - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - if (state == NULL) { - if (inode && nfs4_have_delegation(inode, - FMODE_READ)) { - nfs4_inode_return_delegation(inode); - exception->retry = 1; - } - break; - } - ret = nfs4_schedule_stateid_recovery(server, state); - if (ret < 0) - break; - goto wait_on_recovery; case -NFS4ERR_EXPIRED: if (state != NULL) { ret = nfs4_schedule_stateid_recovery(server, state); -- cgit From 1ca018d28d96d07788474abf66a5f3e9594841f5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 17 Jun 2015 19:41:51 -0400 Subject: pNFS: Fix a memory leak when attempted pnfs fails pnfs_do_write() expects the call to pnfs_write_through_mds() to free the pgio header and to release the layout segment before exiting. The problem is that nfs_pgio_data_destroy() doesn't actually do this; it only frees the memory allocated by nfs_generic_pgio(). Ditto for pnfs_do_read()... Fix in both cases is to add a call to hdr->release(hdr). Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 230606243be6..219ee6a3f1b3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1865,6 +1865,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } static enum pnfs_try_status @@ -1979,6 +1980,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); + hdr->release(hdr); } /* -- cgit From c70701131f7a8edea91fc49d11796d342cff7c62 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 17 Jun 2015 19:56:22 -0400 Subject: NFS: Ensure we set NFS_CONTEXT_RESEND_WRITES when requeuing writes If a write attempt fails, and the write is queued up for resending to the server, as opposed to being dropped, then we need to set the appropriate flag so that nfs_file_fsync() does the right thing. Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 + fs/nfs/write.c | 1 + 2 files changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 219ee6a3f1b3..d47c188682b1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1821,6 +1821,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) /* Resend all requests through the MDS */ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, hdr->completion_ops); + set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dfc19f1575a1..daf355642845 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1289,6 +1289,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); + set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); nfs_unlock_request(req); nfs_end_page_writeback(req); nfs_release_request(req); -- cgit From dfad7000f32e3a6187e1ee36d52494d821553f2b Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 18 Jun 2015 19:37:13 +0800 Subject: nfs: Fix comment for nfs_pageio_init() and nfs_pageio_complete_mirror() Signed-off-by: Yijing Wang Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e9953ab1ac40..d70228a81056 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -709,7 +709,9 @@ static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor * @inode: pointer to inode - * @doio: pointer to io function + * @pg_ops: pointer to pageio operations + * @compl_ops: pointer to pageio completion operations + * @rw_ops: pointer to nfs read/write operations * @bsize: io block size * @io_flags: extra parameters for the io function */ @@ -1184,6 +1186,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an * nfs_pageio_descriptor * @desc: pointer to io descriptor + * @mirror_idx: pointer to mirror index */ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, u32 mirror_idx) -- cgit From be3a5d233922d73f27002ce2767f6ec03c3f473d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:55 +0800 Subject: NFSv.2/pnfs Add a LAYOUTSTATS rpc function Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42.h | 9 +++- fs/nfs/nfs42proc.c | 27 ++++++++++++ fs/nfs/nfs42xdr.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 4 +- fs/nfs/nfs4xdr.c | 1 + 6 files changed, 161 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 7afb8947dfdf..ff66ae700b89 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -5,11 +5,18 @@ #ifndef __LINUX_FS_NFS_NFS4_2_H #define __LINUX_FS_NFS_NFS4_2_H +/* + * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support + * more? Need to consider not to pre-alloc too much for a compound. + */ +#define PNFS_LAYOUTSTATS_MAXDEV (4) + /* nfs4.2proc.c */ int nfs42_proc_allocate(struct file *, loff_t, loff_t); int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); - +int nfs42_proc_layoutstats_generic(struct nfs_server *, + struct nfs42_layoutstat_data *); /* nfs4.2xdr.h */ extern struct rpc_procinfo nfs4_2_procedures[]; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 3a9e75235f30..ac929685350b 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -165,3 +165,30 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } + +static const struct rpc_call_ops nfs42_layoutstat_ops = { +}; + +int nfs42_proc_layoutstats_generic(struct nfs_server *server, + struct nfs42_layoutstat_data *data) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS], + .rpc_argp = &data->args, + .rpc_resp = &data->res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = &nfs42_layoutstat_ops, + .callback_data = data, + .flags = RPC_TASK_ASYNC, + }; + struct rpc_task *task; + + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + task = rpc_run_task(&task_setup); + if (IS_ERR(task)) + return PTR_ERR(task); + return 0; +} diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 1a25b27248f2..9aae0205ef11 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,8 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#include "nfs42.h" + #define encode_fallocate_maxsz (encode_stateid_maxsz + \ 2 /* offset */ + \ 2 /* length */) @@ -22,6 +24,16 @@ 1 /* whence */ + \ 2 /* offset */ + \ 2 /* length */) +#define encode_io_info_maxsz 4 +#define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \ + 2 /* offset */ + \ + 2 /* length */ + \ + encode_stateid_maxsz + \ + encode_io_info_maxsz + \ + encode_io_info_maxsz + \ + 1 /* opaque devaddr4 length */ + \ + XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) +#define decode_layoutstats_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -45,6 +57,14 @@ #define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_seek_maxsz) +#define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz) +#define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) static void encode_fallocate(struct xdr_stream *xdr, @@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr, encode_uint32(xdr, args->sa_what); } +static void encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr); + p = reserve_space(xdr, 8 + 8); + p = xdr_encode_hyper(p, devinfo->offset); + p = xdr_encode_hyper(p, devinfo->length); + encode_nfs4_stateid(xdr, &args->stateid); + p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4); + p = xdr_encode_hyper(p, devinfo->read_count); + p = xdr_encode_hyper(p, devinfo->read_bytes); + p = xdr_encode_hyper(p, devinfo->write_count); + p = xdr_encode_hyper(p, devinfo->write_bytes); + p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data, + NFS4_DEVICEID4_SIZE); + /* Encode layoutupdate4 */ + *p++ = cpu_to_be32(devinfo->layout_type); + if (devinfo->layoutstats_encode != NULL) + devinfo->layoutstats_encode(xdr, args, devinfo); + else + encode_uint32(xdr, 0); +} + /* * Encode ALLOCATE request */ @@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode LAYOUTSTATS request + */ +static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args) +{ + int i; + + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < args->num_dev; i++) + encode_layoutstats(xdr, args, &args->devinfo[i], &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -169,6 +238,28 @@ out_overflow: return -EIO; } +static int decode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + int status; + __be32 *p; + + status = decode_op_hdr(xdr, OP_LAYOUTSTATS); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + + res->rpc_status = be32_to_cpup(p++); + return 0; + +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + /* * Decode ALLOCATE request */ @@ -246,4 +337,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode LAYOUTSTATS request + */ +static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_layoutstat_res *res) +{ + struct compound_hdr hdr; + int status, i; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV); + for (i = 0; i < res->num_dev; i++) { + status = decode_layoutstats(xdr, res); + if (status) + goto out; + } +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index fdef424b0cd3..ea3bee919a76 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); +extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e9cd45f1d60f..643ce3a91b22 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -476,8 +476,8 @@ struct nfs4_call_sync_data { struct nfs4_sequence_res *seq_res; }; -static void nfs4_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) +void nfs4_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) { args->sa_slot = NULL; args->sa_cache_this = cache_reply; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a07555d6968c..558cd65dbdb7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7431,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SEEK, enc_seek, dec_seek), PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), + PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), #endif /* CONFIG_NFS_V4_2 */ }; -- cgit From 1b4a4bd82c308aed36f13fff4beb33d3840cfa0e Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:51:56 +0800 Subject: pNFS: fill in nfs42_layoutstat_ops Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ac929685350b..a0530418255f 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -10,6 +10,11 @@ #include #include "nfs4_fs.h" #include "nfs42.h" +#include "iostat.h" +#include "pnfs.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, fmode_t fmode) @@ -166,7 +171,44 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } +static void +nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *server = NFS_SERVER(data->args.inode); + + nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, + &data->res.seq_res, task); +} + +static void +nfs42_layoutstat_done(struct rpc_task *task, void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return; + + /* well, we don't care about errors at all! */ + if (task->tk_status) + dprintk("%s server returns %d\n", __func__, task->tk_status); +} + +static void +nfs42_layoutstat_release(void *calldata) +{ + struct nfs42_layoutstat_data *data = calldata; + + pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); + nfs_iput_and_deactive(data->inode); + kfree(data->args.devinfo); + kfree(data); +} + static const struct rpc_call_ops nfs42_layoutstat_ops = { + .rpc_call_prepare = nfs42_layoutstat_prepare, + .rpc_call_done = nfs42_layoutstat_done, + .rpc_release = nfs42_layoutstat_release, }; int nfs42_proc_layoutstats_generic(struct nfs_server *server, @@ -186,6 +228,11 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, }; struct rpc_task *task; + data->inode = nfs_igrab_and_active(data->args.inode); + if (!data->inode) { + nfs42_layoutstat_release(data); + return -EAGAIN; + } nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); task = rpc_run_task(&task_setup); if (IS_ERR(task)) -- cgit From 8733408d6ed713d080c325262d7b51a780136d41 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:51:57 +0800 Subject: pnfs: add pnfs_report_layoutstat helper function Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 4 ++++ fs/nfs/pnfs.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/pnfs.h | 3 +++ 3 files changed, 56 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index a0530418255f..ee0248340a42 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -198,6 +198,10 @@ static void nfs42_layoutstat_release(void *calldata) { struct nfs42_layoutstat_data *data = calldata; + struct nfs_server *nfss = NFS_SERVER(data->args.inode); + + if (nfss->pnfs_curr_ld->cleanup_layoutstats) + nfss->pnfs_curr_ld->cleanup_layoutstats(data); pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); nfs_iput_and_deactive(data->inode); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d47c188682b1..6279cff7df74 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -35,6 +35,7 @@ #include "iostat.h" #include "nfs4trace.h" #include "delegation.h" +#include "nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) @@ -2250,3 +2251,51 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) } return thp; } + +int +pnfs_report_layoutstat(struct inode *inode) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs42_layoutstat_data *data; + struct pnfs_layout_hdr *hdr; + int status = 0; + + if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) + goto out; + + spin_lock(&inode->i_lock); + if (!NFS_I(inode)->layout) { + spin_unlock(&inode->i_lock); + goto out; + } + hdr = NFS_I(inode)->layout; + pnfs_get_layout_hdr(hdr); + spin_unlock(&inode->i_lock); + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + status = -ENOMEM; + goto out_put; + } + + data->args.fh = NFS_FH(inode); + data->args.inode = inode; + nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid); + status = ld->prepare_layoutstats(&data->args); + if (status) + goto out_free; + + status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data); + +out: + dprintk("%s returns %d\n", __func__, status); + return status; + +out_free: + kfree(data); +out_put: + pnfs_put_layout_hdr(hdr); + goto out; +} +EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1e6308f82fc3..0a4723945db2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type { void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); + int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); + void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data); }; struct pnfs_layout_hdr { @@ -290,6 +292,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); +int pnfs_report_layoutstat(struct inode *inode); /* nfs4_deviceid_flags */ enum { -- cgit From 27797d1bb36bf9e21fa14094f23ab785b442bd76 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:58 +0800 Subject: pNFS/flexfiles: Remove unused struct members user_name, group_name Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 070f20445b2d..275e4b735247 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -48,8 +48,6 @@ struct nfs4_ff_layout_mirror { u32 fh_versions_cnt; struct nfs_fh *fh_versions; nfs4_stateid stateid; - struct nfs4_string user_name; - struct nfs4_string group_name; u32 uid; u32 gid; struct rpc_cred *cred; -- cgit From abcb7bfc9fdecd1550b98a5e13d40bfba6c0649b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Jun 2015 19:51:59 +0800 Subject: pNFS/flexfiles: add layoutstats tracking Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 192 +++++++++++++++++++++++++++++++-- fs/nfs/flexfilelayout/flexfilelayout.h | 22 ++++ 2 files changed, 203 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 0f1410c94827..8ac31fbf040a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -412,6 +412,130 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) return 1; } +static void +nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + ktime_t old, new; + + /* + * Note: careful here! + * If the counter is zero, then we must not increment it until after + * we've set the start_time. + * If we were instead to use atomic_inc_return(), then another + * request might come in, bump, and then call end_busy_timer() + * before we've set the timer->start_time. + */ + old = timer->start_time; + if (atomic_inc_not_zero(&timer->n_ops) == 0) { + new = ktime_get(); + cmpxchg(&timer->start_time.tv64, old.tv64, new.tv64); + atomic_inc(&timer->n_ops); + } +} + +static ktime_t +nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer) +{ + ktime_t start, now; + + now = ktime_get(); + start.tv64 = xchg(&timer->start_time.tv64, now.tv64); + atomic_dec(&timer->n_ops); + return ktime_sub(now, start); +} + +static ktime_t +nfs4_ff_layout_calc_completion_time(struct rpc_task *task) +{ + return ktime_sub(ktime_get(), task->tk_start); +} + +static void +nfs4_ff_layoutstat_start_io(struct nfs4_ff_layoutstat *layoutstat) +{ + nfs4_ff_start_busy_timer(&layoutstat->busy_timer); +} + +static void +nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + + iostat->ops_requested++; + iostat->bytes_requested += requested; +} + +static void +nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat, + __u64 requested, + __u64 completed, + ktime_t time_completed) +{ + struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat; + ktime_t timer; + + iostat->ops_completed++; + iostat->bytes_completed += completed; + iostat->bytes_not_delivered += requested - completed; + + timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer); + iostat->total_busy_time = + ktime_add(iostat->total_busy_time, timer); + iostat->aggregate_completion_time = + ktime_add(iostat->aggregate_completion_time, time_completed); +} + +static void +nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + spin_lock(&mirror->lock); + nfs4_ff_layoutstat_start_io(&mirror->read_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_end_read(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed) +{ + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, + __u64 requested) +{ + spin_lock(&mirror->lock); + nfs4_ff_layoutstat_start_io(&mirror->write_stat); + nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); + spin_unlock(&mirror->lock); +} + +static void +nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, + struct nfs4_ff_layout_mirror *mirror, + __u64 requested, + __u64 completed, + enum nfs3_stable_how committed) +{ + if (committed == NFS_UNSTABLE) + requested = completed = 0; + + spin_lock(&mirror->lock); + nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat, + requested, completed, + nfs4_ff_layout_calc_completion_time(task)); + spin_unlock(&mirror->lock); +} + static int ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, @@ -906,6 +1030,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) static int ff_layout_read_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_read( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -959,15 +1087,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_read_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_read_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_READ) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -979,6 +1107,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + nfs4_ff_layout_stat_io_end_read(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1080,6 +1212,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, static int ff_layout_write_prepare_common(struct rpc_task *task, struct nfs_pgio_header *hdr) { + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count); + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -1113,15 +1249,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - if (ff_layout_write_prepare_common(task, hdr)) - return; - if (ff_layout_setup_sequence(hdr->ds_clp, &hdr->args.seq_args, &hdr->res.seq_res, task)) return; + if (ff_layout_write_prepare_common(task, hdr)) + return; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, hdr->args.lock_context, FMODE_WRITE) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ @@ -1131,6 +1267,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count, + hdr->res.verf->committed); + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1149,8 +1290,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data) &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); } +static void ff_layout_commit_prepare_common(struct rpc_task *task, + struct nfs_commit_data *cdata) +{ + nfs4_ff_layout_stat_io_start_write( + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + 0); +} + static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) { + ff_layout_commit_prepare_common(task, data); rpc_call_start(task); } @@ -1158,10 +1308,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; - ff_layout_setup_sequence(wdata->ds_clp, + if (ff_layout_setup_sequence(wdata->ds_clp, &wdata->args.seq_args, &wdata->res.seq_res, - task); + task)) + return; + ff_layout_commit_prepare_common(task, data); +} + +static void ff_layout_commit_done(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *cdata = data; + struct nfs_page *req; + __u64 count = 0; + + if (task->tk_status == 0) { + list_for_each_entry(req, &cdata->pages, wb_list) + count += req->wb_bytes; + } + + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + count, count, NFS_FILE_SYNC); + + pnfs_generic_write_commit_done(task, data); } static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) @@ -1202,14 +1372,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { .rpc_call_prepare = ff_layout_commit_prepare_v3, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { .rpc_call_prepare = ff_layout_commit_prepare_v4, - .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, .rpc_release = pnfs_generic_commit_release, }; @@ -1253,7 +1423,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) hdr->args.fh = fh; - /* * Note that if we ever decide to split across DSes, * then we may need to handle dense-like offsets. @@ -1382,6 +1551,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; + return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, vers == 3 ? &ff_layout_commit_call_ops_v3 : &ff_layout_commit_call_ops_v4, diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 275e4b735247..f7493f7cf13c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -41,6 +41,26 @@ struct nfs4_ff_layout_ds_err { struct nfs4_deviceid deviceid; }; +struct nfs4_ff_io_stat { + __u64 ops_requested; + __u64 bytes_requested; + __u64 ops_completed; + __u64 bytes_completed; + __u64 bytes_not_delivered; + ktime_t total_busy_time; + ktime_t aggregate_completion_time; +}; + +struct nfs4_ff_busy_timer { + ktime_t start_time; + atomic_t n_ops; +}; + +struct nfs4_ff_layoutstat { + struct nfs4_ff_io_stat io_stat; + struct nfs4_ff_busy_timer busy_timer; +}; + struct nfs4_ff_layout_mirror { u32 ds_count; u32 efficiency; @@ -52,6 +72,8 @@ struct nfs4_ff_layout_mirror { u32 gid; struct rpc_cred *cred; spinlock_t lock; + struct nfs4_ff_layoutstat read_stat; + struct nfs4_ff_layoutstat write_stat; }; struct nfs4_ff_layout_segment { -- cgit From d983803d3818a7aef3200147c72a45f573d84537 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:00 +0800 Subject: pNFS/flexfiles: track when layout is first used So that we can report cumulative time since the beginning of statistics collection of the layout. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 10 +++++++--- fs/nfs/flexfilelayout/flexfilelayout.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 8ac31fbf040a..4215278c9c76 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -451,9 +451,13 @@ nfs4_ff_layout_calc_completion_time(struct rpc_task *task) } static void -nfs4_ff_layoutstat_start_io(struct nfs4_ff_layoutstat *layoutstat) +nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, + struct nfs4_ff_layoutstat *layoutstat) { + static const ktime_t notime = {0}; + nfs4_ff_start_busy_timer(&layoutstat->busy_timer); + cmpxchg(&mirror->start_time, notime, ktime_get()); } static void @@ -491,7 +495,7 @@ nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(&mirror->read_stat); + nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); spin_unlock(&mirror->lock); } @@ -514,7 +518,7 @@ nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(&mirror->write_stat); + nfs4_ff_layoutstat_start_io(mirror, &mirror->write_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); spin_unlock(&mirror->lock); } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index f7493f7cf13c..0e7366f44d11 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -74,6 +74,7 @@ struct nfs4_ff_layout_mirror { spinlock_t lock; struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; + ktime_t start_time; }; struct nfs4_ff_layout_segment { -- cgit From ad4dc53e6496ba0b3c18aeda6f3367d9800b6ebf Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:01 +0800 Subject: pnfs/flexfiles: add ff_layout_prepare_layoutstats It fills in the generic part of LAYOUTSTATS call. One thing to note is that we don't really track if IO is continuous or not. So just fake to use the completed bytes for it. Still missing flexfiles specific part, which will be included in the next patch. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 71 ++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 4215278c9c76..cd999af504d9 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -20,6 +20,7 @@ #include "../nfs4trace.h" #include "../iostat.h" #include "../nfs.h" +#include "../nfs42.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -1659,6 +1660,75 @@ out: dprintk("%s: Return\n", __func__); } +static bool +ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, + struct pnfs_layout_segment *pls, + int *dev_count, int dev_limit) +{ + struct nfs4_ff_layout_mirror *mirror; + struct nfs4_deviceid_node *dev; + struct nfs42_layoutstat_devinfo *devinfo; + int i; + + for (i = 0; i <= FF_LAYOUT_MIRROR_COUNT(pls); i++) { + if (*dev_count >= dev_limit) + break; + mirror = FF_LAYOUT_COMP(pls, i); + dev = FF_LAYOUT_DEVID_NODE(pls, i); + devinfo = &args->devinfo[*dev_count]; + memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); + devinfo->offset = pls->pls_range.offset; + devinfo->length = pls->pls_range.length; + /* well, we don't really know if IO is continuous or not! */ + devinfo->read_count = mirror->read_stat.io_stat.bytes_completed; + devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed; + devinfo->write_count = mirror->write_stat.io_stat.bytes_completed; + devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; + devinfo->layout_type = LAYOUT_FLEX_FILES; + devinfo->layoutstats_encode = NULL; + devinfo->layout_private = NULL; + + ++(*dev_count); + } + + return *dev_count < dev_limit; +} + +static int +ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) +{ + struct pnfs_layout_segment *pls; + int dev_count = 0; + + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + dev_count += FF_LAYOUT_MIRROR_COUNT(pls); + } + spin_unlock(&args->inode->i_lock); + /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */ + if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) { + dprintk("%s: truncating devinfo to limit (%d:%d)\n", + __func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV); + dev_count = PNFS_LAYOUTSTATS_MAXDEV; + } + args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL); + if (!args->devinfo) + return -ENOMEM; + + dev_count = 0; + spin_lock(&args->inode->i_lock); + list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) { + if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count, + PNFS_LAYOUTSTATS_MAXDEV)) { + break; + } + } + spin_unlock(&args->inode->i_lock); + args->num_dev = dev_count; + + return 0; +} + static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -1681,6 +1751,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .alloc_deviceid_node = ff_layout_alloc_deviceid_node, .encode_layoutreturn = ff_layout_encode_layoutreturn, .sync = pnfs_nfs_generic_sync, + .prepare_layoutstats = ff_layout_prepare_layoutstats, }; static int __init nfs4flexfilelayout_init(void) -- cgit From 27c430644369ccd9a2272492ba6d0e85e2e4800b Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:02 +0800 Subject: pnfs/flexfiles: encode LAYOUTSTATS flexfiles specific data Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 178 ++++++++++++++++++++++++++++++++- fs/nfs/flexfilelayout/flexfilelayout.h | 1 + 2 files changed, 177 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index cd999af504d9..6bcb6d6c9dc3 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -272,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, spin_lock_init(&fls->mirror_array[i]->lock); fls->mirror_array[i]->ds_count = ds_count; + fls->mirror_array[i]->lseg = &fls->generic_hdr; /* deviceid */ rc = decode_deviceid(&stream, &devid); @@ -1660,6 +1661,161 @@ out: dprintk("%s: Return\n", __func__); } +static int +ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + return snprintf(buf, buflen, "%pI4", &sin->sin_addr); +} + +static size_t +ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf, + const int buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + const struct in6_addr *addr = &sin6->sin6_addr; + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded ANY address + */ + if (ipv6_addr_any(addr)) + return snprintf(buf, buflen, "::"); + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded loopback address + */ + if (ipv6_addr_loopback(addr)) + return snprintf(buf, buflen, "::1"); + + /* + * RFC 4291, Section 2.2.3 + * + * Special presentation address format for mapped v4 + * addresses. + */ + if (ipv6_addr_v4mapped(addr)) + return snprintf(buf, buflen, "::ffff:%pI4", + &addr->s6_addr32[3]); + + /* + * RFC 4291, Section 2.2.1 + */ + return snprintf(buf, buflen, "%pI6c", addr); +} + +/* Derived from rpc_sockaddr2uaddr */ +static void +ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) +{ + struct sockaddr *sap = (struct sockaddr *)&da->da_addr; + char portbuf[RPCBIND_MAXUADDRPLEN]; + char addrbuf[RPCBIND_MAXUADDRLEN]; + char *netid; + unsigned short port; + int len, netid_len; + __be32 *p; + + switch (sap->sa_family) { + case AF_INET: + if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in *)sap)->sin_port); + netid = "tcp"; + netid_len = 3; + break; + case AF_INET6: + if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) + return; + port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + netid = "tcp6"; + netid_len = 4; + break; + default: + /* we only support tcp and tcp6 */ + WARN_ON_ONCE(1); + return; + } + + snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff); + len = strlcat(addrbuf, portbuf, sizeof(addrbuf)); + + p = xdr_reserve_space(xdr, 4 + netid_len); + xdr_encode_opaque(p, netid, netid_len); + + p = xdr_reserve_space(xdr, 4 + len); + xdr_encode_opaque(p, addrbuf, len); +} + +static void +ff_layout_encode_nfstime(struct xdr_stream *xdr, + ktime_t t) +{ + struct timespec64 ts; + __be32 *p; + + p = xdr_reserve_space(xdr, 12); + ts = ktime_to_timespec64(t); + p = xdr_encode_hyper(p, ts.tv_sec); + *p++ = cpu_to_be32(ts.tv_nsec); +} + +static void +ff_layout_encode_io_latency(struct xdr_stream *xdr, + struct nfs4_ff_io_stat *stat) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 5 * 8); + p = xdr_encode_hyper(p, stat->ops_requested); + p = xdr_encode_hyper(p, stat->bytes_requested); + p = xdr_encode_hyper(p, stat->ops_completed); + p = xdr_encode_hyper(p, stat->bytes_completed); + p = xdr_encode_hyper(p, stat->bytes_not_delivered); + ff_layout_encode_nfstime(xdr, stat->total_busy_time); + ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time); +} + +static void +ff_layout_encode_layoutstats(struct xdr_stream *xdr, + struct nfs42_layoutstat_args *args, + struct nfs42_layoutstat_devinfo *devinfo) +{ + struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private; + struct nfs4_pnfs_ds_addr *da; + struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds; + struct nfs_fh *fh = &mirror->fh_versions[0]; + __be32 *p, *start; + + da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); + dprintk("%s: DS %s: encoding address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + /* layoutupdate length */ + start = xdr_reserve_space(xdr, 4); + /* netaddr4 */ + ff_layout_encode_netaddr(xdr, da); + /* nfs_fh4 */ + p = xdr_reserve_space(xdr, 4 + fh->size); + xdr_encode_opaque(p, fh->data, fh->size); + /* ff_io_latency4 read */ + spin_lock(&mirror->lock); + ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat); + /* ff_io_latency4 write */ + ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat); + spin_unlock(&mirror->lock); + /* nfstime4 */ + ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time)); + /* bool */ + p = xdr_reserve_space(xdr, 4); + *p = cpu_to_be32(false); + + *start = cpu_to_be32((xdr->p - start - 1) * 4); +} + static bool ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, struct pnfs_layout_segment *pls, @@ -1674,6 +1830,8 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, if (*dev_count >= dev_limit) break; mirror = FF_LAYOUT_COMP(pls, i); + if (!mirror || !mirror->mirror_ds) + continue; dev = FF_LAYOUT_DEVID_NODE(pls, i); devinfo = &args->devinfo[*dev_count]; memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); @@ -1685,8 +1843,10 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, devinfo->write_count = mirror->write_stat.io_stat.bytes_completed; devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; devinfo->layout_type = LAYOUT_FLEX_FILES; - devinfo->layoutstats_encode = NULL; - devinfo->layout_private = NULL; + devinfo->layoutstats_encode = ff_layout_encode_layoutstats; + devinfo->layout_private = mirror; + /* lseg refcount put in cleanup_layoutstats */ + pnfs_get_lseg(pls); ++(*dev_count); } @@ -1729,6 +1889,19 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) return 0; } +static void +ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data) +{ + struct nfs4_ff_layout_mirror *mirror; + int i; + + for (i = 0; i < data->args.num_dev; i++) { + mirror = data->args.devinfo[i].layout_private; + data->args.devinfo[i].layout_private = NULL; + pnfs_put_lseg(mirror->lseg); + } +} + static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -1752,6 +1925,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .encode_layoutreturn = ff_layout_encode_layoutreturn, .sync = pnfs_nfs_generic_sync, .prepare_layoutstats = ff_layout_prepare_layoutstats, + .cleanup_layoutstats = ff_layout_cleanup_layoutstats, }; static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 0e7366f44d11..7e248874f46d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -75,6 +75,7 @@ struct nfs4_ff_layout_mirror { struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; ktime_t start_time; + struct pnfs_layout_segment *lseg; /* back pointer */ }; struct nfs4_ff_layout_segment { -- cgit From 1bfe3b259ff2e579b2acb190f874397d53274497 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:03 +0800 Subject: nfs42: serialize LAYOUTSTATS calls of the same file There is no need to report concurrently. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 3 +++ fs/nfs/pnfs.c | 7 +++++++ 2 files changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ee0248340a42..06c74cd93875 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -204,6 +204,9 @@ nfs42_layoutstat_release(void *calldata) nfss->pnfs_curr_ld->cleanup_layoutstats(data); pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags); + smp_mb__after_atomic(); nfs_iput_and_deactive(data->inode); kfree(data->args.devinfo); kfree(data); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6279cff7df74..5b5224341bcd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2257,6 +2257,7 @@ pnfs_report_layoutstat(struct inode *inode) { struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); struct nfs42_layoutstat_data *data; struct pnfs_layout_hdr *hdr; int status = 0; @@ -2264,6 +2265,9 @@ pnfs_report_layoutstat(struct inode *inode) if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) goto out; + if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) + goto out; + spin_lock(&inode->i_lock); if (!NFS_I(inode)->layout) { spin_unlock(&inode->i_lock); @@ -2296,6 +2300,9 @@ out_free: kfree(data); out_put: pnfs_put_layout_hdr(hdr); + smp_mb__before_atomic(); + clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); + smp_mb__after_atomic(); goto out; } EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); -- cgit From 97ba375b5df43790aa82c256f7f16ee8eb95c272 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 23 Jun 2015 19:52:04 +0800 Subject: pnfs/flexfiles: report layoutstat regularly As a simple scheme, report every minute if IO is still going on. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 27 +++++++++++++++++++++++---- fs/nfs/flexfilelayout/flexfilelayout.h | 6 +++++- 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 6bcb6d6c9dc3..d3b3b6236711 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -452,14 +452,23 @@ nfs4_ff_layout_calc_completion_time(struct rpc_task *task) return ktime_sub(ktime_get(), task->tk_start); } -static void +static bool nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, struct nfs4_ff_layoutstat *layoutstat) { static const ktime_t notime = {0}; + ktime_t now = ktime_get(); nfs4_ff_start_busy_timer(&layoutstat->busy_timer); - cmpxchg(&mirror->start_time, notime, ktime_get()); + cmpxchg(&mirror->start_time.tv64, notime.tv64, now.tv64); + cmpxchg(&mirror->last_report_time.tv64, notime.tv64, now.tv64); + if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= + FF_LAYOUTSTATS_REPORT_INTERVAL) { + mirror->last_report_time = now; + return true; + } + + return false; } static void @@ -496,10 +505,15 @@ static void nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { + bool report; + spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); + report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); } static void @@ -519,10 +533,15 @@ static void nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror, __u64 requested) { + bool report; + spin_lock(&mirror->lock); - nfs4_ff_layoutstat_start_io(mirror, &mirror->write_stat); + report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat); nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); spin_unlock(&mirror->lock); + + if (report) + pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode); } static void diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 7e248874f46d..6fcd8d5e8e3d 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -15,6 +15,9 @@ * due to network error etc. */ #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 +/* LAYOUTSTATS report interval in ms */ +#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) + struct nfs4_ff_ds_version { u32 version; u32 minor_version; @@ -62,6 +65,7 @@ struct nfs4_ff_layoutstat { }; struct nfs4_ff_layout_mirror { + struct pnfs_layout_segment *lseg; /* back pointer */ u32 ds_count; u32 efficiency; struct nfs4_ff_layout_ds *mirror_ds; @@ -75,7 +79,7 @@ struct nfs4_ff_layout_mirror { struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; ktime_t start_time; - struct pnfs_layout_segment *lseg; /* back pointer */ + ktime_t last_report_time; }; struct nfs4_ff_layout_segment { -- cgit From a24221dca1868101c9b4b5adde4a6a5b1a3a64a7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 24 Jun 2015 12:10:23 -0400 Subject: nfs: fix potential credential leak in ff_layout_update_mirror_cred If we have two tasks racing to update a mirror's credentials, then they can end up leaking one (or more) sets of credentials. The first task will set mirror->cred and then the second task will just overwrite it. Use a cmpxchg to ensure that the creds are only set once. If we get to the point where we would set mirror->cred and find that they're already set, then we just release the creds that were just found. Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 77a2d026aa12..c19b9a88f748 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror, __func__, PTR_ERR(cred)); return PTR_ERR(cred); } else { - mirror->cred = cred; + if (cmpxchg(&mirror->cred, NULL, cred)) + put_rpccred(cred); } } return 0; -- cgit From 0c8315dd56577445dd1afe6b9cfa06b7efdf2f82 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 24 Jun 2015 12:10:24 -0400 Subject: nfs: always update creds in mirror, even when we have an already connected ds A ds can be associated with more than one mirror, but we currently skip setting a mirror's credentials if we find that it's already set up with a connected client. The upshot is that we can end up sending DS writes with MDS credentials instead of properly setting them up. Fix nfs4_ff_layout_prepare_ds to always verify that the mirror's credentials are set up, even when we have a DS that's already connected. Reported-by: Tom Haynes Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index c19b9a88f748..f13e1969eedd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -387,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); if (ds->ds_clp) - goto out; + goto out_update_creds; flavor = nfs4_ff_layout_choose_authflavor(mirror); @@ -431,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, } } } - +out_update_creds: if (ff_layout_update_mirror_cred(mirror, ds)) ds = NULL; out: -- cgit From 18a600897212c1480eb635112baeab017babfc83 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 25 Jun 2015 09:25:50 -0400 Subject: nfs: verify open flags before allowing open Commit 9597c13b forbade opens with O_APPEND|O_DIRECT for NFSv4: nfs: verify open flags before allowing an atomic open Currently, you can open a NFSv4 file with O_APPEND|O_DIRECT, but cannot fcntl(F_SETFL,...) with those flags. This flag combination is explicitly forbidden on NFSv3 opens, and it seems like it should also be on NFSv4. However, you can still open a file with O_DIRECT|O_APPEND if there exists a cached dentry for the file because nfs4_file_open() is used instead of nfs_atomic_open() and the check is bypassed. Add the check in nfs4_file_open() as well. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index f58c17b3b480..dcd39d4e2efe 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -41,6 +41,10 @@ nfs4_file_open(struct inode *inode, struct file *filp) dprintk("NFS: open file(%pd2)\n", dentry); + err = nfs_check_flags(openflags); + if (err) + return err; + if ((openflags & O_ACCMODE) == 3) openflags--; -- cgit From 865a7ecb2131a3ba26cc1d49daf18275375109f0 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 25 Jun 2015 18:19:32 +0800 Subject: nfs: provide pnfs_report_layoutstat when NFS42 is disabled kbuild test robot reported: fs/built-in.o: In function `pnfs_report_layoutstat': >> (.text+0x151a1c): undefined reference to `nfs42_proc_layoutstats_generic' Reported-by: kbuild test robot Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 ++ fs/nfs/pnfs.h | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5b5224341bcd..40bacebb5b97 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2252,6 +2252,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) return thp; } +#if IS_ENABLED(CONFIG_NFS_V4_2) int pnfs_report_layoutstat(struct inode *inode) { @@ -2306,3 +2307,4 @@ out_put: goto out; } EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); +#endif diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0a4723945db2..3e6ab7bfbabd 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -292,8 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg); -int pnfs_report_layoutstat(struct inode *inode); - /* nfs4_deviceid_flags */ enum { NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ @@ -692,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void) #endif /* CONFIG_NFS_V4_1 */ +#if IS_ENABLED(CONFIG_NFS_V4_2) +int pnfs_report_layoutstat(struct inode *inode); +#else +static inline int +pnfs_report_layoutstat(struct inode *inode) +{ + return 0; +} +#endif + #endif /* FS_NFS_PNFS_H */ -- cgit From 9bbd9bb40cfae5ff35710e88866fcadb0e8f91ed Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 26 Jun 2015 09:45:49 +0800 Subject: pnfs/flexfiles: protect ktime manipulation with mirror lock It looks as if xchg() and cmpxchg() are not available for 64-bit integers on sparc32: > New breakage seen in linux-next today: > > ERROR: "__xchg_called_with_bad_pointer" [fs/nfs/flexfilelayout/nfs_layout_flexfiles.ko] undefined! > ERROR: "__cmpxchg_called_with_bad_pointer" [fs/nfs/flexfilelayout/nfs_layout_flexfiles.ko] undefined! > make[2]: *** [__modpost] Error 1 > make[1]: *** [modules] Error 2 Given that mirror ktime manipulation is already under mirror->lock, let's make use of the fact. Reported-by: Paul Gortmaker Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index d3b3b6236711..85b4234b8090 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -417,21 +417,9 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) static void nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer) { - ktime_t old, new; - - /* - * Note: careful here! - * If the counter is zero, then we must not increment it until after - * we've set the start_time. - * If we were instead to use atomic_inc_return(), then another - * request might come in, bump, and then call end_busy_timer() - * before we've set the timer->start_time. - */ - old = timer->start_time; - if (atomic_inc_not_zero(&timer->n_ops) == 0) { - new = ktime_get(); - cmpxchg(&timer->start_time.tv64, old.tv64, new.tv64); - atomic_inc(&timer->n_ops); + /* first IO request? */ + if (atomic_inc_return(&timer->n_ops) == 1) { + timer->start_time = ktime_get(); } } @@ -440,9 +428,12 @@ nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer) { ktime_t start, now; + if (atomic_dec_return(&timer->n_ops) < 0) + WARN_ON_ONCE(1); + now = ktime_get(); - start.tv64 = xchg(&timer->start_time.tv64, now.tv64); - atomic_dec(&timer->n_ops); + start = timer->start_time; + timer->start_time = now; return ktime_sub(now, start); } @@ -460,8 +451,10 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, ktime_t now = ktime_get(); nfs4_ff_start_busy_timer(&layoutstat->busy_timer); - cmpxchg(&mirror->start_time.tv64, notime.tv64, now.tv64); - cmpxchg(&mirror->last_report_time.tv64, notime.tv64, now.tv64); + if (ktime_equal(mirror->start_time, notime)) + mirror->start_time = now; + if (ktime_equal(mirror->last_report_time, notime)) + mirror->last_report_time = now; if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= FF_LAYOUTSTATS_REPORT_INTERVAL) { mirror->last_report_time = now; -- cgit From c0f5f5059f6c9c9efb6ca688f4d6a800c11f6b58 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 26 Jun 2015 14:51:32 -0400 Subject: pNFS/flexfiles: Turn off layoutcommit for servers that don't need it This patch ensures that we record the value of 'ffl_flags' from the layout, and then checks for the presence of the FF_FLAGS_NO_LAYOUTCOMMIT flag before deciding whether or not to call pnfs_set_layoutcommit(). The effect is that servers now can decide whether or not they want the client to call layoutcommit before returning a writeable layout. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 16 +++++++++++++++- fs/nfs/flexfilelayout/flexfilelayout.h | 3 +++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 85b4234b8090..594ab35eacfb 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -343,6 +343,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, fls->mirror_array[i]->gid); } + p = xdr_inline_decode(&stream, 4); + if (p) + fls->flags = be32_to_cpup(p); + ff_layout_sort_mirrors(fls); rc = ff_layout_check_layout(lgr); if (rc) @@ -1018,6 +1022,12 @@ static int ff_layout_read_done_cb(struct rpc_task *task, return 0; } +static bool +ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg) +{ + return !(FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_LAYOUTCOMMIT); +} + /* * We reference the rpc_cred of the first WRITE that triggers the need for * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. @@ -1030,6 +1040,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task, static void ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) { + if (!ff_layout_need_layoutcommit(hdr->lseg)) + return; + pnfs_set_layoutcommit(hdr->inode, hdr->lseg, hdr->mds_offset + hdr->res.count); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, @@ -1221,7 +1234,8 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, return -EAGAIN; } - if (data->verf.committed == NFS_UNSTABLE) + if (data->verf.committed == NFS_UNSTABLE + && ff_layout_need_layoutcommit(data->lseg)) pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb); return 0; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 6fcd8d5e8e3d..f92f9a0a856b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -9,6 +9,8 @@ #ifndef FS_NFS_NFS4FLEXFILELAYOUT_H #define FS_NFS_NFS4FLEXFILELAYOUT_H +#define FF_FLAGS_NO_LAYOUTCOMMIT 1 + #include "../pnfs.h" /* XXX: Let's filter out insanely large mirror count for now to avoid oom @@ -85,6 +87,7 @@ struct nfs4_ff_layout_mirror { struct nfs4_ff_layout_segment { struct pnfs_layout_segment generic_hdr; u64 stripe_unit; + u32 flags; u32 mirror_array_cnt; struct nfs4_ff_layout_mirror **mirror_array; }; -- cgit From d620876990f02788d5a663075df007ffb91bdfad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 26 Jun 2015 15:37:58 -0400 Subject: pNFS/flexfiles: Fix the reset of struct pgio_header when resending hdr->good_bytes needs to be set to the length of the request, not zero. Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 594ab35eacfb..c12951b9551e 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -774,7 +774,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) nfs_direct_set_resched_writes(hdr->dreq); /* fake unstable write to let common nfs resend pages */ hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = 0; + hdr->good_bytes = hdr->args.count; } return; } -- cgit From da2e8127510525eb4bce0fe34aff06192e042c8f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 27 Jun 2015 11:30:57 -0400 Subject: NFSv4.2: Fix up a decoding error in layoutstats According to the spec, the server is only returning the status, which we decode in the op header. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 9aae0205ef11..a6bd27da6286 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -241,23 +241,7 @@ out_overflow: static int decode_layoutstats(struct xdr_stream *xdr, struct nfs42_layoutstat_res *res) { - int status; - __be32 *p; - - status = decode_op_hdr(xdr, OP_LAYOUTSTATS); - if (status) - return status; - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - - res->rpc_status = be32_to_cpup(p++); - return 0; - -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; + return decode_op_hdr(xdr, OP_LAYOUTSTATS); } /* -- cgit From 6c5a0d891543873aefc3aaf846c1e7afe0982ff9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 27 Jun 2015 11:45:46 -0400 Subject: NFSv4.2: LAYOUTSTATS is optional to implement Make it so, by checking the return value for NFS4ERR_MOTSUPP and caching the information as a server capability. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 10 ++++++++-- fs/nfs/nfs4proc.c | 3 ++- fs/nfs/pnfs.c | 3 +++ 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 06c74cd93875..f486b80f927a 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -189,9 +189,15 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata) if (!nfs4_sequence_done(task, &data->res.seq_res)) return; - /* well, we don't care about errors at all! */ - if (task->tk_status) + switch (task->tk_status) { + case 0: + break; + case -ENOTSUPP: + case -EOPNOTSUPP: + NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS; + default: dprintk("%s server returns %d\n", __func__, task->tk_status); + } } static void diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 643ce3a91b22..8eab42407d39 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8635,7 +8635,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ATOMIC_OPEN_V1 | NFS_CAP_ALLOCATE | NFS_CAP_DEALLOCATE - | NFS_CAP_SEEK, + | NFS_CAP_SEEK + | NFS_CAP_LAYOUTSTATS, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 40bacebb5b97..0ba9a02c9566 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2266,6 +2266,9 @@ pnfs_report_layoutstat(struct inode *inode) if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) goto out; + if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS)) + goto out; + if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) goto out; -- cgit From 39f897fdbd46873defc8501102124f862e8f4d80 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 29 Jun 2015 14:28:54 +1000 Subject: NFSv4: When returning a delegation, don't reclaim an incompatible open mode. It is possible to have an active open with one mode, and a delegation for the same file with a different mode. In particular, a WR_ONLY open and an RD_ONLY delegation. This happens if a WR_ONLY open is followed by a RD_ONLY open which provides a delegation, but is then close. When returning the delegation, we currently try to claim opens for every open type (n_rdwr, n_rdonly, n_wronly). As there is no harm in claiming an open for a mode that we already have, this is often simplest. However if the delegation only provides a subset of the modes that we currently have open, this will produce an error from the server. So when claiming open modes prior to returning a delegation, skip the open request if the mode is not covered by the delegation - the open_stateid must already cover that mode, so there is nothing to do. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8eab42407d39..605c203de556 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1547,6 +1547,13 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod struct nfs4_state *newstate; int ret; + if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR || + opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) && + (opendata->o_arg.u.delegation_type & fmode) != fmode) + /* This mode can't have been delegated, so we must have + * a valid open_stateid to cover it - not need to reclaim. + */ + return 0; opendata->o_arg.open_flags = 0; opendata->o_arg.fmode = fmode; opendata->o_arg.share_access = nfs4_map_atomic_open_share( -- cgit From db2efec0caba4f81a22d95a34da640b86c313c8e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 30 Jun 2015 14:12:30 -0400 Subject: nfs: take extra reference to fl->fl_file when running a LOCKU operation Jean reported another crash, similar to the one fixed by feaff8e5b2cf: BUG: unable to handle kernel NULL pointer dereference at 0000000000000148 IP: [] locks_get_lock_context+0xf/0xa0 PGD 0 Oops: 0000 [#1] SMP Modules linked in: nfsv3 nfs_layout_flexfiles rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache vmw_vsock_vmci_transport vsock cfg80211 rfkill coretemp crct10dif_pclmul ppdev vmw_balloon crc32_pclmul crc32c_intel ghash_clmulni_intel pcspkr vmxnet3 parport_pc i2c_piix4 microcode serio_raw parport nfsd floppy vmw_vmci acpi_cpufreq auth_rpcgss shpchp nfs_acl lockd grace sunrpc vmwgfx drm_kms_helper ttm drm mptspi scsi_transport_spi mptscsih ata_generic mptbase i2c_core pata_acpi CPU: 0 PID: 329 Comm: kworker/0:1H Not tainted 4.1.0-rc7+ #2 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/30/2013 Workqueue: rpciod rpc_async_schedule [sunrpc] 30ec000 RIP: 0010:[] [] locks_get_lock_context+0xf/0xa0 RSP: 0018:ffff8802330efc08 EFLAGS: 00010296 RAX: ffff8802330efc58 RBX: ffff880097187c80 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 RBP: ffff8802330efc18 R08: ffff88023fc173d8 R09: 3038b7bf00000000 R10: 00002f1a02000000 R11: 3038b7bf00000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff8802337a2300 R15: 0000000000000020 FS: 0000000000000000(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000148 CR3: 000000003680f000 CR4: 00000000001407f0 Stack: ffff880097187c80 ffff880097187cd8 ffff8802330efc98 ffffffff81250281 ffff8802330efc68 ffffffffa013e7df ffff8802330efc98 0000000000000246 ffff8801f6901c00 ffff880233d2b8d8 ffff8802330efc58 ffff8802330efc58 Call Trace: [] __posix_lock_file+0x31/0x5e0 [] ? rpc_wake_up_task_queue_locked.part.35+0xcf/0x240 [sunrpc] [] posix_lock_file_wait+0x3b/0xd0 [] ? nfs41_wake_and_assign_slot+0x32/0x40 [nfsv4] [] ? nfs41_sequence_done+0xd8/0x300 [nfsv4] [] do_vfs_lock+0x35/0x40 [nfsv4] [] nfs4_locku_done+0x81/0x120 [nfsv4] [] ? rpc_destroy_wait_queue+0x20/0x20 [sunrpc] [] ? rpc_destroy_wait_queue+0x20/0x20 [sunrpc] [] rpc_exit_task+0x2c/0x90 [sunrpc] [] ? call_refreshresult+0x170/0x170 [sunrpc] [] __rpc_execute+0x84/0x410 [sunrpc] [] rpc_async_schedule+0x15/0x20 [sunrpc] [] process_one_work+0x147/0x400 [] worker_thread+0x11b/0x460 [] ? rescuer_thread+0x2f0/0x2f0 [] kthread+0xc9/0xe0 [] ? perf_trace_xen_mmu_set_pmd+0xa0/0x160 [] ? kthread_create_on_node+0x170/0x170 [] ret_from_fork+0x42/0x70 [] ? kthread_create_on_node+0x170/0x170 Code: a5 81 e8 85 75 e4 ff c6 05 31 ee aa 00 01 eb 98 66 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 54 49 89 fc 53 <48> 8b 9f 48 01 00 00 48 85 db 74 08 48 89 d8 5b 41 5c 5d c3 83 RIP [] locks_get_lock_context+0xf/0xa0 RSP CR2: 0000000000000148 ---[ end trace 64484f16250de7ef ]--- The problem is almost exactly the same as the one fixed by feaff8e5b2cf. We must take a reference to the struct file when running the LOCKU compound to prevent the final fput from running until the operation is complete. Reported-by: Jean Spector Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 605c203de556..5e7638d8e31b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5484,6 +5484,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, atomic_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); + get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); p->server = NFS_SERVER(inode); return p; @@ -5495,6 +5496,7 @@ static void nfs4_locku_release_calldata(void *data) nfs_free_seqid(calldata->arg.seqid); nfs4_put_lock_state(calldata->lsp); put_nfs_open_context(calldata->ctx); + fput(calldata->fl.fl_file); kfree(calldata); } -- cgit From 6b55970b0f8057a40f964d3dcfa4a687f675b5e2 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:54:53 +0800 Subject: nfs: Fix a memory leak when meeting an unsupported state protect Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5e7638d8e31b..75a874fd1591 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6972,7 +6972,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, /* unsupported! */ WARN_ON_ONCE(1); status = -EINVAL; - goto out_server_scope; + goto out_impl_id; } status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); @@ -7000,6 +7000,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, /* use the most recent implementation id */ kfree(clp->cl_implid); clp->cl_implid = res.impl_id; + res.impl_id = NULL; if (clp->cl_serverscope != NULL && !nfs41_same_server_scope(clp->cl_serverscope, @@ -7013,15 +7014,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (clp->cl_serverscope == NULL) { clp->cl_serverscope = res.server_scope; - goto out; + res.server_scope = NULL; } - } else - kfree(res.impl_id); + } -out_server_owner: - kfree(res.server_owner); +out_impl_id: + kfree(res.impl_id); out_server_scope: kfree(res.server_scope); +out_server_owner: + kfree(res.server_owner); out: if (clp->cl_implid != NULL) dprintk("NFS reply exchange_id: Server Implementation ID: " -- cgit From 71f81e51ee0dc6d523a5229096c32eca2ab58bbb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:55:50 +0800 Subject: nfs: Remove unused argument in nfs_server_set_fsinfo() Commit e38eb6506f "NFS: set_pnfs_layoutdriver() from nfs4_proc_fsinfo()" have remove the using of mntfh from nfs_server_set_fsinfo(). Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1e37491904ee..7b3e5c367f5b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -825,7 +825,6 @@ error: * Load up the server record from information gained in an fsinfo record */ static void nfs_server_set_fsinfo(struct nfs_server *server, - struct nfs_fh *mntfh, struct nfs_fsinfo *fsinfo) { unsigned long max_rpc_payload; @@ -901,7 +900,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs if (error < 0) goto out_error; - nfs_server_set_fsinfo(server, mntfh, &fsinfo); + nfs_server_set_fsinfo(server, &fsinfo); /* Get some general file system info */ if (server->namelen == 0) { -- cgit From 6a062a3687c20b80362b04e1ae7c9772d511977d Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 23:00:29 +0800 Subject: nfs: Use remove_proc_subtree() instead remove_proc_entry() Thanks for Al Viro's comments of killing proc_fs_nfs completely. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 7b3e5c367f5b..ecebb406cc1a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1192,8 +1192,6 @@ void nfs_clients_init(struct net *net) } #ifdef CONFIG_PROC_FS -static struct proc_dir_entry *proc_fs_nfs; - static int nfs_server_list_open(struct inode *inode, struct file *file); static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); @@ -1435,27 +1433,20 @@ void nfs_fs_proc_net_exit(struct net *net) */ int __init nfs_fs_proc_init(void) { - struct proc_dir_entry *p; - - proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL); - if (!proc_fs_nfs) + if (!proc_mkdir("fs/nfsfs", NULL)) goto error_0; /* a file of servers with which we're dealing */ - p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers"); - if (!p) + if (!proc_symlink("fs/nfsfs/servers", NULL, "../../net/nfsfs/servers")) goto error_1; /* a file of volumes that we have mounted */ - p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes"); - if (!p) - goto error_2; - return 0; + if (!proc_symlink("fs/nfsfs/volumes", NULL, "../../net/nfsfs/volumes")) + goto error_1; -error_2: - remove_proc_entry("servers", proc_fs_nfs); + return 0; error_1: - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("fs/nfsfs", NULL); error_0: return -ENOMEM; } @@ -1465,9 +1456,7 @@ error_0: */ void nfs_fs_proc_exit(void) { - remove_proc_entry("volumes", proc_fs_nfs); - remove_proc_entry("servers", proc_fs_nfs); - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("fs/nfsfs", NULL); } #endif /* CONFIG_PROC_FS */ -- cgit From 2785110d2e5c5bad54c8364391e02e3dd21eccf3 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:57:49 +0800 Subject: nfs: Don't setting FILE_CREATED flags always Commit 5bc2afc2b5 "NFSv4: Honour the 'opened' parameter in the atomic_open() filesystem method" have support the opened arguments now. Also, Commit 03da633aa7 "atomic_open: take care of EEXIST in no-open case with O_CREAT|O_EXCL in fs/namei.c" have change vfs's logical. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b2c8b31b2be7..21457bb0edd6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1470,9 +1470,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx, { int err; - if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) - *opened |= FILE_CREATED; - err = finish_open(file, dentry, do_open, opened); if (err) goto out; -- cgit From cd738ee985ccc1162e7ba2b4ecd41b6072a5befc Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:58:31 +0800 Subject: nfs: Remove unneeded micro checking of CONFIG_PROC_FS Have checking CONFIG_PROC_FS in include/linux/sunrpc/stats.h. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 14b4709b9242..b77b328a06d7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2010,17 +2010,15 @@ static int __init init_nfs_fs(void) if (err) goto out1; -#ifdef CONFIG_PROC_FS rpc_proc_register(&init_net, &nfs_rpcstat); -#endif - if ((err = register_nfs_fs()) != 0) + + err = register_nfs_fs(); + if (err) goto out0; return 0; out0: -#ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); -#endif nfs_destroy_directcache(); out1: nfs_destroy_writepagecache(); @@ -2051,9 +2049,7 @@ static void __exit exit_nfs_fs(void) nfs_destroy_nfspagecache(); nfs_fscache_unregister(); unregister_pernet_subsys(&nfs_net_ops); -#ifdef CONFIG_PROC_FS rpc_proc_unregister(&init_net, "nfs"); -#endif unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); -- cgit From bc4da2a2bee2aaf5ed62b8bb676a15d010af0fc0 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:59:04 +0800 Subject: nfs: Drop bad comment in nfs41_walk_client_list() Commit 7b1f1fd184 "NFSv4/4.1: Fix bugs in nfs4[01]_walk_client_list" have change the logical of the list_for_each_entry(). Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index e42be52a8c18..3aa6a9ba5113 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -676,7 +676,6 @@ found: break; } - /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); nfs_put_client(prev); -- cgit From d356a7d1e75003063fd5f499366b1043a0f79f6c Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 11:59:42 +0800 Subject: nfs: Remove invalid NFS_ATTR_FATTR_V4_REFERRAL checking in nfs4_get_rootfh NFS_ATTR_FATTR_V4_REFERRAL is only set in nfs4_proc_lookup_common. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/nfs4getroot.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index c0b3a16b4a00..039b3eb6d834 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c @@ -35,13 +35,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p goto out; } - if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { - printk(KERN_ERR "nfs4_get_rootfh:" - " getroot obtained referral\n"); - ret = -EREMOTE; - goto out; - } - memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); out: nfs_free_fattr(fsinfo.fattr); -- cgit From b4839ebe21fc5d543b933d83644981ea73e9ba36 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 1 Jul 2015 12:00:13 +0800 Subject: nfs: Remove invalid tk_pid from debug message Before rpc_run_task(), tk_pid is uninitiated as 0 always. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 ++--- fs/nfs/pagelist.c | 3 +-- fs/nfs/write.c | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 75a874fd1591..6f228b5af819 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8150,9 +8150,8 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) struct rpc_task *task; int status = 0; - dprintk("NFS: %4d initiating layoutcommit call. sync %d " - "lbw: %llu inode %lu\n", - data->task.tk_pid, sync, + dprintk("NFS: initiating layoutcommit call. sync %d " + "lbw: %llu inode %lu\n", sync, data->args.lastbytewritten, data->args.inode->i_ino); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d70228a81056..1da68d3b1eda 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -636,9 +636,8 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); - dprintk("NFS: %5u initiated pgio call " + dprintk("NFS: initiated pgio call " "(req %s/%llu, %u bytes @ offset %llu)\n", - hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), hdr->args.count, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 84ba7268833d..359e9ad596c9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1551,7 +1551,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, /* Set up the initial task struct. */ nfs_ops->commit_setup(data, &msg); - dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); + dprintk("NFS: initiated commit call\n"); nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client, NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg); -- cgit