From 033c006e5fe08233014d449715171f07e68a4a3a Mon Sep 17 00:00:00 2001
From: Corentin Labbe
Date: Tue, 26 Sep 2017 09:14:05 +0200
Subject: nfs_common: fix build warning in grace.c

This fixes the following warning:

fs/nfs_common/grace.c:66:1: warning: no previous prototype for function '__state_in_grace' [-Wmissing-prototypes]

by adding the missing static.

Signed-off-by: Corentin Labbe
Reviewed-by: Jeff Layton
Signed-off-by: J. Bruce Fields
---
 fs/nfs_common/grace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 420d3a0ab258..519396967e79 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(locks_end_grace);
  * to answer ordinary lock requests, and when they should accept only
  * lock reclaims.
  */
-int
+static int
 __state_in_grace(struct net *net, bool open)
 {
        struct list_head *grace_list = net_generic(net, grace_net_id);
-- cgit

From 809d4fcf9dd593af6d11ab1e6f0efc9fcfefb682 Mon Sep 17 00:00:00 2001
From: Corentin Labbe
Date: Tue, 26 Sep 2017 09:14:06 +0200
Subject: nfs_common: move locks_in_grace comment to the right place

Commit c87fb4a378f9 ("lockd: NLM grace period shouldn't block NFSv4
opens") left the locks_in_grace() comment in the wrong place. This
patch moves the comment back to the right place.

Signed-off-by: Corentin Labbe
Reviewed-by: Jeff Layton
Signed-off-by: J. Bruce Fields
---
 fs/nfs_common/grace.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 519396967e79..c030cd618b99 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -55,13 +55,6 @@ locks_end_grace(struct lock_manager *lm)
 }
 EXPORT_SYMBOL_GPL(locks_end_grace);
 
-/**
- * locks_in_grace
- *
- * Lock managers call this function to determine when it is OK for them
- * to answer ordinary lock requests, and when they should accept only
- * lock reclaims.
- */
 static int
 __state_in_grace(struct net *net, bool open)
 {
@@ -78,6 +71,13 @@ __state_in_grace(struct net *net, bool open)
        return false;
 }
 
+/**
+ * locks_in_grace
+ *
+ * Lock managers call this function to determine when it is OK for them
+ * to answer ordinary lock requests, and when they should accept only
+ * lock reclaims.
+ */
 int locks_in_grace(struct net *net)
 {
        return __state_in_grace(net, 0);
-- cgit

From 003278e431bffa4070d18c821eff1d95867f24db Mon Sep 17 00:00:00 2001
From: Corentin Labbe
Date: Tue, 26 Sep 2017 09:14:07 +0200
Subject: nfs_common: convert int to bool

Since __state_in_grace() returns only true/false, make it return bool
instead of int. Make the same change for its two users,
locks_in_grace() and opens_in_grace().

Signed-off-by: Corentin Labbe
Reviewed-by: Jeff Layton
Signed-off-by: J. Bruce Fields
---
 fs/nfs_common/grace.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index c030cd618b99..897b299db55e 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -55,7 +55,7 @@ locks_end_grace(struct lock_manager *lm)
 }
 EXPORT_SYMBOL_GPL(locks_end_grace);
 
-static int
+static bool
 __state_in_grace(struct net *net, bool open)
 {
        struct list_head *grace_list = net_generic(net, grace_net_id);
@@ -78,15 +78,15 @@ __state_in_grace(struct net *net, bool open)
  * to answer ordinary lock requests, and when they should accept only
  * lock reclaims.
  */
-int locks_in_grace(struct net *net)
+bool locks_in_grace(struct net *net)
 {
-       return __state_in_grace(net, 0);
+       return __state_in_grace(net, false);
 }
 EXPORT_SYMBOL_GPL(locks_in_grace);
 
-int opens_in_grace(struct net *net)
+bool opens_in_grace(struct net *net)
 {
-       return __state_in_grace(net, 1);
+       return __state_in_grace(net, true);
 }
 EXPORT_SYMBOL_GPL(opens_in_grace);
-- cgit

From 9542446048e01871fd323cba9a1d468b46cfceea Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields"
Date: Fri, 15 Sep 2017 16:02:52 -0400
Subject: nfsd: remove unnecessary nofilehandle checks

These checks should already have been done centrally in
nfsd4_proc_compound, so the checks in each individual operation are
unnecessary.

Signed-off-by: J. Bruce Fields
---
 fs/nfsd/nfs4proc.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3c69db7d4905..7aff6d383d34 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -485,9 +485,6 @@ static __be32
 nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
            union nfsd4_op_u *u)
 {
-       if (!cstate->current_fh.fh_dentry)
-               return nfserr_nofilehandle;
-
        u->getfh = &cstate->current_fh;
        return nfs_ok;
 }
@@ -535,9 +532,6 @@ static __be32
 nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
             union nfsd4_op_u *u)
 {
-       if (!cstate->current_fh.fh_dentry)
-               return nfserr_nofilehandle;
-
        fh_dup2(&cstate->save_fh, &cstate->current_fh);
        if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
                memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
@@ -703,10 +697,8 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
           union nfsd4_op_u *u)
 {
        struct nfsd4_link *link = &u->link;
-       __be32 status = nfserr_nofilehandle;
+       __be32 status;
 
-       if (!cstate->save_fh.fh_dentry)
-               return status;
        status = nfsd_link(rqstp, &cstate->current_fh,
                           link->li_name, link->li_namelen, &cstate->save_fh);
        if (!status)
@@ -850,10 +842,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
             union nfsd4_op_u *u)
 {
        struct nfsd4_rename *rename = &u->rename;
-       __be32 status = nfserr_nofilehandle;
+       __be32 status;
 
-       if (!cstate->save_fh.fh_dentry)
-               return status;
        if (opens_in_grace(SVC_NET(rqstp)) &&
            !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
                return nfserr_grace;
-- cgit

From 44d8660d3bb0a1c8363ebcb906af2343ea8e15f6 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields"
Date: Tue, 19 Sep 2017 20:51:31 -0400
Subject: nfsd: increase DRC cache limit

An NFSv4.1+ client negotiates the size of its duplicate reply cache in
the initial CREATE_SESSION request.  The server preallocates the memory
for the duplicate reply cache to ensure that we'll never fail to record
the response to a nonidempotent operation.

To prevent a few CREATE_SESSIONs from consuming all of memory, we set an
upper limit based on nr_free_buffer_pages().  1/2^10 has been too
limiting in practice; 1/2^7 is still less than one percent.

Signed-off-by: J. Bruce Fields
---
 fs/nfsd/nfssvc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 7e3af3ef0917..6bbc717f40f2 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -446,7 +446,7 @@ void nfsd_reset_versions(void)
  */
 static void set_max_drc(void)
 {
-       #define NFSD_DRC_SIZE_SHIFT    10
+       #define NFSD_DRC_SIZE_SHIFT    7
        nfsd_drc_max_mem = (nr_free_buffer_pages()
                                        >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
        nfsd_drc_mem_used = 0;
-- cgit

From de766e570413bd0484af0b580299b495ada625c3 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields"
Date: Tue, 19 Sep 2017 19:25:41 -0400
Subject: nfsd: give out fewer session slots as limit approaches

Instead of granting clients' full requests until we hit our DRC size
limit and then failing CREATE_SESSIONs (and hence mounts) completely,
start granting clients smaller slot tables as we approach the limit.

The factor chosen here is pretty much arbitrary.

Signed-off-by: J. Bruce Fields
---
 fs/nfsd/nfs4state.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0c04f81aa63b..9db8a19cceaa 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1472,6 +1472,11 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
        spin_lock(&nfsd_drc_lock);
        avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
                    nfsd_drc_max_mem - nfsd_drc_mem_used);
+       /*
+        * Never use more than a third of the remaining memory,
+        * unless it's the only way to give this client a slot:
+        */
+       avail = clamp_t(int, avail, slotsize, avail/3);
        num = min_t(int, num, avail / slotsize);
        nfsd_drc_mem_used += num * slotsize;
        spin_unlock(&nfsd_drc_lock);
-- cgit

From a133552a0049aa2ff92f83dbe8d5185485b538c4 Mon Sep 17 00:00:00 2001
From: Jérémy Lefaure
Date: Sun, 1 Oct 2017 15:30:47 -0400
Subject: nfsd: use ARRAY_SIZE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using the ARRAY_SIZE macro improves the readability of the code.

Found with Coccinelle with the following semantic patch:

@r depends on (org || report)@
type T;
T[] E;
position p;
@@
(
  (sizeof(E)@p /sizeof(*E))
|
  (sizeof(E)@p /sizeof(E[...]))
|
  (sizeof(E)@p /sizeof(T))
)

Signed-off-by: Jérémy Lefaure
Reviewed-by: Jeff Layton
Signed-off-by: J. Bruce Fields
---
 fs/nfsd/fault_inject.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 34c1c449fddf..3ec72c931ac5 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 
 #include "state.h"
 #include "netns.h"
@@ -125,8 +126,6 @@ static struct nfsd_fault_inject_op inject_ops[] = {
        },
 };
 
-#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op))
-
 int nfsd_fault_inject_init(void)
 {
        unsigned int i;
@@ -137,7 +136,7 @@ int nfsd_fault_inject_init(void)
        if (!debug_dir)
                goto fail;
 
-       for (i = 0; i < NUM_INJECT_OPS; i++) {
+       for (i = 0; i < ARRAY_SIZE(inject_ops); i++) {
                op = &inject_ops[i];
                if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
                        goto fail;
-- cgit

From 085def3ade52f2ffe3e31f42e98c27dcc222dd37 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields"
Date: Wed, 18 Oct 2017 16:17:18 -0400
Subject: nfsd4: fix cached replies to solo SEQUENCE compounds

Currently our handling of 4.1+ requests without "cachethis" set is
confusing and not quite correct.
Suppose a client sends a compound consisting of only a single SEQUENCE
op, and it matches the seqid in a session slot (so it's a retry), but
the previous request with that seqid did not have "cachethis" set.

The obvious thing to do might be to return NFS4ERR_RETRY_UNCACHED_REP,
but the protocol only allows that to be returned on the op following
the SEQUENCE, and there is no such op in this case.

The protocol permits us to cache replies even if the client didn't ask
us to.  And it's easy to do so in the case of solo SEQUENCE compounds.

So, when we get a solo SEQUENCE, we can either return the previously
cached reply or NFSERR_SEQ_FALSE_RETRY if we notice it differs in some
way from the original call.

Currently, we're returning a corrupt reply in the case a solo SEQUENCE
matches a previous compound with more ops.  This actually matters
because the Linux client recently started doing this as a way to
recover from lost replies to idempotent operations in the case the
process doing the original reply was killed: in that case it's
difficult to keep the original arguments around to do a real retry, and
the client no longer cares what the result is anyway, but it would like
to make sure that the slot's sequence id has been incremented, and the
solo SEQUENCE assures that: if the server never got the original reply,
it will increment the sequence id.  If it did get the original reply,
it won't increment, and nothing else about the reply really matters
much.  But we can at least attempt to return valid xdr!

Tested-by: Olga Kornievskaia
Signed-off-by: J. Bruce Fields
---
 fs/nfsd/nfs4state.c | 20 +++++++++++++++-----
 fs/nfsd/state.h     |  1 +
 fs/nfsd/xdr4.h      | 13 +++++++++++--
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9db8a19cceaa..3e96e02496bd 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2292,14 +2292,16 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
 
        dprintk("--> %s slot %p\n", __func__, slot);
 
+       slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
        slot->sl_opcnt = resp->opcnt;
        slot->sl_status = resp->cstate.status;
-       slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
 
-       if (nfsd4_not_cached(resp)) {
-               slot->sl_datalen = 0;
+       if (!nfsd4_cache_this(resp)) {
+               slot->sl_flags &= ~NFSD4_SLOT_CACHED;
                return;
        }
+       slot->sl_flags |= NFSD4_SLOT_CACHED;
+
        base = resp->cstate.data_offset;
        slot->sl_datalen = buf->len - base;
        if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
@@ -2326,8 +2328,16 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
        op = &args->ops[resp->opcnt - 1];
        nfsd4_encode_operation(resp, op);
 
-       /* Return nfserr_retry_uncached_rep in next operation. */
-       if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
+       if (slot->sl_flags & NFSD4_SLOT_CACHED)
+               return op->status;
+       if (args->opcnt == 1) {
+               /*
+                * The original operation wasn't a solo sequence--we
+                * always cache those--so this retry must not match the
+                * original:
+                */
+               op->status = nfserr_seq_false_retry;
+       } else {
                op = &args->ops[resp->opcnt++];
                op->status = nfserr_retry_uncached_rep;
                nfsd4_encode_operation(resp, op);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 005c911b34ac..2488b7df1b35 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -174,6 +174,7 @@ struct nfsd4_slot {
 #define NFSD4_SLOT_INUSE       (1 << 0)
 #define NFSD4_SLOT_CACHETHIS   (1 << 1)
 #define NFSD4_SLOT_INITIALIZED (1 << 2)
+#define NFSD4_SLOT_CACHED      (1 << 3)
        u8      sl_flags;
        char    sl_data[];
 };
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 1e4edbf70052..bc29511b6405 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -649,9 +649,18 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
        return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
 }
 
-static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
+/*
+ * The session reply cache only needs to cache replies that the client
+ * actually asked us to.  But it's almost free for us to cache compounds
+ * consisting of only a SEQUENCE op, so we may as well cache those too.
+ * Also, the protocol doesn't give us a convenient response in the case
+ * of a replay of a solo SEQUENCE op that wasn't cached
+ * (RETRY_UNCACHED_REP can only be returned in the second op of a
+ * compound).
+ */
+static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp)
 {
-       return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
+       return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
                || nfsd4_is_solo_sequence(resp);
 }
-- cgit

From 53da6a53e1d414e05759fa59b7032ee08f4e22d7 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields"
Date: Tue, 17 Oct 2017 20:38:49 -0400
Subject: nfsd4: catch some false session retries

The spec allows us to return NFS4ERR_SEQ_FALSE_RETRY if we notice that
the client is making a call that matches a previous (slot, seqid) pair
but that *isn't* actually a replay, because some detail of the call
doesn't actually match the previous one.

Catching every such case is difficult, but we may as well catch a few
easy ones.  This also handles the case described in the previous patch,
in a different way.

The spec does however require us to catch the case where the difference
is in the rpc credentials.  This prevents somebody from snooping another
user's replies by fabricating retries.

(But the practical value of the attack is limited by the fact that the
replies with the most sensitive data are READ replies, which are not
normally cached.)

Tested-by: Olga Kornievskaia
Signed-off-by: J. Bruce Fields
---
 fs/nfsd/nfs4state.c | 37 ++++++++++++++++++++++++++++++++++++-
 fs/nfsd/state.h     |  1 +
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3e96e02496bd..251dac7579ec 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1439,8 +1439,10 @@ free_session_slots(struct nfsd4_session *ses)
 {
        int i;
 
-       for (i = 0; i < ses->se_fchannel.maxreqs; i++)
+       for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+               free_svc_cred(&ses->se_slots[i]->sl_cred);
                kfree(ses->se_slots[i]);
+       }
 }
 
 /*
@@ -2295,6 +2297,8 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
        slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
        slot->sl_opcnt = resp->opcnt;
        slot->sl_status = resp->cstate.status;
+       free_svc_cred(&slot->sl_cred);
+       copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred);
 
        if (!nfsd4_cache_this(resp)) {
                slot->sl_flags &= ~NFSD4_SLOT_CACHED;
@@ -3001,6 +3005,34 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
        return xb->len > session->se_fchannel.maxreq_sz;
 }
 
+static bool replay_matches_cache(struct svc_rqst *rqstp,
+                struct nfsd4_sequence *seq, struct nfsd4_slot *slot)
+{
+       struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+
+       if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) !=
+           (bool)seq->cachethis)
+               return false;
+       /*
+        * If there's an error than the reply can have fewer ops than
+        * the call.  But if we cached a reply with *more* ops than the
+        * call you're sending us now, then this new call is clearly not
+        * really a replay of the old one:
+        */
+       if (slot->sl_opcnt < argp->opcnt)
+               return false;
+       /* This is the only check explicitly called by spec: */
+       if (!same_creds(&rqstp->rq_cred, &slot->sl_cred))
+               return false;
+       /*
+        * There may be more comparisons we could actually do, but the
+        * spec doesn't require us to catch every case where the calls
+        * don't match (that would require caching the call as well as
+        * the reply), so we don't bother.
+        */
+       return true;
+}
+
 __be32
 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
               union nfsd4_op_u *u)
@@ -3060,6 +3092,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                status = nfserr_seq_misordered;
                if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
                        goto out_put_session;
+               status = nfserr_seq_false_retry;
+               if (!replay_matches_cache(rqstp, seq, slot))
+                       goto out_put_session;
                cstate->slot = slot;
                cstate->session = session;
                cstate->clp = clp;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 2488b7df1b35..86aa92d200e1 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -169,6 +169,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
 struct nfsd4_slot {
        u32     sl_seqid;
        __be32  sl_status;
+       struct svc_cred sl_cred;
        u32     sl_datalen;
        u16     sl_opcnt;
 #define NFSD4_SLOT_INUSE       (1 << 0)
-- cgit

From dc3033e16c59a2c4e62b31341258a5786cbcee56 Mon Sep 17 00:00:00 2001
From: Vasily Averin
Date: Fri, 20 Oct 2017 17:33:18 +0300
Subject: lockd: double unregister of inetaddr notifiers

lockd_up() can call lockd_unregister_notifiers() twice: inside
lockd_start_svc() when it calls lockd_svc_exit_thread(), and then in
the error path of lockd_up().

The patch forces lockd_start_svc() to unregister the notifiers in all
error cases and removes the extra unregister in the error path of
lockd_up().

Fixes: cb7d224f82e4 "lockd: unregister notifier blocks if the service ..."
Signed-off-by: Vasily Averin
Cc: stable@vger.kernel.org
Reviewed-by: Jeff Layton
Signed-off-by: J. Bruce Fields
---
 fs/lockd/svc.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b995bdc13976..f04ecfc7ece0 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -369,6 +369,7 @@ static int lockd_start_svc(struct svc_serv *serv)
                printk(KERN_WARNING
                        "lockd_up: svc_rqst allocation failed, error=%d\n",
                        error);
+               lockd_unregister_notifiers();
                goto out_rqst;
        }
 
@@ -459,13 +460,16 @@ int lockd_up(struct net *net)
        }
 
        error = lockd_up_net(serv, net);
-       if (error < 0)
-               goto err_net;
+       if (error < 0) {
+               lockd_unregister_notifiers();
+               goto err_put;
+       }
 
        error = lockd_start_svc(serv);
-       if (error < 0)
-               goto err_start;
-
+       if (error < 0) {
+               lockd_down_net(serv, net);
+               goto err_put;
+       }
        nlmsvc_users++;
        /*
         * Note: svc_serv structures have an initial use count of 1,
@@ -476,12 +480,6 @@ err_put:
 err_create:
        mutex_unlock(&nlmsvc_mutex);
        return error;
-
-err_start:
-       lockd_down_net(serv, net);
-err_net:
-       lockd_unregister_notifiers();
-       goto err_put;
 }
 EXPORT_SYMBOL_GPL(lockd_up);
-- cgit

From a15dfcd529ab43265e70ec32d3b9d2286872d412 Mon Sep 17 00:00:00 2001
From: Elena Reshetova
Date: Fri, 20 Oct 2017 12:53:28 +0300
Subject: fs, nfsd: convert nfs4_stid.sc_count from atomic_t to refcount_t

atomic_t variables are currently used to implement reference counters
with the following properties:
 - counter is initialized to 1 using atomic_set()
 - a resource is freed upon counter reaching zero
 - once counter reaches zero, its further increments aren't allowed
 - counter schema uses basic atomic operations (set, inc, inc_not_zero,
   dec_and_test, etc.)

Such atomic variables should be converted to a newly provided
refcount_t type and API that prevents accidental counter overflows and
underflows.  This is important since overflows and underflows can lead
to use-after-free situation and be exploitable.

The variable nfs4_stid.sc_count is used as pure reference counter.
Convert it to refcount_t and fix up the operations.

Suggested-by: Kees Cook
Reviewed-by: David Windsor
Reviewed-by: Hans Liljestrand
Signed-off-by: Elena Reshetova
Signed-off-by: J. Bruce Fields
---
 fs/nfsd/nfs4layouts.c |  4 ++--
 fs/nfsd/nfs4state.c   | 24 ++++++++++++------------
 fs/nfsd/state.h       |  3 ++-
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index e122da696f1b..fed076069dd2 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -335,7 +335,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
 
        trace_layout_recall(&ls->ls_stid.sc_stateid);
 
-       atomic_inc(&ls->ls_stid.sc_count);
+       refcount_inc(&ls->ls_stid.sc_count);
        nfsd4_run_cb(&ls->ls_recall);
 
 out_unlock:
@@ -440,7 +440,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
                goto done;
        }
 
-       atomic_inc(&ls->ls_stid.sc_count);
+       refcount_inc(&ls->ls_stid.sc_count);
        list_add_tail(&new->lo_perstate, &ls->ls_layouts);
        new = NULL;
 done:
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 251dac7579ec..df2e5b4b222e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -656,7 +656,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
        stid->sc_stateid.si_opaque.so_id = new_id;
        stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
        /* Will be incremented before return to client: */
-       atomic_set(&stid->sc_count, 1);
+       refcount_set(&stid->sc_count, 1);
        spin_lock_init(&stid->sc_lock);
 
        /*
@@ -813,7 +813,7 @@ nfs4_put_stid(struct nfs4_stid *s)
 
        might_lock(&clp->cl_lock);
 
-       if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
+       if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
                wake_up_all(&close_wq);
                return;
        }
@@ -913,7 +913,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
        if (status)
                return status;
        ++fp->fi_delegees;
-       atomic_inc(&dp->dl_stid.sc_count);
+       refcount_inc(&dp->dl_stid.sc_count);
        dp->dl_stid.sc_type = NFS4_DELEG_STID;
        list_add(&dp->dl_perfile, &fp->fi_delegations);
        list_add(&dp->dl_perclnt, &clp->cl_delegations);
@@ -1214,7 +1214,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
 
        WARN_ON_ONCE(!list_empty(&stp->st_locks));
 
-       if (!atomic_dec_and_test(&s->sc_count)) {
+       if (!refcount_dec_and_test(&s->sc_count)) {
                wake_up_all(&close_wq);
                return;
        }
@@ -2079,7 +2079,7 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
        s = find_stateid_locked(cl, t);
        if (s != NULL) {
                if (typemask & s->sc_type)
-                       atomic_inc(&s->sc_count);
+                       refcount_inc(&s->sc_count);
                else
                        s = NULL;
        }
@@ -3564,7 +3564,7 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
                        continue;
                if (local->st_stateowner == &oo->oo_owner) {
                        ret = local;
-                       atomic_inc(&ret->st_stid.sc_count);
+                       refcount_inc(&ret->st_stid.sc_count);
                        break;
                }
        }
@@ -3623,7 +3623,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
                goto out_unlock;
 
        open->op_stp = NULL;
-       atomic_inc(&stp->st_stid.sc_count);
+       refcount_inc(&stp->st_stid.sc_count);
        stp->st_stid.sc_type = NFS4_OPEN_STID;
        INIT_LIST_HEAD(&stp->st_locks);
        stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner);
@@ -3671,7 +3671,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
         * there should be no danger of the refcount going back up again at
         * this point.
         */
-       wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2);
+       wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
 
        release_all_access(s);
        if (s->st_stid.sc_file) {
@@ -3833,7 +3833,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
         * lock) we know the server hasn't removed the lease yet, we know
         * it's safe to take a reference.
         */
-       atomic_inc(&dp->dl_stid.sc_count);
+       refcount_inc(&dp->dl_stid.sc_count);
        nfsd4_run_cb(&dp->dl_recall);
 }
 
@@ -5121,7 +5121,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                ret = nfserr_locks_held;
                break;
        case NFS4_LOCK_STID:
-               atomic_inc(&s->sc_count);
+               refcount_inc(&s->sc_count);
                spin_unlock(&cl->cl_lock);
                ret = nfsd4_free_lock_stateid(stateid, s);
                goto out;
@@ -5628,7 +5628,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
 
        lockdep_assert_held(&clp->cl_lock);
 
-       atomic_inc(&stp->st_stid.sc_count);
+       refcount_inc(&stp->st_stid.sc_count);
        stp->st_stid.sc_type = NFS4_LOCK_STID;
        stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
        get_nfs4_file(fp);
@@ -5654,7 +5654,7 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
 
        list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
                if (lst->st_stid.sc_file == fp) {
-                       atomic_inc(&lst->st_stid.sc_count);
+                       refcount_inc(&lst->st_stid.sc_count);
                        return lst;
                }
        }
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 86aa92d200e1..c259271c35a4 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -36,6 +36,7 @@
 #include
+#include
 #include
 
 #include "nfsfh.h"
@@ -83,7 +84,7 @@ struct nfsd4_callback_ops {
  * fields that are of general use to any stateid.
  */
 struct nfs4_stid {
-       atomic_t                sc_count;
+       refcount_t              sc_count;
 #define NFS4_OPEN_STID 1
 #define NFS4_LOCK_STID 2
 #define NFS4_DELEG_STID 4
-- cgit

From cff7cb2ece397760195ff8a5fc6bf3c860810246 Mon Sep 17 00:00:00 2001
From: Elena Reshetova
Date: Fri, 20 Oct 2017 12:53:29 +0300
Subject: fs, nfsd: convert nfs4_clnt_odstate.co_odcount from atomic_t to refcount_t

atomic_t variables are currently used to implement reference counters
with the following properties:
 - counter is initialized to 1 using atomic_set()
 - a resource is freed upon counter reaching zero
 - once counter reaches zero, its further increments aren't allowed
 - counter schema uses basic atomic operations (set, inc, inc_not_zero,
   dec_and_test, etc.)

Such atomic variables should be converted to a newly provided
refcount_t type and API that prevents accidental counter overflows and
underflows.  This is important since overflows and underflows can lead
to use-after-free situation and be exploitable.

The variable nfs4_clnt_odstate.co_odcount is used as pure reference
counter.  Convert it to refcount_t and fix up the operations.

Suggested-by: Kees Cook
Reviewed-by: David Windsor
Reviewed-by: Hans Liljestrand
Signed-off-by: Elena Reshetova
Signed-off-by: J. Bruce Fields
---
 fs/nfsd/nfs4state.c | 6 +++---
 fs/nfsd/state.h     | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index df2e5b4b222e..3f10a397e5ba 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -568,7 +568,7 @@ alloc_clnt_odstate(struct nfs4_client *clp)
        co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
        if (co) {
                co->co_client = clp;
-               atomic_set(&co->co_odcount, 1);
+               refcount_set(&co->co_odcount, 1);
        }
        return co;
 }
@@ -586,7 +586,7 @@ static inline void
 get_clnt_odstate(struct nfs4_clnt_odstate *co)
 {
        if (co)
-               atomic_inc(&co->co_odcount);
+               refcount_inc(&co->co_odcount);
 }
 
 static void
@@ -598,7 +598,7 @@ put_clnt_odstate(struct nfs4_clnt_odstate *co)
                return;
 
        fp = co->co_file;
-       if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
+       if (refcount_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
                list_del(&co->co_perfile);
                spin_unlock(&fp->fi_lock);
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index c259271c35a4..2ed368a91a10 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -468,7 +468,7 @@ struct nfs4_clnt_odstate {
        struct nfs4_client      *co_client;
        struct nfs4_file        *co_file;
        struct list_head        co_perfile;
-       atomic_t                co_odcount;
+       refcount_t              co_odcount;
 };
 
 /*
-- cgit

From 818a34eb266449b1c89242596039a5e44c9be04c Mon Sep 17 00:00:00 2001
From: Elena Reshetova
Date: Fri, 20 Oct 2017 12:53:30 +0300
Subject: fs, nfsd: convert nfs4_file.fi_ref from atomic_t to refcount_t

atomic_t variables are currently used to implement reference counters
with the following properties:
 - counter is initialized to 1 using atomic_set()
 - a resource is freed upon counter reaching zero
 - once counter reaches zero, its further increments aren't allowed
 - counter schema uses basic atomic operations (set, inc, inc_not_zero,
   dec_and_test, etc.)

Such atomic variables should be converted to a newly provided
refcount_t type and API that prevents accidental counter overflows and
underflows.  This is important since overflows and underflows can lead
to use-after-free situation and be exploitable.

The variable nfs4_file.fi_ref is used as pure reference counter.
Convert it to refcount_t and fix up the operations.

Suggested-by: Kees Cook
Reviewed-by: David Windsor
Reviewed-by: Hans Liljestrand
Signed-off-by: Elena Reshetova
Signed-off-by: J. Bruce Fields
---
 fs/nfsd/nfs4state.c | 6 +++---
 fs/nfsd/state.h     | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3f10a397e5ba..e905bf642a53 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -359,7 +359,7 @@ put_nfs4_file(struct nfs4_file *fi)
 {
        might_lock(&state_lock);
 
-       if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
+       if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
                hlist_del_rcu(&fi->fi_hash);
                spin_unlock(&state_lock);
                WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
@@ -3401,7 +3401,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
 {
        lockdep_assert_held(&state_lock);
 
-       atomic_set(&fp->fi_ref, 1);
+       refcount_set(&fp->fi_ref, 1);
        spin_lock_init(&fp->fi_lock);
        INIT_LIST_HEAD(&fp->fi_stateids);
        INIT_LIST_HEAD(&fp->fi_delegations);
@@ -3697,7 +3697,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
 
        hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
                if (fh_match(&fp->fi_fhandle, fh)) {
-                       if (atomic_inc_not_zero(&fp->fi_ref))
+                       if (refcount_inc_not_zero(&fp->fi_ref))
                                return fp;
                }
        }
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 2ed368a91a10..f3772ea8ba0d 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -484,7 +484,7 @@ struct nfs4_clnt_odstate {
  * the global state_lock spinlock.
  */
 struct nfs4_file {
-       atomic_t                fi_ref;
+       refcount_t              fi_ref;
        spinlock_t              fi_lock;
        struct hlist_node       fi_hash;        /* hash on fi_fhandle */
        struct list_head        fi_stateids;
@@ -637,7 +637,7 @@ struct nfs4_file *find_file(struct knfsd_fh *fh);
 void put_nfs4_file(struct nfs4_file *fi);
 static inline void get_nfs4_file(struct nfs4_file *fi)
 {
-       atomic_inc(&fi->fi_ref);
+       refcount_inc(&fi->fi_ref);
 }
 
 struct file *find_any_file(struct nfs4_file *f);
-- cgit

From 256a89fa3deb6bb699b794e5bf00a72e2fe558b0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Thu, 19 Oct 2017 12:04:11 +0200
Subject: nfsd: avoid gettimeofday for nfssvc_boot time

do_gettimeofday() is deprecated and we should generally use time64_t
based functions instead.  In the case of nfsd, all three users of
nfssvc_boot only use the initial time as a unique token and are not
affected by it overflowing, so they are not affected by the y2038
overflow.

This converts the structure to timespec64 anyway and adds comments to
all uses, to document that we have thought about it and avoid having to
look at it again.

Signed-off-by: Arnd Bergmann
Signed-off-by: J. Bruce Fields
---
 fs/nfsd/netns.h    |  2 +-
 fs/nfsd/nfs3xdr.c  | 10 ++++++----
 fs/nfsd/nfs4proc.c |  5 +++--
 fs/nfsd/nfssvc.c   |  2 +-
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 3714231a9d0f..1c91391f4805 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -107,7 +107,7 @@ struct nfsd_net {
        bool lockd_up;
 
        /* Time of server startup */
-       struct timeval nfssvc_boot;
+       struct timespec64 nfssvc_boot;
 
        /*
         * Max number of connections this nfsd container will allow.  Defaults
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index bf444b664011..3579e0ae1131 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -747,8 +747,9 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
        if (resp->status == 0) {
                *p++ = htonl(resp->count);
                *p++ = htonl(resp->committed);
-               *p++ = htonl(nn->nfssvc_boot.tv_sec);
-               *p++ = htonl(nn->nfssvc_boot.tv_usec);
+               /* unique identifier, y2038 overflow can be ignored */
+               *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
+               *p++ = htonl(nn->nfssvc_boot.tv_nsec);
        }
        return xdr_ressize_check(rqstp, p);
 }
@@ -1118,8 +1119,9 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
        p = encode_wcc_data(rqstp, p, &resp->fh);
        /* Write verifier */
        if (resp->status == 0) {
-               *p++ = htonl(nn->nfssvc_boot.tv_sec);
-               *p++ = htonl(nn->nfssvc_boot.tv_usec);
+               /* unique identifier, y2038 overflow can be ignored */
+               *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
+               *p++ = htonl(nn->nfssvc_boot.tv_nsec);
        }
        return xdr_ressize_check(rqstp, p);
 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7aff6d383d34..7d4495eac7a9 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -564,10 +564,11 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
 
        /*
         * This is opaque to client, so no need to byte-swap. Use
-        * __force to keep sparse happy
+        * __force to keep sparse happy. y2038 time_t overflow is
+        * irrelevant in this usage.
         */
        verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
-       verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec;
+       verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
        memcpy(verifier->data, verf, sizeof(verifier->data));
 }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6bbc717f40f2..28ff3e078af6 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -516,7 +516,7 @@ int nfsd_create_serv(struct net *net)
                register_inet6addr_notifier(&nfsd_inet6addr_notifier);
 #endif
        }
-       do_gettimeofday(&nn->nfssvc_boot);      /* record boot time */
+       ktime_get_real_ts64(&nn->nfssvc_boot);  /* record boot time */
        return 0;
 }
-- cgit

From 7e981a8afa6d8d1d93e2b3d162aa81cc7b537208 Mon Sep 17 00:00:00 2001
From: Vasily Averin
Date: Mon, 6 Nov 2017 16:46:04 +0300
Subject: nfsd: use net->ns.inum as net ID

Publishing the net pointer is not safe; use net->ns.inum instead.

Signed-off-by: Vasily Averin
Signed-off-by: J. Bruce Fields
---
 fs/nfsd/nfs4state.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e905bf642a53..ecbc7b0dfa4d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7056,8 +7056,8 @@ nfs4_state_start_net(struct net *net)
        nn->nfsd4_manager.block_opens = true;
        locks_start_grace(net, &nn->nfsd4_manager);
        nfsd4_client_tracking_init(net);
-       printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
-              nn->nfsd4_grace, net);
+       printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n",
+              nn->nfsd4_grace, net->ns.inum);
        queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
        return 0;
 }
-- cgit

From 95da1b3a5aded124dd1bda1e3cdb876184813140 Mon Sep 17 00:00:00 2001
From: Andrew Elble
Date: Fri, 3 Nov 2017 14:06:31 -0400
Subject: nfsd: deal with revoked delegations appropriately

If a delegation has been revoked by the server, operations using that
delegation should error out with NFS4ERR_DELEG_REVOKED in the 4.1+
case, and NFS4ERR_BAD_STATEID otherwise.

The server needs NFSv4.1 clients to explicitly free revoked
delegations.
If the server returns NFS4ERR_DELEG_REVOKED, the client will do that;
otherwise it may just forget about the delegation and be unable to
recover when it later sees SEQ4_STATUS_RECALLABLE_STATE_REVOKED set on
a SEQUENCE reply.  That can cause the Linux 4.1 client to loop in its
state manager.

Signed-off-by: Andrew Elble
Reviewed-by: Trond Myklebust
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields
---
 fs/nfsd/nfs4state.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ecbc7b0dfa4d..b82817767b9d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4016,7 +4016,8 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei
 {
        struct nfs4_stid *ret;
 
-       ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID);
+       ret = find_stateid_by_type(cl, s,
+                               NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
        if (!ret)
                return NULL;
        return delegstateid(ret);
@@ -4039,6 +4040,12 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
        deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
        if (deleg == NULL)
                goto out;
+       if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
+               nfs4_put_stid(&deleg->dl_stid);
+               if (cl->cl_minorversion)
+                       status = nfserr_deleg_revoked;
+               goto out;
+       }
        flags = share_access_to_flags(open->op_share_access);
        status = nfs4_check_delegmode(deleg, flags);
        if (status) {
@@ -4908,6 +4915,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
                     struct nfs4_stid **s, struct nfsd_net *nn)
 {
        __be32 status;
+       bool return_revoked = false;
+
+       /*
+        * only return revoked delegations if explicitly asked.
+        * otherwise we report revoked or bad_stateid status.
+        */
+       if (typemask & NFS4_REVOKED_DELEG_STID)
+               return_revoked = true;
+       else if (typemask & NFS4_DELEG_STID)
+               typemask |= NFS4_REVOKED_DELEG_STID;
 
        if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
                return nfserr_bad_stateid;
@@ -4922,6 +4939,12 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
        *s = find_stateid_by_type(cstate->clp, stateid, typemask);
        if (!*s)
                return nfserr_bad_stateid;
+       if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+               nfs4_put_stid(*s);
+               if (cstate->minorversion)
+                       return nfserr_deleg_revoked;
+               return nfserr_bad_stateid;
+       }
        return nfs_ok;
 }
-- cgit
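As a rough illustration of the DRC sizing arithmetic discussed in the
"nfsd: increase DRC cache limit" and "nfsd: give out fewer session slots as
limit approaches" patches earlier in this series, the standalone userspace C
sketch below reruns the calculation with made-up inputs. The free-page count,
the per-slot size, and the NFSD_MAX_MEM_PER_SESSION value here are
illustrative assumptions rather than the kernel's real numbers; only the
shift of 7 and the "no more than a third of what's left" clamp mirror the
patches above.

#include <stdio.h>

/* Assumed values for illustration only -- not the kernel's definitions. */
#define PAGE_SIZE_BYTES          4096UL
#define NFSD_DRC_SIZE_SHIFT      7              /* raised from 10 by the patch */
#define NFSD_MAX_MEM_PER_SESSION (32 * 4096UL)  /* assumed per-session cap */

/* Same semantics as the kernel's clamp_t(): min(max(val, lo), hi). */
static unsigned long clamp_ul(unsigned long val, unsigned long lo, unsigned long hi)
{
        unsigned long v = val > lo ? val : lo;
        return v < hi ? v : hi;
}

int main(void)
{
        /* Assume ~16 GiB of free buffer pages and an otherwise idle DRC. */
        unsigned long nr_free_buffer_pages = 4UL << 20;
        unsigned long drc_max_mem = (nr_free_buffer_pages >> NFSD_DRC_SIZE_SHIFT)
                                        * PAGE_SIZE_BYTES;
        unsigned long drc_mem_used = 0;
        unsigned long slotsize = 2624;  /* assumed per-slot footprint in bytes */
        unsigned long requested = 64;   /* slots asked for in CREATE_SESSION */

        /* Rough mirror of the nfsd4_get_drc_mem() logic after the clamp patch. */
        unsigned long avail = drc_max_mem - drc_mem_used;
        if (avail > NFSD_MAX_MEM_PER_SESSION)
                avail = NFSD_MAX_MEM_PER_SESSION;
        avail = clamp_ul(avail, slotsize, avail / 3);

        unsigned long granted = avail / slotsize;
        if (granted > requested)
                granted = requested;

        printf("DRC limit: %lu bytes; session granted %lu of %lu requested slots\n",
               drc_max_mem, granted, requested);
        return 0;
}

With these assumed numbers the sketch grants 16 of the 64 requested slots,
which is the trade the clamp patch makes: smaller slot tables as the limit
approaches instead of outright CREATE_SESSION failures.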