From 3c1818275cc65c6d53e0adfde0c989bfe89ab8d7 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 26 Jul 2017 10:14:55 -0400 Subject: NFS: Create NFS_ACCESS_* flags Passing the NFS v4 flags into the v3 code seems weird to me, even if they are defined to the same values. This patch adds in generic flags to help me feel better Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5ceaeb1f6fb6..80f397dc96bb 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2369,15 +2369,15 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) } EXPORT_SYMBOL_GPL(nfs_access_add_cache); -#define NFS_MAY_READ (NFS4_ACCESS_READ) -#define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \ - NFS4_ACCESS_EXTEND | \ - NFS4_ACCESS_DELETE) -#define NFS_FILE_MAY_WRITE (NFS4_ACCESS_MODIFY | \ - NFS4_ACCESS_EXTEND) +#define NFS_MAY_READ (NFS_ACCESS_READ) +#define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \ + NFS_ACCESS_EXTEND | \ + NFS_ACCESS_DELETE) +#define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \ + NFS_ACCESS_EXTEND) #define NFS_DIR_MAY_WRITE NFS_MAY_WRITE -#define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP) -#define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE) +#define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP) +#define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE) static int nfs_access_calc_mask(u32 access_result, umode_t umode) { -- cgit From 1750d929b08764dd293d5bdddaa9bc4d3f94d228 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 26 Jul 2017 12:00:21 -0400 Subject: NFS: Don't compare apples to elephants to determine access bits The NFS_ACCESS_* flags aren't a 1:1 mapping to the MAY_* flags, so checking for MAY_WHATEVER might have surprising results in nfs*_proc_access(). Let's simplify this check when determining which bits to ask for, and do it in a generic place instead of copying code for each NFS version. Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 11 ++++++++--- fs/nfs/nfs3proc.c | 17 +---------------- fs/nfs/nfs4proc.c | 19 +------------------ 3 files changed, 10 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 80f397dc96bb..db482d4c15d5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2425,9 +2425,14 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) if (!may_block) goto out; - /* Be clever: ask server to check for all possible rights */ - cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE - | NFS_MAY_WRITE | NFS_MAY_READ; + /* + * Determine which access bits we want to ask for... + */ + cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND; + if (S_ISDIR(inode->i_mode)) + cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP; + else + cache.mask |= NFS_ACCESS_EXECUTE; cache.cred = cred; status = NFS_PROTO(inode)->access(inode, &cache); if (status != 0) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d1e87ec0df84..44bf961d5812 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -187,6 +187,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) { struct nfs3_accessargs arg = { .fh = NFS_FH(inode), + .access = entry->mask, }; struct nfs3_accessres res; struct rpc_message msg = { @@ -195,25 +196,9 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) .rpc_resp = &res, .rpc_cred = entry->cred, }; - int mode = entry->mask; int status = -ENOMEM; dprintk("NFS call access\n"); - - if (mode & MAY_READ) - arg.access |= NFS3_ACCESS_READ; - if (S_ISDIR(inode->i_mode)) { - if (mode & MAY_WRITE) - arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE; - if (mode & MAY_EXEC) - arg.access |= NFS3_ACCESS_LOOKUP; - } else { - if (mode & MAY_WRITE) - arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND; - if (mode & MAY_EXEC) - arg.access |= NFS3_ACCESS_EXECUTE; - } - res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f90090e8c959..7fd642303104 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3889,6 +3889,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry struct nfs4_accessargs args = { .fh = NFS_FH(inode), .bitmask = server->cache_consistency_bitmask, + .access = entry->mask, }; struct nfs4_accessres res = { .server = server, @@ -3899,26 +3900,8 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry .rpc_resp = &res, .rpc_cred = entry->cred, }; - int mode = entry->mask; int status = 0; - /* - * Determine which access bits we want to ask for... - */ - if (mode & MAY_READ) - args.access |= NFS4_ACCESS_READ; - if (S_ISDIR(inode->i_mode)) { - if (mode & MAY_WRITE) - args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE; - if (mode & MAY_EXEC) - args.access |= NFS4_ACCESS_LOOKUP; - } else { - if (mode & MAY_WRITE) - args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND; - if (mode & MAY_EXEC) - args.access |= NFS4_ACCESS_EXECUTE; - } - res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) return -ENOMEM; -- cgit From b688741cb06695312f18b730653d6611e1bad28d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 25 Aug 2017 17:34:41 +1000 Subject: NFS: revalidate "." etc correctly on "open". For correct close-to-open semantics, NFS must validate the change attribute of a directory (or file) on open. Since commit ecf3d1f1aa74 ("vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op"), open() of "." or a path ending ".." is not revalidated reliably (except when that direct is a mount point). Prior to that commit, "." was revalidated using nfs_lookup_revalidate() which checks the LOOKUP_OPEN flag and forces revalidation if the flag is set. Since that commit, nfs_weak_revalidate() is used for NFSv3 (which ignores the flags) and nothing is used for NFSv4. This is fixed by using nfs_lookup_verify_inode() in nfs_weak_revalidate(). This does the revalidation exactly when needed. Also, add a definition of .d_weak_revalidate for NFSv4. The incorrect behavior is easily demonstrated by running "echo *" in some non-mountpoint NFS directory while watching network traffic. Without this patch, "echo *" sometimes doesn't produce any traffic. With the patch it always does. Fixes: ecf3d1f1aa74 ("vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op") cc: stable@vger.kernel.org (3.9+) Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index db482d4c15d5..c583093a066b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1241,8 +1241,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - if (nfs_mapping_need_revalidate_inode(inode)) - error = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + error = nfs_lookup_verify_inode(inode, flags); dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", __func__, inode->i_ino, error ? "invalid" : "valid"); return !error; @@ -1393,6 +1392,7 @@ static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, + .d_weak_revalidate = nfs_weak_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, -- cgit From 1fea73ac92f628538032ce746f7dd5d6f8dfcc32 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 25 Aug 2017 17:34:41 +1000 Subject: NFS: remove special-case revalidate in nfs_opendir() Commit f5a73672d181 ("NFS: allow close-to-open cache semantics to apply to root of NFS filesystem") added a call to __nfs_revalidate_inode() to nfs_opendir to as the lookup process wouldn't reliable do this. Subsequent commit a3fbbde70a0c ("VFS: we need to set LOOKUP_JUMPED on mountpoint crossing") make this unnecessary. So remove the unnecessary code. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c583093a066b..15104f719170 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -118,13 +118,6 @@ nfs_opendir(struct inode *inode, struct file *filp) goto out; } filp->private_data = ctx; - if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { - /* This is a mountpoint, so d_revalidate will never - * have been called, so we need to refresh the - * inode (for close-open consistency) ourselves. - */ - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - } out: put_rpccred(cred); return res; -- cgit From 3be0f80b5fe9c16eca2d538f799b94ca8aa59433 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Oct 2017 15:46:45 -0400 Subject: NFSv4.1: Fix up replays of interrupted requests If the previous request on a slot was interrupted before it was processed by the server, then our slot sequence number may be out of whack, and so we try the next operation using the old sequence number. The problem with this, is that not all servers check to see that the client is replaying the same operations as previously when they decide to go to the replay cache, and so instead of the expected error of NFS4ERR_SEQ_FALSE_RETRY, we get a replay of the old reply, which could (if the operations match up) be mistaken by the client for a new reply. To fix this, we attempt to send a COMPOUND containing only the SEQUENCE op in order to resync our slot sequence number. Cc: Olga Kornievskaia [olga.kornievskaia@gmail.com: fix an Oops] Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 148 +++++++++++++++++++++++++++++++++++++----------------- 2 files changed, 103 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ac4f10b7f6c1..b547d935aaf0 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -464,7 +464,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); extern void nfs_release_seqid(struct nfs_seqid *seqid); extern void nfs_free_seqid(struct nfs_seqid *seqid); -extern int nfs4_setup_sequence(const struct nfs_client *client, +extern int nfs4_setup_sequence(struct nfs_client *client, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7fd642303104..d0fd96b67640 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -96,6 +96,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_open_context *ctx, struct nfs4_label *ilabel, struct nfs4_label *olabel); #ifdef CONFIG_NFS_V4_1 +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, + struct rpc_cred *cred, + struct nfs4_slot *slot, + bool is_privileged); static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, struct rpc_cred *); static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, @@ -644,13 +648,14 @@ static int nfs40_sequence_done(struct rpc_task *task, #if defined(CONFIG_NFS_V4_1) -static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) +static void nfs41_release_slot(struct nfs4_slot *slot) { struct nfs4_session *session; struct nfs4_slot_table *tbl; - struct nfs4_slot *slot = res->sr_slot; bool send_new_highest_used_slotid = false; + if (!slot) + return; tbl = slot->table; session = tbl->session; @@ -676,13 +681,18 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) send_new_highest_used_slotid = false; out_unlock: spin_unlock(&tbl->slot_tbl_lock); - res->sr_slot = NULL; if (send_new_highest_used_slotid) nfs41_notify_server(session->clp); if (waitqueue_active(&tbl->slot_waitq)) wake_up_all(&tbl->slot_waitq); } +static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) +{ + nfs41_release_slot(res->sr_slot); + res->sr_slot = NULL; +} + static int nfs41_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -710,13 +720,6 @@ static int nfs41_sequence_process(struct rpc_task *task, /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: - /* If previous op on slot was interrupted and we reused - * the seq# and got a reply from the cache, then retry - */ - if (task->tk_status == -EREMOTEIO && interrupted) { - ++slot->seq_nr; - goto retry_nowait; - } /* Update the slot's sequence and clientid lease timer */ slot->seq_done = 1; clp = session->clp; @@ -750,16 +753,16 @@ static int nfs41_sequence_process(struct rpc_task *task, * The slot id we used was probably retired. Try again * using a different slot id. */ + if (slot->seq_nr < slot->table->target_highest_slotid) + goto session_recover; goto retry_nowait; case -NFS4ERR_SEQ_MISORDERED: /* * Was the last operation on this sequence interrupted? * If so, retry after bumping the sequence number. */ - if (interrupted) { - ++slot->seq_nr; - goto retry_nowait; - } + if (interrupted) + goto retry_new_seq; /* * Could this slot have been previously retired? * If so, then the server may be expecting seq_nr = 1! @@ -768,10 +771,11 @@ static int nfs41_sequence_process(struct rpc_task *task, slot->seq_nr = 1; goto retry_nowait; } - break; + goto session_recover; case -NFS4ERR_SEQ_FALSE_RETRY: - ++slot->seq_nr; - goto retry_nowait; + if (interrupted) + goto retry_new_seq; + goto session_recover; default: /* Just update the slot sequence no. */ slot->seq_done = 1; @@ -781,6 +785,11 @@ out: dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); out_noaction: return ret; +session_recover: + nfs4_schedule_session_recovery(session, res->sr_status); + goto retry_nowait; +retry_new_seq: + ++slot->seq_nr; retry_nowait: if (rpc_restart_call_prepare(task)) { nfs41_sequence_free_slot(res); @@ -857,6 +866,17 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_done = nfs41_call_sync_done, }; +static void +nfs4_sequence_process_interrupted(struct nfs_client *client, + struct nfs4_slot *slot, struct rpc_cred *cred) +{ + struct rpc_task *task; + + task = _nfs41_proc_sequence(client, cred, slot, true); + if (!IS_ERR(task)) + rpc_put_task_async(task); +} + #else /* !CONFIG_NFS_V4_1 */ static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) @@ -877,9 +897,34 @@ int nfs4_sequence_done(struct rpc_task *task, } EXPORT_SYMBOL_GPL(nfs4_sequence_done); +static void +nfs4_sequence_process_interrupted(struct nfs_client *client, + struct nfs4_slot *slot, struct rpc_cred *cred) +{ + WARN_ON_ONCE(1); + slot->interrupted = 0; +} + #endif /* !CONFIG_NFS_V4_1 */ -int nfs4_setup_sequence(const struct nfs_client *client, +static +void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct nfs4_slot *slot) +{ + if (!slot) + return; + slot->privileged = args->sa_privileged ? 1 : 0; + args->sa_slot = slot; + + res->sr_slot = slot; + res->sr_timestamp = jiffies; + res->sr_status_flags = 0; + res->sr_status = 1; + +} + +int nfs4_setup_sequence(struct nfs_client *client, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task) @@ -897,29 +942,28 @@ int nfs4_setup_sequence(const struct nfs_client *client, task->tk_timeout = 0; } - spin_lock(&tbl->slot_tbl_lock); - /* The state manager will wait until the slot table is empty */ - if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) - goto out_sleep; + for (;;) { + spin_lock(&tbl->slot_tbl_lock); + /* The state manager will wait until the slot table is empty */ + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) + goto out_sleep; + + slot = nfs4_alloc_slot(tbl); + if (IS_ERR(slot)) { + /* Try again in 1/4 second */ + if (slot == ERR_PTR(-ENOMEM)) + task->tk_timeout = HZ >> 2; + goto out_sleep; + } + spin_unlock(&tbl->slot_tbl_lock); - slot = nfs4_alloc_slot(tbl); - if (IS_ERR(slot)) { - /* Try again in 1/4 second */ - if (slot == ERR_PTR(-ENOMEM)) - task->tk_timeout = HZ >> 2; - goto out_sleep; + if (likely(!slot->interrupted)) + break; + nfs4_sequence_process_interrupted(client, + slot, task->tk_msg.rpc_cred); } - spin_unlock(&tbl->slot_tbl_lock); - - slot->privileged = args->sa_privileged ? 1 : 0; - args->sa_slot = slot; - res->sr_slot = slot; - if (session) { - res->sr_timestamp = jiffies; - res->sr_status_flags = 0; - res->sr_status = 1; - } + nfs4_sequence_attach_slot(args, res, slot); trace_nfs4_setup_sequence(session, args); out_start: @@ -8118,6 +8162,7 @@ static const struct rpc_call_ops nfs41_sequence_ops = { static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred, + struct nfs4_slot *slot, bool is_privileged) { struct nfs4_sequence_data *calldata; @@ -8131,15 +8176,18 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, .callback_ops = &nfs41_sequence_ops, .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, }; + struct rpc_task *ret; + ret = ERR_PTR(-EIO); if (!atomic_inc_not_zero(&clp->cl_count)) - return ERR_PTR(-EIO); + goto out_err; + + ret = ERR_PTR(-ENOMEM); calldata = kzalloc(sizeof(*calldata), GFP_NOFS); - if (calldata == NULL) { - nfs_put_client(clp); - return ERR_PTR(-ENOMEM); - } + if (calldata == NULL) + goto out_put_clp; nfs4_init_sequence(&calldata->args, &calldata->res, 0); + nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot); if (is_privileged) nfs4_set_sequence_privileged(&calldata->args); msg.rpc_argp = &calldata->args; @@ -8147,7 +8195,15 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, calldata->clp = clp; task_setup_data.callback_data = calldata; - return rpc_run_task(&task_setup_data); + ret = rpc_run_task(&task_setup_data); + if (IS_ERR(ret)) + goto out_err; + return ret; +out_put_clp: + nfs_put_client(clp); +out_err: + nfs41_release_slot(slot); + return ret; } static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) @@ -8157,7 +8213,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) return -EAGAIN; - task = _nfs41_proc_sequence(clp, cred, false); + task = _nfs41_proc_sequence(clp, cred, NULL, false); if (IS_ERR(task)) ret = PTR_ERR(task); else @@ -8171,7 +8227,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) struct rpc_task *task; int ret; - task = _nfs41_proc_sequence(clp, cred, true); + task = _nfs41_proc_sequence(clp, cred, NULL, true); if (IS_ERR(task)) { ret = PTR_ERR(task); goto out; -- cgit From a2a5dea7b6cb77365ed9c987f54d160668c8a95f Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:31 +0300 Subject: fs, nfs: convert nfs4_pnfs_ds.ds_count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs4_pnfs_ds.ds_count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.h | 3 ++- fs/nfs/pnfs_nfs.c | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 87f144f14d1e..cefa7daf6e85 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -30,6 +30,7 @@ #ifndef FS_NFS_PNFS_H #define FS_NFS_PNFS_H +#include #include #include #include @@ -54,7 +55,7 @@ struct nfs4_pnfs_ds { char *ds_remotestr; /* comma sep list of addrs */ struct list_head ds_addrs; struct nfs_client *ds_clp; - atomic_t ds_count; + refcount_t ds_count; unsigned long ds_state; #define NFS4DS_CONNECTING 0 /* ds is establishing connection */ }; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 60da59be83b6..03aaa60c7768 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -338,7 +338,7 @@ print_ds(struct nfs4_pnfs_ds *ds) " client %p\n" " cl_exchange_flags %x\n", ds->ds_remotestr, - atomic_read(&ds->ds_count), ds->ds_clp, + refcount_read(&ds->ds_count), ds->ds_clp, ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); } @@ -451,7 +451,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds) void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds) { - if (atomic_dec_and_lock(&ds->ds_count, + if (refcount_dec_and_lock(&ds->ds_count, &nfs4_ds_cache_lock)) { list_del_init(&ds->ds_node); spin_unlock(&nfs4_ds_cache_lock); @@ -537,7 +537,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) INIT_LIST_HEAD(&ds->ds_addrs); list_splice_init(dsaddrs, &ds->ds_addrs); ds->ds_remotestr = remotestr; - atomic_set(&ds->ds_count, 1); + refcount_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); ds->ds_clp = NULL; list_add(&ds->ds_node, &nfs4_data_server_cache); @@ -546,10 +546,10 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) } else { kfree(remotestr); kfree(ds); - atomic_inc(&tmp_ds->ds_count); + refcount_inc(&tmp_ds->ds_count); dprintk("%s data server %s found, inc'ed ds_count to %d\n", __func__, tmp_ds->ds_remotestr, - atomic_read(&tmp_ds->ds_count)); + refcount_read(&tmp_ds->ds_count)); ds = tmp_ds; } spin_unlock(&nfs4_ds_cache_lock); -- cgit From eba6dd691743a9d7a57735f36bf6946fc58878ec Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:32 +0300 Subject: fs, nfs: convert pnfs_layout_segment.pls_refcount from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 12 ++++++------ fs/nfs/pnfs.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3bcd669a3152..499bb710713e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -450,7 +450,7 @@ pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, { INIT_LIST_HEAD(&lseg->pls_list); INIT_LIST_HEAD(&lseg->pls_lc_list); - atomic_set(&lseg->pls_refcount, 1); + refcount_set(&lseg->pls_refcount, 1); set_bit(NFS_LSEG_VALID, &lseg->pls_flags); lseg->pls_layout = lo; lseg->pls_range = *range; @@ -507,13 +507,13 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) return; dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, - atomic_read(&lseg->pls_refcount), + refcount_read(&lseg->pls_refcount), test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); lo = lseg->pls_layout; inode = lo->plh_inode; - if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { + if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { spin_unlock(&inode->i_lock); return; @@ -551,7 +551,7 @@ pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, struct list_head *tmp_list) { - if (!atomic_dec_and_test(&lseg->pls_refcount)) + if (!refcount_dec_and_test(&lseg->pls_refcount)) return false; pnfs_layout_remove_lseg(lseg->pls_layout, lseg); list_add(&lseg->pls_list, tmp_list); @@ -570,7 +570,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, * outstanding io is finished. */ dprintk("%s: lseg %p ref %d\n", __func__, lseg, - atomic_read(&lseg->pls_refcount)); + refcount_read(&lseg->pls_refcount)); if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) rv = 1; } @@ -1546,7 +1546,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, } dprintk("%s:Return lseg %p ref %d\n", - __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0); + __func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0); return ret; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index cefa7daf6e85..f0e98e1f04b2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -64,7 +64,7 @@ struct pnfs_layout_segment { struct list_head pls_list; struct list_head pls_lc_list; struct pnfs_layout_range pls_range; - atomic_t pls_refcount; + refcount_t pls_refcount; u32 pls_seq; unsigned long pls_flags; struct pnfs_layout_hdr *pls_layout; @@ -394,7 +394,7 @@ static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { if (lseg) { - atomic_inc(&lseg->pls_refcount); + refcount_inc(&lseg->pls_refcount); smp_mb__after_atomic(); } return lseg; -- cgit From 2b28a7bee4539d27f4ec3b0298f03bfd83d4f794 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:33 +0300 Subject: fs, nfs: convert pnfs_layout_hdr.plh_refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable pnfs_layout_hdr.plh_refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 12 ++++++------ fs/nfs/pnfs.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 499bb710713e..4aab53baf664 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -251,7 +251,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) { - atomic_inc(&lo->plh_refcount); + refcount_inc(&lo->plh_refcount); } static struct pnfs_layout_hdr * @@ -296,7 +296,7 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) pnfs_layoutreturn_before_put_layout_hdr(lo); - if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { + if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { if (!list_empty(&lo->plh_segs)) WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); pnfs_detach_layout_hdr(lo); @@ -395,14 +395,14 @@ pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) { lo->plh_retry_timestamp = jiffies; if (!test_and_set_bit(fail_bit, &lo->plh_flags)) - atomic_inc(&lo->plh_refcount); + refcount_inc(&lo->plh_refcount); } static void pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) { if (test_and_clear_bit(fail_bit, &lo->plh_flags)) - atomic_dec(&lo->plh_refcount); + refcount_dec(&lo->plh_refcount); } static void @@ -472,7 +472,7 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ - atomic_dec(&lo->plh_refcount); + refcount_dec(&lo->plh_refcount); if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) return; if (list_empty(&lo->plh_segs) && @@ -1451,7 +1451,7 @@ alloc_init_layout_hdr(struct inode *ino, lo = pnfs_alloc_layout_hdr(ino, gfp_flags); if (!lo) return NULL; - atomic_set(&lo->plh_refcount, 1); + refcount_set(&lo->plh_refcount, 1); INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_segs); INIT_LIST_HEAD(&lo->plh_return_segs); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f0e98e1f04b2..78de7a2052f7 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -180,7 +180,7 @@ struct pnfs_layoutdriver_type { }; struct pnfs_layout_hdr { - atomic_t plh_refcount; + refcount_t plh_refcount; atomic_t plh_outstanding; /* number of RPCs out */ struct list_head plh_layouts; /* other client layouts */ struct list_head plh_bulk_destroy; -- cgit From 81a090b9975b89ca15e71d0bca945358f6fe7696 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:34 +0300 Subject: fs, nfs: convert nfs4_ff_layout_mirror.ref from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs4_ff_layout_mirror.ref is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 8 ++++---- fs/nfs/flexfilelayout/flexfilelayout.h | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index b0fa83a60754..ff55a0a1b2e8 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -187,7 +187,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo, continue; if (!ff_mirror_match_fh(mirror, pos)) continue; - if (atomic_inc_not_zero(&pos->ref)) { + if (refcount_inc_not_zero(&pos->ref)) { spin_unlock(&inode->i_lock); return pos; } @@ -218,7 +218,7 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) mirror = kzalloc(sizeof(*mirror), gfp_flags); if (mirror != NULL) { spin_lock_init(&mirror->lock); - atomic_set(&mirror->ref, 1); + refcount_set(&mirror->ref, 1); INIT_LIST_HEAD(&mirror->mirrors); } return mirror; @@ -242,7 +242,7 @@ static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) static void ff_layout_put_mirror(struct nfs4_ff_layout_mirror *mirror) { - if (mirror != NULL && atomic_dec_and_test(&mirror->ref)) + if (mirror != NULL && refcount_dec_and_test(&mirror->ref)) ff_layout_free_mirror(mirror); } @@ -2286,7 +2286,7 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags)) continue; /* mirror refcount put in cleanup_layoutstats */ - if (!atomic_inc_not_zero(&mirror->ref)) + if (!refcount_inc_not_zero(&mirror->ref)) continue; dev = &mirror->mirror_ds->id_node; memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 98b34c9b0564..ca35426ac3b4 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -13,6 +13,7 @@ #define FF_FLAGS_NO_IO_THRU_MDS 2 #define FF_FLAGS_NO_READ_IO 4 +#include #include "../pnfs.h" /* XXX: Let's filter out insanely large mirror count for now to avoid oom @@ -81,7 +82,7 @@ struct nfs4_ff_layout_mirror { nfs4_stateid stateid; struct rpc_cred __rcu *ro_cred; struct rpc_cred __rcu *rw_cred; - atomic_t ref; + refcount_t ref; spinlock_t lock; unsigned long flags; struct nfs4_ff_layoutstat read_stat; -- cgit From 0896cade1224f167296c786043e9bdc0ca63d392 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:35 +0300 Subject: fs, nfs: convert nfs_cache_defer_req.count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs_cache_defer_req.count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/cache_lib.c | 6 +++--- fs/nfs/cache_lib.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 2ae676f93e6b..6167f13025d7 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -66,7 +66,7 @@ out: */ void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq) { - if (atomic_dec_and_test(&dreq->count)) + if (refcount_dec_and_test(&dreq->count)) kfree(dreq); } @@ -86,7 +86,7 @@ static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req) dreq = container_of(req, struct nfs_cache_defer_req, req); dreq->deferred_req.revisit = nfs_dns_cache_revisit; - atomic_inc(&dreq->count); + refcount_inc(&dreq->count); return &dreq->deferred_req; } @@ -98,7 +98,7 @@ struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void) dreq = kzalloc(sizeof(*dreq), GFP_KERNEL); if (dreq) { init_completion(&dreq->completion); - atomic_set(&dreq->count, 1); + refcount_set(&dreq->count, 1); dreq->req.defer = nfs_dns_cache_defer; } return dreq; diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 4116d2c3f52f..02b378c2eeef 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -15,7 +15,7 @@ struct nfs_cache_defer_req { struct cache_req req; struct cache_deferred_req deferred_req; struct completion completion; - atomic_t count; + refcount_t count; }; extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name); -- cgit From 194bc1f48100a3b6fbd50d7a2218b594f0c564b6 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:36 +0300 Subject: fs, nfs: convert nfs4_lock_state.ls_count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs4_lock_state.ls_count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 6 +++--- fs/nfs/nfs4state.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b547d935aaf0..78946c6ead88 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -144,7 +144,7 @@ struct nfs4_lock_state { unsigned long ls_flags; struct nfs_seqid_counter ls_seqid; nfs4_stateid ls_stateid; - atomic_t ls_count; + refcount_t ls_count; fl_owner_t ls_owner; }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d0fd96b67640..be8c75a2cbbe 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2562,7 +2562,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { struct rpc_cred *cred = lsp->ls_state->owner->so_cred; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); spin_unlock(&state->state_lock); nfs4_put_lock_state(prev); @@ -5923,7 +5923,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->arg.seqid = seqid; p->res.seqid = seqid; p->lsp = lsp; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); p->l_ctx = nfs_get_lock_context(ctx); @@ -6139,7 +6139,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->res.lock_seqid = p->arg.lock_seqid; p->lsp = lsp; p->server = server; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); memcpy(&p->fl, fl, sizeof(p->fl)); return p; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0378e2257ca7..1887134d5231 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -825,7 +825,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, ret = pos; } if (ret) - atomic_inc(&ret->ls_count); + refcount_inc(&ret->ls_count); return ret; } @@ -843,7 +843,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f if (lsp == NULL) return NULL; nfs4_init_seqid_counter(&lsp->ls_seqid); - atomic_set(&lsp->ls_count, 1); + refcount_set(&lsp->ls_count, 1); lsp->ls_state = state; lsp->ls_owner = fl_owner; lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); @@ -907,7 +907,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (lsp == NULL) return; state = lsp->ls_state; - if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock)) + if (!refcount_dec_and_lock(&lsp->ls_count, &state->state_lock)) return; list_del(&lsp->ls_locks); if (list_empty(&state->lock_states)) @@ -927,7 +927,7 @@ static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner; dst->fl_u.nfs4_fl.owner = lsp; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); } static void nfs4_fl_release_lock(struct file_lock *fl) -- cgit From 2f62b5aa4814be2c511553fd6afb4d35b6c2503b Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:37 +0300 Subject: fs, nfs: convert nfs_lock_context.count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs_lock_context.count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 134d9f560240..52a60e399a2d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -783,7 +783,7 @@ EXPORT_SYMBOL_GPL(nfs_getattr); static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { - atomic_set(&l_ctx->count, 1); + refcount_set(&l_ctx->count, 1); l_ctx->lockowner = current->files; INIT_LIST_HEAD(&l_ctx->list); atomic_set(&l_ctx->io_count, 0); @@ -797,7 +797,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context do { if (pos->lockowner != current->files) continue; - atomic_inc(&pos->count); + refcount_inc(&pos->count); return pos; } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head); return NULL; @@ -836,7 +836,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx) struct nfs_open_context *ctx = l_ctx->open_context; struct inode *inode = d_inode(ctx->dentry); - if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) + if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock)) return; list_del(&l_ctx->list); spin_unlock(&inode->i_lock); @@ -913,7 +913,7 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - atomic_inc(&ctx->lock_context.count); + refcount_inc(&ctx->lock_context.count); return ctx; } EXPORT_SYMBOL_GPL(get_nfs_open_context); @@ -924,11 +924,11 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) struct super_block *sb = ctx->dentry->d_sb; if (!list_empty(&ctx->list)) { - if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) + if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) return; list_del(&ctx->list); spin_unlock(&inode->i_lock); - } else if (!atomic_dec_and_test(&ctx->lock_context.count)) + } else if (!refcount_dec_and_test(&ctx->lock_context.count)) return; if (inode != NULL) NFS_PROTO(inode)->close_context(ctx, is_sync); -- cgit From 212bf41d88c06afc23e03f9b274eebf1e8dba197 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:38 +0300 Subject: fs, nfs: convert nfs_client.cl_count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs_client.cl_count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 10 +++++----- fs/nfs/filelayout/filelayout.c | 12 ++++++------ fs/nfs/flexfilelayout/flexfilelayout.c | 12 ++++++------ fs/nfs/nfs4client.c | 10 +++++----- fs/nfs/nfs4proc.c | 12 ++++++------ fs/nfs/nfs4state.c | 6 +++--- 6 files changed, 31 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 22880ef6d8dd..0ac2fb1c6b63 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -163,7 +163,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; - atomic_set(&clp->cl_count, 1); + refcount_set(&clp->cl_count, 1); clp->cl_cons_state = NFS_CS_INITING; memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen); @@ -269,7 +269,7 @@ void nfs_put_client(struct nfs_client *clp) nn = net_generic(clp->cl_net, nfs_net_id); - if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { + if (refcount_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { list_del(&clp->cl_share_link); nfs_cb_idr_remove_locked(clp); spin_unlock(&nn->nfs_client_lock); @@ -314,7 +314,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat sap)) continue; - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); return clp; } return NULL; @@ -1006,7 +1006,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, /* Copy data from the source */ server->nfs_client = source->nfs_client; server->destroy = source->destroy; - atomic_inc(&server->nfs_client->cl_count); + refcount_inc(&server->nfs_client->cl_count); nfs_server_copy_userdata(server, source); server->fsid = fattr->fsid; @@ -1166,7 +1166,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v) clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), - atomic_read(&clp->cl_count), + refcount_read(&clp->cl_count), clp->cl_hostname); rcu_read_unlock(); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 508126eb49f9..4e54d8b5413a 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -471,10 +471,10 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) return PNFS_NOT_ATTEMPTED; dprintk("%s USE DS: %s cl_count %d\n", __func__, - ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); + ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count)); /* No multipath support. Use first DS */ - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; hdr->ds_commit_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); @@ -515,10 +515,10 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, - offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); + offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count)); hdr->pgio_done_cb = filelayout_write_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; hdr->ds_commit_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); @@ -1064,9 +1064,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) goto out_err; dprintk("%s ino %lu, how %d cl_count %d\n", __func__, - data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count)); + data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count)); data->commit_done_cb = filelayout_commit_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index ff55a0a1b2e8..c75ad982bcfc 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1726,10 +1726,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) vers = nfs4_ff_layout_ds_version(lseg, idx); dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__, - ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers); + ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers); hdr->pgio_done_cb = ff_layout_read_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) @@ -1785,11 +1785,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, - offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), + offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers); hdr->pgio_done_cb = ff_layout_write_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; hdr->ds_commit_idx = idx; fh = nfs4_ff_layout_select_ds_fh(lseg, idx); @@ -1863,11 +1863,11 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) vers = nfs4_ff_layout_ds_version(lseg, idx); dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__, - data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count), + data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count), vers); data->commit_done_cb = ff_layout_commit_done_cb; data->cred = ds_cred; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index e9bea90dc017..31b5bc0f10a4 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -483,7 +483,7 @@ static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new, * ID and serverowner fields. Wait for CREATE_SESSION * to finish. */ if (pos->cl_cons_state > NFS_CS_READY) { - atomic_inc(&pos->cl_count); + refcount_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); nfs_put_client(*prev); @@ -559,7 +559,7 @@ int nfs40_walk_client_list(struct nfs_client *new, * way that a SETCLIENTID_CONFIRM to pos can succeed is * if new and pos point to the same server: */ - atomic_inc(&pos->cl_count); + refcount_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); nfs_put_client(prev); @@ -715,7 +715,7 @@ int nfs41_walk_client_list(struct nfs_client *new, continue; found: - atomic_inc(&pos->cl_count); + refcount_inc(&pos->cl_count); *result = pos; status = 0; break; @@ -749,7 +749,7 @@ nfs4_find_client_ident(struct net *net, int cb_ident) spin_lock(&nn->nfs_client_lock); clp = idr_find(&nn->cb_ident_idr, cb_ident); if (clp) - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); spin_unlock(&nn->nfs_client_lock); return clp; } @@ -804,7 +804,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, sid->data, NFS4_MAX_SESSIONID_LEN) != 0) continue; - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); spin_unlock(&nn->nfs_client_lock); return clp; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index be8c75a2cbbe..82e5ed2ee6ba 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4870,7 +4870,7 @@ static void nfs4_renew_release(void *calldata) struct nfs4_renewdata *data = calldata; struct nfs_client *clp = data->client; - if (atomic_read(&clp->cl_count) > 1) + if (refcount_read(&clp->cl_count) > 1) nfs4_schedule_state_renewal(clp); nfs_put_client(clp); kfree(data); @@ -4918,7 +4918,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, if (renew_flags == 0) return 0; - if (!atomic_inc_not_zero(&clp->cl_count)) + if (!refcount_inc_not_zero(&clp->cl_count)) return -EIO; data = kmalloc(sizeof(*data), GFP_NOFS); if (data == NULL) { @@ -7499,7 +7499,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, struct nfs41_exchange_id_data *calldata; int status; - if (!atomic_inc_not_zero(&clp->cl_count)) + if (!refcount_inc_not_zero(&clp->cl_count)) return ERR_PTR(-EIO); status = -ENOMEM; @@ -8099,7 +8099,7 @@ static void nfs41_sequence_release(void *data) struct nfs4_sequence_data *calldata = data; struct nfs_client *clp = calldata->clp; - if (atomic_read(&clp->cl_count) > 1) + if (refcount_read(&clp->cl_count) > 1) nfs4_schedule_state_renewal(clp); nfs_put_client(clp); kfree(calldata); @@ -8128,7 +8128,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) trace_nfs4_sequence(clp, task->tk_status); if (task->tk_status < 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); - if (atomic_read(&clp->cl_count) == 1) + if (refcount_read(&clp->cl_count) == 1) goto out; if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) { @@ -8179,7 +8179,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_task *ret; ret = ERR_PTR(-EIO); - if (!atomic_inc_not_zero(&clp->cl_count)) + if (!refcount_inc_not_zero(&clp->cl_count)) goto out_err; ret = ERR_PTR(-ENOMEM); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1887134d5231..3bd79b8c016b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1177,7 +1177,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; __module_get(THIS_MODULE); - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); /* The rcu_read_lock() is not strictly necessary, as the state * manager is the only thread that ever changes the rpc_xprt @@ -1269,7 +1269,7 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp) might_sleep(); - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, nfs_wait_bit_killable, TASK_KILLABLE); if (res) @@ -2510,7 +2510,7 @@ static void nfs4_state_manager(struct nfs_client *clp) break; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) break; - } while (atomic_read(&clp->cl_count) > 1); + } while (refcount_read(&clp->cl_count) > 1); return; out_error: if (strlen(section)) -- cgit From 3944369db701f075092357b511fd9f5755771585 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 1 Nov 2017 15:48:43 -0400 Subject: NFS: Avoid RCU usage in tracepoints There isn't an obvious way to acquire and release the RCU lock during a tracepoint, so we can't use the rpc_peeraddr2str() function here. Instead, rely on the client's cl_hostname, which should have similar enough information without needing an rcu_dereference(). Reported-by: Dave Jones Cc: stable@vger.kernel.org # v3.12 Signed-off-by: Anna Schumaker --- fs/nfs/nfs4trace.h | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index be1da19c65d6..7a5588c25f70 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -201,17 +201,13 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_ARGS(clp, error), TP_STRUCT__entry( - __string(dstaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR)) + __string(dstaddr, clp->cl_hostname) __field(int, error) ), TP_fast_assign( __entry->error = error; - __assign_str(dstaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR)); + __assign_str(dstaddr, clp->cl_hostname); ), TP_printk( @@ -1132,9 +1128,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __string(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __string(dstaddr, clp ? clp->cl_hostname : "unknown") ), TP_fast_assign( @@ -1147,9 +1141,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, __entry->fileid = 0; __entry->dev = 0; } - __assign_str(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown") ), TP_printk( @@ -1191,9 +1183,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __string(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __string(dstaddr, clp ? clp->cl_hostname : "unknown") __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -1208,9 +1198,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, __entry->fileid = 0; __entry->dev = 0; } - __assign_str(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown") __entry->stateid_seq = be32_to_cpu(stateid->seqid); __entry->stateid_hash = -- cgit From 6089dd0d731028531fb1148be9fd33274ff90da4 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Sat, 7 Oct 2017 16:02:21 +0200 Subject: NFS: Fix bool initialization/comparison Bool initializations should use true and false. Bool tests don't need comparisons. Signed-off-by: Thomas Meyer Signed-off-by: Anna Schumaker --- fs/nfs/callback_proc.c | 2 +- fs/nfs/dir.c | 10 +++++----- fs/nfs/nfs4client.c | 2 +- fs/nfs/pnfs.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 14358de173fb..a82abff80dff 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -439,7 +439,7 @@ static bool referring_call_exists(struct nfs_client *clp, uint32_t nrclists, struct referring_call_list *rclists) { - bool status = 0; + bool status = false; int i, j; struct nfs4_session *session; struct nfs4_slot_table *tbl; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 15104f719170..a642ed3b13d9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -246,7 +246,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri desc->cache_entry_index = index; return 0; out_eof: - desc->eof = 1; + desc->eof = true; return -EBADCOOKIE; } @@ -300,7 +300,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des if (array->eof_index >= 0) { status = -EBADCOOKIE; if (*desc->dir_cookie == array->last_cookie) - desc->eof = 1; + desc->eof = true; } out: return status; @@ -754,7 +754,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) ent = &array->array[i]; if (!dir_emit(desc->ctx, ent->string.name, ent->string.len, nfs_compat_user_ino64(ent->ino), ent->d_type)) { - desc->eof = 1; + desc->eof = true; break; } desc->ctx->pos++; @@ -766,7 +766,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) ctx->duped = 1; } if (array->eof_index >= 0) - desc->eof = 1; + desc->eof = true; kunmap(desc->page); cache_page_release(desc); @@ -866,7 +866,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) if (res == -EBADCOOKIE) { res = 0; /* This means either end of directory */ - if (*desc->dir_cookie && desc->eof == 0) { + if (*desc->dir_cookie && !desc->eof) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc); if (res == 0) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 31b5bc0f10a4..12bbab0becb4 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -793,7 +793,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { - if (nfs4_cb_match_client(addr, clp, minorversion) == false) + if (!nfs4_cb_match_client(addr, clp, minorversion)) continue; if (!nfs4_has_session(clp)) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4aab53baf664..ec30dacf1a24 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1513,7 +1513,7 @@ pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, if ((range->iomode == IOMODE_RW && ls_range->iomode != IOMODE_RW) || (range->iomode != ls_range->iomode && - strict_iomode == true) || + strict_iomode) || !pnfs_lseg_range_intersecting(ls_range, range)) return 0; -- cgit From c9399f21c215453b414702758b8c4b7d66605eac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:01 -0500 Subject: NFSv4: Fix OPEN / CLOSE race Ben Coddington has noted the following race between OPEN and CLOSE on a single client. Process 1 Process 2 Server ========= ========= ====== 1) OPEN file 2) OPEN file 3) Process OPEN (1) seqid=1 4) Process OPEN (2) seqid=2 5) Reply OPEN (2) 6) Receive reply (2) 7) new stateid, seqid=2 8) CLOSE file, using stateid w/ seqid=2 9) Reply OPEN (1) 10( Process CLOSE (8) 11) Reply CLOSE (8) 12) Forget stateid file closed 13) Receive reply (7) 14) Forget stateid file closed. 15) Receive reply (1). 16) New stateid seqid=1 is really the same stateid that was closed. IOW: the reply to the first OPEN is delayed. Since "Process 2" does not wait before closing the file, and it does not cache the closed stateid, then when the delayed reply is finally received, it is treated as setting up a new stateid by the client. The fix is to ensure that the client processes the OPEN and CLOSE calls in the same order in which the server processed them. This commit ensures that we examine the seqid of the stateid returned by OPEN. If it is a new stateid, we assume the seqid must be equal to the value 1, and that each state transition increments the seqid value by 1 (See RFC7530, Section 9.1.4.2, and RFC5661, Section 8.2.2). If the tracker sees that an OPEN returns with a seqid that is greater than the cached seqid + 1, then it bumps a flag to ensure that the caller waits for the RPCs carrying the missing seqids to complete. Note that there can still be pathologies where the server crashes before it can even send us the missing seqids. Since the OPEN call is still holding a slot when it waits here, that could cause the recovery to stall forever. To avoid that, we time out after a 5 second wait. Reported-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 3 ++ fs/nfs/nfs4proc.c | 154 +++++++++++++++++++++++++++++++++++++++++------------ fs/nfs/nfs4state.c | 1 + 3 files changed, 123 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 78946c6ead88..fdcfbab886bb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -161,6 +161,7 @@ enum { NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */ NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */ + NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */ }; struct nfs4_state { @@ -184,6 +185,8 @@ struct nfs4_state { unsigned int n_rdwr; /* Number of read/write references */ fmode_t state; /* State on the server (R,W, or RW) */ atomic_t count; + + wait_queue_head_t waitq; }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 82e5ed2ee6ba..cec4bcba65e8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1378,6 +1378,25 @@ static bool nfs_open_stateid_recover_openmode(struct nfs4_state *state) } #endif /* CONFIG_NFS_V4_1 */ +static void nfs_state_log_update_open_stateid(struct nfs4_state *state) +{ + if (test_and_clear_bit(NFS_STATE_CHANGE_WAIT, &state->flags)) + wake_up_all(&state->waitq); +} + +static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state, + const nfs4_stateid *stateid) +{ + u32 state_seqid = be32_to_cpu(state->open_stateid.seqid); + u32 stateid_seqid = be32_to_cpu(stateid->seqid); + + if (stateid_seqid == state_seqid + 1U || + (stateid_seqid == 1U && state_seqid == 0xffffffffU)) + nfs_state_log_update_open_stateid(state); + else + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); +} + static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) { struct nfs_client *clp = state->owner->so_server->nfs_client; @@ -1393,18 +1412,32 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) nfs4_state_mark_reclaim_nograce(clp, state); } +/* + * Check for whether or not the caller may update the open stateid + * to the value passed in by stateid. + * + * Note: This function relies heavily on the server implementing + * RFC7530 Section 9.1.4.2, and RFC5661 Section 8.2.2 + * correctly. + * i.e. The stateid seqids have to be initialised to 1, and + * are then incremented on every state transition. + */ static bool nfs_need_update_open_stateid(struct nfs4_state *state, - const nfs4_stateid *stateid, nfs4_stateid *freeme) + const nfs4_stateid *stateid) { - if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0) - return true; - if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { - nfs4_stateid_copy(freeme, &state->open_stateid); - nfs_test_and_clear_all_open_stateid(state); + if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 || + !nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (stateid->seqid == cpu_to_be32(1)) + nfs_state_log_update_open_stateid(state); + else + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); return true; } - if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) + + if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) { + nfs_state_log_out_of_order_open_stateid(state, stateid); return true; + } return false; } @@ -1443,11 +1476,13 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, if (nfs4_stateid_match_other(stateid, &state->open_stateid) && !nfs4_stateid_is_newer(stateid, &state->open_stateid)) { nfs_resync_open_stateid_locked(state); - return; + goto out; } if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); +out: + nfs_state_log_update_open_stateid(state); } static void nfs_clear_open_stateid(struct nfs4_state *state, @@ -1464,29 +1499,57 @@ static void nfs_clear_open_stateid(struct nfs4_state *state, } static void nfs_set_open_stateid_locked(struct nfs4_state *state, - const nfs4_stateid *stateid, fmode_t fmode, - nfs4_stateid *freeme) + const nfs4_stateid *stateid, nfs4_stateid *freeme) { - switch (fmode) { - case FMODE_READ: - set_bit(NFS_O_RDONLY_STATE, &state->flags); + DEFINE_WAIT(wait); + int status = 0; + for (;;) { + + if (!nfs_need_update_open_stateid(state, stateid)) + return; + if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags)) break; - case FMODE_WRITE: - set_bit(NFS_O_WRONLY_STATE, &state->flags); + if (status) break; - case FMODE_READ|FMODE_WRITE: - set_bit(NFS_O_RDWR_STATE, &state->flags); + /* Rely on seqids for serialisation with NFSv4.0 */ + if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client)) + break; + + prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); + /* + * Ensure we process the state changes in the same order + * in which the server processed them by delaying the + * update of the stateid until we are in sequence. + */ + write_sequnlock(&state->seqlock); + spin_unlock(&state->owner->so_lock); + rcu_read_unlock(); + if (!signal_pending(current)) { + if (schedule_timeout(5*HZ) == 0) + status = -EAGAIN; + else + status = 0; + } else + status = -EINTR; + finish_wait(&state->waitq, &wait); + rcu_read_lock(); + spin_lock(&state->owner->so_lock); + write_seqlock(&state->seqlock); } - if (!nfs_need_update_open_stateid(state, stateid, freeme)) - return; + + if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { + nfs4_stateid_copy(freeme, &state->open_stateid); + nfs_test_and_clear_all_open_stateid(state); + } + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); + nfs_state_log_update_open_stateid(state); } -static void __update_open_stateid(struct nfs4_state *state, +static void nfs_state_set_open_stateid(struct nfs4_state *state, const nfs4_stateid *open_stateid, - const nfs4_stateid *deleg_stateid, fmode_t fmode, nfs4_stateid *freeme) { @@ -1494,17 +1557,34 @@ static void __update_open_stateid(struct nfs4_state *state, * Protect the call to nfs4_state_set_mode_locked and * serialise the stateid update */ - spin_lock(&state->owner->so_lock); write_seqlock(&state->seqlock); - if (deleg_stateid != NULL) { - nfs4_stateid_copy(&state->stateid, deleg_stateid); - set_bit(NFS_DELEGATED_STATE, &state->flags); + nfs_set_open_stateid_locked(state, open_stateid, freeme); + switch (fmode) { + case FMODE_READ: + set_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_WRITE: + set_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case FMODE_READ|FMODE_WRITE: + set_bit(NFS_O_RDWR_STATE, &state->flags); } - if (open_stateid != NULL) - nfs_set_open_stateid_locked(state, open_stateid, fmode, freeme); + set_bit(NFS_OPEN_STATE, &state->flags); + write_sequnlock(&state->seqlock); +} + +static void nfs_state_set_delegation(struct nfs4_state *state, + const nfs4_stateid *deleg_stateid, + fmode_t fmode) +{ + /* + * Protect the call to nfs4_state_set_mode_locked and + * serialise the stateid update + */ + write_seqlock(&state->seqlock); + nfs4_stateid_copy(&state->stateid, deleg_stateid); + set_bit(NFS_DELEGATED_STATE, &state->flags); write_sequnlock(&state->seqlock); - update_open_stateflags(state, fmode); - spin_unlock(&state->owner->so_lock); } static int update_open_stateid(struct nfs4_state *state, @@ -1522,6 +1602,12 @@ static int update_open_stateid(struct nfs4_state *state, fmode &= (FMODE_READ|FMODE_WRITE); rcu_read_lock(); + spin_lock(&state->owner->so_lock); + if (open_stateid != NULL) { + nfs_state_set_open_stateid(state, open_stateid, fmode, &freeme); + ret = 1; + } + deleg_cur = rcu_dereference(nfsi->delegation); if (deleg_cur == NULL) goto no_delegation; @@ -1538,18 +1624,16 @@ static int update_open_stateid(struct nfs4_state *state, goto no_delegation_unlock; nfs_mark_delegation_referenced(deleg_cur); - __update_open_stateid(state, open_stateid, &deleg_cur->stateid, - fmode, &freeme); + nfs_state_set_delegation(state, &deleg_cur->stateid, fmode); ret = 1; no_delegation_unlock: spin_unlock(&deleg_cur->lock); no_delegation: + if (ret) + update_open_stateflags(state, fmode); + spin_unlock(&state->owner->so_lock); rcu_read_unlock(); - if (!ret && open_stateid != NULL) { - __update_open_stateid(state, open_stateid, NULL, fmode, &freeme); - ret = 1; - } if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) nfs4_schedule_state_manager(clp); if (freeme.type != 0) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3bd79b8c016b..6e3f37288348 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -645,6 +645,7 @@ nfs4_alloc_open_state(void) INIT_LIST_HEAD(&state->lock_states); spin_lock_init(&state->state_lock); seqlock_init(&state->seqlock); + init_waitqueue_head(&state->waitq); return state; } -- cgit From ad9e02dc026b75069f6a336e0daf2d54925967b6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:02 -0500 Subject: NFSv4: Add a tracepoint to document open stateid updates Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 +++ fs/nfs/nfs4trace.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cec4bcba65e8..aa8ea518ec6f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1481,6 +1481,7 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); + trace_nfs4_open_stateid_update(state->inode, stateid, 0); out: nfs_state_log_update_open_stateid(state); } @@ -1524,6 +1525,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, write_sequnlock(&state->seqlock); spin_unlock(&state->owner->so_lock); rcu_read_unlock(); + trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); if (!signal_pending(current)) { if (schedule_timeout(5*HZ) == 0) status = -EAGAIN; @@ -1545,6 +1547,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); + trace_nfs4_open_stateid_update(state->inode, stateid, status); nfs_state_log_update_open_stateid(state); } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 7a5588c25f70..06ac7156c44b 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1061,6 +1061,8 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait); DECLARE_EVENT_CLASS(nfs4_getattr_event, TP_PROTO( -- cgit From 8fd1ab747d2b1ec7ec663ad0b41a32eaa35117a8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:03 -0500 Subject: NFSv4: Fix open create exclusive when the server reboots If the server that does not implement NFSv4.1 persistent session semantics reboots while we are performing an exclusive create, then the return value of NFS4ERR_DELAY when we replay the open during the grace period causes us to lose the verifier. When the grace period expires, and we present a new verifier, the server will then correctly reply NFS4ERR_EXIST. This commit ensures that we always present the same verifier when replaying the OPEN. Reported-by: Tigran Mkrtchyan Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index aa8ea518ec6f..b2d628c9425c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1088,6 +1088,12 @@ struct nfs4_opendata { int rpc_status; }; +struct nfs4_open_createattrs { + struct nfs4_label *label; + struct iattr *sattr; + const __u32 verf[2]; +}; + static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, int err, struct nfs4_exception *exception) { @@ -1157,8 +1163,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct nfs4_state_owner *sp, fmode_t fmode, int flags, - const struct iattr *attrs, - struct nfs4_label *label, + const struct nfs4_open_createattrs *c, enum open_claim_type4 claim, gfp_t gfp_mask) { @@ -1166,6 +1171,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct inode *dir = d_inode(parent); struct nfs_server *server = NFS_SERVER(dir); struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); + struct nfs4_label *label = (c != NULL) ? c->label : NULL; struct nfs4_opendata *p; p = kzalloc(sizeof(*p), gfp_mask); @@ -1231,15 +1237,11 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, case NFS4_OPEN_CLAIM_DELEG_PREV_FH: p->o_arg.fh = NFS_FH(d_inode(dentry)); } - if (attrs != NULL && attrs->ia_valid != 0) { - __u32 verf[2]; - + if (c != NULL && c->sattr != NULL && c->sattr->ia_valid != 0) { p->o_arg.u.attrs = &p->attrs; - memcpy(&p->attrs, attrs, sizeof(p->attrs)); + memcpy(&p->attrs, c->sattr, sizeof(p->attrs)); - verf[0] = jiffies; - verf[1] = current->pid; - memcpy(p->o_arg.u.verifier.data, verf, + memcpy(p->o_arg.u.verifier.data, c->verf, sizeof(p->o_arg.u.verifier.data)); } p->c_arg.fh = &p->o_res.fh; @@ -1892,7 +1894,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context struct nfs4_opendata *opendata; opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, - NULL, NULL, claim, GFP_NOFS); + NULL, claim, GFP_NOFS); if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; @@ -2823,8 +2825,7 @@ out: static int _nfs4_do_open(struct inode *dir, struct nfs_open_context *ctx, int flags, - struct iattr *sattr, - struct nfs4_label *label, + const struct nfs4_open_createattrs *c, int *opened) { struct nfs4_state_owner *sp; @@ -2836,6 +2837,8 @@ static int _nfs4_do_open(struct inode *dir, struct nfs4_threshold **ctx_th = &ctx->mdsthreshold; fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; + struct iattr *sattr = c->sattr; + struct nfs4_label *label = c->label; struct nfs4_label *olabel = NULL; int status; @@ -2854,8 +2857,8 @@ static int _nfs4_do_open(struct inode *dir, status = -ENOMEM; if (d_really_is_positive(dentry)) claim = NFS4_OPEN_CLAIM_FH; - opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, - label, claim, GFP_KERNEL); + opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, + c, claim, GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; @@ -2936,10 +2939,18 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_state *res; + struct nfs4_open_createattrs c = { + .label = label, + .sattr = sattr, + .verf = { + [0] = (__u32)jiffies, + [1] = (__u32)current->pid, + }, + }; int status; do { - status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened); + status = _nfs4_do_open(dir, ctx, flags, &c, opened); res = ctx->state; trace_nfs4_open_file(ctx, flags, status); if (status == 0) -- cgit From d803224c84be067754db7fa58a93f36f61566493 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:04 -0500 Subject: NFS: Fix a typo in nfs_rename() On successful rename, the "old_dentry" is retained and is attached to the "new_dir", so we need to call nfs_set_verifier() accordingly. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a642ed3b13d9..927fd2768ead 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2057,7 +2057,7 @@ out: * should mark the directories for revalidation. */ d_move(old_dentry, new_dentry); - nfs_set_verifier(new_dentry, + nfs_set_verifier(old_dentry, nfs_save_change_attribute(new_dir)); } else if (error == -ENOENT) nfs_dentry_handle_enoent(old_dentry); -- cgit From 12f275cdd1638a163b77b3d65625fc14a81dab2b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:05 -0500 Subject: NFSv4: Retry CLOSE and DELEGRETURN on NFS4ERR_OLD_STATEID. If we're racing with an OPEN, then retry the operation instead of declaring it a success. Signed-off-by: Trond Myklebust [Andrew W Elble: Fix a typo in nfs4_refresh_open_stateid] Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 27 +++++++++++++++++++++++++++ fs/nfs/delegation.h | 1 + fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 21 +++++++++++++++++++-- fs/nfs/nfs4state.c | 16 ++++++++++++++++ 5 files changed, 65 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 606dd3871f66..ade44ca0c66c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1040,6 +1040,33 @@ int nfs_delegations_present(struct nfs_client *clp) return ret; } +/** + * nfs4_refresh_delegation_stateid - Update delegation stateid seqid + * @dst: stateid to refresh + * @inode: inode to check + * + * Returns "true" and updates "dst->seqid" * if inode had a delegation + * that matches our delegation stateid. Otherwise "false" is returned. + */ +bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode) +{ + struct nfs_delegation *delegation; + bool ret = false; + if (!inode) + goto out; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL && + nfs4_stateid_match_other(dst, &delegation->stateid)) { + dst->seqid = delegation->stateid.seqid; + return ret; + } + rcu_read_unlock(); +out: + return ret; +} + /** * nfs4_copy_delegation_stateid - Copy inode's state ID information * @inode: inode to check diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index e9d555796873..fe9f3882adae 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -61,6 +61,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type); int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred); +bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index fdcfbab886bb..feb084bc5565 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -460,6 +460,8 @@ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t, const struct nfs_lock_context *, nfs4_stateid *, struct rpc_cred **); +extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst, + struct nfs4_state *state); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2d628c9425c..3e385154196a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3199,13 +3199,21 @@ static void nfs4_close_done(struct rpc_task *task, void *data) } break; + case -NFS4ERR_OLD_STATEID: + /* Did we race with OPEN? */ + if (nfs4_refresh_open_stateid(&calldata->arg.stateid, + state)) { + task->tk_status = 0; + rpc_restart_call_prepare(task); + } + goto out_release; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: nfs4_free_revoked_stateid(server, &calldata->arg.stateid, task->tk_msg.rpc_cred); - case -NFS4ERR_OLD_STATEID: + /* Fallthrough */ case -NFS4ERR_BAD_STATEID: if (!nfs4_stateid_match(&calldata->arg.stateid, &state->open_stateid)) { @@ -3214,6 +3222,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) } if (calldata->arg.fmode == 0) break; + /* Fallthrough */ default: if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) { rpc_restart_call_prepare(task); @@ -5793,11 +5802,19 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) nfs4_free_revoked_stateid(data->res.server, data->args.stateid, task->tk_msg.rpc_cred); + /* Fallthrough */ case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: task->tk_status = 0; break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode)) { + task->tk_status = 0; + rpc_restart_call_prepare(task); + return; + } + task->tk_status = 0; + break; case -NFS4ERR_ACCESS: if (data->args.bitmask) { data->args.bitmask = NULL; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6e3f37288348..cee1e000b41e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -986,6 +986,22 @@ out: return ret; } +bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +{ + bool ret; + int seq; + + do { + ret = false; + seq = read_seqbegin(&state->seqlock); + if (nfs4_state_match_open_stateid_other(state, dst)) { + dst->seqid = state->open_stateid.seqid; + ret = true; + } + } while (read_seqretry(&state->seqlock, seq)); + return ret; +} + static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { const nfs4_stateid *src; -- cgit From c82bac6f4b2af18dcb392b10e80c809ee17b2b1b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:06 -0500 Subject: NFSv4: Don't try to CLOSE if the stateid 'other' field has changed If the stateid is no longer recognised on the server, either due to a restart, or due to a competing CLOSE call, then we do not have to retry. Any open contexts that triggered a reopen of the file, will also act as triggers for any CLOSE for the updated stateids. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 14 ++++---------- fs/nfs/nfs4state.c | 9 +++++++-- 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index feb084bc5565..9721b74786d1 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -462,6 +462,8 @@ extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t, struct rpc_cred **); extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state); +extern bool nfs4_copy_open_stateid(nfs4_stateid *dst, + struct nfs4_state *state); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3e385154196a..ef313d602a85 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3215,14 +3215,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) task->tk_msg.rpc_cred); /* Fallthrough */ case -NFS4ERR_BAD_STATEID: - if (!nfs4_stateid_match(&calldata->arg.stateid, - &state->open_stateid)) { - rpc_restart_call_prepare(task); - goto out_release; - } - if (calldata->arg.fmode == 0) - break; - /* Fallthrough */ + break; default: if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) { rpc_restart_call_prepare(task); @@ -3254,7 +3247,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); - nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid); /* Calculate the change in open mode */ calldata->arg.fmode = 0; if (state->n_rdwr == 0) { @@ -3272,7 +3264,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; if (!nfs4_valid_open_stateid(state) || - test_bit(NFS_OPEN_STATE, &state->flags) == 0) + !nfs4_refresh_open_stateid(&calldata->arg.stateid, state)) call_close = 0; spin_unlock(&state->owner->so_lock); @@ -3366,6 +3358,8 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); + if (!nfs4_copy_open_stateid(&calldata->arg.stateid, state)) + goto out_free_calldata; /* Serialization for the sequence id */ alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cee1e000b41e..b6a0cf7fa1f6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1002,18 +1002,23 @@ bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) return ret; } -static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { + bool ret; const nfs4_stateid *src; int seq; do { + ret = false; src = &zero_stateid; seq = read_seqbegin(&state->seqlock); - if (test_bit(NFS_OPEN_STATE, &state->flags)) + if (test_bit(NFS_OPEN_STATE, &state->flags)) { src = &state->open_stateid; + ret = true; + } nfs4_stateid_copy(dst, src); } while (read_seqretry(&state->seqlock, seq)); + return ret; } /* -- cgit From 7380020e77b61361207420e78a9da925bc79ab4b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:07 -0500 Subject: pNFS: Retry NFS4ERR_OLD_STATEID errors in layoutreturn-on-close If our layoutreturn on close operation returns an NFS4ERR_OLD_STATEID, then try to update the stateid and retry. We know that there should be no further LAYOUTGET requests being launched. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 18 ++++++++++++++++-- fs/nfs/pnfs.c | 18 ++++++++++++++++++ fs/nfs/pnfs.h | 6 ++++++ 3 files changed, 40 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ef313d602a85..7ff9c43f79eb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3166,11 +3166,18 @@ static void nfs4_close_done(struct rpc_task *task, void *data) calldata->arg.lr_args = NULL; calldata->res.lr_res = NULL; break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_layout_stateid(&calldata->arg.lr_args->stateid, + calldata->inode)) { + calldata->res.lr_ret = 0; + rpc_restart_call_prepare(task); + return; + } + /* Fallthrough */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_UNKNOWN_LAYOUTTYPE: case -NFS4ERR_WRONG_CRED: calldata->arg.lr_args = NULL; @@ -5771,11 +5778,18 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) data->args.lr_args = NULL; data->res.lr_res = NULL; break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_layout_stateid(&data->args.lr_args->stateid, + data->inode)) { + data->res.lr_ret = 0; + rpc_restart_call_prepare(task); + return; + } + /* Fallthrough */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_UNKNOWN_LAYOUTTYPE: case -NFS4ERR_WRONG_CRED: data->args.lr_args = NULL; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ec30dacf1a24..d602fe9e1ac8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -354,6 +354,24 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg, pnfs_lseg_dec_and_remove_zero(lseg, free_me); } +/* + * Update the seqid of a layout stateid + */ +bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode) +{ + struct pnfs_layout_hdr *lo; + bool ret = false; + + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) { + dst->seqid = lo->plh_stateid.seqid; + ret = true; + } + spin_unlock(&inode->i_lock); + return ret; +} + /* * Mark a pnfs_layout_hdr and all associated layout segments as invalid * diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 78de7a2052f7..8d507c361d98 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -252,6 +252,7 @@ int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, bool is_recall); int pnfs_destroy_layouts_byclid(struct nfs_client *clp, bool is_recall); +bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode); void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, @@ -765,6 +766,11 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void) { } +static inline bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, + struct inode *inode) +{ + return false; +} #endif /* CONFIG_NFS_V4_1 */ #if IS_ENABLED(CONFIG_NFS_V4_2) -- cgit From ff90514ebf96695b937a116b860ff214a8d4a2ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:08 -0500 Subject: NFSv4: Retry NFS4ERR_OLD_STATEID errors in layoutreturn If our layoutreturn returns an NFS4ERR_OLD_STATEID, then try to update the stateid and retry. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ff9c43f79eb..8e7604a1eee4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8750,18 +8750,27 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) server = NFS_SERVER(lrp->args.inode); switch (task->tk_status) { + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_layout_stateid(&lrp->args.stateid, + lrp->args.inode)) + goto out_restart; + /* Fallthrough */ default: task->tk_status = 0; + /* Fallthrough */ case 0: break; case -NFS4ERR_DELAY: if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) break; - nfs4_sequence_free_slot(&lrp->res.seq_res); - rpc_restart_call_prepare(task); - return; + goto out_restart; } dprintk("<-- %s\n", __func__); + return; +out_restart: + task->tk_status = 0; + nfs4_sequence_free_slot(&lrp->res.seq_res); + rpc_restart_call_prepare(task); } static void nfs4_layoutreturn_release(void *calldata) -- cgit From 91b30d2e7f303dee9cdd11010e9058bc9124bc57 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:09 -0500 Subject: NFSv4: cleanup nfs4_close_done Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8e7604a1eee4..4a5b01c91ab4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3168,11 +3168,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; case -NFS4ERR_OLD_STATEID: if (nfs4_refresh_layout_stateid(&calldata->arg.lr_args->stateid, - calldata->inode)) { - calldata->res.lr_ret = 0; - rpc_restart_call_prepare(task); - return; - } + calldata->inode)) + goto lr_restart; /* Fallthrough */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: @@ -3182,9 +3179,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) case -NFS4ERR_WRONG_CRED: calldata->arg.lr_args = NULL; calldata->res.lr_res = NULL; - calldata->res.lr_ret = 0; - rpc_restart_call_prepare(task); - return; + goto lr_restart; } } @@ -3200,19 +3195,15 @@ static void nfs4_close_done(struct rpc_task *task, void *data) if (calldata->arg.bitmask != NULL) { calldata->arg.bitmask = NULL; calldata->res.fattr = NULL; - task->tk_status = 0; - rpc_restart_call_prepare(task); - goto out_release; + goto out_restart; } break; case -NFS4ERR_OLD_STATEID: /* Did we race with OPEN? */ if (nfs4_refresh_open_stateid(&calldata->arg.stateid, - state)) { - task->tk_status = 0; - rpc_restart_call_prepare(task); - } + state)) + goto out_restart; goto out_release; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: @@ -3224,17 +3215,23 @@ static void nfs4_close_done(struct rpc_task *task, void *data) case -NFS4ERR_BAD_STATEID: break; default: - if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - goto out_release; - } + if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) + goto out_restart; } nfs_clear_open_stateid(state, &calldata->arg.stateid, res_stateid, calldata->arg.fmode); out_release: + task->tk_status = 0; nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, &calldata->fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); + return; +lr_restart: + calldata->res.lr_ret = 0; +out_restart: + task->tk_status = 0; + rpc_restart_call_prepare(task); + goto out_release; } static void nfs4_close_prepare(struct rpc_task *task, void *data) -- cgit From 140087fdf65b271055de1e1669c8016a908b6a85 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:10 -0500 Subject: NFSv4: Clean up nfs4_delegreturn_done Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4a5b01c91ab4..1090016a238f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5777,11 +5777,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) break; case -NFS4ERR_OLD_STATEID: if (nfs4_refresh_layout_stateid(&data->args.lr_args->stateid, - data->inode)) { - data->res.lr_ret = 0; - rpc_restart_call_prepare(task); - return; - } + data->inode)) + goto lr_restart; /* Fallthrough */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: @@ -5791,9 +5788,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_WRONG_CRED: data->args.lr_args = NULL; data->res.lr_res = NULL; - data->res.lr_ret = 0; - rpc_restart_call_prepare(task); - return; + goto lr_restart; } } @@ -5813,29 +5808,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) task->tk_status = 0; break; case -NFS4ERR_OLD_STATEID: - if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode)) { - task->tk_status = 0; - rpc_restart_call_prepare(task); - return; - } + if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode)) + goto out_restart; task->tk_status = 0; break; case -NFS4ERR_ACCESS: if (data->args.bitmask) { data->args.bitmask = NULL; data->res.fattr = NULL; - task->tk_status = 0; - rpc_restart_call_prepare(task); - return; + goto out_restart; } + /* Fallthrough */ default: if (nfs4_async_handle_error(task, data->res.server, NULL, NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - return; + goto out_restart; } } data->rpc_status = task->tk_status; + return; +lr_restart: + data->res.lr_ret = 0; +out_restart: + task->tk_status = 0; + rpc_restart_call_prepare(task); } static void nfs4_delegreturn_release(void *calldata) -- cgit From 46280d9d3de67f64c5bd30a7bbdc372d69f38d98 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:11 -0500 Subject: NFSv4: Check the open stateid when searching for expired state Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b6a0cf7fa1f6..ff2794412e6f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1431,6 +1431,11 @@ void nfs_inode_find_state_and_recover(struct inode *inode, found = true; continue; } + if (nfs4_stateid_match_other(&state->open_stateid, stateid) && + nfs4_state_mark_reclaim_nograce(clp, state)) { + found = true; + continue; + } if (nfs_state_lock_state_matches_stateid(state, stateid) && nfs4_state_mark_reclaim_nograce(clp, state)) found = true; -- cgit From e1fff5df6e04818c711882d40f06e97891bca36e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 7 Nov 2017 13:10:46 -0500 Subject: NFSv4: nfs_set_open_stateid must not trigger state recovery for closed state In nfs_set_open_stateid_locked, we must ignore stateids from closed state. Reported-by: Andrew W Elble Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1090016a238f..2c9c22579610 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1541,7 +1541,8 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, write_seqlock(&state->seqlock); } - if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (test_bit(NFS_OPEN_STATE, &state->flags) && + !nfs4_stateid_match_other(stateid, &state->open_stateid)) { nfs4_stateid_copy(freeme, &state->open_stateid); nfs_test_and_clear_all_open_stateid(state); } -- cgit From fcd8843c406b46433857ae45e5e9d84b01a7d20b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 7 Nov 2017 12:39:44 -0500 Subject: NFSv4: Replace closed stateids with the "invalid special stateid" When decoding a CLOSE, replace the stateid returned by the server with the "invalid special stateid" described in RFC5661, Section 8.2.3. In nfs_set_open_stateid_locked, ignore stateids from closed state. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4state.c | 8 ++++++++ fs/nfs/nfs4xdr.c | 12 +++++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9721b74786d1..dcfd1afc8d1f 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -481,6 +481,7 @@ extern int nfs4_sequence_done(struct rpc_task *task, extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); extern const nfs4_stateid zero_stateid; +extern const nfs4_stateid invalid_stateid; /* nfs4super.c */ struct nfs_mount_info; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ff2794412e6f..54fd56d715a8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -69,6 +69,14 @@ const nfs4_stateid zero_stateid = { { .data = { 0 } }, .type = NFS4_SPECIAL_STATEID_TYPE, }; +const nfs4_stateid invalid_stateid = { + { + .seqid = cpu_to_be32(0xffffffffU), + .other = { 0 }, + }, + .type = NFS4_INVALID_STATEID_TYPE, +}; + static DEFINE_MUTEX(nfs_clid_init_mutex); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 14ed9791ec9c..77c6729e57f0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4385,6 +4385,14 @@ static int decode_delegation_stateid(struct xdr_stream *xdr, nfs4_stateid *state return decode_stateid(xdr, stateid); } +static int decode_invalid_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) +{ + nfs4_stateid dummy; + + nfs4_stateid_copy(stateid, &invalid_stateid); + return decode_stateid(xdr, &dummy); +} + static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) { int status; @@ -4393,7 +4401,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) if (status != -EIO) nfs_increment_open_seqid(status, res->seqid); if (!status) - status = decode_open_stateid(xdr, &res->stateid); + status = decode_invalid_stateid(xdr, &res->stateid); return status; } @@ -6108,6 +6116,8 @@ static int decode_layoutreturn(struct xdr_stream *xdr, res->lrs_present = be32_to_cpup(p); if (res->lrs_present) status = decode_layout_stateid(xdr, &res->stateid); + else + nfs4_stateid_copy(&res->stateid, &invalid_stateid); return status; out_overflow: print_overflow_msg(__func__, xdr); -- cgit From 0671d8f108762efc51ca893dbf8f0ba72f655c3d Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 7 Nov 2017 08:51:00 +0100 Subject: nfs/write: Use common error handling code in nfs_lock_and_join_requests() Add a jump target so that a bit of exception handling can be better reused at the end of this function. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index babebbccae2a..5b5f464f6f2a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -487,10 +487,8 @@ try_again: } ret = nfs_page_group_lock(head); - if (ret < 0) { - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); - } + if (ret < 0) + goto release_request; /* lock each request in the page group */ total_bytes = head->wb_bytes; @@ -515,8 +513,7 @@ try_again: if (ret < 0) { nfs_unroll_locks(inode, head, subreq); nfs_release_request(subreq); - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); + goto release_request; } } /* @@ -532,8 +529,8 @@ try_again: nfs_page_group_unlock(head); nfs_unroll_locks(inode, head, subreq); nfs_unlock_and_release_request(subreq); - nfs_unlock_and_release_request(head); - return ERR_PTR(-EIO); + ret = -EIO; + goto release_request; } } @@ -576,6 +573,10 @@ try_again: /* still holds ref on head from nfs_page_find_head_request * and still has lock on head from lock loop */ return head; + +release_request: + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); } static void nfs_write_error_remove_page(struct nfs_page *req) -- cgit From b0b5352d9a507b344d4a2aec21f0105c585251fe Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 11:48:16 +0300 Subject: nfs client: exit_net cleanup check added Be sure that nfs_client_list and nfs_volume_list lists initialized in net_init hook were return to initial state in net_exit hook. Signed-off-by: Vasily Averin Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 52a60e399a2d..e5fcc1f095ae 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2084,8 +2084,12 @@ static int nfs_net_init(struct net *net) static void nfs_net_exit(struct net *net) { + struct nfs_net *nn = net_generic(net, nfs_net_id); + nfs_fs_proc_net_exit(net); nfs_cleanup_cb_ident_idr(net); + WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); + WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); } static struct pernet_operations nfs_net_ops = { -- cgit From e4949e4b3d5e056bcecebd340d4c8fab7ed1c20d Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 8 Nov 2017 08:56:55 +0300 Subject: nfs: remove net pointer from messages Publishing of net pointer is not safe, use net->ns.inum instead Signed-off-by: Vasily Averin Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 2cddf7f437e6..387369c2107b 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -48,15 +48,15 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) if (ret <= 0) goto out_err; nn->nfs_callback_tcpport = ret; - dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", - nn->nfs_callback_tcpport, PF_INET, net); + dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", + nn->nfs_callback_tcpport, PF_INET, net->ns.inum); ret = svc_create_xprt(serv, "tcp", net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nn->nfs_callback_tcpport6 = ret; - dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", - nn->nfs_callback_tcpport6, PF_INET6, net); + dprintk("NFS: Callback listener port = %u (af %u, net %x\n", + nn->nfs_callback_tcpport6, PF_INET6, net->ns.inum); } else if (ret != -EAFNOSUPPORT) goto out_err; return 0; @@ -184,7 +184,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc if (--nn->cb_users[minorversion]) return; - dprintk("NFS: destroy per-net callback data; net=%p\n", net); + dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum); svc_shutdown_net(serv, net); } @@ -197,7 +197,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, if (nn->cb_users[minorversion]++) return 0; - dprintk("NFS: create per-net callback data; net=%p\n", net); + dprintk("NFS: create per-net callback data; net=%x\n", net->ns.inum); ret = svc_bind(serv, net); if (ret < 0) { @@ -222,7 +222,7 @@ err_socks: err_bind: nn->cb_users[minorversion]--; dprintk("NFS: Couldn't create callback socket: err = %d; " - "net = %p\n", ret, net); + "net = %x\n", ret, net->ns.inum); return ret; } -- cgit From fd53dde83978ba5f7db3183ce56b3a1c39f448b0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 8 Nov 2017 20:49:19 -0600 Subject: NFS: super: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 703509 Addresses-Coverity-ID: 703510 Addresses-Coverity-ID: 703511 Addresses-Coverity-ID: 703512 Addresses-Coverity-ID: 703513 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c9d24bae3025..da19c723a244 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1456,18 +1456,21 @@ static int nfs_parse_mount_options(char *raw, switch (token) { case Opt_xprt_udp6: protofamily = AF_INET6; + /* fall through */ case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: protofamily = AF_INET6; + /* fall through */ case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma6: protofamily = AF_INET6; + /* fall through */ case Opt_xprt_rdma: /* vector side protocols to TCP */ mnt->flags |= NFS_MOUNT_TCP; @@ -1494,11 +1497,13 @@ static int nfs_parse_mount_options(char *raw, switch (token) { case Opt_xprt_udp6: mountfamily = AF_INET6; + /* fall through */ case Opt_xprt_udp: mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: mountfamily = AF_INET6; + /* fall through */ case Opt_xprt_tcp: mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; break; @@ -1988,9 +1993,9 @@ static int nfs23_validate_mount_data(void *options, args->version = NFS_DEFAULT_VERSION; switch (data->version) { case 1: - data->namlen = 0; + data->namlen = 0; /* fall through */ case 2: - data->bsize = 0; + data->bsize = 0; /* fall through */ case 3: if (data->flags & NFS_MOUNT_VER3) goto out_no_v3; @@ -1998,11 +2003,14 @@ static int nfs23_validate_mount_data(void *options, memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); /* Turn off security negotiation */ extra_flags |= NFS_MOUNT_SECFLAVOUR; + /* fall through */ case 4: if (data->flags & NFS_MOUNT_SECFLAVOUR) goto out_no_sec; + /* fall through */ case 5: memset(data->context, 0, sizeof(data->context)); + /* fall through */ case 6: if (data->flags & NFS_MOUNT_VER3) { if (data->root.size > NFS3_FHSIZE || data->root.size == 0) -- cgit From c05cefcc72416a37eba5a2b35f0704ed758a9145 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 5 Nov 2017 15:45:22 -0500 Subject: nfs: Fix ugly referral attributes Before traversing a referral and performing a mount, the mounted-on directory looks strange: dr-xr-xr-x. 2 4294967294 4294967294 0 Dec 31 1969 dir.0 nfs4_get_referral is wiping out any cached attributes with what was returned via GETATTR(fs_locations), but the bit mask for that operation does not request any file attributes. Retrieve owner and timestamp information so that the memcpy in nfs4_get_referral fills in more attributes. Changes since v1: - Don't request attributes that the client unconditionally replaces - Request only MOUNTED_ON_FILEID or FILEID attribute, not both - encode_fs_locations() doesn't use the third bitmask word Fixes: 6b97fd3da1ea ("NFSv4: Follow a referral") Suggested-by: Pradeep Thomas Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2c9c22579610..6b23a032ee1e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -258,15 +258,12 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE }; const u32 nfs4_fs_locations_bitmap[3] = { - FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE + FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_FSID | FATTR4_WORD0_FILEID | FATTR4_WORD0_FS_LOCATIONS, - FATTR4_WORD1_MODE - | FATTR4_WORD1_NUMLINKS - | FATTR4_WORD1_OWNER + FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED @@ -6907,9 +6904,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, struct page *page) { struct nfs_server *server = NFS_SERVER(dir); - u32 bitmask[3] = { - [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, - }; + u32 bitmask[3]; struct nfs4_fs_locations_arg args = { .dir_fh = NFS_FH(dir), .name = name, @@ -6928,12 +6923,15 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, dprintk("%s: start\n", __func__); + bitmask[0] = nfs4_fattr_bitmap[0] | FATTR4_WORD0_FS_LOCATIONS; + bitmask[1] = nfs4_fattr_bitmap[1]; + /* Ask for the fileid of the absent filesystem if mounted_on_fileid * is not supported */ if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) - bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; + bitmask[0] &= ~FATTR4_WORD0_FILEID; else - bitmask[0] |= FATTR4_WORD0_FILEID; + bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; nfs_fattr_init(&fs_locations->fattr); fs_locations->server = server; -- cgit From f02fee227e5f21981152850744a6084ff3fa94ee Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Tue, 7 Nov 2017 16:25:47 -0600 Subject: NFS: Fix typo in nomigration mount option The option was incorrectly masking off all other options. Signed-off-by: Joshua Watt Cc: stable@vger.kernel.org #3.7 Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index da19c723a244..43cadb28db6e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1332,7 +1332,7 @@ static int nfs_parse_mount_options(char *raw, mnt->options |= NFS_OPTION_MIGRATION; break; case Opt_nomigration: - mnt->options &= NFS_OPTION_MIGRATION; + mnt->options &= ~NFS_OPTION_MIGRATION; break; /* -- cgit From fcfa447062b2061e11f68b846d61cbfe60d0d604 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 10 Nov 2017 06:27:49 -0500 Subject: NFS: Revert "NFS: Move the flock open mode check into nfs_flock()" Commit e12937279c8b "NFS: Move the flock open mode check into nfs_flock()" changed NFSv3 behavior for flock() such that the open mode must match the lock type, however that requirement shouldn't be enforced for flock(). Signed-off-by: Benjamin Coddington Cc: stable@vger.kernel.org # v4.12 Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 18 ++---------------- fs/nfs/nfs4proc.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0214dd1e1060..81cca49a8375 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -829,23 +829,9 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) is_local = 1; - /* - * VFS doesn't require the open mode to match a flock() lock's type. - * NFS, however, may simulate flock() locking with posix locking which - * requires the open mode to match the lock type. - */ - switch (fl->fl_type) { - case F_UNLCK: + /* We're simulating flock() locks using posix locks on the server */ + if (fl->fl_type == F_UNLCK) return do_unlk(filp, cmd, fl, is_local); - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - return -EBADF; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) - return -EBADF; - } - return do_setlk(filp, cmd, fl, is_local); } EXPORT_SYMBOL_GPL(nfs_flock); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6b23a032ee1e..56fa5a16e097 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6709,6 +6709,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) return -ENOLCK; + /* + * Don't rely on the VFS having checked the file open mode, + * since it won't do this for flock() locks. + */ + switch (request->fl_type) { + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + return -EBADF; + break; + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + } + status = nfs4_set_lock_state(state, request); if (status != 0) return status; -- cgit