diff options
Diffstat (limited to 'fs/nfs/nfs4proc.c')
| -rw-r--r-- | fs/nfs/nfs4proc.c | 8055 |
1 files changed, 5788 insertions, 2267 deletions
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf11799297c4..93c6ce04332b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -51,10 +51,10 @@ #include <linux/namei.h> #include <linux/mount.h> #include <linux/module.h> -#include <linux/nfs_idmap.h> #include <linux/xattr.h> #include <linux/utsname.h> #include <linux/freezer.h> +#include <linux/iversion.h> #include "nfs4_fs.h" #include "delegation.h" @@ -63,31 +63,50 @@ #include "callback.h" #include "pnfs.h" #include "netns.h" +#include "sysfs.h" +#include "nfs4idmap.h" #include "nfs4session.h" #include "fscache.h" +#include "nfs42.h" + +#include "nfs4trace.h" #define NFSDBG_FACILITY NFSDBG_PROC +#define NFS4_BITMASK_SZ 3 + #define NFS4_POLL_RETRY_MIN (HZ/10) #define NFS4_POLL_RETRY_MAX (15*HZ) +/* file attributes which can be mapped to nfs attributes */ +#define NFS4_VALID_ATTRS (ATTR_MODE \ + | ATTR_UID \ + | ATTR_GID \ + | ATTR_SIZE \ + | ATTR_ATIME \ + | ATTR_MTIME \ + | ATTR_CTIME \ + | ATTR_ATIME_SET \ + | ATTR_MTIME_SET) + struct nfs4_opendata; -static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); -static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); -static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); -static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, +static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr, struct inode *inode); +static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs4_state *state, struct nfs4_label *ilabel, - struct nfs4_label *olabel); + struct nfs_open_context *ctx, struct nfs4_label *ilabel); #ifdef CONFIG_NFS_V4_1 -static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, - struct rpc_cred *); +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, + const struct cred *cred, + struct nfs4_slot *slot, + bool is_privileged); +static int nfs41_test_stateid(struct nfs_server *, const nfs4_stateid *, + const struct cred *); static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *, - struct rpc_cred *); + const struct cred *, bool); #endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL @@ -95,6 +114,7 @@ static inline struct nfs4_label * nfs4_label_init_security(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct nfs4_label *label) { + struct lsm_context shim; int err; if (label == NULL) @@ -103,21 +123,32 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry, if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0) return NULL; - if (NFS_SERVER(dir)->nfs_client->cl_minorversion < 2) - return NULL; + label->lfs = 0; + label->pi = 0; + label->len = 0; + label->label = NULL; err = security_dentry_init_security(dentry, sattr->ia_mode, - &dentry->d_name, (void **)&label->label, &label->len); - if (err == 0) - return label; + &dentry->d_name, NULL, &shim); + if (err) + return NULL; - return NULL; + label->lsmid = shim.id; + label->label = shim.context; + label->len = shim.len; + return label; } static inline void nfs4_label_release_security(struct nfs4_label *label) { - if (label) - security_release_secctx(label->label, label->len); + struct lsm_context shim; + + if (label) { + shim.context = label->label; + shim.len = label->len; + shim.id = label->lsmid; + security_release_secctx(&shim); + } } static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label) { @@ -148,8 +179,10 @@ static int nfs4_map_errors(int err) case -NFS4ERR_RESOURCE: case -NFS4ERR_LAYOUTTRYLATER: case -NFS4ERR_RECALLCONFLICT: + case -NFS4ERR_RETURNCONFLICT: return -EREMOTEIO; case -NFS4ERR_WRONGSEC: + case -NFS4ERR_WRONG_CRED: return -EPERM; case -NFS4ERR_BADOWNER: case -NFS4ERR_BADNAME: @@ -158,10 +191,13 @@ static int nfs4_map_errors(int err) return -EACCES; case -NFS4ERR_MINOR_VERS_MISMATCH: return -EPROTONOSUPPORT; - case -NFS4ERR_ACCESS: - return -EACCES; case -NFS4ERR_FILE_OPEN: return -EBUSY; + case -NFS4ERR_NOT_SAME: + return -ENOTSYNC; + case -ENETDOWN: + case -ENETUNREACH: + break; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); @@ -186,8 +222,10 @@ const u32 nfs4_fattr_bitmap[3] = { | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_CREATE | FATTR4_WORD1_TIME_METADATA - | FATTR4_WORD1_TIME_MODIFY, + | FATTR4_WORD1_TIME_MODIFY + | FATTR4_WORD1_MOUNTED_ON_FILEID, #ifdef CONFIG_NFS_V4_SECURITY_LABEL FATTR4_WORD2_SECURITY_LABEL #endif @@ -206,14 +244,17 @@ static const u32 nfs4_pnfs_open_bitmap[3] = { | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_CREATE | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY, FATTR4_WORD2_MDSTHRESHOLD +#ifdef CONFIG_NFS_V4_SECURITY_LABEL + | FATTR4_WORD2_SECURITY_LABEL +#endif }; static const u32 nfs4_open_noattr_bitmap[3] = { FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE | FATTR4_WORD0_FILEID, }; @@ -239,18 +280,18 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_FS_LAYOUT_TYPES, FATTR4_WORD2_LAYOUT_BLKSIZE + | FATTR4_WORD2_CLONE_BLKSIZE + | FATTR4_WORD2_CHANGE_ATTR_TYPE + | FATTR4_WORD2_XATTR_SUPPORT }; const u32 nfs4_fs_locations_bitmap[3] = { - FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE + FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_FSID | FATTR4_WORD0_FILEID | FATTR4_WORD0_FS_LOCATIONS, - FATTR4_WORD1_MODE - | FATTR4_WORD1_NUMLINKS - | FATTR4_WORD1_OWNER + FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED @@ -260,9 +301,50 @@ const u32 nfs4_fs_locations_bitmap[3] = { | FATTR4_WORD1_MOUNTED_ON_FILEID, }; +static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src, + struct inode *inode, unsigned long flags) +{ + unsigned long cache_validity; + + memcpy(dst, src, NFS4_BITMASK_SZ*sizeof(*dst)); + if (!inode || !nfs_have_read_or_write_delegation(inode)) + return; + + cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags; + + /* Remove the attributes over which we have full control */ + dst[1] &= ~FATTR4_WORD1_RAWDEV; + if (!(cache_validity & NFS_INO_INVALID_SIZE)) + dst[0] &= ~FATTR4_WORD0_SIZE; + + if (!(cache_validity & NFS_INO_INVALID_CHANGE)) + dst[0] &= ~FATTR4_WORD0_CHANGE; + + if (!(cache_validity & NFS_INO_INVALID_MODE)) + dst[1] &= ~FATTR4_WORD1_MODE; + if (!(cache_validity & NFS_INO_INVALID_OTHER)) + dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP); + + if (!(cache_validity & NFS_INO_INVALID_BTIME)) + dst[1] &= ~FATTR4_WORD1_TIME_CREATE; + + if (nfs_have_delegated_mtime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET); + if (!(cache_validity & NFS_INO_INVALID_MTIME)) + dst[1] &= ~(FATTR4_WORD1_TIME_MODIFY|FATTR4_WORD1_TIME_MODIFY_SET); + if (!(cache_validity & NFS_INO_INVALID_CTIME)) + dst[1] &= ~(FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY_SET); + } else if (nfs_have_delegated_atime(inode)) { + if (!(cache_validity & NFS_INO_INVALID_ATIME)) + dst[1] &= ~(FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET); + } +} + static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, struct nfs4_readdir_arg *readdir) { + unsigned int attrs = FATTR4_WORD0_FILEID | FATTR4_WORD0_TYPE; __be32 *start, *p; if (cookie > 2) { @@ -293,9 +375,10 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent memcpy(p, ".\0\0\0", 4); /* entry */ p++; *p++ = xdr_one; /* bitmap length */ - *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ - *p++ = htonl(8); /* attribute buffer length */ - p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode)); + *p++ = htonl(attrs); /* bitmap */ + *p++ = htonl(12); /* attribute buffer length */ + *p++ = htonl(NF4DIR); + p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry))); } *p++ = xdr_one; /* next */ @@ -305,65 +388,167 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent memcpy(p, "..\0\0", 4); /* entry */ p++; *p++ = xdr_one; /* bitmap length */ - *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ - *p++ = htonl(8); /* attribute buffer length */ - p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode)); + *p++ = htonl(attrs); /* bitmap */ + *p++ = htonl(12); /* attribute buffer length */ + *p++ = htonl(NF4DIR); + spin_lock(&dentry->d_lock); + p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent))); + spin_unlock(&dentry->d_lock); readdir->pgbase = (char *)p - (char *)start; readdir->count -= readdir->pgbase; kunmap_atomic(start); } -static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) +static void nfs4_fattr_set_prechange(struct nfs_fattr *fattr, u64 version) { - int res = 0; + if (!(fattr->valid & NFS_ATTR_FATTR_PRECHANGE)) { + fattr->pre_change_attr = version; + fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; + } +} - might_sleep(); +static void nfs4_test_and_free_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + const struct cred *cred) +{ + const struct nfs4_minor_version_ops *ops = server->nfs_client->cl_mvops; + ops->test_and_free_expired(server, stateid, cred); +} + +static void __nfs4_free_revoked_stateid(struct nfs_server *server, + nfs4_stateid *stateid, + const struct cred *cred) +{ + stateid->type = NFS4_REVOKED_STATEID_TYPE; + nfs4_test_and_free_stateid(server, stateid, cred); +} + +static void nfs4_free_revoked_stateid(struct nfs_server *server, + const nfs4_stateid *stateid, + const struct cred *cred) +{ + nfs4_stateid tmp; + + nfs4_stateid_copy(&tmp, stateid); + __nfs4_free_revoked_stateid(server, &tmp, cred); +} + +static long nfs4_update_delay(long *timeout) +{ + long ret; + if (!timeout) + return NFS4_POLL_RETRY_MAX; if (*timeout <= 0) *timeout = NFS4_POLL_RETRY_MIN; if (*timeout > NFS4_POLL_RETRY_MAX) *timeout = NFS4_POLL_RETRY_MAX; - freezable_schedule_timeout_killable_unsafe(*timeout); - if (fatal_signal_pending(current)) - res = -ERESTARTSYS; + ret = *timeout; *timeout <<= 1; - return res; + return ret; +} + +static int nfs4_delay_killable(long *timeout) +{ + might_sleep(); + + if (unlikely(nfs_current_task_exiting())) + return -EINTR; + __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); + schedule_timeout(nfs4_update_delay(timeout)); + if (!__fatal_signal_pending(current)) + return 0; + return -EINTR; +} + +static int nfs4_delay_interruptible(long *timeout) +{ + might_sleep(); + + if (unlikely(nfs_current_task_exiting())) + return -EINTR; + __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE); + schedule_timeout(nfs4_update_delay(timeout)); + if (!signal_pending(current)) + return 0; + return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS; +} + +static int nfs4_delay(long *timeout, bool interruptible) +{ + if (interruptible) + return nfs4_delay_interruptible(timeout); + return nfs4_delay_killable(timeout); +} + +static const nfs4_stateid * +nfs4_recoverable_stateid(const nfs4_stateid *stateid) +{ + if (!stateid) + return NULL; + switch (stateid->type) { + case NFS4_OPEN_STATEID_TYPE: + case NFS4_LOCK_STATEID_TYPE: + case NFS4_DELEGATION_STATEID_TYPE: + return stateid; + default: + break; + } + return NULL; } /* This is the error handling routine for processes that are allowed * to sleep. */ -static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +static int nfs4_do_handle_exception(struct nfs_server *server, + int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; + const nfs4_stateid *stateid; struct inode *inode = exception->inode; int ret = errorcode; + exception->delay = 0; + exception->recovering = 0; exception->retry = 0; + + stateid = nfs4_recoverable_stateid(exception->stateid); + if (stateid == NULL && state != NULL) + stateid = nfs4_recoverable_stateid(&state->stateid); + switch(errorcode) { case 0: return 0; - case -NFS4ERR_OPENMODE: - if (inode && nfs4_have_delegation(inode, FMODE_READ)) { - nfs4_inode_return_delegation(inode); - exception->retry = 1; - return 0; - } - if (state == NULL) - break; - ret = nfs4_schedule_stateid_recovery(server, state); - if (ret < 0) - break; - goto wait_on_recovery; + case -NFS4ERR_BADHANDLE: + case -ESTALE: + if (inode != NULL && S_ISREG(inode->i_mode)) + pnfs_destroy_layout(NFS_I(inode)); + break; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: - if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) { - nfs_remove_bad_delegation(inode); - exception->retry = 1; - break; + case -NFS4ERR_PARTNER_NO_AUTH: + if (inode != NULL && stateid != NULL) { + nfs_inode_find_state_and_recover(inode, + stateid); + goto wait_on_recovery; + } + fallthrough; + case -NFS4ERR_OPENMODE: + if (inode) { + int err; + + err = nfs_async_inode_return_delegation(inode, + stateid); + if (err == 0) + goto wait_on_recovery; + if (stateid != NULL && stateid->type == NFS4_DELEGATION_STATEID_TYPE) { + exception->retry = 1; + break; + } } if (state == NULL) break; @@ -371,16 +556,18 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc if (ret < 0) break; goto wait_on_recovery; - case -NFS4ERR_EXPIRED: - if (state != NULL) { - ret = nfs4_schedule_stateid_recovery(server, state); - if (ret < 0) - break; - } case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; + case -NFS4ERR_MOVED: + ret = nfs4_schedule_migration_recovery(server); + if (ret < 0) + break; + goto wait_on_recovery; + case -NFS4ERR_LEASE_MOVED: + nfs4_schedule_lease_moved_recovery(clp); + goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -389,9 +576,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc case -NFS4ERR_DEADSESSION: case -NFS4ERR_SEQ_FALSE_RETRY: case -NFS4ERR_SEQ_MISORDERED: - dprintk("%s ERROR: %d Reset session\n", __func__, - errorcode); - nfs4_schedule_session_recovery(clp->cl_session, errorcode); + /* Handled in nfs41_sequence_process() */ goto wait_on_recovery; #endif /* defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: @@ -402,11 +587,17 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc ret = -EBUSY; break; } - case -NFS4ERR_GRACE: + fallthrough; case -NFS4ERR_DELAY: - ret = nfs4_delay(server->client, &exception->timeout); - if (ret != 0) - break; + nfs_inc_server_stats(server, NFSIOS_DELAY); + fallthrough; + case -NFS4ERR_GRACE: + case -NFS4ERR_LAYOUTTRYLATER: + case -NFS4ERR_RECALLCONFLICT: + case -NFS4ERR_RETURNCONFLICT: + exception->delay = 1; + return 0; + case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: exception->retry = 1; @@ -427,12 +618,140 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc /* We failed to handle the error */ return nfs4_map_errors(ret); wait_on_recovery: - ret = nfs4_wait_clnt_recover(clp); + exception->recovering = 1; + return 0; +} + +/* + * Track the number of NFS4ERR_DELAY related retransmissions and return + * EAGAIN if the 'softerr' mount option is set, and we've exceeded the limit + * set by 'nfs_delay_retrans'. + */ +static int nfs4_exception_should_retrans(const struct nfs_server *server, + struct nfs4_exception *exception) +{ + if (server->flags & NFS_MOUNT_SOFTERR && nfs_delay_retrans >= 0) { + if (exception->retrans++ >= (unsigned short)nfs_delay_retrans) + return -EAGAIN; + } + return 0; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. + */ +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + int ret; + + ret = nfs4_do_handle_exception(server, errorcode, exception); + if (exception->delay) { + int ret2 = nfs4_exception_should_retrans(server, exception); + if (ret2 < 0) { + exception->retry = 0; + return ret2; + } + ret = nfs4_delay(&exception->timeout, + exception->interruptible); + goto out_retry; + } + if (exception->recovering) { + if (exception->task_is_privileged) + return -EDEADLOCK; + ret = nfs4_wait_clnt_recover(clp); + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + return -EIO; + goto out_retry; + } + return ret; +out_retry: if (ret == 0) exception->retry = 1; return ret; } +static int +nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, + int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + int ret; + + if ((task->tk_rpc_status == -ENETDOWN || + task->tk_rpc_status == -ENETUNREACH) && + task->tk_flags & RPC_TASK_NETUNREACH_FATAL) { + exception->delay = 0; + exception->recovering = 0; + exception->retry = 0; + return -EIO; + } + + ret = nfs4_do_handle_exception(server, errorcode, exception); + if (exception->delay) { + int ret2 = nfs4_exception_should_retrans(server, exception); + if (ret2 < 0) { + exception->retry = 0; + return ret2; + } + rpc_delay(task, nfs4_update_delay(&exception->timeout)); + goto out_retry; + } + if (exception->recovering) { + if (exception->task_is_privileged) + return -EDEADLOCK; + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); + goto out_retry; + } + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + ret = -EIO; + return ret; +out_retry: + if (ret == 0) { + exception->retry = 1; + /* + * For NFS4ERR_MOVED, the client transport will need to + * be recomputed after migration recovery has completed. + */ + if (errorcode == -NFS4ERR_MOVED) + rpc_task_release_transport(task); + } + return ret; +} + +int +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server, + struct nfs4_state *state, long *timeout) +{ + struct nfs4_exception exception = { + .state = state, + }; + + if (task->tk_status >= 0) + return 0; + if (timeout) + exception.timeout = *timeout; + task->tk_status = nfs4_async_handle_exception(task, server, + task->tk_status, + &exception); + if (exception.delay && timeout) + *timeout = exception.timeout; + if (exception.retry) + return -EAGAIN; + return 0; +} + +/* + * Return 'true' if 'clp' is using an rpc_client that is integrity protected + * or 'false' otherwise. + */ +static bool _nfs4_is_integrity_protected(struct nfs_client *clp) +{ + rpc_authflavor_t flavor = clp->cl_rpcclient->cl_auth->au_flavor; + return (flavor == RPC_AUTH_GSS_KRB5I) || (flavor == RPC_AUTH_GSS_KRB5P); +} static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp) { @@ -444,26 +763,69 @@ static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp) static void renew_lease(const struct nfs_server *server, unsigned long timestamp) { - do_renew_lease(server->nfs_client, timestamp); + struct nfs_client *clp = server->nfs_client; + + if (!nfs4_has_session(clp)) + do_renew_lease(clp, timestamp); +} + +struct nfs4_call_sync_data { + const struct nfs_server *seq_server; + struct nfs4_sequence_args *seq_args; + struct nfs4_sequence_res *seq_res; +}; + +void nfs4_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply, + int privileged) +{ + args->sa_slot = NULL; + args->sa_cache_this = cache_reply; + args->sa_privileged = privileged; + + res->sr_slot = NULL; +} + +static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res) +{ + struct nfs4_slot *slot = res->sr_slot; + struct nfs4_slot_table *tbl; + + tbl = slot->table; + spin_lock(&tbl->slot_tbl_lock); + if (!nfs41_wake_and_assign_slot(tbl, slot)) + nfs4_free_slot(tbl, slot); + spin_unlock(&tbl->slot_tbl_lock); + + res->sr_slot = NULL; +} + +static int nfs40_sequence_done(struct rpc_task *task, + struct nfs4_sequence_res *res) +{ + if (res->sr_slot != NULL) + nfs40_sequence_free_slot(res); + return 1; } #if defined(CONFIG_NFS_V4_1) -static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) +static void nfs41_release_slot(struct nfs4_slot *slot) { struct nfs4_session *session; struct nfs4_slot_table *tbl; bool send_new_highest_used_slotid = false; - if (!res->sr_slot) { - /* just wake up the next guy waiting since - * we may have not consumed a slot after all */ - dprintk("%s: No slot\n", __func__); + if (!slot) return; - } - tbl = res->sr_slot->table; + tbl = slot->table; session = tbl->session; + /* Bump the slot sequence number */ + if (slot->seq_done) + slot->seq_nr++; + slot->seq_done = 0; + spin_lock(&tbl->slot_tbl_lock); /* Be nice to the server: try to ensure that the last transmitted * value for highest_user_slotid <= target_highest_slotid @@ -471,51 +833,83 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) if (tbl->highest_used_slotid > tbl->target_highest_slotid) send_new_highest_used_slotid = true; - if (nfs41_wake_and_assign_slot(tbl, res->sr_slot)) { + if (nfs41_wake_and_assign_slot(tbl, slot)) { send_new_highest_used_slotid = false; goto out_unlock; } - nfs4_free_slot(tbl, res->sr_slot); + nfs4_free_slot(tbl, slot); if (tbl->highest_used_slotid != NFS4_NO_SLOT) send_new_highest_used_slotid = false; out_unlock: spin_unlock(&tbl->slot_tbl_lock); - res->sr_slot = NULL; if (send_new_highest_used_slotid) - nfs41_server_notify_highest_slotid_update(session->clp); + nfs41_notify_server(session->clp); + if (waitqueue_active(&tbl->slot_waitq)) + wake_up_all(&tbl->slot_waitq); +} + +static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) +{ + nfs41_release_slot(res->sr_slot); + res->sr_slot = NULL; +} + +static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot, + u32 seqnr) +{ + if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0) + slot->seq_nr_highest_sent = seqnr; +} +static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, u32 seqnr) +{ + nfs4_slot_sequence_record_sent(slot, seqnr); + slot->seq_nr_last_acked = seqnr; +} + +static void nfs4_probe_sequence(struct nfs_client *client, const struct cred *cred, + struct nfs4_slot *slot) +{ + struct rpc_task *task = _nfs41_proc_sequence(client, cred, slot, true); + if (!IS_ERR(task)) + rpc_put_task_async(task); } -static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) +static int nfs41_sequence_process(struct rpc_task *task, + struct nfs4_sequence_res *res) { struct nfs4_session *session; - struct nfs4_slot *slot; + struct nfs4_slot *slot = res->sr_slot; struct nfs_client *clp; - bool interrupted = false; + int status; int ret = 1; + if (slot == NULL) + goto out_noaction; /* don't increment the sequence number if the task wasn't sent */ - if (!RPC_WAS_SENT(task)) + if (!RPC_WAS_SENT(task) || slot->seq_done) goto out; - slot = res->sr_slot; session = slot->table->session; + clp = session->clp; - if (slot->interrupted) { - slot->interrupted = 0; - interrupted = true; - } + trace_nfs4_sequence_done(session, res); + + status = res->sr_status; + if (task->tk_status == -NFS4ERR_DEADSESSION) + status = -NFS4ERR_DEADSESSION; /* Check the SEQUENCE operation status */ - switch (res->sr_status) { + switch (status) { case 0: + /* Mark this sequence number as having been acked */ + nfs4_slot_sequence_acked(slot, slot->seq_nr); /* Update the slot's sequence and clientid lease timer */ - ++slot->seq_nr; - clp = session->clp; + slot->seq_done = 1; do_renew_lease(clp, res->sr_timestamp); /* Check sequence flags */ - if (res->sr_status_flags != 0) - nfs4_schedule_lease_recovery(clp); + nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags, + !!slot->privileged); nfs41_update_target_slotid(slot->table, slot, res); break; case 1: @@ -523,9 +917,9 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * * sr_status remains 1 if an RPC level error occurred. * The server may or may not have processed the sequence * operation.. - * Mark the slot as having hosted an interrupted RPC call. */ - slot->interrupted = 1; + nfs4_slot_sequence_record_sent(slot, slot->seq_nr); + slot->seq_done = 1; goto out; case -NFS4ERR_DELAY: /* The server detected a resend of the RPC call and @@ -537,44 +931,77 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * slot->slot_nr, slot->seq_nr); goto out_retry; + case -NFS4ERR_RETRY_UNCACHED_REP: + case -NFS4ERR_SEQ_FALSE_RETRY: + /* + * The server thinks we tried to replay a request. + * Retry the call after bumping the sequence ID. + */ + nfs4_slot_sequence_acked(slot, slot->seq_nr); + goto retry_new_seq; case -NFS4ERR_BADSLOT: /* * The slot id we used was probably retired. Try again * using a different slot id. */ + if (slot->slot_nr < slot->table->target_highest_slotid) + goto session_recover; goto retry_nowait; case -NFS4ERR_SEQ_MISORDERED: + nfs4_slot_sequence_record_sent(slot, slot->seq_nr); /* - * Was the last operation on this sequence interrupted? - * If so, retry after bumping the sequence number. + * Were one or more calls using this slot interrupted? + * If the server never received the request, then our + * transmitted slot sequence number may be too high. However, + * if the server did receive the request then it might + * accidentally give us a reply with a mismatched operation. + * We can sort this out by sending a lone sequence operation + * to the server on the same slot. */ - if (interrupted) { - ++slot->seq_nr; + if ((s32)(slot->seq_nr - slot->seq_nr_last_acked) > 1) { + slot->seq_nr--; + if (task->tk_msg.rpc_proc != &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE]) { + nfs4_probe_sequence(clp, task->tk_msg.rpc_cred, slot); + res->sr_slot = NULL; + } goto retry_nowait; } /* - * Could this slot have been previously retired? - * If so, then the server may be expecting seq_nr = 1! + * RFC5661: + * A retry might be sent while the original request is + * still in progress on the replier. The replier SHOULD + * deal with the issue by returning NFS4ERR_DELAY as the + * reply to SEQUENCE or CB_SEQUENCE operation, but + * implementations MAY return NFS4ERR_SEQ_MISORDERED. + * + * Restart the search after a delay. */ - if (slot->seq_nr != 1) { - slot->seq_nr = 1; - goto retry_nowait; - } - break; - case -NFS4ERR_SEQ_FALSE_RETRY: - ++slot->seq_nr; - goto retry_nowait; + slot->seq_nr = slot->seq_nr_highest_sent; + goto out_retry; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + goto session_recover; default: /* Just update the slot sequence no. */ - ++slot->seq_nr; + slot->seq_done = 1; } out: /* The session may be reset by one of the error handlers. */ dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); - nfs41_sequence_free_slot(res); +out_noaction: return ret; +session_recover: + set_bit(NFS4_SLOT_TBL_DRAINING, &session->fc_slot_table.slot_tbl_state); + nfs4_schedule_session_recovery(session, status); + dprintk("%s ERROR: %d Reset session\n", __func__, status); + nfs41_sequence_free_slot(res); + goto out; +retry_new_seq: + ++slot->seq_nr; retry_nowait: if (rpc_restart_call_prepare(task)) { + nfs41_sequence_free_slot(res); task->tk_status = 0; ret = 0; } @@ -586,153 +1013,208 @@ out_retry: return 0; } -static int nfs4_sequence_done(struct rpc_task *task, - struct nfs4_sequence_res *res) +int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) +{ + if (!nfs41_sequence_process(task, res)) + return 0; + if (res->sr_slot != NULL) + nfs41_sequence_free_slot(res); + return 1; + +} +EXPORT_SYMBOL_GPL(nfs41_sequence_done); + +static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) +{ + if (res->sr_slot == NULL) + return 1; + if (res->sr_slot->table->session != NULL) + return nfs41_sequence_process(task, res); + return nfs40_sequence_done(task, res); +} + +static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res) +{ + if (res->sr_slot != NULL) { + if (res->sr_slot->table->session != NULL) + nfs41_sequence_free_slot(res); + else + nfs40_sequence_free_slot(res); + } +} + +int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { if (res->sr_slot == NULL) return 1; + if (!res->sr_slot->table->session) + return nfs40_sequence_done(task, res); return nfs41_sequence_done(task, res); } +EXPORT_SYMBOL_GPL(nfs4_sequence_done); -static void nfs41_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) +static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) { - args->sa_slot = NULL; - args->sa_cache_this = 0; - args->sa_privileged = 0; - if (cache_reply) - args->sa_cache_this = 1; - res->sr_slot = NULL; + struct nfs4_call_sync_data *data = calldata; + + dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); + + nfs4_setup_sequence(data->seq_server->nfs_client, + data->seq_args, data->seq_res, task); } -static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) +static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) { - args->sa_privileged = 1; + struct nfs4_call_sync_data *data = calldata; + + nfs41_sequence_done(task, data->seq_res); } -int nfs41_setup_sequence(struct nfs4_session *session, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) +static const struct rpc_call_ops nfs41_call_sync_ops = { + .rpc_call_prepare = nfs41_call_sync_prepare, + .rpc_call_done = nfs41_call_sync_done, +}; + +#else /* !CONFIG_NFS_V4_1 */ + +static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { + return nfs40_sequence_done(task, res); +} + +static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res) +{ + if (res->sr_slot != NULL) + nfs40_sequence_free_slot(res); +} + +int nfs4_sequence_done(struct rpc_task *task, + struct nfs4_sequence_res *res) +{ + return nfs40_sequence_done(task, res); +} +EXPORT_SYMBOL_GPL(nfs4_sequence_done); + +#endif /* !CONFIG_NFS_V4_1 */ + +static void nfs41_sequence_res_init(struct nfs4_sequence_res *res) +{ + res->sr_timestamp = jiffies; + res->sr_status_flags = 0; + res->sr_status = 1; +} + +static +void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct nfs4_slot *slot) +{ + if (!slot) + return; + slot->privileged = args->sa_privileged ? 1 : 0; + args->sa_slot = slot; + + res->sr_slot = slot; +} + +int nfs4_setup_sequence(struct nfs_client *client, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) +{ + struct nfs4_session *session = nfs4_get_session(client); + struct nfs4_slot_table *tbl = client->cl_slot_tbl; struct nfs4_slot *slot; - struct nfs4_slot_table *tbl; - dprintk("--> %s\n", __func__); /* slot already allocated? */ if (res->sr_slot != NULL) - goto out_success; - - tbl = &session->fc_slot_table; + goto out_start; - task->tk_timeout = 0; + if (session) + tbl = &session->fc_slot_table; spin_lock(&tbl->slot_tbl_lock); - if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state) && - !args->sa_privileged) { - /* The state manager will wait until the slot table is empty */ - dprintk("%s session is draining\n", __func__); + /* The state manager will wait until the slot table is empty */ + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) goto out_sleep; - } slot = nfs4_alloc_slot(tbl); if (IS_ERR(slot)) { - /* If out of memory, try again in 1/4 second */ if (slot == ERR_PTR(-ENOMEM)) - task->tk_timeout = HZ >> 2; - dprintk("<-- %s: no free slots\n", __func__); + goto out_sleep_timeout; goto out_sleep; } spin_unlock(&tbl->slot_tbl_lock); - args->sa_slot = slot; - - dprintk("<-- %s slotid=%d seqid=%d\n", __func__, - slot->slot_nr, slot->seq_nr); + nfs4_sequence_attach_slot(args, res, slot); - res->sr_slot = slot; - res->sr_timestamp = jiffies; - res->sr_status_flags = 0; - /* - * sr_status is only set in decode_sequence, and so will remain - * set to 1 if an rpc level failure occurs. - */ - res->sr_status = 1; -out_success: + trace_nfs4_setup_sequence(session, args); +out_start: + nfs41_sequence_res_init(res); rpc_call_start(task); return 0; +out_sleep_timeout: + /* Try again in 1/4 second */ + if (args->sa_privileged) + rpc_sleep_on_priority_timeout(&tbl->slot_tbl_waitq, task, + jiffies + (HZ >> 2), RPC_PRIORITY_PRIVILEGED); + else + rpc_sleep_on_timeout(&tbl->slot_tbl_waitq, task, + NULL, jiffies + (HZ >> 2)); + spin_unlock(&tbl->slot_tbl_lock); + return -EAGAIN; out_sleep: - /* Privileged tasks are queued with top priority */ if (args->sa_privileged) rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task, - NULL, RPC_PRIORITY_PRIVILEGED); + RPC_PRIORITY_PRIVILEGED); else rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); return -EAGAIN; } -EXPORT_SYMBOL_GPL(nfs41_setup_sequence); +EXPORT_SYMBOL_GPL(nfs4_setup_sequence); -int nfs4_setup_sequence(const struct nfs_server *server, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) +static void nfs40_call_sync_prepare(struct rpc_task *task, void *calldata) { - struct nfs4_session *session = nfs4_get_session(server); - int ret = 0; - - if (session == NULL) { - rpc_call_start(task); - goto out; - } - - dprintk("--> %s clp %p session %p sr_slot %d\n", - __func__, session->clp, session, res->sr_slot ? - res->sr_slot->slot_nr : -1); + struct nfs4_call_sync_data *data = calldata; + nfs4_setup_sequence(data->seq_server->nfs_client, + data->seq_args, data->seq_res, task); +} - ret = nfs41_setup_sequence(session, args, res, task); -out: - dprintk("<-- %s status=%d\n", __func__, ret); - return ret; +static void nfs40_call_sync_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_call_sync_data *data = calldata; + nfs4_sequence_done(task, data->seq_res); } -struct nfs41_call_sync_data { - const struct nfs_server *seq_server; - struct nfs4_sequence_args *seq_args; - struct nfs4_sequence_res *seq_res; +static const struct rpc_call_ops nfs40_call_sync_ops = { + .rpc_call_prepare = nfs40_call_sync_prepare, + .rpc_call_done = nfs40_call_sync_done, }; -static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) +static int nfs4_call_sync_custom(struct rpc_task_setup *task_setup) { - struct nfs41_call_sync_data *data = calldata; - struct nfs4_session *session = nfs4_get_session(data->seq_server); - - dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); - - nfs41_setup_sequence(session, data->seq_args, data->seq_res, task); -} + int ret; + struct rpc_task *task; -static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) -{ - struct nfs41_call_sync_data *data = calldata; + task = rpc_run_task(task_setup); + if (IS_ERR(task)) + return PTR_ERR(task); - nfs41_sequence_done(task, data->seq_res); + ret = task->tk_status; + rpc_put_task(task); + return ret; } -static const struct rpc_call_ops nfs41_call_sync_ops = { - .rpc_call_prepare = nfs41_call_sync_prepare, - .rpc_call_done = nfs41_call_sync_done, -}; - -static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res) +static int nfs4_do_call_sync(struct rpc_clnt *clnt, + struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + unsigned short task_flags) { - int ret; - struct rpc_task *task; - struct nfs41_call_sync_data data = { + struct nfs_client *clp = server->nfs_client; + struct nfs4_call_sync_data data = { .seq_server = server, .seq_args = args, .seq_res = res, @@ -740,95 +1222,123 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct rpc_task_setup task_setup = { .rpc_client = clnt, .rpc_message = msg, - .callback_ops = &nfs41_call_sync_ops, - .callback_data = &data + .callback_ops = clp->cl_mvops->call_sync_ops, + .callback_data = &data, + .flags = task_flags, }; - task = rpc_run_task(&task_setup); - if (IS_ERR(task)) - ret = PTR_ERR(task); - else { - ret = task->tk_status; - rpc_put_task(task); - } - return ret; + return nfs4_call_sync_custom(&task_setup); } -#else -static -void nfs41_init_sequence(struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, int cache_reply) +static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, + struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res) { + unsigned short task_flags = 0; + + if (server->caps & NFS_CAP_MOVEABLE) + task_flags = RPC_TASK_MOVEABLE; + return nfs4_do_call_sync(clnt, server, msg, args, res, task_flags); } -static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) + +int nfs4_call_sync(struct rpc_clnt *clnt, + struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply) { + nfs4_init_sequence(args, res, cache_reply, 0); + return nfs4_call_sync_sequence(clnt, server, msg, args, res); } - -static int nfs4_sequence_done(struct rpc_task *task, - struct nfs4_sequence_res *res) +static void +nfs4_inc_nlink_locked(struct inode *inode) { - return 1; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_CTIME | + NFS_INO_INVALID_NLINK); + inc_nlink(inode); } -#endif /* CONFIG_NFS_V4_1 */ -static -int _nfs4_call_sync(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res) +static void +nfs4_inc_nlink(struct inode *inode) { - return rpc_call_sync(clnt, msg, 0); + spin_lock(&inode->i_lock); + nfs4_inc_nlink_locked(inode); + spin_unlock(&inode->i_lock); } -static -int nfs4_call_sync(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - int cache_reply) +static void +nfs4_dec_nlink_locked(struct inode *inode) { - nfs41_init_sequence(args, res, cache_reply); - return server->nfs_client->cl_mvops->call_sync(clnt, server, msg, - args, res); + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_CTIME | + NFS_INO_INVALID_NLINK); + drop_nlink(inode); } -static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) +static void +nfs4_update_changeattr_locked(struct inode *inode, + struct nfs4_change_info *cinfo, + unsigned long timestamp, unsigned long cache_validity) { - struct nfs_inode *nfsi = NFS_I(dir); + struct nfs_inode *nfsi = NFS_I(inode); + u64 change_attr = inode_peek_iversion_raw(inode); + + if (!nfs_have_delegated_mtime(inode)) + cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + if (S_ISDIR(inode->i_mode)) + cache_validity |= NFS_INO_INVALID_DATA; + + switch (NFS_SERVER(inode)->change_attr_type) { + case NFS4_CHANGE_TYPE_IS_UNDEFINED: + if (cinfo->after == change_attr) + goto out; + break; + default: + if ((s64)(change_attr - cinfo->after) >= 0) + goto out; + } + inode_set_iversion_raw(inode, cinfo->after); + if (!cinfo->atomic || cinfo->before != change_attr) { + if (S_ISDIR(inode->i_mode)) + nfs_force_lookup_revalidate(inode); + + if (!nfs_have_delegated_attributes(inode)) + cache_validity |= + NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | + NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER | + NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK | + NFS_INO_INVALID_MODE | NFS_INO_INVALID_BTIME | + NFS_INO_INVALID_XATTR; + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + } + nfsi->attrtimeo_timestamp = jiffies; + nfsi->read_cache_jiffies = timestamp; + nfsi->attr_gencount = nfs_inc_attr_generation_counter(); + nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE; +out: + nfs_set_cache_invalid(inode, cache_validity); +} + +void +nfs4_update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, + unsigned long timestamp, unsigned long cache_validity) +{ spin_lock(&dir->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - if (!cinfo->atomic || cinfo->before != dir->i_version) - nfs_force_lookup_revalidate(dir); - dir->i_version = cinfo->after; - nfs_fscache_invalidate(dir); + nfs4_update_changeattr_locked(dir, cinfo, timestamp, cache_validity); spin_unlock(&dir->i_lock); } -struct nfs4_opendata { - struct kref kref; - struct nfs_openargs o_arg; - struct nfs_openres o_res; - struct nfs_open_confirmargs c_arg; - struct nfs_open_confirmres c_res; - struct nfs4_string owner_name; - struct nfs4_string group_name; - struct nfs_fattr f_attr; - struct nfs4_label *f_label; - struct dentry *dir; - struct dentry *dentry; - struct nfs4_state_owner *owner; - struct nfs4_state *state; - struct iattr attrs; - unsigned long timestamp; - unsigned int rpc_done : 1; - unsigned int is_recover : 1; - int rpc_status; - int cancelled; +struct nfs4_open_createattrs { + struct nfs4_label *label; + struct iattr *sattr; + const __u32 verf[2]; }; static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, @@ -843,6 +1353,58 @@ static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, return true; } +static fmode_t _nfs4_ctx_to_accessmode(const struct nfs_open_context *ctx) +{ + return ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); +} + +static fmode_t _nfs4_ctx_to_openmode(const struct nfs_open_context *ctx) +{ + fmode_t ret = ctx->mode & (FMODE_READ|FMODE_WRITE); + + return (ctx->mode & FMODE_EXEC) ? FMODE_READ | ret : ret; +} + +static u32 +nfs4_fmode_to_share_access(fmode_t fmode) +{ + u32 res = 0; + + switch (fmode & (FMODE_READ | FMODE_WRITE)) { + case FMODE_READ: + res = NFS4_SHARE_ACCESS_READ; + break; + case FMODE_WRITE: + res = NFS4_SHARE_ACCESS_WRITE; + break; + case FMODE_READ|FMODE_WRITE: + res = NFS4_SHARE_ACCESS_BOTH; + } + return res; +} + +static u32 +nfs4_map_atomic_open_share(struct nfs_server *server, + fmode_t fmode, int openflags) +{ + u32 res = nfs4_fmode_to_share_access(fmode); + + if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) + goto out; + /* Want no delegation if we're using O_DIRECT */ + if (openflags & O_DIRECT) { + res |= NFS4_SHARE_WANT_NO_DELEG; + goto out; + } + /* res |= NFS4_SHARE_WANT_NO_PREFERENCE; */ + if (server->caps & NFS_CAP_DELEGTIME) + res |= NFS4_SHARE_WANT_DELEG_TIMESTAMPS; + if (server->caps & NFS_CAP_OPEN_XOR) + res |= NFS4_SHARE_WANT_OPEN_XOR_DELEGATION; +out: + return res; +} + static enum open_claim_type4 nfs4_map_atomic_open_claim(struct nfs_server *server, enum open_claim_type4 claim) @@ -864,7 +1426,6 @@ nfs4_map_atomic_open_claim(struct nfs_server *server, static void nfs4_init_opendata_res(struct nfs4_opendata *p) { p->o_res.f_attr = &p->f_attr; - p->o_res.f_label = p->f_label; p->o_res.seqid = p->o_arg.seqid; p->c_res.seqid = p->c_arg.seqid; p->o_res.server = p->o_arg.server; @@ -875,26 +1436,32 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct nfs4_state_owner *sp, fmode_t fmode, int flags, - const struct iattr *attrs, - struct nfs4_label *label, + const struct nfs4_open_createattrs *c, enum open_claim_type4 claim, gfp_t gfp_mask) { struct dentry *parent = dget_parent(dentry); - struct inode *dir = parent->d_inode; + struct inode *dir = d_inode(parent); struct nfs_server *server = NFS_SERVER(dir); + struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); + struct nfs4_label *label = (c != NULL) ? c->label : NULL; struct nfs4_opendata *p; p = kzalloc(sizeof(*p), gfp_mask); if (p == NULL) goto err; - p->f_label = nfs4_label_alloc(server, gfp_mask); - if (IS_ERR(p->f_label)) + p->f_attr.label = nfs4_label_alloc(server, gfp_mask); + if (IS_ERR(p->f_attr.label)) goto err_free_p; - p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); - if (p->o_arg.seqid == NULL) + p->a_label = nfs4_label_alloc(server, gfp_mask); + if (IS_ERR(p->a_label)) + goto err_free_f; + + alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; + p->o_arg.seqid = alloc_seqid(&sp->so_seqid, gfp_mask); + if (IS_ERR(p->o_arg.seqid)) goto err_free_label; nfs_sb_active(dentry->d_sb); p->dentry = dget(dentry); @@ -903,13 +1470,31 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, atomic_inc(&sp->so_count); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); - /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS - * will return permission denied for all bits until close */ - if (!(flags & O_EXCL)) { - /* ask server to check for all possible rights as results - * are cached */ + p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); + p->o_arg.share_access = nfs4_map_atomic_open_share(server, + fmode, flags); + if (flags & O_CREAT) { + p->o_arg.umask = current_umask(); + p->o_arg.label = nfs4_label_copy(p->a_label, label); + if (c->sattr != NULL && c->sattr->ia_valid != 0) { + p->o_arg.u.attrs = &p->attrs; + memcpy(&p->attrs, c->sattr, sizeof(p->attrs)); + + memcpy(p->o_arg.u.verifier.data, c->verf, + sizeof(p->o_arg.u.verifier.data)); + } + } + /* ask server to check for all possible rights as results + * are cached */ + switch (p->o_arg.claim) { + default: + break; + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_FH: p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | - NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; + NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE | + NFS4_ACCESS_EXECUTE | + nfs_access_xattr_mask(server); } p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); @@ -918,8 +1503,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.server = server; p->o_arg.bitmask = nfs4_bitmask(server, label); p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; - p->o_arg.label = label; - p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); switch (p->o_arg.claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_CUR: @@ -930,18 +1513,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: case NFS4_OPEN_CLAIM_DELEG_PREV_FH: - p->o_arg.fh = NFS_FH(dentry->d_inode); - } - if (attrs != NULL && attrs->ia_valid != 0) { - __be32 verf[2]; - - p->o_arg.u.attrs = &p->attrs; - memcpy(&p->attrs, attrs, sizeof(p->attrs)); - - verf[0] = jiffies; - verf[1] = current->pid; - memcpy(p->o_arg.u.verifier.data, verf, - sizeof(p->o_arg.u.verifier.data)); + p->o_arg.fh = NFS_FH(d_inode(dentry)); } p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; @@ -951,7 +1523,9 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, return p; err_free_label: - nfs4_label_free(p->f_label); + nfs4_label_free(p->a_label); +err_free_f: + nfs4_label_free(p->f_attr.label); err_free_p: kfree(p); err: @@ -965,17 +1539,21 @@ static void nfs4_opendata_free(struct kref *kref) struct nfs4_opendata, kref); struct super_block *sb = p->dentry->d_sb; + nfs4_lgopen_release(p->lgp); nfs_free_seqid(p->o_arg.seqid); + nfs4_sequence_free_slot(&p->o_res.seq_res); if (p->state != NULL) nfs4_put_open_state(p->state); nfs4_put_state_owner(p->owner); - nfs4_label_free(p->f_label); + nfs4_label_free(p->a_label); + nfs4_label_free(p->f_attr.label); dput(p->dir); dput(p->dentry); nfs_sb_deactive(sb); nfs_fattr_free_names(&p->f_attr); + kfree(p->f_attr.mdsthreshold); kfree(p); } @@ -985,20 +1563,35 @@ static void nfs4_opendata_put(struct nfs4_opendata *p) kref_put(&p->kref, nfs4_opendata_free); } -static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) +static bool nfs4_mode_match_open_stateid(struct nfs4_state *state, + fmode_t fmode) { - int ret; - - ret = rpc_wait_for_completion_task(task); - return ret; + switch(fmode & (FMODE_READ|FMODE_WRITE)) { + case FMODE_READ|FMODE_WRITE: + return state->n_rdwr != 0; + case FMODE_WRITE: + return state->n_wronly != 0; + case FMODE_READ: + return state->n_rdonly != 0; + } + WARN_ON_ONCE(1); + return false; } -static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode) +static int can_open_cached(struct nfs4_state *state, fmode_t mode, + int open_mode, enum open_claim_type4 claim) { int ret = 0; if (open_mode & (O_EXCL|O_TRUNC)) goto out; + switch (claim) { + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_FH: + goto out; + default: + break; + } switch (mode & (FMODE_READ|FMODE_WRITE)) { case FMODE_READ: ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 @@ -1016,16 +1609,24 @@ out: return ret; } -static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode) +static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode, + enum open_claim_type4 claim) { if (delegation == NULL) return 0; if ((delegation->type & fmode) != fmode) return 0; - if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) - return 0; - if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) + switch (claim) { + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_FH: + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + if (!test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) + break; + fallthrough; + default: return 0; + } nfs_mark_delegation_referenced(delegation); return 1; } @@ -1045,97 +1646,321 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) nfs4_state_set_mode_locked(state, state->state | fmode); } -static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) +#ifdef CONFIG_NFS_V4_1 +static bool nfs_open_stateid_recover_openmode(struct nfs4_state *state) +{ + if (state->n_rdonly && !test_bit(NFS_O_RDONLY_STATE, &state->flags)) + return true; + if (state->n_wronly && !test_bit(NFS_O_WRONLY_STATE, &state->flags)) + return true; + if (state->n_rdwr && !test_bit(NFS_O_RDWR_STATE, &state->flags)) + return true; + return false; +} +#endif /* CONFIG_NFS_V4_1 */ + +static void nfs_state_log_update_open_stateid(struct nfs4_state *state) +{ + if (test_and_clear_bit(NFS_STATE_CHANGE_WAIT, &state->flags)) + wake_up_all(&state->waitq); +} + +static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) { + struct nfs_client *clp = state->owner->so_server->nfs_client; + bool need_recover = false; + + if (test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags) && state->n_rdonly) + need_recover = true; + if (test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags) && state->n_wronly) + need_recover = true; + if (test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags) && state->n_rdwr) + need_recover = true; + if (need_recover) + nfs4_state_mark_reclaim_nograce(clp, state); +} + +/* + * Check for whether or not the caller may update the open stateid + * to the value passed in by stateid. + * + * Note: This function relies heavily on the server implementing + * RFC7530 Section 9.1.4.2, and RFC5661 Section 8.2.2 + * correctly. + * i.e. The stateid seqids have to be initialised to 1, and + * are then incremented on every state transition. + */ +static bool nfs_stateid_is_sequential(struct nfs4_state *state, + const nfs4_stateid *stateid) +{ + if (test_bit(NFS_OPEN_STATE, &state->flags)) { + /* The common case - we're updating to a new sequence number */ + if (nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (nfs4_stateid_is_next(&state->open_stateid, stateid)) + return true; + return false; + } + /* The server returned a new stateid */ + } + /* This is the first OPEN in this generation */ + if (stateid->seqid == cpu_to_be32(1)) + return true; + return false; +} + +static void nfs_resync_open_stateid_locked(struct nfs4_state *state) +{ + if (!(state->n_wronly || state->n_rdonly || state->n_rdwr)) + return; + if (state->n_wronly) + set_bit(NFS_O_WRONLY_STATE, &state->flags); + if (state->n_rdonly) + set_bit(NFS_O_RDONLY_STATE, &state->flags); + if (state->n_rdwr) + set_bit(NFS_O_RDWR_STATE, &state->flags); + set_bit(NFS_OPEN_STATE, &state->flags); +} + +static void nfs_clear_open_stateid_locked(struct nfs4_state *state, + nfs4_stateid *stateid, fmode_t fmode) +{ + clear_bit(NFS_O_RDWR_STATE, &state->flags); + switch (fmode & (FMODE_READ|FMODE_WRITE)) { + case FMODE_WRITE: + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_READ: + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case 0: + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); + } + if (stateid == NULL) + return; + /* Handle OPEN+OPEN_DOWNGRADE races */ + if (nfs4_stateid_match_other(stateid, &state->open_stateid) && + !nfs4_stateid_is_newer(stateid, &state->open_stateid)) { + nfs_resync_open_stateid_locked(state); + goto out; + } if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); - set_bit(NFS_OPEN_STATE, &state->flags); - switch (fmode) { - case FMODE_READ: - set_bit(NFS_O_RDONLY_STATE, &state->flags); + trace_nfs4_open_stateid_update(state->inode, stateid, 0); +out: + nfs_state_log_update_open_stateid(state); +} + +static void nfs_clear_open_stateid(struct nfs4_state *state, + nfs4_stateid *arg_stateid, + nfs4_stateid *stateid, fmode_t fmode) +{ + write_seqlock(&state->seqlock); + /* Ignore, if the CLOSE argment doesn't match the current stateid */ + if (nfs4_state_match_open_stateid_other(state, arg_stateid)) + nfs_clear_open_stateid_locked(state, stateid, fmode); + write_sequnlock(&state->seqlock); + if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) + nfs4_schedule_state_manager(state->owner->so_server->nfs_client); +} + +static void nfs_set_open_stateid_locked(struct nfs4_state *state, + const nfs4_stateid *stateid, nfs4_stateid *freeme) + __must_hold(&state->owner->so_lock) + __must_hold(&state->seqlock) + __must_hold(RCU) + +{ + DEFINE_WAIT(wait); + int status = 0; + for (;;) { + + if (nfs_stateid_is_sequential(state, stateid)) break; - case FMODE_WRITE: - set_bit(NFS_O_WRONLY_STATE, &state->flags); + + if (status) break; - case FMODE_READ|FMODE_WRITE: - set_bit(NFS_O_RDWR_STATE, &state->flags); + /* Rely on seqids for serialisation with NFSv4.0 */ + if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client)) + break; + + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); + prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); + /* + * Ensure we process the state changes in the same order + * in which the server processed them by delaying the + * update of the stateid until we are in sequence. + */ + write_sequnlock(&state->seqlock); + spin_unlock(&state->owner->so_lock); + rcu_read_unlock(); + trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); + + if (!fatal_signal_pending(current) && + !nfs_current_task_exiting()) { + if (schedule_timeout(5*HZ) == 0) + status = -EAGAIN; + else + status = 0; + } else + status = -EINTR; + finish_wait(&state->waitq, &wait); + rcu_read_lock(); + spin_lock(&state->owner->so_lock); + write_seqlock(&state->seqlock); + } + + if (test_bit(NFS_OPEN_STATE, &state->flags) && + !nfs4_stateid_match_other(stateid, &state->open_stateid)) { + nfs4_stateid_copy(freeme, &state->open_stateid); + nfs_test_and_clear_all_open_stateid(state); } + + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) + nfs4_stateid_copy(&state->stateid, stateid); + nfs4_stateid_copy(&state->open_stateid, stateid); + trace_nfs4_open_stateid_update(state->inode, stateid, status); + nfs_state_log_update_open_stateid(state); } -static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) +static void nfs_state_set_open_stateid(struct nfs4_state *state, + const nfs4_stateid *open_stateid, + fmode_t fmode, + nfs4_stateid *freeme) { + /* + * Protect the call to nfs4_state_set_mode_locked and + * serialise the stateid update + */ write_seqlock(&state->seqlock); - nfs_set_open_stateid_locked(state, stateid, fmode); + nfs_set_open_stateid_locked(state, open_stateid, freeme); + switch (fmode) { + case FMODE_READ: + set_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_WRITE: + set_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case FMODE_READ|FMODE_WRITE: + set_bit(NFS_O_RDWR_STATE, &state->flags); + } + set_bit(NFS_OPEN_STATE, &state->flags); write_sequnlock(&state->seqlock); } -static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) +static void nfs_state_clear_open_state_flags(struct nfs4_state *state) +{ + clear_bit(NFS_O_RDWR_STATE, &state->flags); + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + clear_bit(NFS_OPEN_STATE, &state->flags); +} + +static void nfs_state_set_delegation(struct nfs4_state *state, + const nfs4_stateid *deleg_stateid, + fmode_t fmode) { /* * Protect the call to nfs4_state_set_mode_locked and * serialise the stateid update */ write_seqlock(&state->seqlock); - if (deleg_stateid != NULL) { - nfs4_stateid_copy(&state->stateid, deleg_stateid); - set_bit(NFS_DELEGATED_STATE, &state->flags); - } - if (open_stateid != NULL) - nfs_set_open_stateid_locked(state, open_stateid, fmode); + nfs4_stateid_copy(&state->stateid, deleg_stateid); + set_bit(NFS_DELEGATED_STATE, &state->flags); write_sequnlock(&state->seqlock); - spin_lock(&state->owner->so_lock); - update_open_stateflags(state, fmode); - spin_unlock(&state->owner->so_lock); } -static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *delegation, fmode_t fmode) +static void nfs_state_clear_delegation(struct nfs4_state *state) { + write_seqlock(&state->seqlock); + nfs4_stateid_copy(&state->stateid, &state->open_stateid); + clear_bit(NFS_DELEGATED_STATE, &state->flags); + write_sequnlock(&state->seqlock); +} + +int update_open_stateid(struct nfs4_state *state, + const nfs4_stateid *open_stateid, + const nfs4_stateid *delegation, + fmode_t fmode) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs_client *clp = server->nfs_client; struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *deleg_cur; + nfs4_stateid freeme = { }; int ret = 0; fmode &= (FMODE_READ|FMODE_WRITE); rcu_read_lock(); - deleg_cur = rcu_dereference(nfsi->delegation); + spin_lock(&state->owner->so_lock); + if (open_stateid != NULL) { + nfs_state_set_open_stateid(state, open_stateid, fmode, &freeme); + ret = 1; + } + + deleg_cur = nfs4_get_valid_delegation(state->inode); if (deleg_cur == NULL) goto no_delegation; spin_lock(&deleg_cur->lock); - if (nfsi->delegation != deleg_cur || + if (rcu_dereference(nfsi->delegation) != deleg_cur || test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) || (deleg_cur->type & fmode) != fmode) goto no_delegation_unlock; if (delegation == NULL) delegation = &deleg_cur->stateid; - else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation)) + else if (!nfs4_stateid_match_other(&deleg_cur->stateid, delegation)) goto no_delegation_unlock; nfs_mark_delegation_referenced(deleg_cur); - __update_open_stateid(state, open_stateid, &deleg_cur->stateid, fmode); + nfs_state_set_delegation(state, &deleg_cur->stateid, fmode); ret = 1; no_delegation_unlock: spin_unlock(&deleg_cur->lock); no_delegation: + if (ret) + update_open_stateflags(state, fmode); + spin_unlock(&state->owner->so_lock); rcu_read_unlock(); - if (!ret && open_stateid != NULL) { - __update_open_stateid(state, open_stateid, NULL, fmode); - ret = 1; - } + if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) + nfs4_schedule_state_manager(clp); + if (freeme.type != 0) + nfs4_test_and_free_stateid(server, &freeme, + state->owner->so_cred); return ret; } +static bool nfs4_update_lock_stateid(struct nfs4_lock_state *lsp, + const nfs4_stateid *stateid) +{ + struct nfs4_state *state = lsp->ls_state; + bool ret = false; + + spin_lock(&state->state_lock); + if (!nfs4_stateid_match_other(stateid, &lsp->ls_stateid)) + goto out_noupdate; + if (!nfs4_stateid_is_newer(stateid, &lsp->ls_stateid)) + goto out_noupdate; + nfs4_stateid_copy(&lsp->ls_stateid, stateid); + ret = true; +out_noupdate: + spin_unlock(&state->state_lock); + return ret; +} static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode) { struct nfs_delegation *delegation; + fmode &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); - delegation = rcu_dereference(NFS_I(inode)->delegation); + delegation = nfs4_get_valid_delegation(inode); if (delegation == NULL || (delegation->type & fmode) == fmode) { rcu_read_unlock(); return; @@ -1147,26 +1972,24 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) { struct nfs4_state *state = opendata->state; - struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *delegation; int open_mode = opendata->o_arg.open_flags; fmode_t fmode = opendata->o_arg.fmode; + enum open_claim_type4 claim = opendata->o_arg.claim; nfs4_stateid stateid; int ret = -EAGAIN; for (;;) { - if (can_open_cached(state, fmode, open_mode)) { - spin_lock(&state->owner->so_lock); - if (can_open_cached(state, fmode, open_mode)) { - update_open_stateflags(state, fmode); - spin_unlock(&state->owner->so_lock); - goto out_return_state; - } + spin_lock(&state->owner->so_lock); + if (can_open_cached(state, fmode, open_mode, claim)) { + update_open_stateflags(state, fmode); spin_unlock(&state->owner->so_lock); + goto out_return_state; } + spin_unlock(&state->owner->so_lock); rcu_read_lock(); - delegation = rcu_dereference(nfsi->delegation); - if (!can_open_delegated(delegation, fmode)) { + delegation = nfs4_get_valid_delegation(state->inode); + if (!can_open_delegated(delegation, fmode, claim)) { rcu_read_unlock(); break; } @@ -1188,35 +2011,46 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) out: return ERR_PTR(ret); out_return_state: - atomic_inc(&state->count); + refcount_inc(&state->count); return state; } static void -nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state) -{ - struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client; - struct nfs_delegation *delegation; - int delegation_flags = 0; - - rcu_read_lock(); - delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation) - delegation_flags = delegation->flags; - rcu_read_unlock(); - if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) { +nfs4_process_delegation(struct inode *inode, const struct cred *cred, + enum open_claim_type4 claim, + const struct nfs4_open_delegation *delegation) +{ + switch (delegation->open_delegation_type) { + case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_WRITE: + case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG: + case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG: + break; + default: + return; + } + switch (claim) { + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: pr_err_ratelimited("NFS: Broken NFSv4 server %s is " "returning a delegation for " "OPEN(CLAIM_DELEGATE_CUR)\n", - clp->cl_hostname); - } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) - nfs_inode_set_delegation(state->inode, - data->owner->so_cred, - &data->o_res); - else - nfs_inode_reclaim_delegation(state->inode, - data->owner->so_cred, - &data->o_res); + NFS_SERVER(inode)->nfs_client->cl_hostname); + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + nfs_inode_reclaim_delegation(inode, cred, delegation->type, + &delegation->stateid, + delegation->pagemod_limit, + delegation->open_delegation_type); + break; + default: + nfs_inode_set_delegation(inode, cred, delegation->type, + &delegation->stateid, + delegation->pagemod_limit, + delegation->open_delegation_type); + } + if (delegation->do_recall) + nfs_async_inode_return_delegation(inode, &delegation->stateid); } /* @@ -1231,121 +2065,183 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) int ret; if (!data->rpc_done) { - ret = data->rpc_status; - goto err; + if (data->rpc_status) + return ERR_PTR(data->rpc_status); + return nfs4_try_open_cached(data); } - ret = -ESTALE; - if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) || - !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) || - !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE)) - goto err; - - ret = -ENOMEM; - state = nfs4_get_open_state(inode, data->owner); - if (state == NULL) - goto err; - ret = nfs_refresh_inode(inode, &data->f_attr); if (ret) - goto err; + return ERR_PTR(ret); - nfs_setsecurity(inode, &data->f_attr, data->f_label); + nfs4_process_delegation(state->inode, + data->owner->so_cred, + data->o_arg.claim, + &data->o_res.delegation); - if (data->o_res.delegation_type != 0) - nfs4_opendata_check_deleg(data, state); - update_open_stateid(state, &data->o_res.stateid, NULL, - data->o_arg.fmode); + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) { + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) + return ERR_PTR(-EAGAIN); + } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) + return ERR_PTR(-EAGAIN); + refcount_inc(&state->count); return state; -err: - return ERR_PTR(ret); +} +static struct inode * +nfs4_opendata_get_inode(struct nfs4_opendata *data) +{ + struct inode *inode; + + switch (data->o_arg.claim) { + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + if (!(data->f_attr.valid & NFS_ATTR_FATTR)) + return ERR_PTR(-EAGAIN); + inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, + &data->f_attr); + break; + default: + inode = d_inode(data->dentry); + ihold(inode); + nfs_refresh_inode(inode, &data->f_attr); + } + return inode; } static struct nfs4_state * -_nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) +nfs4_opendata_find_nfs4_state(struct nfs4_opendata *data) { + struct nfs4_state *state; struct inode *inode; - struct nfs4_state *state = NULL; - int ret; + + inode = nfs4_opendata_get_inode(data); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (data->state != NULL && data->state->inode == inode) { + state = data->state; + refcount_inc(&state->count); + } else + state = nfs4_get_open_state(inode, data->owner); + iput(inode); + if (state == NULL) + state = ERR_PTR(-ENOMEM); + return state; +} + +static struct nfs4_state * +_nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) +{ + struct nfs4_state *state; if (!data->rpc_done) { state = nfs4_try_open_cached(data); + trace_nfs4_cached_open(data->state); goto out; } - ret = -EAGAIN; - if (!(data->f_attr.valid & NFS_ATTR_FATTR)) - goto err; - inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr, data->f_label); - ret = PTR_ERR(inode); - if (IS_ERR(inode)) - goto err; - ret = -ENOMEM; - state = nfs4_get_open_state(inode, data->owner); - if (state == NULL) - goto err_put_inode; - if (data->o_res.delegation_type != 0) - nfs4_opendata_check_deleg(data, state); - update_open_stateid(state, &data->o_res.stateid, NULL, - data->o_arg.fmode); - iput(inode); + state = nfs4_opendata_find_nfs4_state(data); + if (IS_ERR(state)) + goto out; + + nfs4_process_delegation(state->inode, + data->owner->so_cred, + data->o_arg.claim, + &data->o_res.delegation); + + if (!(data->o_res.rflags & NFS4_OPEN_RESULT_NO_OPEN_STATEID)) { + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) { + nfs4_put_open_state(state); + state = ERR_PTR(-EAGAIN); + } + } else if (!update_open_stateid(state, NULL, NULL, data->o_arg.fmode)) { + nfs4_put_open_state(state); + state = ERR_PTR(-EAGAIN); + } out: nfs_release_seqid(data->o_arg.seqid); return state; -err_put_inode: - iput(inode); -err: - return ERR_PTR(ret); } static struct nfs4_state * nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) { + struct nfs4_state *ret; + if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) - return _nfs4_opendata_reclaim_to_nfs4_state(data); - return _nfs4_opendata_to_nfs4_state(data); + ret =_nfs4_opendata_reclaim_to_nfs4_state(data); + else + ret = _nfs4_opendata_to_nfs4_state(data); + nfs4_sequence_free_slot(&data->o_res.seq_res); + return ret; } -static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) +static struct nfs_open_context * +nfs4_state_find_open_context_mode(struct nfs4_state *state, fmode_t mode) { struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_open_context *ctx; - spin_lock(&state->inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { if (ctx->state != state) continue; - get_nfs_open_context(ctx); - spin_unlock(&state->inode->i_lock); + if ((ctx->mode & mode) != mode) + continue; + if (!get_nfs_open_context(ctx)) + continue; + rcu_read_unlock(); return ctx; } - spin_unlock(&state->inode->i_lock); + rcu_read_unlock(); return ERR_PTR(-ENOENT); } +static struct nfs_open_context * +nfs4_state_find_open_context(struct nfs4_state *state) +{ + struct nfs_open_context *ctx; + + ctx = nfs4_state_find_open_context_mode(state, FMODE_READ|FMODE_WRITE); + if (!IS_ERR(ctx)) + return ctx; + ctx = nfs4_state_find_open_context_mode(state, FMODE_WRITE); + if (!IS_ERR(ctx)) + return ctx; + return nfs4_state_find_open_context_mode(state, FMODE_READ); +} + static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, struct nfs4_state *state, enum open_claim_type4 claim) { struct nfs4_opendata *opendata; opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, - NULL, NULL, claim, GFP_NOFS); + NULL, claim, GFP_NOFS); if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; - atomic_inc(&state->count); + refcount_inc(&state->count); return opendata; } -static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmode, struct nfs4_state **res) +static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, + fmode_t fmode) { struct nfs4_state *newstate; + struct nfs_server *server = NFS_SB(opendata->dentry->d_sb); + int openflags = opendata->o_arg.open_flags; int ret; - opendata->o_arg.open_flags = 0; + if (!nfs4_mode_match_open_stateid(opendata->state, fmode)) + return 0; opendata->o_arg.fmode = fmode; + opendata->o_arg.share_access = + nfs4_map_atomic_open_share(server, fmode, openflags); memset(&opendata->o_res, 0, sizeof(opendata->o_res)); memset(&opendata->c_res, 0, sizeof(opendata->c_res)); nfs4_init_opendata_res(opendata); @@ -1355,44 +2251,27 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod newstate = nfs4_opendata_to_nfs4_state(opendata); if (IS_ERR(newstate)) return PTR_ERR(newstate); + if (newstate != opendata->state) + ret = -ESTALE; nfs4_close_state(newstate, fmode); - *res = newstate; - return 0; + return ret; } static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state) { - struct nfs4_state *newstate; int ret; /* memory barrier prior to reading state->n_* */ - clear_bit(NFS_DELEGATED_STATE, &state->flags); - clear_bit(NFS_OPEN_STATE, &state->flags); smp_rmb(); - if (state->n_rdwr != 0) { - clear_bit(NFS_O_RDWR_STATE, &state->flags); - ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); - if (ret != 0) - return ret; - if (newstate != state) - return -ESTALE; - } - if (state->n_wronly != 0) { - clear_bit(NFS_O_WRONLY_STATE, &state->flags); - ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); - if (ret != 0) - return ret; - if (newstate != state) - return -ESTALE; - } - if (state->n_rdonly != 0) { - clear_bit(NFS_O_RDONLY_STATE, &state->flags); - ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); - if (ret != 0) - return ret; - if (newstate != state) - return -ESTALE; - } + ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE); + if (ret != 0) + return ret; + ret = nfs4_open_recover_helper(opendata, FMODE_WRITE); + if (ret != 0) + return ret; + ret = nfs4_open_recover_helper(opendata, FMODE_READ); + if (ret != 0) + return ret; /* * We may have performed cached opens for all three recoveries. * Check if we need to update the current stateid. @@ -1415,7 +2294,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state { struct nfs_delegation *delegation; struct nfs4_opendata *opendata; - fmode_t delegation_type = 0; + u32 delegation_type = NFS4_OPEN_DELEGATE_NONE; int status; opendata = nfs4_open_recoverdata_alloc(ctx, state, @@ -1424,8 +2303,20 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state return PTR_ERR(opendata); rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) - delegation_type = delegation->type; + if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0) { + switch(delegation->type) { + case FMODE_READ: + delegation_type = NFS4_OPEN_DELEGATE_READ; + if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + delegation_type = NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG; + break; + case FMODE_WRITE: + case FMODE_READ|FMODE_WRITE: + delegation_type = NFS4_OPEN_DELEGATE_WRITE; + if (test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) + delegation_type = NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG; + } + } rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); @@ -1440,6 +2331,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state int err; do { err = _nfs4_do_open_reclaim(ctx, state); + trace_nfs4_open_reclaim(ctx, 0, err); if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) continue; if (err != -NFS4ERR_DELAY) @@ -1457,83 +2349,121 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta ctx = nfs4_state_find_open_context(state); if (IS_ERR(ctx)) return -EAGAIN; + clear_bit(NFS_DELEGATED_STATE, &state->flags); + nfs_state_clear_open_state_flags(state); ret = nfs4_do_open_reclaim(ctx, state); put_nfs_open_context(ctx); return ret; } -static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, int err) +static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs4_state *state, const nfs4_stateid *stateid, struct file_lock *fl, int err) { switch (err) { default: printk(KERN_ERR "NFS: %s: unhandled error " "%d.\n", __func__, err); + fallthrough; case 0: case -ENOENT: + case -EAGAIN: case -ESTALE: + case -ETIMEDOUT: break; case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_DEADSESSION: - set_bit(NFS_DELEGATED_STATE, &state->flags); - nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); return -EAGAIN; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: - set_bit(NFS_DELEGATED_STATE, &state->flags); - case -NFS4ERR_EXPIRED: /* Don't recall a delegation if it was lost */ nfs4_schedule_lease_recovery(server->nfs_client); return -EAGAIN; + case -NFS4ERR_MOVED: + nfs4_schedule_migration_recovery(server); + return -EAGAIN; + case -NFS4ERR_LEASE_MOVED: + nfs4_schedule_lease_moved_recovery(server->nfs_client); + return -EAGAIN; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OPENMODE: nfs_inode_find_state_and_recover(state->inode, stateid); nfs4_schedule_stateid_recovery(server, state); - return 0; + return -EAGAIN; case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: - set_bit(NFS_DELEGATED_STATE, &state->flags); ssleep(1); return -EAGAIN; case -ENOMEM: case -NFS4ERR_DENIED: - /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + if (fl) { + struct nfs4_lock_state *lsp = fl->fl_u.nfs4_fl.owner; + if (lsp) + set_bit(NFS_LOCK_LOST, &lsp->ls_flags); + } return 0; } return err; } -int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +int nfs4_open_delegation_recall(struct nfs_open_context *ctx, + struct nfs4_state *state, const nfs4_stateid *stateid) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_opendata *opendata; - int err; + int err = 0; opendata = nfs4_open_recoverdata_alloc(ctx, state, NFS4_OPEN_CLAIM_DELEG_CUR_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); - err = nfs4_open_recover(opendata, state); + if (!test_bit(NFS_O_RDWR_STATE, &state->flags)) { + err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE); + if (err) + goto out; + } + if (!test_bit(NFS_O_WRONLY_STATE, &state->flags)) { + err = nfs4_open_recover_helper(opendata, FMODE_WRITE); + if (err) + goto out; + } + if (!test_bit(NFS_O_RDONLY_STATE, &state->flags)) { + err = nfs4_open_recover_helper(opendata, FMODE_READ); + if (err) + goto out; + } + nfs_state_clear_delegation(state); +out: nfs4_opendata_put(opendata); - return nfs4_handle_delegation_recall_error(server, state, stateid, err); + return nfs4_handle_delegation_recall_error(server, state, stateid, NULL, err); +} + +static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_opendata *data = calldata; + + nfs4_setup_sequence(data->o_arg.server->nfs_client, + &data->c_arg.seq_args, &data->c_res.seq_res, task); } static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; + nfs40_sequence_done(task, &data->c_res.seq_res); + data->rpc_status = task->tk_status; if (data->rpc_status == 0) { nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid); nfs_confirm_seqid(&data->owner->so_seqid, 0); renew_lease(data->o_res.server, data->timestamp); - data->rpc_done = 1; + data->rpc_done = true; } } @@ -1543,7 +2473,7 @@ static void nfs4_open_confirm_release(void *calldata) struct nfs4_state *state = NULL; /* If this request hasn't been cancelled, do nothing */ - if (data->cancelled == 0) + if (!data->cancelled) goto out_free; /* In case of error, no cleanup! */ if (!data->rpc_done) @@ -1556,6 +2486,7 @@ out_free: } static const struct rpc_call_ops nfs4_open_confirm_ops = { + .rpc_call_prepare = nfs4_open_confirm_prepare, .rpc_call_done = nfs4_open_confirm_done, .rpc_release = nfs4_open_confirm_release, }; @@ -1565,7 +2496,7 @@ static const struct rpc_call_ops nfs4_open_confirm_ops = { */ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) { - struct nfs_server *server = NFS_SERVER(data->dir->d_inode); + struct nfs_server *server = NFS_SERVER(d_inode(data->dir)); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], @@ -1579,20 +2510,22 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) .callback_ops = &nfs4_open_confirm_ops, .callback_data = data, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, }; int status; + nfs4_init_sequence(&data->c_arg.seq_args, &data->c_res.seq_res, 1, + data->is_recover); kref_get(&data->kref); - data->rpc_done = 0; + data->rpc_done = false; data->rpc_status = 0; data->timestamp = jiffies; task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); if (status != 0) { - data->cancelled = 1; + data->cancelled = true; smp_wmb(); } else status = data->rpc_status; @@ -1605,6 +2538,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) struct nfs4_opendata *data = calldata; struct nfs4_state_owner *sp = data->owner; struct nfs_client *clp = sp->so_server->nfs_client; + enum open_claim_type4 claim = data->o_arg.claim; if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0) goto out_wait; @@ -1615,29 +2549,30 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) if (data->state != NULL) { struct nfs_delegation *delegation; - if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags)) + if (can_open_cached(data->state, data->o_arg.fmode, + data->o_arg.open_flags, claim)) goto out_no_action; rcu_read_lock(); - delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); - if (data->o_arg.claim != NFS4_OPEN_CLAIM_DELEGATE_CUR && - data->o_arg.claim != NFS4_OPEN_CLAIM_DELEG_CUR_FH && - can_open_delegated(delegation, data->o_arg.fmode)) + delegation = nfs4_get_valid_delegation(data->state->inode); + if (can_open_delegated(delegation, data->o_arg.fmode, claim)) goto unlock_no_action; rcu_read_unlock(); } /* Update client id. */ data->o_arg.clientid = clp->cl_clientid; - switch (data->o_arg.claim) { + switch (claim) { + default: + break; case NFS4_OPEN_CLAIM_PREVIOUS: case NFS4_OPEN_CLAIM_DELEG_CUR_FH: case NFS4_OPEN_CLAIM_DELEG_PREV_FH: data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0]; + fallthrough; case NFS4_OPEN_CLAIM_FH: task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; - nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); } data->timestamp = jiffies; - if (nfs4_setup_sequence(data->o_arg.server, + if (nfs4_setup_sequence(data->o_arg.server->nfs_client, &data->o_arg.seq_args, &data->o_res.seq_res, task) != 0) @@ -1646,14 +2581,19 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) /* Set the create mode (note dependency on the session type) */ data->o_arg.createmode = NFS4_CREATE_UNCHECKED; if (data->o_arg.open_flags & O_EXCL) { - data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE; - if (nfs4_has_persistent_session(clp)) + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1; + if (clp->cl_mvops->minor_version == 0) { + data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE; + /* don't put an ACCESS op in OPEN compound if O_EXCL, + * because ACCESS will return permission denied for + * all bits until close */ + data->o_res.access_request = data->o_arg.access = 0; + } else if (nfs4_has_persistent_session(clp)) data->o_arg.createmode = NFS4_CREATE_GUARDED; - else if (clp->cl_mvops->minor_version > 0) - data->o_arg.createmode = NFS4_CREATE_EXCLUSIVE4_1; } return; unlock_no_action: + trace_nfs4_cached_open(data->state); rcu_read_unlock(); out_no_action: task->tk_action = NULL; @@ -1667,7 +2607,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; - if (!nfs4_sequence_done(task, &data->o_res.seq_res)) + if (!nfs4_sequence_process(task, &data->o_res.seq_res)) return; if (task->tk_status == 0) { @@ -1689,7 +2629,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) nfs_confirm_seqid(&data->owner->so_seqid, 0); } - data->rpc_done = 1; + data->rpc_done = true; } static void nfs4_open_release(void *calldata) @@ -1697,11 +2637,13 @@ static void nfs4_open_release(void *calldata) struct nfs4_opendata *data = calldata; struct nfs4_state *state = NULL; - /* If this request hasn't been cancelled, do nothing */ - if (data->cancelled == 0) - goto out_free; /* In case of error, no cleanup! */ - if (data->rpc_status != 0 || !data->rpc_done) + if (data->rpc_status != 0 || !data->rpc_done) { + nfs_release_seqid(data->o_arg.seqid); + goto out_free; + } + /* If this request hasn't been cancelled, do nothing */ + if (!data->cancelled) goto out_free; /* In case we need an open_confirm, no cleanup! */ if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) @@ -1719,9 +2661,10 @@ static const struct rpc_call_ops nfs4_open_ops = { .rpc_release = nfs4_open_release, }; -static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) +static int nfs4_run_open_task(struct nfs4_opendata *data, + struct nfs_open_context *ctx) { - struct inode *dir = data->dir->d_inode; + struct inode *dir = d_inode(data->dir); struct nfs_server *server = NFS_SERVER(dir); struct nfs_openargs *o_arg = &data->o_arg; struct nfs_openres *o_res = &data->o_res; @@ -1738,62 +2681,72 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) .callback_ops = &nfs4_open_ops, .callback_data = data, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, }; int status; - nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); + if (nfs_server_capable(dir, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + kref_get(&data->kref); - data->rpc_done = 0; + data->rpc_done = false; data->rpc_status = 0; - data->cancelled = 0; - data->is_recover = 0; - if (isrecover) { - nfs4_set_sequence_privileged(&o_arg->seq_args); - data->is_recover = 1; + data->cancelled = false; + data->is_recover = false; + if (!ctx) { + nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 1); + data->is_recover = true; + task_setup_data.flags |= RPC_TASK_TIMEOUT; + } else { + nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 0); + pnfs_lgopen_prepare(data, ctx); } task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return PTR_ERR(task); - status = nfs4_wait_for_completion_rpc_task(task); - if (status != 0) { - data->cancelled = 1; - smp_wmb(); - } else - status = data->rpc_status; - rpc_put_task(task); + if (IS_ERR(task)) + return PTR_ERR(task); + status = rpc_wait_for_completion_task(task); + if (status != 0) { + data->cancelled = true; + smp_wmb(); + } else + status = data->rpc_status; + rpc_put_task(task); return status; } static int _nfs4_recover_proc_open(struct nfs4_opendata *data) { - struct inode *dir = data->dir->d_inode; + struct inode *dir = d_inode(data->dir); struct nfs_openres *o_res = &data->o_res; - int status; + int status; - status = nfs4_run_open_task(data, 1); + status = nfs4_run_open_task(data, NULL); if (status != 0 || !data->rpc_done) return status; nfs_fattr_map_and_free_names(NFS_SERVER(dir), &data->f_attr); - if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { + if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) status = _nfs4_proc_open_confirm(data); - if (status != 0) - return status; - } return status; } -static int nfs4_opendata_access(struct rpc_cred *cred, +/* + * Additional permission checks in order to distinguish between an + * open for read, and an open for execute. This works around the + * fact that NFSv4 OPEN treats read and execute permissions as being + * the same. + * Note that in the non-execute case, we want to turn off permission + * checking if we just created a new file (POSIX open() semantics). + */ +static int nfs4_opendata_access(const struct cred *cred, struct nfs4_opendata *opendata, - struct nfs4_state *state, fmode_t fmode, - int openflags) + struct nfs4_state *state, fmode_t fmode) { struct nfs_access_entry cache; - u32 mask; + u32 mask, flags; /* access call failed or for some reason the server doesn't * support any access modes -- defer access call until later */ @@ -1801,41 +2754,38 @@ static int nfs4_opendata_access(struct rpc_cred *cred, return 0; mask = 0; - /* don't check MAY_WRITE - a newly created file may not have - * write mode bits, but POSIX allows the creating process to write. - * use openflags to check for exec, because fmode won't - * always have FMODE_EXEC set when file open for exec. */ - if (openflags & __FMODE_EXEC) { + if (fmode & FMODE_EXEC) { /* ONLY check for exec rights */ - mask = MAY_EXEC; - } else if (fmode & FMODE_READ) - mask = MAY_READ; + if (S_ISDIR(state->inode->i_mode)) + mask = NFS4_ACCESS_LOOKUP; + else + mask = NFS4_ACCESS_EXECUTE; + } else if ((fmode & FMODE_READ) && !opendata->file_created) + mask = NFS4_ACCESS_READ; - cache.cred = cred; - cache.jiffies = jiffies; nfs_access_set_mask(&cache, opendata->o_res.access_result); - nfs_access_add_cache(state->inode, &cache); + nfs_access_add_cache(state->inode, &cache, cred); - if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) + flags = NFS4_ACCESS_READ | NFS4_ACCESS_EXECUTE | NFS4_ACCESS_LOOKUP; + if ((mask & ~cache.mask & flags) == 0) return 0; - /* even though OPEN succeeded, access is denied. Close the file */ - nfs4_close_state(state, fmode); return -EACCES; } /* * Note: On error, nfs4_proc_open will free the struct nfs4_opendata */ -static int _nfs4_proc_open(struct nfs4_opendata *data) +static int _nfs4_proc_open(struct nfs4_opendata *data, + struct nfs_open_context *ctx) { - struct inode *dir = data->dir->d_inode; + struct inode *dir = d_inode(data->dir); struct nfs_server *server = NFS_SERVER(dir); struct nfs_openargs *o_arg = &data->o_arg; struct nfs_openres *o_res = &data->o_res; int status; - status = nfs4_run_open_task(data, 0); + status = nfs4_run_open_task(data, ctx); if (!data->rpc_done) return status; if (status != 0) { @@ -1847,8 +2797,17 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) nfs_fattr_map_and_free_names(server, &data->f_attr); - if (o_arg->open_flags & O_CREAT) - update_changeattr(dir, &o_res->cinfo); + if (o_arg->open_flags & O_CREAT) { + if (o_arg->open_flags & O_EXCL) + data->file_created = true; + else if (o_res->cinfo.before != o_res->cinfo.after) + data->file_created = true; + if (data->file_created || + inode_peek_iversion_raw(dir) != o_res->cinfo.after) + nfs4_update_changeattr(dir, &o_res->cinfo, + o_res->f_attr->time_start, + NFS_INO_INVALID_DATA); + } if ((o_res->rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) == 0) server->caps &= ~NFS_CAP_POSIX_LOCK; if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { @@ -1856,14 +2815,15 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) if (status != 0) return status; } - if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) - _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); - return 0; -} + if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) { + struct nfs_fh *fh = &o_res->fh; -static int nfs4_recover_expired_lease(struct nfs_server *server) -{ - return nfs4_client_recover_expired_lease(server->nfs_client); + nfs4_sequence_free_slot(&o_res->seq_res); + if (o_arg->claim == NFS4_OPEN_CLAIM_FH) + fh = NFS_FH(d_inode(data->dentry)); + nfs4_proc_getattr(server, fh, o_res->f_attr, NULL); + } + return 0; } /* @@ -1876,10 +2836,15 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s struct nfs4_opendata *opendata; int ret; - opendata = nfs4_open_recoverdata_alloc(ctx, state, - NFS4_OPEN_CLAIM_FH); + opendata = nfs4_open_recoverdata_alloc(ctx, state, NFS4_OPEN_CLAIM_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); + /* + * We're not recovering a delegation, so ask for no delegation. + * Otherwise the recovery thread could deadlock with an outstanding + * delegation return. + */ + opendata->o_arg.open_flags = O_DIRECT; ret = nfs4_open_recover(opendata, state); if (ret == -ESTALE) d_drop(ctx->dentry); @@ -1895,6 +2860,7 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state do { err = _nfs4_open_expired(ctx, state); + trace_nfs4_open_expired(ctx, 0, err); if (nfs4_clear_cap_atomic_open_v1(server, err, &exception)) continue; switch (err) { @@ -1923,45 +2889,170 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } +static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state, + const nfs4_stateid *stateid) +{ + nfs_remove_bad_delegation(state->inode, stateid); + nfs_state_clear_delegation(state); +} + +static void nfs40_clear_delegation_stateid(struct nfs4_state *state) +{ + if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL) + nfs_finish_clear_delegation_stateid(state, NULL); +} + +static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + /* NFSv4.0 doesn't allow for delegation recovery on open expire */ + nfs40_clear_delegation_stateid(state); + nfs_state_clear_open_state_flags(state); + return nfs4_open_expired(sp, state); +} + +static int nfs40_test_and_free_expired_stateid(struct nfs_server *server, + nfs4_stateid *stateid, const struct cred *cred) +{ + return -NFS4ERR_BAD_STATEID; +} + #if defined(CONFIG_NFS_V4_1) -static void nfs41_clear_delegation_stateid(struct nfs4_state *state) +static int nfs41_test_and_free_expired_stateid(struct nfs_server *server, + nfs4_stateid *stateid, const struct cred *cred) +{ + int status; + + switch (stateid->type) { + default: + break; + case NFS4_INVALID_STATEID_TYPE: + case NFS4_SPECIAL_STATEID_TYPE: + case NFS4_FREED_STATEID_TYPE: + return -NFS4ERR_BAD_STATEID; + case NFS4_REVOKED_STATEID_TYPE: + goto out_free; + } + + status = nfs41_test_stateid(server, stateid, cred); + switch (status) { + case -NFS4ERR_EXPIRED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + break; + default: + return status; + } +out_free: + /* Ack the revoked state to the server */ + nfs41_free_stateid(server, stateid, cred, true); + return -NFS4ERR_EXPIRED; +} + +static int nfs41_check_delegation_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); - nfs4_stateid *stateid = &state->stateid; + nfs4_stateid stateid; struct nfs_delegation *delegation; - struct rpc_cred *cred = NULL; - int status = -NFS4ERR_BAD_STATEID; - - /* If a state reset has been done, test_stateid is unneeded */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - return; + const struct cred *cred = NULL; + int status, ret = NFS_OK; /* Get the delegation credential for use by test/free_stateid */ rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation != NULL && - nfs4_stateid_match(&delegation->stateid, stateid)) { - cred = get_rpccred(delegation->cred); - rcu_read_unlock(); - status = nfs41_test_stateid(server, stateid, cred); - } else + if (delegation == NULL) { rcu_read_unlock(); + nfs_state_clear_delegation(state); + return NFS_OK; + } - if (status != NFS_OK) { - /* Free the stateid unless the server explicitly - * informs us the stateid is unrecognized. */ - if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid, cred); - nfs_remove_bad_delegation(state->inode); + spin_lock(&delegation->lock); + nfs4_stateid_copy(&stateid, &delegation->stateid); - write_seqlock(&state->seqlock); - nfs4_stateid_copy(&state->stateid, &state->open_stateid); - write_sequnlock(&state->seqlock); - clear_bit(NFS_DELEGATED_STATE, &state->flags); + if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, + &delegation->flags)) { + spin_unlock(&delegation->lock); + rcu_read_unlock(); + return NFS_OK; } - if (cred != NULL) - put_rpccred(cred); + if (delegation->cred) + cred = get_cred(delegation->cred); + spin_unlock(&delegation->lock); + rcu_read_unlock(); + status = nfs41_test_and_free_expired_stateid(server, &stateid, cred); + trace_nfs4_test_delegation_stateid(state, NULL, status); + if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) + nfs_finish_clear_delegation_stateid(state, &stateid); + else + ret = status; + + put_cred(cred); + return ret; +} + +static void nfs41_delegation_recover_stateid(struct nfs4_state *state) +{ + nfs4_stateid tmp; + + if (test_bit(NFS_DELEGATED_STATE, &state->flags) && + nfs4_copy_delegation_stateid(state->inode, state->state, + &tmp, NULL) && + nfs4_stateid_match_other(&state->stateid, &tmp)) + nfs_state_set_delegation(state, &tmp, state->state); + else + nfs_state_clear_delegation(state); +} + +/** + * nfs41_check_expired_locks - possibly free a lock stateid + * + * @state: NFSv4 state for an inode + * + * Returns NFS_OK if recovery for this stateid is now finished. + * Otherwise a negative NFS4ERR value is returned. + */ +static int nfs41_check_expired_locks(struct nfs4_state *state) +{ + int status, ret = NFS_OK; + struct nfs4_lock_state *lsp, *prev = NULL; + struct nfs_server *server = NFS_SERVER(state->inode); + + if (!test_bit(LK_STATE_IN_USE, &state->flags)) + goto out; + + spin_lock(&state->state_lock); + list_for_each_entry(lsp, &state->lock_states, ls_locks) { + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { + const struct cred *cred = lsp->ls_state->owner->so_cred; + + refcount_inc(&lsp->ls_count); + spin_unlock(&state->state_lock); + + nfs4_put_lock_state(prev); + prev = lsp; + + status = nfs41_test_and_free_expired_stateid(server, + &lsp->ls_stateid, + cred); + trace_nfs4_test_lock_stateid(state, lsp, status); + if (status == -NFS4ERR_EXPIRED || + status == -NFS4ERR_BAD_STATEID) { + clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); + lsp->ls_stateid.type = NFS4_INVALID_STATEID_TYPE; + if (!recover_lost_locks) + set_bit(NFS_LOCK_LOST, &lsp->ls_flags); + } else if (status != NFS_OK) { + ret = status; + nfs4_put_lock_state(prev); + goto out; + } + spin_lock(&state->state_lock); + } + } + spin_unlock(&state->state_lock); + nfs4_put_lock_state(prev); +out: + return ret; } /** @@ -1976,35 +3067,35 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); nfs4_stateid *stateid = &state->open_stateid; - struct rpc_cred *cred = state->owner->so_cred; + const struct cred *cred = state->owner->so_cred; int status; - /* If a state reset has been done, test_stateid is unneeded */ - if ((test_bit(NFS_O_RDONLY_STATE, &state->flags) == 0) && - (test_bit(NFS_O_WRONLY_STATE, &state->flags) == 0) && - (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0)) + if (test_bit(NFS_OPEN_STATE, &state->flags) == 0) return -NFS4ERR_BAD_STATEID; - - status = nfs41_test_stateid(server, stateid, cred); - if (status != NFS_OK) { - /* Free the stateid unless the server explicitly - * informs us the stateid is unrecognized. */ - if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid, cred); - - clear_bit(NFS_O_RDONLY_STATE, &state->flags); - clear_bit(NFS_O_WRONLY_STATE, &state->flags); - clear_bit(NFS_O_RDWR_STATE, &state->flags); - clear_bit(NFS_OPEN_STATE, &state->flags); + status = nfs41_test_and_free_expired_stateid(server, stateid, cred); + trace_nfs4_test_open_stateid(state, NULL, status); + if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) { + nfs_state_clear_open_state_flags(state); + stateid->type = NFS4_INVALID_STATEID_TYPE; + return status; } - return status; + if (nfs_open_stateid_recover_openmode(state)) + return -NFS4ERR_OPENMODE; + return NFS_OK; } static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { int status; - nfs41_clear_delegation_stateid(state); + status = nfs41_check_delegation_stateid(state); + if (status != NFS_OK) + return status; + nfs41_delegation_recover_stateid(state); + + status = nfs41_check_expired_locks(state); + if (status != NFS_OK) + return status; status = nfs41_check_open_stateid(state); if (status != NFS_OK) status = nfs4_open_expired(sp, state); @@ -2017,68 +3108,115 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st * fields corresponding to attributes that were used to store the verifier. * Make sure we clobber those fields in the later setattr call */ -static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr) +static unsigned nfs4_exclusive_attrset(struct nfs4_opendata *opendata, + struct iattr *sattr, struct nfs4_label **label) { - if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) && - !(sattr->ia_valid & ATTR_ATIME_SET)) - sattr->ia_valid |= ATTR_ATIME; + const __u32 *bitmask = opendata->o_arg.server->exclcreat_bitmask; + __u32 attrset[3]; + unsigned ret; + unsigned i; + + for (i = 0; i < ARRAY_SIZE(attrset); i++) { + attrset[i] = opendata->o_res.attrset[i]; + if (opendata->o_arg.createmode == NFS4_CREATE_EXCLUSIVE4_1) + attrset[i] &= ~bitmask[i]; + } - if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) && - !(sattr->ia_valid & ATTR_MTIME_SET)) - sattr->ia_valid |= ATTR_MTIME; + ret = (opendata->o_arg.createmode == NFS4_CREATE_EXCLUSIVE) ? + sattr->ia_valid : 0; + + if ((attrset[1] & (FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET))) { + if (sattr->ia_valid & ATTR_ATIME_SET) + ret |= ATTR_ATIME_SET; + else + ret |= ATTR_ATIME; + } + + if ((attrset[1] & (FATTR4_WORD1_TIME_MODIFY|FATTR4_WORD1_TIME_MODIFY_SET))) { + if (sattr->ia_valid & ATTR_MTIME_SET) + ret |= ATTR_MTIME_SET; + else + ret |= ATTR_MTIME; + } + + if (!(attrset[2] & FATTR4_WORD2_SECURITY_LABEL)) + *label = NULL; + return ret; } static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, - fmode_t fmode, - int flags, struct nfs_open_context *ctx) { struct nfs4_state_owner *sp = opendata->owner; struct nfs_server *server = sp->so_server; struct dentry *dentry; struct nfs4_state *state; - unsigned int seq; + fmode_t acc_mode = _nfs4_ctx_to_accessmode(ctx); + struct inode *dir = d_inode(opendata->dir); + unsigned long dir_verifier; int ret; - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); + dir_verifier = nfs_save_change_attribute(dir); - ret = _nfs4_proc_open(opendata); + ret = _nfs4_proc_open(opendata, ctx); if (ret != 0) goto out; - state = nfs4_opendata_to_nfs4_state(opendata); + state = _nfs4_opendata_to_nfs4_state(opendata); ret = PTR_ERR(state); if (IS_ERR(state)) goto out; + ctx->state = state; if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); + if (opendata->o_res.rflags & NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK) + set_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags); + if (opendata->o_res.rflags & NFS4_OPEN_RESULT_PRESERVE_UNLINKED) + set_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(state->inode)->flags); dentry = opendata->dentry; - if (dentry->d_inode == NULL) { - /* FIXME: Is this d_drop() ever needed? */ + if (d_really_is_negative(dentry)) { + struct dentry *alias; d_drop(dentry); - dentry = d_add_unique(dentry, igrab(state->inode)); - if (dentry == NULL) { - dentry = opendata->dentry; - } else if (dentry != ctx->dentry) { + alias = d_splice_alias(igrab(state->inode), dentry); + /* d_splice_alias() can't fail here - it's a non-directory */ + if (alias) { dput(ctx->dentry); - ctx->dentry = dget(dentry); + ctx->dentry = dentry = alias; } - nfs_set_verifier(dentry, - nfs_save_change_attribute(opendata->dir->d_inode)); } - ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags); + switch(opendata->o_arg.claim) { + default: + break; + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + if (!opendata->rpc_done) + break; + if (opendata->o_res.delegation.type != 0) + dir_verifier = nfs_save_change_attribute(dir); + nfs_set_verifier(dentry, dir_verifier); + } + + /* Parse layoutget results before we check for access */ + pnfs_parse_lgopen(state->inode, opendata->lgp, ctx); + + ret = nfs4_opendata_access(sp->so_cred, opendata, state, acc_mode); if (ret != 0) goto out; - ctx->state = state; - if (dentry->d_inode == state->inode) { + if (d_inode(dentry) == state->inode) nfs_inode_attach_open_context(ctx); - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) - nfs4_schedule_stateid_recovery(server, state); - } + out: + if (!opendata->cancelled) { + if (opendata->lgp) { + nfs4_lgopen_release(opendata->lgp); + opendata->lgp = NULL; + } + nfs4_sequence_free_slot(&opendata->o_res.seq_res); + } return ret; } @@ -2088,19 +3226,20 @@ out: static int _nfs4_do_open(struct inode *dir, struct nfs_open_context *ctx, int flags, - struct iattr *sattr, - struct nfs4_label *label) + const struct nfs4_open_createattrs *c, + int *opened) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *opendata; struct dentry *dentry = ctx->dentry; - struct rpc_cred *cred = ctx->cred; + const struct cred *cred = ctx->cred; struct nfs4_threshold **ctx_th = &ctx->mdsthreshold; - fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); + fmode_t fmode = _nfs4_ctx_to_openmode(ctx); enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; - struct nfs4_label *olabel = NULL; + struct iattr *sattr = c->sattr; + struct nfs4_label *label = c->label; int status; /* Protect against reboot recovery conflicts */ @@ -2110,71 +3249,70 @@ static int _nfs4_do_open(struct inode *dir, dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); goto out_err; } - status = nfs4_recover_expired_lease(server); + status = nfs4_client_recover_expired_lease(server->nfs_client); if (status != 0) goto err_put_state_owner; - if (dentry->d_inode != NULL) - nfs4_return_incompatible_delegation(dentry->d_inode, fmode); + if (d_really_is_positive(dentry)) + nfs4_return_incompatible_delegation(d_inode(dentry), fmode); status = -ENOMEM; - if (dentry->d_inode) + if (d_really_is_positive(dentry)) claim = NFS4_OPEN_CLAIM_FH; - opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, - label, claim, GFP_KERNEL); + opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, + c, claim, GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; - if (label) { - olabel = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(olabel)) { - status = PTR_ERR(olabel); - goto err_opendata_put; + if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { + if (!opendata->f_attr.mdsthreshold) { + opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); + if (!opendata->f_attr.mdsthreshold) + goto err_opendata_put; } - } - - if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { - opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); - if (!opendata->f_attr.mdsthreshold) - goto err_free_label; opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; } - if (dentry->d_inode != NULL) - opendata->state = nfs4_get_open_state(dentry->d_inode, sp); + if (d_really_is_positive(dentry)) + opendata->state = nfs4_get_open_state(d_inode(dentry), sp); - status = _nfs4_open_and_get_state(opendata, fmode, flags, ctx); + status = _nfs4_open_and_get_state(opendata, ctx); if (status != 0) - goto err_free_label; + goto err_opendata_put; state = ctx->state; - if ((opendata->o_arg.open_flags & O_EXCL) && + if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) && (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) { - nfs4_exclusive_attrset(opendata, sattr); - - nfs_fattr_init(opendata->o_res.f_attr); - status = nfs4_do_setattr(state->inode, cred, - opendata->o_res.f_attr, sattr, - state, label, olabel); - if (status == 0) { - nfs_setattr_update_inode(state->inode, sattr); - nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); - nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); + unsigned attrs = nfs4_exclusive_attrset(opendata, sattr, &label); + /* + * send create attributes which was not set by open + * with an extra setattr. + */ + if (attrs || label) { + unsigned ia_old = sattr->ia_valid; + + sattr->ia_valid = attrs; + nfs_fattr_init(opendata->o_res.f_attr); + status = nfs4_do_setattr(state->inode, cred, + opendata->o_res.f_attr, sattr, + ctx, label); + if (status == 0) { + nfs_setattr_update_inode(state->inode, sattr, + opendata->o_res.f_attr); + nfs_setsecurity(state->inode, opendata->o_res.f_attr); + } + sattr->ia_valid = ia_old; } } + if (opened && opendata->file_created) + *opened = 1; - if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) + if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) { *ctx_th = opendata->f_attr.mdsthreshold; - else - kfree(opendata->f_attr.mdsthreshold); - opendata->f_attr.mdsthreshold = NULL; - - nfs4_label_free(olabel); + opendata->f_attr.mdsthreshold = NULL; + } nfs4_opendata_put(opendata); nfs4_put_state_owner(sp); return 0; -err_free_label: - nfs4_label_free(olabel); err_opendata_put: - kfree(opendata->f_attr.mdsthreshold); nfs4_opendata_put(opendata); err_put_state_owner: nfs4_put_state_owner(sp); @@ -2187,16 +3325,28 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct nfs_open_context *ctx, int flags, struct iattr *sattr, - struct nfs4_label *label) + struct nfs4_label *label, + int *opened) { struct nfs_server *server = NFS_SERVER(dir); - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; struct nfs4_state *res; + struct nfs4_open_createattrs c = { + .label = label, + .sattr = sattr, + .verf = { + [0] = (__u32)jiffies, + [1] = (__u32)current->pid, + }, + }; int status; do { - status = _nfs4_do_open(dir, ctx, flags, sattr, label); + status = _nfs4_do_open(dir, ctx, flags, &c, opened); res = ctx->state; + trace_nfs4_open_file(ctx, flags, status); if (status == 0) break; /* NOTE: BAD_SEQID means the server and client disagree about the @@ -2227,6 +3377,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } + if (status == -NFS4ERR_EXPIRED) { + nfs4_schedule_lease_recovery(server->nfs_client); + exception.retry = 1; + continue; + } if (status == -EAGAIN) { /* We must have found a delegation */ exception.retry = 1; @@ -2240,76 +3395,106 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, return res; } -static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, - struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs4_state *state, struct nfs4_label *ilabel, - struct nfs4_label *olabel) +static int _nfs4_do_setattr(struct inode *inode, + struct nfs_setattrargs *arg, + struct nfs_setattrres *res, + const struct cred *cred, + struct nfs_open_context *ctx) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs_setattrargs arg = { - .fh = NFS_FH(inode), - .iap = sattr, - .server = server, - .bitmask = server->attr_bitmask, - .label = ilabel, - }; - struct nfs_setattrres res = { - .fattr = fattr, - .label = olabel, - .server = server, - }; - struct rpc_message msg = { + struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], - .rpc_argp = &arg, - .rpc_resp = &res, + .rpc_argp = arg, + .rpc_resp = res, .rpc_cred = cred, - }; + }; + const struct cred *delegation_cred = NULL; unsigned long timestamp = jiffies; - fmode_t fmode; bool truncate; int status; - arg.bitmask = nfs4_bitmask(server, ilabel); - if (ilabel) - arg.bitmask = nfs4_bitmask(server, olabel); - - nfs_fattr_init(fattr); + nfs_fattr_init(res->fattr); /* Servers should only apply open mode checks for file size changes */ - truncate = (sattr->ia_valid & ATTR_SIZE) ? true : false; - fmode = truncate ? FMODE_WRITE : FMODE_READ; + truncate = (arg->iap->ia_valid & ATTR_SIZE) ? true : false; + if (!truncate) { + nfs4_inode_make_writeable(inode); + goto zero_stateid; + } - if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) { + if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) { /* Use that stateid */ - } else if (truncate && state != NULL && nfs4_valid_open_stateid(state)) { - struct nfs_lockowner lockowner = { - .l_owner = current->files, - .l_pid = current->tgid, - }; - nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, - &lockowner); - } else - nfs4_stateid_copy(&arg.stateid, &zero_stateid); + } else if (ctx != NULL && ctx->state) { + struct nfs_lock_context *l_ctx; + if (!nfs4_valid_open_stateid(ctx->state)) + return -EBADF; + l_ctx = nfs_get_lock_context(ctx); + if (IS_ERR(l_ctx)) + return PTR_ERR(l_ctx); + status = nfs4_select_rw_stateid(ctx->state, FMODE_WRITE, l_ctx, + &arg->stateid, &delegation_cred); + nfs_put_lock_context(l_ctx); + if (status == -EIO) + return -EBADF; + else if (status == -EAGAIN) + goto zero_stateid; + } else { +zero_stateid: + nfs4_stateid_copy(&arg->stateid, &zero_stateid); + } + if (delegation_cred) + msg.rpc_cred = delegation_cred; - status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); - if (status == 0 && state != NULL) + status = nfs4_call_sync(server->client, server, &msg, &arg->seq_args, &res->seq_res, 1); + + put_cred(delegation_cred); + if (status == 0 && ctx != NULL) renew_lease(server, timestamp); + trace_nfs4_setattr(inode, &arg->stateid, status); return status; } -static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, +static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs4_state *state, struct nfs4_label *ilabel, - struct nfs4_label *olabel) + struct nfs_open_context *ctx, struct nfs4_label *ilabel) { struct nfs_server *server = NFS_SERVER(inode); + __u32 bitmask[NFS4_BITMASK_SZ]; + struct nfs4_state *state = ctx ? ctx->state : NULL; + struct nfs_setattrargs arg = { + .fh = NFS_FH(inode), + .iap = sattr, + .server = server, + .bitmask = bitmask, + .label = ilabel, + }; + struct nfs_setattrres res = { + .fattr = fattr, + .server = server, + }; struct nfs4_exception exception = { .state = state, .inode = inode, + .stateid = &arg.stateid, }; + unsigned long adjust_flags = NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_CTIME; int err; + + if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID)) + adjust_flags |= NFS_INO_INVALID_MODE; + if (sattr->ia_valid & (ATTR_UID | ATTR_GID)) + adjust_flags |= NFS_INO_INVALID_OTHER; + if (sattr->ia_valid & ATTR_ATIME) + adjust_flags |= NFS_INO_INVALID_ATIME; + if (sattr->ia_valid & ATTR_MTIME) + adjust_flags |= NFS_INO_INVALID_MTIME; + do { - err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel); + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), + inode, adjust_flags); + + err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx); switch (err) { case -NFS4ERR_OPENMODE: if (!(sattr->ia_valid & ATTR_SIZE)) { @@ -2332,15 +3517,126 @@ out: return err; } +static bool +nfs4_wait_on_layoutreturn(struct inode *inode, struct rpc_task *task) +{ + if (inode == NULL || !nfs_have_layout(inode)) + return false; + + return pnfs_wait_on_layoutreturn(inode, task); +} + +/* + * Update the seqid of an open stateid + */ +static void nfs4_sync_open_stateid(nfs4_stateid *dst, + struct nfs4_state *state) +{ + __be32 seqid_open; + u32 dst_seqid; + int seq; + + for (;;) { + if (!nfs4_valid_open_stateid(state)) + break; + seq = read_seqbegin(&state->seqlock); + if (!nfs4_state_match_open_stateid_other(state, dst)) { + nfs4_stateid_copy(dst, &state->open_stateid); + if (read_seqretry(&state->seqlock, seq)) + continue; + break; + } + seqid_open = state->open_stateid.seqid; + if (read_seqretry(&state->seqlock, seq)) + continue; + + dst_seqid = be32_to_cpu(dst->seqid); + if ((s32)(dst_seqid - be32_to_cpu(seqid_open)) < 0) + dst->seqid = seqid_open; + break; + } +} + +/* + * Update the seqid of an open stateid after receiving + * NFS4ERR_OLD_STATEID + */ +static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst, + struct nfs4_state *state) +{ + __be32 seqid_open; + u32 dst_seqid; + bool ret; + int seq, status = -EAGAIN; + DEFINE_WAIT(wait); + + for (;;) { + ret = false; + if (!nfs4_valid_open_stateid(state)) + break; + seq = read_seqbegin(&state->seqlock); + if (!nfs4_state_match_open_stateid_other(state, dst)) { + if (read_seqretry(&state->seqlock, seq)) + continue; + break; + } + + write_seqlock(&state->seqlock); + seqid_open = state->open_stateid.seqid; + + dst_seqid = be32_to_cpu(dst->seqid); + + /* Did another OPEN bump the state's seqid? try again: */ + if ((s32)(be32_to_cpu(seqid_open) - dst_seqid) > 0) { + dst->seqid = seqid_open; + write_sequnlock(&state->seqlock); + ret = true; + break; + } + + /* server says we're behind but we haven't seen the update yet */ + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); + prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); + write_sequnlock(&state->seqlock); + trace_nfs4_close_stateid_update_wait(state->inode, dst, 0); + + if (fatal_signal_pending(current) || nfs_current_task_exiting()) + status = -EINTR; + else + if (schedule_timeout(5*HZ) != 0) + status = 0; + + finish_wait(&state->waitq, &wait); + + if (!status) + continue; + if (status == -EINTR) + break; + + /* we slept the whole 5 seconds, we must have lost a seqid */ + dst->seqid = cpu_to_be32(dst_seqid + 1); + ret = true; + break; + } + + return ret; +} + struct nfs4_closedata { struct inode *inode; struct nfs4_state *state; struct nfs_closeargs arg; struct nfs_closeres res; + struct { + struct nfs4_layoutreturn_args arg; + struct nfs4_layoutreturn_res res; + struct nfs4_xdr_opaque_data ld_private; + u32 roc_barrier; + bool roc; + } lr; struct nfs_fattr fattr; unsigned long timestamp; - bool roc; - u32 roc_barrier; + unsigned short retrans; }; static void nfs4_free_closedata(void *data) @@ -2349,8 +3645,9 @@ static void nfs4_free_closedata(void *data) struct nfs4_state_owner *sp = calldata->state->owner; struct super_block *sb = calldata->state->inode->i_sb; - if (calldata->roc) - pnfs_roc_release(calldata->state->inode); + if (calldata->lr.roc) + pnfs_roc_release(&calldata->lr.arg, &calldata->lr.res, + calldata->res.lr_ret); nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); @@ -2358,61 +3655,80 @@ static void nfs4_free_closedata(void *data) kfree(calldata); } -static void nfs4_close_clear_stateid_flags(struct nfs4_state *state, - fmode_t fmode) -{ - spin_lock(&state->owner->so_lock); - clear_bit(NFS_O_RDWR_STATE, &state->flags); - switch (fmode & (FMODE_READ|FMODE_WRITE)) { - case FMODE_WRITE: - clear_bit(NFS_O_RDONLY_STATE, &state->flags); - break; - case FMODE_READ: - clear_bit(NFS_O_WRONLY_STATE, &state->flags); - break; - case 0: - clear_bit(NFS_O_RDONLY_STATE, &state->flags); - clear_bit(NFS_O_WRONLY_STATE, &state->flags); - clear_bit(NFS_OPEN_STATE, &state->flags); - } - spin_unlock(&state->owner->so_lock); -} - static void nfs4_close_done(struct rpc_task *task, void *data) { struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct nfs_server *server = NFS_SERVER(calldata->inode); + nfs4_stateid *res_stateid = NULL; + struct nfs4_exception exception = { + .state = state, + .inode = calldata->inode, + .stateid = &calldata->arg.stateid, + .retrans = calldata->retrans, + }; - dprintk("%s: begin!\n", __func__); if (!nfs4_sequence_done(task, &calldata->res.seq_res)) return; - /* hmm. we are done with the inode, and in the process of freeing + trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); + + /* Handle Layoutreturn errors */ + if (pnfs_roc_done(task, &calldata->arg.lr_args, &calldata->res.lr_res, + &calldata->res.lr_ret) == -EAGAIN) + goto out_restart; + + /* hmm. we are done with the inode, and in the process of freeing * the state_owner. we keep this around to process errors */ switch (task->tk_status) { case 0: - if (calldata->roc) - pnfs_roc_set_barrier(state->inode, - calldata->roc_barrier); - nfs_set_open_stateid(state, &calldata->res.stateid, 0); + res_stateid = &calldata->res.stateid; renew_lease(server, calldata->timestamp); - nfs4_close_clear_stateid_flags(state, - calldata->arg.fmode); break; - case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_ACCESS: + if (calldata->arg.bitmask != NULL) { + calldata->arg.bitmask = NULL; + calldata->res.fattr = NULL; + goto out_restart; + + } + break; case -NFS4ERR_OLD_STATEID: - case -NFS4ERR_BAD_STATEID: + /* Did we race with OPEN? */ + if (nfs4_refresh_open_old_stateid(&calldata->arg.stateid, + state)) + goto out_restart; + goto out_release; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: + nfs4_free_revoked_stateid(server, + &calldata->arg.stateid, + task->tk_msg.rpc_cred); + fallthrough; + case -NFS4ERR_BAD_STATEID: if (calldata->arg.fmode == 0) break; + fallthrough; default: - if (nfs4_async_handle_error(task, server, state) == -EAGAIN) - rpc_restart_call_prepare(task); - } + task->tk_status = nfs4_async_handle_exception(task, + server, task->tk_status, &exception); + calldata->retrans = exception.retrans; + if (exception.retry) + goto out_restart; + } + nfs_clear_open_stateid(state, &calldata->arg.stateid, + res_stateid, calldata->arg.fmode); +out_release: + task->tk_status = 0; nfs_release_seqid(calldata->arg.seqid); - nfs_refresh_inode(calldata->inode, calldata->res.fattr); - dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); + nfs_refresh_inode(calldata->inode, &calldata->fattr); + dprintk("%s: ret = %d\n", __func__, task->tk_status); + return; +out_restart: + task->tk_status = 0; + rpc_restart_call_prepare(task); + goto out_release; } static void nfs4_close_prepare(struct rpc_task *task, void *data) @@ -2420,28 +3736,36 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct inode *inode = calldata->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layout_hdr *lo; + bool is_rdonly, is_wronly, is_rdwr; int call_close = 0; - dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) goto out_wait; task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - calldata->arg.fmode = FMODE_READ|FMODE_WRITE; spin_lock(&state->owner->so_lock); + is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); + is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); + is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); /* Calculate the change in open mode */ + calldata->arg.fmode = 0; if (state->n_rdwr == 0) { - if (state->n_rdonly == 0) { - call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); - call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); - calldata->arg.fmode &= ~FMODE_READ; - } - if (state->n_wronly == 0) { - call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); - call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); - calldata->arg.fmode &= ~FMODE_WRITE; - } - } + if (state->n_rdonly == 0) + call_close |= is_rdonly; + else if (is_rdonly) + calldata->arg.fmode |= FMODE_READ; + if (state->n_wronly == 0) + call_close |= is_wronly; + else if (is_wronly) + calldata->arg.fmode |= FMODE_WRITE; + if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE)) + call_close |= is_rdwr; + } else if (is_rdwr) + calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; + + nfs4_sync_open_stateid(&calldata->arg.stateid, state); if (!nfs4_valid_open_stateid(state)) call_close = 0; spin_unlock(&state->owner->so_lock); @@ -2451,23 +3775,44 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_no_action; } - if (calldata->arg.fmode == 0) { + if (!calldata->lr.roc && nfs4_wait_on_layoutreturn(inode, task)) { + nfs_release_seqid(calldata->arg.seqid); + goto out_wait; + } + + lo = calldata->arg.lr_args ? calldata->arg.lr_args->layout : NULL; + if (lo && !pnfs_layout_is_valid(lo)) { + calldata->arg.lr_args = NULL; + calldata->res.lr_res = NULL; + } + + if (calldata->arg.fmode == 0) task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; - if (calldata->roc && - pnfs_roc_drain(inode, &calldata->roc_barrier, task)) { - nfs_release_seqid(calldata->arg.seqid); - goto out_wait; - } + + if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) { + /* Close-to-open cache consistency revalidation */ + if (!nfs4_have_delegation(inode, FMODE_READ, 0)) { + nfs4_bitmask_set(calldata->arg.bitmask_store, + server->cache_consistency_bitmask, + inode, 0); + calldata->arg.bitmask = calldata->arg.bitmask_store; + } else + calldata->arg.bitmask = NULL; } - nfs_fattr_init(calldata->res.fattr); + calldata->arg.share_access = + nfs4_fmode_to_share_access(calldata->arg.fmode); + + if (calldata->res.fattr == NULL) + calldata->arg.bitmask = NULL; + else if (calldata->arg.bitmask == NULL) + calldata->res.fattr = NULL; calldata->timestamp = jiffies; - if (nfs4_setup_sequence(NFS_SERVER(inode), + if (nfs4_setup_sequence(NFS_SERVER(inode)->nfs_client, &calldata->arg.seq_args, &calldata->res.seq_res, task) != 0) nfs_release_seqid(calldata->arg.seqid); - dprintk("%s: done!\n", __func__); return; out_no_action: task->tk_action = NULL; @@ -2495,6 +3840,7 @@ static const struct rpc_call_ops nfs4_close_ops = { int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) { struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); struct nfs4_closedata *calldata; struct nfs4_state_owner *sp = state->owner; struct rpc_task *task; @@ -2507,28 +3853,43 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) .rpc_message = &msg, .callback_ops = &nfs4_close_ops, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, }; int status = -ENOMEM; + if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, + &task_setup_data.rpc_client, &msg); + calldata = kzalloc(sizeof(*calldata), gfp_mask); if (calldata == NULL) goto out; - nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1, 0); calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); - calldata->arg.stateid = &state->open_stateid; + if (!nfs4_copy_open_stateid(&calldata->arg.stateid, state)) + goto out_free_calldata; /* Serialization for the sequence id */ - calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask); - if (calldata->arg.seqid == NULL) + alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; + calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask); + if (IS_ERR(calldata->arg.seqid)) goto out_free_calldata; + nfs_fattr_init(&calldata->fattr); calldata->arg.fmode = 0; - calldata->arg.bitmask = server->cache_consistency_bitmask; + calldata->lr.arg.ld_private = &calldata->lr.ld_private; calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; - calldata->roc = pnfs_roc(state->inode); + calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; + calldata->lr.roc = pnfs_roc(state->inode, + &calldata->lr.arg, &calldata->lr.res, msg.rpc_cred); + if (calldata->lr.roc) { + calldata->arg.lr_args = &calldata->lr.arg; + calldata->res.lr_res = &calldata->lr.res; + } nfs_sb_active(calldata->inode->i_sb); msg.rpc_argp = &calldata->arg; @@ -2551,15 +3912,16 @@ out: } static struct inode * -nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) +nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, + int open_flags, struct iattr *attr, int *opened) { struct nfs4_state *state; - struct nfs4_label l = {0, 0, 0, NULL}, *label = NULL; + struct nfs4_label l, *label; label = nfs4_label_init_security(dir, ctx->dentry, attr, &l); /* Protect against concurrent sillydeletes */ - state = nfs4_do_open(dir, ctx, open_flags, attr, label); + state = nfs4_do_open(dir, ctx, open_flags, attr, label, opened); nfs4_label_release_security(label); @@ -2570,18 +3932,42 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) { + struct dentry *dentry = ctx->dentry; if (ctx->state == NULL) return; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + nfs4_inode_set_return_delegation_on_close(d_inode(dentry)); if (is_sync) - nfs4_close_sync(ctx->state, ctx->mode); + nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx)); else - nfs4_close_state(ctx->state, ctx->mode); + nfs4_close_state(ctx->state, _nfs4_ctx_to_openmode(ctx)); +} + +#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) +#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_OPEN_ARGUMENTS - 1UL) + +#define FATTR4_WORD2_NFS42_TIME_DELEG_MASK \ + (FATTR4_WORD2_TIME_DELEG_MODIFY|FATTR4_WORD2_TIME_DELEG_ACCESS) +static bool nfs4_server_delegtime_capable(struct nfs4_server_caps_res *res) +{ + u32 share_access_want = res->open_caps.oa_share_access_want[0]; + u32 attr_bitmask = res->attr_bitmask[2]; + + return (share_access_want & NFS4_SHARE_WANT_DELEG_TIMESTAMPS) && + ((attr_bitmask & FATTR4_WORD2_NFS42_TIME_DELEG_MASK) == + FATTR4_WORD2_NFS42_TIME_DELEG_MASK); } static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { + u32 minorversion = server->nfs_client->cl_minorversion; + u32 bitmask[3] = { + [0] = FATTR4_WORD0_SUPPORTED_ATTRS, + }; struct nfs4_server_caps_arg args = { .fhandle = fhandle, + .bitmask = bitmask, }; struct nfs4_server_caps_res res = {}; struct rpc_message msg = { @@ -2590,51 +3976,115 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f .rpc_resp = &res, }; int status; + int i; + + bitmask[0] = FATTR4_WORD0_SUPPORTED_ATTRS | + FATTR4_WORD0_FH_EXPIRE_TYPE | + FATTR4_WORD0_LINK_SUPPORT | + FATTR4_WORD0_SYMLINK_SUPPORT | + FATTR4_WORD0_ACLSUPPORT | + FATTR4_WORD0_CASE_INSENSITIVE | + FATTR4_WORD0_CASE_PRESERVING; + if (minorversion) + bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT; + if (minorversion > 1) + bitmask[2] |= FATTR4_WORD2_OPEN_ARGUMENTS; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status == 0) { + bitmask[0] = (FATTR4_WORD0_SUPPORTED_ATTRS | + FATTR4_WORD0_FH_EXPIRE_TYPE | + FATTR4_WORD0_LINK_SUPPORT | + FATTR4_WORD0_SYMLINK_SUPPORT | + FATTR4_WORD0_ACLSUPPORT | + FATTR4_WORD0_CASE_INSENSITIVE | + FATTR4_WORD0_CASE_PRESERVING) & + res.attr_bitmask[0]; + /* Sanity check the server answers */ + switch (minorversion) { + case 0: + res.attr_bitmask[1] &= FATTR4_WORD1_NFS40_MASK; + res.attr_bitmask[2] = 0; + break; + case 1: + res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK; + bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT & + res.attr_bitmask[2]; + break; + case 2: + res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; + bitmask[2] = (FATTR4_WORD2_SUPPATTR_EXCLCREAT | + FATTR4_WORD2_OPEN_ARGUMENTS) & + res.attr_bitmask[2]; + } memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); - server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| - NFS_CAP_SYMLINKS|NFS_CAP_FILEID| - NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER| - NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME| - NFS_CAP_CTIME|NFS_CAP_MTIME); - if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) + server->caps &= + ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS | + NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS | + NFS_CAP_OPEN_XOR | NFS_CAP_DELEGTIME); + server->fattr_valid = NFS_ATTR_FATTR_V4; + if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && + res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) server->caps |= NFS_CAP_ACLS; if (res.has_links != 0) server->caps |= NFS_CAP_HARDLINKS; if (res.has_symlinks != 0) server->caps |= NFS_CAP_SYMLINKS; - if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID) - server->caps |= NFS_CAP_FILEID; - if (res.attr_bitmask[1] & FATTR4_WORD1_MODE) - server->caps |= NFS_CAP_MODE; - if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS) - server->caps |= NFS_CAP_NLINK; - if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER) - server->caps |= NFS_CAP_OWNER; - if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP) - server->caps |= NFS_CAP_OWNER_GROUP; - if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS) - server->caps |= NFS_CAP_ATIME; - if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA) - server->caps |= NFS_CAP_CTIME; - if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) - server->caps |= NFS_CAP_MTIME; + if (res.case_insensitive) + server->caps |= NFS_CAP_CASE_INSENSITIVE; + if (res.case_preserving) + server->caps |= NFS_CAP_CASE_PRESERVING; #ifdef CONFIG_NFS_V4_SECURITY_LABEL if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL) server->caps |= NFS_CAP_SECURITY_LABEL; #endif + if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS) + server->caps |= NFS_CAP_FS_LOCATIONS; + if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID)) + server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE)) + server->fattr_valid &= ~NFS_ATTR_FATTR_MODE; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)) + server->fattr_valid &= ~NFS_ATTR_FATTR_NLINK; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER)) + server->fattr_valid &= ~(NFS_ATTR_FATTR_OWNER | + NFS_ATTR_FATTR_OWNER_NAME); + if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)) + server->fattr_valid &= ~(NFS_ATTR_FATTR_GROUP | + NFS_ATTR_FATTR_GROUP_NAME); + if (!(res.attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)) + server->fattr_valid &= ~NFS_ATTR_FATTR_SPACE_USED; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)) + server->fattr_valid &= ~NFS_ATTR_FATTR_ATIME; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)) + server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)) + server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)) + server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME; + if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_CREATE)) + server->fattr_valid &= ~NFS_ATTR_FATTR_BTIME; memcpy(server->attr_bitmask_nl, res.attr_bitmask, sizeof(server->attr_bitmask)); + server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + + if (res.open_caps.oa_share_access_want[0] & + NFS4_SHARE_WANT_OPEN_XOR_DELEGATION) + server->caps |= NFS_CAP_OPEN_XOR; + if (nfs4_server_delegtime_capable(&res)) + server->caps |= NFS_CAP_DELEGTIME; - if (server->caps & NFS_CAP_SECURITY_LABEL) { - server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - res.attr_bitmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - } memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; + server->cache_consistency_bitmask[2] = 0; + + /* Avoid a regression due to buggy server */ + for (i = 0; i < ARRAY_SIZE(res.exclcreat_bitmask); i++) + res.exclcreat_bitmask[i] &= res.attr_bitmask[i]; + memcpy(server->exclcreat_bitmask, res.exclcreat_bitmask, + sizeof(server->exclcreat_bitmask)); + server->acl_bitmask = res.acl_bitmask; server->fh_expire_type = res.fh_expire_type; } @@ -2644,8 +4094,11 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; + do { err = nfs4_handle_exception(server, _nfs4_server_capabilities(server, fhandle), @@ -2654,16 +4107,157 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) return err; } +static void test_fs_location_for_trunking(struct nfs4_fs_location *location, + struct nfs_client *clp, + struct nfs_server *server) +{ + int i; + + for (i = 0; i < location->nservers; i++) { + struct nfs4_string *srv_loc = &location->servers[i]; + struct sockaddr_storage addr; + size_t addrlen; + struct xprt_create xprt_args = { + .ident = 0, + .net = clp->cl_net, + }; + struct nfs4_add_xprt_data xprtdata = { + .clp = clp, + }; + struct rpc_add_xprt_test rpcdata = { + .add_xprt_test = clp->cl_mvops->session_trunk, + .data = &xprtdata, + }; + char *servername = NULL; + + if (!srv_loc->len) + continue; + + addrlen = nfs_parse_server_name(srv_loc->data, srv_loc->len, + &addr, sizeof(addr), + clp->cl_net, server->port); + if (!addrlen) + return; + xprt_args.dstaddr = (struct sockaddr *)&addr; + xprt_args.addrlen = addrlen; + servername = kmalloc(srv_loc->len + 1, GFP_KERNEL); + if (!servername) + return; + memcpy(servername, srv_loc->data, srv_loc->len); + servername[srv_loc->len] = '\0'; + xprt_args.servername = servername; + + xprtdata.cred = nfs4_get_clid_cred(clp); + rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, + rpc_clnt_setup_test_and_add_xprt, + &rpcdata); + if (xprtdata.cred) + put_cred(xprtdata.cred); + kfree(servername); + } +} + +static bool _is_same_nfs4_pathname(struct nfs4_pathname *path1, + struct nfs4_pathname *path2) +{ + int i; + + if (path1->ncomponents != path2->ncomponents) + return false; + for (i = 0; i < path1->ncomponents; i++) { + if (path1->components[i].len != path2->components[i].len) + return false; + if (memcmp(path1->components[i].data, path2->components[i].data, + path1->components[i].len)) + return false; + } + return true; +} + +static int _nfs4_discover_trunking(struct nfs_server *server, + struct nfs_fh *fhandle) +{ + struct nfs4_fs_locations *locations = NULL; + struct page *page; + const struct cred *cred; + struct nfs_client *clp = server->nfs_client; + const struct nfs4_state_maintenance_ops *ops = + clp->cl_mvops->state_renewal_ops; + int status = -ENOMEM, i; + + cred = ops->get_state_renewal_cred(clp); + if (cred == NULL) { + cred = nfs4_get_clid_cred(clp); + if (cred == NULL) + return -ENOKEY; + } + + page = alloc_page(GFP_KERNEL); + if (!page) + goto out_put_cred; + locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (!locations) + goto out_free; + locations->fattr = nfs_alloc_fattr(); + if (!locations->fattr) + goto out_free_2; + + status = nfs4_proc_get_locations(server, fhandle, locations, page, + cred); + if (status) + goto out_free_3; + + for (i = 0; i < locations->nlocations; i++) { + if (!_is_same_nfs4_pathname(&locations->fs_path, + &locations->locations[i].rootpath)) + continue; + test_fs_location_for_trunking(&locations->locations[i], clp, + server); + } +out_free_3: + kfree(locations->fattr); +out_free_2: + kfree(locations); +out_free: + __free_page(page); +out_put_cred: + put_cred(cred); + return status; +} + +static int nfs4_discover_trunking(struct nfs_server *server, + struct nfs_fh *fhandle) +{ + struct nfs4_exception exception = { + .interruptible = true, + }; + struct nfs_client *clp = server->nfs_client; + int err = 0; + + if (!nfs4_has_session(clp)) + goto out; + do { + err = nfs4_handle_exception(server, + _nfs4_discover_trunking(server, fhandle), + &exception); + } while (exception.retry); +out: + return err; +} + static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fattr *fattr) { - u32 bitmask[3]; + u32 bitmask[3] = { + [0] = FATTR4_WORD0_TYPE | FATTR4_WORD0_CHANGE | + FATTR4_WORD0_SIZE | FATTR4_WORD0_FSID, + }; struct nfs4_lookup_root_arg args = { .bitmask = bitmask, }; struct nfs4_lookup_res res = { .server = server, - .fattr = info->fattr, + .fattr = fattr, .fh = fhandle, }; struct rpc_message msg = { @@ -2672,24 +4266,20 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = &res, }; - bitmask[0] = nfs4_fattr_bitmap[0]; - bitmask[1] = nfs4_fattr_bitmap[1]; - /* - * Process the label in the upcoming getfattr - */ - bitmask[2] = nfs4_fattr_bitmap[2] & ~FATTR4_WORD2_SECURITY_LABEL; - - nfs_fattr_init(info->fattr); + nfs_fattr_init(fattr); return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); } static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fattr *fattr) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { - err = _nfs4_lookup_root(server, fhandle, info); + err = _nfs4_lookup_root(server, fhandle, fattr); + trace_nfs4_lookup_root(server, fhandle, fattr, err); switch (err) { case 0: case -NFS4ERR_WRONGSEC: @@ -2702,20 +4292,19 @@ out: return err; } -static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, rpc_authflavor_t flavor) +static int nfs4_lookup_root_sec(struct nfs_server *server, + struct nfs_fh *fhandle, struct nfs_fattr *fattr, + rpc_authflavor_t flavor) { + struct rpc_auth_create_args auth_args = { + .pseudoflavor = flavor, + }; struct rpc_auth *auth; - int ret; - auth = rpcauth_create(flavor, server->client); - if (IS_ERR(auth)) { - ret = -EACCES; - goto out; - } - ret = nfs4_lookup_root(server, fhandle, info); -out: - return ret; + auth = rpcauth_create(&auth_args, server->client); + if (IS_ERR(auth)) + return -EACCES; + return nfs4_lookup_root(server, fhandle, fattr); } /* @@ -2728,7 +4317,7 @@ out: * negative errno value. */ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fattr *fattr) { /* Per 3530bis 15.33.5 */ static const rpc_authflavor_t flav_array[] = { @@ -2741,15 +4330,29 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, int status = -EPERM; size_t i; - for (i = 0; i < ARRAY_SIZE(flav_array); i++) { - status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); - if (status == -NFS4ERR_WRONGSEC || status == -EACCES) - continue; - break; + if (server->auth_info.flavor_len > 0) { + /* try each flavor specified by user */ + for (i = 0; i < server->auth_info.flavor_len; i++) { + status = nfs4_lookup_root_sec( + server, fhandle, fattr, + server->auth_info.flavors[i]); + if (status == -NFS4ERR_WRONGSEC || status == -EACCES) + continue; + break; + } + } else { + /* no flavors specified by user, try default list */ + for (i = 0; i < ARRAY_SIZE(flav_array); i++) { + status = nfs4_lookup_root_sec(server, fhandle, fattr, + flav_array[i]); + if (status == -NFS4ERR_WRONGSEC || status == -EACCES) + continue; + break; + } } /* - * -EACCESS could mean that the user doesn't have correct permissions + * -EACCES could mean that the user doesn't have correct permissions * to access the mount. It could also mean that we tried to mount * with a gss auth flavor, but rpc.gssd isn't running. Either way, * existing mount programs don't handle -EACCES very well so it should @@ -2760,35 +4363,26 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -static int nfs4_do_find_root_sec(struct nfs_server *server, - struct nfs_fh *fhandle, struct nfs_fsinfo *info) -{ - int mv = server->nfs_client->cl_minorversion; - return nfs_v4_minor_ops[mv]->find_root_sec(server, fhandle, info); -} - /** * nfs4_proc_get_rootfh - get file handle for server's pseudoroot * @server: initialized nfs_server handle * @fhandle: we fill in the pseudo-fs root file handle - * @info: we fill in an FSINFO struct + * @fattr: we fill in a bare bones struct fattr + * @auth_probe: probe the auth flavours * * Returns zero on success, or a negative errno. */ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) + struct nfs_fattr *fattr, bool auth_probe) { - int status; + int status = 0; - status = nfs4_lookup_root(server, fhandle, info); - if ((status == -NFS4ERR_WRONGSEC) && - !(server->flags & NFS_MOUNT_SECFLAVOUR)) - status = nfs4_do_find_root_sec(server, fhandle, info); + if (!auth_probe) + status = nfs4_lookup_root(server, fhandle, fattr); - if (status == 0) - status = nfs4_server_capabilities(server, fhandle); - if (status == 0) - status = nfs4_do_fsinfo(server, fhandle, info); + if (auth_probe || status == NFS4ERR_WRONGSEC) + status = server->nfs_client->cl_mvops->find_root_sec( + server, fhandle, fattr); return nfs4_map_errors(status); } @@ -2798,7 +4392,6 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, { int error; struct nfs_fattr *fattr = info->fattr; - struct nfs4_label *label = NULL; error = nfs4_server_capabilities(server, mntfh); if (error < 0) { @@ -2806,23 +4399,17 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh, return error; } - label = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(label)) - return PTR_ERR(label); - - error = nfs4_proc_getattr(server, mntfh, fattr, label); + error = nfs4_proc_getattr(server, mntfh, fattr, NULL); if (error < 0) { dprintk("nfs4_get_root: getattr error = %d\n", -error); - goto err_free_label; + goto out; } if (fattr->valid & NFS_ATTR_FATTR_FSID && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); -err_free_label: - nfs4_label_free(label); - +out: return error; } @@ -2846,21 +4433,25 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir, if (locations == NULL) goto out; + locations->fattr = fattr; + status = nfs4_proc_fs_locations(client, dir, name, locations, page); if (status != 0) goto out; - /* Make sure server returned a different fsid for the referral */ - if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { + + /* + * If the fsid didn't change, this is a migration event, not a + * referral. Cause us to drop into the exception handler, which + * will kick off migration recovery. + */ + if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &fattr->fsid)) { dprintk("%s: server did not return a different fsid for" " a referral at %s\n", __func__, name->name); - status = -EIO; + status = -NFS4ERR_MOVED; goto out; } /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */ - nfs_fixup_referral_attributes(&locations->fattr); - - /* replace the lookup nfs_fattr with the locations nfs_fattr */ - memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr)); + nfs_fixup_referral_attributes(fattr); memset(fhandle, 0, sizeof(struct nfs_fh)); out: if (page) @@ -2870,15 +4461,15 @@ out: } static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct nfs_fattr *fattr, struct inode *inode) { + __u32 bitmask[NFS4_BITMASK_SZ]; struct nfs4_getattr_arg args = { .fh = fhandle, - .bitmask = server->attr_bitmask, + .bitmask = bitmask, }; struct nfs4_getattr_res res = { .fattr = fattr, - .label = label, .server = server, }; struct rpc_message msg = { @@ -2886,21 +4477,33 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; + unsigned short task_flags = 0; + + if (nfs4_has_session(server->nfs_client)) + task_flags = RPC_TASK_MOVEABLE; - args.bitmask = nfs4_bitmask(server, label); + /* Is this is an attribute revalidation, subject to softreval? */ + if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) + task_flags |= RPC_TASK_TIMEOUT; + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), inode, 0); nfs_fattr_init(fattr); - return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); + return nfs4_do_call_sync(server->client, server, &msg, + &args.seq_args, &res.seq_res, task_flags); } -static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) +int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr, struct inode *inode) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { - err = nfs4_handle_exception(server, - _nfs4_proc_getattr(server, fhandle, fattr, label), + err = _nfs4_proc_getattr(server, fhandle, fattr, inode); + trace_nfs4_getattr(server, fhandle, fattr, err); + err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; @@ -2927,52 +4530,49 @@ static int nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct iattr *sattr) { - struct inode *inode = dentry->d_inode; - struct rpc_cred *cred = NULL; - struct nfs4_state *state = NULL; - struct nfs4_label *label = NULL; + struct inode *inode = d_inode(dentry); + const struct cred *cred = NULL; + struct nfs_open_context *ctx = NULL; int status; - if (pnfs_ld_layoutret_on_setattr(inode)) + if (pnfs_ld_layoutret_on_setattr(inode) && + sattr->ia_valid & ATTR_SIZE && + sattr->ia_size < i_size_read(inode)) pnfs_commit_and_return_layout(inode); nfs_fattr_init(fattr); /* Deal with open(O_TRUNC) */ if (sattr->ia_valid & ATTR_OPEN) - sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); + sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME); /* Optimization: if the end result is no change, don't RPC */ - if ((sattr->ia_valid & ~(ATTR_FILE)) == 0) + if ((sattr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; /* Search for an existing open(O_WRITE) file */ if (sattr->ia_valid & ATTR_FILE) { - struct nfs_open_context *ctx; ctx = nfs_file_open_context(sattr->ia_file); - if (ctx) { + if (ctx) cred = ctx->cred; - state = ctx->state; - } } - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); - if (IS_ERR(label)) - return PTR_ERR(label); + /* Return any delegations if we're going to change ACLs */ + if ((sattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) + nfs4_inode_make_writeable(inode); - status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label); + status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL); if (status == 0) { - nfs_setattr_update_inode(inode, sattr); - nfs_setsecurity(inode, fattr, label); + nfs_setattr_update_inode(inode, sattr, fattr); + nfs_setsecurity(inode, fattr); } - nfs4_label_free(label); return status; } static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, - const struct qstr *name, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct dentry *dentry, const struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct nfs_server *server = NFS_SERVER(dir); int status; @@ -2984,7 +4584,6 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct nfs4_lookup_res res = { .server = server, .fattr = fattr, - .label = label, .fh = fhandle, }; struct rpc_message msg = { @@ -2992,13 +4591,23 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, .rpc_argp = &args, .rpc_resp = &res, }; + unsigned short task_flags = 0; - args.bitmask = nfs4_bitmask(server, label); + if (nfs_server_capable(dir, NFS_CAP_MOVEABLE)) + task_flags = RPC_TASK_MOVEABLE; + + /* Is this is an attribute revalidation, subject to softreval? */ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; + + args.bitmask = nfs4_bitmask(server, fattr->label); nfs_fattr_init(fattr); - dprintk("NFS call lookup %s\n", name->name); - status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0); + dprintk("NFS call lookup %pd2\n", dentry); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); + status = nfs4_do_call_sync(clnt, server, &msg, + &args.seq_args, &res.seq_res, task_flags); dprintk("NFS reply lookup: %d\n", status); return status; } @@ -3012,27 +4621,31 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) } static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, - struct qstr *name, struct nfs_fh *fhandle, - struct nfs_fattr *fattr, struct nfs4_label *label) + struct dentry *dentry, const struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; struct rpc_clnt *client = *clnt; int err; do { - err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label); + err = _nfs4_proc_lookup(client, dir, dentry, name, fhandle, fattr); + trace_nfs4_lookup(dir, name, err); switch (err) { case -NFS4ERR_BADNAME: err = -ENOENT; goto out; case -NFS4ERR_MOVED: err = nfs4_get_referral(client, dir, name, fattr, fhandle); + if (err == -NFS4ERR_MOVED) + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); goto out; case -NFS4ERR_WRONGSEC: err = -EPERM; if (client != *clnt) goto out; - - client = nfs4_create_sec_client(client, dir, name); + client = nfs4_negotiate_security(client, dir, name); if (IS_ERR(client)) return PTR_ERR(client); @@ -3052,14 +4665,13 @@ out: return err; } -static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, - struct nfs_fh *fhandle, struct nfs_fattr *fattr, - struct nfs4_label *label) +static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int status; struct rpc_clnt *client = NFS_CLIENT(dir); - status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, label); + status = nfs4_proc_lookup_common(&client, dir, dentry, name, fhandle, fattr); if (client != NFS_CLIENT(dir)) { rpc_shutdown_client(client); nfs_fixup_secinfo_attributes(fattr); @@ -3068,26 +4680,81 @@ static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, } struct rpc_clnt * -nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name, +nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct rpc_clnt *client = NFS_CLIENT(dir); int status; - struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir)); - status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL); - if (status < 0) { - rpc_shutdown_client(client); + status = nfs4_proc_lookup_common(&client, dir, dentry, &dentry->d_name, + fhandle, fattr); + if (status < 0) return ERR_PTR(status); - } - return client; + return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; } -static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) +static int _nfs4_proc_lookupp(struct inode *inode, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct nfs_server *server = NFS_SERVER(inode); + int status; + struct nfs4_lookupp_arg args = { + .bitmask = server->attr_bitmask, + .fh = NFS_FH(inode), + }; + struct nfs4_lookupp_res res = { + .server = server, + .fattr = fattr, + .fh = fhandle, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUPP], + .rpc_argp = &args, + .rpc_resp = &res, + }; + unsigned short task_flags = 0; + + if (server->flags & NFS_MOUNT_SOFTREVAL) + task_flags |= RPC_TASK_TIMEOUT; + if (server->caps & NFS_CAP_MOVEABLE) + task_flags |= RPC_TASK_MOVEABLE; + + args.bitmask = nfs4_bitmask(server, fattr->label); + + nfs_fattr_init(fattr); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); + + dprintk("NFS call lookupp ino=0x%lx\n", inode->i_ino); + status = nfs4_do_call_sync(clnt, server, &msg, &args.seq_args, + &res.seq_res, task_flags); + dprintk("NFS reply lookupp: %d\n", status); + return status; +} + +static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) +{ + struct nfs4_exception exception = { + .interruptible = true, + }; + int err; + do { + err = _nfs4_proc_lookupp(inode, fhandle, fattr); + trace_nfs4_lookupp(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry, + const struct cred *cred) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_accessargs args = { .fh = NFS_FH(inode), - .bitmask = server->cache_consistency_bitmask, + .access = entry->mask, }; struct nfs4_accessres res = { .server = server, @@ -3096,48 +4763,37 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], .rpc_argp = &args, .rpc_resp = &res, - .rpc_cred = entry->cred, + .rpc_cred = cred, }; - int mode = entry->mask; int status = 0; - /* - * Determine which access bits we want to ask for... - */ - if (mode & MAY_READ) - args.access |= NFS4_ACCESS_READ; - if (S_ISDIR(inode->i_mode)) { - if (mode & MAY_WRITE) - args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE; - if (mode & MAY_EXEC) - args.access |= NFS4_ACCESS_LOOKUP; - } else { - if (mode & MAY_WRITE) - args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND; - if (mode & MAY_EXEC) - args.access |= NFS4_ACCESS_EXECUTE; + if (!nfs4_have_delegation(inode, FMODE_READ, 0)) { + res.fattr = nfs_alloc_fattr(); + if (res.fattr == NULL) + return -ENOMEM; + args.bitmask = server->cache_consistency_bitmask; } - - res.fattr = nfs_alloc_fattr(); - if (res.fattr == NULL) - return -ENOMEM; - status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (!status) { nfs_access_set_mask(entry, res.access); - nfs_refresh_inode(inode, res.fattr); + if (res.fattr) + nfs_refresh_inode(inode, res.fattr); } nfs_free_fattr(res.fattr); return status; } -static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) +static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry, + const struct cred *cred) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - _nfs4_proc_access(inode, entry), + err = _nfs4_proc_access(inode, entry, cred); + trace_nfs4_access(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; @@ -3187,11 +4843,14 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page, static int nfs4_proc_readlink(struct inode *inode, struct page *page, unsigned int pgbase, unsigned int pglen) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - _nfs4_proc_readlink(inode, page, pgbase, pglen), + err = _nfs4_proc_readlink(inode, page, pgbase, pglen); + trace_nfs4_readlink(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; @@ -3204,19 +4863,21 @@ static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { - struct nfs4_label l, *ilabel = NULL; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_label l, *ilabel; struct nfs_open_context *ctx; struct nfs4_state *state; int status = 0; - ctx = alloc_nfs_open_context(dentry, FMODE_READ); + ctx = alloc_nfs_open_context(dentry, FMODE_READ, NULL); if (IS_ERR(ctx)) return PTR_ERR(ctx); ilabel = nfs4_label_init_security(dir, dentry, sattr, &l); - sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, ctx, flags, sattr, ilabel); + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + sattr->ia_mode &= ~current_umask(); + state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, NULL); if (IS_ERR(state)) { status = PTR_ERR(state); goto out; @@ -3227,7 +4888,8 @@ out: return status; } -static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) +static int +_nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype) { struct nfs_server *server = NFS_SERVER(dir); struct nfs_removeargs args = { @@ -3242,42 +4904,83 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) .rpc_argp = &args, .rpc_resp = &res, }; + unsigned long timestamp = jiffies; int status; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); - if (status == 0) - update_changeattr(dir, &res.cinfo); + if (status == 0) { + spin_lock(&dir->i_lock); + /* Removing a directory decrements nlink in the parent */ + if (ftype == NF4DIR && dir->i_nlink > 2) + nfs4_dec_nlink_locked(dir); + nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp, + NFS_INO_INVALID_DATA); + spin_unlock(&dir->i_lock); + } return status; } -static int nfs4_proc_remove(struct inode *dir, struct qstr *name) +static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; + struct inode *inode = d_inode(dentry); int err; + + if (inode) { + if (inode->i_nlink == 1) + nfs4_inode_return_delegation(inode); + else + nfs4_inode_make_writeable(inode); + } do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_remove(dir, name), + err = _nfs4_proc_remove(dir, &dentry->d_name, NF4REG); + trace_nfs4_remove(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); return err; } -static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) +static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name) +{ + struct nfs4_exception exception = { + .interruptible = true, + }; + int err; + + do { + err = _nfs4_proc_remove(dir, name, NF4DIR); + trace_nfs4_remove(dir, name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, + &exception); + } while (exception.retry); + return err; +} + +static void nfs4_proc_unlink_setup(struct rpc_message *msg, + struct dentry *dentry, + struct inode *inode) { - struct nfs_server *server = NFS_SERVER(dir); struct nfs_removeargs *args = msg->rpc_argp; struct nfs_removeres *res = msg->rpc_resp; - res->server = server; + res->server = NFS_SB(dentry->d_sb); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; - nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); + nfs4_init_sequence(&args->seq_args, &res->seq_res, 1, 0); nfs_fattr_init(res->dir_attr); + + if (inode) { + nfs4_inode_return_delegation(inode); + nfs_d_prune_case_insensitive_aliases(inode); + } } static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) { - nfs4_setup_sequence(NFS_SERVER(data->dir), + nfs4_setup_sequence(NFS_SB(data->dentry->d_sb)->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -3285,30 +4988,42 @@ static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlin static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) { - struct nfs_removeres *res = task->tk_msg.rpc_resp; + struct nfs_unlinkdata *data = task->tk_calldata; + struct nfs_removeres *res = &data->res; if (!nfs4_sequence_done(task, &res->seq_res)) return 0; - if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) + if (nfs4_async_handle_error(task, res->server, NULL, + &data->timeout) == -EAGAIN) return 0; - update_changeattr(dir, &res->cinfo); + if (task->tk_status == 0) + nfs4_update_changeattr(dir, &res->cinfo, + res->dir_attr->time_start, + NFS_INO_INVALID_DATA); return 1; } -static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) +static void nfs4_proc_rename_setup(struct rpc_message *msg, + struct dentry *old_dentry, + struct dentry *new_dentry) { - struct nfs_server *server = NFS_SERVER(dir); struct nfs_renameargs *arg = msg->rpc_argp; struct nfs_renameres *res = msg->rpc_resp; + struct inode *old_inode = d_inode(old_dentry); + struct inode *new_inode = d_inode(new_dentry); + if (old_inode) + nfs4_inode_make_writeable(old_inode); + if (new_inode) + nfs4_inode_return_delegation(new_inode); msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; - res->server = server; - nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); + res->server = NFS_SB(old_dentry->d_sb); + nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1, 0); } static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) { - nfs4_setup_sequence(NFS_SERVER(data->old_dir), + nfs4_setup_sequence(NFS_SERVER(data->old_dir)->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -3317,72 +5032,46 @@ static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renam static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) { - struct nfs_renameres *res = task->tk_msg.rpc_resp; + struct nfs_renamedata *data = task->tk_calldata; + struct nfs_renameres *res = &data->res; if (!nfs4_sequence_done(task, &res->seq_res)) return 0; - if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) + if (nfs4_async_handle_error(task, res->server, NULL, &data->timeout) == -EAGAIN) return 0; - update_changeattr(old_dir, &res->old_cinfo); - update_changeattr(new_dir, &res->new_cinfo); - return 1; -} - -static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, - struct inode *new_dir, struct qstr *new_name) -{ - struct nfs_server *server = NFS_SERVER(old_dir); - struct nfs_renameargs arg = { - .old_dir = NFS_FH(old_dir), - .new_dir = NFS_FH(new_dir), - .old_name = old_name, - .new_name = new_name, - }; - struct nfs_renameres res = { - .server = server, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], - .rpc_argp = &arg, - .rpc_resp = &res, - }; - int status = -ENOMEM; - - status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); - if (!status) { - update_changeattr(old_dir, &res.old_cinfo); - update_changeattr(new_dir, &res.new_cinfo); + if (task->tk_status == 0) { + nfs_d_prune_case_insensitive_aliases(d_inode(data->old_dentry)); + if (new_dir != old_dir) { + /* Note: If we moved a directory, nlink will change */ + nfs4_update_changeattr(old_dir, &res->old_cinfo, + res->old_fattr->time_start, + NFS_INO_INVALID_NLINK | + NFS_INO_INVALID_DATA); + nfs4_update_changeattr(new_dir, &res->new_cinfo, + res->new_fattr->time_start, + NFS_INO_INVALID_NLINK | + NFS_INO_INVALID_DATA); + } else + nfs4_update_changeattr(old_dir, &res->old_cinfo, + res->old_fattr->time_start, + NFS_INO_INVALID_DATA); } - return status; -} - -static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, - struct inode *new_dir, struct qstr *new_name) -{ - struct nfs4_exception exception = { }; - int err; - do { - err = nfs4_handle_exception(NFS_SERVER(old_dir), - _nfs4_proc_rename(old_dir, old_name, - new_dir, new_name), - &exception); - } while (exception.retry); - return err; + return 1; } -static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name) { struct nfs_server *server = NFS_SERVER(inode); + __u32 bitmask[NFS4_BITMASK_SZ]; struct nfs4_link_arg arg = { .fh = NFS_FH(inode), .dir_fh = NFS_FH(dir), .name = name, - .bitmask = server->attr_bitmask, + .bitmask = bitmask, }; struct nfs4_link_res res = { .server = server, - .label = NULL, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK], @@ -3391,36 +5080,34 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * }; int status = -ENOMEM; - res.fattr = nfs_alloc_fattr(); + res.fattr = nfs_alloc_fattr_with_label(server); if (res.fattr == NULL) goto out; - res.label = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(res.label)) { - status = PTR_ERR(res.label); - goto out; - } - arg.bitmask = nfs4_bitmask(server, res.label); - + nfs4_inode_make_writeable(inode); + nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), + inode, + NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (!status) { - update_changeattr(dir, &res.cinfo); + nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start, + NFS_INO_INVALID_DATA); + nfs4_inc_nlink(inode); status = nfs_post_op_update_inode(inode, res.fattr); if (!status) - nfs_setsecurity(inode, res.fattr, res.label); + nfs_setsecurity(inode, res.fattr); } - - nfs4_label_free(res.label); - out: nfs_free_fattr(res.fattr); return status; } -static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) +static int nfs4_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { err = nfs4_handle_exception(NFS_SERVER(inode), @@ -3436,11 +5123,10 @@ struct nfs4_createdata { struct nfs4_create_res res; struct nfs_fh fh; struct nfs_fattr fattr; - struct nfs4_label *label; }; static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, - struct qstr *name, struct iattr *sattr, u32 ftype) + const struct qstr *name, struct iattr *sattr, u32 ftype) { struct nfs4_createdata *data; @@ -3448,8 +5134,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, if (data != NULL) { struct nfs_server *server = NFS_SERVER(dir); - data->label = nfs4_label_alloc(server, GFP_KERNEL); - if (IS_ERR(data->label)) + data->fattr.label = nfs4_label_alloc(server, GFP_KERNEL); + if (IS_ERR(data->fattr.label)) goto out_free; data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE]; @@ -3460,11 +5146,11 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, data->arg.name = name; data->arg.attrs = sattr; data->arg.ftype = ftype; - data->arg.bitmask = nfs4_bitmask(server, data->label); + data->arg.bitmask = nfs4_bitmask(server, data->fattr.label); + data->arg.umask = current_umask(); data->res.server = server; data->res.fh = &data->fh; data->res.fattr = &data->fattr; - data->res.label = data->label; nfs_fattr_init(data->res.fattr); } return data; @@ -3478,22 +5164,52 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_ int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, &data->arg.seq_args, &data->res.seq_res, 1); if (status == 0) { - update_changeattr(dir, &data->res.dir_cinfo); - status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label); + spin_lock(&dir->i_lock); + nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo, + data->res.fattr->time_start, + NFS_INO_INVALID_DATA); + spin_unlock(&dir->i_lock); + status = nfs_instantiate(dentry, data->res.fh, data->res.fattr); } return status; } +static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry, + struct nfs4_createdata *data, int *statusp) +{ + struct dentry *ret; + + *statusp = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, + &data->arg.seq_args, &data->res.seq_res, 1); + + if (*statusp) + return NULL; + + spin_lock(&dir->i_lock); + /* Creating a directory bumps nlink in the parent */ + nfs4_inc_nlink_locked(dir); + nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo, + data->res.fattr->time_start, + NFS_INO_INVALID_DATA); + spin_unlock(&dir->i_lock); + ret = nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr); + if (!IS_ERR(ret)) + return ret; + *statusp = PTR_ERR(ret); + return NULL; +} + static void nfs4_free_createdata(struct nfs4_createdata *data) { - nfs4_label_free(data->label); + nfs4_label_free(data->fattr.label); kfree(data); } static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct page *page, unsigned int len, struct iattr *sattr, + struct folio *folio, unsigned int len, struct iattr *sattr, struct nfs4_label *label) { + struct page *page = &folio->page; struct nfs4_createdata *data; int status = -ENAMETOOLONG; @@ -3518,18 +5234,20 @@ out: } static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, - struct page *page, unsigned int len, struct iattr *sattr) + struct folio *folio, unsigned int len, struct iattr *sattr) { - struct nfs4_exception exception = { }; - struct nfs4_label l, *label = NULL; + struct nfs4_exception exception = { + .interruptible = true, + }; + struct nfs4_label l, *label; int err; label = nfs4_label_init_security(dir, dentry, sattr, &l); do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_symlink(dir, dentry, page, - len, sattr, label), + err = _nfs4_proc_symlink(dir, dentry, folio, len, sattr, label); + trace_nfs4_symlink(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); @@ -3537,74 +5255,88 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry, return err; } -static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, - struct iattr *sattr, struct nfs4_label *label) +static struct dentry *_nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, + struct iattr *sattr, + struct nfs4_label *label, int *statusp) { struct nfs4_createdata *data; - int status = -ENOMEM; + struct dentry *ret = NULL; + *statusp = -ENOMEM; data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR); if (data == NULL) goto out; data->arg.label = label; - status = nfs4_do_create(dir, dentry, data); + ret = nfs4_do_mkdir(dir, dentry, data, statusp); nfs4_free_createdata(data); out: - return status; + return ret; } -static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, - struct iattr *sattr) +static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, + struct iattr *sattr) { - struct nfs4_exception exception = { }; - struct nfs4_label l, *label = NULL; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_exception exception = { + .interruptible = true, + }; + struct nfs4_label l, *label; + struct dentry *alias; int err; label = nfs4_label_init_security(dir, dentry, sattr, &l); - sattr->ia_mode &= ~current_umask(); + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + sattr->ia_mode &= ~current_umask(); do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_mkdir(dir, dentry, sattr, label), - &exception); + alias = _nfs4_proc_mkdir(dir, dentry, sattr, label, &err); + trace_nfs4_mkdir(dir, &dentry->d_name, err); + if (err) + alias = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir), + err, + &exception)); } while (exception.retry); nfs4_label_release_security(label); - return err; + return alias; } -static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, - u64 cookie, struct page **pages, unsigned int count, int plus) +static int _nfs4_proc_readdir(struct nfs_readdir_arg *nr_arg, + struct nfs_readdir_res *nr_res) { - struct inode *dir = dentry->d_inode; + struct inode *dir = d_inode(nr_arg->dentry); + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_readdir_arg args = { .fh = NFS_FH(dir), - .pages = pages, + .pages = nr_arg->pages, .pgbase = 0, - .count = count, - .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, - .plus = plus, + .count = nr_arg->page_len, + .plus = nr_arg->plus, }; struct nfs4_readdir_res res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR], .rpc_argp = &args, .rpc_resp = &res, - .rpc_cred = cred, + .rpc_cred = nr_arg->cred, }; int status; - dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __func__, - dentry->d_parent->d_name.name, - dentry->d_name.name, - (unsigned long long)cookie); - nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); + dprintk("%s: dentry = %pd2, cookie = %llu\n", __func__, + nr_arg->dentry, (unsigned long long)nr_arg->cookie); + if (!(server->caps & NFS_CAP_SECURITY_LABEL)) + args.bitmask = server->attr_bitmask_nl; + else + args.bitmask = server->attr_bitmask; + + nfs4_setup_readdir(nr_arg->cookie, nr_arg->verf, nr_arg->dentry, &args); res.pgbase = args.pgbase; - status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); + status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, + &res.seq_res, 0); if (status >= 0) { - memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); + memcpy(nr_res->verf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; } @@ -3614,16 +5346,18 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, return status; } -static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, - u64 cookie, struct page **pages, unsigned int count, int plus) +static int nfs4_proc_readdir(struct nfs_readdir_arg *arg, + struct nfs_readdir_res *res) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), - _nfs4_proc_readdir(dentry, cred, cookie, - pages, count, plus), - &exception); + err = _nfs4_proc_readdir(arg, res); + trace_nfs4_readdir(d_inode(arg->dentry), err); + err = nfs4_handle_exception(NFS_SERVER(d_inode(arg->dentry)), + err, &exception); } while (exception.retry); return err; } @@ -3666,16 +5400,21 @@ out: static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { - struct nfs4_exception exception = { }; - struct nfs4_label l, *label = NULL; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_exception exception = { + .interruptible = true, + }; + struct nfs4_label l, *label; int err; label = nfs4_label_init_security(dir, dentry, sattr, &l); - sattr->ia_mode &= ~current_umask(); + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + sattr->ia_mode &= ~current_umask(); do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_mknod(dir, dentry, sattr, label, rdev), + err = _nfs4_proc_mknod(dir, dentry, sattr, label, rdev); + trace_nfs4_mknod(dir, &dentry->d_name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); @@ -3706,7 +5445,9 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { err = nfs4_handle_exception(server, @@ -3737,19 +5478,16 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { - struct nfs4_exception exception = { }; - unsigned long now = jiffies; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { err = _nfs4_do_fsinfo(server, fhandle, fsinfo); + trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err); if (err == 0) { - struct nfs_client *clp = server->nfs_client; - - spin_lock(&clp->cl_lock); - clp->cl_lease_time = fsinfo->lease_time * HZ; - clp->cl_last_renewal = now; - spin_unlock(&clp->cl_lock); + nfs4_set_lease_period(server->nfs_client, fsinfo->lease_time * HZ); break; } err = nfs4_handle_exception(server, err, &exception); @@ -3766,7 +5504,7 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s if (error == 0) { /* block layout checks this! */ server->pnfs_blksize = fsinfo->blksize; - set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype); + set_pnfs_layoutdriver(server, fhandle, fsinfo); } return error; @@ -3801,7 +5539,9 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_pathconf *pathconf) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { @@ -3817,11 +5557,7 @@ int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_lock_context *l_ctx, fmode_t fmode) { - const struct nfs_lockowner *lockowner = NULL; - - if (l_ctx != NULL) - lockowner = &l_ctx->lockowner; - return nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner); + return nfs4_select_rw_stateid(ctx->state, fmode, l_ctx, stateid, NULL); } EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid); @@ -3830,11 +5566,12 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid, const struct nfs_lock_context *l_ctx, fmode_t fmode) { - nfs4_stateid current_stateid; + nfs4_stateid _current_stateid; - if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode)) - return false; - return nfs4_stateid_match(stateid, ¤t_stateid); + /* If the current stateid represents a lost lock, then exit */ + if (nfs4_set_rw_stateid(&_current_stateid, ctx, l_ctx, fmode) == -EIO) + return true; + return nfs4_stateid_match(stateid, &_current_stateid); } static bool nfs4_error_stateid_expired(int err) @@ -3852,28 +5589,34 @@ static bool nfs4_error_stateid_expired(int err) return false; } -void __nfs4_read_done_cb(struct nfs_read_data *data) -{ - nfs_invalidate_atime(data->header->inode); -} - -static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct nfs_server *server = NFS_SERVER(data->header->inode); + struct nfs_server *server = NFS_SERVER(hdr->inode); - if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { - rpc_restart_call_prepare(task); - return -EAGAIN; + trace_nfs4_read(hdr, task->tk_status); + if (task->tk_status < 0) { + struct nfs4_exception exception = { + .inode = hdr->inode, + .state = hdr->args.context->state, + .stateid = &hdr->args.stateid, + .retrans = hdr->retrans, + }; + task->tk_status = nfs4_async_handle_exception(task, + server, task->tk_status, &exception); + hdr->retrans = exception.retrans; + if (exception.retry) { + rpc_restart_call_prepare(task); + return -EAGAIN; + } } - __nfs4_read_done_cb(data); if (task->tk_status > 0) - renew_lease(server, data->timestamp); + renew_lease(server, hdr->timestamp); return 0; } static bool nfs4_read_stateid_changed(struct rpc_task *task, - struct nfs_readargs *args) + struct nfs_pgio_args *args) { if (!nfs4_error_stateid_expired(task->tk_status) || @@ -3886,55 +5629,114 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) +static bool nfs4_read_plus_not_supported(struct rpc_task *task, + struct nfs_pgio_header *hdr) { + struct nfs_server *server = NFS_SERVER(hdr->inode); + struct rpc_message *msg = &task->tk_msg; - dprintk("--> %s\n", __func__); + if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] && + task->tk_status == -ENOTSUPP) { + server->caps &= ~NFS_CAP_READ_PLUS; + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + rpc_restart_call_prepare(task); + return true; + } + return false; +} - if (!nfs4_sequence_done(task, &data->res.seq_res)) +static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) +{ + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; - if (nfs4_read_stateid_changed(task, &data->args)) + if (nfs4_read_stateid_changed(task, &hdr->args)) return -EAGAIN; - return data->read_done_cb ? data->read_done_cb(task, data) : - nfs4_read_done_cb(task, data); + if (nfs4_read_plus_not_supported(task, hdr)) + return -EAGAIN; + if (task->tk_status > 0) + nfs_invalidate_atime(hdr->inode); + return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) : + nfs4_read_done_cb(task, hdr); } -static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +#if defined CONFIG_NFS_V4_2 && defined CONFIG_NFS_V4_2_READ_PLUS +static bool nfs42_read_plus_support(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - data->timestamp = jiffies; - data->read_done_cb = nfs4_read_done_cb; - msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + /* Note: We don't use READ_PLUS with pNFS yet */ + if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) { + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS]; + return nfs_read_alloc_scratch(hdr, READ_PLUS_SCRATCH_SIZE); + } + return false; +} +#else +static bool nfs42_read_plus_support(struct nfs_pgio_header *hdr, + struct rpc_message *msg) +{ + return false; } +#endif /* CONFIG_NFS_V4_2 */ -static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, + hdr->timestamp = jiffies; + if (!hdr->pgio_done_cb) + hdr->pgio_done_cb = nfs4_read_done_cb; + if (!nfs42_read_plus_support(hdr, msg)) + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); +} + +static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ + if (nfs4_setup_sequence(NFS_SERVER(hdr->inode)->nfs_client, + &hdr->args.seq_args, + &hdr->res.seq_res, task)) - return; - nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - data->args.lock_context, FMODE_READ); + return 0; + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, + hdr->rw_mode) == -EIO) + return -EIO; + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) + return -EIO; + return 0; } -static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_write_done_cb(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; - - if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { - rpc_restart_call_prepare(task); - return -EAGAIN; + struct inode *inode = hdr->inode; + + trace_nfs4_write(hdr, task->tk_status); + if (task->tk_status < 0) { + struct nfs4_exception exception = { + .inode = hdr->inode, + .state = hdr->args.context->state, + .stateid = &hdr->args.stateid, + .retrans = hdr->retrans, + }; + task->tk_status = nfs4_async_handle_exception(task, + NFS_SERVER(inode), task->tk_status, + &exception); + hdr->retrans = exception.retrans; + if (exception.retry) { + rpc_restart_call_prepare(task); + return -EAGAIN; + } } if (task->tk_status >= 0) { - renew_lease(NFS_SERVER(inode), data->timestamp); - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); + renew_lease(NFS_SERVER(inode), hdr->timestamp); + nfs_writeback_update_inode(hdr); } return 0; } static bool nfs4_write_stateid_changed(struct rpc_task *task, - struct nfs_writeargs *args) + struct nfs_pgio_args *args) { if (!nfs4_error_stateid_expired(task->tk_status) || @@ -3947,63 +5749,92 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - if (!nfs4_sequence_done(task, &data->res.seq_res)) + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; - if (nfs4_write_stateid_changed(task, &data->args)) + if (nfs4_write_stateid_changed(task, &hdr->args)) return -EAGAIN; - return data->write_done_cb ? data->write_done_cb(task, data) : - nfs4_write_done_cb(task, data); + return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) : + nfs4_write_done_cb(task, hdr); } static -bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) +bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) { - const struct nfs_pgio_header *hdr = data->header; - /* Don't request attributes for pNFS or O_DIRECT writes */ - if (data->ds_clp != NULL || hdr->dreq != NULL) + if (hdr->ds_clp != NULL || hdr->dreq != NULL) return false; /* Otherwise, request attributes if and only if we don't hold * a delegation */ - return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; + return nfs4_have_delegation(hdr->inode, FMODE_READ, 0) == 0; } -static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[], + struct inode *inode, unsigned long cache_validity) { - struct nfs_server *server = NFS_SERVER(data->header->inode); - - if (!nfs4_write_need_cache_consistency_data(data)) { - data->args.bitmask = NULL; - data->res.fattr = NULL; - } else - data->args.bitmask = server->cache_consistency_bitmask; + struct nfs_server *server = NFS_SERVER(inode); + unsigned int i; + + memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ); + cache_validity |= READ_ONCE(NFS_I(inode)->cache_validity); + + if (cache_validity & NFS_INO_INVALID_CHANGE) + bitmask[0] |= FATTR4_WORD0_CHANGE; + if (cache_validity & NFS_INO_INVALID_ATIME) + bitmask[1] |= FATTR4_WORD1_TIME_ACCESS; + if (cache_validity & NFS_INO_INVALID_MODE) + bitmask[1] |= FATTR4_WORD1_MODE; + if (cache_validity & NFS_INO_INVALID_OTHER) + bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP; + if (cache_validity & NFS_INO_INVALID_NLINK) + bitmask[1] |= FATTR4_WORD1_NUMLINKS; + if (cache_validity & NFS_INO_INVALID_CTIME) + bitmask[1] |= FATTR4_WORD1_TIME_METADATA; + if (cache_validity & NFS_INO_INVALID_MTIME) + bitmask[1] |= FATTR4_WORD1_TIME_MODIFY; + if (cache_validity & NFS_INO_INVALID_BLOCKS) + bitmask[1] |= FATTR4_WORD1_SPACE_USED; + if (cache_validity & NFS_INO_INVALID_BTIME) + bitmask[1] |= FATTR4_WORD1_TIME_CREATE; + + if (cache_validity & NFS_INO_INVALID_SIZE) + bitmask[0] |= FATTR4_WORD0_SIZE; + + for (i = 0; i < NFS4_BITMASK_SZ; i++) + bitmask[i] &= server->attr_bitmask[i]; +} + +static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg, + struct rpc_clnt **clnt) +{ + struct nfs_server *server = NFS_SERVER(hdr->inode); + + if (!nfs4_write_need_cache_consistency_data(hdr)) { + hdr->args.bitmask = NULL; + hdr->res.fattr = NULL; + } else { + nfs4_bitmask_set(hdr->args.bitmask_store, + server->cache_consistency_bitmask, + hdr->inode, NFS_INO_INVALID_BLOCKS); + hdr->args.bitmask = hdr->args.bitmask_store; + } - if (!data->write_done_cb) - data->write_done_cb = nfs4_write_done_cb; - data->res.server = server; - data->timestamp = jiffies; + if (!hdr->pgio_done_cb) + hdr->pgio_done_cb = nfs4_write_done_cb; + hdr->res.server = server; + hdr->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); -} - -static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) -{ - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, - task)) - return; - nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - data->args.lock_context, FMODE_WRITE); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); + nfs4_state_protect_write(hdr->ds_clp ? hdr->ds_clp : server->nfs_client, clnt, msg, hdr); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { - nfs4_setup_sequence(NFS_SERVER(data->inode), + nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -4013,7 +5844,9 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *da { struct inode *inode = data->inode; - if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { + trace_nfs4_commit(data, task->tk_status); + if (nfs4_async_handle_error(task, NFS_SERVER(inode), + NULL, NULL) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; } @@ -4027,7 +5860,8 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data) return data->commit_done_cb(task, data); } -static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg) +static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg, + struct rpc_clnt **clnt) { struct nfs_server *server = NFS_SERVER(data->inode); @@ -4035,7 +5869,43 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess data->commit_done_cb = nfs4_commit_done_cb; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); + nfs4_state_protect(data->ds_clp ? data->ds_clp : server->nfs_client, + NFS_SP4_MACH_CRED_COMMIT, clnt, msg); +} + +static int _nfs4_proc_commit(struct file *dst, struct nfs_commitargs *args, + struct nfs_commitres *res) +{ + struct inode *dst_inode = file_inode(dst); + struct nfs_server *server = NFS_SERVER(dst_inode); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], + .rpc_argp = args, + .rpc_resp = res, + }; + + args->fh = NFS_FH(dst_inode); + return nfs4_call_sync(server->client, server, &msg, + &args->seq_args, &res->seq_res, 1); +} + +int nfs4_proc_commit(struct file *dst, __u64 offset, __u32 count, struct nfs_commitres *res) +{ + struct nfs_commitargs args = { + .offset = offset, + .count = count, + }; + struct nfs_server *dst_server = NFS_SERVER(file_inode(dst)); + struct nfs4_exception exception = { }; + int status; + + do { + status = _nfs4_proc_commit(dst, &args, res); + status = nfs4_handle_exception(dst_server, status, &exception); + } while (exception.retry); + + return status; } struct nfs4_renewdata { @@ -4052,7 +5922,7 @@ static void nfs4_renew_release(void *calldata) struct nfs4_renewdata *data = calldata; struct nfs_client *clp = data->client; - if (atomic_read(&clp->cl_count) > 1) + if (refcount_read(&clp->cl_count) > 1) nfs4_schedule_state_renewal(clp); nfs_put_client(clp); kfree(data); @@ -4064,7 +5934,14 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) struct nfs_client *clp = data->client; unsigned long timestamp = data->timestamp; - if (task->tk_status < 0) { + trace_nfs4_renew_async(clp, task->tk_status); + switch (task->tk_status) { + case 0: + break; + case -NFS4ERR_LEASE_MOVED: + nfs4_schedule_lease_moved_recovery(clp); + break; + default: /* Unless we're shutting down, schedule state recovery! */ if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0) return; @@ -4082,7 +5959,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { .rpc_release = nfs4_renew_release, }; -static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) +static int nfs4_proc_async_renew(struct nfs_client *clp, const struct cred *cred, unsigned renew_flags) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -4093,18 +5970,20 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, if (renew_flags == 0) return 0; - if (!atomic_inc_not_zero(&clp->cl_count)) + if (!refcount_inc_not_zero(&clp->cl_count)) return -EIO; data = kmalloc(sizeof(*data), GFP_NOFS); - if (data == NULL) + if (data == NULL) { + nfs_put_client(clp); return -ENOMEM; + } data->client = clp; data->timestamp = jiffies; return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT, &nfs4_renew_ops, data); } -static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs4_proc_renew(struct nfs_client *clp, const struct cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -4121,11 +6000,17 @@ static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) return 0; } -static inline int nfs4_server_supports_acls(struct nfs_server *server) +static bool nfs4_server_supports_acls(const struct nfs_server *server, + enum nfs4_acl_type type) { - return (server->caps & NFS_CAP_ACLS) - && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) - && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); + switch (type) { + default: + return server->attr_bitmask[0] & FATTR4_WORD0_ACL; + case NFS4ACL_DACL: + return server->attr_bitmask[1] & FATTR4_WORD1_DACL; + case NFS4ACL_SACL: + return server->attr_bitmask[1] & FATTR4_WORD1_SACL; + } } /* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that @@ -4134,8 +6019,8 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) */ #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) -static int buf_to_pages_noslab(const void *buf, size_t buflen, - struct page **pages, unsigned int *pgbase) +int nfs4_buf_to_pages_noslab(const void *buf, size_t buflen, + struct page **pages) { struct page *newpage, **spages; int rc = 0; @@ -4149,8 +6034,8 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen, if (newpage == NULL) goto unwind; memcpy(page_address(newpage), buf, len); - buf += len; - buflen -= len; + buf += len; + buflen -= len; *pages++ = newpage; rc++; } while (buflen != 0); @@ -4164,9 +6049,10 @@ unwind: } struct nfs4_cached_acl { + enum nfs4_acl_type type; int cached; size_t len; - char data[0]; + char data[]; }; static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl) @@ -4184,7 +6070,8 @@ static void nfs4_zap_acl_attr(struct inode *inode) nfs4_set_cached_acl(inode, NULL); } -static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen) +static ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, + size_t buflen, enum nfs4_acl_type type) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_cached_acl *acl; @@ -4194,6 +6081,8 @@ static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_ acl = nfsi->nfs4_acl; if (acl == NULL) goto out; + if (acl->type != type) + goto out; if (buf == NULL) /* user is just asking for length */ goto out_len; if (acl->cached == 0) @@ -4209,7 +6098,9 @@ out: return ret; } -static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) +static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, + size_t pgbase, size_t acl_len, + enum nfs4_acl_type type) { struct nfs4_cached_acl *acl; size_t buflen = sizeof(*acl) + acl_len; @@ -4226,6 +6117,7 @@ static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size goto out; acl->cached = 0; } + acl->type = type; acl->len = acl_len; out: nfs4_set_cached_acl(inode, acl); @@ -4241,15 +6133,17 @@ out: * length. The next getxattr call will then produce another round trip to * the server, this time with the input buf of the required size. */ -static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, + size_t buflen, enum nfs4_acl_type type) { - struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; + struct page **pages; struct nfs_getaclargs args = { .fh = NFS_FH(inode), - .acl_pages = pages, + .acl_type = type, .acl_len = buflen, }; struct nfs_getaclres res = { + .acl_type = type, .acl_len = buflen, }; struct rpc_message msg = { @@ -4257,15 +6151,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); + unsigned int npages; int ret = -ENOMEM, i; + struct nfs_server *server = NFS_SERVER(inode); - /* As long as we're doing a round trip to the server anyway, - * let's be prepared for a page of acl data. */ - if (npages == 0) - npages = 1; - if (npages > ARRAY_SIZE(pages)) - return -ERANGE; + if (buflen == 0) + buflen = server->rsize; + + npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; + pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + args.acl_pages = pages; for (i = 0; i < npages; i++) { pages[i] = alloc_page(GFP_KERNEL); @@ -4274,12 +6172,11 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu } /* for decoding across pages */ - res.acl_scratch = alloc_page(GFP_KERNEL); + res.acl_scratch = folio_alloc(GFP_KERNEL, 0); if (!res.acl_scratch) goto out_free; args.acl_len = npages * PAGE_SIZE; - args.acl_pgbase = 0; dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); @@ -4296,7 +6193,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu ret = -ERANGE; goto out_free; } - nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len); + nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len, + type); if (buf) { if (res.acl_len > buflen) { ret = -ERANGE; @@ -4307,20 +6205,24 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu out_ok: ret = res.acl_len; out_free: - for (i = 0; i < npages; i++) - if (pages[i]) - __free_page(pages[i]); + while (--i >= 0) + __free_page(pages[i]); if (res.acl_scratch) - __free_page(res.acl_scratch); + folio_put(res.acl_scratch); + kfree(pages); return ret; } -static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, + size_t buflen, enum nfs4_acl_type type) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; ssize_t ret; do { - ret = __nfs4_get_acl_uncached(inode, buf, buflen); + ret = __nfs4_get_acl_uncached(inode, buf, buflen, type); + trace_nfs4_get_acl(inode, ret); if (ret >= 0) break; ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception); @@ -4328,34 +6230,39 @@ static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bufl return ret; } -static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) +static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen, + enum nfs4_acl_type type) { struct nfs_server *server = NFS_SERVER(inode); int ret; - if (!nfs4_server_supports_acls(server)) + if (unlikely(NFS_FH(inode)->size == 0)) + return -ENODATA; + if (!nfs4_server_supports_acls(server, type)) return -EOPNOTSUPP; - ret = nfs_revalidate_inode(server, inode); + ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); if (ret < 0) return ret; if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - ret = nfs4_read_cached_acl(inode, buf, buflen); + ret = nfs4_read_cached_acl(inode, buf, buflen, type); if (ret != -ENOENT) /* -ENOENT is returned if there is no ACL or if there is an ACL * but no cached acl data, just the acl length */ return ret; - return nfs4_get_acl_uncached(inode, buf, buflen); + return nfs4_get_acl_uncached(inode, buf, buflen, type); } -static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, + size_t buflen, enum nfs4_acl_type type) { struct nfs_server *server = NFS_SERVER(inode); struct page *pages[NFS4ACL_MAXPAGES]; struct nfs_setaclargs arg = { - .fh = NFS_FH(inode), - .acl_pages = pages, - .acl_len = buflen, + .fh = NFS_FH(inode), + .acl_type = type, + .acl_len = buflen, + .acl_pages = pages, }; struct nfs_setaclres res; struct rpc_message msg = { @@ -4366,14 +6273,17 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); int ret, i; - if (!nfs4_server_supports_acls(server)) + /* You can't remove system.nfs4_acl: */ + if (buflen == 0) + return -EINVAL; + if (!nfs4_server_supports_acls(server, type)) return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) return -ERANGE; - i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + i = nfs4_buf_to_pages_noslab(buf, buflen, arg.acl_pages); if (i < 0) return i; - nfs4_inode_return_delegation(inode); + nfs4_inode_make_writeable(inode); ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); /* @@ -4388,20 +6298,35 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl * so mark the attribute cache invalid. */ spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; + nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE | + NFS_INO_INVALID_CTIME | + NFS_INO_REVAL_FORCED); spin_unlock(&inode->i_lock); nfs_access_zap_cache(inode); nfs_zap_acl_cache(inode); return ret; } -static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +static int nfs4_proc_set_acl(struct inode *inode, const void *buf, + size_t buflen, enum nfs4_acl_type type) { struct nfs4_exception exception = { }; int err; + + if (unlikely(NFS_FH(inode)->size == 0)) + return -ENODATA; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - __nfs4_proc_set_acl(inode, buf, buflen), + err = __nfs4_proc_set_acl(inode, buf, buflen, type); + trace_nfs4_set_acl(inode, err); + if (err == -NFS4ERR_BADOWNER || err == -NFS4ERR_BADNAME) { + /* + * no need to retry since the kernel + * isn't involved in encoding the ACEs. + */ + err = -EINVAL; + break; + } + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; @@ -4412,50 +6337,52 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs_fattr fattr; - struct nfs4_label label = {0, 0, buflen, buf}; + struct nfs4_label label = {0, 0, 0, buflen, buf}; u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; - struct nfs4_getattr_arg args = { + struct nfs_fattr fattr = { + .label = &label, + }; + struct nfs4_getattr_arg arg = { .fh = NFS_FH(inode), .bitmask = bitmask, }; struct nfs4_getattr_res res = { .fattr = &fattr, - .label = &label, .server = server, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR], - .rpc_argp = &args, + .rpc_argp = &arg, .rpc_resp = &res, }; int ret; nfs_fattr_init(&fattr); - ret = rpc_call_sync(server->client, &msg, 0); + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 0); if (ret) return ret; if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) return -ENOENT; - if (buflen < label.len) - return -ERANGE; - return 0; + return label.len; } static int nfs4_get_security_label(struct inode *inode, void *buf, size_t buflen) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) return -EOPNOTSUPP; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - _nfs4_get_security_label(inode, buf, buflen), + err = _nfs4_get_security_label(inode, buf, buflen); + trace_nfs4_get_security_label(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; @@ -4463,35 +6390,33 @@ static int nfs4_get_security_label(struct inode *inode, void *buf, static int _nfs4_do_set_security_label(struct inode *inode, struct nfs4_label *ilabel, - struct nfs_fattr *fattr, - struct nfs4_label *olabel) + struct nfs_fattr *fattr) { struct iattr sattr = {0}; struct nfs_server *server = NFS_SERVER(inode); const u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; - struct nfs_setattrargs args = { - .fh = NFS_FH(inode), - .iap = &sattr, + struct nfs_setattrargs arg = { + .fh = NFS_FH(inode), + .iap = &sattr, .server = server, .bitmask = bitmask, .label = ilabel, }; struct nfs_setattrres res = { .fattr = fattr, - .label = olabel, .server = server, }; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], - .rpc_argp = &args, - .rpc_resp = &res, + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], + .rpc_argp = &arg, + .rpc_resp = &res, }; int status; - nfs4_stateid_copy(&args.stateid, &zero_stateid); + nfs4_stateid_copy(&arg.stateid, &zero_stateid); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (status) dprintk("%s failed: %d\n", __func__, status); @@ -4500,129 +6425,44 @@ static int _nfs4_do_set_security_label(struct inode *inode, static int nfs4_do_set_security_label(struct inode *inode, struct nfs4_label *ilabel, - struct nfs_fattr *fattr, - struct nfs4_label *olabel) + struct nfs_fattr *fattr) { struct nfs4_exception exception = { }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(inode), - _nfs4_do_set_security_label(inode, ilabel, - fattr, olabel), + err = _nfs4_do_set_security_label(inode, ilabel, fattr); + trace_nfs4_set_security_label(inode, err); + err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); return err; } static int -nfs4_set_security_label(struct dentry *dentry, const void *buf, size_t buflen) +nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) { - struct nfs4_label ilabel, *olabel = NULL; - struct nfs_fattr fattr; - struct rpc_cred *cred; - struct inode *inode = dentry->d_inode; + struct nfs4_label ilabel = {0, 0, 0, buflen, (char *)buf }; + struct nfs_fattr *fattr; int status; if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) return -EOPNOTSUPP; - nfs_fattr_init(&fattr); - - ilabel.pi = 0; - ilabel.lfs = 0; - ilabel.label = (char *)buf; - ilabel.len = buflen; - - cred = rpc_lookup_cred(); - if (IS_ERR(cred)) - return PTR_ERR(cred); - - olabel = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); - if (IS_ERR(olabel)) { - status = -PTR_ERR(olabel); - goto out; - } + fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode)); + if (fattr == NULL) + return -ENOMEM; - status = nfs4_do_set_security_label(inode, &ilabel, &fattr, olabel); + status = nfs4_do_set_security_label(inode, &ilabel, fattr); if (status == 0) - nfs_setsecurity(inode, &fattr, olabel); + nfs_setsecurity(inode, fattr); - nfs4_label_free(olabel); -out: - put_rpccred(cred); + nfs_free_fattr(fattr); return status; } #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ -static int -nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) -{ - struct nfs_client *clp = server->nfs_client; - - if (task->tk_status >= 0) - return 0; - switch(task->tk_status) { - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - if (state == NULL) - break; - nfs_remove_bad_delegation(state->inode); - case -NFS4ERR_OPENMODE: - if (state == NULL) - break; - if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto stateid_invalid; - goto wait_on_recovery; - case -NFS4ERR_EXPIRED: - if (state != NULL) { - if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto stateid_invalid; - } - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_STALE_CLIENTID: - nfs4_schedule_lease_recovery(clp); - goto wait_on_recovery; -#if defined(CONFIG_NFS_V4_1) - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: - dprintk("%s ERROR %d, Reset session\n", __func__, - task->tk_status); - nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - task->tk_status = 0; - return -EAGAIN; -#endif /* CONFIG_NFS_V4_1 */ - case -NFS4ERR_DELAY: - nfs_inc_server_stats(server, NFSIOS_DELAY); - case -NFS4ERR_GRACE: - rpc_delay(task, NFS4_POLL_RETRY_MAX); - task->tk_status = 0; - return -EAGAIN; - case -NFS4ERR_RETRY_UNCACHED_REP: - case -NFS4ERR_OLD_STATEID: - task->tk_status = 0; - return -EAGAIN; - } - task->tk_status = nfs4_map_errors(task->tk_status); - return 0; -stateid_invalid: - task->tk_status = -EIO; - return 0; -wait_on_recovery: - rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) - rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); - task->tk_status = 0; - return -EAGAIN; -} - static void nfs4_init_boot_verifier(const struct nfs_client *clp, nfs4_verifier *bootverf) { @@ -4631,65 +6471,180 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) { /* An impossible timestamp guarantees this value * will never match a generated boot time. */ - verf[0] = 0; - verf[1] = (__be32)(NSEC_PER_SEC + 1); + verf[0] = cpu_to_be32(U32_MAX); + verf[1] = cpu_to_be32(U32_MAX); } else { struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); - verf[0] = (__be32)nn->boot_time.tv_sec; - verf[1] = (__be32)nn->boot_time.tv_nsec; + u64 ns = ktime_to_ns(nn->boot_time); + + verf[0] = cpu_to_be32(ns >> 32); + verf[1] = cpu_to_be32(ns); } memcpy(bootverf->data, verf, sizeof(bootverf->data)); } -static unsigned int -nfs4_init_nonuniform_client_string(const struct nfs_client *clp, - char *buf, size_t len) +static size_t +nfs4_get_uniquifier(struct nfs_client *clp, char *buf, size_t buflen) { - unsigned int result; + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); + struct nfs_netns_client *nn_clp = nn->nfs_client; + const char *id; + + buf[0] = '\0'; + + if (nn_clp) { + rcu_read_lock(); + id = rcu_dereference(nn_clp->identifier); + if (id) + strscpy(buf, id, buflen); + rcu_read_unlock(); + } + + if (nfs4_client_id_uniquifier[0] != '\0' && buf[0] == '\0') + strscpy(buf, nfs4_client_id_uniquifier, buflen); + + return strlen(buf); +} + +static int +nfs4_init_nonuniform_client_string(struct nfs_client *clp) +{ + char buf[NFS4_CLIENT_ID_UNIQ_LEN]; + size_t buflen; + size_t len; + char *str; + + if (clp->cl_owner_id != NULL) + return 0; rcu_read_lock(); - result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s", - clp->cl_ipaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR), - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_PROTO)); + len = 14 + + strlen(clp->cl_rpcclient->cl_nodename) + + 1 + + strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + + 1; rcu_read_unlock(); - return result; + + buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf)); + if (buflen) + len += buflen + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + rcu_read_lock(); + if (buflen) + scnprintf(str, len, "Linux NFSv4.0 %s/%s/%s", + clp->cl_rpcclient->cl_nodename, buf, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR)); + else + scnprintf(str, len, "Linux NFSv4.0 %s/%s", + clp->cl_rpcclient->cl_nodename, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR)); + rcu_read_unlock(); + + clp->cl_owner_id = str; + return 0; +} + +static int +nfs4_init_uniform_client_string(struct nfs_client *clp) +{ + char buf[NFS4_CLIENT_ID_UNIQ_LEN]; + size_t buflen; + size_t len; + char *str; + + if (clp->cl_owner_id != NULL) + return 0; + + len = 10 + 10 + 1 + 10 + 1 + + strlen(clp->cl_rpcclient->cl_nodename) + 1; + + buflen = nfs4_get_uniquifier(clp, buf, sizeof(buf)); + if (buflen) + len += buflen + 1; + + if (len > NFS4_OPAQUE_LIMIT + 1) + return -EINVAL; + + /* + * Since this string is allocated at mount time, and held until the + * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying + * about a memory-reclaim deadlock. + */ + str = kmalloc(len, GFP_KERNEL); + if (!str) + return -ENOMEM; + + if (buflen) + scnprintf(str, len, "Linux NFSv%u.%u %s/%s", + clp->rpc_ops->version, clp->cl_minorversion, + buf, clp->cl_rpcclient->cl_nodename); + else + scnprintf(str, len, "Linux NFSv%u.%u %s", + clp->rpc_ops->version, clp->cl_minorversion, + clp->cl_rpcclient->cl_nodename); + clp->cl_owner_id = str; + return 0; } +/* + * nfs4_callback_up_net() starts only "tcp" and "tcp6" callback + * services. Advertise one based on the address family of the + * clientaddr. + */ static unsigned int -nfs4_init_uniform_client_string(const struct nfs_client *clp, - char *buf, size_t len) +nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len) +{ + if (strchr(clp->cl_ipaddr, ':') != NULL) + return scnprintf(buf, len, "tcp6"); + else + return scnprintf(buf, len, "tcp"); +} + +static void nfs4_setclientid_done(struct rpc_task *task, void *calldata) { - char *nodename = clp->cl_rpcclient->cl_nodename; + struct nfs4_setclientid *sc = calldata; - if (nfs4_client_id_uniquifier[0] != '\0') - nodename = nfs4_client_id_uniquifier; - return scnprintf(buf, len, "Linux NFSv%u.%u %s", - clp->rpc_ops->version, clp->cl_minorversion, - nodename); + if (task->tk_status == 0) + sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred); } +static const struct rpc_call_ops nfs4_setclientid_ops = { + .rpc_call_done = nfs4_setclientid_done, +}; + /** * nfs4_proc_setclientid - Negotiate client ID * @clp: state data structure * @program: RPC program for NFSv4 callback service * @port: IP port number for NFS4 callback service - * @cred: RPC credential to use for this call + * @cred: credential to use for this call * @res: where to place the result * * Returns zero, a negative errno, or a negative NFS4ERR status code. */ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, - unsigned short port, struct rpc_cred *cred, + unsigned short port, const struct cred *cred, struct nfs4_setclientid_res *res) { nfs4_verifier sc_verifier; struct nfs4_setclientid setclientid = { .sc_verifier = &sc_verifier, .sc_prog = program, - .sc_cb_ident = clp->cl_cb_ident, + .sc_clnt = clp, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], @@ -4697,35 +6652,51 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_setclientid_ops, + .callback_data = &setclientid, + .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, + }; + unsigned long now = jiffies; int status; /* nfs_client_id4 */ nfs4_init_boot_verifier(clp, &sc_verifier); + if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) - setclientid.sc_name_len = - nfs4_init_uniform_client_string(clp, - setclientid.sc_name, - sizeof(setclientid.sc_name)); + status = nfs4_init_uniform_client_string(clp); else - setclientid.sc_name_len = - nfs4_init_nonuniform_client_string(clp, - setclientid.sc_name, - sizeof(setclientid.sc_name)); + status = nfs4_init_nonuniform_client_string(clp); + + if (status) + goto out; + /* cb_client4 */ - rcu_read_lock(); - setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, - sizeof(setclientid.sc_netid), "%s", - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_NETID)); - rcu_read_unlock(); + setclientid.sc_netid_len = + nfs4_init_callback_netid(clp, + setclientid.sc_netid, + sizeof(setclientid.sc_netid)); setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); - dprintk("NFS call setclientid auth=%s, '%.*s'\n", + dprintk("NFS call setclientid auth=%s, '%s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, - setclientid.sc_name_len, setclientid.sc_name); - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + clp->cl_owner_id); + + status = nfs4_call_sync_custom(&task_setup_data); + if (setclientid.sc_cred) { + kfree(clp->cl_acceptor); + clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); + put_rpccred(setclientid.sc_cred); + } + + if (status == 0) + do_renew_lease(clp, now); +out: + trace_nfs4_setclientid(clp, status); dprintk("NFS reply setclientid: %d\n", status); return status; } @@ -4733,14 +6704,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, /** * nfs4_proc_setclientid_confirm - Confirm client ID * @clp: state data structure - * @res: result of a previous SETCLIENTID - * @cred: RPC credential to use for this call + * @arg: result of a previous SETCLIENTID + * @cred: credential to use for this call * * Returns zero, a negative errno, or a negative NFS4ERR status code. */ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct nfs4_setclientid_res *arg, - struct rpc_cred *cred) + const struct cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], @@ -4752,7 +6723,9 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, clp->cl_clientid); - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); + trace_nfs4_setclientid_confirm(clp, status); dprintk("NFS reply setclientid_confirm: %d\n", status); return status; } @@ -4763,61 +6736,162 @@ struct nfs4_delegreturndata { struct nfs_fh fh; nfs4_stateid stateid; unsigned long timestamp; + unsigned short retrans; + struct { + struct nfs4_layoutreturn_args arg; + struct nfs4_layoutreturn_res res; + struct nfs4_xdr_opaque_data ld_private; + u32 roc_barrier; + bool roc; + } lr; + struct nfs4_delegattr sattr; struct nfs_fattr fattr; int rpc_status; + struct inode *inode; }; static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_delegreturndata *data = calldata; + struct nfs4_exception exception = { + .inode = data->inode, + .stateid = &data->stateid, + .task_is_privileged = data->args.seq_args.sa_privileged, + .retrans = data->retrans, + }; if (!nfs4_sequence_done(task, &data->res.seq_res)) return; + trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); + + /* Handle Layoutreturn errors */ + if (pnfs_roc_done(task, &data->args.lr_args, &data->res.lr_res, + &data->res.lr_ret) == -EAGAIN) + goto out_restart; + + if (data->args.sattr_args && task->tk_status != 0) { + switch(data->res.sattr_ret) { + case 0: + data->args.sattr_args = NULL; + data->res.sattr_res = false; + break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_BAD_STATEID: + /* Let the main handler below do stateid recovery */ + break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_delegation_stateid(&data->stateid, + data->inode)) + goto out_restart; + fallthrough; + default: + data->args.sattr_args = NULL; + data->res.sattr_res = false; + goto out_restart; + } + } + switch (task->tk_status) { - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: case 0: renew_lease(data->res.server, data->timestamp); break; - default: - if (nfs4_async_handle_error(task, data->res.server, NULL) == - -EAGAIN) { - rpc_restart_call_prepare(task); - return; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + nfs4_free_revoked_stateid(data->res.server, + data->args.stateid, + task->tk_msg.rpc_cred); + fallthrough; + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -ETIMEDOUT: + task->tk_status = 0; + break; + case -NFS4ERR_OLD_STATEID: + if (!nfs4_refresh_delegation_stateid(&data->stateid, data->inode)) + nfs4_stateid_seqid_inc(&data->stateid); + if (data->args.bitmask) { + data->args.bitmask = NULL; + data->res.fattr = NULL; } + goto out_restart; + case -NFS4ERR_ACCESS: + if (data->args.bitmask) { + data->args.bitmask = NULL; + data->res.fattr = NULL; + goto out_restart; + } + fallthrough; + default: + task->tk_status = nfs4_async_handle_exception(task, + data->res.server, task->tk_status, + &exception); + data->retrans = exception.retrans; + if (exception.retry) + goto out_restart; } + nfs_delegation_mark_returned(data->inode, data->args.stateid); data->rpc_status = task->tk_status; + return; +out_restart: + task->tk_status = 0; + rpc_restart_call_prepare(task); } static void nfs4_delegreturn_release(void *calldata) { + struct nfs4_delegreturndata *data = calldata; + struct inode *inode = data->inode; + + if (data->lr.roc) + pnfs_roc_release(&data->lr.arg, &data->lr.res, + data->res.lr_ret); + if (inode) { + nfs4_fattr_set_prechange(&data->fattr, + inode_peek_iversion_raw(inode)); + nfs_refresh_inode(inode, &data->fattr); + nfs_iput_and_deactive(inode); + } kfree(calldata); } -#if defined(CONFIG_NFS_V4_1) static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) { struct nfs4_delegreturndata *d_data; + struct pnfs_layout_hdr *lo; + + d_data = data; + + if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) { + nfs4_sequence_done(task, &d_data->res.seq_res); + return; + } - d_data = (struct nfs4_delegreturndata *)data; + lo = d_data->args.lr_args ? d_data->args.lr_args->layout : NULL; + if (lo && !pnfs_layout_is_valid(lo)) { + d_data->args.lr_args = NULL; + d_data->res.lr_res = NULL; + } - nfs4_setup_sequence(d_data->res.server, + nfs4_setup_sequence(d_data->res.server->nfs_client, &d_data->args.seq_args, &d_data->res.seq_res, task); } -#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs4_delegreturn_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs4_delegreturn_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs4_delegreturn_done, .rpc_release = nfs4_delegreturn_release, }; -static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync) +static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, + const nfs4_stateid *stateid, + struct nfs_delegation *delegation, + int issync) { struct nfs4_delegreturndata *data; struct nfs_server *server = NFS_SERVER(inode); @@ -4830,24 +6904,65 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co .rpc_client = server->client, .rpc_message = &msg, .callback_ops = &nfs4_delegreturn_ops, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, }; int status = 0; - data = kzalloc(sizeof(*data), GFP_NOFS); + if (nfs_server_capable(inode, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + + data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) return -ENOMEM; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + + nfs4_state_protect(server->nfs_client, + NFS_SP4_MACH_CRED_CLEANUP, + &task_setup_data.rpc_client, &msg); + data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; - data->args.bitmask = server->cache_consistency_bitmask; + nfs4_bitmask_set(data->args.bitmask_store, + server->cache_consistency_bitmask, inode, 0); + data->args.bitmask = data->args.bitmask_store; nfs_copy_fh(&data->fh, NFS_FH(inode)); nfs4_stateid_copy(&data->stateid, stateid); data->res.fattr = &data->fattr; data->res.server = server; + data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; + data->lr.arg.ld_private = &data->lr.ld_private; nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; + data->inode = nfs_igrab_and_active(inode); + if (data->inode || issync) { + data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, + cred); + if (data->lr.roc) { + data->args.lr_args = &data->lr.arg; + data->res.lr_res = &data->lr.res; + } + } + + if (delegation && + test_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags)) { + if (delegation->type & FMODE_READ) { + data->sattr.atime = inode_get_atime(inode); + data->sattr.atime_set = true; + } + if (delegation->type & FMODE_WRITE) { + data->sattr.mtime = inode_get_mtime(inode); + data->sattr.mtime_set = true; + } + data->args.sattr_args = &data->sattr; + data->res.sattr_res = true; + } + + if (!data->inode) + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, + 1); + else + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, + 0); task_setup_data.callback_data = data; msg.rpc_argp = &data->args; @@ -4857,26 +6972,26 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co return PTR_ERR(task); if (!issync) goto out; - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); if (status != 0) goto out; status = data->rpc_status; - if (status == 0) - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); - else - nfs_refresh_inode(inode, &data->fattr); out: rpc_put_task(task); return status; } -int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync) +int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, + const nfs4_stateid *stateid, + struct nfs_delegation *delegation, int issync) { struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; int err; do { - err = _nfs4_proc_delegreturn(inode, cred, stateid, issync); + err = _nfs4_proc_delegreturn(inode, cred, stateid, + delegation, issync); + trace_nfs4_delegreturn(inode, stateid, err); switch (err) { case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: @@ -4888,22 +7003,6 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 return err; } -#define NFS4_LOCK_MINTIMEOUT (1 * HZ) -#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) - -/* - * sleep, with exponential backoff, and retry the LOCK operation. - */ -static unsigned long -nfs4_set_lock_task_retry(unsigned long timeout) -{ - freezable_schedule_timeout_killable_unsafe(timeout); - timeout <<= 1; - if (timeout > NFS4_LOCK_MAXTIMEOUT) - return NFS4_LOCK_MAXTIMEOUT; - return timeout; -} - static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct inode *inode = state->inode; @@ -4918,8 +7017,8 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], - .rpc_argp = &arg, - .rpc_resp = &res, + .rpc_argp = &arg, + .rpc_resp = &res, .rpc_cred = state->owner->so_cred, }; struct nfs4_lock_state *lsp; @@ -4935,43 +7034,67 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); switch (status) { case 0: - request->fl_type = F_UNLCK; + request->c.flc_type = F_UNLCK; break; case -NFS4ERR_DENIED: status = 0; } request->fl_ops->fl_release_private(request); + request->fl_ops = NULL; out: return status; } static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(state->inode), - _nfs4_proc_getlk(state, cmd, request), + err = _nfs4_proc_getlk(state, cmd, request); + trace_nfs4_get_lock(request, state, cmd, err); + err = nfs4_handle_exception(NFS_SERVER(state->inode), err, &exception); } while (exception.retry); return err; } -static int do_vfs_lock(struct file *file, struct file_lock *fl) +/* + * Update the seqid of a lock stateid after receiving + * NFS4ERR_OLD_STATEID + */ +static bool nfs4_refresh_lock_old_stateid(nfs4_stateid *dst, + struct nfs4_lock_state *lsp) { - int res = 0; - switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { - case FL_POSIX: - res = posix_lock_file_wait(file, fl); - break; - case FL_FLOCK: - res = flock_lock_file_wait(file, fl); - break; - default: - BUG(); - } - return res; + struct nfs4_state *state = lsp->ls_state; + bool ret = false; + + spin_lock(&state->state_lock); + if (!nfs4_stateid_match_other(dst, &lsp->ls_stateid)) + goto out; + if (!nfs4_stateid_is_newer(&lsp->ls_stateid, dst)) + nfs4_stateid_seqid_inc(dst); + else + dst->seqid = lsp->ls_stateid.seqid; + ret = true; +out: + spin_unlock(&state->state_lock); + return ret; +} + +static bool nfs4_sync_lock_stateid(nfs4_stateid *dst, + struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state = lsp->ls_state; + bool ret; + + spin_lock(&state->state_lock); + ret = !nfs4_stateid_match_other(dst, &lsp->ls_stateid); + nfs4_stateid_copy(dst, &lsp->ls_stateid); + spin_unlock(&state->state_lock); + return ret; } struct nfs4_unlockdata { @@ -4979,9 +7102,11 @@ struct nfs4_unlockdata { struct nfs_locku_res res; struct nfs4_lock_state *lsp; struct nfs_open_context *ctx; + struct nfs_lock_context *l_ctx; struct file_lock fl; - const struct nfs_server *server; + struct nfs_server *server; unsigned long timestamp; + unsigned short retrans; }; static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, @@ -4990,22 +7115,33 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, struct nfs_seqid *seqid) { struct nfs4_unlockdata *p; - struct inode *inode = lsp->ls_state->inode; + struct nfs4_state *state = lsp->ls_state; + struct inode *inode = state->inode; + struct nfs_lock_context *l_ctx; - p = kzalloc(sizeof(*p), GFP_NOFS); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) return NULL; + l_ctx = nfs_get_lock_context(ctx); + if (!IS_ERR(l_ctx)) { + p->l_ctx = l_ctx; + } else { + kfree(p); + return NULL; + } p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; p->arg.seqid = seqid; p->res.seqid = seqid; - p->arg.stateid = &lsp->ls_stateid; p->lsp = lsp; - atomic_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); - memcpy(&p->fl, fl, sizeof(p->fl)); + locks_init_lock(&p->fl); + locks_copy_lock(&p->fl, fl); p->server = NFS_SERVER(inode); + spin_lock(&state->state_lock); + nfs4_stateid_copy(&p->arg.stateid, &lsp->ls_stateid); + spin_unlock(&state->state_lock); return p; } @@ -5014,6 +7150,7 @@ static void nfs4_locku_release_calldata(void *data) struct nfs4_unlockdata *calldata = data; nfs_free_seqid(calldata->arg.seqid); nfs4_put_lock_state(calldata->lsp); + nfs_put_lock_context(calldata->l_ctx); put_nfs_open_context(calldata->ctx); kfree(calldata); } @@ -5021,22 +7158,45 @@ static void nfs4_locku_release_calldata(void *data) static void nfs4_locku_done(struct rpc_task *task, void *data) { struct nfs4_unlockdata *calldata = data; + struct nfs4_exception exception = { + .inode = calldata->lsp->ls_state->inode, + .stateid = &calldata->arg.stateid, + .retrans = calldata->retrans, + }; if (!nfs4_sequence_done(task, &calldata->res.seq_res)) return; switch (task->tk_status) { case 0: - nfs4_stateid_copy(&calldata->lsp->ls_stateid, - &calldata->res.stateid); renew_lease(calldata->server, calldata->timestamp); - break; + locks_lock_inode_wait(calldata->lsp->ls_state->inode, &calldata->fl); + if (nfs4_update_lock_stateid(calldata->lsp, + &calldata->res.stateid)) + break; + fallthrough; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: + nfs4_free_revoked_stateid(calldata->server, + &calldata->arg.stateid, + task->tk_msg.rpc_cred); + fallthrough; case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: + if (nfs4_sync_lock_stateid(&calldata->arg.stateid, + calldata->lsp)) + rpc_restart_call_prepare(task); + break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_lock_old_stateid(&calldata->arg.stateid, + calldata->lsp)) + rpc_restart_call_prepare(task); break; default: - if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) + task->tk_status = nfs4_async_handle_exception(task, + calldata->server, task->tk_status, + &exception); + calldata->retrans = exception.retrans; + if (exception.retry) rpc_restart_call_prepare(task); } nfs_release_seqid(calldata->arg.seqid); @@ -5046,6 +7206,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) { struct nfs4_unlockdata *calldata = data; + if (test_bit(NFS_CONTEXT_UNLOCK, &calldata->l_ctx->open_context->flags) && + nfs_async_iocounter_wait(task, calldata->l_ctx)) + return; + if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) goto out_wait; if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { @@ -5053,7 +7217,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) goto out_no_action; } calldata->timestamp = jiffies; - if (nfs4_setup_sequence(calldata->server, + if (nfs4_setup_sequence(calldata->server->nfs_client, &calldata->arg.seq_args, &calldata->res.seq_res, task) != 0) @@ -5089,10 +7253,18 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, .flags = RPC_TASK_ASYNC, }; + if (nfs_server_capable(lsp->ls_state->inode, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + + nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client, + NFS_SP4_MACH_CRED_CLEANUP, &task_setup_data.rpc_client, &msg); + /* Ensure this is an unlock - when canceling a lock, the * canceled lock is passed in, and it won't be an unlock. */ - fl->fl_type = F_UNLCK; + fl->c.flc_type = F_UNLCK; + if (fl->c.flc_flags & FL_CLOSE) + set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags); data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); if (data == NULL) { @@ -5100,7 +7272,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } - nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1, 0); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -5115,41 +7287,47 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * struct nfs_seqid *seqid; struct nfs4_lock_state *lsp; struct rpc_task *task; + struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); int status = 0; - unsigned char fl_flags = request->fl_flags; + unsigned char saved_flags = request->c.flc_flags; status = nfs4_set_lock_state(state, request); /* Unlock _before_ we do the RPC call */ - request->fl_flags |= FL_EXISTS; + request->c.flc_flags |= FL_EXISTS; /* Exclude nfs_delegation_claim_locks() */ mutex_lock(&sp->so_delegreturn_mutex); /* Exclude nfs4_reclaim_open_stateid() - note nesting! */ down_read(&nfsi->rwsem); - if (do_vfs_lock(request->fl_file, request) == -ENOENT) { + if (locks_lock_inode_wait(inode, request) == -ENOENT) { up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); goto out; } + lsp = request->fl_u.nfs4_fl.owner; + set_bit(NFS_LOCK_UNLOCKING, &lsp->ls_flags); up_read(&nfsi->rwsem); mutex_unlock(&sp->so_delegreturn_mutex); if (status != 0) goto out; /* Is this a delegated lock? */ - lsp = request->fl_u.nfs4_fl.owner; if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) == 0) goto out; - seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL); + alloc_seqid = NFS_SERVER(inode)->nfs_client->cl_mvops->alloc_seqid; + seqid = alloc_seqid(&lsp->ls_seqid, GFP_KERNEL); status = -ENOMEM; - if (seqid == NULL) + if (IS_ERR(seqid)) goto out; - task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); + task = nfs4_do_unlck(request, + nfs_file_open_context(request->c.flc_file), + lsp, seqid); status = PTR_ERR(task); if (IS_ERR(task)) goto out; - status = nfs4_wait_for_completion_rpc_task(task); + status = rpc_wait_for_completion_task(task); rpc_put_task(task); out: - request->fl_flags = fl_flags; + request->c.flc_flags = saved_flags; + trace_nfs4_unlock(request, state, F_SETLK, status); return status; } @@ -5172,6 +7350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, struct nfs4_lockdata *p; struct inode *inode = lsp->ls_state->inode; struct nfs_server *server = NFS_SERVER(inode); + struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); p = kzalloc(sizeof(*p), gfp_mask); if (p == NULL) @@ -5180,21 +7359,21 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask); - if (p->arg.open_seqid == NULL) + if (IS_ERR(p->arg.open_seqid)) goto out_free; - p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask); - if (p->arg.lock_seqid == NULL) + alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; + p->arg.lock_seqid = alloc_seqid(&lsp->ls_seqid, gfp_mask); + if (IS_ERR(p->arg.lock_seqid)) goto out_free_seqid; - p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_seqid.owner_id; p->arg.lock_owner.s_dev = server->s_dev; p->res.lock_seqid = p->arg.lock_seqid; p->lsp = lsp; p->server = server; - atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); - memcpy(&p->fl, fl, sizeof(p->fl)); + locks_init_lock(&p->fl); + locks_copy_lock(&p->fl, fl); return p; out_free_seqid: nfs_free_seqid(p->arg.open_seqid); @@ -5208,26 +7387,29 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) struct nfs4_lockdata *data = calldata; struct nfs4_state *state = data->lsp->ls_state; - dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) goto out_wait; /* Do we need to do an open_to_lock_owner? */ - if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { + if (!test_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags)) { if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { goto out_release_lock_seqid; } - data->arg.open_stateid = &state->open_stateid; + nfs4_stateid_copy(&data->arg.open_stateid, + &state->open_stateid); data->arg.new_lock_owner = 1; data->res.open_seqid = data->arg.open_seqid; - } else + } else { data->arg.new_lock_owner = 0; + nfs4_stateid_copy(&data->arg.lock_stateid, + &data->lsp->ls_stateid); + } if (!nfs4_valid_open_stateid(state)) { data->rpc_status = -EBADF; task->tk_action = NULL; goto out_release_open_seqid; } data->timestamp = jiffies; - if (nfs4_setup_sequence(data->server, + if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args, &data->res.seq_res, task) == 0) @@ -5238,41 +7420,68 @@ out_release_lock_seqid: nfs_release_seqid(data->arg.lock_seqid); out_wait: nfs4_sequence_done(task, &data->res.seq_res); - dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); + dprintk("%s: ret = %d\n", __func__, data->rpc_status); } static void nfs4_lock_done(struct rpc_task *task, void *calldata) { struct nfs4_lockdata *data = calldata; - - dprintk("%s: begin!\n", __func__); + struct nfs4_lock_state *lsp = data->lsp; if (!nfs4_sequence_done(task, &data->res.seq_res)) return; data->rpc_status = task->tk_status; - if (data->arg.new_lock_owner != 0) { - if (data->rpc_status == 0) - nfs_confirm_seqid(&data->lsp->ls_seqid, 0); - else - goto out; - } - if (data->rpc_status == 0) { - nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); - set_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags); - renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); - } -out: - dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); + switch (task->tk_status) { + case 0: + renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), + data->timestamp); + if (data->arg.new_lock && !data->cancelled) { + data->fl.c.flc_flags &= ~(FL_SLEEP | FL_ACCESS); + if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) + goto out_restart; + } + if (data->arg.new_lock_owner != 0) { + nfs_confirm_seqid(&lsp->ls_seqid, 0); + nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); + set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); + } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) + goto out_restart; + break; + case -NFS4ERR_OLD_STATEID: + if (data->arg.new_lock_owner != 0 && + nfs4_refresh_open_old_stateid(&data->arg.open_stateid, + lsp->ls_state)) + goto out_restart; + if (nfs4_refresh_lock_old_stateid(&data->arg.lock_stateid, lsp)) + goto out_restart; + fallthrough; + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + if (data->arg.new_lock_owner != 0) { + if (!nfs4_stateid_match(&data->arg.open_stateid, + &lsp->ls_state->open_stateid)) + goto out_restart; + } else if (!nfs4_stateid_match(&data->arg.lock_stateid, + &lsp->ls_stateid)) + goto out_restart; + } +out_done: + dprintk("%s: ret = %d!\n", __func__, data->rpc_status); + return; +out_restart: + if (!data->cancelled) + rpc_restart_call_prepare(task); + goto out_done; } static void nfs4_lock_release(void *calldata) { struct nfs4_lockdata *data = calldata; - dprintk("%s: begin!\n", __func__); nfs_free_seqid(data->arg.open_seqid); - if (data->cancelled != 0) { + if (data->cancelled && data->rpc_status == 0) { struct rpc_task *task; task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, data->arg.lock_seqid); @@ -5284,7 +7493,6 @@ static void nfs4_lock_release(void *calldata) nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); kfree(data); - dprintk("%s: done!\n", __func__); } static const struct rpc_call_ops nfs4_lock_ops = { @@ -5297,6 +7505,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ { switch (error) { case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; if (new_lock_owner != 0 || @@ -5305,9 +7514,8 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ break; case -NFS4ERR_STALE_STATEID: lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; - case -NFS4ERR_EXPIRED: nfs4_schedule_lease_recovery(server->nfs_client); - }; + } } static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type) @@ -5323,40 +7531,44 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f .rpc_message = &msg, .callback_ops = &nfs4_lock_ops, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF, }; int ret; - dprintk("%s: begin!\n", __func__); - data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), - fl->fl_u.nfs4_fl.owner, - recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS); + if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE)) + task_setup_data.flags |= RPC_TASK_MOVEABLE; + + data = nfs4_alloc_lockdata(fl, + nfs_file_open_context(fl->c.flc_file), + fl->fl_u.nfs4_fl.owner, GFP_KERNEL); if (data == NULL) return -ENOMEM; if (IS_SETLKW(cmd)) data->arg.block = 1; - nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1, + recovery_type > NFS_LOCK_NEW); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; if (recovery_type > NFS_LOCK_NEW) { if (recovery_type == NFS_LOCK_RECLAIM) data->arg.reclaim = NFS_LOCK_RECLAIM; - nfs4_set_sequence_privileged(&data->arg.seq_args); - } + } else + data->arg.new_lock = 1; task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - ret = nfs4_wait_for_completion_rpc_task(task); + ret = rpc_wait_for_completion_task(task); if (ret == 0) { ret = data->rpc_status; if (ret) nfs4_handle_setlk_error(data->server, data->lsp, data->arg.new_lock_owner, ret); } else - data->cancelled = 1; + data->cancelled = true; + trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); rpc_put_task(task); - dprintk("%s: done, ret = %d!\n", __func__, ret); + dprintk("%s: ret = %d\n", __func__, ret); return ret; } @@ -5391,6 +7603,10 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request err = nfs4_set_lock_state(state, request); if (err != 0) return err; + if (!recover_lost_locks) { + set_bit(NFS_LOCK_LOST, &request->fl_u.nfs4_fl.owner->ls_flags); + return 0; + } do { if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) return 0; @@ -5409,101 +7625,49 @@ out: } #if defined(CONFIG_NFS_V4_1) -/** - * nfs41_check_expired_locks - possibly free a lock stateid - * - * @state: NFSv4 state for an inode - * - * Returns NFS_OK if recovery for this stateid is now finished. - * Otherwise a negative NFS4ERR value is returned. - */ -static int nfs41_check_expired_locks(struct nfs4_state *state) -{ - int status, ret = -NFS4ERR_BAD_STATEID; - struct nfs4_lock_state *lsp; - struct nfs_server *server = NFS_SERVER(state->inode); - - list_for_each_entry(lsp, &state->lock_states, ls_locks) { - if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { - struct rpc_cred *cred = lsp->ls_state->owner->so_cred; - - status = nfs41_test_stateid(server, - &lsp->ls_stateid, - cred); - if (status != NFS_OK) { - /* Free the stateid unless the server - * informs us the stateid is unrecognized. */ - if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, - &lsp->ls_stateid, - cred); - clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); - ret = status; - } - } - }; - - return ret; -} - static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) { - int status = NFS_OK; + struct nfs4_lock_state *lsp; + int status; - if (test_bit(LK_STATE_IN_USE, &state->flags)) - status = nfs41_check_expired_locks(state); - if (status != NFS_OK) - status = nfs4_lock_expired(state, request); - return status; + status = nfs4_set_lock_state(state, request); + if (status != 0) + return status; + lsp = request->fl_u.nfs4_fl.owner; + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) || + test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) + return 0; + return nfs4_lock_expired(state, request); } #endif static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_state_owner *sp = state->owner; struct nfs_inode *nfsi = NFS_I(state->inode); - unsigned char fl_flags = request->fl_flags; - unsigned int seq; - int status = -ENOLCK; + struct nfs4_state_owner *sp = state->owner; + unsigned char flags = request->c.flc_flags; + int status; - if ((fl_flags & FL_POSIX) && - !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) - goto out; - /* Is this a delegated open? */ - status = nfs4_set_lock_state(state, request); - if (status != 0) - goto out; - request->fl_flags |= FL_ACCESS; - status = do_vfs_lock(request->fl_file, request); + request->c.flc_flags |= FL_ACCESS; + status = locks_lock_inode_wait(state->inode, request); if (status < 0) goto out; + mutex_lock(&sp->so_delegreturn_mutex); down_read(&nfsi->rwsem); if (test_bit(NFS_DELEGATED_STATE, &state->flags)) { /* Yes: cache locks! */ /* ...but avoid races with delegation recall... */ - request->fl_flags = fl_flags & ~FL_SLEEP; - status = do_vfs_lock(request->fl_file, request); - goto out_unlock; - } - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); - up_read(&nfsi->rwsem); - status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); - if (status != 0) + request->c.flc_flags = flags & ~FL_SLEEP; + status = locks_lock_inode_wait(state->inode, request); + up_read(&nfsi->rwsem); + mutex_unlock(&sp->so_delegreturn_mutex); goto out; - down_read(&nfsi->rwsem); - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) { - status = -NFS4ERR_DELAY; - goto out_unlock; } - /* Note: we always want to sleep here! */ - request->fl_flags = fl_flags | FL_SLEEP; - if (do_vfs_lock(request->fl_file, request) < 0) - printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock " - "manager!\n", __func__); -out_unlock: up_read(&nfsi->rwsem); + mutex_unlock(&sp->so_delegreturn_mutex); + status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); out: - request->fl_flags = fl_flags; + request->c.flc_flags = flags; return status; } @@ -5512,6 +7676,7 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * struct nfs4_exception exception = { .state = state, .inode = state->inode, + .interruptible = true, }; int err; @@ -5525,21 +7690,116 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * return err; } +#define NFS4_LOCK_MINTIMEOUT (1 * HZ) +#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) + +static int +nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd, + struct file_lock *request) +{ + int status = -ERESTARTSYS; + unsigned long timeout = NFS4_LOCK_MINTIMEOUT; + + while(!signalled()) { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); + schedule_timeout(timeout); + timeout *= 2; + timeout = min_t(unsigned long, NFS4_LOCK_MAXTIMEOUT, timeout); + status = -ERESTARTSYS; + } + return status; +} + +#ifdef CONFIG_NFS_V4_1 +struct nfs4_lock_waiter { + struct inode *inode; + struct nfs_lowner owner; + wait_queue_entry_t wait; +}; + +static int +nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key) +{ + struct nfs4_lock_waiter *waiter = + container_of(wait, struct nfs4_lock_waiter, wait); + + /* NULL key means to wake up everyone */ + if (key) { + struct cb_notify_lock_args *cbnl = key; + struct nfs_lowner *lowner = &cbnl->cbnl_owner, + *wowner = &waiter->owner; + + /* Only wake if the callback was for the same owner. */ + if (lowner->id != wowner->id || lowner->s_dev != wowner->s_dev) + return 0; + + /* Make sure it's for the right inode */ + if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh)) + return 0; + } + + return woken_wake_function(wait, mode, flags, key); +} + +static int +nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs_client *clp = server->nfs_client; + wait_queue_head_t *q = &clp->cl_lock_waitq; + struct nfs4_lock_waiter waiter = { + .inode = state->inode, + .owner = { .clientid = clp->cl_clientid, + .id = lsp->ls_seqid.owner_id, + .s_dev = server->s_dev }, + }; + int status; + + /* Don't bother with waitqueue if we don't expect a callback */ + if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags)) + return nfs4_retry_setlk_simple(state, cmd, request); + + init_wait(&waiter.wait); + waiter.wait.func = nfs4_wake_lock_waiter; + add_wait_queue(q, &waiter.wait); + + do { + status = nfs4_proc_setlk(state, cmd, request); + if (status != -EAGAIN || IS_SETLK(cmd)) + break; + + status = -ERESTARTSYS; + wait_woken(&waiter.wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE, + NFS4_LOCK_MAXTIMEOUT); + } while (!signalled()); + + remove_wait_queue(q, &waiter.wait); + + return status; +} +#else /* !CONFIG_NFS_V4_1 */ +static inline int +nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + return nfs4_retry_setlk_simple(state, cmd, request); +} +#endif + static int nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) { struct nfs_open_context *ctx; struct nfs4_state *state; - unsigned long timeout = NFS4_LOCK_MINTIMEOUT; int status; /* verify open state */ ctx = nfs_file_open_context(filp); state = ctx->state; - if (request->fl_start < 0 || request->fl_end < 0) - return -EINVAL; - if (IS_GETLK(cmd)) { if (state != NULL) return nfs4_proc_getlk(state, F_GETLK, request); @@ -5549,7 +7809,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (!(IS_SETLK(cmd) || IS_SETLKW(cmd))) return -EINVAL; - if (request->fl_type == F_UNLCK) { + if (lock_is_unlock(request)) { if (state != NULL) return nfs4_proc_unlck(state, cmd, request); return 0; @@ -5557,11 +7817,16 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (state == NULL) return -ENOLCK; + + if ((request->c.flc_flags & FL_POSIX) && + !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) + return -ENOLCK; + /* * Don't rely on the VFS having checked the file open mode, * since it won't do this for flock() locks. */ - switch (request->fl_type) { + switch (request->c.flc_type) { case F_RDLCK: if (!(filp->f_mode & FMODE_READ)) return -EBADF; @@ -5571,16 +7836,48 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) return -EBADF; } - do { - status = nfs4_proc_setlk(state, cmd, request); - if ((status != -EAGAIN) || IS_SETLK(cmd)) - break; - timeout = nfs4_set_lock_task_retry(timeout); - status = -ERESTARTSYS; - if (signalled()) - break; - } while(status < 0); - return status; + status = nfs4_set_lock_state(state, request); + if (status != 0) + return status; + + return nfs4_retry_setlk(state, cmd, request); +} + +static int nfs4_delete_lease(struct file *file, void **priv) +{ + return generic_setlease(file, F_UNLCK, NULL, priv); +} + +static int nfs4_add_lease(struct file *file, int arg, struct file_lease **lease, + void **priv) +{ + struct inode *inode = file_inode(file); + fmode_t type = arg == F_RDLCK ? FMODE_READ : FMODE_WRITE; + int ret; + + /* No delegation, no lease */ + if (!nfs4_have_delegation(inode, type, 0)) + return -EAGAIN; + ret = generic_setlease(file, arg, lease, priv); + if (ret || nfs4_have_delegation(inode, type, 0)) + return ret; + /* We raced with a delegation return */ + nfs4_delete_lease(file, priv); + return -EAGAIN; +} + +int nfs4_proc_setlease(struct file *file, int arg, struct file_lease **lease, + void **priv) +{ + switch (arg) { + case F_RDLCK: + case F_WRLCK: + return nfs4_add_lease(file, arg, lease, priv); + case F_UNLCK: + return nfs4_delete_lease(file, priv); + default: + return -EINVAL; + } } int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid) @@ -5591,16 +7888,56 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, err = nfs4_set_lock_state(state, fl); if (err != 0) return err; - err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); - return nfs4_handle_delegation_recall_error(server, state, stateid, err); + do { + err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); + if (err != -NFS4ERR_DELAY && err != -NFS4ERR_GRACE) + break; + ssleep(1); + } while (err == -NFS4ERR_DELAY || err == -NFSERR_GRACE); + return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err); } struct nfs_release_lockowner_data { struct nfs4_lock_state *lsp; struct nfs_server *server; struct nfs_release_lockowner_args args; + struct nfs_release_lockowner_res res; + unsigned long timestamp; }; +static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_release_lockowner_data *data = calldata; + struct nfs_server *server = data->server; + nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, + &data->res.seq_res, task); + data->args.lock_owner.clientid = server->nfs_client->cl_clientid; + data->timestamp = jiffies; +} + +static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) +{ + struct nfs_release_lockowner_data *data = calldata; + struct nfs_server *server = data->server; + + nfs40_sequence_done(task, &data->res.seq_res); + + switch (task->tk_status) { + case 0: + renew_lease(server, data->timestamp); + break; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_EXPIRED: + nfs4_schedule_lease_recovery(server->nfs_client); + break; + case -NFS4ERR_LEASE_MOVED: + case -NFS4ERR_DELAY: + if (nfs4_async_handle_error(task, server, + NULL, NULL) == -EAGAIN) + rpc_restart_call_prepare(task); + } +} + static void nfs4_release_lockowner_release(void *calldata) { struct nfs_release_lockowner_data *data = calldata; @@ -5609,10 +7946,13 @@ static void nfs4_release_lockowner_release(void *calldata) } static const struct rpc_call_ops nfs4_release_lockowner_ops = { + .rpc_call_prepare = nfs4_release_lockowner_prepare, + .rpc_call_done = nfs4_release_lockowner_done, .rpc_release = nfs4_release_lockowner_release, }; -static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) +static void +nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) { struct nfs_release_lockowner_data *data; struct rpc_message msg = { @@ -5620,101 +7960,274 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st }; if (server->nfs_client->cl_mvops->minor_version != 0) - return -EINVAL; - data = kmalloc(sizeof(*data), GFP_NOFS); + return; + + data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) - return -ENOMEM; + return; data->lsp = lsp; data->server = server; data->args.lock_owner.clientid = server->nfs_client->cl_clientid; data->args.lock_owner.id = lsp->ls_seqid.owner_id; data->args.lock_owner.s_dev = server->s_dev; + msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); - return 0; } #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" -static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key, - const void *buf, size_t buflen, - int flags, int type) +static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) { - if (strcmp(key, "") != 0) - return -EINVAL; + return nfs4_proc_set_acl(inode, buf, buflen, NFS4ACL_ACL); +} - return nfs4_proc_set_acl(dentry->d_inode, buf, buflen); +static int nfs4_xattr_get_nfs4_acl(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) +{ + return nfs4_proc_get_acl(inode, buf, buflen, NFS4ACL_ACL); } -static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key, - void *buf, size_t buflen, int type) +static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry) { - if (strcmp(key, "") != 0) - return -EINVAL; + return nfs4_server_supports_acls(NFS_SB(dentry->d_sb), NFS4ACL_ACL); +} - return nfs4_proc_get_acl(dentry->d_inode, buf, buflen); +#if defined(CONFIG_NFS_V4_1) +#define XATTR_NAME_NFSV4_DACL "system.nfs4_dacl" + +static int nfs4_xattr_set_nfs4_dacl(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) +{ + return nfs4_proc_set_acl(inode, buf, buflen, NFS4ACL_DACL); } -static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list, - size_t list_len, const char *name, - size_t name_len, int type) +static int nfs4_xattr_get_nfs4_dacl(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) { - size_t len = sizeof(XATTR_NAME_NFSV4_ACL); + return nfs4_proc_get_acl(inode, buf, buflen, NFS4ACL_DACL); +} - if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode))) - return 0; +static bool nfs4_xattr_list_nfs4_dacl(struct dentry *dentry) +{ + return nfs4_server_supports_acls(NFS_SB(dentry->d_sb), NFS4ACL_DACL); +} - if (list && len <= list_len) - memcpy(list, XATTR_NAME_NFSV4_ACL, len); - return len; +#define XATTR_NAME_NFSV4_SACL "system.nfs4_sacl" + +static int nfs4_xattr_set_nfs4_sacl(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) +{ + return nfs4_proc_set_acl(inode, buf, buflen, NFS4ACL_SACL); } -#ifdef CONFIG_NFS_V4_SECURITY_LABEL -static inline int nfs4_server_supports_labels(struct nfs_server *server) +static int nfs4_xattr_get_nfs4_sacl(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) { - return server->caps & NFS_CAP_SECURITY_LABEL; + return nfs4_proc_get_acl(inode, buf, buflen, NFS4ACL_SACL); } -static int nfs4_xattr_set_nfs4_label(struct dentry *dentry, const char *key, - const void *buf, size_t buflen, - int flags, int type) +static bool nfs4_xattr_list_nfs4_sacl(struct dentry *dentry) +{ + return nfs4_server_supports_acls(NFS_SB(dentry->d_sb), NFS4ACL_SACL); +} + +#endif + +#ifdef CONFIG_NFS_V4_SECURITY_LABEL + +static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) { if (security_ismaclabel(key)) - return nfs4_set_security_label(dentry, buf, buflen); + return nfs4_set_security_label(inode, buf, buflen); return -EOPNOTSUPP; } -static int nfs4_xattr_get_nfs4_label(struct dentry *dentry, const char *key, - void *buf, size_t buflen, int type) +static int nfs4_xattr_get_nfs4_label(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) { if (security_ismaclabel(key)) - return nfs4_get_security_label(dentry->d_inode, buf, buflen); + return nfs4_get_security_label(inode, buf, buflen); return -EOPNOTSUPP; } -static size_t nfs4_xattr_list_nfs4_label(struct dentry *dentry, char *list, - size_t list_len, const char *name, - size_t name_len, int type) +static ssize_t +nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len) { - size_t len = 0; + int len = 0; - if (nfs_server_capable(dentry->d_inode, NFS_CAP_SECURITY_LABEL)) { - len = security_inode_listsecurity(dentry->d_inode, NULL, 0); - if (list && len <= list_len) - security_inode_listsecurity(dentry->d_inode, list, len); + if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL)) { + len = security_inode_listsecurity(inode, list, list_len); + if (len >= 0 && list_len && len > list_len) + return -ERANGE; } return len; } static const struct xattr_handler nfs4_xattr_nfs4_label_handler = { .prefix = XATTR_SECURITY_PREFIX, - .list = nfs4_xattr_list_nfs4_label, .get = nfs4_xattr_get_nfs4_label, .set = nfs4_xattr_set_nfs4_label, }; + +#else + +static ssize_t +nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len) +{ + return 0; +} + #endif +#ifdef CONFIG_NFS_V4_2 +static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *unused, struct inode *inode, + const char *key, const void *buf, + size_t buflen, int flags) +{ + u32 mask; + int ret; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) + return -EOPNOTSUPP; + + /* + * There is no mapping from the MAY_* flags to the NFS_ACCESS_XA* + * flags right now. Handling of xattr operations use the normal + * file read/write permissions. + * + * Just in case the server has other ideas (which RFC 8276 allows), + * do a cached access check for the XA* flags to possibly avoid + * doing an RPC and getting EACCES back. + */ + if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { + if (!(mask & NFS_ACCESS_XAWRITE)) + return -EACCES; + } + + if (buf == NULL) { + ret = nfs42_proc_removexattr(inode, key); + if (!ret) + nfs4_xattr_cache_remove(inode, key); + } else { + ret = nfs42_proc_setxattr(inode, key, buf, buflen, flags); + if (!ret) + nfs4_xattr_cache_add(inode, key, buf, NULL, buflen); + } + + return ret; +} + +static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) +{ + u32 mask; + ssize_t ret; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) + return -EOPNOTSUPP; + + if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { + if (!(mask & NFS_ACCESS_XAREAD)) + return -EACCES; + } + + ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); + if (ret) + return ret; + + ret = nfs4_xattr_cache_get(inode, key, buf, buflen); + if (ret >= 0 || (ret < 0 && ret != -ENOENT)) + return ret; + + ret = nfs42_proc_getxattr(inode, key, buf, buflen); + + return ret; +} + +static ssize_t +nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) +{ + u64 cookie; + bool eof; + ssize_t ret, size; + char *buf; + size_t buflen; + u32 mask; + + if (!nfs_server_capable(inode, NFS_CAP_XATTR)) + return 0; + + if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { + if (!(mask & NFS_ACCESS_XALIST)) + return 0; + } + + ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); + if (ret) + return ret; + + ret = nfs4_xattr_cache_list(inode, list, list_len); + if (ret >= 0 || (ret < 0 && ret != -ENOENT)) + return ret; + + cookie = 0; + eof = false; + buflen = list_len ? list_len : XATTR_LIST_MAX; + buf = list_len ? list : NULL; + size = 0; + + while (!eof) { + ret = nfs42_proc_listxattrs(inode, buf, buflen, + &cookie, &eof); + if (ret < 0) + return ret; + + if (list_len) { + buf += ret; + buflen -= ret; + } + size += ret; + } + + if (list_len) + nfs4_xattr_cache_set_list(inode, list, size); + + return size; +} + +#else + +static ssize_t +nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) +{ + return 0; +} +#endif /* CONFIG_NFS_V4_2 */ /* * nfs_fhget will use either the mounted_on_fileid or the fileid @@ -5739,9 +8252,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, struct page *page) { struct nfs_server *server = NFS_SERVER(dir); - u32 bitmask[3] = { - [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, - }; + u32 bitmask[3]; struct nfs4_fs_locations_arg args = { .dir_fh = NFS_FH(dir), .name = name, @@ -5760,14 +8271,17 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, dprintk("%s: start\n", __func__); + bitmask[0] = nfs4_fattr_bitmap[0] | FATTR4_WORD0_FS_LOCATIONS; + bitmask[1] = nfs4_fattr_bitmap[1]; + /* Ask for the fileid of the absent filesystem if mounted_on_fileid * is not supported */ if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) - bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; + bitmask[0] &= ~FATTR4_WORD0_FILEID; else - bitmask[0] |= FATTR4_WORD0_FILEID; + bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; - nfs_fattr_init(&fs_locations->fattr); + nfs_fattr_init(fs_locations->fattr); fs_locations->server = server; fs_locations->nlocations = 0; status = nfs4_call_sync(client, server, &msg, &args.seq_args, &res.seq_res, 0); @@ -5780,19 +8294,322 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, struct nfs4_fs_locations *fs_locations, struct page *page) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_fs_locations(client, dir, name, fs_locations, page), + err = _nfs4_proc_fs_locations(client, dir, name, + fs_locations, page); + trace_nfs4_get_fs_locations(dir, name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); return err; } -static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) +/* + * This operation also signals the server that this client is + * performing migration recovery. The server can stop returning + * NFS4ERR_LEASE_MOVED to this client. A RENEW operation is + * appended to this compound to identify the client ID which is + * performing recovery. + */ +static int _nfs40_proc_get_locations(struct nfs_server *server, + struct nfs_fh *fhandle, + struct nfs4_fs_locations *locations, + struct page *page, const struct cred *cred) +{ + struct rpc_clnt *clnt = server->client; + u32 bitmask[2] = { + [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + }; + struct nfs4_fs_locations_arg args = { + .clientid = server->nfs_client->cl_clientid, + .fh = fhandle, + .page = page, + .bitmask = bitmask, + .migration = 1, /* skip LOOKUP */ + .renew = 1, /* append RENEW */ + }; + struct nfs4_fs_locations_res res = { + .fs_locations = locations, + .migration = 1, + .renew = 1, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + unsigned long now = jiffies; + int status; + + nfs_fattr_init(locations->fattr); + locations->server = server; + locations->nlocations = 0; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); + status = nfs4_call_sync_sequence(clnt, server, &msg, + &args.seq_args, &res.seq_res); + if (status) + return status; + + renew_lease(server, now); + return 0; +} + +#ifdef CONFIG_NFS_V4_1 + +/* + * This operation also signals the server that this client is + * performing migration recovery. The server can stop asserting + * SEQ4_STATUS_LEASE_MOVED for this client. The client ID + * performing this operation is identified in the SEQUENCE + * operation in this compound. + * + * When the client supports GETATTR(fs_locations_info), it can + * be plumbed in here. + */ +static int _nfs41_proc_get_locations(struct nfs_server *server, + struct nfs_fh *fhandle, + struct nfs4_fs_locations *locations, + struct page *page, const struct cred *cred) +{ + struct rpc_clnt *clnt = server->client; + u32 bitmask[2] = { + [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + }; + struct nfs4_fs_locations_arg args = { + .fh = fhandle, + .page = page, + .bitmask = bitmask, + .migration = 1, /* skip LOOKUP */ + }; + struct nfs4_fs_locations_res res = { + .fs_locations = locations, + .migration = 1, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + struct nfs4_call_sync_data data = { + .seq_server = server, + .seq_args = &args.seq_args, + .seq_res = &res.seq_res, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_message = &msg, + .callback_ops = server->nfs_client->cl_mvops->call_sync_ops, + .callback_data = &data, + .flags = RPC_TASK_NO_ROUND_ROBIN, + }; + int status; + + nfs_fattr_init(locations->fattr); + locations->server = server; + locations->nlocations = 0; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); + status = nfs4_call_sync_custom(&task_setup_data); + if (status == NFS4_OK && + res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED) + status = -NFS4ERR_LEASE_MOVED; + return status; +} + +#endif /* CONFIG_NFS_V4_1 */ + +/** + * nfs4_proc_get_locations - discover locations for a migrated FSID + * @server: pointer to nfs_server to process + * @fhandle: pointer to the kernel NFS client file handle + * @locations: result of query + * @page: buffer + * @cred: credential to use for this operation + * + * Returns NFS4_OK on success, a negative NFS4ERR status code if the + * operation failed, or a negative errno if a local error occurred. + * + * On success, "locations" is filled in, but if the server has + * no locations information, NFS_ATTR_FATTR_V4_LOCATIONS is not + * asserted. + * + * -NFS4ERR_LEASE_MOVED is returned if the server still has leases + * from this client that require migration recovery. + */ +int nfs4_proc_get_locations(struct nfs_server *server, + struct nfs_fh *fhandle, + struct nfs4_fs_locations *locations, + struct page *page, const struct cred *cred) { + struct nfs_client *clp = server->nfs_client; + const struct nfs4_mig_recovery_ops *ops = + clp->cl_mvops->mig_recovery_ops; + struct nfs4_exception exception = { + .interruptible = true, + }; int status; + + dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + clp->cl_hostname); + nfs_display_fhandle(fhandle, __func__); + + do { + status = ops->get_locations(server, fhandle, locations, page, + cred); + if (status != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, status, &exception); + } while (exception.retry); + return status; +} + +/* + * This operation also signals the server that this client is + * performing "lease moved" recovery. The server can stop + * returning NFS4ERR_LEASE_MOVED to this client. A RENEW operation + * is appended to this compound to identify the client ID which is + * performing recovery. + */ +static int _nfs40_proc_fsid_present(struct inode *inode, const struct cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct rpc_clnt *clnt = server->client; + struct nfs4_fsid_present_arg args = { + .fh = NFS_FH(inode), + .clientid = clp->cl_clientid, + .renew = 1, /* append RENEW */ + }; + struct nfs4_fsid_present_res res = { + .renew = 1, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + unsigned long now = jiffies; + int status; + + res.fh = nfs_alloc_fhandle(); + if (res.fh == NULL) + return -ENOMEM; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); + status = nfs4_call_sync_sequence(clnt, server, &msg, + &args.seq_args, &res.seq_res); + nfs_free_fhandle(res.fh); + if (status) + return status; + + do_renew_lease(clp, now); + return 0; +} + +#ifdef CONFIG_NFS_V4_1 + +/* + * This operation also signals the server that this client is + * performing "lease moved" recovery. The server can stop asserting + * SEQ4_STATUS_LEASE_MOVED for this client. The client ID performing + * this operation is identified in the SEQUENCE operation in this + * compound. + */ +static int _nfs41_proc_fsid_present(struct inode *inode, const struct cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_clnt *clnt = server->client; + struct nfs4_fsid_present_arg args = { + .fh = NFS_FH(inode), + }; + struct nfs4_fsid_present_res res = { + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + int status; + + res.fh = nfs_alloc_fhandle(); + if (res.fh == NULL) + return -ENOMEM; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); + status = nfs4_call_sync_sequence(clnt, server, &msg, + &args.seq_args, &res.seq_res); + nfs_free_fhandle(res.fh); + if (status == NFS4_OK && + res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED) + status = -NFS4ERR_LEASE_MOVED; + return status; +} + +#endif /* CONFIG_NFS_V4_1 */ + +/** + * nfs4_proc_fsid_present - Is this FSID present or absent on server? + * @inode: inode on FSID to check + * @cred: credential to use for this operation + * + * Server indicates whether the FSID is present, moved, or not + * recognized. This operation is necessary to clear a LEASE_MOVED + * condition for this client ID. + * + * Returns NFS4_OK if the FSID is present on this server, + * -NFS4ERR_MOVED if the FSID is no longer present, a negative + * NFS4ERR code if some error occurred on the server, or a + * negative errno if a local failure occurred. + */ +int nfs4_proc_fsid_present(struct inode *inode, const struct cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; + const struct nfs4_mig_recovery_ops *ops = + clp->cl_mvops->mig_recovery_ops; + struct nfs4_exception exception = { + .interruptible = true, + }; + int status; + + dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + clp->cl_hostname); + nfs_display_fhandle(NFS_FH(inode), __func__); + + do { + status = ops->fsid_present(inode, cred); + if (status != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, status, &exception); + } while (exception.retry); + return status; +} + +/* + * If 'use_integrity' is true and the state managment nfs_client + * cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient + * and the machine credential as per RFC3530bis and RFC5661 Security + * Considerations sections. Otherwise, just use the user cred with the + * filesystem's rpc_client. + */ +static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors, bool use_integrity) +{ + int status; + struct rpc_clnt *clnt = NFS_SERVER(dir)->client; + struct nfs_client *clp = NFS_SERVER(dir)->nfs_client; struct nfs4_secinfo_arg args = { .dir_fh = NFS_FH(dir), .name = name, @@ -5805,21 +8622,65 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct .rpc_argp = &args, .rpc_resp = &res, }; + struct nfs4_call_sync_data data = { + .seq_server = NFS_SERVER(dir), + .seq_args = &args.seq_args, + .seq_res = &res.seq_res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = clnt, + .rpc_message = &msg, + .callback_ops = clp->cl_mvops->call_sync_ops, + .callback_data = &data, + .flags = RPC_TASK_NO_ROUND_ROBIN, + }; + const struct cred *cred = NULL; + + if (use_integrity) { + clnt = clp->cl_rpcclient; + task_setup.rpc_client = clnt; + + cred = nfs4_get_clid_cred(clp); + msg.rpc_cred = cred; + } dprintk("NFS call secinfo %s\n", name->name); - status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); + + nfs4_state_protect(clp, NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); + status = nfs4_call_sync_custom(&task_setup); + dprintk("NFS reply secinfo: %d\n", status); + + put_cred(cred); return status; } int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { - err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_secinfo(dir, name, flavors), + err = -NFS4ERR_WRONGSEC; + + /* try to use integrity protection with machine cred */ + if (_nfs4_is_integrity_protected(NFS_SERVER(dir)->nfs_client)) + err = _nfs4_proc_secinfo(dir, name, flavors, true); + + /* + * if unable to use integrity protection, or SECINFO with + * integrity protection returns NFS4ERR_WRONGSEC (which is + * disallowed by spec, but exists in deployed servers) use + * the current filesystem's rpc_client and the user cred. + */ + if (err == -NFS4ERR_WRONGSEC) + err = _nfs4_proc_secinfo(dir, name, flavors, false); + + trace_nfs4_secinfo(dir, name, err); + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } while (exception.retry); return err; @@ -5831,9 +8692,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or * DS flags set. */ -static int nfs4_check_cl_exchange_flags(u32 flags) +static int nfs4_check_cl_exchange_flags(u32 flags, u32 version) { - if (flags & ~EXCHGID4_FLAG_MASK_R) + if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R)) + goto out_inval; + else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R)) goto out_inval; if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) && (flags & EXCHGID4_FLAG_USE_NON_PNFS)) @@ -5849,176 +8712,533 @@ static bool nfs41_same_server_scope(struct nfs41_server_scope *a, struct nfs41_server_scope *b) { - if (a->server_scope_sz == b->server_scope_sz && - memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) - return true; + if (a->server_scope_sz != b->server_scope_sz) + return false; + return memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0; +} - return false; +static void +nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) +{ + struct nfs41_bind_conn_to_session_args *args = task->tk_msg.rpc_argp; + struct nfs41_bind_conn_to_session_res *res = task->tk_msg.rpc_resp; + struct nfs_client *clp = args->client; + + switch (task->tk_status) { + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + nfs4_schedule_session_recovery(clp->cl_session, + task->tk_status); + return; + } + if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && + res->dir != NFS4_CDFS4_BOTH) { + rpc_task_close_connection(task); + if (args->retries++ < MAX_BIND_CONN_TO_SESSION_RETRIES) + rpc_restart_call(task); + } } +static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = { + .rpc_call_done = nfs4_bind_one_conn_to_session_done, +}; + /* - * nfs4_proc_bind_conn_to_session() + * nfs4_proc_bind_one_conn_to_session() * * The 4.1 client currently uses the same TCP connection for the * fore and backchannel. */ -int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred) +static +int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, + struct nfs_client *clp, + const struct cred *cred) { int status; + struct nfs41_bind_conn_to_session_args args = { + .client = clp, + .dir = NFS4_CDFC4_FORE_OR_BOTH, + .retries = 0, + }; struct nfs41_bind_conn_to_session_res res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_BIND_CONN_TO_SESSION], - .rpc_argp = clp, + .rpc_argp = &args, .rpc_resp = &res, .rpc_cred = cred, }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_xprt = xprt, + .callback_ops = &nfs4_bind_one_conn_to_session_ops, + .rpc_message = &msg, + .flags = RPC_TASK_TIMEOUT, + }; + struct rpc_task *task; - dprintk("--> %s\n", __func__); + nfs4_copy_sessionid(&args.sessionid, &clp->cl_session->sess_id); + if (!(clp->cl_session->flags & SESSION4_BACK_CHAN)) + args.dir = NFS4_CDFC4_FORE; - res.session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); - if (unlikely(res.session == NULL)) { - status = -ENOMEM; - goto out; - } + /* Do not set the backchannel flag unless this is clnt->cl_xprt */ + if (xprt != rcu_access_pointer(clnt->cl_xprt)) + args.dir = NFS4_CDFC4_FORE; - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) { + status = task->tk_status; + rpc_put_task(task); + } else + status = PTR_ERR(task); + trace_nfs4_bind_conn_to_session(clp, status); if (status == 0) { - if (memcmp(res.session->sess_id.data, + if (memcmp(res.sessionid.data, clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { dprintk("NFS: %s: Session ID mismatch\n", __func__); - status = -EIO; - goto out_session; + return -EIO; } - if (res.dir != NFS4_CDFS4_BOTH) { + if ((res.dir & args.dir) != res.dir || res.dir == 0) { dprintk("NFS: %s: Unexpected direction from server\n", __func__); - status = -EIO; - goto out_session; + return -EIO; } - if (res.use_conn_in_rdma_mode) { + if (res.use_conn_in_rdma_mode != args.use_conn_in_rdma_mode) { dprintk("NFS: %s: Server returned RDMA mode = true\n", __func__); - status = -EIO; - goto out_session; + return -EIO; } } -out_session: - kfree(res.session); -out: - dprintk("<-- %s status= %d\n", __func__, status); + return status; } +struct rpc_bind_conn_calldata { + struct nfs_client *clp; + const struct cred *cred; +}; + +static int +nfs4_proc_bind_conn_to_session_callback(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, + void *calldata) +{ + struct rpc_bind_conn_calldata *p = calldata; + + return nfs4_proc_bind_one_conn_to_session(clnt, xprt, p->clp, p->cred); +} + +int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, const struct cred *cred) +{ + struct rpc_bind_conn_calldata data = { + .clp = clp, + .cred = cred, + }; + return rpc_clnt_iterate_for_each_xprt(clp->cl_rpcclient, + nfs4_proc_bind_conn_to_session_callback, &data); +} + /* - * nfs4_proc_exchange_id() + * Minimum set of SP4_MACH_CRED operations from RFC 5661 in the enforce map + * and operations we'd like to see to enable certain features in the allow map + */ +static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = { + .how = SP4_MACH_CRED, + .enforce.u.words = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) + }, + .allow.u.words = { + [0] = 1 << (OP_CLOSE) | + 1 << (OP_OPEN_DOWNGRADE) | + 1 << (OP_LOCKU) | + 1 << (OP_DELEGRETURN) | + 1 << (OP_COMMIT), + [1] = 1 << (OP_SECINFO - 32) | + 1 << (OP_SECINFO_NO_NAME - 32) | + 1 << (OP_LAYOUTRETURN - 32) | + 1 << (OP_TEST_STATEID - 32) | + 1 << (OP_FREE_STATEID - 32) | + 1 << (OP_WRITE - 32) + } +}; + +/* + * Select the state protection mode for client `clp' given the server results + * from exchange_id in `sp'. * - * Returns zero, a negative errno, or a negative NFS4ERR status code. + * Returns 0 on success, negative errno otherwise. + */ +static int nfs4_sp4_select_mode(struct nfs_client *clp, + struct nfs41_state_protection *sp) +{ + static const u32 supported_enforce[NFS4_OP_MAP_NUM_WORDS] = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) + }; + unsigned long flags = 0; + unsigned int i; + int ret = 0; + + if (sp->how == SP4_MACH_CRED) { + /* Print state protect result */ + dfprintk(MOUNT, "Server SP4_MACH_CRED support:\n"); + for (i = 0; i <= LAST_NFS4_OP; i++) { + if (test_bit(i, sp->enforce.u.longs)) + dfprintk(MOUNT, " enforce op %d\n", i); + if (test_bit(i, sp->allow.u.longs)) + dfprintk(MOUNT, " allow op %d\n", i); + } + + /* make sure nothing is on enforce list that isn't supported */ + for (i = 0; i < NFS4_OP_MAP_NUM_WORDS; i++) { + if (sp->enforce.u.words[i] & ~supported_enforce[i]) { + dfprintk(MOUNT, "sp4_mach_cred: disabled\n"); + ret = -EINVAL; + goto out; + } + } + + /* + * Minimal mode - state operations are allowed to use machine + * credential. Note this already happens by default, so the + * client doesn't have to do anything more than the negotiation. + * + * NOTE: we don't care if EXCHANGE_ID is in the list - + * we're already using the machine cred for exchange_id + * and will never use a different cred. + */ + if (test_bit(OP_BIND_CONN_TO_SESSION, sp->enforce.u.longs) && + test_bit(OP_CREATE_SESSION, sp->enforce.u.longs) && + test_bit(OP_DESTROY_SESSION, sp->enforce.u.longs) && + test_bit(OP_DESTROY_CLIENTID, sp->enforce.u.longs)) { + dfprintk(MOUNT, "sp4_mach_cred:\n"); + dfprintk(MOUNT, " minimal mode enabled\n"); + __set_bit(NFS_SP4_MACH_CRED_MINIMAL, &flags); + } else { + dfprintk(MOUNT, "sp4_mach_cred: disabled\n"); + ret = -EINVAL; + goto out; + } + + if (test_bit(OP_CLOSE, sp->allow.u.longs) && + test_bit(OP_OPEN_DOWNGRADE, sp->allow.u.longs) && + test_bit(OP_DELEGRETURN, sp->allow.u.longs) && + test_bit(OP_LOCKU, sp->allow.u.longs)) { + dfprintk(MOUNT, " cleanup mode enabled\n"); + __set_bit(NFS_SP4_MACH_CRED_CLEANUP, &flags); + } + + if (test_bit(OP_LAYOUTRETURN, sp->allow.u.longs)) { + dfprintk(MOUNT, " pnfs cleanup mode enabled\n"); + __set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP, &flags); + } + + if (test_bit(OP_SECINFO, sp->allow.u.longs) && + test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) { + dfprintk(MOUNT, " secinfo mode enabled\n"); + __set_bit(NFS_SP4_MACH_CRED_SECINFO, &flags); + } + + if (test_bit(OP_TEST_STATEID, sp->allow.u.longs) && + test_bit(OP_FREE_STATEID, sp->allow.u.longs)) { + dfprintk(MOUNT, " stateid mode enabled\n"); + __set_bit(NFS_SP4_MACH_CRED_STATEID, &flags); + } + + if (test_bit(OP_WRITE, sp->allow.u.longs)) { + dfprintk(MOUNT, " write mode enabled\n"); + __set_bit(NFS_SP4_MACH_CRED_WRITE, &flags); + } + + if (test_bit(OP_COMMIT, sp->allow.u.longs)) { + dfprintk(MOUNT, " commit mode enabled\n"); + __set_bit(NFS_SP4_MACH_CRED_COMMIT, &flags); + } + } +out: + clp->cl_sp4_flags = flags; + return ret; +} + +struct nfs41_exchange_id_data { + struct nfs41_exchange_id_res res; + struct nfs41_exchange_id_args args; +}; + +static void nfs4_exchange_id_release(void *data) +{ + struct nfs41_exchange_id_data *cdata = + (struct nfs41_exchange_id_data *)data; + + nfs_put_client(cdata->args.client); + kfree(cdata->res.impl_id); + kfree(cdata->res.server_scope); + kfree(cdata->res.server_owner); + kfree(cdata); +} + +static const struct rpc_call_ops nfs4_exchange_id_call_ops = { + .rpc_release = nfs4_exchange_id_release, +}; + +/* + * _nfs4_proc_exchange_id() * - * Since the clientid has expired, all compounds using sessions - * associated with the stale clientid will be returning - * NFS4ERR_BADSESSION in the sequence operation, and will therefore - * be in some phase of session reset. + * Wrapper for EXCHANGE_ID operation. */ -int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) +static struct rpc_task * +nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred, + u32 sp4_how, struct rpc_xprt *xprt) { - nfs4_verifier verifier; - struct nfs41_exchange_id_args args = { - .verifier = &verifier, - .client = clp, - .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | - EXCHGID4_FLAG_BIND_PRINC_STATEID, - }; - struct nfs41_exchange_id_res res = { - 0 - }; - int status; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID], - .rpc_argp = &args, - .rpc_resp = &res, .rpc_cred = cred, }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clp->cl_rpcclient, + .callback_ops = &nfs4_exchange_id_call_ops, + .rpc_message = &msg, + .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, + }; + struct nfs41_exchange_id_data *calldata; + int status; - nfs4_init_boot_verifier(clp, &verifier); - args.id_len = nfs4_init_uniform_client_string(clp, args.id, - sizeof(args.id)); - dprintk("NFS call exchange_id auth=%s, '%.*s'\n", - clp->cl_rpcclient->cl_auth->au_ops->au_name, - args.id_len, args.id); + if (!refcount_inc_not_zero(&clp->cl_count)) + return ERR_PTR(-EIO); - res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), - GFP_NOFS); - if (unlikely(res.server_owner == NULL)) { - status = -ENOMEM; + status = -ENOMEM; + calldata = kzalloc(sizeof(*calldata), GFP_NOFS); + if (!calldata) goto out; - } - res.server_scope = kzalloc(sizeof(struct nfs41_server_scope), + nfs4_init_boot_verifier(clp, &calldata->args.verifier); + + status = nfs4_init_uniform_client_string(clp); + if (status) + goto out_calldata; + + calldata->res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), + GFP_NOFS); + status = -ENOMEM; + if (unlikely(calldata->res.server_owner == NULL)) + goto out_calldata; + + calldata->res.server_scope = kzalloc(sizeof(struct nfs41_server_scope), GFP_NOFS); - if (unlikely(res.server_scope == NULL)) { - status = -ENOMEM; + if (unlikely(calldata->res.server_scope == NULL)) goto out_server_owner; - } - res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS); - if (unlikely(res.impl_id == NULL)) { - status = -ENOMEM; + calldata->res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_NOFS); + if (unlikely(calldata->res.impl_id == NULL)) goto out_server_scope; - } - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); - if (status == 0) - status = nfs4_check_cl_exchange_flags(res.flags); + switch (sp4_how) { + case SP4_NONE: + calldata->args.state_protect.how = SP4_NONE; + break; - if (status == 0) { - clp->cl_clientid = res.clientid; - clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R); - if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) - clp->cl_seqid = res.seqid; - - kfree(clp->cl_serverowner); - clp->cl_serverowner = res.server_owner; - res.server_owner = NULL; - - /* use the most recent implementation id */ - kfree(clp->cl_implid); - clp->cl_implid = res.impl_id; - - if (clp->cl_serverscope != NULL && - !nfs41_same_server_scope(clp->cl_serverscope, - res.server_scope)) { - dprintk("%s: server_scope mismatch detected\n", - __func__); - set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); - kfree(clp->cl_serverscope); - clp->cl_serverscope = NULL; - } + case SP4_MACH_CRED: + calldata->args.state_protect = nfs4_sp4_mach_cred_request; + break; - if (clp->cl_serverscope == NULL) { - clp->cl_serverscope = res.server_scope; - goto out; - } - } else - kfree(res.impl_id); + default: + /* unsupported! */ + WARN_ON_ONCE(1); + status = -EINVAL; + goto out_impl_id; + } + if (xprt) { + task_setup_data.rpc_xprt = xprt; + task_setup_data.flags |= RPC_TASK_SOFTCONN; + memcpy(calldata->args.verifier.data, clp->cl_confirm.data, + sizeof(calldata->args.verifier.data)); + } + calldata->args.client = clp; + calldata->args.flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | + EXCHGID4_FLAG_BIND_PRINC_STATEID; +#ifdef CONFIG_NFS_V4_1_MIGRATION + calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR; +#endif + if (test_bit(NFS_CS_PNFS, &clp->cl_flags)) + calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS; + msg.rpc_argp = &calldata->args; + msg.rpc_resp = &calldata->res; + task_setup_data.callback_data = calldata; -out_server_owner: - kfree(res.server_owner); + return rpc_run_task(&task_setup_data); + +out_impl_id: + kfree(calldata->res.impl_id); out_server_scope: - kfree(res.server_scope); + kfree(calldata->res.server_scope); +out_server_owner: + kfree(calldata->res.server_owner); +out_calldata: + kfree(calldata); out: - if (clp->cl_implid != NULL) - dprintk("NFS reply exchange_id: Server Implementation ID: " - "domain: %s, name: %s, date: %llu,%u\n", - clp->cl_implid->domain, clp->cl_implid->name, - clp->cl_implid->date.seconds, - clp->cl_implid->date.nseconds); - dprintk("NFS reply exchange_id: %d\n", status); + nfs_put_client(clp); + return ERR_PTR(status); +} + +/* + * _nfs4_proc_exchange_id() + * + * Wrapper for EXCHANGE_ID operation. + */ +static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred, + u32 sp4_how) +{ + struct rpc_task *task; + struct nfs41_exchange_id_args *argp; + struct nfs41_exchange_id_res *resp; + unsigned long now = jiffies; + int status; + + task = nfs4_run_exchange_id(clp, cred, sp4_how, NULL); + if (IS_ERR(task)) + return PTR_ERR(task); + + argp = task->tk_msg.rpc_argp; + resp = task->tk_msg.rpc_resp; + status = task->tk_status; + if (status != 0) + goto out; + + status = nfs4_check_cl_exchange_flags(resp->flags, + clp->cl_mvops->minor_version); + if (status != 0) + goto out; + + status = nfs4_sp4_select_mode(clp, &resp->state_protect); + if (status != 0) + goto out; + + do_renew_lease(clp, now); + + clp->cl_clientid = resp->clientid; + clp->cl_exchange_flags = resp->flags; + clp->cl_seqid = resp->seqid; + /* Client ID is not confirmed */ + if (!(resp->flags & EXCHGID4_FLAG_CONFIRMED_R)) + clear_bit(NFS4_SESSION_ESTABLISHED, + &clp->cl_session->session_state); + + if (clp->cl_serverscope != NULL && + !nfs41_same_server_scope(clp->cl_serverscope, + resp->server_scope)) { + dprintk("%s: server_scope mismatch detected\n", + __func__); + set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); + } + + swap(clp->cl_serverowner, resp->server_owner); + swap(clp->cl_serverscope, resp->server_scope); + swap(clp->cl_implid, resp->impl_id); + + /* Save the EXCHANGE_ID verifier session trunk tests */ + memcpy(clp->cl_confirm.data, argp->verifier.data, + sizeof(clp->cl_confirm.data)); +out: + trace_nfs4_exchange_id(clp, status); + rpc_put_task(task); return status; } +/* + * nfs4_proc_exchange_id() + * + * Returns zero, a negative errno, or a negative NFS4ERR status code. + * + * Since the clientid has expired, all compounds using sessions + * associated with the stale clientid will be returning + * NFS4ERR_BADSESSION in the sequence operation, and will therefore + * be in some phase of session reset. + * + * Will attempt to negotiate SP4_MACH_CRED if krb5i / krb5p auth is used. + */ +int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred) +{ + rpc_authflavor_t authflavor = clp->cl_rpcclient->cl_auth->au_flavor; + int status; + + /* try SP4_MACH_CRED if krb5i/p */ + if (authflavor == RPC_AUTH_GSS_KRB5I || + authflavor == RPC_AUTH_GSS_KRB5P) { + status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED); + if (!status) + return 0; + } + + /* try SP4_NONE */ + return _nfs4_proc_exchange_id(clp, cred, SP4_NONE); +} + +/** + * nfs4_test_session_trunk + * + * This is an add_xprt_test() test function called from + * rpc_clnt_setup_test_and_add_xprt. + * + * The rpc_xprt_switch is referrenced by rpc_clnt_setup_test_and_add_xprt + * and is dereferrenced in nfs4_exchange_id_release + * + * Upon success, add the new transport to the rpc_clnt + * + * @clnt: struct rpc_clnt to get new transport + * @xprt: the rpc_xprt to test + * @data: call data for _nfs4_proc_exchange_id. + */ +void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, + void *data) +{ + struct nfs4_add_xprt_data *adata = data; + struct rpc_task *task; + int status; + + u32 sp4_how; + + dprintk("--> %s try %s\n", __func__, + xprt->address_strings[RPC_DISPLAY_ADDR]); + + sp4_how = (adata->clp->cl_sp4_flags == 0 ? SP4_NONE : SP4_MACH_CRED); + +try_again: + /* Test connection for session trunking. Async exchange_id call */ + task = nfs4_run_exchange_id(adata->clp, adata->cred, sp4_how, xprt); + if (IS_ERR(task)) + return; + + status = task->tk_status; + if (status == 0) { + status = nfs4_detect_session_trunking(adata->clp, + task->tk_msg.rpc_resp, xprt); + trace_nfs4_trunked_exchange_id(adata->clp, + xprt->address_strings[RPC_DISPLAY_ADDR], status); + } + if (status == 0) + rpc_clnt_xprt_switch_add_xprt(clnt, xprt); + else if (status != -NFS4ERR_DELAY && rpc_clnt_xprt_switch_has_addr(clnt, + (struct sockaddr *)&xprt->addr)) + rpc_clnt_xprt_switch_remove_xprt(clnt, xprt); + + rpc_put_task(task); + if (status == -NFS4ERR_DELAY) { + ssleep(1); + goto try_again; + } +} +EXPORT_SYMBOL_GPL(nfs4_test_session_trunk); + static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, - struct rpc_cred *cred) + const struct cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_CLIENTID], @@ -6027,7 +9247,9 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, }; int status; - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); + trace_nfs4_destroy_clientid(clp, status); if (status) dprintk("NFS: Got error %d from the server %s on " "DESTROY_CLIENTID.", status, clp->cl_hostname); @@ -6035,7 +9257,7 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, } static int nfs4_proc_destroy_clientid(struct nfs_client *clp, - struct rpc_cred *cred) + const struct cred *cred) { unsigned int loop; int ret; @@ -6056,7 +9278,7 @@ static int nfs4_proc_destroy_clientid(struct nfs_client *clp, int nfs4_destroy_clientid(struct nfs_client *clp) { - struct rpc_cred *cred; + const struct cred *cred; int ret = 0; if (clp->cl_mvops->minor_version < 1) @@ -6065,10 +9287,9 @@ int nfs4_destroy_clientid(struct nfs_client *clp) goto out; if (clp->cl_preserve_clid) goto out; - cred = nfs4_get_exchange_id_cred(clp); + cred = nfs4_get_clid_cred(clp); ret = nfs4_proc_destroy_clientid(clp, cred); - if (cred) - put_rpccred(cred); + put_cred(cred); switch (ret) { case 0: case -NFS4ERR_STALE_CLIENTID: @@ -6078,6 +9299,8 @@ out: return ret; } +#endif /* CONFIG_NFS_V4_1 */ + struct nfs4_get_lease_time_data { struct nfs4_get_lease_time_args *args; struct nfs4_get_lease_time_res *res; @@ -6090,14 +9313,12 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, struct nfs4_get_lease_time_data *data = (struct nfs4_get_lease_time_data *)calldata; - dprintk("--> %s\n", __func__); /* just setup sequence, do not trigger session recovery since we're invoked within one */ - nfs41_setup_sequence(data->clp->cl_session, + nfs4_setup_sequence(data->clp, &data->args->la_seq_args, &data->res->lr_seq_res, task); - dprintk("<-- %s\n", __func__); } /* @@ -6109,21 +9330,18 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) struct nfs4_get_lease_time_data *data = (struct nfs4_get_lease_time_data *)calldata; - dprintk("--> %s\n", __func__); - if (!nfs41_sequence_done(task, &data->res->lr_seq_res)) + if (!nfs4_sequence_done(task, &data->res->lr_seq_res)) return; switch (task->tk_status) { case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: - dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); rpc_delay(task, NFS4_POLL_RETRY_MIN); task->tk_status = 0; - /* fall through */ + fallthrough; case -NFS4ERR_RETRY_UNCACHED_REP: rpc_restart_call_prepare(task); return; } - dprintk("<-- %s\n", __func__); } static const struct rpc_call_ops nfs4_get_lease_time_ops = { @@ -6133,7 +9351,6 @@ static const struct rpc_call_ops nfs4_get_lease_time_ops = { int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) { - struct rpc_task *task; struct nfs4_get_lease_time_args args; struct nfs4_get_lease_time_res res = { .lr_fsinfo = fsinfo, @@ -6155,24 +9372,13 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) .callback_data = &data, .flags = RPC_TASK_TIMEOUT, }; - int status; - - nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); - nfs4_set_sequence_privileged(&args.la_seq_args); - dprintk("--> %s\n", __func__); - task = rpc_run_task(&task_setup); - - if (IS_ERR(task)) - status = PTR_ERR(task); - else { - status = task->tk_status; - rpc_put_task(task); - } - dprintk("<-- %s return %d\n", __func__, status); - return status; + nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0, 1); + return nfs4_call_sync_custom(&task_setup); } +#ifdef CONFIG_NFS_V4_1 + /* * Initialize the values to be used by the client in CREATE_SESSION * If nfs4_init_session set the fore channel request and response sizes, @@ -6182,9 +9388,12 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) * always set csa_cachethis to FALSE because the current implementation * of the back channel DRC only supports caching the CB_SEQUENCE operation. */ -static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) +static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args, + struct rpc_clnt *clnt) { unsigned int max_rqst_sz, max_resp_sz; + unsigned int max_bc_payload = rpc_max_bc_payload(clnt); + unsigned int max_bc_slots = rpc_num_bc_slots(clnt); max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead; max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead; @@ -6202,11 +9411,13 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->fc_attrs.max_ops, args->fc_attrs.max_reqs); /* Back channel attributes */ - args->bc_attrs.max_rqst_sz = PAGE_SIZE; - args->bc_attrs.max_resp_sz = PAGE_SIZE; + args->bc_attrs.max_rqst_sz = max_bc_payload; + args->bc_attrs.max_resp_sz = max_bc_payload; args->bc_attrs.max_resp_sz_cached = 0; args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; - args->bc_attrs.max_reqs = 1; + args->bc_attrs.max_reqs = max_t(unsigned short, max_session_cb_slots, 1); + if (args->bc_attrs.max_reqs > max_bc_slots) + args->bc_attrs.max_reqs = max_bc_slots; dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u " "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", @@ -6216,10 +9427,11 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->bc_attrs.max_reqs); } -static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) +static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, + struct nfs41_create_session_res *res) { struct nfs4_channel_attrs *sent = &args->fc_attrs; - struct nfs4_channel_attrs *rcvd = &session->fc_attrs; + struct nfs4_channel_attrs *rcvd = &res->fc_attrs; if (rcvd->max_resp_sz > sent->max_resp_sz) return -EINVAL; @@ -6238,47 +9450,65 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args return 0; } -static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) +static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, + struct nfs41_create_session_res *res) { struct nfs4_channel_attrs *sent = &args->bc_attrs; - struct nfs4_channel_attrs *rcvd = &session->bc_attrs; + struct nfs4_channel_attrs *rcvd = &res->bc_attrs; + if (!(res->flags & SESSION4_BACK_CHAN)) + goto out; if (rcvd->max_rqst_sz > sent->max_rqst_sz) return -EINVAL; - if (rcvd->max_resp_sz < sent->max_resp_sz) + if (rcvd->max_resp_sz > sent->max_resp_sz) return -EINVAL; if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) return -EINVAL; - /* These would render the backchannel useless: */ - if (rcvd->max_ops != sent->max_ops) + if (rcvd->max_ops > sent->max_ops) return -EINVAL; - if (rcvd->max_reqs != sent->max_reqs) + if (rcvd->max_reqs > sent->max_reqs) return -EINVAL; +out: return 0; } static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, - struct nfs4_session *session) + struct nfs41_create_session_res *res) { int ret; - ret = nfs4_verify_fore_channel_attrs(args, session); + ret = nfs4_verify_fore_channel_attrs(args, res); if (ret) return ret; - return nfs4_verify_back_channel_attrs(args, session); + return nfs4_verify_back_channel_attrs(args, res); +} + +static void nfs4_update_session(struct nfs4_session *session, + struct nfs41_create_session_res *res) +{ + nfs4_copy_sessionid(&session->sess_id, &res->sessionid); + /* Mark client id and session as being confirmed */ + session->clp->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; + set_bit(NFS4_SESSION_ESTABLISHED, &session->session_state); + session->flags = res->flags; + memcpy(&session->fc_attrs, &res->fc_attrs, sizeof(session->fc_attrs)); + if (res->flags & SESSION4_BACK_CHAN) + memcpy(&session->bc_attrs, &res->bc_attrs, + sizeof(session->bc_attrs)); } static int _nfs4_proc_create_session(struct nfs_client *clp, - struct rpc_cred *cred) + const struct cred *cred) { struct nfs4_session *session = clp->cl_session; struct nfs41_create_session_args args = { .client = clp, + .clientid = clp->cl_clientid, + .seqid = clp->cl_seqid, .cb_program = NFS4_CALLBACK, }; - struct nfs41_create_session_res res = { - .client = clp, - }; + struct nfs41_create_session_res res; + struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION], .rpc_argp = &args, @@ -6287,18 +9517,32 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, }; int status; - nfs4_init_channel_attrs(&args); + nfs4_init_channel_attrs(&args, clp->cl_rpcclient); args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); - status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); + trace_nfs4_create_session(clp, status); + switch (status) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_DELAY: + case -ETIMEDOUT: + case -EACCES: + case -EAGAIN: + goto out; + } + + clp->cl_seqid++; if (!status) { /* Verify the session's negotiated channel_attrs values */ - status = nfs4_verify_channel_attrs(&args, session); + status = nfs4_verify_channel_attrs(&args, &res); /* Increment the clientid slot sequence id */ - clp->cl_seqid++; + if (status) + goto out; + nfs4_update_session(session, &res); } - +out: return status; } @@ -6307,11 +9551,18 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, * It is the responsibility of the caller to verify the session is * expired before calling this routine. */ -int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred) +int nfs4_proc_create_session(struct nfs_client *clp, const struct cred *cred) { int status; unsigned *ptr; struct nfs4_session *session = clp->cl_session; + struct nfs4_add_xprt_data xprtdata = { + .clp = clp, + }; + struct rpc_add_xprt_test rpcdata = { + .add_xprt_test = clp->cl_mvops->session_trunk, + .data = &xprtdata, + }; dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); @@ -6328,8 +9579,8 @@ int nfs4_proc_create_session(struct nfs_client *clp, struct rpc_cred *cred) ptr = (unsigned *)&session->sess_id.data[0]; dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__, clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]); + rpc_clnt_probe_trunked_xprts(clp->cl_rpcclient, &rpcdata); out: - dprintk("<-- %s\n", __func__); return status; } @@ -6338,7 +9589,7 @@ out: * The caller must serialize access to this routine. */ int nfs4_proc_destroy_session(struct nfs4_session *session, - struct rpc_cred *cred) + const struct cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION], @@ -6347,19 +9598,18 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, }; int status = 0; - dprintk("--> nfs4_proc_destroy_session\n"); - /* session is still being setup */ - if (session->clp->cl_cons_state != NFS_CS_READY) - return status; + if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state)) + return 0; - status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); + trace_nfs4_destroy_session(session->clp, status); if (status) dprintk("NFS: Got error %d from the server on DESTROY_SESSION. " "Session has been destroyed regardless...\n", status); - - dprintk("<-- nfs4_proc_destroy_session\n"); + rpc_clnt_manage_trunked_xprts(session->clp->cl_rpcclient); return status; } @@ -6377,7 +9627,7 @@ static void nfs41_sequence_release(void *data) struct nfs4_sequence_data *calldata = data; struct nfs_client *clp = calldata->clp; - if (atomic_read(&clp->cl_count) > 1) + if (refcount_read(&clp->cl_count) > 1) nfs4_schedule_state_renewal(clp); nfs_put_client(clp); kfree(calldata); @@ -6403,10 +9653,11 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp)) return; - if (task->tk_status < 0) { + trace_nfs4_sequence(clp, task->tk_status); + if (task->tk_status < 0 && clp->cl_cons_state >= 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); - if (atomic_read(&clp->cl_count) == 1) - goto out; + if (refcount_read(&clp->cl_count) == 1) + return; if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) { rpc_restart_call_prepare(task); @@ -6414,8 +9665,6 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) } } dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); -out: - dprintk("<-- %s\n", __func__); } static void nfs41_sequence_prepare(struct rpc_task *task, void *data) @@ -6428,7 +9677,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) args = task->tk_msg.rpc_argp; res = task->tk_msg.rpc_resp; - nfs41_setup_sequence(clp->cl_session, args, res, task); + nfs4_setup_sequence(clp, args, res, task); } static const struct rpc_call_ops nfs41_sequence_ops = { @@ -6438,7 +9687,8 @@ static const struct rpc_call_ops nfs41_sequence_ops = { }; static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, - struct rpc_cred *cred, + const struct cred *cred, + struct nfs4_slot *slot, bool is_privileged) { struct nfs4_sequence_data *calldata; @@ -6450,35 +9700,44 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, .callback_ops = &nfs41_sequence_ops, - .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE, }; + struct rpc_task *ret; - if (!atomic_inc_not_zero(&clp->cl_count)) - return ERR_PTR(-EIO); - calldata = kzalloc(sizeof(*calldata), GFP_NOFS); - if (calldata == NULL) { - nfs_put_client(clp); - return ERR_PTR(-ENOMEM); - } - nfs41_init_sequence(&calldata->args, &calldata->res, 0); - if (is_privileged) - nfs4_set_sequence_privileged(&calldata->args); + ret = ERR_PTR(-EIO); + if (!refcount_inc_not_zero(&clp->cl_count)) + goto out_err; + + ret = ERR_PTR(-ENOMEM); + calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + goto out_put_clp; + nfs4_init_sequence(&calldata->args, &calldata->res, 0, is_privileged); + nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot); msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; calldata->clp = clp; task_setup_data.callback_data = calldata; - return rpc_run_task(&task_setup_data); + ret = rpc_run_task(&task_setup_data); + if (IS_ERR(ret)) + goto out_err; + return ret; +out_put_clp: + nfs_put_client(clp); +out_err: + nfs41_release_slot(slot); + return ret; } -static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) +static int nfs41_proc_async_sequence(struct nfs_client *clp, const struct cred *cred, unsigned renew_flags) { struct rpc_task *task; int ret = 0; if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) - return 0; - task = _nfs41_proc_sequence(clp, cred, false); + return -EAGAIN; + task = _nfs41_proc_sequence(clp, cred, NULL, false); if (IS_ERR(task)) ret = PTR_ERR(task); else @@ -6487,24 +9746,19 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr return ret; } -static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs4_proc_sequence(struct nfs_client *clp, const struct cred *cred) { struct rpc_task *task; int ret; - task = _nfs41_proc_sequence(clp, cred, true); + task = _nfs41_proc_sequence(clp, cred, NULL, true); if (IS_ERR(task)) { ret = PTR_ERR(task); goto out; } ret = rpc_wait_for_completion_task(task); - if (!ret) { - struct nfs4_sequence_res *res = task->tk_msg.rpc_resp; - - if (task->tk_status == 0) - nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); + if (!ret) ret = task->tk_status; - } rpc_put_task(task); out: dprintk("<-- %s status=%d\n", __func__, ret); @@ -6521,7 +9775,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) { struct nfs4_reclaim_complete_data *calldata = data; - nfs41_setup_sequence(calldata->clp->cl_session, + nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args, &calldata->res.seq_res, task); @@ -6531,14 +9785,23 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf { switch(task->tk_status) { case 0: + wake_up_all(&clp->cl_lock_waitq); + fallthrough; case -NFS4ERR_COMPLETE_ALREADY: case -NFS4ERR_WRONG_CRED: /* What to do here? */ break; case -NFS4ERR_DELAY: rpc_delay(task, NFS4_POLL_RETRY_MAX); - /* fall through */ + fallthrough; case -NFS4ERR_RETRY_UNCACHED_REP: + case -EACCES: + dprintk("%s: failed to reclaim complete error %d for server %s, retrying\n", + __func__, task->tk_status, clp->cl_hostname); return -EAGAIN; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + break; default: nfs4_schedule_lease_recovery(clp); } @@ -6551,15 +9814,14 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) struct nfs_client *clp = calldata->clp; struct nfs4_sequence_res *res = &calldata->res.seq_res; - dprintk("--> %s\n", __func__); if (!nfs41_sequence_done(task, res)) return; + trace_nfs4_reclaim_complete(clp, task->tk_status); if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) { rpc_restart_call_prepare(task); return; } - dprintk("<-- %s\n", __func__); } static void nfs4_free_reclaim_complete_data(void *data) @@ -6579,10 +9841,9 @@ static const struct rpc_call_ops nfs4_reclaim_complete_call_ops = { * Issue a global reclaim complete. */ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, - struct rpc_cred *cred) + const struct cred *cred) { struct nfs4_reclaim_complete_data *calldata; - struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE], .rpc_cred = cred, @@ -6591,32 +9852,21 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, .callback_ops = &nfs4_reclaim_complete_call_ops, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_NO_ROUND_ROBIN, }; int status = -ENOMEM; - dprintk("--> %s\n", __func__); calldata = kzalloc(sizeof(*calldata), GFP_NOFS); if (calldata == NULL) goto out; calldata->clp = clp; calldata->arg.one_fs = 0; - nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); - nfs4_set_sequence_privileged(&calldata->arg.seq_args); + nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0, 1); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - status = PTR_ERR(task); - goto out; - } - status = nfs4_wait_for_completion_rpc_task(task); - if (status == 0) - status = task->tk_status; - rpc_put_task(task); - return 0; + status = nfs4_call_sync_custom(&task_setup_data); out: dprintk("<-- %s status=%d\n", __func__, status); return status; @@ -6627,129 +9877,123 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; struct nfs_server *server = NFS_SERVER(lgp->args.inode); - struct nfs4_session *session = nfs4_get_session(server); - dprintk("--> %s\n", __func__); - /* Note the is a race here, where a CB_LAYOUTRECALL can come in - * right now covering the LAYOUTGET we are about to send. - * However, that is not so catastrophic, and there seems - * to be no way to prevent it completely. - */ - if (nfs41_setup_sequence(session, &lgp->args.seq_args, - &lgp->res.seq_res, task)) - return; - if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, - NFS_I(lgp->args.inode)->layout, - lgp->args.ctx->state)) { - rpc_exit(task, NFS4_OK); - } + nfs4_setup_sequence(server->nfs_client, &lgp->args.seq_args, + &lgp->res.seq_res, task); } static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutget *lgp = calldata; + + nfs41_sequence_process(task, &lgp->res.seq_res); +} + +static int +nfs4_layoutget_handle_exception(struct rpc_task *task, + struct nfs4_layoutget *lgp, struct nfs4_exception *exception) +{ struct inode *inode = lgp->args.inode; struct nfs_server *server = NFS_SERVER(inode); - struct pnfs_layout_hdr *lo; - struct nfs4_state *state = NULL; - unsigned long timeo, giveup; + struct pnfs_layout_hdr *lo = lgp->lo; + int nfs4err = task->tk_status; + int err, status = 0; + LIST_HEAD(head); - dprintk("--> %s\n", __func__); + dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); - if (!nfs41_sequence_done(task, &lgp->res.seq_res)) - goto out; + nfs4_sequence_free_slot(&lgp->res.seq_res); - switch (task->tk_status) { + exception->state = NULL; + exception->stateid = NULL; + + switch (nfs4err) { case 0: goto out; + + /* + * NFS4ERR_LAYOUTUNAVAILABLE means we are not supposed to use pnfs + * on the file. set tk_status to -ENODATA to tell upper layer to + * retry go inband. + */ + case -NFS4ERR_LAYOUTUNAVAILABLE: + status = -ENODATA; + goto out; + /* + * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of + * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3). + */ + case -NFS4ERR_BADLAYOUT: + status = -EOVERFLOW; + goto out; + /* + * NFS4ERR_LAYOUTTRYLATER is a conflict with another client + * (or clients) writing to the same RAID stripe except when + * the minlength argument is 0 (see RFC5661 section 18.43.3). + * + * Treat it like we would RECALLCONFLICT -- we retry for a little + * while, and then eventually give up. + */ case -NFS4ERR_LAYOUTTRYLATER: + if (lgp->args.minlength == 0) { + status = -EOVERFLOW; + goto out; + } + status = -EBUSY; + break; case -NFS4ERR_RECALLCONFLICT: - timeo = rpc_get_timeout(task->tk_client); - giveup = lgp->args.timestamp + timeo; - if (time_after(giveup, jiffies)) - task->tk_status = -NFS4ERR_DELAY; + case -NFS4ERR_RETURNCONFLICT: + status = -ERECALLCONFLICT; break; + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: + exception->timeout = 0; spin_lock(&inode->i_lock); - lo = NFS_I(inode)->layout; - if (!lo || list_empty(&lo->plh_segs)) { - spin_unlock(&inode->i_lock); - /* If the open stateid was bad, then recover it. */ - state = lgp->args.ctx->state; - } else { - LIST_HEAD(head); - - pnfs_mark_matching_lsegs_invalid(lo, &head, NULL); + /* If the open stateid was bad, then recover it. */ + if (!lo || test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || + !nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) { spin_unlock(&inode->i_lock); - /* Mark the bad layout state as invalid, then - * retry using the open stateid. */ - pnfs_free_lseg_list(&head); + exception->state = lgp->args.ctx->state; + exception->stateid = &lgp->args.stateid; + break; } + + /* + * Mark the bad layout state as invalid, then retry + */ + pnfs_mark_layout_stateid_invalid(lo, &head); + spin_unlock(&inode->i_lock); + nfs_commit_inode(inode, 0); + pnfs_free_lseg_list(&head); + status = -EAGAIN; + goto out; + } + + err = nfs4_handle_exception(server, nfs4err, exception); + if (!status) { + if (exception->retry) + status = -EAGAIN; + else + status = err; } - if (nfs4_async_handle_error(task, server, state) == -EAGAIN) - rpc_restart_call_prepare(task); out: - dprintk("<-- %s\n", __func__); + return status; } -static size_t max_response_pages(struct nfs_server *server) +size_t max_response_pages(struct nfs_server *server) { u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; return nfs_page_array_len(0, max_resp_sz); } -static void nfs4_free_pages(struct page **pages, size_t size) -{ - int i; - - if (!pages) - return; - - for (i = 0; i < size; i++) { - if (!pages[i]) - break; - __free_page(pages[i]); - } - kfree(pages); -} - -static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) -{ - struct page **pages; - int i; - - pages = kcalloc(size, sizeof(struct page *), gfp_flags); - if (!pages) { - dprintk("%s: can't alloc array of %zu pages\n", __func__, size); - return NULL; - } - - for (i = 0; i < size; i++) { - pages[i] = alloc_page(gfp_flags); - if (!pages[i]) { - dprintk("%s: failed to allocate page\n", __func__); - nfs4_free_pages(pages, size); - return NULL; - } - } - - return pages; -} - static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; - struct inode *inode = lgp->args.inode; - struct nfs_server *server = NFS_SERVER(inode); - size_t max_pages = max_response_pages(server); - dprintk("--> %s\n", __func__); - nfs4_free_pages(lgp->args.layout.pages, max_pages); - pnfs_put_layout_hdr(NFS_I(inode)->layout); - put_nfs_open_context(lgp->args.ctx); - kfree(calldata); - dprintk("<-- %s\n", __func__); + nfs4_sequence_free_slot(&lgp->res.seq_res); + pnfs_layoutget_free(lgp); } static const struct rpc_call_ops nfs4_layoutget_call_ops = { @@ -6759,11 +10003,11 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { }; struct pnfs_layout_segment * -nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, + struct nfs4_exception *exception) { struct inode *inode = lgp->args.inode; struct nfs_server *server = NFS_SERVER(inode); - size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], @@ -6776,37 +10020,39 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) .rpc_message = &msg, .callback_ops = &nfs4_layoutget_call_ops, .callback_data = lgp, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF | + RPC_TASK_MOVEABLE, }; struct pnfs_layout_segment *lseg = NULL; int status = 0; - dprintk("--> %s\n", __func__); - - lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); - if (!lgp->args.layout.pages) { - nfs4_layoutget_release(lgp); - return ERR_PTR(-ENOMEM); - } - lgp->args.layout.pglen = max_pages * PAGE_SIZE; - lgp->args.timestamp = jiffies; - - lgp->res.layoutp = &lgp->args.layout; - lgp->res.seq_res.sr_slot = NULL; - nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); - - /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ - pnfs_get_layout_hdr(NFS_I(inode)->layout); + nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0); + exception->retry = 0; task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); - status = nfs4_wait_for_completion_rpc_task(task); - if (status == 0) - status = task->tk_status; - /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ - if (status == 0 && lgp->res.layoutp->len) + + status = rpc_wait_for_completion_task(task); + if (status != 0) + goto out; + + if (task->tk_status < 0) { + exception->retry = 1; + status = nfs4_layoutget_handle_exception(task, lgp, exception); + } else if (lgp->res.layoutp->len == 0) { + exception->retry = 1; + status = -EAGAIN; + nfs4_update_delay(&exception->timeout); + } else lseg = pnfs_layout_process(lgp); +out: + trace_nfs4_layoutget(lgp->args.ctx, + &lgp->args.range, + &lgp->res.range, + &lgp->res.stateid, + status); + rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); if (status) @@ -6819,11 +10065,12 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutreturn *lrp = calldata; - dprintk("--> %s\n", __func__); - nfs41_setup_sequence(lrp->clp->cl_session, + nfs4_setup_sequence(lrp->clp, &lrp->args.seq_args, &lrp->res.seq_res, task); + if (!pnfs_layout_is_valid(lrp->args.layout)) + rpc_exit(task, 0); } static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) @@ -6831,17 +10078,58 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) struct nfs4_layoutreturn *lrp = calldata; struct nfs_server *server; - dprintk("--> %s\n", __func__); + if (!nfs41_sequence_process(task, &lrp->res.seq_res)) + return; - if (!nfs41_sequence_done(task, &lrp->res.seq_res)) + if (task->tk_rpc_status == -ETIMEDOUT) { + lrp->rpc_status = -EAGAIN; + lrp->res.lrs_present = 0; return; + } + /* + * Was there an RPC level error? Assume the call succeeded, + * and that we need to release the layout + */ + if (task->tk_rpc_status != 0 && RPC_WAS_SENT(task)) { + lrp->res.lrs_present = 0; + return; + } server = NFS_SERVER(lrp->args.inode); - if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - return; + switch (task->tk_status) { + case -NFS4ERR_OLD_STATEID: + if (nfs4_layout_refresh_old_stateid(&lrp->args.stateid, + &lrp->args.range, + lrp->args.inode)) + goto out_restart; + fallthrough; + default: + task->tk_status = 0; + lrp->res.lrs_present = 0; + fallthrough; + case 0: + break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + nfs4_schedule_session_recovery(server->nfs_client->cl_session, + task->tk_status); + lrp->res.lrs_present = 0; + lrp->rpc_status = -EAGAIN; + task->tk_status = 0; + break; + case -NFS4ERR_DELAY: + if (nfs4_async_handle_error(task, server, NULL, NULL) == + -EAGAIN) + goto out_restart; + lrp->res.lrs_present = 0; + break; } - dprintk("<-- %s\n", __func__); + return; +out_restart: + task->tk_status = 0; + nfs4_sequence_free_slot(&lrp->res.seq_res); + rpc_restart_call_prepare(task); } static void nfs4_layoutreturn_release(void *calldata) @@ -6849,15 +10137,20 @@ static void nfs4_layoutreturn_release(void *calldata) struct nfs4_layoutreturn *lrp = calldata; struct pnfs_layout_hdr *lo = lrp->args.layout; - dprintk("--> %s\n", __func__); - spin_lock(&lo->plh_inode->i_lock); - if (lrp->res.lrs_present) - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - lo->plh_block_lgets--; - spin_unlock(&lo->plh_inode->i_lock); + if (lrp->rpc_status == 0 || !lrp->inode) + pnfs_layoutreturn_free_lsegs( + lo, &lrp->args.stateid, &lrp->args.range, + lrp->res.lrs_present ? &lrp->res.stateid : NULL); + else + pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid, + &lrp->args.range); + nfs4_sequence_free_slot(&lrp->res.seq_res); + if (lrp->ld_private.ops && lrp->ld_private.ops->free) + lrp->ld_private.ops->free(&lrp->ld_private); pnfs_put_layout_hdr(lrp->args.layout); + nfs_iput_and_deactive(lrp->inode); + put_cred(lrp->cred); kfree(calldata); - dprintk("<-- %s\n", __func__); } static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { @@ -6866,7 +10159,7 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { .rpc_release = nfs4_layoutreturn_release, }; -int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, unsigned int flags) { struct rpc_task *task; struct rpc_message msg = { @@ -6876,79 +10169,54 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) .rpc_cred = lrp->cred, }; struct rpc_task_setup task_setup_data = { - .rpc_client = lrp->clp->cl_rpcclient, + .rpc_client = NFS_SERVER(lrp->args.inode)->client, .rpc_message = &msg, .callback_ops = &nfs4_layoutreturn_call_ops, .callback_data = lrp, + .flags = RPC_TASK_MOVEABLE, }; - int status; + int status = 0; + + nfs4_state_protect(NFS_SERVER(lrp->args.inode)->nfs_client, + NFS_SP4_MACH_CRED_PNFS_CLEANUP, + &task_setup_data.rpc_client, &msg); - dprintk("--> %s\n", __func__); - nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); + lrp->inode = nfs_igrab_and_active(lrp->args.inode); + if (flags & PNFS_FL_LAYOUTRETURN_ASYNC) { + if (!lrp->inode) { + nfs4_layoutreturn_release(lrp); + return -EAGAIN; + } + task_setup_data.flags |= RPC_TASK_ASYNC; + } + if (!lrp->inode) + flags |= PNFS_FL_LAYOUTRETURN_PRIVILEGED; + if (flags & PNFS_FL_LAYOUTRETURN_PRIVILEGED) + nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, + 1); + else + nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, + 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - status = task->tk_status; + if (!(flags & PNFS_FL_LAYOUTRETURN_ASYNC)) + status = task->tk_status; + trace_nfs4_layoutreturn(lrp->args.inode, &lrp->args.stateid, status); dprintk("<-- %s status=%d\n", __func__, status); rpc_put_task(task); return status; } -/* - * Retrieve the list of Data Server devices from the MDS. - */ -static int _nfs4_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist) -{ - struct nfs4_getdevicelist_args args = { - .fh = fh, - .layoutclass = server->pnfs_curr_ld->id, - }; - struct nfs4_getdevicelist_res res = { - .devlist = devlist, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST], - .rpc_argp = &args, - .rpc_resp = &res, - }; - int status; - - dprintk("--> %s\n", __func__); - status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, - &res.seq_res, 0); - dprintk("<-- %s status=%d\n", __func__, status); - return status; -} - -int nfs4_proc_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist) -{ - struct nfs4_exception exception = { }; - int err; - - do { - err = nfs4_handle_exception(server, - _nfs4_getdevicelist(server, fh, devlist), - &exception); - } while (exception.retry); - - dprintk("%s: err=%d, num_devs=%u\n", __func__, - err, devlist->num_devs); - - return err; -} -EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); - static int _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev, - struct rpc_cred *cred) + const struct cred *cred) { struct nfs4_getdeviceinfo_args args = { .pdev = pdev, + .notify_types = NOTIFY_DEVICEID4_CHANGE | + NOTIFY_DEVICEID4_DELETE, }; struct nfs4_getdeviceinfo_res res = { .pdev = pdev, @@ -6961,8 +10229,14 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, }; int status; - dprintk("--> %s\n", __func__); status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + if (res.notification & ~args.notify_types) + dprintk("%s: unsupported notification\n", __func__); + if (res.notification != args.notify_types) + pdev->nocache = 1; + + trace_nfs4_getdeviceinfo(server, &pdev->dev_id, status); + dprintk("<-- %s status=%d\n", __func__, status); return status; @@ -6970,7 +10244,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev, - struct rpc_cred *cred) + const struct cred *cred) { struct nfs4_exception exception = { }; int err; @@ -6988,9 +10262,8 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) { struct nfs4_layoutcommit_data *data = calldata; struct nfs_server *server = NFS_SERVER(data->args.inode); - struct nfs4_session *session = nfs4_get_session(server); - nfs41_setup_sequence(session, + nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -7013,11 +10286,9 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) task->tk_status = 0; break; case 0: - nfs_post_op_update_inode_force_wcc(data->args.inode, - data->res.fattr); break; default: - if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) { rpc_restart_call_prepare(task); return; } @@ -7029,7 +10300,10 @@ static void nfs4_layoutcommit_release(void *calldata) struct nfs4_layoutcommit_data *data = calldata; pnfs_cleanup_layoutcommit(data); - put_rpccred(data->cred); + nfs_post_op_update_inode_force_wcc(data->args.inode, + data->res.fattr); + put_cred(data->cred); + nfs_iput_and_deactive(data->inode); kfree(data); } @@ -7054,36 +10328,44 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) .rpc_message = &msg, .callback_ops = &nfs4_layoutcommit_ops, .callback_data = data, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_MOVEABLE, }; struct rpc_task *task; int status = 0; - dprintk("NFS: %4d initiating layoutcommit call. sync %d " - "lbw: %llu inode %lu\n", - data->task.tk_pid, sync, + dprintk("NFS: initiating layoutcommit call. sync %d " + "lbw: %llu inode %lu\n", sync, data->args.lastbytewritten, data->args.inode->i_ino); - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + if (!sync) { + data->inode = nfs_igrab_and_active(data->args.inode); + if (data->inode == NULL) { + nfs4_layoutcommit_release(data); + return -EAGAIN; + } + task_setup_data.flags = RPC_TASK_ASYNC; + } + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - if (sync == false) - goto out; - status = nfs4_wait_for_completion_rpc_task(task); - if (status != 0) - goto out; - status = task->tk_status; -out: + if (sync) + status = task->tk_status; + trace_nfs4_layoutcommit(data->args.inode, &data->args.stateid, status); dprintk("%s: status %d\n", __func__, status); rpc_put_task(task); return status; } -static int -_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +/* + * Use the state managment nfs_client cl_rpcclient, which uses krb5i (if + * possible) as per RFC3530bis and RFC5661 Security Considerations sections + */ +static int _nfs41_proc_secinfo_no_name(struct nfs_server *server, + struct nfs_fh *fhandle, + struct nfs4_secinfo_flavors *flavors, + bool use_integrity) { struct nfs41_secinfo_no_name_args args = { .style = SECINFO_STYLE_CURRENT_FH, @@ -7096,21 +10378,68 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; - return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + struct nfs4_call_sync_data data = { + .seq_server = server, + .seq_args = &args.seq_args, + .seq_res = &res.seq_res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = server->client, + .rpc_message = &msg, + .callback_ops = server->nfs_client->cl_mvops->call_sync_ops, + .callback_data = &data, + .flags = RPC_TASK_NO_ROUND_ROBIN, + }; + const struct cred *cred = NULL; + int status; + + if (use_integrity) { + task_setup.rpc_client = server->nfs_client->cl_rpcclient; + + cred = nfs4_get_clid_cred(server->nfs_client); + msg.rpc_cred = cred; + } + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); + status = nfs4_call_sync_custom(&task_setup); + dprintk("<-- %s status=%d\n", __func__, status); + + put_cred(cred); + + return status; } -static int -nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +static int nfs41_proc_secinfo_no_name(struct nfs_server *server, + struct nfs_fh *fhandle, + struct nfs4_secinfo_flavors *flavors) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { - err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + /* first try using integrity protection */ + err = -NFS4ERR_WRONGSEC; + + /* try to use integrity protection with machine cred */ + if (_nfs4_is_integrity_protected(server->nfs_client)) + err = _nfs41_proc_secinfo_no_name(server, fhandle, + flavors, true); + + /* + * if unable to use integrity protection, or SECINFO with + * integrity protection returns NFS4ERR_WRONGSEC (which is + * disallowed by spec, but exists in deployed servers) use + * the current filesystem's rpc_client and the user cred. + */ + if (err == -NFS4ERR_WRONGSEC) + err = _nfs41_proc_secinfo_no_name(server, fhandle, + flavors, false); + switch (err) { case 0: case -NFS4ERR_WRONGSEC: - case -NFS4ERR_NOTSUPP: + case -ENOTSUPP: goto out; default: err = nfs4_handle_exception(server, err, &exception); @@ -7120,14 +10449,15 @@ out: return err; } -static int -nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fsinfo *info) +static int nfs41_find_root_sec(struct nfs_server *server, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) { int err; struct page *page; - rpc_authflavor_t flavor; + rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR; struct nfs4_secinfo_flavors *flavors; + struct nfs4_secinfo4 *secinfo; + int i; page = alloc_page(GFP_KERNEL); if (!page) { @@ -7136,22 +10466,47 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, } flavors = page_address(page); - err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + err = nfs41_proc_secinfo_no_name(server, fhandle, flavors); /* * Fall back on "guess and check" method if * the server doesn't support SECINFO_NO_NAME */ - if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) { - err = nfs4_find_root_sec(server, fhandle, info); + if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) { + err = nfs4_find_root_sec(server, fhandle, fattr); goto out_freepage; } if (err) goto out_freepage; - flavor = nfs_find_best_sec(flavors); - if (err == 0) - err = nfs4_lookup_root_sec(server, fhandle, info, flavor); + for (i = 0; i < flavors->num_flavors; i++) { + secinfo = &flavors->flavors[i]; + + switch (secinfo->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + case RPC_AUTH_GSS: + flavor = rpcauth_get_pseudoflavor(secinfo->flavor, + &secinfo->flavor_info); + break; + default: + flavor = RPC_AUTH_MAXFLAVOR; + break; + } + + if (!nfs_auth_info_match(&server->auth_info, flavor)) + flavor = RPC_AUTH_MAXFLAVOR; + + if (flavor != RPC_AUTH_MAXFLAVOR) { + err = nfs4_lookup_root_sec(server, fhandle, fattr, + flavor); + if (!err) + break; + } + } + + if (flavor == RPC_AUTH_MAXFLAVOR) + err = -EPERM; out_freepage: put_page(page); @@ -7162,12 +10517,12 @@ out: } static int _nfs41_test_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - struct rpc_cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { int status; struct nfs41_test_stateid_args args = { - .stateid = stateid, + .stateid = *stateid, }; struct nfs41_test_stateid_res res; struct rpc_message msg = { @@ -7176,11 +10531,14 @@ static int _nfs41_test_stateid(struct nfs_server *server, .rpc_resp = &res, .rpc_cred = cred, }; + struct rpc_clnt *rpc_client = server->client; + + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, + &rpc_client, &msg); dprintk("NFS call test_stateid %p\n", stateid); - nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); - nfs4_set_sequence_privileged(&args.seq_args); - status = nfs4_call_sync_sequence(server->client, server, &msg, + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1); + status = nfs4_call_sync_sequence(rpc_client, server, &msg, &args.seq_args, &res.seq_res); if (status != NFS_OK) { dprintk("NFS reply test_stateid: failed, %d\n", status); @@ -7190,6 +10548,24 @@ static int _nfs41_test_stateid(struct nfs_server *server, return -res.status; } +static void nfs4_handle_delay_or_session_error(struct nfs_server *server, + int err, struct nfs4_exception *exception) +{ + exception->retry = 0; + switch(err) { + case -NFS4ERR_DELAY: + case -NFS4ERR_RETRY_UNCACHED_REP: + nfs4_handle_exception(server, err, exception); + break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + nfs4_do_handle_exception(server, err, exception); + } +} + /** * nfs41_test_stateid - perform a TEST_STATEID operation * @@ -7202,16 +10578,16 @@ static int _nfs41_test_stateid(struct nfs_server *server, * failed or the state ID is not currently valid. */ static int nfs41_test_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - struct rpc_cred *cred) + const nfs4_stateid *stateid, + const struct cred *cred) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .interruptible = true, + }; int err; do { err = _nfs41_test_stateid(server, stateid, cred); - if (err != -NFS4ERR_DELAY) - break; - nfs4_handle_exception(server, err, &exception); + nfs4_handle_delay_or_session_error(server, err, &exception); } while (exception.retry); return err; } @@ -7225,7 +10601,7 @@ struct nfs_free_stateid_data { static void nfs41_free_stateid_prepare(struct rpc_task *task, void *calldata) { struct nfs_free_stateid_data *data = calldata; - nfs41_setup_sequence(nfs4_get_session(data->server), + nfs4_setup_sequence(data->server->nfs_client, &data->args.seq_args, &data->res.seq_res, task); @@ -7239,25 +10615,39 @@ static void nfs41_free_stateid_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case -NFS4ERR_DELAY: - if (nfs4_async_handle_error(task, data->server, NULL) == -EAGAIN) + if (nfs4_async_handle_error(task, data->server, NULL, NULL) == -EAGAIN) rpc_restart_call_prepare(task); } } static void nfs41_free_stateid_release(void *calldata) { + struct nfs_free_stateid_data *data = calldata; + struct nfs_client *clp = data->server->nfs_client; + + nfs_put_client(clp); kfree(calldata); } -const struct rpc_call_ops nfs41_free_stateid_ops = { +static const struct rpc_call_ops nfs41_free_stateid_ops = { .rpc_call_prepare = nfs41_free_stateid_prepare, .rpc_call_done = nfs41_free_stateid_done, .rpc_release = nfs41_free_stateid_release, }; -static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, +/** + * nfs41_free_stateid - perform a FREE_STATEID operation + * + * @server: server / transport on which to perform the operation + * @stateid: state ID to release + * @cred: credential + * @privileged: set to true if this call needs to be privileged + * + * Note: this function is always asynchronous. + */ +static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid, - struct rpc_cred *cred, + const struct cred *cred, bool privileged) { struct rpc_message msg = { @@ -7268,14 +10658,22 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, .rpc_client = server->client, .rpc_message = &msg, .callback_ops = &nfs41_free_stateid_ops, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE, }; struct nfs_free_stateid_data *data; + struct rpc_task *task; + struct nfs_client *clp = server->nfs_client; + + if (!refcount_inc_not_zero(&clp->cl_count)) + return -EIO; + + nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID, + &task_setup.rpc_client, &msg); dprintk("NFS call free_stateid %p\n", stateid); - data = kmalloc(sizeof(*data), GFP_NOFS); + data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) - return ERR_PTR(-ENOMEM); + return -ENOMEM; data->server = server; nfs4_stateid_copy(&data->args.stateid, stateid); @@ -7283,65 +10681,39 @@ static struct rpc_task *_nfs41_free_stateid(struct nfs_server *server, msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; - nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); - if (privileged) - nfs4_set_sequence_privileged(&data->args.seq_args); - - return rpc_run_task(&task_setup); -} - -/** - * nfs41_free_stateid - perform a FREE_STATEID operation - * - * @server: server / transport on which to perform the operation - * @stateid: state ID to release - * @cred: credential - * - * Returns NFS_OK if the server freed "stateid". Otherwise a - * negative NFS4ERR value is returned. - */ -static int nfs41_free_stateid(struct nfs_server *server, - nfs4_stateid *stateid, - struct rpc_cred *cred) -{ - struct rpc_task *task; - int ret; - - task = _nfs41_free_stateid(server, stateid, cred, true); + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, privileged); + task = rpc_run_task(&task_setup); if (IS_ERR(task)) return PTR_ERR(task); - ret = rpc_wait_for_completion_task(task); - if (!ret) - ret = task->tk_status; rpc_put_task(task); - return ret; + stateid->type = NFS4_FREED_STATEID_TYPE; + return 0; } -static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) +static void +nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) { - struct rpc_task *task; - struct rpc_cred *cred = lsp->ls_state->owner->so_cred; + const struct cred *cred = lsp->ls_state->owner->so_cred; - task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); + nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); nfs4_free_lock_state(server, lsp); - if (IS_ERR(task)) - return PTR_ERR(task); - rpc_put_task(task); - return 0; } static bool nfs41_match_stateid(const nfs4_stateid *s1, const nfs4_stateid *s2) { + trace_nfs41_match_stateid(s1, s2); + + if (s1->type != s2->type) + return false; + if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0) return false; if (s1->seqid == s2->seqid) return true; - if (s1->seqid == 0 || s2->seqid == 0) - return true; - return false; + return s1->seqid == 0 || s2->seqid == 0; } #endif /* CONFIG_NFS_V4_1 */ @@ -7349,6 +10721,8 @@ static bool nfs41_match_stateid(const nfs4_stateid *s1, static bool nfs4_match_stateid(const nfs4_stateid *s1, const nfs4_stateid *s2) { + trace_nfs4_match_stateid(s1, s2); + return nfs4_stateid_match(s1, s2); } @@ -7359,7 +10733,6 @@ static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, .establish_clid = nfs4_init_clientid, - .get_clid_cred = nfs4_get_setclientid_cred, .detect_trunking = nfs40_discover_server_trunking, }; @@ -7370,7 +10743,6 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, .establish_clid = nfs41_init_clientid, - .get_clid_cred = nfs4_get_exchange_id_cred, .reclaim_complete = nfs41_proc_reclaim_complete, .detect_trunking = nfs41_discover_server_trunking, }; @@ -7379,10 +10751,9 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, - .recover_open = nfs4_open_expired, + .recover_open = nfs40_open_expired, .recover_lock = nfs4_lock_expired, .establish_clid = nfs4_init_clientid, - .get_clid_cred = nfs4_get_setclientid_cred, }; #if defined(CONFIG_NFS_V4_1) @@ -7392,55 +10763,83 @@ static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .recover_open = nfs41_open_expired, .recover_lock = nfs41_lock_expired, .establish_clid = nfs41_init_clientid, - .get_clid_cred = nfs4_get_exchange_id_cred, }; #endif /* CONFIG_NFS_V4_1 */ static const struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { .sched_state_renewal = nfs4_proc_async_renew, - .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, + .get_state_renewal_cred = nfs4_get_renew_cred, .renew_lease = nfs4_proc_renew, }; #if defined(CONFIG_NFS_V4_1) static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { .sched_state_renewal = nfs41_proc_async_sequence, - .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, + .get_state_renewal_cred = nfs4_get_machine_cred, .renew_lease = nfs4_proc_sequence, }; #endif +static const struct nfs4_mig_recovery_ops nfs40_mig_recovery_ops = { + .get_locations = _nfs40_proc_get_locations, + .fsid_present = _nfs40_proc_fsid_present, +}; + +#if defined(CONFIG_NFS_V4_1) +static const struct nfs4_mig_recovery_ops nfs41_mig_recovery_ops = { + .get_locations = _nfs41_proc_get_locations, + .fsid_present = _nfs41_proc_fsid_present, +}; +#endif /* CONFIG_NFS_V4_1 */ + static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK, - .call_sync = _nfs4_call_sync, + .init_client = nfs40_init_client, + .shutdown_client = nfs40_shutdown_client, .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, .free_lock_state = nfs4_release_lockowner, + .test_and_free_expired = nfs40_test_and_free_expired_stateid, + .alloc_seqid = nfs_alloc_seqid, + .call_sync_ops = &nfs40_call_sync_ops, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, .state_renewal_ops = &nfs40_state_renewal_ops, + .mig_recovery_ops = &nfs40_mig_recovery_ops, }; #if defined(CONFIG_NFS_V4_1) +static struct nfs_seqid * +nfs_alloc_no_seqid(struct nfs_seqid_counter *arg1, gfp_t arg2) +{ + return NULL; +} + static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1, - .call_sync = nfs4_call_sync_sequence, + | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_LGOPEN + | NFS_CAP_MOVEABLE, + .init_client = nfs41_init_client, + .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, + .test_and_free_expired = nfs41_test_and_free_expired_stateid, + .alloc_seqid = nfs_alloc_no_seqid, + .session_trunk = nfs4_test_session_trunk, + .call_sync_ops = &nfs41_call_sync_ops, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, + .mig_recovery_ops = &nfs41_mig_recovery_ops, }; #endif @@ -7449,17 +10848,36 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .minor_version = 2, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1, - .call_sync = nfs4_call_sync_sequence, + | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_LGOPEN + | NFS_CAP_ALLOCATE + | NFS_CAP_COPY + | NFS_CAP_OFFLOAD_CANCEL + | NFS_CAP_COPY_NOTIFY + | NFS_CAP_DEALLOCATE + | NFS_CAP_ZERO_RANGE + | NFS_CAP_SEEK + | NFS_CAP_LAYOUTSTATS + | NFS_CAP_CLONE + | NFS_CAP_LAYOUTERROR + | NFS_CAP_READ_PLUS + | NFS_CAP_MOVEABLE + | NFS_CAP_OFFLOAD_STATUS, + .init_client = nfs41_init_client, + .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, + .call_sync_ops = &nfs41_call_sync_ops, + .test_and_free_expired = nfs41_test_and_free_expired_stateid, + .alloc_seqid = nfs_alloc_no_seqid, + .session_trunk = nfs4_test_session_trunk, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, + .mig_recovery_ops = &nfs41_mig_recovery_ops, }; #endif @@ -7473,7 +10891,71 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = { #endif }; -const struct inode_operations nfs4_dir_inode_operations = { +static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size) +{ + ssize_t error, error2, error3, error4 = 0; + size_t left = size; + + error = generic_listxattr(dentry, list, left); + if (error < 0) + return error; + if (list) { + list += error; + left -= error; + } + + error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, left); + if (error2 < 0) + return error2; + + if (list) { + list += error2; + left -= error2; + } + + error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left); + if (error3 < 0) + return error3; + if (list) { + list += error3; + left -= error3; + } + + if (!nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) { + error4 = security_inode_listsecurity(d_inode(dentry), list, left); + if (error4 < 0) + return error4; + } + + error += error2 + error3 + error4; + if (size && error > size) + return -ERANGE; + return error; +} + +static void nfs4_enable_swap(struct inode *inode) +{ + /* The state manager thread must always be running. + * It will notice the client is a swapper, and stay put. + */ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + + nfs4_schedule_state_manager(clp); +} + +static void nfs4_disable_swap(struct inode *inode) +{ + /* The state manager thread will now exit once it is + * woken. + */ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); + wake_up_var(&clp->cl_state); +} + +static const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, .atomic_open = nfs_atomic_open, @@ -7487,22 +10969,36 @@ const struct inode_operations nfs4_dir_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, - .getxattr = generic_getxattr, - .setxattr = generic_setxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, + .listxattr = nfs4_listxattr, }; static const struct inode_operations nfs4_file_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, - .getxattr = generic_getxattr, - .setxattr = generic_setxattr, - .listxattr = generic_listxattr, - .removexattr = generic_removexattr, + .listxattr = nfs4_listxattr, }; +static struct nfs_server *nfs4_clone_server(struct nfs_server *source, + struct nfs_fh *fh, struct nfs_fattr *fattr, + rpc_authflavor_t flavor) +{ + struct nfs_server *server; + int error; + + server = nfs_clone_server(source, fh, fattr, flavor); + if (IS_ERR(server)) + return server; + + error = nfs4_delegation_hash_alloc(server); + if (error) { + nfs_free_server(server); + return ERR_PTR(error); + } + + return server; +} + const struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, @@ -7511,10 +11007,11 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .file_ops = &nfs4_file_operations, .getroot = nfs4_proc_get_root, .submount = nfs4_submount, - .try_mount = nfs4_try_mount, + .try_get_tree = nfs4_try_get_tree, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, .lookup = nfs4_proc_lookup, + .lookupp = nfs4_proc_lookupp, .access = nfs4_proc_access, .readlink = nfs4_proc_readlink, .create = nfs4_proc_create, @@ -7522,14 +11019,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .unlink_setup = nfs4_proc_unlink_setup, .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, .unlink_done = nfs4_proc_unlink_done, - .rename = nfs4_proc_rename, .rename_setup = nfs4_proc_rename_setup, .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, .rename_done = nfs4_proc_rename_done, .link = nfs4_proc_link, .symlink = nfs4_proc_symlink, .mkdir = nfs4_proc_mkdir, - .rmdir = nfs4_proc_remove, + .rmdir = nfs4_proc_rmdir, .readdir = nfs4_proc_readdir, .mknod = nfs4_proc_mknod, .statfs = nfs4_proc_statfs, @@ -7537,13 +11033,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .pathconf = nfs4_proc_pathconf, .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, + .pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare, .read_setup = nfs4_proc_read_setup, - .read_pageio_init = pnfs_pageio_init_read, - .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, - .write_pageio_init = pnfs_pageio_init_write, - .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, @@ -7558,26 +11051,54 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .init_client = nfs4_init_client, .free_client = nfs4_free_client, .create_server = nfs4_create_server, - .clone_server = nfs_clone_server, + .clone_server = nfs4_clone_server, + .discover_trunking = nfs4_discover_trunking, + .enable_swap = nfs4_enable_swap, + .disable_swap = nfs4_disable_swap, }; static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { - .prefix = XATTR_NAME_NFSV4_ACL, + .name = XATTR_NAME_NFSV4_ACL, .list = nfs4_xattr_list_nfs4_acl, .get = nfs4_xattr_get_nfs4_acl, .set = nfs4_xattr_set_nfs4_acl, }; -const struct xattr_handler *nfs4_xattr_handlers[] = { +#if defined(CONFIG_NFS_V4_1) +static const struct xattr_handler nfs4_xattr_nfs4_dacl_handler = { + .name = XATTR_NAME_NFSV4_DACL, + .list = nfs4_xattr_list_nfs4_dacl, + .get = nfs4_xattr_get_nfs4_dacl, + .set = nfs4_xattr_set_nfs4_dacl, +}; + +static const struct xattr_handler nfs4_xattr_nfs4_sacl_handler = { + .name = XATTR_NAME_NFSV4_SACL, + .list = nfs4_xattr_list_nfs4_sacl, + .get = nfs4_xattr_get_nfs4_sacl, + .set = nfs4_xattr_set_nfs4_sacl, +}; +#endif + +#ifdef CONFIG_NFS_V4_2 +static const struct xattr_handler nfs4_xattr_nfs4_user_handler = { + .prefix = XATTR_USER_PREFIX, + .get = nfs4_xattr_get_nfs4_user, + .set = nfs4_xattr_set_nfs4_user, +}; +#endif + +const struct xattr_handler * const nfs4_xattr_handlers[] = { &nfs4_xattr_nfs4_acl_handler, +#if defined(CONFIG_NFS_V4_1) + &nfs4_xattr_nfs4_dacl_handler, + &nfs4_xattr_nfs4_sacl_handler, +#endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL &nfs4_xattr_nfs4_label_handler, #endif +#ifdef CONFIG_NFS_V4_2 + &nfs4_xattr_nfs4_user_handler, +#endif NULL }; - -/* - * Local variables: - * c-basic-offset: 8 - * End: - */ |
