summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/nfs/exporting.rst26
-rw-r--r--fs/exportfs/expfs.c1
-rw-r--r--fs/lockd/mon.c3
-rw-r--r--fs/lockd/svc.c52
-rw-r--r--fs/lockd/svclock.c18
-rw-r--r--fs/locks.c7
-rw-r--r--fs/nfs/callback.c23
-rw-r--r--fs/nfsd/blocklayoutxdr.c9
-rw-r--r--fs/nfsd/cache.h8
-rw-r--r--fs/nfsd/flexfilelayoutxdr.c9
-rw-r--r--fs/nfsd/nfs3proc.c4
-rw-r--r--fs/nfsd/nfs4acl.c34
-rw-r--r--fs/nfsd/nfs4proc.c51
-rw-r--r--fs/nfsd/nfs4state.c162
-rw-r--r--fs/nfsd/nfs4xdr.c39
-rw-r--r--fs/nfsd/nfscache.c204
-rw-r--r--fs/nfsd/nfsctl.c1
-rw-r--r--fs/nfsd/nfsd.h7
-rw-r--r--fs/nfsd/nfsfh.c26
-rw-r--r--fs/nfsd/nfsfh.h6
-rw-r--r--fs/nfsd/nfssvc.c111
-rw-r--r--fs/nfsd/state.h3
-rw-r--r--fs/nfsd/stats.c2
-rw-r--r--fs/nfsd/stats.h7
-rw-r--r--fs/nfsd/trace.h27
-rw-r--r--fs/nfsd/vfs.c52
-rw-r--r--fs/nfsd/xdr4.h11
-rw-r--r--include/linux/lockd/lockd.h4
-rw-r--r--include/linux/sunrpc/cache.h12
-rw-r--r--include/linux/sunrpc/stats.h23
-rw-r--r--include/linux/sunrpc/svc.h52
-rw-r--r--include/linux/sunrpc/svc_xprt.h38
-rw-r--r--include/linux/sunrpc/svcauth.h53
-rw-r--r--include/linux/sunrpc/svcsock.h9
-rw-r--r--include/linux/sunrpc/xdr.h2
-rw-r--r--include/trace/events/sunrpc.h80
-rw-r--r--net/sunrpc/.kunitconfig1
-rw-r--r--net/sunrpc/Kconfig35
-rw-r--r--net/sunrpc/auth_gss/Makefile2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_internal.h23
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c84
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c257
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c69
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c106
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_test.c196
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c77
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c287
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c7
-rw-r--r--net/sunrpc/svc.c97
-rw-r--r--net/sunrpc/svc_xprt.c126
-rw-r--r--net/sunrpc/svcauth.c35
-rw-r--r--net/sunrpc/svcauth_unix.c9
-rw-r--r--net/sunrpc/svcsock.c131
-rw-r--r--net/sunrpc/xdr.c50
54 files changed, 969 insertions, 1799 deletions
diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst
index 3d97b8d8f735..4b30daee399a 100644
--- a/Documentation/filesystems/nfs/exporting.rst
+++ b/Documentation/filesystems/nfs/exporting.rst
@@ -215,3 +215,29 @@ following flags are defined:
This flag causes nfsd to close any open files for this inode _before_
calling into the vfs to do an unlink or a rename that would replace
an existing file.
+
+ EXPORT_OP_REMOTE_FS - Backing storage for this filesystem is remote
+ PF_LOCAL_THROTTLE exists for loopback NFSD, where a thread needs to
+ write to one bdi (the final bdi) in order to free up writes queued
+ to another bdi (the client bdi). Such threads get a private balance
+ of dirty pages so that dirty pages for the client bdi do not impact
+ the daemon writing to the final bdi. For filesystems whose durable
+ storage is not local (such as exported NFS filesystems), this
+ constraint has negative consequences. EXPORT_OP_REMOTE_FS enables
+ an export to disable writeback throttling.
+
+ EXPORT_OP_NOATOMIC_ATTR - Filesystem does not update attributes atomically
+ EXPORT_OP_NOATOMIC_ATTR indicates that the exported filesystem
+ cannot provide the semantics required by the "atomic" boolean in
+ NFSv4's change_info4. This boolean indicates to a client whether the
+ returned before and after change attributes were obtained atomically
+ with respect to the requested metadata operation (UNLINK,
+ OPEN/CREATE, MKDIR, etc).
+
+ EXPORT_OP_FLUSH_ON_CLOSE - Filesystem flushes file data on close(2)
+ On most filesystems, inodes can remain under writeback after the
+ file is closed. NFSD relies on client activity or local flusher
+ threads to handle writeback. Certain filesystems, such as NFS, flush
+ all of an inode's dirty data on last close. Exports that behave this
+ way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip
+ waiting for writeback when closing such files.
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index d1dbe47c7975..c20704aa21b3 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -386,6 +386,7 @@ static int export_encode_fh(struct inode *inode, struct fid *fid,
* @inode: the object to encode
* @fid: where to store the file handle fragment
* @max_len: maximum length to store there
+ * @parent: parent directory inode, if wanted
* @flags: properties of the requested file handle
*
* Returns an enum fid_type or a negative errno.
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1d9488cf0534..87a0f207df0b 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -276,6 +276,9 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
{
struct nsm_handle *new;
+ if (!hostname)
+ return NULL;
+
new = kzalloc(sizeof(*new) + hostname_len + 1, GFP_KERNEL);
if (unlikely(new == NULL))
return NULL;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 22d3ff3818f5..6579948070a4 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -45,7 +45,6 @@
#define NLMDBG_FACILITY NLMDBG_SVC
#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE)
-#define ALLOWED_SIGS (sigmask(SIGKILL))
static struct svc_program nlmsvc_program;
@@ -57,6 +56,12 @@ static unsigned int nlmsvc_users;
static struct svc_serv *nlmsvc_serv;
unsigned long nlmsvc_timeout;
+static void nlmsvc_request_retry(struct timer_list *tl)
+{
+ svc_wake_up(nlmsvc_serv);
+}
+DEFINE_TIMER(nlmsvc_retry, nlmsvc_request_retry);
+
unsigned int lockd_net_id;
/*
@@ -111,26 +116,12 @@ static void set_grace_period(struct net *net)
schedule_delayed_work(&ln->grace_period_end, grace_period);
}
-static void restart_grace(void)
-{
- if (nlmsvc_ops) {
- struct net *net = &init_net;
- struct lockd_net *ln = net_generic(net, lockd_net_id);
-
- cancel_delayed_work_sync(&ln->grace_period_end);
- locks_end_grace(&ln->lockd_manager);
- nlmsvc_invalidate_all();
- set_grace_period(net);
- }
-}
-
/*
* This is the lockd kernel thread
*/
static int
lockd(void *vrqstp)
{
- int err = 0;
struct svc_rqst *rqstp = vrqstp;
struct net *net = &init_net;
struct lockd_net *ln = net_generic(net, lockd_net_id);
@@ -138,9 +129,6 @@ lockd(void *vrqstp)
/* try_to_freeze() is called from svc_recv() */
set_freezable();
- /* Allow SIGKILL to tell lockd to drop all of its locks */
- allow_signal(SIGKILL);
-
dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
/*
@@ -148,33 +136,12 @@ lockd(void *vrqstp)
* NFS mount or NFS daemon has gone away.
*/
while (!kthread_should_stop()) {
- long timeout = MAX_SCHEDULE_TIMEOUT;
- RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
-
/* update sv_maxconn if it has changed */
rqstp->rq_server->sv_maxconn = nlm_max_connections;
- if (signalled()) {
- flush_signals(current);
- restart_grace();
- continue;
- }
-
- timeout = nlmsvc_retry_blocked();
-
- /*
- * Find a socket with data available and call its
- * recvfrom routine.
- */
- err = svc_recv(rqstp, timeout);
- if (err == -EAGAIN || err == -EINTR)
- continue;
- dprintk("lockd: request from %s\n",
- svc_print_addr(rqstp, buf, sizeof(buf)));
-
- svc_process(rqstp);
+ nlmsvc_retry_blocked();
+ svc_recv(rqstp);
}
- flush_signals(current);
if (nlmsvc_ops)
nlmsvc_invalidate_all();
nlm_shutdown_hosts();
@@ -407,6 +374,7 @@ static void lockd_put(void)
#endif
svc_set_num_threads(nlmsvc_serv, NULL, 0);
+ timer_delete_sync(&nlmsvc_retry);
nlmsvc_serv = NULL;
dprintk("lockd_down: service destroyed\n");
}
@@ -538,7 +506,7 @@ static inline int is_callback(u32 proc)
}
-static int lockd_authenticate(struct svc_rqst *rqstp)
+static enum svc_auth_status lockd_authenticate(struct svc_rqst *rqstp)
{
rqstp->rq_client = NULL;
switch (rqstp->rq_authop->flavour) {
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c43ccdf28ed9..43aeba9de55c 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -131,12 +131,14 @@ static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
static inline void
nlmsvc_remove_block(struct nlm_block *block)
{
+ spin_lock(&nlm_blocked_lock);
if (!list_empty(&block->b_list)) {
- spin_lock(&nlm_blocked_lock);
list_del_init(&block->b_list);
spin_unlock(&nlm_blocked_lock);
nlmsvc_release_block(block);
+ return;
}
+ spin_unlock(&nlm_blocked_lock);
}
/*
@@ -152,6 +154,7 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
file, lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end, lock->fl.fl_type);
+ spin_lock(&nlm_blocked_lock);
list_for_each_entry(block, &nlm_blocked, b_list) {
fl = &block->b_call->a_args.lock.fl;
dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
@@ -161,9 +164,11 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
nlmdbg_cookie2a(&block->b_call->a_args.cookie));
if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
kref_get(&block->b_count);
+ spin_unlock(&nlm_blocked_lock);
return block;
}
}
+ spin_unlock(&nlm_blocked_lock);
return NULL;
}
@@ -185,16 +190,19 @@ nlmsvc_find_block(struct nlm_cookie *cookie)
{
struct nlm_block *block;
+ spin_lock(&nlm_blocked_lock);
list_for_each_entry(block, &nlm_blocked, b_list) {
if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie))
goto found;
}
+ spin_unlock(&nlm_blocked_lock);
return NULL;
found:
dprintk("nlmsvc_find_block(%s): block=%p\n", nlmdbg_cookie2a(cookie), block);
kref_get(&block->b_count);
+ spin_unlock(&nlm_blocked_lock);
return block;
}
@@ -317,6 +325,7 @@ void nlmsvc_traverse_blocks(struct nlm_host *host,
restart:
mutex_lock(&file->f_mutex);
+ spin_lock(&nlm_blocked_lock);
list_for_each_entry_safe(block, next, &file->f_blocks, b_flist) {
if (!match(block->b_host, host))
continue;
@@ -325,11 +334,13 @@ restart:
if (list_empty(&block->b_list))
continue;
kref_get(&block->b_count);
+ spin_unlock(&nlm_blocked_lock);
mutex_unlock(&file->f_mutex);
nlmsvc_unlink_block(block);
nlmsvc_release_block(block);
goto restart;
}
+ spin_unlock(&nlm_blocked_lock);
mutex_unlock(&file->f_mutex);
}
@@ -1008,7 +1019,7 @@ retry_deferred_block(struct nlm_block *block)
* picks up locks that can be granted, or grant notifications that must
* be retransmitted.
*/
-unsigned long
+void
nlmsvc_retry_blocked(void)
{
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -1038,5 +1049,6 @@ nlmsvc_retry_blocked(void)
}
spin_unlock(&nlm_blocked_lock);
- return timeout;
+ if (timeout < MAX_SCHEDULE_TIMEOUT)
+ mod_timer(&nlmsvc_retry, jiffies + timeout);
}
diff --git a/fs/locks.c b/fs/locks.c
index a45efc16945d..76ad05f8070a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1744,13 +1744,6 @@ generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **pri
if (is_deleg && !inode_trylock(inode))
return -EAGAIN;
- if (is_deleg && arg == F_WRLCK) {
- /* Write delegations are not currently supported: */
- inode_unlock(inode);
- WARN_ON_ONCE(1);
- return -EINVAL;
- }
-
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 456af7d230cf..466ebf1d41b2 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -74,23 +74,12 @@ out_err:
static int
nfs4_callback_svc(void *vrqstp)
{
- int err;
struct svc_rqst *rqstp = vrqstp;
set_freezable();
- while (!kthread_freezable_should_stop(NULL)) {
-
- if (signal_pending(current))
- flush_signals(current);
- /*
- * Listen for a request on the socket
- */
- err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
- if (err == -EAGAIN || err == -EINTR)
- continue;
- svc_process(rqstp);
- }
+ while (!kthread_freezable_should_stop(NULL))
+ svc_recv(rqstp);
svc_exit_thread(rqstp);
return 0;
@@ -112,11 +101,7 @@ nfs41_callback_svc(void *vrqstp)
set_freezable();
while (!kthread_freezable_should_stop(NULL)) {
-
- if (signal_pending(current))
- flush_signals(current);
-
- prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
+ prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE);
spin_lock_bh(&serv->sv_cb_lock);
if (!list_empty(&serv->sv_cb_list)) {
req = list_first_entry(&serv->sv_cb_list,
@@ -387,7 +372,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
* All other checking done after NFS decoding where the nfs_client can be
* found in nfs4_callback_compound
*/
-static int nfs_callback_authenticate(struct svc_rqst *rqstp)
+static enum svc_auth_status nfs_callback_authenticate(struct svc_rqst *rqstp)
{
rqstp->rq_auth_stat = rpc_autherr_badcred;
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 8e9c1a0f8d38..1ed2f691ebb9 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -83,6 +83,15 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
int len = sizeof(__be32), ret, i;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
p = xdr_reserve_space(xdr, len + sizeof(__be32));
if (!p)
return nfserr_resource;
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 4c9b87850ab1..929248c6ca84 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -19,7 +19,7 @@
* typical sockaddr_storage. This is for space reasons, since sockaddr_storage
* is much larger than a sockaddr_in6.
*/
-struct svc_cacherep {
+struct nfsd_cacherep {
struct {
/* Keep often-read xid, csum in the same cache line: */
__be32 k_xid;
@@ -84,8 +84,10 @@ int nfsd_net_reply_cache_init(struct nfsd_net *nn);
void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
-int nfsd_cache_lookup(struct svc_rqst *);
-void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
+int nfsd_cache_lookup(struct svc_rqst *rqstp,
+ struct nfsd_cacherep **cacherep);
+void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
+ int cachetype, __be32 *statp);
int nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/flexfilelayoutxdr.c b/fs/nfsd/flexfilelayoutxdr.c
index e81d2a5cf381..bb205328e043 100644
--- a/fs/nfsd/flexfilelayoutxdr.c
+++ b/fs/nfsd/flexfilelayoutxdr.c
@@ -85,6 +85,15 @@ nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
int addr_len;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
/* len + padding for two strings */
addr_len = 16 + da->netaddr.netid_len + da->netaddr.addr_len;
ver_len = 20;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index fc8d5b7db9f8..268ef57751c4 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -307,7 +307,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!IS_POSIXACL(inode))
iap->ia_mode &= ~current_umask();
- fh_fill_pre_attrs(fhp);
+ status = fh_fill_pre_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
host_err = vfs_create(&nop_mnt_idmap, inode, child, iap->ia_mode, true);
if (host_err < 0) {
status = nfserrno(host_err);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 518203821790..96e786b5e544 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -441,7 +441,7 @@ struct posix_ace_state_array {
* calculated so far: */
struct posix_acl_state {
- int empty;
+ unsigned char valid;
struct posix_ace_state owner;
struct posix_ace_state group;
struct posix_ace_state other;
@@ -457,7 +457,6 @@ init_state(struct posix_acl_state *state, int cnt)
int alloc;
memset(state, 0, sizeof(struct posix_acl_state));
- state->empty = 1;
/*
* In the worst case, each individual acl could be for a distinct
* named user or group, but we don't know which, so we allocate
@@ -500,7 +499,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
* and effective cases: when there are no inheritable ACEs,
* calls ->set_acl with a NULL ACL structure.
*/
- if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT))
+ if (!state->valid && (flags & NFS4_ACL_TYPE_DEFAULT))
return NULL;
/*
@@ -622,11 +621,12 @@ static void process_one_v4_ace(struct posix_acl_state *state,
struct nfs4_ace *ace)
{
u32 mask = ace->access_mask;
+ short type = ace2type(ace);
int i;
- state->empty = 0;
+ state->valid |= type;
- switch (ace2type(ace)) {
+ switch (type) {
case ACL_USER_OBJ:
if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
allow_bits(&state->owner, mask);
@@ -726,6 +726,30 @@ static int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl,
if (!(ace->flag & NFS4_ACE_INHERIT_ONLY_ACE))
process_one_v4_ace(&effective_acl_state, ace);
}
+
+ /*
+ * At this point, the default ACL may have zeroed-out entries for owner,
+ * group and other. That usually results in a non-sensical resulting ACL
+ * that denies all access except to any ACE that was explicitly added.
+ *
+ * The setfacl command solves a similar problem with this logic:
+ *
+ * "If a Default ACL entry is created, and the Default ACL contains
+ * no owner, owning group, or others entry, a copy of the ACL
+ * owner, owning group, or others entry is added to the Default ACL."
+ *
+ * Copy any missing ACEs from the effective set, if any ACEs were
+ * explicitly set.
+ */
+ if (default_acl_state.valid) {
+ if (!(default_acl_state.valid & ACL_USER_OBJ))
+ default_acl_state.owner = effective_acl_state.owner;
+ if (!(default_acl_state.valid & ACL_GROUP_OBJ))
+ default_acl_state.group = effective_acl_state.group;
+ if (!(default_acl_state.valid & ACL_OTHER))
+ default_acl_state.other = effective_acl_state.other;
+ }
+
*pacl = posix_state_to_acl(&effective_acl_state, flags);
if (IS_ERR(*pacl)) {
ret = PTR_ERR(*pacl);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 5ae670807449..5ca748309c26 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -297,12 +297,12 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
if (d_really_is_positive(child)) {
- status = nfs_ok;
-
/* NFSv4 protocol requires change attributes even though
* no change happened.
*/
- fh_fill_both_attrs(fhp);
+ status = fh_fill_both_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
switch (open->op_createmode) {
case NFS4_CREATE_UNCHECKED:
@@ -345,7 +345,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!IS_POSIXACL(inode))
iap->ia_mode &= ~current_umask();
- fh_fill_pre_attrs(fhp);
+ status = fh_fill_pre_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
status = nfsd4_vfs_create(fhp, child, open);
if (status != nfs_ok)
goto out;
@@ -380,6 +382,38 @@ out:
return status;
}
+/**
+ * set_change_info - set up the change_info4 for a reply
+ * @cinfo: pointer to nfsd4_change_info to be populated
+ * @fhp: pointer to svc_fh to use as source
+ *
+ * Many operations in NFSv4 require change_info4 in the reply. This function
+ * populates that from the info that we (should!) have already collected. In
+ * the event that we didn't get any pre-attrs, just zero out both.
+ */
+static void
+set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+{
+ cinfo->atomic = (u32)(fhp->fh_pre_saved && fhp->fh_post_saved && !fhp->fh_no_atomic_attr);
+ cinfo->before_change = fhp->fh_pre_change;
+ cinfo->after_change = fhp->fh_post_change;
+
+ /*
+ * If fetching the pre-change attributes failed, then we should
+ * have already failed the whole operation. We could have still
+ * failed to fetch post-change attributes however.
+ *
+ * If we didn't get post-op attrs, set the after field to one more
+ * than the before field so that the two values differ. If the
+ * pre_saved field isn't set for some reason, throw a warning and
+ * zero out the before field.
+ */
+ if (WARN_ON_ONCE(!fhp->fh_pre_saved))
+ cinfo->before_change = 0;
+ if (!fhp->fh_post_saved)
+ cinfo->after_change = cinfo->before_change + 1;
+}
+
static __be32
do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)
{
@@ -424,11 +458,11 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
} else {
status = nfsd_lookup(rqstp, current_fh,
open->op_fname, open->op_fnamelen, *resfh);
- if (!status)
+ if (status == nfs_ok)
/* NFSv4 protocol requires change attributes even though
* no change happened.
*/
- fh_fill_both_attrs(current_fh);
+ status = fh_fill_both_attrs(current_fh);
}
if (status)
goto out;
@@ -1313,12 +1347,11 @@ try_again:
/* found a match */
if (ni->nsui_busy) {
/* wait - and try again */
- prepare_to_wait(&nn->nfsd_ssc_waitq, &wait,
- TASK_INTERRUPTIBLE);
+ prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE);
spin_unlock(&nn->nfsd_ssc_lock);
/* allow 20secs for mount/unmount for now - revisit */
- if (signal_pending(current) ||
+ if (kthread_should_stop() ||
(schedule_timeout(20*HZ) == 0)) {
finish_wait(&nn->nfsd_ssc_waitq, &wait);
kfree(work);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index daf305daa751..8534693eb6a4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -649,6 +649,18 @@ find_readable_file(struct nfs4_file *f)
return ret;
}
+static struct nfsd_file *
+find_rw_file(struct nfs4_file *f)
+{
+ struct nfsd_file *ret;
+
+ spin_lock(&f->fi_lock);
+ ret = nfsd_file_get(f->fi_fds[O_RDWR]);
+ spin_unlock(&f->fi_lock);
+
+ return ret;
+}
+
struct nfsd_file *
find_any_file(struct nfs4_file *f)
{
@@ -1144,7 +1156,7 @@ static void block_delegations(struct knfsd_fh *fh)
static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
- struct nfs4_clnt_odstate *odstate)
+ struct nfs4_clnt_odstate *odstate, u32 dl_type)
{
struct nfs4_delegation *dp;
long n;
@@ -1170,7 +1182,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
INIT_LIST_HEAD(&dp->dl_recall_lru);
dp->dl_clnt_odstate = odstate;
get_clnt_odstate(odstate);
- dp->dl_type = NFS4_OPEN_DELEGATE_READ;
+ dp->dl_type = dl_type;
dp->dl_retries = 1;
dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
@@ -5449,8 +5461,9 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct nfs4_file *fp = stp->st_stid.sc_file;
struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
struct nfs4_delegation *dp;
- struct nfsd_file *nf;
+ struct nfsd_file *nf = NULL;
struct file_lock *fl;
+ u32 dl_type;
/*
* The fi_had_conflict and nfs_get_existing_delegation checks
@@ -5460,15 +5473,35 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
- nf = find_readable_file(fp);
- if (!nf) {
- /*
- * We probably could attempt another open and get a read
- * delegation, but for now, don't bother until the
- * client actually sends us one.
- */
- return ERR_PTR(-EAGAIN);
+ /*
+ * Try for a write delegation first. RFC8881 section 10.4 says:
+ *
+ * "An OPEN_DELEGATE_WRITE delegation allows the client to handle,
+ * on its own, all opens."
+ *
+ * Furthermore the client can use a write delegation for most READ
+ * operations as well, so we require a O_RDWR file here.
+ *
+ * Offer a write delegation in the case of a BOTH open, and ensure
+ * we get the O_RDWR descriptor.
+ */
+ if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
+ nf = find_rw_file(fp);
+ dl_type = NFS4_OPEN_DELEGATE_WRITE;
}
+
+ /*
+ * If the file is being opened O_RDONLY or we couldn't get a O_RDWR
+ * file for some reason, then try for a read delegation instead.
+ */
+ if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
+ nf = find_readable_file(fp);
+ dl_type = NFS4_OPEN_DELEGATE_READ;
+ }
+
+ if (!nf)
+ return ERR_PTR(-EAGAIN);
+
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
if (nfs4_delegation_exists(clp, fp))
@@ -5491,11 +5524,11 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
return ERR_PTR(status);
status = -ENOMEM;
- dp = alloc_init_deleg(clp, fp, odstate);
+ dp = alloc_init_deleg(clp, fp, odstate, dl_type);
if (!dp)
goto out_delegees;
- fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ);
+ fl = nfs4_alloc_init_lease(dp, dl_type);
if (!fl)
goto out_clnt_odstate;
@@ -5568,10 +5601,28 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
}
/*
- * Attempt to hand out a delegation.
+ * The Linux NFS server does not offer write delegations to NFSv4.0
+ * clients in order to avoid conflicts between write delegations and
+ * GETATTRs requesting CHANGE or SIZE attributes.
+ *
+ * With NFSv4.1 and later minorversions, the SEQUENCE operation that
+ * begins each COMPOUND contains a client ID. Delegation recall can
+ * be avoided when the server recognizes the client sending a
+ * GETATTR also holds the write delegation it conflicts with.
+ *
+ * However, the NFSv4.0 protocol does not enable a server to
+ * determine that a GETATTR originated from the client holding the
+ * conflicting delegation versus coming from some other client. Per
+ * RFC 7530 Section 16.7.5, the server must recall or send a
+ * CB_GETATTR even when the GETATTR originates from the client that
+ * holds the conflicting delegation.
*
- * Note we don't support write delegations, and won't until the vfs has
- * proper support for them.
+ * An NFSv4.0 client can trigger a pathological situation if it
+ * always sends a DELEGRETURN preceded by a conflicting GETATTR in
+ * the same COMPOUND. COMPOUND execution will always stop at the
+ * GETATTR and the DELEGRETURN will never get executed. The server
+ * eventually revokes the delegation, which can result in loss of
+ * open or lock state.
*/
static void
nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
@@ -5590,8 +5641,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
case NFS4_OPEN_CLAIM_PREVIOUS:
if (!cb_up)
open->op_recall = 1;
- if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ)
- goto out_no_deleg;
break;
case NFS4_OPEN_CLAIM_NULL:
parent = currentfh;
@@ -5606,6 +5655,9 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
goto out_no_deleg;
if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
goto out_no_deleg;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE &&
+ !clp->cl_minorversion)
+ goto out_no_deleg;
break;
default:
goto out_no_deleg;
@@ -5616,8 +5668,13 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
- trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
- open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
+ trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
+ } else {
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
+ }
nfs4_put_stid(&dp->dl_stid);
return;
out_no_deleg:
@@ -8341,3 +8398,68 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
{
get_stateid(cstate, &u->write.wr_stateid);
}
+
+/**
+ * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
+ * @rqstp: RPC transaction context
+ * @inode: file to be checked for a conflict
+ *
+ * This function is called when there is a conflict between a write
+ * delegation and a change/size GETATTR from another client. The server
+ * must either use the CB_GETATTR to get the current values of the
+ * attributes from the client that holds the delegation or recall the
+ * delegation before replying to the GETATTR. See RFC 8881 section
+ * 18.7.4.
+ *
+ * The current implementation does not support CB_GETATTR yet. However,
+ * CB_GETATTR support, which can avoid recalling the delegation, could be
+ * added in follow-up work.
+ *
+ * Returns 0 if there is no conflict; otherwise an nfs_stat
+ * code is returned.
+ */
+__be32
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
+{
+ __be32 status;
+ struct file_lock_context *ctx;
+ struct file_lock *fl;
+ struct nfs4_delegation *dp;
+
+ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return 0;
+ spin_lock(&ctx->flc_lock);
+ list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+ if (fl->fl_flags == FL_LAYOUT)
+ continue;
+ if (fl->fl_lmops != &nfsd_lease_mng_ops) {
+ /*
+ * non-nfs lease, if it's a lease with F_RDLCK then
+ * we are done; there isn't any write delegation
+ * on this inode
+ */
+ if (fl->fl_type == F_RDLCK)
+ break;
+ goto break_lease;
+ }
+ if (fl->fl_type == F_WRLCK) {
+ dp = fl->fl_owner;
+ if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
+ spin_unlock(&ctx->flc_lock);
+ return 0;
+ }
+break_lease:
+ spin_unlock(&ctx->flc_lock);
+ nfsd_stats_wdeleg_getattr_inc();
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ return status;
+ return 0;
+ }
+ break;
+ }
+ spin_unlock(&ctx->flc_lock);
+ return 0;
+}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b30dca7de8cc..2e40c74d2f72 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2984,6 +2984,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
if (status)
goto out;
}
+ if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+ status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
+ if (status)
+ goto out;
+ }
err = vfs_getattr(&path, &stat,
STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
@@ -3973,17 +3978,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid);
if (nfserr)
return nfserr;
- p = xdr_reserve_space(xdr, 32);
+
+ p = xdr_reserve_space(xdr, XDR_UNIT * 8);
if (!p)
return nfserr_resource;
*p++ = cpu_to_be32(open->op_recall);
/*
+ * Always flush on close
+ *
* TODO: space_limit's in delegations
*/
*p++ = cpu_to_be32(NFS4_LIMIT_SIZE);
- *p++ = cpu_to_be32(~(u32)0);
- *p++ = cpu_to_be32(~(u32)0);
+ *p++ = xdr_zero;
+ *p++ = xdr_zero;
/*
* TODO: ACE's in delegations
@@ -4678,20 +4686,17 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
*p++ = cpu_to_be32(gdev->gd_layout_type);
- /* If maxcount is 0 then just update notifications */
- if (gdev->gd_maxcount != 0) {
- ops = nfsd4_layout_ops[gdev->gd_layout_type];
- nfserr = ops->encode_getdeviceinfo(xdr, gdev);
- if (nfserr) {
- /*
- * We don't bother to burden the layout drivers with
- * enforcing gd_maxcount, just tell the client to
- * come back with a bigger buffer if it's not enough.
- */
- if (xdr->buf->len + 4 > gdev->gd_maxcount)
- goto toosmall;
- return nfserr;
- }
+ ops = nfsd4_layout_ops[gdev->gd_layout_type];
+ nfserr = ops->encode_getdeviceinfo(xdr, gdev);
+ if (nfserr) {
+ /*
+ * We don't bother to burden the layout drivers with
+ * enforcing gd_maxcount, just tell the client to
+ * come back with a bigger buffer if it's not enough.
+ */
+ if (xdr->buf->len + 4 > gdev->gd_maxcount)
+ goto toosmall;
+ return nfserr;
}
if (gdev->gd_notify_types) {
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index a8eda1c85829..80621a709510 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -84,11 +84,11 @@ nfsd_hashsize(unsigned int limit)
return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
}
-static struct svc_cacherep *
-nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
- struct nfsd_net *nn)
+static struct nfsd_cacherep *
+nfsd_cacherep_alloc(struct svc_rqst *rqstp, __wsum csum,
+ struct nfsd_net *nn)
{
- struct svc_cacherep *rp;
+ struct nfsd_cacherep *rp;
rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
if (rp) {
@@ -110,36 +110,64 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
return rp;
}
-static void
-nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
- struct nfsd_net *nn)
+static void nfsd_cacherep_free(struct nfsd_cacherep *rp)
{
- if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
- nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len);
+ if (rp->c_type == RC_REPLBUFF)
kfree(rp->c_replvec.iov_base);
+ kmem_cache_free(drc_slab, rp);
+}
+
+static unsigned long
+nfsd_cacherep_dispose(struct list_head *dispose)
+{
+ struct nfsd_cacherep *rp;
+ unsigned long freed = 0;
+
+ while (!list_empty(dispose)) {
+ rp = list_first_entry(dispose, struct nfsd_cacherep, c_lru);
+ list_del(&rp->c_lru);
+ nfsd_cacherep_free(rp);
+ freed++;
}
+ return freed;
+}
+
+static void
+nfsd_cacherep_unlink_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
+ struct nfsd_cacherep *rp)
+{
+ if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base)
+ nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len);
if (rp->c_state != RC_UNUSED) {
rb_erase(&rp->c_node, &b->rb_head);
list_del(&rp->c_lru);
atomic_dec(&nn->num_drc_entries);
nfsd_stats_drc_mem_usage_sub(nn, sizeof(*rp));
}
- kmem_cache_free(drc_slab, rp);
}
static void
-nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
+nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
+ struct nfsd_net *nn)
+{
+ nfsd_cacherep_unlink_locked(nn, b, rp);
+ nfsd_cacherep_free(rp);
+}
+
+static void
+nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
struct nfsd_net *nn)
{
spin_lock(&b->cache_lock);
- nfsd_reply_cache_free_locked(b, rp, nn);
+ nfsd_cacherep_unlink_locked(nn, b, rp);
spin_unlock(&b->cache_lock);
+ nfsd_cacherep_free(rp);
}
int nfsd_drc_slab_create(void)
{
drc_slab = kmem_cache_create("nfsd_drc",
- sizeof(struct svc_cacherep), 0, 0, NULL);
+ sizeof(struct nfsd_cacherep), 0, 0, NULL);
return drc_slab ? 0: -ENOMEM;
}
@@ -208,7 +236,7 @@ out_shrinker:
void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
{
- struct svc_cacherep *rp;
+ struct nfsd_cacherep *rp;
unsigned int i;
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
@@ -216,7 +244,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
for (i = 0; i < nn->drc_hashsize; i++) {
struct list_head *head = &nn->drc_hashtbl[i].lru_head;
while (!list_empty(head)) {
- rp = list_first_entry(head, struct svc_cacherep, c_lru);
+ rp = list_first_entry(head, struct nfsd_cacherep, c_lru);
nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i],
rp, nn);
}
@@ -233,7 +261,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
* not already scheduled.
*/
static void
-lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+lru_put_end(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp)
{
rp->c_timestamp = jiffies;
list_move_tail(&rp->c_lru, &b->lru_head);
@@ -247,12 +275,21 @@ nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn)
return &nn->drc_hashtbl[hash];
}
-static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
- unsigned int max)
+/*
+ * Remove and return no more than @max expired entries in bucket @b.
+ * If @max is zero, do not limit the number of removed entries.
+ */
+static void
+nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
+ unsigned int max, struct list_head *dispose)
{
- struct svc_cacherep *rp, *tmp;
- long freed = 0;
+ unsigned long expiry = jiffies - RC_EXPIRE;
+ struct nfsd_cacherep *rp, *tmp;
+ unsigned int freed = 0;
+
+ lockdep_assert_held(&b->cache_lock);
+ /* The bucket LRU is ordered oldest-first. */
list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
/*
* Don't free entries attached to calls that are still
@@ -260,60 +297,77 @@ static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
*/
if (rp->c_state == RC_INPROG)
continue;
+
if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries &&
- time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
+ time_before(expiry, rp->c_timestamp))
break;
- nfsd_reply_cache_free_locked(b, rp, nn);
- if (max && freed++ > max)
+
+ nfsd_cacherep_unlink_locked(nn, b, rp);
+ list_add(&rp->c_lru, dispose);
+
+ if (max && ++freed > max)
break;
}
- return freed;
}
-static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
+/**
+ * nfsd_reply_cache_count - count_objects method for the DRC shrinker
+ * @shrink: our registered shrinker context
+ * @sc: garbage collection parameters
+ *
+ * Returns the total number of entries in the duplicate reply cache. To
+ * keep things simple and quick, this is not the number of expired entries
+ * in the cache (ie, the number that would be removed by a call to
+ * nfsd_reply_cache_scan).
+ */
+static unsigned long
+nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
- return prune_bucket(b, nn, 3);
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+
+ return atomic_read(&nn->num_drc_entries);
}
-/*
- * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
- * Also prune the oldest ones when the total exceeds the max number of entries.
+/**
+ * nfsd_reply_cache_scan - scan_objects method for the DRC shrinker
+ * @shrink: our registered shrinker context
+ * @sc: garbage collection parameters
+ *
+ * Free expired entries on each bucket's LRU list until we've released
+ * nr_to_scan freed objects. Nothing will be released if the cache
+ * has not exceeded its max_drc_entries limit.
+ *
+ * Returns the number of entries released by this call.
*/
-static long
-prune_cache_entries(struct nfsd_net *nn)
+static unsigned long
+nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+ unsigned long freed = 0;
+ LIST_HEAD(dispose);
unsigned int i;
- long freed = 0;
for (i = 0; i < nn->drc_hashsize; i++) {
struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i];
if (list_empty(&b->lru_head))
continue;
+
spin_lock(&b->cache_lock);
- freed += prune_bucket(b, nn, 0);
+ nfsd_prune_bucket_locked(nn, b, 0, &dispose);
spin_unlock(&b->cache_lock);
- }
- return freed;
-}
-static unsigned long
-nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
-{
- struct nfsd_net *nn = container_of(shrink,
- struct nfsd_net, nfsd_reply_cache_shrinker);
+ freed += nfsd_cacherep_dispose(&dispose);
+ if (freed > sc->nr_to_scan)
+ break;
+ }
- return atomic_read(&nn->num_drc_entries);
+ trace_nfsd_drc_gc(nn, freed);
+ return freed;
}
-static unsigned long
-nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
-{
- struct nfsd_net *nn = container_of(shrink,
- struct nfsd_net, nfsd_reply_cache_shrinker);
-
- return prune_cache_entries(nn);
-}
/*
* Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
*/
@@ -348,8 +402,8 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
}
static int
-nfsd_cache_key_cmp(const struct svc_cacherep *key,
- const struct svc_cacherep *rp, struct nfsd_net *nn)
+nfsd_cache_key_cmp(const struct nfsd_cacherep *key,
+ const struct nfsd_cacherep *rp, struct nfsd_net *nn)
{
if (key->c_key.k_xid == rp->c_key.k_xid &&
key->c_key.k_csum != rp->c_key.k_csum) {
@@ -365,11 +419,11 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key,
* Must be called with cache_lock held. Returns the found entry or
* inserts an empty key on failure.
*/
-static struct svc_cacherep *
-nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
+static struct nfsd_cacherep *
+nfsd_cache_insert(struct nfsd_drc_bucket *b, struct nfsd_cacherep *key,
struct nfsd_net *nn)
{
- struct svc_cacherep *rp, *ret = key;
+ struct nfsd_cacherep *rp, *ret = key;
struct rb_node **p = &b->rb_head.rb_node,
*parent = NULL;
unsigned int entries = 0;
@@ -378,7 +432,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
while (*p != NULL) {
++entries;
parent = *p;
- rp = rb_entry(parent, struct svc_cacherep, c_node);
+ rp = rb_entry(parent, struct nfsd_cacherep, c_node);
cmp = nfsd_cache_key_cmp(key, rp, nn);
if (cmp < 0)
@@ -411,6 +465,7 @@ out:
/**
* nfsd_cache_lookup - Find an entry in the duplicate reply cache
* @rqstp: Incoming Call to find
+ * @cacherep: OUT: DRC entry for this request
*
* Try to find an entry matching the current call in the cache. When none
* is found, we try to grab the oldest expired entry off the LRU list. If
@@ -423,16 +478,17 @@ out:
* %RC_REPLY: Reply from cache
* %RC_DROPIT: Do not process the request further
*/
-int nfsd_cache_lookup(struct svc_rqst *rqstp)
+int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
{
struct nfsd_net *nn;
- struct svc_cacherep *rp, *found;
+ struct nfsd_cacherep *rp, *found;
__wsum csum;
struct nfsd_drc_bucket *b;
int type = rqstp->rq_cachetype;
+ unsigned long freed;
+ LIST_HEAD(dispose);
int rtn = RC_DOIT;
- rqstp->rq_cacherep = NULL;
if (type == RC_NOCACHE) {
nfsd_stats_rc_nocache_inc();
goto out;
@@ -445,7 +501,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
* preallocate an entry.
*/
nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
+ rp = nfsd_cacherep_alloc(rqstp, csum, nn);
if (!rp)
goto out;
@@ -454,20 +510,18 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
found = nfsd_cache_insert(b, rp, nn);
if (found != rp)
goto found_entry;
-
- nfsd_stats_rc_misses_inc();
- rqstp->rq_cacherep = rp;
+ *cacherep = rp;
rp->c_state = RC_INPROG;
+ nfsd_prune_bucket_locked(nn, b, 3, &dispose);
+ spin_unlock(&b->cache_lock);
+ freed = nfsd_cacherep_dispose(&dispose);
+ trace_nfsd_drc_gc(nn, freed);
+
+ nfsd_stats_rc_misses_inc();
atomic_inc(&nn->num_drc_entries);
nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
-
- nfsd_prune_bucket(b, nn);
-
-out_unlock:
- spin_unlock(&b->cache_lock);
-out:
- return rtn;
+ goto out;
found_entry:
/* We found a matching entry which is either in progress or done. */
@@ -505,12 +559,16 @@ found_entry:
out_trace:
trace_nfsd_drc_found(nn, rqstp, rtn);
- goto out_unlock;
+out_unlock:
+ spin_unlock(&b->cache_lock);
+out:
+ return rtn;
}
/**
* nfsd_cache_update - Update an entry in the duplicate reply cache.
* @rqstp: svc_rqst with a finished Reply
+ * @rp: IN: DRC entry for this request
* @cachetype: which cache to update
* @statp: pointer to Reply's NFS status code, or NULL
*
@@ -528,10 +586,10 @@ out_trace:
* nfsd failed to encode a reply that otherwise would have been cached.
* In this case, nfsd_cache_update is called with statp == NULL.
*/
-void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
+void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
+ int cachetype, __be32 *statp)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- struct svc_cacherep *rp = rqstp->rq_cacherep;
struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
struct nfsd_drc_bucket *b;
int len;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3709830f90a6..7ed02fb88a36 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1627,6 +1627,7 @@ static void __exit exit_nfsd(void)
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_DESCRIPTION("In-kernel NFS server");
MODULE_LICENSE("GPL");
module_init(init_nfsd)
module_exit(exit_nfsd)
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d88498f8b275..11c14faa6c67 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -96,7 +96,12 @@ int nfsd_pool_stats_open(struct inode *, struct file *);
int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_shutdown_threads(struct net *net);
-void nfsd_put(struct net *net);
+static inline void nfsd_put(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ svc_put(nn->nfsd_serv);
+}
bool i_am_nfsd(void);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index c291389a1d71..355bf0db3235 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -614,7 +614,7 @@ out_negative:
* @fhp: file handle to be updated
*
*/
-void fh_fill_pre_attrs(struct svc_fh *fhp)
+__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp)
{
bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
struct inode *inode;
@@ -622,12 +622,12 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
__be32 err;
if (fhp->fh_no_wcc || fhp->fh_pre_saved)
- return;
+ return nfs_ok;
inode = d_inode(fhp->fh_dentry);
err = fh_getattr(fhp, &stat);
if (err)
- return;
+ return err;
if (v4)
fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
@@ -636,6 +636,7 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
fhp->fh_pre_ctime = stat.ctime;
fhp->fh_pre_size = stat.size;
fhp->fh_pre_saved = true;
+ return nfs_ok;
}
/**
@@ -643,26 +644,27 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
* @fhp: file handle to be updated
*
*/
-void fh_fill_post_attrs(struct svc_fh *fhp)
+__be32 fh_fill_post_attrs(struct svc_fh *fhp)
{
bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
struct inode *inode = d_inode(fhp->fh_dentry);
__be32 err;
if (fhp->fh_no_wcc)
- return;
+ return nfs_ok;
if (fhp->fh_post_saved)
printk("nfsd: inode locked twice during operation.\n");
err = fh_getattr(fhp, &fhp->fh_post_attr);
if (err)
- return;
+ return err;
fhp->fh_post_saved = true;
if (v4)
fhp->fh_post_change =
nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+ return nfs_ok;
}
/**
@@ -672,16 +674,20 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
* This is used when the directory wasn't changed, but wcc attributes
* are needed anyway.
*/
-void fh_fill_both_attrs(struct svc_fh *fhp)
+__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp)
{
- fh_fill_post_attrs(fhp);
- if (!fhp->fh_post_saved)
- return;
+ __be32 err;
+
+ err = fh_fill_post_attrs(fhp);
+ if (err)
+ return err;
+
fhp->fh_pre_change = fhp->fh_post_change;
fhp->fh_pre_mtime = fhp->fh_post_attr.mtime;
fhp->fh_pre_ctime = fhp->fh_post_attr.ctime;
fhp->fh_pre_size = fhp->fh_post_attr.size;
fhp->fh_pre_saved = true;
+ return nfs_ok;
}
/*
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 4e0ecf0ae2cf..40426f899e76 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -294,7 +294,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
}
u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode);
-extern void fh_fill_pre_attrs(struct svc_fh *fhp);
-extern void fh_fill_post_attrs(struct svc_fh *fhp);
-extern void fh_fill_both_attrs(struct svc_fh *fhp);
+__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp);
+__be32 fh_fill_post_attrs(struct svc_fh *fhp);
+__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp);
#endif /* _LINUX_NFSD_NFSFH_H */
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 2154fa63c5f2..1582af33e204 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -542,9 +542,14 @@ static struct notifier_block nfsd_inet6addr_notifier = {
/* Only used under nfsd_mutex, so this atomic may be overkill: */
static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0);
-static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
+static void nfsd_last_thread(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv = nn->nfsd_serv;
+
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = NULL;
+ spin_unlock(&nfsd_notifier_lock);
/* check if the notifier still has clients */
if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
@@ -554,6 +559,8 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
#endif
}
+ svc_xprt_destroy_all(serv, net);
+
/*
* write_ports can create the server without actually starting
* any threads--if we get shut down before any threads are
@@ -644,7 +651,8 @@ void nfsd_shutdown_threads(struct net *net)
svc_get(serv);
/* Kill outstanding nfsd threads */
svc_set_num_threads(serv, NULL, 0);
- nfsd_put(net);
+ nfsd_last_thread(net);
+ svc_put(serv);
mutex_unlock(&nfsd_mutex);
}
@@ -674,9 +682,6 @@ int nfsd_create_serv(struct net *net)
serv->sv_maxconn = nn->max_connections;
error = svc_bind(serv, net);
if (error < 0) {
- /* NOT nfsd_put() as notifiers (see below) haven't
- * been set up yet.
- */
svc_put(serv);
return error;
}
@@ -719,29 +724,6 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
return 0;
}
-/* This is the callback for kref_put() below.
- * There is no code here as the first thing to be done is
- * call svc_shutdown_net(), but we cannot get the 'net' from
- * the kref. So do all the work when kref_put returns true.
- */
-static void nfsd_noop(struct kref *ref)
-{
-}
-
-void nfsd_put(struct net *net)
-{
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
- if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
- svc_xprt_destroy_all(nn->nfsd_serv, net);
- nfsd_last_thread(nn->nfsd_serv, net);
- svc_destroy(&nn->nfsd_serv->sv_refcnt);
- spin_lock(&nfsd_notifier_lock);
- nn->nfsd_serv = NULL;
- spin_unlock(&nfsd_notifier_lock);
- }
-}
-
int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
{
int i = 0;
@@ -792,7 +774,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
if (err)
break;
}
- nfsd_put(net);
+ svc_put(nn->nfsd_serv);
return err;
}
@@ -807,6 +789,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
int error;
bool nfsd_up_before;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv;
mutex_lock(&nfsd_mutex);
dprintk("nfsd: creating service\n");
@@ -826,22 +809,25 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
goto out;
nfsd_up_before = nn->nfsd_net_up;
+ serv = nn->nfsd_serv;
error = nfsd_startup_net(net, cred);
if (error)
goto out_put;
- error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);
+ error = svc_set_num_threads(serv, NULL, nrservs);
if (error)
goto out_shutdown;
- error = nn->nfsd_serv->sv_nrthreads;
+ error = serv->sv_nrthreads;
+ if (error == 0)
+ nfsd_last_thread(net);
out_shutdown:
if (error < 0 && !nfsd_up_before)
nfsd_shutdown_net(net);
out_put:
/* Threads now hold service active */
if (xchg(&nn->keep_active, 0))
- nfsd_put(net);
- nfsd_put(net);
+ svc_put(serv);
+ svc_put(serv);
out:
mutex_unlock(&nfsd_mutex);
return error;
@@ -953,7 +939,6 @@ nfsd(void *vrqstp)
struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
struct net *net = perm_sock->xpt_net;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- int err;
/* At this point, the thread shares current->fs
* with the init process. We need to create files with the
@@ -965,15 +950,6 @@ nfsd(void *vrqstp)
current->fs->umask = 0;
- /*
- * thread is spawned with all signals set to SIG_IGN, re-enable
- * the ones that will bring down the thread
- */
- allow_signal(SIGKILL);
- allow_signal(SIGHUP);
- allow_signal(SIGINT);
- allow_signal(SIGQUIT);
-
atomic_inc(&nfsdstats.th_cnt);
set_freezable();
@@ -981,54 +957,19 @@ nfsd(void *vrqstp)
/*
* The main request loop
*/
- for (;;) {
+ while (!kthread_should_stop()) {
/* Update sv_maxconn if it has changed */
rqstp->rq_server->sv_maxconn = nn->max_connections;
- /*
- * Find a socket with data available and call its
- * recvfrom routine.
- */
- while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
- ;
- if (err == -EINTR)
- break;
- validate_process_creds();
- svc_process(rqstp);
+ svc_recv(rqstp);
validate_process_creds();
}
- /* Clear signals before calling svc_exit_thread() */
- flush_signals(current);
-
atomic_dec(&nfsdstats.th_cnt);
out:
- /* Take an extra ref so that the svc_put in svc_exit_thread()
- * doesn't call svc_destroy()
- */
- svc_get(nn->nfsd_serv);
-
/* Release the thread */
svc_exit_thread(rqstp);
-
- /* We need to drop a ref, but may not drop the last reference
- * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that
- * could deadlock with nfsd_shutdown_threads() waiting for us.
- * So three options are:
- * - drop a non-final reference,
- * - get the mutex without waiting
- * - sleep briefly andd try the above again
- */
- while (!svc_put_not_last(nn->nfsd_serv)) {
- if (mutex_trylock(&nfsd_mutex)) {
- nfsd_put(net);
- mutex_unlock(&nfsd_mutex);
- break;
- }
- msleep(20);
- }
-
return 0;
}
@@ -1046,6 +987,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
{
const struct svc_procedure *proc = rqstp->rq_procinfo;
__be32 *statp = rqstp->rq_accept_statp;
+ struct nfsd_cacherep *rp;
/*
* Give the xdr decoder a chance to change this if it wants
@@ -1056,7 +998,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
goto out_decode_err;
- switch (nfsd_cache_lookup(rqstp)) {
+ rp = NULL;
+ switch (nfsd_cache_lookup(rqstp, &rp)) {
case RC_DOIT:
break;
case RC_REPLY:
@@ -1072,7 +1015,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
goto out_encode_err;
- nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
+ nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
out_cached_reply:
return 1;
@@ -1082,13 +1025,13 @@ out_decode_err:
return 1;
out_update_drop:
- nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+ nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL);
out_dropit:
return 0;
out_encode_err:
trace_nfsd_cant_encode_err(rqstp);
- nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+ nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL);
*statp = rpc_system_err;
return 1;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d49d3060ed4f..cbddcf484dba 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -732,4 +732,7 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
return clp->cl_state == NFSD4_EXPIRABLE;
}
+
+extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
+ struct inode *inode);
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 777e24e5da33..63797635e1c3 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -65,6 +65,8 @@ static int nfsd_show(struct seq_file *seq, void *v)
seq_printf(seq, " %lld",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)]));
}
+ seq_printf(seq, "\nwdeleg_getattr %lld",
+ percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]));
seq_putc(seq, '\n');
#endif
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index 9b43dc3d9991..cf5524e7ca06 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -22,6 +22,7 @@ enum {
NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
+ NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
#endif
NFSD_STATS_COUNTERS_NUM
};
@@ -93,4 +94,10 @@ static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
}
+#ifdef CONFIG_NFSD_V4
+static inline void nfsd_stats_wdeleg_getattr_inc(void)
+{
+ percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]);
+}
+#endif
#endif /* _NFSD_STATS_H */
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 2af74983f146..803904348871 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -607,6 +607,7 @@ DEFINE_STATEID_EVENT(layout_recall_release);
DEFINE_STATEID_EVENT(open);
DEFINE_STATEID_EVENT(deleg_read);
+DEFINE_STATEID_EVENT(deleg_write);
DEFINE_STATEID_EVENT(deleg_return);
DEFINE_STATEID_EVENT(deleg_recall);
@@ -1240,8 +1241,8 @@ TRACE_EVENT(nfsd_drc_found,
TRACE_EVENT(nfsd_drc_mismatch,
TP_PROTO(
const struct nfsd_net *nn,
- const struct svc_cacherep *key,
- const struct svc_cacherep *rp
+ const struct nfsd_cacherep *key,
+ const struct nfsd_cacherep *rp
),
TP_ARGS(nn, key, rp),
TP_STRUCT__entry(
@@ -1261,6 +1262,28 @@ TRACE_EVENT(nfsd_drc_mismatch,
__entry->ingress)
);
+TRACE_EVENT_CONDITION(nfsd_drc_gc,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ unsigned long freed
+ ),
+ TP_ARGS(nn, freed),
+ TP_CONDITION(freed > 0),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(unsigned long, freed)
+ __field(int, total)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->freed = freed;
+ __entry->total = atomic_read(&nn->num_drc_entries);
+ ),
+ TP_printk("boot_time=%16llx total=%d freed=%lu",
+ __entry->boot_time, __entry->total, __entry->freed
+ )
+);
+
TRACE_EVENT(nfsd_cb_args,
TP_PROTO(
const struct nfs4_client *clp,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9b7acba382fe..48260cf68fde 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1540,7 +1540,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
dput(dchild);
if (err)
goto out_unlock;
- fh_fill_pre_attrs(fhp);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp);
fh_fill_post_attrs(fhp);
out_unlock:
@@ -1635,13 +1637,16 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
inode_unlock(dentry->d_inode);
goto out_drop_write;
}
- fh_fill_pre_attrs(fhp);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path);
err = nfserrno(host_err);
cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
if (!err)
nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
fh_fill_post_attrs(fhp);
+out_unlock:
inode_unlock(dentry->d_inode);
if (!err)
err = nfserrno(commit_metadata(fhp));
@@ -1703,7 +1708,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
err = nfserr_noent;
if (d_really_is_negative(dold))
goto out_dput;
- fh_fill_pre_attrs(ffhp);
+ err = fh_fill_pre_attrs(ffhp);
+ if (err != nfs_ok)
+ goto out_dput;
host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL);
fh_fill_post_attrs(ffhp);
inode_unlock(dirp);
@@ -1789,8 +1796,12 @@ retry:
}
trap = lock_rename(tdentry, fdentry);
- fh_fill_pre_attrs(ffhp);
- fh_fill_pre_attrs(tfhp);
+ err = fh_fill_pre_attrs(ffhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ err = fh_fill_pre_attrs(tfhp);
+ if (err != nfs_ok)
+ goto out_unlock;
odentry = lookup_one_len(fname, fdentry, flen);
host_err = PTR_ERR(odentry);
@@ -1857,6 +1868,7 @@ retry:
fh_fill_post_attrs(ffhp);
fh_fill_post_attrs(tfhp);
}
+out_unlock:
unlock_rename(tdentry, fdentry);
fh_drop_write(ffhp);
@@ -1916,12 +1928,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
goto out_unlock;
}
rinode = d_inode(rdentry);
- ihold(rinode);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ ihold(rinode);
if (!type)
type = d_inode(rdentry)->i_mode & S_IFMT;
- fh_fill_pre_attrs(fhp);
if (type != S_IFDIR) {
int retries;
@@ -2341,16 +2355,18 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
return nfserrno(ret);
inode_lock(fhp->fh_dentry->d_inode);
- fh_fill_pre_attrs(fhp);
-
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
ret = __vfs_removexattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
name, NULL);
-
+ err = nfsd_xattr_errno(ret);
fh_fill_post_attrs(fhp);
+out_unlock:
inode_unlock(fhp->fh_dentry->d_inode);
fh_drop_write(fhp);
- return nfsd_xattr_errno(ret);
+ return err;
}
__be32
@@ -2368,15 +2384,17 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
if (ret)
return nfserrno(ret);
inode_lock(fhp->fh_dentry->d_inode);
- fh_fill_pre_attrs(fhp);
-
- ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry, name, buf,
- len, flags, NULL);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
+ name, buf, len, flags, NULL);
fh_fill_post_attrs(fhp);
+ err = nfsd_xattr_errno(ret);
+out_unlock:
inode_unlock(fhp->fh_dentry->d_inode);
fh_drop_write(fhp);
-
- return nfsd_xattr_errno(ret);
+ return err;
}
#endif
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 510978e602da..9d918a79dc16 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -774,17 +774,6 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op);
#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
-static inline void
-set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
-{
- BUG_ON(!fhp->fh_pre_saved);
- cinfo->atomic = (u32)(fhp->fh_post_saved && !fhp->fh_no_atomic_attr);
-
- cinfo->before_change = fhp->fh_pre_change;
- cinfo->after_change = fhp->fh_post_change;
-}
-
-
bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index f42594a9efe0..0f016d69c996 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -204,6 +204,8 @@ extern unsigned long nlmsvc_timeout;
extern bool nsm_use_hostnames;
extern u32 nsm_local_state;
+extern struct timer_list nlmsvc_retry;
+
/*
* Lockd client functions
*/
@@ -280,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
struct nlm_host *, struct nlm_lock *,
struct nlm_lock *, struct nlm_cookie *);
__be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
-unsigned long nlmsvc_retry_blocked(void);
+void nlmsvc_retry_blocked(void);
void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
nlm_host_match_fn_t match);
void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 518bd28f5ab8..35766963dd14 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -56,10 +56,14 @@ struct cache_head {
struct kref ref;
unsigned long flags;
};
-#define CACHE_VALID 0 /* Entry contains valid data */
-#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */
-#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/
-#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */
+
+/* cache_head.flags */
+enum {
+ CACHE_VALID, /* Entry contains valid data */
+ CACHE_NEGATIVE, /* Negative entry - there is no match for the key */
+ CACHE_PENDING, /* An upcall has been sent but no reply received yet*/
+ CACHE_CLEANED, /* Entry has been cleaned from cache */
+};
#define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */
diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h
index d94d4f410507..3ce1550d1beb 100644
--- a/include/linux/sunrpc/stats.h
+++ b/include/linux/sunrpc/stats.h
@@ -43,22 +43,6 @@ struct net;
#ifdef CONFIG_PROC_FS
int rpc_proc_init(struct net *);
void rpc_proc_exit(struct net *);
-#else
-static inline int rpc_proc_init(struct net *net)
-{
- return 0;
-}
-
-static inline void rpc_proc_exit(struct net *net)
-{
-}
-#endif
-
-#ifdef MODULE
-void rpc_modcount(struct inode *, int);
-#endif
-
-#ifdef CONFIG_PROC_FS
struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *);
void rpc_proc_unregister(struct net *,const char *);
void rpc_proc_zero(const struct rpc_program *);
@@ -69,7 +53,14 @@ void svc_proc_unregister(struct net *, const char *);
void svc_seq_show(struct seq_file *,
const struct svc_stat *);
#else
+static inline int rpc_proc_init(struct net *net)
+{
+ return 0;
+}
+static inline void rpc_proc_exit(struct net *net)
+{
+}
static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; }
static inline void rpc_proc_unregister(struct net *net, const char *p) {}
static inline void rpc_proc_zero(const struct rpc_program *p) {}
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f8751118c122..dbf5b21feafe 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -39,16 +39,20 @@ struct svc_pool {
struct list_head sp_all_threads; /* all server threads */
/* statistics on pool operation */
+ struct percpu_counter sp_messages_arrived;
struct percpu_counter sp_sockets_queued;
struct percpu_counter sp_threads_woken;
- struct percpu_counter sp_threads_timedout;
-#define SP_TASK_PENDING (0) /* still work to do even if no
- * xprt is queued. */
-#define SP_CONGESTED (1)
unsigned long sp_flags;
} ____cacheline_aligned_in_smp;
+/* bits for sp_flags */
+enum {
+ SP_TASK_PENDING, /* still work to do even if no xprt is queued */
+ SP_CONGESTED, /* all threads are busy, none idle */
+};
+
+
/*
* RPC service.
*
@@ -120,19 +124,6 @@ static inline void svc_put(struct svc_serv *serv)
kref_put(&serv->sv_refcnt, svc_destroy);
}
-/**
- * svc_put_not_last - decrement non-final reference count on SUNRPC serv
- * @serv: the svc_serv to have count decremented
- *
- * Returns: %true is refcount was decremented.
- *
- * If the refcount is 1, it is not decremented and instead failure is reported.
- */
-static inline bool svc_put_not_last(struct svc_serv *serv)
-{
- return refcount_dec_not_one(&serv->sv_refcnt.refcount);
-}
-
/*
* Maximum payload size supported by a kernel RPC server.
* This is use to determine the max number of pages nfsd is
@@ -232,16 +223,6 @@ struct svc_rqst {
u32 rq_proc; /* procedure number */
u32 rq_prot; /* IP protocol */
int rq_cachetype; /* catering to nfsd */
-#define RQ_SECURE (0) /* secure port */
-#define RQ_LOCAL (1) /* local request */
-#define RQ_USEDEFERRAL (2) /* use deferral */
-#define RQ_DROPME (3) /* drop current reply */
-#define RQ_SPLICE_OK (4) /* turned off in gss privacy
- * to prevent encrypting page
- * cache pages */
-#define RQ_VICTIM (5) /* about to be shut down */
-#define RQ_BUSY (6) /* request is busy */
-#define RQ_DATA (7) /* request has data */
unsigned long rq_flags; /* flags field */
ktime_t rq_qtime; /* enqueue time */
@@ -265,7 +246,6 @@ struct svc_rqst {
/* Catering to nfsd */
struct auth_domain * rq_client; /* RPC peer info */
struct auth_domain * rq_gssclient; /* "gss/"-style peer info */
- struct svc_cacherep * rq_cacherep; /* cache info */
struct task_struct *rq_task; /* service thread */
struct net *rq_bc_net; /* pointer to backchannel's
* net namespace
@@ -273,6 +253,19 @@ struct svc_rqst {
void ** rq_lease_breaker; /* The v4 client breaking a lease */
};
+/* bits for rq_flags */
+enum {
+ RQ_SECURE, /* secure port */
+ RQ_LOCAL, /* local request */
+ RQ_USEDEFERRAL, /* use deferral */
+ RQ_DROPME, /* drop current reply */
+ RQ_SPLICE_OK, /* turned off in gss privacy to prevent
+ * encrypting page cache pages */
+ RQ_VICTIM, /* about to be shut down */
+ RQ_BUSY, /* request is busy */
+ RQ_DATA, /* request has data */
+};
+
#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
/*
@@ -344,7 +337,7 @@ struct svc_program {
char * pg_name; /* service name */
char * pg_class; /* class name: services sharing authentication */
struct svc_stat * pg_stats; /* rpc statistics */
- int (*pg_authenticate)(struct svc_rqst *);
+ enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp);
__be32 (*pg_init_request)(struct svc_rqst *,
const struct svc_program *,
struct svc_process_info *);
@@ -427,6 +420,7 @@ int svc_register(const struct svc_serv *, struct net *, const int,
void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
+void svc_pool_wake_idle_thread(struct svc_pool *pool);
struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv);
char * svc_print_addr(struct svc_rqst *, char *, size_t);
const char * svc_proc_name(const struct svc_rqst *rqstp);
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index a6b12631db21..fa55d12dc765 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -56,23 +56,6 @@ struct svc_xprt {
struct list_head xpt_list;
struct list_head xpt_ready;
unsigned long xpt_flags;
-#define XPT_BUSY 0 /* enqueued/receiving */
-#define XPT_CONN 1 /* conn pending */
-#define XPT_CLOSE 2 /* dead or dying */
-#define XPT_DATA 3 /* data pending */
-#define XPT_TEMP 4 /* connected transport */
-#define XPT_DEAD 6 /* transport closed */
-#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */
-#define XPT_DEFERRED 8 /* deferred request pending */
-#define XPT_OLD 9 /* used for xprt aging mark+sweep */
-#define XPT_LISTENER 10 /* listening endpoint */
-#define XPT_CACHE_AUTH 11 /* cache auth info */
-#define XPT_LOCAL 12 /* connection from loopback interface */
-#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
-#define XPT_CONG_CTRL 14 /* has congestion control */
-#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
-#define XPT_TLS_SESSION 16 /* transport-layer security established */
-#define XPT_PEER_AUTH 17 /* peer has been authenticated */
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
@@ -97,6 +80,27 @@ struct svc_xprt {
struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */
};
+/* flag bits for xpt_flags */
+enum {
+ XPT_BUSY, /* enqueued/receiving */
+ XPT_CONN, /* conn pending */
+ XPT_CLOSE, /* dead or dying */
+ XPT_DATA, /* data pending */
+ XPT_TEMP, /* connected transport */
+ XPT_DEAD, /* transport closed */
+ XPT_CHNGBUF, /* need to change snd/rcv buf sizes */
+ XPT_DEFERRED, /* deferred request pending */
+ XPT_OLD, /* used for xprt aging mark+sweep */
+ XPT_LISTENER, /* listening endpoint */
+ XPT_CACHE_AUTH, /* cache auth info */
+ XPT_LOCAL, /* connection from loopback interface */
+ XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */
+ XPT_CONG_CTRL, /* has congestion control */
+ XPT_HANDSHAKE, /* xprt requests a handshake */
+ XPT_TLS_SESSION, /* transport-layer security established */
+ XPT_PEER_AUTH, /* peer has been authenticated */
+};
+
static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
{
spin_lock(&xpt->xpt_lock);
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 6d9cc9080aca..6f90203edbf8 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -83,6 +83,19 @@ struct auth_domain {
struct rcu_head rcu_head;
};
+enum svc_auth_status {
+ SVC_GARBAGE = 1,
+ SVC_SYSERR,
+ SVC_VALID,
+ SVC_NEGATIVE,
+ SVC_OK,
+ SVC_DROP,
+ SVC_CLOSE,
+ SVC_DENIED,
+ SVC_PENDING,
+ SVC_COMPLETE,
+};
+
/*
* Each authentication flavour registers an auth_ops
* structure.
@@ -98,6 +111,8 @@ struct auth_domain {
* is (probably) already in place. Certainly space is
* reserved for it.
* DROP - simply drop the request. It may have been deferred
+ * CLOSE - like SVC_DROP, but request is definitely lost.
+ * If there is a tcp connection, it should be closed.
* GARBAGE - rpc garbage_args error
* SYSERR - rpc system_err error
* DENIED - authp holds reason for denial.
@@ -111,14 +126,10 @@ struct auth_domain {
*
* release() is given a request after the procedure has been run.
* It should sign/encrypt the results if needed
- * It should return:
- * OK - the resbuf is ready to be sent
- * DROP - the reply should be quitely dropped
- * DENIED - authp holds a reason for MSG_DENIED
- * SYSERR - rpc system_err
*
* domain_release()
* This call releases a domain.
+ *
* set_client()
* Givens a pending request (struct svc_rqst), finds and assigns
* an appropriate 'auth_domain' as the client.
@@ -127,44 +138,28 @@ struct auth_ops {
char * name;
struct module *owner;
int flavour;
- int (*accept)(struct svc_rqst *rq);
- int (*release)(struct svc_rqst *rq);
- void (*domain_release)(struct auth_domain *);
- int (*set_client)(struct svc_rqst *rq);
-};
-#define SVC_GARBAGE 1
-#define SVC_SYSERR 2
-#define SVC_VALID 3
-#define SVC_NEGATIVE 4
-#define SVC_OK 5
-#define SVC_DROP 6
-#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely
- * lost so if there is a tcp connection, it
- * should be closed
- */
-#define SVC_DENIED 8
-#define SVC_PENDING 9
-#define SVC_COMPLETE 10
+ enum svc_auth_status (*accept)(struct svc_rqst *rqstp);
+ int (*release)(struct svc_rqst *rqstp);
+ void (*domain_release)(struct auth_domain *dom);
+ enum svc_auth_status (*set_client)(struct svc_rqst *rqstp);
+};
struct svc_xprt;
-extern int svc_authenticate(struct svc_rqst *rqstp);
+extern enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp);
extern int svc_authorise(struct svc_rqst *rqstp);
-extern int svc_set_client(struct svc_rqst *rqstp);
+extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp);
extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops);
extern void svc_auth_unregister(rpc_authflavor_t flavor);
extern struct auth_domain *unix_domain_find(char *name);
extern void auth_domain_put(struct auth_domain *item);
-extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom);
extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new);
extern struct auth_domain *auth_domain_find(char *name);
-extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr);
-extern int auth_unix_forget_old(struct auth_domain *dom);
extern void svcauth_unix_purge(struct net *net);
extern void svcauth_unix_info_release(struct svc_xprt *xpt);
-extern int svcauth_unix_set_client(struct svc_rqst *rqstp);
+extern enum svc_auth_status svcauth_unix_set_client(struct svc_rqst *rqstp);
extern int unix_gid_cache_create(struct net *net);
extern void unix_gid_cache_destroy(struct net *net);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index a7116048a4d4..7c78ec6356b9 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -35,8 +35,8 @@ struct svc_sock {
/* Total length of the data (not including fragment headers)
* received so far in the fragments making up this rpc: */
u32 sk_datalen;
- /* Number of queued send requests */
- atomic_t sk_sendqlen;
+
+ struct page_frag_cache sk_frag_cache;
struct completion sk_handshake_done;
@@ -56,8 +56,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
/*
* Function prototypes.
*/
-void svc_close_net(struct svc_serv *, struct net *);
-int svc_recv(struct svc_rqst *, long);
+void svc_recv(struct svc_rqst *rqstp);
void svc_send(struct svc_rqst *rqstp);
void svc_drop(struct svc_rqst *);
void svc_sock_update_bufs(struct svc_serv *serv);
@@ -66,8 +65,6 @@ int svc_addsock(struct svc_serv *serv, struct net *net,
const struct cred *cred);
void svc_init_xprt_sock(void);
void svc_cleanup_xprt_sock(void);
-struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
-void svc_sock_destroy(struct svc_xprt *);
/*
* svc_makesock socket characteristics
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index f89ec4b5ea16..42f9d7eb9a1a 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -139,6 +139,8 @@ void xdr_terminate_string(const struct xdr_buf *, const u32);
size_t xdr_buf_pagecount(const struct xdr_buf *buf);
int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp);
void xdr_free_bvec(struct xdr_buf *buf);
+unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size,
+ const struct xdr_buf *xdr);
static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len)
{
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 43711753616a..6beb38c1dcb5 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1706,7 +1706,7 @@ TRACE_DEFINE_ENUM(SVC_DENIED);
TRACE_DEFINE_ENUM(SVC_PENDING);
TRACE_DEFINE_ENUM(SVC_COMPLETE);
-#define svc_show_status(status) \
+#define show_svc_auth_status(status) \
__print_symbolic(status, \
{ SVC_GARBAGE, "SVC_GARBAGE" }, \
{ SVC_SYSERR, "SVC_SYSERR" }, \
@@ -1743,7 +1743,10 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE);
__entry->xid, __get_sockaddr(server), __get_sockaddr(client)
TRACE_EVENT_CONDITION(svc_authenticate,
- TP_PROTO(const struct svc_rqst *rqst, int auth_res),
+ TP_PROTO(
+ const struct svc_rqst *rqst,
+ enum svc_auth_status auth_res
+ ),
TP_ARGS(rqst, auth_res),
@@ -1766,7 +1769,7 @@ TRACE_EVENT_CONDITION(svc_authenticate,
TP_printk(SVC_RQST_ENDPOINT_FORMAT
" auth_res=%s auth_stat=%s",
SVC_RQST_ENDPOINT_VARARGS,
- svc_show_status(__entry->svc_status),
+ show_svc_auth_status(__entry->svc_status),
rpc_show_auth_stat(__entry->auth_stat))
);
@@ -1918,25 +1921,42 @@ TRACE_EVENT(svc_stats_latency,
__get_str(procedure), __entry->execute)
);
+/*
+ * from include/linux/sunrpc/svc_xprt.h
+ */
+#define SVC_XPRT_FLAG_LIST \
+ svc_xprt_flag(BUSY) \
+ svc_xprt_flag(CONN) \
+ svc_xprt_flag(CLOSE) \
+ svc_xprt_flag(DATA) \
+ svc_xprt_flag(TEMP) \
+ svc_xprt_flag(DEAD) \
+ svc_xprt_flag(CHNGBUF) \
+ svc_xprt_flag(DEFERRED) \
+ svc_xprt_flag(OLD) \
+ svc_xprt_flag(LISTENER) \
+ svc_xprt_flag(CACHE_AUTH) \
+ svc_xprt_flag(LOCAL) \
+ svc_xprt_flag(KILL_TEMP) \
+ svc_xprt_flag(CONG_CTRL) \
+ svc_xprt_flag(HANDSHAKE) \
+ svc_xprt_flag(TLS_SESSION) \
+ svc_xprt_flag_end(PEER_AUTH)
+
+#undef svc_xprt_flag
+#undef svc_xprt_flag_end
+#define svc_xprt_flag(x) TRACE_DEFINE_ENUM(XPT_##x);
+#define svc_xprt_flag_end(x) TRACE_DEFINE_ENUM(XPT_##x);
+
+SVC_XPRT_FLAG_LIST
+
+#undef svc_xprt_flag
+#undef svc_xprt_flag_end
+#define svc_xprt_flag(x) { BIT(XPT_##x), #x },
+#define svc_xprt_flag_end(x) { BIT(XPT_##x), #x }
+
#define show_svc_xprt_flags(flags) \
- __print_flags(flags, "|", \
- { BIT(XPT_BUSY), "BUSY" }, \
- { BIT(XPT_CONN), "CONN" }, \
- { BIT(XPT_CLOSE), "CLOSE" }, \
- { BIT(XPT_DATA), "DATA" }, \
- { BIT(XPT_TEMP), "TEMP" }, \
- { BIT(XPT_DEAD), "DEAD" }, \
- { BIT(XPT_CHNGBUF), "CHNGBUF" }, \
- { BIT(XPT_DEFERRED), "DEFERRED" }, \
- { BIT(XPT_OLD), "OLD" }, \
- { BIT(XPT_LISTENER), "LISTENER" }, \
- { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \
- { BIT(XPT_LOCAL), "LOCAL" }, \
- { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \
- { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \
- { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \
- { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \
- { BIT(XPT_PEER_AUTH), "PEER_AUTH" })
+ __print_flags(flags, "|", SVC_XPRT_FLAG_LIST)
TRACE_EVENT(svc_xprt_create_err,
TP_PROTO(
@@ -1994,25 +2014,25 @@ TRACE_EVENT(svc_xprt_create_err,
TRACE_EVENT(svc_xprt_enqueue,
TP_PROTO(
const struct svc_xprt *xprt,
- const struct svc_rqst *rqst
+ unsigned long flags
),
- TP_ARGS(xprt, rqst),
+ TP_ARGS(xprt, flags),
TP_STRUCT__entry(
SVC_XPRT_ENDPOINT_FIELDS(xprt)
-
- __field(int, pid)
),
TP_fast_assign(
- SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt);
-
- __entry->pid = rqst? rqst->rq_task->pid : 0;
+ __assign_sockaddr(server, &xprt->xpt_local,
+ xprt->xpt_locallen);
+ __assign_sockaddr(client, &xprt->xpt_remote,
+ xprt->xpt_remotelen);
+ __entry->flags = flags;
+ __entry->netns_ino = xprt->xpt_net->ns.inum;
),
- TP_printk(SVC_XPRT_ENDPOINT_FORMAT " pid=%d",
- SVC_XPRT_ENDPOINT_VARARGS, __entry->pid)
+ TP_printk(SVC_XPRT_ENDPOINT_FORMAT, SVC_XPRT_ENDPOINT_VARARGS)
);
TRACE_EVENT(svc_xprt_dequeue,
diff --git a/net/sunrpc/.kunitconfig b/net/sunrpc/.kunitconfig
index a55a00fa649b..eb02b906c295 100644
--- a/net/sunrpc/.kunitconfig
+++ b/net/sunrpc/.kunitconfig
@@ -23,7 +23,6 @@ CONFIG_NFS_FS=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
CONFIG_RPCSEC_GSS_KRB5=y
-CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES=y
CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1=y
CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA=y
CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2=y
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 4afc5fd71d44..2d8b67dac7b5 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -34,38 +34,6 @@ config RPCSEC_GSS_KRB5
If unsure, say Y.
-config RPCSEC_GSS_KRB5_SIMPLIFIED
- bool
- depends on RPCSEC_GSS_KRB5
-
-config RPCSEC_GSS_KRB5_CRYPTOSYSTEM
- bool
- depends on RPCSEC_GSS_KRB5
-
-config RPCSEC_GSS_KRB5_ENCTYPES_DES
- bool "Enable Kerberos enctypes based on DES (deprecated)"
- depends on RPCSEC_GSS_KRB5
- depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_ECB
- depends on CRYPTO_HMAC && CRYPTO_MD5 && CRYPTO_SHA1
- depends on CRYPTO_DES
- default n
- select RPCSEC_GSS_KRB5_SIMPLIFIED
- help
- Choose Y to enable the use of deprecated Kerberos 5
- encryption types that utilize Data Encryption Standard
- (DES) based ciphers. These include des-cbc-md5,
- des-cbc-crc, and des-cbc-md4, which were deprecated by
- RFC 6649, and des3-cbc-sha1, which was deprecated by RFC
- 8429.
-
- These encryption types are known to be insecure, therefore
- the default setting of this option is N. Support for these
- encryption types is available only for compatibility with
- legacy NFS client and server implementations.
-
- Removal of support is planned for a subsequent kernel
- release.
-
config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1
bool "Enable Kerberos enctypes based on AES and SHA-1"
depends on RPCSEC_GSS_KRB5
@@ -73,7 +41,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1
depends on CRYPTO_HMAC && CRYPTO_SHA1
depends on CRYPTO_AES
default y
- select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
help
Choose Y to enable the use of Kerberos 5 encryption types
that utilize Advanced Encryption Standard (AES) ciphers and
@@ -86,7 +53,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA
depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_CAMELLIA
depends on CRYPTO_CMAC
default n
- select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
help
Choose Y to enable the use of Kerberos 5 encryption types
that utilize Camellia ciphers (RFC 3713) and CMAC digests
@@ -100,7 +66,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2
depends on CRYPTO_HMAC && CRYPTO_SHA256 && CRYPTO_SHA512
depends on CRYPTO_AES
default n
- select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
help
Choose Y to enable the use of Kerberos 5 encryption types
that utilize Advanced Encryption Standard (AES) ciphers and
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 012ae1720689..ad1736d93b76 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -12,6 +12,6 @@ auth_rpcgss-y := auth_gss.o gss_generic_token.o \
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
- gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
+ gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
obj-$(CONFIG_RPCSEC_GSS_KRB5_KUNIT_TEST) += gss_krb5_test.o
diff --git a/net/sunrpc/auth_gss/gss_krb5_internal.h b/net/sunrpc/auth_gss/gss_krb5_internal.h
index b673e2626acb..3afd4065bf3d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_internal.h
+++ b/net/sunrpc/auth_gss/gss_krb5_internal.h
@@ -33,7 +33,6 @@ struct gss_krb5_enctype {
const u32 Ke_length; /* encryption subkey length, in octets */
const u32 Ki_length; /* integrity subkey length, in octets */
- int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask);
int (*derive_key)(const struct gss_krb5_enctype *gk5e,
const struct xdr_netobj *in,
struct xdr_netobj *out,
@@ -85,24 +84,15 @@ struct krb5_ctx {
* GSS Kerberos 5 mechanism Per-Message calls.
*/
-u32 gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
- struct xdr_netobj *token);
u32 gss_krb5_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
struct xdr_netobj *token);
-u32 gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
- struct xdr_netobj *read_token);
u32 gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
struct xdr_netobj *read_token);
-u32 gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset,
- struct xdr_buf *buf, struct page **pages);
u32 gss_krb5_wrap_v2(struct krb5_ctx *kctx, int offset,
struct xdr_buf *buf, struct page **pages);
-u32 gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len,
- struct xdr_buf *buf, unsigned int *slack,
- unsigned int *align);
u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len,
struct xdr_buf *buf, unsigned int *slack,
unsigned int *align);
@@ -113,12 +103,6 @@ u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len,
/* Key Derivation Functions */
-int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e,
- const struct xdr_netobj *inkey,
- struct xdr_netobj *outkey,
- const struct xdr_netobj *label,
- gfp_t gfp_mask);
-
int krb5_derive_key_v2(const struct gss_krb5_enctype *gk5e,
const struct xdr_netobj *inkey,
struct xdr_netobj *outkey,
@@ -169,13 +153,6 @@ static inline int krb5_derive_key(struct krb5_ctx *kctx,
return gk5e->derive_key(gk5e, inkey, outkey, &label, gfp_mask);
}
-s32 krb5_make_seq_num(struct krb5_ctx *kctx, struct crypto_sync_skcipher *key,
- int direction, u32 seqnum, unsigned char *cksum,
- unsigned char *buf);
-
-s32 krb5_get_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
- unsigned char *buf, int *direction, u32 *seqnum);
-
void krb5_make_confounder(u8 *p, int conflen);
u32 make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 5347fe1cc93f..06d8ee0db000 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -222,90 +222,6 @@ err_return:
return ret;
}
-#define smask(step) ((1<<step)-1)
-#define pstep(x, step) (((x)&smask(step))^(((x)>>step)&smask(step)))
-#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1)
-
-static void mit_des_fixup_key_parity(u8 key[8])
-{
- int i;
- for (i = 0; i < 8; i++) {
- key[i] &= 0xfe;
- key[i] |= 1^parity_char(key[i]);
- }
-}
-
-static int krb5_random_to_key_v1(const struct gss_krb5_enctype *gk5e,
- struct xdr_netobj *randombits,
- struct xdr_netobj *key)
-{
- int i, ret = -EINVAL;
-
- if (key->len != 24) {
- dprintk("%s: key->len is %d\n", __func__, key->len);
- goto err_out;
- }
- if (randombits->len != 21) {
- dprintk("%s: randombits->len is %d\n",
- __func__, randombits->len);
- goto err_out;
- }
-
- /* take the seven bytes, move them around into the top 7 bits of the
- 8 key bytes, then compute the parity bits. Do this three times. */
-
- for (i = 0; i < 3; i++) {
- memcpy(key->data + i*8, randombits->data + i*7, 7);
- key->data[i*8+7] = (((key->data[i*8]&1)<<1) |
- ((key->data[i*8+1]&1)<<2) |
- ((key->data[i*8+2]&1)<<3) |
- ((key->data[i*8+3]&1)<<4) |
- ((key->data[i*8+4]&1)<<5) |
- ((key->data[i*8+5]&1)<<6) |
- ((key->data[i*8+6]&1)<<7));
-
- mit_des_fixup_key_parity(key->data + i*8);
- }
- ret = 0;
-err_out:
- return ret;
-}
-
-/**
- * krb5_derive_key_v1 - Derive a subkey for an RFC 3961 enctype
- * @gk5e: Kerberos 5 enctype profile
- * @inkey: base protocol key
- * @outkey: OUT: derived key
- * @label: subkey usage label
- * @gfp_mask: memory allocation control flags
- *
- * Caller sets @outkey->len to the desired length of the derived key.
- *
- * On success, returns 0 and fills in @outkey. A negative errno value
- * is returned on failure.
- */
-int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e,
- const struct xdr_netobj *inkey,
- struct xdr_netobj *outkey,
- const struct xdr_netobj *label,
- gfp_t gfp_mask)
-{
- struct xdr_netobj inblock;
- int ret;
-
- inblock.len = gk5e->keybytes;
- inblock.data = kmalloc(inblock.len, gfp_mask);
- if (!inblock.data)
- return -ENOMEM;
-
- ret = krb5_DK(gk5e, inkey, inblock.data, label, gfp_mask);
- if (!ret)
- ret = krb5_random_to_key_v1(gk5e, &inblock, outkey);
-
- kfree_sensitive(inblock.data);
- return ret;
-}
-
/*
* This is the identity function, with some sanity checking.
*/
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 20e21d08badb..e31cfdf7eadc 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -30,61 +30,7 @@
static struct gss_api_mech gss_kerberos_mech;
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-static int gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask);
-static int gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask);
-#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM)
-static int gss_krb5_import_ctx_v2(struct krb5_ctx *ctx, gfp_t gfp_mask);
-#endif
-
static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
-#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES)
- /*
- * DES (All DES enctypes are mapped to the same gss functionality)
- */
- {
- .etype = ENCTYPE_DES_CBC_RAW,
- .ctype = CKSUMTYPE_RSA_MD5,
- .name = "des-cbc-crc",
- .encrypt_name = "cbc(des)",
- .cksum_name = "md5",
- .import_ctx = gss_krb5_import_ctx_des,
- .get_mic = gss_krb5_get_mic_v1,
- .verify_mic = gss_krb5_verify_mic_v1,
- .wrap = gss_krb5_wrap_v1,
- .unwrap = gss_krb5_unwrap_v1,
- .signalg = SGN_ALG_DES_MAC_MD5,
- .sealalg = SEAL_ALG_DES,
- .keybytes = 7,
- .keylength = 8,
- .cksumlength = 8,
- .keyed_cksum = 0,
- },
- /*
- * 3DES
- */
- {
- .etype = ENCTYPE_DES3_CBC_RAW,
- .ctype = CKSUMTYPE_HMAC_SHA1_DES3,
- .name = "des3-hmac-sha1",
- .encrypt_name = "cbc(des3_ede)",
- .cksum_name = "hmac(sha1)",
- .import_ctx = gss_krb5_import_ctx_v1,
- .derive_key = krb5_derive_key_v1,
- .get_mic = gss_krb5_get_mic_v1,
- .verify_mic = gss_krb5_verify_mic_v1,
- .wrap = gss_krb5_wrap_v1,
- .unwrap = gss_krb5_unwrap_v1,
- .signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
- .sealalg = SEAL_ALG_DES3KD,
- .keybytes = 21,
- .keylength = 24,
- .cksumlength = 20,
- .keyed_cksum = 1,
- },
-#endif
-
#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1)
/*
* AES-128 with SHA-1 (RFC 3962)
@@ -96,7 +42,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.encrypt_name = "cts(cbc(aes))",
.aux_cipher = "cbc(aes)",
.cksum_name = "hmac(sha1)",
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_derive_key_v2,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -126,7 +71,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.encrypt_name = "cts(cbc(aes))",
.aux_cipher = "cbc(aes)",
.cksum_name = "hmac(sha1)",
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_derive_key_v2,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -166,7 +110,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(128),
.Ki_length = BITS2OCTETS(128),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_feedback_cmac,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -193,7 +136,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(256),
.Ki_length = BITS2OCTETS(256),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_feedback_cmac,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -223,7 +165,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(128),
.Ki_length = BITS2OCTETS(128),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_hmac_sha2,
.encrypt = krb5_etm_encrypt,
.decrypt = krb5_etm_decrypt,
@@ -250,7 +191,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(256),
.Ki_length = BITS2OCTETS(192),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_hmac_sha2,
.encrypt = krb5_etm_encrypt,
.decrypt = krb5_etm_decrypt,
@@ -284,12 +224,6 @@ static void gss_krb5_prepare_enctype_priority_list(void)
ENCTYPE_AES256_CTS_HMAC_SHA1_96,
ENCTYPE_AES128_CTS_HMAC_SHA1_96,
#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES)
- ENCTYPE_DES3_CBC_SHA1,
- ENCTYPE_DES_CBC_MD5,
- ENCTYPE_DES_CBC_CRC,
- ENCTYPE_DES_CBC_MD4,
-#endif
};
size_t total, i;
char buf[16];
@@ -330,185 +264,6 @@ const struct gss_krb5_enctype *gss_krb5_lookup_enctype(u32 etype)
EXPORT_SYMBOL_IF_KUNIT(gss_krb5_lookup_enctype);
static struct crypto_sync_skcipher *
-gss_krb5_alloc_cipher_v1(struct krb5_ctx *ctx, struct xdr_netobj *key)
-{
- struct crypto_sync_skcipher *tfm;
-
- tfm = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
- if (IS_ERR(tfm))
- return NULL;
- if (crypto_sync_skcipher_setkey(tfm, key->data, key->len)) {
- crypto_free_sync_skcipher(tfm);
- return NULL;
- }
- return tfm;
-}
-
-static inline const void *
-get_key(const void *p, const void *end,
- struct krb5_ctx *ctx, struct crypto_sync_skcipher **res)
-{
- struct crypto_sync_skcipher *tfm;
- struct xdr_netobj key;
- int alg;
-
- p = simple_get_bytes(p, end, &alg, sizeof(alg));
- if (IS_ERR(p))
- goto out_err;
- switch (alg) {
- case ENCTYPE_DES_CBC_CRC:
- case ENCTYPE_DES_CBC_MD4:
- case ENCTYPE_DES_CBC_MD5:
- /* Map all these key types to ENCTYPE_DES_CBC_RAW */
- alg = ENCTYPE_DES_CBC_RAW;
- break;
- }
- if (!gss_krb5_lookup_enctype(alg)) {
- pr_warn("gss_krb5: unsupported enctype: %d\n", alg);
- goto out_err_inval;
- }
-
- p = simple_get_netobj(p, end, &key);
- if (IS_ERR(p))
- goto out_err;
- tfm = gss_krb5_alloc_cipher_v1(ctx, &key);
- kfree(key.data);
- if (!tfm) {
- pr_warn("gss_krb5: failed to initialize cipher '%s'\n",
- ctx->gk5e->encrypt_name);
- goto out_err_inval;
- }
- *res = tfm;
-
- return p;
-
-out_err_inval:
- p = ERR_PTR(-EINVAL);
-out_err:
- return p;
-}
-
-static int
-gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
-{
- u32 seq_send;
- int tmp;
- u32 time32;
-
- p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
- if (IS_ERR(p))
- goto out_err;
-
- /* Old format supports only DES! Any other enctype uses new format */
- ctx->enctype = ENCTYPE_DES_CBC_RAW;
-
- ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype);
- if (ctx->gk5e == NULL) {
- p = ERR_PTR(-EINVAL);
- goto out_err;
- }
-
- /* The downcall format was designed before we completely understood
- * the uses of the context fields; so it includes some stuff we
- * just give some minimal sanity-checking, and some we ignore
- * completely (like the next twenty bytes): */
- if (unlikely(p + 20 > end || p + 20 < p)) {
- p = ERR_PTR(-EFAULT);
- goto out_err;
- }
- p += 20;
- p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
- if (IS_ERR(p))
- goto out_err;
- if (tmp != SGN_ALG_DES_MAC_MD5) {
- p = ERR_PTR(-ENOSYS);
- goto out_err;
- }
- p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
- if (IS_ERR(p))
- goto out_err;
- if (tmp != SEAL_ALG_DES) {
- p = ERR_PTR(-ENOSYS);
- goto out_err;
- }
- p = simple_get_bytes(p, end, &time32, sizeof(time32));
- if (IS_ERR(p))
- goto out_err;
- /* unsigned 32-bit time overflows in year 2106 */
- ctx->endtime = (time64_t)time32;
- p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send));
- if (IS_ERR(p))
- goto out_err;
- atomic_set(&ctx->seq_send, seq_send);
- p = simple_get_netobj(p, end, &ctx->mech_used);
- if (IS_ERR(p))
- goto out_err;
- p = get_key(p, end, ctx, &ctx->enc);
- if (IS_ERR(p))
- goto out_err_free_mech;
- p = get_key(p, end, ctx, &ctx->seq);
- if (IS_ERR(p))
- goto out_err_free_key1;
- if (p != end) {
- p = ERR_PTR(-EFAULT);
- goto out_err_free_key2;
- }
-
- return 0;
-
-out_err_free_key2:
- crypto_free_sync_skcipher(ctx->seq);
-out_err_free_key1:
- crypto_free_sync_skcipher(ctx->enc);
-out_err_free_mech:
- kfree(ctx->mech_used.data);
-out_err:
- return PTR_ERR(p);
-}
-
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-static int
-gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask)
-{
- return -EINVAL;
-}
-
-static int
-gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask)
-{
- struct xdr_netobj keyin, keyout;
-
- keyin.data = ctx->Ksess;
- keyin.len = ctx->gk5e->keylength;
-
- ctx->seq = gss_krb5_alloc_cipher_v1(ctx, &keyin);
- if (ctx->seq == NULL)
- goto out_err;
- ctx->enc = gss_krb5_alloc_cipher_v1(ctx, &keyin);
- if (ctx->enc == NULL)
- goto out_free_seq;
-
- /* derive cksum */
- keyout.data = ctx->cksum;
- keyout.len = ctx->gk5e->keylength;
- if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_SIGN,
- KEY_USAGE_SEED_CHECKSUM, gfp_mask))
- goto out_free_enc;
-
- return 0;
-
-out_free_enc:
- crypto_free_sync_skcipher(ctx->enc);
-out_free_seq:
- crypto_free_sync_skcipher(ctx->seq);
-out_err:
- return -EINVAL;
-}
-#endif
-
-#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM)
-
-static struct crypto_sync_skcipher *
gss_krb5_alloc_cipher_v2(const char *cname, const struct xdr_netobj *key)
{
struct crypto_sync_skcipher *tfm;
@@ -636,8 +391,6 @@ out_free:
goto out;
}
-#endif
-
static int
gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
gfp_t gfp_mask)
@@ -671,9 +424,6 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
if (IS_ERR(p))
goto out_err;
- /* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */
- if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)
- ctx->enctype = ENCTYPE_DES3_CBC_RAW;
ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype);
if (ctx->gk5e == NULL) {
dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
@@ -700,7 +450,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
}
ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
- return ctx->gk5e->import_ctx(ctx, gfp_mask);
+ return gss_krb5_import_ctx_v2(ctx, gfp_mask);
out_err:
return PTR_ERR(p);
@@ -718,10 +468,7 @@ gss_krb5_import_sec_context(const void *p, size_t len, struct gss_ctx *ctx_id,
if (ctx == NULL)
return -ENOMEM;
- if (len == 85)
- ret = gss_import_v1_context(p, end, ctx);
- else
- ret = gss_import_v2_context(p, end, ctx, gfp_mask);
+ ret = gss_import_v2_context(p, end, ctx, gfp_mask);
memzero_explicit(&ctx->Ksess, sizeof(ctx->Ksess));
if (ret) {
kfree(ctx);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 146aa755f07d..ce540df9bce4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -71,75 +71,6 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-
-static void *
-setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
-{
- u16 *ptr;
- void *krb5_hdr;
- int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
-
- token->len = g_token_size(&ctx->mech_used, body_size);
-
- ptr = (u16 *)token->data;
- g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
-
- /* ptr now at start of header described in rfc 1964, section 1.2.1: */
- krb5_hdr = ptr;
- *ptr++ = KG_TOK_MIC_MSG;
- /*
- * signalg is stored as if it were converted from LE to host endian, even
- * though it's an opaque pair of bytes according to the RFC.
- */
- *ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg);
- *ptr++ = SEAL_ALG_NONE;
- *ptr = 0xffff;
-
- return krb5_hdr;
-}
-
-u32
-gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
- struct xdr_netobj *token)
-{
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- void *ptr;
- time64_t now;
- u32 seq_send;
- u8 *cksumkey;
-
- dprintk("RPC: %s\n", __func__);
- BUG_ON(ctx == NULL);
-
- now = ktime_get_real_seconds();
-
- ptr = setup_token(ctx, token);
-
- if (ctx->gk5e->keyed_cksum)
- cksumkey = ctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(ctx, ptr, 8, text, 0, cksumkey,
- KG_USAGE_SIGN, &md5cksum))
- return GSS_S_FAILURE;
-
- memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
-
- seq_send = atomic_fetch_inc(&ctx->seq_send);
-
- if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
- seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
- return GSS_S_FAILURE;
-
- return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
-}
-
-#endif
-
static void *
setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
deleted file mode 100644
index 1babc3474e10..000000000000
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * linux/net/sunrpc/gss_krb5_seqnum.c
- *
- * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c
- *
- * Copyright (c) 2000 The Regents of the University of Michigan.
- * All rights reserved.
- *
- * Andy Adamson <andros@umich.edu>
- */
-
-/*
- * Copyright 1993 by OpenVision Technologies, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose is hereby granted without fee,
- * provided that the above copyright notice appears in all copies and
- * that both that copyright notice and this permission notice appear in
- * supporting documentation, and that the name of OpenVision not be used
- * in advertising or publicity pertaining to distribution of the software
- * without specific, written prior permission. OpenVision makes no
- * representations about the suitability of this software for any
- * purpose. It is provided "as is" without express or implied warranty.
- *
- * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
- * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <crypto/skcipher.h>
-#include <linux/types.h>
-#include <linux/sunrpc/gss_krb5.h>
-
-#include "gss_krb5_internal.h"
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-s32
-krb5_make_seq_num(struct krb5_ctx *kctx,
- struct crypto_sync_skcipher *key,
- int direction,
- u32 seqnum,
- unsigned char *cksum, unsigned char *buf)
-{
- unsigned char *plain;
- s32 code;
-
- plain = kmalloc(8, GFP_KERNEL);
- if (!plain)
- return -ENOMEM;
-
- plain[0] = (unsigned char) (seqnum & 0xff);
- plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
- plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
- plain[3] = (unsigned char) ((seqnum >> 24) & 0xff);
-
- plain[4] = direction;
- plain[5] = direction;
- plain[6] = direction;
- plain[7] = direction;
-
- code = krb5_encrypt(key, cksum, plain, buf, 8);
- kfree(plain);
- return code;
-}
-
-s32
-krb5_get_seq_num(struct krb5_ctx *kctx,
- unsigned char *cksum,
- unsigned char *buf,
- int *direction, u32 *seqnum)
-{
- s32 code;
- unsigned char *plain;
- struct crypto_sync_skcipher *key = kctx->seq;
-
- dprintk("RPC: krb5_get_seq_num:\n");
-
- plain = kmalloc(8, GFP_KERNEL);
- if (!plain)
- return -ENOMEM;
-
- if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
- goto out;
-
- if ((plain[4] != plain[5]) || (plain[4] != plain[6]) ||
- (plain[4] != plain[7])) {
- code = (s32)KG_BAD_SEQ;
- goto out;
- }
-
- *direction = plain[4];
-
- *seqnum = ((plain[0]) |
- (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
-
-out:
- kfree(plain);
- return code;
-}
diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c
index 95ca783795c5..85625e3f3814 100644
--- a/net/sunrpc/auth_gss/gss_krb5_test.c
+++ b/net/sunrpc/auth_gss/gss_krb5_test.c
@@ -320,208 +320,12 @@ static void rfc3961_nfold_case(struct kunit *test)
"result mismatch");
}
-/*
- * RFC 3961 Appendix A.3. DES3 DR and DK
- *
- * These tests show the derived-random and derived-key values for the
- * des3-hmac-sha1-kd encryption scheme, using the DR and DK functions
- * defined in section 6.3.1. The input keys were randomly generated;
- * the usage values are from this specification.
- *
- * This test material is copyright (C) The Internet Society (2005).
- */
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_155,
- 0x00, 0x00, 0x00, 0x01, 0x55
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_1aa,
- 0x00, 0x00, 0x00, 0x01, 0xaa
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_kerberos,
- 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_base_key,
- 0xdc, 0xe0, 0x6b, 0x1f, 0x64, 0xc8, 0x57, 0xa1,
- 0x1c, 0x3d, 0xb5, 0x7c, 0x51, 0x89, 0x9b, 0x2c,
- 0xc1, 0x79, 0x10, 0x08, 0xce, 0x97, 0x3b, 0x92
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_derived_key,
- 0x92, 0x51, 0x79, 0xd0, 0x45, 0x91, 0xa7, 0x9b,
- 0x5d, 0x31, 0x92, 0xc4, 0xa7, 0xe9, 0xc2, 0x89,
- 0xb0, 0x49, 0xc7, 0x1f, 0x6e, 0xe6, 0x04, 0xcd
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_base_key,
- 0x5e, 0x13, 0xd3, 0x1c, 0x70, 0xef, 0x76, 0x57,
- 0x46, 0x57, 0x85, 0x31, 0xcb, 0x51, 0xc1, 0x5b,
- 0xf1, 0x1c, 0xa8, 0x2c, 0x97, 0xce, 0xe9, 0xf2
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_derived_key,
- 0x9e, 0x58, 0xe5, 0xa1, 0x46, 0xd9, 0x94, 0x2a,
- 0x10, 0x1c, 0x46, 0x98, 0x45, 0xd6, 0x7a, 0x20,
- 0xe3, 0xc4, 0x25, 0x9e, 0xd9, 0x13, 0xf2, 0x07
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_base_key,
- 0x98, 0xe6, 0xfd, 0x8a, 0x04, 0xa4, 0xb6, 0x85,
- 0x9b, 0x75, 0xa1, 0x76, 0x54, 0x0b, 0x97, 0x52,
- 0xba, 0xd3, 0xec, 0xd6, 0x10, 0xa2, 0x52, 0xbc
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_derived_key,
- 0x13, 0xfe, 0xf8, 0x0d, 0x76, 0x3e, 0x94, 0xec,
- 0x6d, 0x13, 0xfd, 0x2c, 0xa1, 0xd0, 0x85, 0x07,
- 0x02, 0x49, 0xda, 0xd3, 0x98, 0x08, 0xea, 0xbf
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_base_key,
- 0x62, 0x2a, 0xec, 0x25, 0xa2, 0xfe, 0x2c, 0xad,
- 0x70, 0x94, 0x68, 0x0b, 0x7c, 0x64, 0x94, 0x02,
- 0x80, 0x08, 0x4c, 0x1a, 0x7c, 0xec, 0x92, 0xb5
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_derived_key,
- 0xf8, 0xdf, 0xbf, 0x04, 0xb0, 0x97, 0xe6, 0xd9,
- 0xdc, 0x07, 0x02, 0x68, 0x6b, 0xcb, 0x34, 0x89,
- 0xd9, 0x1f, 0xd9, 0xa4, 0x51, 0x6b, 0x70, 0x3e
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_base_key,
- 0xd3, 0xf8, 0x29, 0x8c, 0xcb, 0x16, 0x64, 0x38,
- 0xdc, 0xb9, 0xb9, 0x3e, 0xe5, 0xa7, 0x62, 0x92,
- 0x86, 0xa4, 0x91, 0xf8, 0x38, 0xf8, 0x02, 0xfb
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_derived_key,
- 0x23, 0x70, 0xda, 0x57, 0x5d, 0x2a, 0x3d, 0xa8,
- 0x64, 0xce, 0xbf, 0xdc, 0x52, 0x04, 0xd5, 0x6d,
- 0xf7, 0x79, 0xa7, 0xdf, 0x43, 0xd9, 0xda, 0x43
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_base_key,
- 0xc1, 0x08, 0x16, 0x49, 0xad, 0xa7, 0x43, 0x62,
- 0xe6, 0xa1, 0x45, 0x9d, 0x01, 0xdf, 0xd3, 0x0d,
- 0x67, 0xc2, 0x23, 0x4c, 0x94, 0x07, 0x04, 0xda
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_derived_key,
- 0x34, 0x80, 0x57, 0xec, 0x98, 0xfd, 0xc4, 0x80,
- 0x16, 0x16, 0x1c, 0x2a, 0x4c, 0x7a, 0x94, 0x3e,
- 0x92, 0xae, 0x49, 0x2c, 0x98, 0x91, 0x75, 0xf7
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_base_key,
- 0x5d, 0x15, 0x4a, 0xf2, 0x38, 0xf4, 0x67, 0x13,
- 0x15, 0x57, 0x19, 0xd5, 0x5e, 0x2f, 0x1f, 0x79,
- 0x0d, 0xd6, 0x61, 0xf2, 0x79, 0xa7, 0x91, 0x7c
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_derived_key,
- 0xa8, 0x80, 0x8a, 0xc2, 0x67, 0xda, 0xda, 0x3d,
- 0xcb, 0xe9, 0xa7, 0xc8, 0x46, 0x26, 0xfb, 0xc7,
- 0x61, 0xc2, 0x94, 0xb0, 0x13, 0x15, 0xe5, 0xc1
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_base_key,
- 0x79, 0x85, 0x62, 0xe0, 0x49, 0x85, 0x2f, 0x57,
- 0xdc, 0x8c, 0x34, 0x3b, 0xa1, 0x7f, 0x2c, 0xa1,
- 0xd9, 0x73, 0x94, 0xef, 0xc8, 0xad, 0xc4, 0x43
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_derived_key,
- 0xc8, 0x13, 0xf8, 0x8a, 0x3b, 0xe3, 0xb3, 0x34,
- 0xf7, 0x54, 0x25, 0xce, 0x91, 0x75, 0xfb, 0xe3,
- 0xc8, 0x49, 0x3b, 0x89, 0xc8, 0x70, 0x3b, 0x49
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_base_key,
- 0x26, 0xdc, 0xe3, 0x34, 0xb5, 0x45, 0x29, 0x2f,
- 0x2f, 0xea, 0xb9, 0xa8, 0x70, 0x1a, 0x89, 0xa4,
- 0xb9, 0x9e, 0xb9, 0x94, 0x2c, 0xec, 0xd0, 0x16
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_derived_key,
- 0xf4, 0x8f, 0xfd, 0x6e, 0x83, 0xf8, 0x3e, 0x73,
- 0x54, 0xe6, 0x94, 0xfd, 0x25, 0x2c, 0xf8, 0x3b,
- 0xfe, 0x58, 0xf7, 0xd5, 0xba, 0x37, 0xec, 0x5d
-);
-
-static const struct gss_krb5_test_param rfc3961_kdf_test_params[] = {
- {
- .desc = "des3-hmac-sha1 key derivation case 1",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test1_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test1_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 2",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test2_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test2_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 3",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test3_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test3_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 4",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test4_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test4_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 5",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test5_base_key,
- .usage = &des3_dk_usage_kerberos,
- .expected_result = &des3_dk_test5_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 6",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test6_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test6_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 7",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test7_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test7_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 8",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test8_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test8_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 9",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test9_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test9_derived_key,
- },
-};
-
-/* Creates the function rfc3961_kdf_gen_params */
-KUNIT_ARRAY_PARAM(rfc3961_kdf, rfc3961_kdf_test_params, gss_krb5_get_desc);
-
static struct kunit_case rfc3961_test_cases[] = {
{
.name = "RFC 3961 n-fold",
.run_case = rfc3961_nfold_case,
.generate_params = rfc3961_nfold_gen_params,
},
- {
- .name = "RFC 3961 key derivation",
- .run_case = kdf_case,
- .generate_params = rfc3961_kdf_gen_params,
- },
{}
};
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 7d6d4ae4a3c9..4fbc50a0a2c4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -69,83 +69,6 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-/* read_token is a mic token, and message_buffer is the data that the mic was
- * supposedly taken over. */
-u32
-gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
- struct xdr_netobj *read_token)
-{
- int signalg;
- int sealalg;
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- s32 now;
- int direction;
- u32 seqnum;
- unsigned char *ptr = (unsigned char *)read_token->data;
- int bodysize;
- u8 *cksumkey;
-
- dprintk("RPC: krb5_read_token\n");
-
- if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr,
- read_token->len))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) ||
- (ptr[1] != (KG_TOK_MIC_MSG & 0xff)))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* XXX sanity-check bodysize?? */
-
- signalg = ptr[2] + (ptr[3] << 8);
- if (signalg != ctx->gk5e->signalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- sealalg = ptr[4] + (ptr[5] << 8);
- if (sealalg != SEAL_ALG_NONE)
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if (ctx->gk5e->keyed_cksum)
- cksumkey = ctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(ctx, ptr, 8, message_buffer, 0,
- cksumkey, KG_USAGE_SIGN, &md5cksum))
- return GSS_S_FAILURE;
-
- if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
- ctx->gk5e->cksumlength))
- return GSS_S_BAD_SIG;
-
- /* it got through unscathed. Make sure the context is unexpired */
-
- now = ktime_get_real_seconds();
-
- if (now > ctx->endtime)
- return GSS_S_CONTEXT_EXPIRED;
-
- /* do sequencing checks */
-
- if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
- &direction, &seqnum))
- return GSS_S_FAILURE;
-
- if ((ctx->initiate && direction != 0xff) ||
- (!ctx->initiate && direction != 0))
- return GSS_S_BAD_SIG;
-
- return GSS_S_COMPLETE;
-}
-#endif
-
u32
gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
struct xdr_netobj *read_token)
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 6d6b082380b2..b3e1738ff6bf 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -40,293 +40,6 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-
-static inline int
-gss_krb5_padding(int blocksize, int length)
-{
- return blocksize - (length % blocksize);
-}
-
-static inline void
-gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize)
-{
- int padding = gss_krb5_padding(blocksize, buf->len - offset);
- char *p;
- struct kvec *iov;
-
- if (buf->page_len || buf->tail[0].iov_len)
- iov = &buf->tail[0];
- else
- iov = &buf->head[0];
- p = iov->iov_base + iov->iov_len;
- iov->iov_len += padding;
- buf->len += padding;
- memset(p, padding, padding);
-}
-
-static inline int
-gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
-{
- u8 *ptr;
- u8 pad;
- size_t len = buf->len;
-
- if (len <= buf->head[0].iov_len) {
- pad = *(u8 *)(buf->head[0].iov_base + len - 1);
- if (pad > buf->head[0].iov_len)
- return -EINVAL;
- buf->head[0].iov_len -= pad;
- goto out;
- } else
- len -= buf->head[0].iov_len;
- if (len <= buf->page_len) {
- unsigned int last = (buf->page_base + len - 1)
- >>PAGE_SHIFT;
- unsigned int offset = (buf->page_base + len - 1)
- & (PAGE_SIZE - 1);
- ptr = kmap_atomic(buf->pages[last]);
- pad = *(ptr + offset);
- kunmap_atomic(ptr);
- goto out;
- } else
- len -= buf->page_len;
- BUG_ON(len > buf->tail[0].iov_len);
- pad = *(u8 *)(buf->tail[0].iov_base + len - 1);
-out:
- /* XXX: NOTE: we do not adjust the page lengths--they represent
- * a range of data in the real filesystem page cache, and we need
- * to know that range so the xdr code can properly place read data.
- * However adjusting the head length, as we do above, is harmless.
- * In the case of a request that fits into a single page, the server
- * also uses length and head length together to determine the original
- * start of the request to copy the request for deferal; so it's
- * easier on the server if we adjust head and tail length in tandem.
- * It's not really a problem that we don't fool with the page and
- * tail lengths, though--at worst badly formed xdr might lead the
- * server to attempt to parse the padding.
- * XXX: Document all these weird requirements for gss mechanism
- * wrap/unwrap functions. */
- if (pad > blocksize)
- return -EINVAL;
- if (buf->len > pad)
- buf->len -= pad;
- else
- return -EINVAL;
- return 0;
-}
-
-/* Assumptions: the head and tail of inbuf are ours to play with.
- * The pages, however, may be real pages in the page cache and we replace
- * them with scratch pages from **pages before writing to them. */
-/* XXX: obviously the above should be documentation of wrap interface,
- * and shouldn't be in this kerberos-specific file. */
-
-/* XXX factor out common code with seal/unseal. */
-
-u32
-gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset,
- struct xdr_buf *buf, struct page **pages)
-{
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- int blocksize = 0, plainlen;
- unsigned char *ptr, *msg_start;
- time64_t now;
- int headlen;
- struct page **tmp_pages;
- u32 seq_send;
- u8 *cksumkey;
- u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc);
-
- dprintk("RPC: %s\n", __func__);
-
- now = ktime_get_real_seconds();
-
- blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
- gss_krb5_add_padding(buf, offset, blocksize);
- BUG_ON((buf->len - offset) % blocksize);
- plainlen = conflen + buf->len - offset;
-
- headlen = g_token_size(&kctx->mech_used,
- GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
- (buf->len - offset);
-
- ptr = buf->head[0].iov_base + offset;
- /* shift data to make room for header. */
- xdr_extend_head(buf, offset, headlen);
-
- /* XXX Would be cleverer to encrypt while copying. */
- BUG_ON((buf->len - offset - headlen) % blocksize);
-
- g_make_token_header(&kctx->mech_used,
- GSS_KRB5_TOK_HDR_LEN +
- kctx->gk5e->cksumlength + plainlen, &ptr);
-
-
- /* ptr now at header described in rfc 1964, section 1.2.1: */
- ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
- ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
-
- msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
-
- /*
- * signalg and sealalg are stored as if they were converted from LE
- * to host endian, even though they're opaque pairs of bytes according
- * to the RFC.
- */
- *(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
- *(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
- ptr[6] = 0xff;
- ptr[7] = 0xff;
-
- krb5_make_confounder(msg_start, conflen);
-
- if (kctx->gk5e->keyed_cksum)
- cksumkey = kctx->cksum;
- else
- cksumkey = NULL;
-
- /* XXXJBF: UGH!: */
- tmp_pages = buf->pages;
- buf->pages = pages;
- if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen,
- cksumkey, KG_USAGE_SEAL, &md5cksum))
- return GSS_S_FAILURE;
- buf->pages = tmp_pages;
-
- memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
-
- seq_send = atomic_fetch_inc(&kctx->seq_send);
-
- /* XXX would probably be more efficient to compute checksum
- * and encrypt at the same time: */
- if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff,
- seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
- return GSS_S_FAILURE;
-
- if (gss_encrypt_xdr_buf(kctx->enc, buf,
- offset + headlen - conflen, pages))
- return GSS_S_FAILURE;
-
- return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
-}
-
-u32
-gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len,
- struct xdr_buf *buf, unsigned int *slack,
- unsigned int *align)
-{
- int signalg;
- int sealalg;
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- time64_t now;
- int direction;
- s32 seqnum;
- unsigned char *ptr;
- int bodysize;
- void *data_start, *orig_start;
- int data_len;
- int blocksize;
- u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc);
- int crypt_offset;
- u8 *cksumkey;
- unsigned int saved_len = buf->len;
-
- dprintk("RPC: gss_unwrap_kerberos\n");
-
- ptr = (u8 *)buf->head[0].iov_base + offset;
- if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
- len - offset))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
- (ptr[1] != (KG_TOK_WRAP_MSG & 0xff)))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* XXX sanity-check bodysize?? */
-
- /* get the sign and seal algorithms */
-
- signalg = ptr[2] + (ptr[3] << 8);
- if (signalg != kctx->gk5e->signalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- sealalg = ptr[4] + (ptr[5] << 8);
- if (sealalg != kctx->gk5e->sealalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /*
- * Data starts after token header and checksum. ptr points
- * to the beginning of the token header
- */
- crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
- (unsigned char *)buf->head[0].iov_base;
-
- buf->len = len;
- if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if (kctx->gk5e->keyed_cksum)
- cksumkey = kctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
- cksumkey, KG_USAGE_SEAL, &md5cksum))
- return GSS_S_FAILURE;
-
- if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
- kctx->gk5e->cksumlength))
- return GSS_S_BAD_SIG;
-
- /* it got through unscathed. Make sure the context is unexpired */
-
- now = ktime_get_real_seconds();
-
- if (now > kctx->endtime)
- return GSS_S_CONTEXT_EXPIRED;
-
- /* do sequencing checks */
-
- if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
- ptr + 8, &direction, &seqnum))
- return GSS_S_BAD_SIG;
-
- if ((kctx->initiate && direction != 0xff) ||
- (!kctx->initiate && direction != 0))
- return GSS_S_BAD_SIG;
-
- /* Copy the data back to the right position. XXX: Would probably be
- * better to copy and encrypt at the same time. */
-
- blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
- data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
- conflen;
- orig_start = buf->head[0].iov_base + offset;
- data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
- memmove(orig_start, data_start, data_len);
- buf->head[0].iov_len -= (data_start - orig_start);
- buf->len = len - (data_start - orig_start);
-
- if (gss_krb5_remove_padding(buf, blocksize))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* slack must include room for krb5 padding */
- *slack = XDR_QUADLEN(saved_len - buf->len);
- /* The GSS blob always precedes the RPC message payload */
- *align = *slack;
- return GSS_S_COMPLETE;
-}
-
-#endif
-
/*
* We can shift data by up to LOCAL_BUF_LEN bytes in a pass. If we need
* to do more than that, we shift repeatedly. Kevin Coffman reports
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index c4a566737085..18734e70c5dd 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -986,7 +986,7 @@ bad_unwrap:
return -EINVAL;
}
-static int
+static enum svc_auth_status
svcauth_gss_set_client(struct svc_rqst *rqstp)
{
struct gss_svc_data *svcdata = rqstp->rq_auth_data;
@@ -1634,7 +1634,7 @@ svcauth_gss_decode_credbody(struct xdr_stream *xdr,
*
* The rqstp->rq_auth_stat field is also set (see RFCs 2203 and 5531).
*/
-static int
+static enum svc_auth_status
svcauth_gss_accept(struct svc_rqst *rqstp)
{
struct gss_svc_data *svcdata = rqstp->rq_auth_data;
@@ -1945,9 +1945,6 @@ bad_wrap:
* %0: the Reply is ready to be sent
* %-ENOMEM: failed to allocate memory
* %-EINVAL: encoding error
- *
- * XXX: These return values do not match the return values documented
- * for the auth_ops ->release method in linux/sunrpc/svcauth.h.
*/
static int
svcauth_gss_release(struct svc_rqst *rqstp)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 587811a002c9..dc21e6c732db 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -513,9 +513,9 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
INIT_LIST_HEAD(&pool->sp_all_threads);
spin_lock_init(&pool->sp_lock);
+ percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL);
- percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL);
}
return serv;
@@ -588,9 +588,9 @@ svc_destroy(struct kref *ref)
for (i = 0; i < serv->sv_nrpools; i++) {
struct svc_pool *pool = &serv->sv_pools[i];
+ percpu_counter_destroy(&pool->sp_messages_arrived);
percpu_counter_destroy(&pool->sp_sockets_queued);
percpu_counter_destroy(&pool->sp_threads_woken);
- percpu_counter_destroy(&pool->sp_threads_timedout);
}
kfree(serv->sv_pools);
kfree(serv);
@@ -689,23 +689,44 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
return rqstp;
}
-/*
- * Choose a pool in which to create a new thread, for svc_set_num_threads
+/**
+ * svc_pool_wake_idle_thread - Awaken an idle thread in @pool
+ * @pool: service thread pool
+ *
+ * Can be called from soft IRQ or process context. Finding an idle
+ * service thread and marking it BUSY is atomic with respect to
+ * other calls to svc_pool_wake_idle_thread().
+ *
*/
-static inline struct svc_pool *
-choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+void svc_pool_wake_idle_thread(struct svc_pool *pool)
{
- if (pool != NULL)
- return pool;
+ struct svc_rqst *rqstp;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
+ if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
+ continue;
+
+ WRITE_ONCE(rqstp->rq_qtime, ktime_get());
+ wake_up_process(rqstp->rq_task);
+ rcu_read_unlock();
+ percpu_counter_inc(&pool->sp_threads_woken);
+ trace_svc_wake_up(rqstp->rq_task->pid);
+ return;
+ }
+ rcu_read_unlock();
- return &serv->sv_pools[(*state)++ % serv->sv_nrpools];
+ set_bit(SP_CONGESTED, &pool->sp_flags);
}
-/*
- * Choose a thread to kill, for svc_set_num_threads
- */
-static inline struct task_struct *
-choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+static struct svc_pool *
+svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+{
+ return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
+}
+
+static struct task_struct *
+svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
{
unsigned int i;
struct task_struct *task = NULL;
@@ -713,7 +734,6 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
if (pool != NULL) {
spin_lock_bh(&pool->sp_lock);
} else {
- /* choose a pool in round-robin fashion */
for (i = 0; i < serv->sv_nrpools; i++) {
pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
spin_lock_bh(&pool->sp_lock);
@@ -728,21 +748,15 @@ found_pool:
if (!list_empty(&pool->sp_all_threads)) {
struct svc_rqst *rqstp;
- /*
- * Remove from the pool->sp_all_threads list
- * so we don't try to kill it again.
- */
rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
set_bit(RQ_VICTIM, &rqstp->rq_flags);
list_del_rcu(&rqstp->rq_all);
task = rqstp->rq_task;
}
spin_unlock_bh(&pool->sp_lock);
-
return task;
}
-/* create new threads */
static int
svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
@@ -754,13 +768,12 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
do {
nrservs--;
- chosen_pool = choose_pool(serv, pool, &state);
-
+ chosen_pool = svc_pool_next(serv, pool, &state);
node = svc_pool_map_get_node(chosen_pool->sp_id);
+
rqstp = svc_prepare_thread(serv, chosen_pool, node);
if (IS_ERR(rqstp))
return PTR_ERR(rqstp);
-
task = kthread_create_on_node(serv->sv_threadfn, rqstp,
node, "%s", serv->sv_name);
if (IS_ERR(task)) {
@@ -779,15 +792,6 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return 0;
}
-/*
- * Create or destroy enough new threads to make the number
- * of threads the given number. If `pool' is non-NULL, applies
- * only to threads in that pool, otherwise round-robins between
- * all pools. Caller must ensure that mutual exclusion between this and
- * server startup or shutdown.
- */
-
-/* destroy old threads */
static int
svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
@@ -795,9 +799,8 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
struct task_struct *task;
unsigned int state = serv->sv_nrthreads-1;
- /* destroy old threads */
do {
- task = choose_victim(serv, pool, &state);
+ task = svc_pool_victim(serv, pool, &state);
if (task == NULL)
break;
rqstp = kthread_data(task);
@@ -809,6 +812,23 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return 0;
}
+/**
+ * svc_set_num_threads - adjust number of threads per RPC service
+ * @serv: RPC service to adjust
+ * @pool: Specific pool from which to choose threads, or NULL
+ * @nrservs: New number of threads for @serv (0 or less means kill all threads)
+ *
+ * Create or destroy threads to make the number of threads for @serv the
+ * given number. If @pool is non-NULL, change only threads in that pool;
+ * otherwise, round-robin between all pools for @serv. @serv's
+ * sv_nrthreads is adjusted for each thread created or destroyed.
+ *
+ * Caller must ensure mutual exclusion between this and server startup or
+ * shutdown.
+ *
+ * Returns zero on success or a negative errno if an error occurred while
+ * starting a thread.
+ */
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
@@ -1277,8 +1297,9 @@ svc_process_common(struct svc_rqst *rqstp)
const struct svc_procedure *procp = NULL;
struct svc_serv *serv = rqstp->rq_server;
struct svc_process_info process;
- int auth_res, rc;
+ enum svc_auth_status auth_res;
unsigned int aoffset;
+ int rc;
__be32 *p;
/* Will be turned off by GSS integrity and privacy services */
@@ -1333,6 +1354,9 @@ svc_process_common(struct svc_rqst *rqstp)
goto dropit;
case SVC_COMPLETE:
goto sendit;
+ default:
+ pr_warn_once("Unexpected svc_auth_status (%d)\n", auth_res);
+ goto err_system_err;
}
if (progp == NULL)
@@ -1516,7 +1540,6 @@ out_baddir:
out_drop:
svc_drop(rqstp);
}
-EXPORT_SYMBOL_GPL(svc_process);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 62c7919ea610..4cfe9640df48 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -434,6 +434,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
smp_rmb();
xpt_flags = READ_ONCE(xprt->xpt_flags);
+ trace_svc_xprt_enqueue(xprt, xpt_flags);
if (xpt_flags & BIT(XPT_BUSY))
return false;
if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
@@ -456,7 +457,6 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
void svc_xprt_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
- struct svc_rqst *rqstp = NULL;
if (!svc_xprt_ready(xprt))
return;
@@ -476,21 +476,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
spin_unlock_bh(&pool->sp_lock);
- /* find a thread for this xprt */
- rcu_read_lock();
- list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
- continue;
- percpu_counter_inc(&pool->sp_threads_woken);
- rqstp->rq_qtime = ktime_get();
- wake_up_process(rqstp->rq_task);
- goto out_unlock;
- }
- set_bit(SP_CONGESTED, &pool->sp_flags);
- rqstp = NULL;
-out_unlock:
- rcu_read_unlock();
- trace_svc_xprt_enqueue(xprt, rqstp);
+ svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
@@ -581,7 +567,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
svc_xprt_put(xprt);
}
-/*
+/**
+ * svc_wake_up - Wake up a service thread for non-transport work
+ * @serv: RPC service
+ *
* Some svc_serv's will have occasional work to do, even when a xprt is not
* waiting to be serviced. This function is there to "kick" a task in one of
* those services so that it can wake up and do that work. Note that we only
@@ -590,27 +579,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
*/
void svc_wake_up(struct svc_serv *serv)
{
- struct svc_rqst *rqstp;
- struct svc_pool *pool;
-
- pool = &serv->sv_pools[0];
+ struct svc_pool *pool = &serv->sv_pools[0];
- rcu_read_lock();
- list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- /* skip any that aren't queued */
- if (test_bit(RQ_BUSY, &rqstp->rq_flags))
- continue;
- rcu_read_unlock();
- wake_up_process(rqstp->rq_task);
- trace_svc_wake_up(rqstp->rq_task->pid);
- return;
- }
- rcu_read_unlock();
-
- /* No free entries available */
set_bit(SP_TASK_PENDING, &pool->sp_flags);
- smp_wmb();
- trace_svc_wake_up(0);
+ svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_wake_up);
@@ -679,7 +651,7 @@ static void svc_check_conn_limits(struct svc_serv *serv)
}
}
-static int svc_alloc_arg(struct svc_rqst *rqstp)
+static bool svc_alloc_arg(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
struct xdr_buf *arg = &rqstp->rq_arg;
@@ -701,10 +673,10 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
/* Made progress, don't sleep yet */
continue;
- set_current_state(TASK_INTERRUPTIBLE);
- if (signalled() || kthread_should_stop()) {
+ set_current_state(TASK_IDLE);
+ if (kthread_should_stop()) {
set_current_state(TASK_RUNNING);
- return -EINTR;
+ return false;
}
trace_svc_alloc_arg_err(pages, ret);
memalloc_retry_wait(GFP_KERNEL);
@@ -723,7 +695,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
arg->tail[0].iov_len = 0;
rqstp->rq_xid = xdr_zero;
- return 0;
+ return true;
}
static bool
@@ -732,7 +704,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
struct svc_pool *pool = rqstp->rq_pool;
/* did someone call svc_wake_up? */
- if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags))
+ if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
return false;
/* was a socket queued? */
@@ -740,7 +712,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return false;
/* are we shutting down? */
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
return false;
/* are we freezing? */
@@ -750,10 +722,9 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return true;
}
-static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
{
struct svc_pool *pool = rqstp->rq_pool;
- long time_left = 0;
/* rq_xprt should be clear on entry */
WARN_ON_ONCE(rqstp->rq_xprt);
@@ -762,18 +733,14 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (rqstp->rq_xprt)
goto out_found;
- /*
- * We have to be able to interrupt this wait
- * to bring down the daemons ...
- */
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_IDLE);
smp_mb__before_atomic();
clear_bit(SP_CONGESTED, &pool->sp_flags);
clear_bit(RQ_BUSY, &rqstp->rq_flags);
smp_mb__after_atomic();
if (likely(rqst_should_sleep(rqstp)))
- time_left = schedule_timeout(timeout);
+ schedule();
else
__set_current_state(TASK_RUNNING);
@@ -781,17 +748,16 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
set_bit(RQ_BUSY, &rqstp->rq_flags);
smp_mb__after_atomic();
+ clear_bit(SP_TASK_PENDING, &pool->sp_flags);
rqstp->rq_xprt = svc_xprt_dequeue(pool);
if (rqstp->rq_xprt)
goto out_found;
- if (!time_left)
- percpu_counter_inc(&pool->sp_threads_timedout);
-
- if (signalled() || kthread_should_stop())
- return ERR_PTR(-EINTR);
- return ERR_PTR(-EAGAIN);
+ if (kthread_should_stop())
+ return NULL;
+ return NULL;
out_found:
+ clear_bit(SP_TASK_PENDING, &pool->sp_flags);
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
*/
@@ -867,37 +833,35 @@ out:
return len;
}
-/*
- * Receive the next request on any transport. This code is carefully
- * organised not to touch any cachelines in the shared svc_serv
- * structure, only cachelines in the local svc_pool.
+/**
+ * svc_recv - Receive and process the next request on any transport
+ * @rqstp: an idle RPC service thread
+ *
+ * This code is carefully organised not to touch any cachelines in
+ * the shared svc_serv structure, only cachelines in the local
+ * svc_pool.
*/
-int svc_recv(struct svc_rqst *rqstp, long timeout)
+void svc_recv(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = NULL;
struct svc_serv *serv = rqstp->rq_server;
- int len, err;
+ int len;
- err = svc_alloc_arg(rqstp);
- if (err)
+ if (!svc_alloc_arg(rqstp))
goto out;
try_to_freeze();
cond_resched();
- err = -EINTR;
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
goto out;
- xprt = svc_get_next_xprt(rqstp, timeout);
- if (IS_ERR(xprt)) {
- err = PTR_ERR(xprt);
+ xprt = svc_get_next_xprt(rqstp);
+ if (!xprt)
goto out;
- }
len = svc_handle_xprt(rqstp, xprt);
/* No data, incomplete (TCP) read, or accept() */
- err = -EAGAIN;
if (len <= 0)
goto out_release;
@@ -909,13 +873,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
if (serv->sv_stats)
serv->sv_stats->netcnt++;
+ percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
rqstp->rq_stime = ktime_get();
- return len;
+ svc_process(rqstp);
+out:
+ return;
out_release:
rqstp->rq_res.len = 0;
svc_xprt_release(rqstp);
-out:
- return err;
}
EXPORT_SYMBOL_GPL(svc_recv);
@@ -1456,12 +1421,11 @@ static int svc_pool_stats_show(struct seq_file *m, void *p)
return 0;
}
- seq_printf(m, "%u %llu %llu %llu %llu\n",
- pool->sp_id,
- percpu_counter_sum_positive(&pool->sp_sockets_queued),
- percpu_counter_sum_positive(&pool->sp_sockets_queued),
- percpu_counter_sum_positive(&pool->sp_threads_woken),
- percpu_counter_sum_positive(&pool->sp_threads_timedout));
+ seq_printf(m, "%u %llu %llu %llu 0\n",
+ pool->sp_id,
+ percpu_counter_sum_positive(&pool->sp_messages_arrived),
+ percpu_counter_sum_positive(&pool->sp_sockets_queued),
+ percpu_counter_sum_positive(&pool->sp_threads_woken));
return 0;
}
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 67d8245a08af..aa4429d0b810 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -60,8 +60,19 @@ svc_put_auth_ops(struct auth_ops *aops)
module_put(aops->owner);
}
-int
-svc_authenticate(struct svc_rqst *rqstp)
+/**
+ * svc_authenticate - Initialize an outgoing credential
+ * @rqstp: RPC execution context
+ *
+ * Return values:
+ * %SVC_OK: XDR encoding of the result can begin
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_COMPLETE: GSS context lifetime event; no further action
+ * %SVC_DROP: Drop this request; no further action
+ * %SVC_CLOSE: Like drop, but also close transport connection
+ */
+enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp)
{
struct auth_ops *aops;
u32 flavor;
@@ -89,16 +100,28 @@ svc_authenticate(struct svc_rqst *rqstp)
}
EXPORT_SYMBOL_GPL(svc_authenticate);
-int svc_set_client(struct svc_rqst *rqstp)
+/**
+ * svc_set_client - Assign an appropriate 'auth_domain' as the client
+ * @rqstp: RPC execution context
+ *
+ * Return values:
+ * %SVC_OK: Client was found and assigned
+ * %SVC_DENIED: Client was explicitly denied
+ * %SVC_DROP: Ignore this request
+ * %SVC_CLOSE: Ignore this request and close the connection
+ */
+enum svc_auth_status svc_set_client(struct svc_rqst *rqstp)
{
rqstp->rq_client = NULL;
return rqstp->rq_authop->set_client(rqstp);
}
EXPORT_SYMBOL_GPL(svc_set_client);
-/* A request, which was authenticated, has now executed.
- * Time to finalise the credentials and verifier
- * and release and resources
+/**
+ * svc_authorise - Finalize credentials/verifier and release resources
+ * @rqstp: RPC execution context
+ *
+ * Returns zero on success, or a negative errno.
*/
int svc_authorise(struct svc_rqst *rqstp)
{
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 174783f804fa..04b45588ae6f 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -665,7 +665,7 @@ static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp)
}
}
-int
+enum svc_auth_status
svcauth_unix_set_client(struct svc_rqst *rqstp)
{
struct sockaddr_in *sin;
@@ -736,7 +736,6 @@ out:
rqstp->rq_auth_stat = rpc_auth_ok;
return SVC_OK;
}
-
EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
/**
@@ -751,7 +750,7 @@ EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
*
* rqstp->rq_auth_stat is set as mandated by RFC 5531.
*/
-static int
+static enum svc_auth_status
svcauth_null_accept(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
@@ -828,7 +827,7 @@ struct auth_ops svcauth_null = {
*
* rqstp->rq_auth_stat is set as mandated by RFC 5531.
*/
-static int
+static enum svc_auth_status
svcauth_tls_accept(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
@@ -913,7 +912,7 @@ struct auth_ops svcauth_tls = {
*
* rqstp->rq_auth_stat is set as mandated by RFC 5531.
*/
-static int
+static enum svc_auth_status
svcauth_unix_accept(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 8c9a8ee76aa0..998687421fa6 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -36,6 +36,8 @@
#include <linux/skbuff.h>
#include <linux/file.h>
#include <linux/freezer.h>
+#include <linux/bvec.h>
+
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
@@ -695,9 +697,10 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
.msg_name = &rqstp->rq_addr,
.msg_namelen = rqstp->rq_addrlen,
.msg_control = cmh,
+ .msg_flags = MSG_SPLICE_PAGES,
.msg_controllen = sizeof(buffer),
};
- unsigned int sent;
+ unsigned int count;
int err;
svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
@@ -710,22 +713,23 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- err = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (err < 0)
- goto out_unlock;
+ count = xdr_buf_to_bvec(rqstp->rq_bvec,
+ ARRAY_SIZE(rqstp->rq_bvec), xdr);
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ count, 0);
+ err = sock_sendmsg(svsk->sk_sock, &msg);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ count, 0);
+ err = sock_sendmsg(svsk->sk_sock, &msg);
}
- xdr_free_bvec(xdr);
+
trace_svcsock_udp_send(xprt, err);
-out_unlock:
+
mutex_unlock(&xprt->xpt_mutex);
- if (err < 0)
- return err;
- return sent;
+ return err;
out_notconn:
mutex_unlock(&xprt->xpt_mutex);
@@ -1089,6 +1093,9 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk)
/* If we have more data, signal svc_xprt_enqueue() to try again */
svsk->sk_tcplen = 0;
svsk->sk_marker = xdr_zero;
+
+ smp_wmb();
+ tcp_set_rcvlowat(svsk->sk_sk, 1);
}
/**
@@ -1178,10 +1185,17 @@ err_incomplete:
goto err_delete;
if (len == want)
svc_tcp_fragment_received(svsk);
- else
+ else {
+ /* Avoid more ->sk_data_ready() calls until the rest
+ * of the message has arrived. This reduces service
+ * thread wake-ups on large incoming messages. */
+ tcp_set_rcvlowat(svsk->sk_sk,
+ svc_sock_reclen(svsk) - svsk->sk_tcplen);
+
trace_svcsock_tcp_recv_short(&svsk->sk_xprt,
svc_sock_reclen(svsk),
svsk->sk_tcplen - sizeof(rpc_fraghdr));
+ }
goto err_noclose;
error:
if (len != -EAGAIN)
@@ -1198,75 +1212,51 @@ err_noclose:
return 0; /* record not complete */
}
-static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
- int flags)
-{
- struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | flags, };
-
- iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 1, vec->iov_len);
- return sock_sendmsg(sock, &msg);
-}
-
/*
* MSG_SPLICE_PAGES is used exclusively to reduce the number of
* copy operations in this path. Therefore the caller must ensure
* that the pages backing @xdr are unchanging.
*
- * In addition, the logic assumes that * .bv_len is never larger
- * than PAGE_SIZE.
+ * Note that the send is non-blocking. The caller has incremented
+ * the reference count on each page backing the RPC message, and
+ * the network layer will "put" these pages when transmission is
+ * complete.
+ *
+ * This is safe for our RPC services because the memory backing
+ * the head and tail components is never kmalloc'd. These always
+ * come from pages in the svc_rqst::rq_pages array.
*/
-static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
+static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
rpc_fraghdr marker, unsigned int *sentp)
{
- const struct kvec *head = xdr->head;
- const struct kvec *tail = xdr->tail;
- struct kvec rm = {
- .iov_base = &marker,
- .iov_len = sizeof(marker),
- };
struct msghdr msg = {
- .msg_flags = 0,
+ .msg_flags = MSG_SPLICE_PAGES,
};
+ unsigned int count;
+ void *buf;
int ret;
*sentp = 0;
- ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (ret < 0)
- return ret;
-
- ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != rm.iov_len)
- return -EAGAIN;
-
- ret = svc_tcp_send_kvec(sock, head, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != head->iov_len)
- goto out;
- if (xdr_buf_pagecount(xdr))
- xdr->bvec[0].bv_offset = offset_in_page(xdr->page_base);
-
- msg.msg_flags = MSG_SPLICE_PAGES;
- iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec,
- xdr_buf_pagecount(xdr), xdr->page_len);
- ret = sock_sendmsg(sock, &msg);
+ /* The stream record marker is copied into a temporary page
+ * fragment buffer so that it can be included in rq_bvec.
+ */
+ buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker),
+ GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ memcpy(buf, &marker, sizeof(marker));
+ bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
+
+ count = xdr_buf_to_bvec(rqstp->rq_bvec + 1,
+ ARRAY_SIZE(rqstp->rq_bvec) - 1, &rqstp->rq_res);
+
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ 1 + count, sizeof(marker) + rqstp->rq_res.len);
+ ret = sock_sendmsg(svsk->sk_sock, &msg);
if (ret < 0)
return ret;
*sentp += ret;
-
- if (tail->iov_len) {
- ret = svc_tcp_send_kvec(sock, tail, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- }
-
-out:
return 0;
}
@@ -1292,23 +1282,17 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
rqstp->rq_xprt_ctxt = NULL;
- atomic_inc(&svsk->sk_sendqlen);
mutex_lock(&xprt->xpt_mutex);
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- tcp_sock_set_cork(svsk->sk_sk, true);
- err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent);
- xdr_free_bvec(xdr);
+ err = svc_tcp_sendmsg(svsk, rqstp, marker, &sent);
trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
- if (atomic_dec_and_test(&svsk->sk_sendqlen))
- tcp_sock_set_cork(svsk->sk_sk, false);
mutex_unlock(&xprt->xpt_mutex);
return sent;
out_notconn:
- atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -ENOTCONN;
out_close:
@@ -1317,7 +1301,6 @@ out_close:
(err < 0) ? "got error" : "sent",
(err < 0) ? err : sent, xdr->len);
svc_xprt_deferred_close(xprt);
- atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
}
@@ -1644,6 +1627,7 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct page_frag_cache *pfc = &svsk->sk_frag_cache;
struct socket *sock = svsk->sk_sock;
trace_svcsock_free(svsk, sock);
@@ -1653,5 +1637,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
sockfd_put(sock);
else
sock_release(sock);
+ if (pfc->va)
+ __page_frag_cache_drain(virt_to_head_page(pfc->va),
+ pfc->pagecnt_bias);
kfree(svsk);
}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2a22e78af116..358e6de91775 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -165,6 +165,56 @@ xdr_free_bvec(struct xdr_buf *buf)
}
/**
+ * xdr_buf_to_bvec - Copy components of an xdr_buf into a bio_vec array
+ * @bvec: bio_vec array to populate
+ * @bvec_size: element count of @bvec
+ * @xdr: xdr_buf to be copied
+ *
+ * Returns the number of entries consumed in @bvec.
+ */
+unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size,
+ const struct xdr_buf *xdr)
+{
+ const struct kvec *head = xdr->head;
+ const struct kvec *tail = xdr->tail;
+ unsigned int count = 0;
+
+ if (head->iov_len) {
+ bvec_set_virt(bvec++, head->iov_base, head->iov_len);
+ ++count;
+ }
+
+ if (xdr->page_len) {
+ unsigned int offset, len, remaining;
+ struct page **pages = xdr->pages;
+
+ offset = offset_in_page(xdr->page_base);
+ remaining = xdr->page_len;
+ while (remaining > 0) {
+ len = min_t(unsigned int, remaining,
+ PAGE_SIZE - offset);
+ bvec_set_page(bvec++, *pages++, len, offset);
+ remaining -= len;
+ offset = 0;
+ if (unlikely(++count > bvec_size))
+ goto bvec_overflow;
+ }
+ }
+
+ if (tail->iov_len) {
+ bvec_set_virt(bvec, tail->iov_base, tail->iov_len);
+ if (unlikely(++count > bvec_size))
+ goto bvec_overflow;
+ }
+
+ return count;
+
+bvec_overflow:
+ pr_warn_once("%s: bio_vec array overflow\n", __func__);
+ return count - 1;
+}
+
+/**
* xdr_inline_pages - Prepare receive buffer for a large reply
* @xdr: xdr_buf into which reply will be placed
* @offset: expected offset where data payload will start, in bytes