Diffstat (limited to 'fs/nfsd')
-rw-r--r--  fs/nfsd/Kconfig              18
-rw-r--r--  fs/nfsd/Makefile             18
-rw-r--r--  fs/nfsd/auth.c               17
-rw-r--r--  fs/nfsd/auth.h                2
-rw-r--r--  fs/nfsd/blocklayout.c         6
-rw-r--r--  fs/nfsd/blocklayoutxdr.h      2
-rw-r--r--  fs/nfsd/cache.h               2
-rw-r--r--  fs/nfsd/debugfs.c            47
-rw-r--r--  fs/nfsd/export.c            131
-rw-r--r--  fs/nfsd/export.h             10
-rw-r--r--  fs/nfsd/filecache.c         320
-rw-r--r--  fs/nfsd/filecache.h          14
-rw-r--r--  fs/nfsd/localio.c           206
-rw-r--r--  fs/nfsd/lockd.c              13
-rw-r--r--  fs/nfsd/netlink.c            85
-rw-r--r--  fs/nfsd/netlink.h            13
-rw-r--r--  fs/nfsd/netns.h              26
-rw-r--r--  fs/nfsd/nfs2acl.c             4
-rw-r--r--  fs/nfsd/nfs3acl.c             4
-rw-r--r--  fs/nfsd/nfs3proc.c          116
-rw-r--r--  fs/nfsd/nfs3xdr.c             4
-rw-r--r--  fs/nfsd/nfs4acl.c             2
-rw-r--r--  fs/nfsd/nfs4callback.c      516
-rw-r--r--  fs/nfsd/nfs4idmap.c          13
-rw-r--r--  fs/nfsd/nfs4layouts.c         8
-rw-r--r--  fs/nfsd/nfs4proc.c          333
-rw-r--r--  fs/nfsd/nfs4recover.c       105
-rw-r--r--  fs/nfsd/nfs4state.c        1244
-rw-r--r--  fs/nfsd/nfs4xdr.c           625
-rw-r--r--  fs/nfsd/nfs4xdr_gen.c       256
-rw-r--r--  fs/nfsd/nfs4xdr_gen.h        25
-rw-r--r--  fs/nfsd/nfsctl.c            749
-rw-r--r--  fs/nfsd/nfsd.h               97
-rw-r--r--  fs/nfsd/nfsfh.c             236
-rw-r--r--  fs/nfsd/nfsfh.h              14
-rw-r--r--  fs/nfsd/nfsproc.c           102
-rw-r--r--  fs/nfsd/nfssvc.c            358
-rw-r--r--  fs/nfsd/nfsxdr.c              4
-rw-r--r--  fs/nfsd/state.h             126
-rw-r--r--  fs/nfsd/stats.c              46
-rw-r--r--  fs/nfsd/stats.h               7
-rw-r--r--  fs/nfsd/trace.h             637
-rw-r--r--  fs/nfsd/vfs.c               324
-rw-r--r--  fs/nfsd/vfs.h                30
-rw-r--r--  fs/nfsd/xdr4.h               39
-rw-r--r--  fs/nfsd/xdr4cb.h             15
46 files changed, 5063 insertions, 1906 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 272ab8d5c4d7..879e0b104d1c 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -4,9 +4,11 @@ config NFSD
depends on INET
depends on FILE_LOCKING
depends on FSNOTIFY
+ select CRC32
select LOCKD
select SUNRPC
select EXPORTFS
+ select NFS_COMMON
select NFS_ACL_SUPPORT if NFSD_V2_ACL
select NFS_ACL_SUPPORT if NFSD_V3_ACL
depends on MULTIUSER
@@ -75,8 +77,8 @@ config NFSD_V4
select FS_POSIX_ACL
select RPCSEC_GSS_KRB5
select CRYPTO
+ select CRYPTO_LIB_SHA256
select CRYPTO_MD5
- select CRYPTO_SHA256
select GRACE_PERIOD
select NFS_V4_2_SSC_HELPER if NFS_V4_2
help
@@ -162,7 +164,7 @@ config NFSD_V4_SECURITY_LABEL
config NFSD_LEGACY_CLIENT_TRACKING
bool "Support legacy NFSv4 client tracking methods (DEPRECATED)"
depends on NFSD_V4
- default n
+ default y
help
The NFSv4 server needs to store a small amount of information on
stable storage in order to handle state recovery after reboot. Most
@@ -171,6 +173,16 @@ config NFSD_LEGACY_CLIENT_TRACKING
recoverydir, or spawn a process directly using a usermodehelper
upcall.
- These legacy client tracking methods have proven to be probelmatic
+ These legacy client tracking methods have proven to be problematic
and will be removed in the future. Say Y here if you need support
for them in the interim.
+
+config NFSD_V4_DELEG_TIMESTAMPS
+ bool "Support delegated timestamps"
+ depends on NFSD_V4
+ default n
+ help
+ NFSD implements delegated timestamps according to
+ draft-ietf-nfsv4-delstid-08 "Extending the Opening of Files". This
+ is currently an experimental feature and is therefore left disabled
+ by default.
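
Note: the new option is presumably consumed in the NFSv4 open/delegation paths using the usual Kconfig pattern. As a minimal sketch (the helper name below is hypothetical, not taken from this patch), code can branch on it with IS_ENABLED():

	/* Hypothetical sketch: gate delegated-timestamp handling on the new option. */
	static inline bool nfsd_deleg_timestamps_enabled(void)
	{
		return IS_ENABLED(CONFIG_NFSD_V4_DELEG_TIMESTAMPS);
	}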
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index b8736a82e57c..55744bb786c9 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -18,8 +18,24 @@ nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o
nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
- nfs4acl.o nfs4callback.o nfs4recover.o
+ nfs4acl.o nfs4callback.o nfs4recover.o nfs4xdr_gen.o
nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o
+nfsd-$(CONFIG_NFS_LOCALIO) += localio.o
+nfsd-$(CONFIG_DEBUG_FS) += debugfs.o
+
+
+.PHONY: xdrgen
+
+xdrgen: ../../include/linux/sunrpc/xdrgen/nfs4_1.h nfs4xdr_gen.h nfs4xdr_gen.c
+
+../../include/linux/sunrpc/xdrgen/nfs4_1.h: ../../Documentation/sunrpc/xdr/nfs4_1.x
+ ../../tools/net/sunrpc/xdrgen/xdrgen definitions $< > $@
+
+nfs4xdr_gen.h: ../../Documentation/sunrpc/xdr/nfs4_1.x
+ ../../tools/net/sunrpc/xdrgen/xdrgen declarations $< > $@
+
+nfs4xdr_gen.c: ../../Documentation/sunrpc/xdr/nfs4_1.x
+ ../../tools/net/sunrpc/xdrgen/xdrgen source $< > $@
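
Usage note (an assumption, not spelled out in the patch): the new phony xdrgen target regenerates nfs4xdr_gen.h, nfs4xdr_gen.c and the shared sunrpc header from the XDR specification in Documentation/sunrpc/xdr/nfs4_1.x. Because the rules use paths relative to fs/nfsd, after editing the .x file the generated sources can presumably be refreshed with something like "make -C fs/nfsd xdrgen" run from the top of the kernel tree.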
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index e6beaaf4f170..4dc327e02456 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -5,37 +5,37 @@
#include "nfsd.h"
#include "auth.h"
-int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
+int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp)
{
struct exp_flavor_info *f;
struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
for (f = exp->ex_flavors; f < end; f++) {
- if (f->pseudoflavor == rqstp->rq_cred.cr_flavor)
+ if (f->pseudoflavor == cred->cr_flavor)
return f->flags;
}
return exp->ex_flags;
}
-int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
+int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp)
{
struct group_info *rqgi;
struct group_info *gi;
struct cred *new;
int i;
- int flags = nfsexp_flags(rqstp, exp);
+ int flags = nfsexp_flags(cred, exp);
/* discard any old override before preparing the new set */
- revert_creds(get_cred(current_real_cred()));
+ put_cred(revert_creds(get_cred(current_real_cred())));
new = prepare_creds();
if (!new)
return -ENOMEM;
- new->fsuid = rqstp->rq_cred.cr_uid;
- new->fsgid = rqstp->rq_cred.cr_gid;
+ new->fsuid = cred->cr_uid;
+ new->fsgid = cred->cr_gid;
- rqgi = rqstp->rq_cred.cr_group_info;
+ rqgi = cred->cr_group_info;
if (flags & NFSEXP_ALLSQUASH) {
new->fsuid = exp->ex_anon_uid;
@@ -80,7 +80,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
new->cap_permitted);
put_cred(override_creds(new));
- put_cred(new);
return 0;
oom:
diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h
index dbd66424f600..8c5031bbbcee 100644
--- a/fs/nfsd/auth.h
+++ b/fs/nfsd/auth.h
@@ -12,6 +12,6 @@
* Set the current process's fsuid/fsgid etc to those of the NFS
* client user
*/
-int nfsd_setuser(struct svc_rqst *, struct svc_export *);
+int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp);
#endif /* LINUX_NFSD_AUTH_H */
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 3c040c81c77d..08a20e5bcf7f 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -147,8 +147,7 @@ nfsd4_block_get_device_info_simple(struct super_block *sb,
struct pnfs_block_deviceaddr *dev;
struct pnfs_block_volume *b;
- dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
- sizeof(struct pnfs_block_volume), GFP_KERNEL);
+ dev = kzalloc(struct_size(dev, volumes, 1), GFP_KERNEL);
if (!dev)
return -ENOMEM;
gdp->gd_device = dev;
@@ -255,8 +254,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
const struct pr_ops *ops;
int ret;
- dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
- sizeof(struct pnfs_block_volume), GFP_KERNEL);
+ dev = kzalloc(struct_size(dev, volumes, 1), GFP_KERNEL);
if (!dev)
return -ENOMEM;
gdp->gd_device = dev;
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
index b0361e8aa9a7..4e28ac8f1127 100644
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -47,7 +47,7 @@ struct pnfs_block_volume {
struct pnfs_block_deviceaddr {
u32 nr_volumes;
- struct pnfs_block_volume volumes[];
+ struct pnfs_block_volume volumes[] __counted_by(nr_volumes);
};
__be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 66a05fefae98..bb7addef4a31 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -10,7 +10,7 @@
#define NFSCACHE_H
#include <linux/sunrpc/svc.h>
-#include "netns.h"
+#include "nfsd.h"
/*
* Representation of a reply cache entry.
diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c
new file mode 100644
index 000000000000..84b0c8b559dc
--- /dev/null
+++ b/fs/nfsd/debugfs.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/debugfs.h>
+
+#include "nfsd.h"
+
+static struct dentry *nfsd_top_dir __read_mostly;
+
+/*
+ * /sys/kernel/debug/nfsd/disable-splice-read
+ *
+ * Contents:
+ * %0: NFS READ is allowed to use page splicing
+ * %1: NFS READ uses only iov iter read
+ *
+ * The default value of this setting is zero (page splicing is
+ * allowed). This setting takes immediate effect for all NFS
+ * versions, all exports, and in all NFSD net namespaces.
+ */
+
+static int nfsd_dsr_get(void *data, u64 *val)
+{
+ *val = nfsd_disable_splice_read ? 1 : 0;
+ return 0;
+}
+
+static int nfsd_dsr_set(void *data, u64 val)
+{
+ nfsd_disable_splice_read = (val > 0) ? true : false;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dsr_fops, nfsd_dsr_get, nfsd_dsr_set, "%llu\n");
+
+void nfsd_debugfs_exit(void)
+{
+ debugfs_remove_recursive(nfsd_top_dir);
+ nfsd_top_dir = NULL;
+}
+
+void nfsd_debugfs_init(void)
+{
+ nfsd_top_dir = debugfs_create_dir("nfsd", NULL);
+
+ debugfs_create_file("disable-splice-read", S_IWUSR | S_IRUGO,
+ nfsd_top_dir, NULL, &nfsd_dsr_fops);
+}
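
For reference, a minimal userspace sketch (assuming debugfs is mounted at /sys/kernel/debug) that flips the new knob; it is equivalent to writing "1" to /sys/kernel/debug/nfsd/disable-splice-read from a shell:

	/* Sketch only: disable NFSD splice reads via the new debugfs knob. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/sys/kernel/debug/nfsd/disable-splice-read", O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, "1", 1) != 1)
			perror("write");
		close(fd);
		return 0;
	}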
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 7b641095a665..88ae410b4113 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -334,33 +334,46 @@ static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
static int export_stats_init(struct export_stats *stats)
{
stats->start_time = ktime_get_seconds();
- return nfsd_percpu_counters_init(stats->counter, EXP_STATS_COUNTERS_NUM);
+ return percpu_counter_init_many(stats->counter, 0, GFP_KERNEL,
+ EXP_STATS_COUNTERS_NUM);
}
static void export_stats_reset(struct export_stats *stats)
{
- if (stats)
- nfsd_percpu_counters_reset(stats->counter,
- EXP_STATS_COUNTERS_NUM);
+ if (stats) {
+ int i;
+
+ for (i = 0; i < EXP_STATS_COUNTERS_NUM; i++)
+ percpu_counter_set(&stats->counter[i], 0);
+ }
}
static void export_stats_destroy(struct export_stats *stats)
{
if (stats)
- nfsd_percpu_counters_destroy(stats->counter,
- EXP_STATS_COUNTERS_NUM);
+ percpu_counter_destroy_many(stats->counter,
+ EXP_STATS_COUNTERS_NUM);
}
-static void svc_export_put(struct kref *ref)
+static void svc_export_release(struct rcu_head *rcu_head)
{
- struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
- path_put(&exp->ex_path);
- auth_domain_put(exp->ex_client);
+ struct svc_export *exp = container_of(rcu_head, struct svc_export,
+ ex_rcu);
+
nfsd4_fslocs_free(&exp->ex_fslocs);
export_stats_destroy(exp->ex_stats);
kfree(exp->ex_stats);
kfree(exp->ex_uuid);
- kfree_rcu(exp, ex_rcu);
+ kfree(exp);
+}
+
+static void svc_export_put(struct kref *ref)
+{
+ struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+
+ path_put(&exp->ex_path);
+ auth_domain_put(exp->ex_client);
+ call_rcu(&exp->ex_rcu, svc_export_release);
}
static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
@@ -1070,10 +1083,32 @@ static struct svc_export *exp_find(struct cache_detail *cd,
return exp;
}
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
+/**
+ * check_nfsd_access - check if access to export is allowed.
+ * @exp: svc_export that is being accessed.
+ * @rqstp: svc_rqst attempting to access @exp (will be NULL for LOCALIO).
+ * @may_bypass_gss: reduce strictness of authorization check
+ *
+ * Return values:
+ * %nfs_ok if access is granted, or
+ * %nfserr_wrongsec if access is denied
+ */
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss)
{
struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
- struct svc_xprt *xprt = rqstp->rq_xprt;
+ struct svc_xprt *xprt;
+
+ /*
+ * If rqstp is NULL, this is a LOCALIO request which will only
+ * ever use a filehandle/credential pair for which access has
+ * been affirmed (by ACCESS or OPEN NFS requests) over the
+ * wire. So there is no need for further checks here.
+ */
+ if (!rqstp)
+ return nfs_ok;
+
+ xprt = rqstp->rq_xprt;
if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) {
if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags))
@@ -1089,22 +1124,23 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
goto ok;
}
- goto denied;
+ if (!may_bypass_gss)
+ goto denied;
ok:
/* legacy gss-only clients are always OK: */
if (exp->ex_client == rqstp->rq_gssclient)
- return 0;
+ return nfs_ok;
/* ip-address based client; check sec= export option: */
for (f = exp->ex_flavors; f < end; f++) {
if (f->pseudoflavor == rqstp->rq_cred.cr_flavor)
- return 0;
+ return nfs_ok;
}
/* defaults in absence of sec= options: */
if (exp->ex_nflavors == 0) {
if (rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL ||
rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)
- return 0;
+ return nfs_ok;
}
/* If the compound op contains a spo_must_allowed op,
@@ -1114,10 +1150,27 @@ ok:
*/
if (nfsd4_spo_must_allow(rqstp))
- return 0;
+ return nfs_ok;
+
+ /* Some calls may be processed without authentication
+ * on GSS exports. For example NFS2/3 calls on root
+ * directory, see section 2.3.2 of rfc 2623.
+ * For "may_bypass_gss" check that export has really
+ * enabled some flavor with authentication (GSS or any
+ * other) and also check that the used auth flavor is
+ * without authentication (none or sys).
+ */
+ if (may_bypass_gss && (
+ rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL ||
+ rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)) {
+ for (f = exp->ex_flavors; f < end; f++) {
+ if (f->pseudoflavor >= RPC_AUTH_DES)
+ return 0;
+ }
+ }
denied:
- return rqstp->rq_vers < 4 ? nfserr_acces : nfserr_wrongsec;
+ return nfserr_wrongsec;
}
/*
@@ -1160,19 +1213,35 @@ gss:
return gssexp;
}
+/**
+ * rqst_exp_find - Find an svc_export in the context of a rqst or similar
+ * @reqp: The handle to be used to suspend the request if a cache-upcall is needed
+ * If NULL, missing in-cache information will result in failure.
+ * @net: The network namespace in which the request exists
+ * @cl: default auth_domain to use for looking up the export
+ * @gsscl: an alternate auth_domain defined using deprecated gss/krb5 format.
+ * @fsid_type: The type of fsid to look for
+ * @fsidv: The actual fsid to look up in the context of either client.
+ *
+ * Perform a lookup for @cl/@fsidv in the given @net for an export. If
+ * none found and @gsscl specified, repeat the lookup.
+ *
+ * Returns an export, or an error pointer.
+ */
struct svc_export *
-rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv)
+rqst_exp_find(struct cache_req *reqp, struct net *net,
+ struct auth_domain *cl, struct auth_domain *gsscl,
+ int fsid_type, u32 *fsidv)
{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct cache_detail *cd = nn->svc_export_cache;
- if (rqstp->rq_client == NULL)
+ if (!cl)
goto gss;
/* First try the auth_unix client: */
- exp = exp_find(cd, rqstp->rq_client, fsid_type,
- fsidv, &rqstp->rq_chandle);
+ exp = exp_find(cd, cl, fsid_type, fsidv, reqp);
if (PTR_ERR(exp) == -ENOENT)
goto gss;
if (IS_ERR(exp))
@@ -1182,10 +1251,9 @@ rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv)
return exp;
gss:
/* Otherwise, try falling back on gss client */
- if (rqstp->rq_gssclient == NULL)
+ if (!gsscl)
return exp;
- gssexp = exp_find(cd, rqstp->rq_gssclient, fsid_type, fsidv,
- &rqstp->rq_chandle);
+ gssexp = exp_find(cd, gsscl, fsid_type, fsidv, reqp);
if (PTR_ERR(gssexp) == -ENOENT)
return exp;
if (!IS_ERR(exp))
@@ -1216,7 +1284,9 @@ struct svc_export *rqst_find_fsidzero_export(struct svc_rqst *rqstp)
mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
- return rqst_exp_find(rqstp, FSID_NUM, fsidv);
+ return rqst_exp_find(&rqstp->rq_chandle, SVC_NET(rqstp),
+ rqstp->rq_client, rqstp->rq_gssclient,
+ FSID_NUM, fsidv);
}
/*
@@ -1365,10 +1435,9 @@ static int e_show(struct seq_file *m, void *p)
return 0;
}
- exp_get(exp);
- if (cache_check(cd, &exp->h, NULL))
+ if (cache_check_rcu(cd, &exp->h, NULL))
return 0;
- exp_put(exp);
+
return svc_export_show(m, cd, cp);
}
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index ca9dc230ae3d..4d92b99c1ffd 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -99,8 +99,10 @@ struct svc_expkey {
#define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE)
#define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
-int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp);
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
+struct svc_cred;
+int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp);
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss);
/*
* Function declarations
@@ -127,6 +129,8 @@ static inline struct svc_export *exp_get(struct svc_export *exp)
cache_get(&exp->h);
return exp;
}
-struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
+struct svc_export *rqst_exp_find(struct cache_req *reqp, struct net *net,
+ struct auth_domain *cl, struct auth_domain *gsscl,
+ int fsid_type, u32 *fsidv);
#endif /* NFSD_EXPORT_H */
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index ddd3e0d9cfa6..e108b6c705b4 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -39,6 +39,7 @@
#include <linux/fsnotify.h>
#include <linux/seq_file.h>
#include <linux/rhashtable.h>
+#include <linux/nfslocalio.h>
#include "vfs.h"
#include "nfsd.h"
@@ -52,10 +53,11 @@
#define NFSD_FILE_CACHE_UP (0)
/* We only care about NFSD_MAY_READ/WRITE for this cache */
-#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE)
+#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE|NFSD_MAY_LOCALIO)
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_allocations);
static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
@@ -111,7 +113,7 @@ static void
nfsd_file_schedule_laundrette(void)
{
if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags))
- queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
+ queue_delayed_work(system_unbound_wq, &nfsd_filecache_laundrette,
NFSD_LAUNDRETTE_DELAY);
}
@@ -151,7 +153,7 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm)
}
static struct nfsd_file_mark *
-nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
+nfsd_file_mark_find_or_create(struct inode *inode)
{
int err;
struct fsnotify_mark *mark;
@@ -159,8 +161,8 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
do {
fsnotify_group_lock(nfsd_file_fsnotify_group);
- mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
- nfsd_file_fsnotify_group);
+ mark = fsnotify_find_inode_mark(inode,
+ nfsd_file_fsnotify_group);
if (mark) {
nfm = nfsd_file_mark_get(container_of(mark,
struct nfsd_file_mark,
@@ -215,7 +217,9 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
if (unlikely(!nf))
return NULL;
+ this_cpu_inc(nfsd_file_allocations);
INIT_LIST_HEAD(&nf->nf_lru);
+ INIT_LIST_HEAD(&nf->nf_gc);
nf->nf_birthtime = ktime_get();
nf->nf_file = NULL;
nf->nf_cred = get_current_cred();
@@ -315,15 +319,14 @@ nfsd_file_check_writeback(struct nfsd_file *nf)
mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}
-
-static bool nfsd_file_lru_add(struct nfsd_file *nf)
+static void nfsd_file_lru_add(struct nfsd_file *nf)
{
- set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
- if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) {
+ refcount_inc(&nf->nf_ref);
+ if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru))
trace_nfsd_file_lru_add(nf);
- return true;
- }
- return false;
+ else
+ WARN_ON(1);
+ nfsd_file_schedule_laundrette();
}
static bool nfsd_file_lru_remove(struct nfsd_file *nf)
@@ -359,42 +362,76 @@ nfsd_file_put(struct nfsd_file *nf)
if (test_bit(NFSD_FILE_GC, &nf->nf_flags) &&
test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
- /*
- * If this is the last reference (nf_ref == 1), then try to
- * transfer it to the LRU.
- */
- if (refcount_dec_not_one(&nf->nf_ref))
- return;
-
- /* Try to add it to the LRU. If that fails, decrement. */
- if (nfsd_file_lru_add(nf)) {
- /* If it's still hashed, we're done */
- if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
- nfsd_file_schedule_laundrette();
- return;
- }
-
- /*
- * We're racing with unhashing, so try to remove it from
- * the LRU. If removal fails, then someone else already
- * has our reference.
- */
- if (!nfsd_file_lru_remove(nf))
- return;
- }
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ set_bit(NFSD_FILE_RECENT, &nf->nf_flags);
}
+
if (refcount_dec_and_test(&nf->nf_ref))
nfsd_file_free(nf);
}
+/**
+ * nfsd_file_put_local - put nfsd_file reference and arm nfsd_net_put in caller
+ * @nf: nfsd_file of which to put the reference
+ *
+ * First save the associated net to return to caller, then put
+ * the reference of the nfsd_file.
+ */
+struct net *
+nfsd_file_put_local(struct nfsd_file __rcu **pnf)
+{
+ struct nfsd_file *nf;
+ struct net *net = NULL;
+
+ nf = unrcu_pointer(xchg(pnf, NULL));
+ if (nf) {
+ net = nf->nf_net;
+ nfsd_file_put(nf);
+ }
+ return net;
+}
+
+/**
+ * nfsd_file_get_local - get nfsd_file reference and reference to net
+ * @nf: nfsd_file of which to put the reference
+ *
+ * Get reference to both the nfsd_file and nf->nf_net.
+ */
+struct nfsd_file *
+nfsd_file_get_local(struct nfsd_file *nf)
+{
+ struct net *net = nf->nf_net;
+
+ if (nfsd_net_try_get(net)) {
+ nf = nfsd_file_get(nf);
+ if (!nf)
+ nfsd_net_put(net);
+ } else {
+ nf = NULL;
+ }
+ return nf;
+}
+
+/**
+ * nfsd_file_file - get the backing file of an nfsd_file
+ * @nf: nfsd_file of which to access the backing file.
+ *
+ * Return backing file for @nf.
+ */
+struct file *
+nfsd_file_file(struct nfsd_file *nf)
+{
+ return nf->nf_file;
+}
+
static void
nfsd_file_dispose_list(struct list_head *dispose)
{
struct nfsd_file *nf;
while (!list_empty(dispose)) {
- nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
- list_del_init(&nf->nf_lru);
+ nf = list_first_entry(dispose, struct nfsd_file, nf_gc);
+ list_del_init(&nf->nf_gc);
nfsd_file_free(nf);
}
}
@@ -411,14 +448,23 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
while(!list_empty(dispose)) {
struct nfsd_file *nf = list_first_entry(dispose,
- struct nfsd_file, nf_lru);
+ struct nfsd_file, nf_gc);
struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+ struct svc_serv *serv;
spin_lock(&l->lock);
- list_move_tail(&nf->nf_lru, &l->freeme);
+ list_move_tail(&nf->nf_gc, &l->freeme);
spin_unlock(&l->lock);
- svc_wake_up(nn->nfsd_serv);
+
+ /*
+ * The filecache laundrette is shut down after the
+ * nn->nfsd_serv pointer is cleared, but before the
+ * svc_serv is freed.
+ */
+ serv = nn->nfsd_serv;
+ if (serv)
+ svc_wake_up(serv);
}
}
@@ -456,7 +502,6 @@ void nfsd_file_net_dispose(struct nfsd_net *nn)
* nfsd_file_lru_cb - Examine an entry on the LRU list
* @item: LRU entry to examine
* @lru: controlling LRU
- * @lock: LRU list lock (unused)
* @arg: dispose list
*
* Return values:
@@ -466,9 +511,7 @@ void nfsd_file_net_dispose(struct nfsd_net *nn)
*/
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
- spinlock_t *lock, void *arg)
- __releases(lock)
- __acquires(lock)
+ void *arg)
{
struct list_head *head = arg;
struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
@@ -492,31 +535,71 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
}
/*
- * Put the reference held on behalf of the LRU. If it wasn't the last
- * one, then just remove it from the LRU and ignore it.
+ * Put the reference held on behalf of the LRU if it is the last
+ * reference, else rotate.
*/
- if (!refcount_dec_and_test(&nf->nf_ref)) {
+ if (!refcount_dec_if_one(&nf->nf_ref)) {
trace_nfsd_file_gc_in_use(nf);
- list_lru_isolate(lru, &nf->nf_lru);
- return LRU_REMOVED;
+ return LRU_ROTATE;
}
/* Refcount went to zero. Unhash it and queue it to the dispose list */
nfsd_file_unhash(nf);
- list_lru_isolate_move(lru, &nf->nf_lru, head);
+ list_lru_isolate(lru, &nf->nf_lru);
+ list_add(&nf->nf_gc, head);
this_cpu_inc(nfsd_file_evictions);
trace_nfsd_file_gc_disposed(nf);
return LRU_REMOVED;
}
+static enum lru_status
+nfsd_file_gc_cb(struct list_head *item, struct list_lru_one *lru,
+ void *arg)
+{
+ struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
+
+ if (test_and_clear_bit(NFSD_FILE_RECENT, &nf->nf_flags)) {
+ /*
+ * "REFERENCED" really means "should be at the end of the
+ * LRU. As we are putting it there we can clear the flag.
+ */
+ clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ trace_nfsd_file_gc_aged(nf);
+ return LRU_ROTATE;
+ }
+ return nfsd_file_lru_cb(item, lru, arg);
+}
+
+/* If the shrinker runs between calls to list_lru_walk_node() in
+ * nfsd_file_gc(), the "remaining" count will be wrong. This could
+ * result in premature freeing of some files. This may not matter much
+ * but is easy to fix with this spinlock which temporarily disables
+ * the shrinker.
+ */
+static DEFINE_SPINLOCK(nfsd_gc_lock);
static void
nfsd_file_gc(void)
{
+ unsigned long ret = 0;
LIST_HEAD(dispose);
- unsigned long ret;
+ int nid;
- ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
- &dispose, list_lru_count(&nfsd_file_lru));
+ spin_lock(&nfsd_gc_lock);
+ for_each_node_state(nid, N_NORMAL_MEMORY) {
+ unsigned long remaining = list_lru_count_node(&nfsd_file_lru, nid);
+
+ while (remaining > 0) {
+ unsigned long nr = min(remaining, NFSD_FILE_GC_BATCH);
+
+ remaining -= nr;
+ ret += list_lru_walk_node(&nfsd_file_lru, nid, nfsd_file_gc_cb,
+ &dispose, &nr);
+ if (nr)
+ /* walk aborted early */
+ remaining = 0;
+ }
+ }
+ spin_unlock(&nfsd_gc_lock);
trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
nfsd_file_dispose_list_delayed(&dispose);
}
@@ -524,9 +607,9 @@ nfsd_file_gc(void)
static void
nfsd_file_gc_worker(struct work_struct *work)
{
- nfsd_file_gc();
if (list_lru_count(&nfsd_file_lru))
- nfsd_file_schedule_laundrette();
+ nfsd_file_gc();
+ nfsd_file_schedule_laundrette();
}
static unsigned long
@@ -541,8 +624,12 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
LIST_HEAD(dispose);
unsigned long ret;
+ if (!spin_trylock(&nfsd_gc_lock))
+ return SHRINK_STOP;
+
ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
nfsd_file_lru_cb, &dispose);
+ spin_unlock(&nfsd_gc_lock);
trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
nfsd_file_dispose_list_delayed(&dispose);
return ret;
@@ -578,7 +665,7 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
/* If refcount goes to 0, then put on the dispose list */
if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
- list_add(&nf->nf_lru, dispose);
+ list_add(&nf->nf_gc, dispose);
trace_nfsd_file_closing(nf);
}
}
@@ -647,24 +734,19 @@ nfsd_file_close_inode(struct inode *inode)
void
nfsd_file_close_inode_sync(struct inode *inode)
{
- struct nfsd_file *nf;
LIST_HEAD(dispose);
trace_nfsd_file_close(inode);
nfsd_file_queue_for_close(inode, &dispose);
- while (!list_empty(&dispose)) {
- nf = list_first_entry(&dispose, struct nfsd_file, nf_lru);
- list_del_init(&nf->nf_lru);
- nfsd_file_free(nf);
- }
+ nfsd_file_dispose_list(&dispose);
}
static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
void *data)
{
- struct file_lock *fl = data;
+ struct file_lease *fl = data;
/* Only close files for F_SETLEASE leases */
if (fl->c.flc_flags & FL_LEASE)
@@ -719,7 +801,7 @@ nfsd_file_cache_init(void)
ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params);
if (ret)
- return ret;
+ goto out;
ret = -ENOMEM;
nfsd_file_slab = KMEM_CACHE(nfsd_file, 0);
@@ -760,7 +842,7 @@ nfsd_file_cache_init(void)
}
nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
- FSNOTIFY_GROUP_NOFS);
+ 0);
if (IS_ERR(nfsd_file_fsnotify_group)) {
pr_err("nfsd: unable to create fsnotify group: %ld\n",
PTR_ERR(nfsd_file_fsnotify_group));
@@ -771,6 +853,8 @@ nfsd_file_cache_init(void)
INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
+ if (ret)
+ clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags);
return ret;
out_notifier:
lease_unregister_notifier(&nfsd_file_lease_notifier);
@@ -802,6 +886,14 @@ __nfsd_file_cache_purge(struct net *net)
struct nfsd_file *nf;
LIST_HEAD(dispose);
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ if (net) {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ nfs_localio_invalidate_clients(&nn->local_clients,
+ &nn->local_clients_lock);
+ }
+#endif
+
rhltable_walk_enter(&nfsd_file_rhltable, &iter);
do {
rhashtable_walk_start(&iter);
@@ -909,6 +1001,7 @@ nfsd_file_cache_shutdown(void)
for_each_possible_cpu(i) {
per_cpu(nfsd_file_cache_hits, i) = 0;
per_cpu(nfsd_file_acquisitions, i) = 0;
+ per_cpu(nfsd_file_allocations, i) = 0;
per_cpu(nfsd_file_releases, i) = 0;
per_cpu(nfsd_file_total_age, i) = 0;
per_cpu(nfsd_file_evictions, i) = 0;
@@ -977,12 +1070,14 @@ nfsd_file_is_cached(struct inode *inode)
}
static __be32
-nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net,
+ struct svc_cred *cred,
+ struct auth_domain *client,
+ struct svc_fh *fhp,
unsigned int may_flags, struct file *file,
struct nfsd_file **pnf, bool want_gc)
{
unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
- struct net *net = SVC_NET(rqstp);
struct nfsd_file *new, *nf;
bool stale_retry = true;
bool open_retry = true;
@@ -991,8 +1086,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
int ret;
retry:
- status = fh_verify(rqstp, fhp, S_IFREG,
- may_flags|NFSD_MAY_OWNER_OVERRIDE);
+ if (rqstp) {
+ status = fh_verify(rqstp, fhp, S_IFREG,
+ may_flags|NFSD_MAY_OWNER_OVERRIDE);
+ } else {
+ status = fh_verify_local(net, cred, client, fhp, S_IFREG,
+ may_flags|NFSD_MAY_OWNER_OVERRIDE);
+ }
if (status != nfs_ok)
return status;
inode = d_inode(fhp->fh_dentry);
@@ -1001,16 +1101,8 @@ retry:
nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc);
rcu_read_unlock();
- if (nf) {
- /*
- * If the nf is on the LRU then it holds an extra reference
- * that must be put if it's removed. It had better not be
- * the last one however, since we should hold another.
- */
- if (nfsd_file_lru_remove(nf))
- WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
+ if (nf)
goto wait_for_construction;
- }
new = nfsd_file_alloc(net, inode, need, want_gc);
if (!new) {
@@ -1024,7 +1116,7 @@ retry:
if (unlikely(nf)) {
spin_unlock(&inode->i_lock);
rcu_read_unlock();
- nfsd_file_slab_free(&new->nf_rcu);
+ nfsd_file_free(new);
goto wait_for_construction;
}
nf = new;
@@ -1035,8 +1127,6 @@ retry:
if (likely(ret == 0))
goto open_file;
- if (ret == -EEXIST)
- goto retry;
trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret);
status = nfserr_jukebox;
goto construction_err;
@@ -1051,6 +1141,7 @@ wait_for_construction:
status = nfserr_jukebox;
goto construction_err;
}
+ nfsd_file_put(nf);
open_retry = false;
fh_put(fhp);
goto retry;
@@ -1074,7 +1165,7 @@ out:
open_file:
trace_nfsd_file_alloc(nf);
- nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode);
+ nf->nf_mark = nfsd_file_mark_find_or_create(inode);
if (nf->nf_mark) {
if (file) {
get_file(file);
@@ -1082,8 +1173,7 @@ open_file:
status = nfs_ok;
trace_nfsd_file_opened(nf, status);
} else {
- ret = nfsd_open_verified(rqstp, fhp, may_flags,
- &nf->nf_file);
+ ret = nfsd_open_verified(fhp, may_flags, &nf->nf_file);
if (ret == -EOPENSTALE && stale_retry) {
stale_retry = false;
nfsd_file_unhash(nf);
@@ -1106,6 +1196,9 @@ open_file:
*/
if (status != nfs_ok || inode->i_nlink == 0)
nfsd_file_unhash(nf);
+ else if (want_gc)
+ nfsd_file_lru_add(nf);
+
clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
if (status == nfs_ok)
goto out;
@@ -1139,7 +1232,8 @@ __be32
nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true);
+ return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
+ fhp, may_flags, NULL, pnf, true);
}
/**
@@ -1163,7 +1257,54 @@ __be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false);
+ return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
+ fhp, may_flags, NULL, pnf, false);
+}
+
+/**
+ * nfsd_file_acquire_local - Get a struct nfsd_file with an open file for localio
+ * @net: The network namespace in which to perform a lookup
+ * @cred: the user credential with which to validate access
+ * @client: the auth_domain for LOCALIO lookup
+ * @fhp: the NFS filehandle of the file to be opened
+ * @may_flags: NFSD_MAY_ settings for the file
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
+ * This file lookup interface provide access to a file given the
+ * filehandle and credential. No connection-based authorisation
+ * is performed and in that way it is quite different to other
+ * file access mediated by nfsd. It allows a kernel module such as the NFS
+ * client to reach across network and filesystem namespaces to access
+ * a file. The security implications of this should be carefully
+ * considered before use.
+ *
+ * The nfsd_file_object returned by this API is reference-counted
+ * but not garbage-collected. The object is unhashed after the
+ * final nfsd_file_put().
+ *
+ * Return values:
+ * %nfs_ok - @pnf points to an nfsd_file with its reference
+ * count boosted.
+ *
+ * On error, an nfsstat value in network byte order is returned.
+ */
+__be32
+nfsd_file_acquire_local(struct net *net, struct svc_cred *cred,
+ struct auth_domain *client, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+{
+ /*
+ * Save creds before calling nfsd_file_do_acquire() (which calls
+ * nfsd_setuser). Important because caller (LOCALIO) is from
+ * client context.
+ */
+ const struct cred *save_cred = get_current_cred();
+ __be32 beres;
+
+ beres = nfsd_file_do_acquire(NULL, net, cred, client,
+ fhp, may_flags, NULL, pnf, false);
+ put_cred(revert_creds(save_cred));
+ return beres;
}
/**
@@ -1189,7 +1330,8 @@ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct file *file,
struct nfsd_file **pnf)
{
- return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false);
+ return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
+ fhp, may_flags, file, pnf, false);
}
/*
@@ -1199,7 +1341,7 @@ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
*/
int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
- unsigned long releases = 0, evictions = 0;
+ unsigned long allocations = 0, releases = 0, evictions = 0;
unsigned long hits = 0, acquisitions = 0;
unsigned int i, count = 0, buckets = 0;
unsigned long lru = 0, total_age = 0;
@@ -1224,6 +1366,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
for_each_possible_cpu(i) {
hits += per_cpu(nfsd_file_cache_hits, i);
acquisitions += per_cpu(nfsd_file_acquisitions, i);
+ allocations += per_cpu(nfsd_file_allocations, i);
releases += per_cpu(nfsd_file_releases, i);
total_age += per_cpu(nfsd_file_total_age, i);
evictions += per_cpu(nfsd_file_evictions, i);
@@ -1234,6 +1377,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
seq_printf(m, "lru entries: %lu\n", lru);
seq_printf(m, "cache hits: %lu\n", hits);
seq_printf(m, "acquisitions: %lu\n", acquisitions);
+ seq_printf(m, "allocations: %lu\n", allocations);
seq_printf(m, "releases: %lu\n", releases);
seq_printf(m, "evictions: %lu\n", evictions);
if (releases)
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index c61884def906..722b26c71e45 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -4,6 +4,12 @@
#include <linux/fsnotify_backend.h>
/*
+ * Limit the time that the list_lru_one lock is held during
+ * an LRU scan.
+ */
+#define NFSD_FILE_GC_BATCH (16UL)
+
+/*
* This is the fsnotify_mark container that nfsd attaches to the files that it
* is holding open. Note that we have a separate refcount here aside from the
* one in the fsnotify_mark. We only want a single fsnotify_mark attached to
@@ -38,12 +44,14 @@ struct nfsd_file {
#define NFSD_FILE_PENDING (1)
#define NFSD_FILE_REFERENCED (2)
#define NFSD_FILE_GC (3)
+#define NFSD_FILE_RECENT (4)
unsigned long nf_flags;
refcount_t nf_ref;
unsigned char nf_may;
struct nfsd_file_mark *nf_mark;
struct list_head nf_lru;
+ struct list_head nf_gc;
struct rcu_head nf_rcu;
ktime_t nf_birthtime;
};
@@ -54,7 +62,10 @@ void nfsd_file_cache_shutdown(void);
int nfsd_file_cache_start_net(struct net *net);
void nfsd_file_cache_shutdown_net(struct net *net);
void nfsd_file_put(struct nfsd_file *nf);
+struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf);
+struct nfsd_file *nfsd_file_get_local(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+struct file *nfsd_file_file(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
void nfsd_file_net_dispose(struct nfsd_net *nn);
bool nfsd_file_is_cached(struct inode *inode);
@@ -65,5 +76,8 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct file *file,
struct nfsd_file **nfp);
+__be32 nfsd_file_acquire_local(struct net *net, struct svc_cred *cred,
+ struct auth_domain *client, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf);
int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
#endif /* _FS_NFSD_FILECACHE_H */
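
The declarations above form the surface the LOCALIO code uses. A condensed, hypothetical caller is sketched below (the function name is illustrative; the real consumer is nfsd_open_local_fh in localio.c, shown next):

	/* Hypothetical sketch of the LOCALIO-facing API; nfsstat mapping omitted. */
	static int demo_localio_open(struct net *net, struct svc_cred *cred,
				     struct auth_domain *dom, struct svc_fh *fhp)
	{
		struct nfsd_file *nf;
		__be32 status;

		status = nfsd_file_acquire_local(net, cred, dom, fhp,
						 NFSD_MAY_LOCALIO | NFSD_MAY_READ, &nf);
		if (status != nfs_ok)
			return -EIO;		/* real callers map the nfsstat */

		/* The backing struct file is reached via nfsd_file_file(). */
		pr_info("localio open: %pD\n", nfsd_file_file(nf));

		nfsd_file_put(nf);		/* final put unhashes the nfsd_file */
		return 0;
	}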
diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
new file mode 100644
index 000000000000..80d9ff6608a7
--- /dev/null
+++ b/fs/nfsd/localio.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * NFS server support for local clients to bypass network stack
+ *
+ * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
+ * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
+ * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
+ * Copyright (C) 2024 NeilBrown <neilb@suse.de>
+ */
+
+#include <linux/exportfs.h>
+#include <linux/sunrpc/svcauth.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs_common.h>
+#include <linux/nfslocalio.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_xdr.h>
+#include <linux/string.h>
+
+#include "nfsd.h"
+#include "vfs.h"
+#include "netns.h"
+#include "filecache.h"
+#include "cache.h"
+
+/**
+ * nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file
+ *
+ * @net: 'struct net' to get the proper nfsd_net required for LOCALIO access
+ * @dom: 'struct auth_domain' required for LOCALIO access
+ * @rpc_clnt: rpc_clnt that the client established
+ * @cred: cred that the client established
+ * @nfs_fh: filehandle to lookup
+ * @nfp: place to find the nfsd_file, or store it if it was non-NULL
+ * @fmode: fmode_t to use for open
+ *
+ * This function maps a local fh to a path on a local filesystem.
+ * This is useful when the nfs client has the local server mounted - it can
+ * avoid all the NFS overhead with reads, writes and commits.
+ *
+ * On successful return, returned nfsd_file will have its nf_net member
+ * set. Caller (NFS client) is responsible for calling nfsd_net_put and
+ * nfsd_file_put (via nfs_to_nfsd_file_put_local).
+ */
+static struct nfsd_file *
+nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
+ struct rpc_clnt *rpc_clnt, const struct cred *cred,
+ const struct nfs_fh *nfs_fh, struct nfsd_file __rcu **pnf,
+ const fmode_t fmode)
+{
+ int mayflags = NFSD_MAY_LOCALIO;
+ struct svc_cred rq_cred;
+ struct svc_fh fh;
+ struct nfsd_file *localio;
+ __be32 beres;
+
+ if (nfs_fh->size > NFS4_FHSIZE)
+ return ERR_PTR(-EINVAL);
+
+ if (!nfsd_net_try_get(net))
+ return ERR_PTR(-ENXIO);
+
+ rcu_read_lock();
+ localio = nfsd_file_get(rcu_dereference(*pnf));
+ rcu_read_unlock();
+ if (localio)
+ return localio;
+
+ /* nfs_fh -> svc_fh */
+ fh_init(&fh, NFS4_FHSIZE);
+ fh.fh_handle.fh_size = nfs_fh->size;
+ memcpy(fh.fh_handle.fh_raw, nfs_fh->data, nfs_fh->size);
+
+ if (fmode & FMODE_READ)
+ mayflags |= NFSD_MAY_READ;
+ if (fmode & FMODE_WRITE)
+ mayflags |= NFSD_MAY_WRITE;
+
+ svcauth_map_clnt_to_svc_cred_local(rpc_clnt, cred, &rq_cred);
+
+ beres = nfsd_file_acquire_local(net, &rq_cred, dom,
+ &fh, mayflags, &localio);
+ if (beres)
+ localio = ERR_PTR(nfs_stat_to_errno(be32_to_cpu(beres)));
+
+ fh_put(&fh);
+ if (rq_cred.cr_group_info)
+ put_group_info(rq_cred.cr_group_info);
+
+ if (!IS_ERR(localio)) {
+ struct nfsd_file *new;
+ if (!nfsd_net_try_get(net)) {
+ nfsd_file_put(localio);
+ nfsd_net_put(net);
+ return ERR_PTR(-ENXIO);
+ }
+ nfsd_file_get(localio);
+ again:
+ new = unrcu_pointer(cmpxchg(pnf, NULL, RCU_INITIALIZER(localio)));
+ if (new) {
+ /* Some other thread installed an nfsd_file */
+ if (nfsd_file_get(new) == NULL)
+ goto again;
+ /*
+ * Drop the ref we were going to install and the
+ * one we were going to return.
+ */
+ nfsd_file_put(localio);
+ nfsd_file_put(localio);
+ localio = new;
+ }
+ } else
+ nfsd_net_put(net);
+
+ return localio;
+}
+
+static const struct nfsd_localio_operations nfsd_localio_ops = {
+ .nfsd_net_try_get = nfsd_net_try_get,
+ .nfsd_net_put = nfsd_net_put,
+ .nfsd_open_local_fh = nfsd_open_local_fh,
+ .nfsd_file_put_local = nfsd_file_put_local,
+ .nfsd_file_get_local = nfsd_file_get_local,
+ .nfsd_file_file = nfsd_file_file,
+};
+
+void nfsd_localio_ops_init(void)
+{
+ nfs_to = &nfsd_localio_ops;
+}
+
+/*
+ * UUID_IS_LOCAL XDR functions
+ */
+
+static __be32 localio_proc_null(struct svc_rqst *rqstp)
+{
+ return rpc_success;
+}
+
+struct localio_uuidarg {
+ uuid_t uuid;
+};
+
+static __be32 localio_proc_uuid_is_local(struct svc_rqst *rqstp)
+{
+ struct localio_uuidarg *argp = rqstp->rq_argp;
+ struct net *net = SVC_NET(rqstp);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfs_uuid_is_local(&argp->uuid, &nn->local_clients,
+ &nn->local_clients_lock,
+ net, rqstp->rq_client, THIS_MODULE);
+
+ return rpc_success;
+}
+
+static bool localio_decode_uuidarg(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr)
+{
+ struct localio_uuidarg *argp = rqstp->rq_argp;
+ u8 uuid[UUID_SIZE];
+
+ if (decode_opaque_fixed(xdr, uuid, UUID_SIZE))
+ return false;
+ import_uuid(&argp->uuid, uuid);
+
+ return true;
+}
+
+static const struct svc_procedure localio_procedures1[] = {
+ [LOCALIOPROC_NULL] = {
+ .pc_func = localio_proc_null,
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = 0,
+ .pc_name = "NULL",
+ },
+ [LOCALIOPROC_UUID_IS_LOCAL] = {
+ .pc_func = localio_proc_uuid_is_local,
+ .pc_decode = localio_decode_uuidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct localio_uuidarg),
+ .pc_argzero = sizeof(struct localio_uuidarg),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_name = "UUID_IS_LOCAL",
+ },
+};
+
+#define LOCALIO_NR_PROCEDURES ARRAY_SIZE(localio_procedures1)
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ localio_count[LOCALIO_NR_PROCEDURES]);
+const struct svc_version localio_version1 = {
+ .vs_vers = 1,
+ .vs_nproc = LOCALIO_NR_PROCEDURES,
+ .vs_proc = localio_procedures1,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_count = localio_count,
+ .vs_xdrsize = XDR_QUADLEN(UUID_SIZE),
+ .vs_hidden = true,
+};
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 46a7f9b813e5..edc9f75dc75c 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -38,11 +38,20 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
memcpy(&fh.fh_handle.fh_raw, f->data, f->size);
fh.fh_export = NULL;
+ /*
+ * Allow BYPASS_GSS as some client implementations use AUTH_SYS
+ * for NLM even when GSS is used for NFS.
+ * Allow OWNER_OVERRIDE as permission might have been changed
+ * after the file was opened.
+ * Pass MAY_NLM so that authentication can be completely bypassed
+ * if NFSEXP_NOAUTHNLM is set. Some older clients use AUTH_NULL
+ * for NLM requests.
+ */
access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
- access |= NFSD_MAY_LOCK;
+ access |= NFSD_MAY_NLM | NFSD_MAY_OWNER_OVERRIDE | NFSD_MAY_BYPASS_GSS;
nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp);
fh_put(&fh);
- /* We return nlm error codes as nlm doesn't know
+ /* We return nlm error codes as nlm doesn't know
* about nfsd, but nfsd does know about nlm..
*/
switch (nfserr) {
diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c
index 0e1d635ec5f9..ca54aa583530 100644
--- a/fs/nfsd/netlink.c
+++ b/fs/nfsd/netlink.c
@@ -10,15 +10,96 @@
#include <uapi/linux/nfsd_netlink.h>
+/* Common nested types */
+const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1] = {
+ [NFSD_A_SOCK_ADDR] = { .type = NLA_BINARY, },
+ [NFSD_A_SOCK_TRANSPORT_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1] = {
+ [NFSD_A_VERSION_MAJOR] = { .type = NLA_U32, },
+ [NFSD_A_VERSION_MINOR] = { .type = NLA_U32, },
+ [NFSD_A_VERSION_ENABLED] = { .type = NLA_FLAG, },
+};
+
+/* NFSD_CMD_THREADS_SET - do */
+static const struct nla_policy nfsd_threads_set_nl_policy[NFSD_A_SERVER_SCOPE + 1] = {
+ [NFSD_A_SERVER_THREADS] = { .type = NLA_U32, },
+ [NFSD_A_SERVER_GRACETIME] = { .type = NLA_U32, },
+ [NFSD_A_SERVER_LEASETIME] = { .type = NLA_U32, },
+ [NFSD_A_SERVER_SCOPE] = { .type = NLA_NUL_STRING, },
+};
+
+/* NFSD_CMD_VERSION_SET - do */
+static const struct nla_policy nfsd_version_set_nl_policy[NFSD_A_SERVER_PROTO_VERSION + 1] = {
+ [NFSD_A_SERVER_PROTO_VERSION] = NLA_POLICY_NESTED(nfsd_version_nl_policy),
+};
+
+/* NFSD_CMD_LISTENER_SET - do */
+static const struct nla_policy nfsd_listener_set_nl_policy[NFSD_A_SERVER_SOCK_ADDR + 1] = {
+ [NFSD_A_SERVER_SOCK_ADDR] = NLA_POLICY_NESTED(nfsd_sock_nl_policy),
+};
+
+/* NFSD_CMD_POOL_MODE_SET - do */
+static const struct nla_policy nfsd_pool_mode_set_nl_policy[NFSD_A_POOL_MODE_MODE + 1] = {
+ [NFSD_A_POOL_MODE_MODE] = { .type = NLA_NUL_STRING, },
+};
+
/* Ops table for nfsd */
static const struct genl_split_ops nfsd_nl_ops[] = {
{
.cmd = NFSD_CMD_RPC_STATUS_GET,
- .start = nfsd_nl_rpc_status_get_start,
.dumpit = nfsd_nl_rpc_status_get_dumpit,
- .done = nfsd_nl_rpc_status_get_done,
.flags = GENL_CMD_CAP_DUMP,
},
+ {
+ .cmd = NFSD_CMD_THREADS_SET,
+ .doit = nfsd_nl_threads_set_doit,
+ .policy = nfsd_threads_set_nl_policy,
+ .maxattr = NFSD_A_SERVER_SCOPE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_THREADS_GET,
+ .doit = nfsd_nl_threads_get_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_VERSION_SET,
+ .doit = nfsd_nl_version_set_doit,
+ .policy = nfsd_version_set_nl_policy,
+ .maxattr = NFSD_A_SERVER_PROTO_VERSION,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_VERSION_GET,
+ .doit = nfsd_nl_version_get_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_LISTENER_SET,
+ .doit = nfsd_nl_listener_set_doit,
+ .policy = nfsd_listener_set_nl_policy,
+ .maxattr = NFSD_A_SERVER_SOCK_ADDR,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_LISTENER_GET,
+ .doit = nfsd_nl_listener_get_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_POOL_MODE_SET,
+ .doit = nfsd_nl_pool_mode_set_doit,
+ .policy = nfsd_pool_mode_set_nl_policy,
+ .maxattr = NFSD_A_POOL_MODE_MODE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_POOL_MODE_GET,
+ .doit = nfsd_nl_pool_mode_get_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
};
struct genl_family nfsd_nl_family __ro_after_init = {
diff --git a/fs/nfsd/netlink.h b/fs/nfsd/netlink.h
index d83dd6bdee92..8eb903f24c41 100644
--- a/fs/nfsd/netlink.h
+++ b/fs/nfsd/netlink.h
@@ -11,11 +11,20 @@
#include <uapi/linux/nfsd_netlink.h>
-int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb);
-int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb);
+/* Common nested types */
+extern const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1];
+extern const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1];
int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_pool_mode_set_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_pool_mode_get_doit(struct sk_buff *skb, struct genl_info *info);
extern struct genl_family nfsd_nl_family;
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index d4be519b5734..3e2d0fde80a7 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -13,6 +13,7 @@
#include <linux/filelock.h>
#include <linux/nfs4.h>
#include <linux/percpu_counter.h>
+#include <linux/percpu-refcount.h>
#include <linux/siphash.h>
#include <linux/sunrpc/stats.h>
@@ -127,12 +128,6 @@ struct nfsd_net {
seqlock_t writeverf_lock;
unsigned char writeverf[8];
- /*
- * Max number of connections this nfsd container will allow. Defaults
- * to '0' which is means that it bases this on the number of threads.
- */
- unsigned int max_connections;
-
u32 clientid_base;
u32 clientid_counter;
u32 clverifier_counter;
@@ -140,6 +135,9 @@ struct nfsd_net {
struct svc_info nfsd_info;
#define nfsd_serv nfsd_info.serv
+ struct percpu_ref nfsd_net_ref;
+ struct completion nfsd_net_confirm_done;
+ struct completion nfsd_net_free_done;
/*
* clientid and stateid data for construction of net unique COPY
@@ -148,12 +146,13 @@ struct nfsd_net {
u32 s2s_cp_cl_id;
struct idr s2s_cp_stateids;
spinlock_t s2s_cp_lock;
+ atomic_t pending_async_copies;
/*
* Version information
*/
- bool *nfsd_versions;
- bool *nfsd4_minorversions;
+ bool nfsd_versions[NFSD_MAXVERS + 1];
+ bool nfsd4_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1];
/*
* Duplicate reply cache
@@ -213,15 +212,22 @@ struct nfsd_net {
/* last time an admin-revoke happened for NFSv4.0 */
time64_t nfs40_last_revoke;
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ /* Local clients to be invalidated when net is shut down */
+ spinlock_t local_clients_lock;
+ struct list_head local_clients;
+#endif
};
/* Simple check to find out if a given net was properly initialized */
#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl)
-extern void nfsd_netns_free_versions(struct nfsd_net *nn);
-
+extern bool nfsd_support_version(int vers);
extern unsigned int nfsd_net_id;
+bool nfsd_net_try_get(struct net *net);
+void nfsd_net_put(struct net *net);
+
void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
void nfsd_reset_write_verifier(struct nfsd_net *nn);
#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 12b2b9bc07bf..5fb202acb0fd 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -84,6 +84,8 @@ out:
fail:
posix_acl_release(resp->acl_access);
posix_acl_release(resp->acl_default);
+ resp->acl_access = NULL;
+ resp->acl_default = NULL;
goto out;
}
@@ -308,8 +310,6 @@ static void nfsaclsvc_release_access(struct svc_rqst *rqstp)
fh_put(&resp->fh);
}
-struct nfsd3_voidargs { int dummy; };
-
#define ST 1 /* status*/
#define AT 21 /* attributes */
#define pAT (1+AT) /* post attributes - conditional */
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 73adca47d373..7b5433bd3019 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -76,6 +76,8 @@ out:
fail:
posix_acl_release(resp->acl_access);
posix_acl_release(resp->acl_default);
+ resp->acl_access = NULL;
+ resp->acl_default = NULL;
goto out;
}
@@ -221,8 +223,6 @@ static void nfs3svc_release_getacl(struct svc_rqst *rqstp)
posix_acl_release(resp->acl_default);
}
-struct nfsd3_voidargs { int dummy; };
-
#define ST 1 /* status*/
#define AT 21 /* attributes */
#define pAT (1+AT) /* post attributes - conditional */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index dfcc957e460d..a817d8485d21 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -14,6 +14,7 @@
#include "xdr3.h"
#include "vfs.h"
#include "filecache.h"
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -28,6 +29,29 @@ static int nfs3_ftypes[] = {
S_IFIFO, /* NF3FIFO */
};
+static __be32 nfsd3_map_status(__be32 status)
+{
+ switch (status) {
+ case nfs_ok:
+ break;
+ case nfserr_nofilehandle:
+ status = nfserr_badhandle;
+ break;
+ case nfserr_wrongsec:
+ case nfserr_file_open:
+ status = nfserr_acces;
+ break;
+ case nfserr_symlink_not_dir:
+ status = nfserr_notdir;
+ break;
+ case nfserr_symlink:
+ case nfserr_wrong_type:
+ status = nfserr_inval;
+ break;
+ }
+ return status;
+}
+
/*
* NULL call.
*/
@@ -46,8 +70,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp)
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- dprintk("nfsd: GETATTR(3) %s\n",
- SVCFH_fmt(&argp->fh));
+ trace_nfsd_vfs_getattr(rqstp, &argp->fh);
fh_copy(&resp->fh, &argp->fh);
resp->status = fh_verify(rqstp, &resp->fh, 0,
@@ -57,6 +80,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp)
resp->status = fh_getattr(&resp->fh, &resp->stat);
out:
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -80,6 +104,7 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp)
if (argp->check_guard)
guardtime = &argp->guardtime;
resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, guardtime);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -103,6 +128,7 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp)
resp->status = nfsd_lookup(rqstp, &resp->dirfh,
argp->name, argp->len,
&resp->fh);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -122,6 +148,7 @@ nfsd3_proc_access(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->access = argp->access;
resp->status = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -142,6 +169,7 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp)
resp->pages = rqstp->rq_next_page++;
resp->status = nfsd_readlink(rqstp, &resp->fh,
page_address(*resp->pages), &resp->len);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -179,6 +207,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
&resp->count, &resp->eof);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -191,7 +220,6 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
struct nfsd3_writeargs *argp = rqstp->rq_argp;
struct nfsd3_writeres *resp = rqstp->rq_resp;
unsigned long cnt = argp->len;
- unsigned int nvecs;
dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n",
SVCFH_fmt(&argp->fh),
@@ -206,12 +234,11 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
- nvecs = svc_fill_write_vector(rqstp, &argp->payload);
-
resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
- rqstp->rq_vec, nvecs, &cnt,
+ &argp->payload, &cnt,
resp->committed, resp->verf);
resp->count = cnt;
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -236,6 +263,8 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 status;
int host_err;
+ trace_nfsd_vfs_create(rqstp, fhp, S_IFREG, argp->name, argp->len);
+
if (isdotent(argp->name, argp->len))
return nfserr_exist;
if (!(iap->ia_valid & ATTR_MODE))
@@ -254,7 +283,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
inode_lock_nested(inode, I_MUTEX_PARENT);
- child = lookup_one_len(argp->name, parent, argp->len);
+ child = lookup_one(&nop_mnt_idmap,
+ &QSTR_LEN(argp->name, argp->len),
+ parent);
if (IS_ERR(child)) {
status = nfserrno(PTR_ERR(child));
goto out;
@@ -350,15 +381,11 @@ nfsd3_proc_create(struct svc_rqst *rqstp)
struct nfsd3_diropres *resp = rqstp->rq_resp;
svc_fh *dirfhp, *newfhp;
- dprintk("nfsd: CREATE(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
-
dirfhp = fh_copy(&resp->dirfh, &argp->fh);
newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -374,16 +401,12 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp)
.na_iattr = &argp->attrs,
};
- dprintk("nfsd: MKDIR(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
-
argp->attrs.ia_valid &= ~ATTR_SIZE;
fh_copy(&resp->dirfh, &argp->fh);
fh_init(&resp->fh, NFS3_FHSIZE);
resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
&attrs, S_IFDIR, 0, &resp->fh);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -413,17 +436,13 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
goto out;
}
- dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n",
- SVCFH_fmt(&argp->ffh),
- argp->flen, argp->fname,
- argp->tlen, argp->tname);
-
fh_copy(&resp->dirfh, &argp->ffh);
fh_init(&resp->fh, NFS3_FHSIZE);
resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname,
argp->flen, argp->tname, &attrs, &resp->fh);
kfree(argp->tname);
out:
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -441,11 +460,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
int type;
dev_t rdev = 0;
- dprintk("nfsd: MKNOD(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
-
fh_copy(&resp->dirfh, &argp->fh);
fh_init(&resp->fh, NFS3_FHSIZE);
@@ -465,6 +479,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
&attrs, type, rdev, &resp->fh);
out:
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -477,15 +492,11 @@ nfsd3_proc_remove(struct svc_rqst *rqstp)
struct nfsd3_diropargs *argp = rqstp->rq_argp;
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- dprintk("nfsd: REMOVE(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
-
/* Unlink. -S_IFDIR means file must not be a directory */
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR,
argp->name, argp->len);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -498,14 +509,10 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp)
struct nfsd3_diropargs *argp = rqstp->rq_argp;
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- dprintk("nfsd: RMDIR(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
-
fh_copy(&resp->fh, &argp->fh);
resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR,
argp->name, argp->len);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -515,19 +522,11 @@ nfsd3_proc_rename(struct svc_rqst *rqstp)
struct nfsd3_renameargs *argp = rqstp->rq_argp;
struct nfsd3_renameres *resp = rqstp->rq_resp;
- dprintk("nfsd: RENAME(3) %s %.*s ->\n",
- SVCFH_fmt(&argp->ffh),
- argp->flen,
- argp->fname);
- dprintk("nfsd: -> %s %.*s\n",
- SVCFH_fmt(&argp->tfh),
- argp->tlen,
- argp->tname);
-
fh_copy(&resp->ffh, &argp->ffh);
fh_copy(&resp->tfh, &argp->tfh);
resp->status = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen,
&resp->tfh, argp->tname, argp->tlen);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -537,17 +536,11 @@ nfsd3_proc_link(struct svc_rqst *rqstp)
struct nfsd3_linkargs *argp = rqstp->rq_argp;
struct nfsd3_linkres *resp = rqstp->rq_resp;
- dprintk("nfsd: LINK(3) %s ->\n",
- SVCFH_fmt(&argp->ffh));
- dprintk("nfsd: -> %s %.*s\n",
- SVCFH_fmt(&argp->tfh),
- argp->tlen,
- argp->tname);
-
fh_copy(&resp->fh, &argp->ffh);
fh_copy(&resp->tfh, &argp->tfh);
resp->status = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen,
&resp->fh);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -581,9 +574,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
struct nfsd3_readdirres *resp = rqstp->rq_resp;
loff_t offset;
- dprintk("nfsd: READDIR(3) %s %d bytes at %d\n",
- SVCFH_fmt(&argp->fh),
- argp->count, (u32) argp->cookie);
+ trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie);
nfsd3_init_dirlist_pages(rqstp, resp, argp->count);
@@ -600,6 +591,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
/* Recycle only pages that were part of the reply */
rqstp->rq_next_page = resp->xdr.page_ptr + 1;
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -614,9 +606,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
struct nfsd3_readdirres *resp = rqstp->rq_resp;
loff_t offset;
- dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
- SVCFH_fmt(&argp->fh),
- argp->count, (u32) argp->cookie);
+ trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie);
nfsd3_init_dirlist_pages(rqstp, resp, argp->count);
@@ -644,6 +634,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
rqstp->rq_next_page = resp->xdr.page_ptr + 1;
out:
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -656,11 +647,9 @@ nfsd3_proc_fsstat(struct svc_rqst *rqstp)
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd3_fsstatres *resp = rqstp->rq_resp;
- dprintk("nfsd: FSSTAT(3) %s\n",
- SVCFH_fmt(&argp->fh));
-
resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0);
fh_put(&argp->fh);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -704,6 +693,7 @@ nfsd3_proc_fsinfo(struct svc_rqst *rqstp)
}
fh_put(&argp->fh);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -746,6 +736,7 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp)
}
fh_put(&argp->fh);
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
@@ -773,6 +764,7 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
argp->count, resp->verf);
nfsd_file_put(nf);
out:
+ resp->status = nfsd3_map_status(resp->status);
return rpc_success;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index a7a07470c1f8..ef4971d71ac4 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1001,7 +1001,9 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
} else
dchild = dget(dparent);
} else
- dchild = lookup_positive_unlocked(name, dparent, namlen);
+ dchild = lookup_one_positive_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, namlen),
+ dparent);
if (IS_ERR(dchild))
return rv;
if (d_mountpoint(dchild))
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 96e786b5e544..936ea1ad9586 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -198,8 +198,6 @@ summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas)
memset(pas, 0, sizeof(*pas));
pas->mask = 07;
- pe = acl->a_entries + acl->a_count;
-
FOREACH_ACL_ENTRY(pa, acl, pe) {
switch (pa->e_tag) {
case ACL_USER_OBJ:
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 87c9547989f6..e00b2aea8da2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -42,11 +42,10 @@
#include "trace.h"
#include "xdr4cb.h"
#include "xdr4.h"
+#include "nfs4xdr_gen.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
-static void nfsd4_mark_cb_fault(struct nfs4_client *clp);
-
#define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1
@@ -93,12 +92,35 @@ static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap,
{
fattr->ncf_cb_change = 0;
fattr->ncf_cb_fsize = 0;
+ fattr->ncf_cb_atime.tv_sec = 0;
+ fattr->ncf_cb_atime.tv_nsec = 0;
+ fattr->ncf_cb_mtime.tv_sec = 0;
+ fattr->ncf_cb_mtime.tv_nsec = 0;
+
if (bitmap[0] & FATTR4_WORD0_CHANGE)
if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0)
- return -NFSERR_BAD_XDR;
+ return -EIO;
if (bitmap[0] & FATTR4_WORD0_SIZE)
if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0)
- return -NFSERR_BAD_XDR;
+ return -EIO;
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) {
+ fattr4_time_deleg_access access;
+
+ if (!xdrgen_decode_fattr4_time_deleg_access(xdr, &access))
+ return -EIO;
+ fattr->ncf_cb_atime.tv_sec = access.seconds;
+ fattr->ncf_cb_atime.tv_nsec = access.nseconds;
+
+ }
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) {
+ fattr4_time_deleg_modify modify;
+
+ if (!xdrgen_decode_fattr4_time_deleg_modify(xdr, &modify))
+ return -EIO;
+ fattr->ncf_cb_mtime.tv_sec = modify.seconds;
+ fattr->ncf_cb_mtime.tv_nsec = modify.nseconds;
+
+ }
return 0;
}
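/*
 * For reference (not part of the patch): the generated
 * xdrgen_decode_fattr4_time_deleg_* helpers each pull an nfstime4
 * (64-bit seconds, 32-bit nanoseconds) off the stream.  A hand-rolled
 * equivalent would look roughly like this.
 */
static int decode_nfstime4(struct xdr_stream *xdr, struct timespec64 *ts)
{
	u64 sec;
	u32 nsec;

	if (xdr_stream_decode_u64(xdr, &sec) < 0)
		return -EIO;
	if (xdr_stream_decode_u32(xdr, &nsec) < 0)
		return -EIO;
	ts->tv_sec = sec;
	ts->tv_nsec = nsec;
	return 0;
}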
@@ -287,17 +309,17 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
u32 length;
__be32 *p;
- p = xdr_inline_decode(xdr, 4 + 4);
+ p = xdr_inline_decode(xdr, XDR_UNIT);
if (unlikely(p == NULL))
goto out_overflow;
- hdr->status = be32_to_cpup(p++);
+ hdr->status = be32_to_cpup(p);
/* Ignore the tag */
- length = be32_to_cpup(p++);
- p = xdr_inline_decode(xdr, length + 4);
- if (unlikely(p == NULL))
+ if (xdr_stream_decode_u32(xdr, &length) < 0)
+ goto out_overflow;
+ if (xdr_inline_decode(xdr, length) == NULL)
+ goto out_overflow;
+ if (xdr_stream_decode_u32(xdr, &hdr->nops) < 0)
goto out_overflow;
- p += XDR_QUADLEN(length);
- hdr->nops = be32_to_cpup(p);
return 0;
out_overflow:
return -EIO;
@@ -361,16 +383,63 @@ static void
encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
struct nfs4_cb_fattr *fattr)
{
- struct nfs4_delegation *dp =
- container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
+ struct nfs4_delegation *dp = container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle;
-
+ struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
+ u32 bmap_size = 1;
+ u32 bmap[3];
+
+ bmap[0] = FATTR4_WORD0_SIZE;
+ if (!ncf->ncf_file_modified)
+ bmap[0] |= FATTR4_WORD0_CHANGE;
+
+ if (deleg_attrs_deleg(dp->dl_type)) {
+ bmap[1] = 0;
+ bmap[2] = FATTR4_WORD2_TIME_DELEG_ACCESS | FATTR4_WORD2_TIME_DELEG_MODIFY;
+ bmap_size = 3;
+ }
encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR);
encode_nfs_fh4(xdr, fh);
- encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap));
+ encode_bitmap4(xdr, bmap, bmap_size);
hdr->nops++;
}
+static u32 highest_slotid(struct nfsd4_session *ses)
+{
+ u32 idx;
+
+ spin_lock(&ses->se_lock);
+ idx = fls(~ses->se_cb_slot_avail);
+ if (idx > 0)
+ --idx;
+ idx = max(idx, ses->se_cb_highest_slot);
+ spin_unlock(&ses->se_lock);
+ return idx;
+}
+
+static void
+encode_referring_call4(struct xdr_stream *xdr,
+ const struct nfsd4_referring_call *rc)
+{
+ encode_uint32(xdr, rc->rc_sequenceid);
+ encode_uint32(xdr, rc->rc_slotid);
+}
+
+static void
+encode_referring_call_list4(struct xdr_stream *xdr,
+ const struct nfsd4_referring_call_list *rcl)
+{
+ struct nfsd4_referring_call *rc;
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
+ xdr_encode_opaque_fixed(p, rcl->rcl_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+ encode_uint32(xdr, rcl->__nr_referring_calls);
+ list_for_each_entry(rc, &rcl->rcl_referring_calls, __list)
+ encode_referring_call4(xdr, rc);
+}
+
/*
* CB_SEQUENCE4args
*
@@ -388,6 +457,7 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr,
struct nfs4_cb_compound_hdr *hdr)
{
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
+ struct nfsd4_referring_call_list *rcl;
__be32 *p;
if (hdr->minorversion == 0)
@@ -396,16 +466,45 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr,
encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE);
encode_sessionid4(xdr, session);
- p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
- *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */
- *p++ = xdr_zero; /* csa_slotid */
- *p++ = xdr_zero; /* csa_highest_slotid */
+ p = xdr_reserve_space(xdr, XDR_UNIT * 4);
+ *p++ = cpu_to_be32(session->se_cb_seq_nr[cb->cb_held_slot]); /* csa_sequenceid */
+ *p++ = cpu_to_be32(cb->cb_held_slot); /* csa_slotid */
+ *p++ = cpu_to_be32(highest_slotid(session)); /* csa_highest_slotid */
*p++ = xdr_zero; /* csa_cachethis */
- xdr_encode_empty_array(p); /* csa_referring_call_lists */
+
+ /* csa_referring_call_lists */
+ encode_uint32(xdr, cb->cb_nr_referring_call_list);
+ list_for_each_entry(rcl, &cb->cb_referring_call_list, __list)
+ encode_referring_call_list4(xdr, rcl);
hdr->nops++;
}
+static void update_cb_slot_table(struct nfsd4_session *ses, u32 target)
+{
+ /* No need to do anything if nothing changed */
+ if (likely(target == READ_ONCE(ses->se_cb_highest_slot)))
+ return;
+
+ spin_lock(&ses->se_lock);
+ if (target > ses->se_cb_highest_slot) {
+ int i;
+
+ target = min(target, NFSD_BC_SLOT_TABLE_SIZE - 1);
+
+ /*
+ * Growing the slot table. Reset any new sequences to 1.
+ *
+ * NB: There is some debate about whether the RFC requires this,
+ * but the Linux client expects it.
+ */
+ for (i = ses->se_cb_highest_slot + 1; i <= target; ++i)
+ ses->se_cb_seq_nr[i] = 1;
+ }
+ ses->se_cb_highest_slot = target;
+ spin_unlock(&ses->se_lock);
+}
+
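/*
 * How the new slot bookkeeping fits together (illustrative summary, not
 * part of the patch): a set bit in se_cb_slot_avail means the slot is
 * free; grab_slot() picks the lowest set bit and clears it, and
 * releasing a slot sets the bit again.  highest_slotid() derives the
 * highest busy slot from fls(~se_cb_slot_avail) and never reports less
 * than se_cb_highest_slot.  update_cb_slot_table() grows
 * se_cb_highest_slot toward the client's advertised target (capped at
 * NFSD_BC_SLOT_TABLE_SIZE - 1) and seeds se_cb_seq_nr[] for the new
 * slots with 1.  The helper below is illustrative only.
 */
static bool cb_slot_is_busy(struct nfsd4_session *ses, int idx)
{
	return !(READ_ONCE(ses->se_cb_slot_avail) & BIT(idx));
}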
/*
* CB_SEQUENCE4resok
*
@@ -433,7 +532,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
int status = -ESERVERFAULT;
__be32 *p;
- u32 dummy;
+ u32 seqid, slotid, target;
/*
* If the server returns different values for sessionID, slotID or
@@ -449,21 +548,22 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
}
p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
- dummy = be32_to_cpup(p++);
- if (dummy != session->se_cb_seq_nr) {
+ seqid = be32_to_cpup(p++);
+ if (seqid != session->se_cb_seq_nr[cb->cb_held_slot]) {
dprintk("NFS: %s Invalid sequence number\n", __func__);
goto out;
}
- dummy = be32_to_cpup(p++);
- if (dummy != 0) {
+ slotid = be32_to_cpup(p++);
+ if (slotid != cb->cb_held_slot) {
dprintk("NFS: %s Invalid slotid\n", __func__);
goto out;
}
- /*
- * FIXME: process highest slotid and target highest slotid
- */
+ p++; // ignore current highest slot value
+
+ target = be32_to_cpup(p++);
+ update_cb_slot_table(session, target);
status = 0;
out:
cb->cb_seq_status = status;
@@ -592,7 +692,7 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
struct nfs4_cb_compound_hdr hdr;
int status;
u32 bitmap[3] = {0};
- u32 attrlen;
+ u32 attrlen, maxlen;
struct nfs4_cb_fattr *ncf =
container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
@@ -605,14 +705,18 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
return status;
status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
- if (status)
+ if (unlikely(status || cb->cb_status))
return status;
if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
- return -NFSERR_BAD_XDR;
+ return -EIO;
if (xdr_stream_decode_u32(xdr, &attrlen) < 0)
- return -NFSERR_BAD_XDR;
- if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize)))
- return -NFSERR_BAD_XDR;
+ return -EIO;
+ maxlen = sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize);
+ if (bitmap[2] != 0)
+ maxlen += (sizeof(ncf->ncf_cb_mtime.tv_sec) +
+ sizeof(ncf->ncf_cb_mtime.tv_nsec)) * 2;
+ if (attrlen > maxlen)
+ return -EIO;
status = decode_cb_fattr4(xdr, bitmap, ncf);
return status;
}
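/*
 * Checking the bound above (sizes assume a 64-bit build): the base
 * maxlen covers change (8 bytes) plus size (8 bytes) = 16 bytes.  When
 * bitmap word 2 is non-zero, up to two delegated timestamps follow;
 * each nfstime4 is 12 bytes on the wire (u64 seconds + u32 nseconds),
 * i.e. 24 bytes, while the in-memory bound (tv_sec + tv_nsec) * 2 =
 * 32 bytes is a safe over-estimate.
 */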
@@ -978,20 +1082,23 @@ static int max_cb_time(struct net *net)
return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ;
}
-static struct workqueue_struct *callback_wq;
-
static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
{
- trace_nfsd_cb_queue(cb->cb_clp, cb);
- return queue_delayed_work(callback_wq, &cb->cb_work, 0);
+ struct nfs4_client *clp = cb->cb_clp;
+
+ trace_nfsd_cb_queue(clp, cb);
+ return queue_work(clp->cl_callback_wq, &cb->cb_work);
}
-static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb,
- unsigned long msecs)
+static void nfsd4_requeue_cb(struct rpc_task *task, struct nfsd4_callback *cb)
{
- trace_nfsd_cb_queue(cb->cb_clp, cb);
- queue_delayed_work(callback_wq, &cb->cb_work,
- msecs_to_jiffies(msecs));
+ struct nfs4_client *clp = cb->cb_clp;
+
+ if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
+ trace_nfsd_cb_restart(clp, cb);
+ task->tk_status = 0;
+ set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags);
+ }
}
static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
@@ -1002,8 +1109,7 @@ static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
{
- if (atomic_dec_and_test(&clp->cl_cb_inflight))
- wake_up_var(&clp->cl_cb_inflight);
+ atomic_dec_and_wake_up(&clp->cl_cb_inflight);
}
static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
@@ -1066,7 +1172,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
args.authflavor = clp->cl_cred.cr_flavor;
clp->cl_cb_ident = conn->cb_ident;
} else {
- if (!conn->cb_xprt)
+ if (!conn->cb_xprt || !ses)
return -EINVAL;
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
@@ -1161,7 +1267,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp)
void nfsd4_probe_callback_sync(struct nfs4_client *clp)
{
nfsd4_probe_callback(clp);
- flush_workqueue(callback_wq);
+ flush_workqueue(clp->cl_callback_wq);
}
void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
@@ -1172,6 +1278,22 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
spin_unlock(&clp->cl_lock);
}
+static int grab_slot(struct nfsd4_session *ses)
+{
+ int idx;
+
+ spin_lock(&ses->se_lock);
+ idx = ffs(ses->se_cb_slot_avail) - 1;
+ if (idx < 0 || idx > ses->se_cb_highest_slot) {
+ spin_unlock(&ses->se_lock);
+ return -1;
+ }
+ /* clear the bit for the slot */
+ ses->se_cb_slot_avail &= ~BIT(idx);
+ spin_unlock(&ses->se_lock);
+ return idx;
+}
+
/*
* There's currently a single callback channel slot.
* If the slot is available, then mark it busy. Otherwise, set the
@@ -1180,28 +1302,32 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
{
struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd4_session *ses = clp->cl_cb_session;
- if (!cb->cb_holds_slot &&
- test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+ if (cb->cb_held_slot >= 0)
+ return true;
+ cb->cb_held_slot = grab_slot(ses);
+ if (cb->cb_held_slot < 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
/* Race breaker */
- if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
- dprintk("%s slot is busy\n", __func__);
+ cb->cb_held_slot = grab_slot(ses);
+ if (cb->cb_held_slot < 0)
return false;
- }
rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
}
- cb->cb_holds_slot = true;
return true;
}
static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd4_session *ses = clp->cl_cb_session;
- if (cb->cb_holds_slot) {
- cb->cb_holds_slot = false;
- clear_bit(0, &clp->cl_cb_slot_busy);
+ if (cb->cb_held_slot >= 0) {
+ spin_lock(&ses->se_lock);
+ ses->se_cb_slot_avail |= BIT(cb->cb_held_slot);
+ spin_unlock(&ses->se_lock);
+ cb->cb_held_slot = -1;
rpc_wake_up_next(&clp->cl_cb_waitq);
}
}
@@ -1212,15 +1338,113 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
trace_nfsd_cb_destroy(clp, cb);
nfsd41_cb_release_slot(cb);
+ if (test_bit(NFSD4_CALLBACK_WAKE, &cb->cb_flags))
+ clear_and_wake_up_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags);
+ else
+ clear_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags);
+
if (cb->cb_ops && cb->cb_ops->release)
cb->cb_ops->release(cb);
nfsd41_cb_inflight_end(clp);
}
-/*
- * TODO: cb_sequence should support referring call lists, cachethis, multiple
- * slots, and mark callback channel down on communication errors.
+/**
+ * nfsd41_cb_referring_call - add a referring call to a callback operation
+ * @cb: context of callback to add the rc to
+ * @sessionid: referring call's session ID
+ * @slotid: referring call's session slot index
+ * @seqno: referring call's slot sequence number
+ *
+ * Caller serializes access to @cb.
+ *
+ * NB: If memory allocation fails, the referring call is not added.
*/
+void nfsd41_cb_referring_call(struct nfsd4_callback *cb,
+ struct nfs4_sessionid *sessionid,
+ u32 slotid, u32 seqno)
+{
+ struct nfsd4_referring_call_list *rcl;
+ struct nfsd4_referring_call *rc;
+ bool found;
+
+ might_sleep();
+
+ found = false;
+ list_for_each_entry(rcl, &cb->cb_referring_call_list, __list) {
+ if (!memcmp(rcl->rcl_sessionid.data, sessionid->data,
+ NFS4_MAX_SESSIONID_LEN)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ rcl = kmalloc(sizeof(*rcl), GFP_KERNEL);
+ if (!rcl)
+ return;
+ memcpy(rcl->rcl_sessionid.data, sessionid->data,
+ NFS4_MAX_SESSIONID_LEN);
+ rcl->__nr_referring_calls = 0;
+ INIT_LIST_HEAD(&rcl->rcl_referring_calls);
+ list_add(&rcl->__list, &cb->cb_referring_call_list);
+ cb->cb_nr_referring_call_list++;
+ }
+
+ found = false;
+ list_for_each_entry(rc, &rcl->rcl_referring_calls, __list) {
+ if (rc->rc_sequenceid == seqno && rc->rc_slotid == slotid) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ rc = kmalloc(sizeof(*rc), GFP_KERNEL);
+ if (!rc)
+ goto out;
+ rc->rc_sequenceid = seqno;
+ rc->rc_slotid = slotid;
+ rcl->__nr_referring_calls++;
+ list_add(&rc->__list, &rcl->rcl_referring_calls);
+ }
+
+out:
+ if (!rcl->__nr_referring_calls) {
+ cb->cb_nr_referring_call_list--;
+ list_del(&rcl->__list);
+ kfree(rcl);
+ }
+}
+
+/**
+ * nfsd41_cb_destroy_referring_call_list - release referring call info
+ * @cb: context of a callback that has completed
+ *
+ * Callers who allocate referring calls using nfsd41_cb_referring_call() must
+ * release those resources by calling nfsd41_cb_destroy_referring_call_list.
+ *
+ * Caller serializes access to @cb.
+ */
+void nfsd41_cb_destroy_referring_call_list(struct nfsd4_callback *cb)
+{
+ struct nfsd4_referring_call_list *rcl;
+ struct nfsd4_referring_call *rc;
+
+ while (!list_empty(&cb->cb_referring_call_list)) {
+ rcl = list_first_entry(&cb->cb_referring_call_list,
+ struct nfsd4_referring_call_list,
+ __list);
+
+ while (!list_empty(&rcl->rcl_referring_calls)) {
+ rc = list_first_entry(&rcl->rcl_referring_calls,
+ struct nfsd4_referring_call,
+ __list);
+ list_del(&rc->__list);
+ kfree(rc);
+ }
+ list_del(&rcl->__list);
+ kfree(rcl);
+ }
+}
+
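/*
 * Typical pairing (sketch based on the CB_OFFLOAD usage later in this
 * patch; the example_* names are hypothetical): the sender records the
 * forechannel call that triggered the callback before queueing it, and
 * the callback's ->done handler releases the list once the reply has
 * been handled.
 */
static void example_send_offload_cb(struct nfsd4_cb_offload *cbo)
{
	nfsd41_cb_referring_call(&cbo->co_cb, &cbo->co_referring_sessionid,
				 cbo->co_referring_slotid,
				 cbo->co_referring_seqno);
	nfsd4_run_cb(&cbo->co_cb);
}

static int example_offload_cb_done(struct nfsd4_callback *cb,
				   struct rpc_task *task)
{
	nfsd41_cb_destroy_referring_call_list(cb);
	return 1;
}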
static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
{
struct nfsd4_callback *cb = calldata;
@@ -1231,6 +1455,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
* cb_seq_status is only set in decode_cb_sequence4res,
* and so will remain 1 if an rpc level failure occurs.
*/
+ trace_nfsd_cb_rpc_prepare(clp);
cb->cb_seq_status = 1;
cb->cb_status = 0;
if (minorversion && !nfsd41_cb_get_slot(cb, task))
@@ -1238,30 +1463,14 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
rpc_call_start(task);
}
+/* Returns true if CB_COMPOUND processing should continue */
static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb)
{
- struct nfs4_client *clp = cb->cb_clp;
- struct nfsd4_session *session = clp->cl_cb_session;
- bool ret = true;
-
- if (!clp->cl_minorversion) {
- /*
- * If the backchannel connection was shut down while this
- * task was queued, we need to resubmit it after setting up
- * a new backchannel connection.
- *
- * Note that if we lost our callback connection permanently
- * the submission code will error out, so we don't need to
- * handle that case here.
- */
- if (RPC_SIGNALLED(task))
- goto need_restart;
-
- return true;
- }
+ struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
+ bool ret = false;
- if (!cb->cb_holds_slot)
- goto need_restart;
+ if (cb->cb_held_slot < 0)
+ goto requeue;
/* This is the operation status code for CB_SEQUENCE */
trace_nfsd_cb_seq_status(task, cb);
@@ -1274,12 +1483,17 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
* If CB_SEQUENCE returns an error, then the state of the slot
* (sequence ID, cached reply) MUST NOT change.
*/
- ++session->se_cb_seq_nr;
+ ++session->se_cb_seq_nr[cb->cb_held_slot];
+ ret = true;
break;
case -ESERVERFAULT:
- ++session->se_cb_seq_nr;
+ /*
+ * Call succeeded, but the session, slot index, or slot
+ * sequence number in the response does not match those in
+ * the server's call. The sequence information is thus
+ * untrustworthy.
+ */
nfsd4_mark_cb_fault(cb->cb_clp);
- ret = false;
break;
case 1:
/*
@@ -1291,44 +1505,42 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
fallthrough;
case -NFS4ERR_BADSESSION:
nfsd4_mark_cb_fault(cb->cb_clp);
- ret = false;
- goto need_restart;
+ goto requeue;
case -NFS4ERR_DELAY:
cb->cb_seq_status = 1;
- if (!rpc_restart_call(task))
- goto out;
-
+ if (RPC_SIGNALLED(task) || !rpc_restart_call(task))
+ goto requeue;
rpc_delay(task, 2 * HZ);
return false;
+ case -NFS4ERR_SEQ_MISORDERED:
case -NFS4ERR_BADSLOT:
+ /*
+ * A SEQ_MISORDERED or BADSLOT error means that the client and
+ * server are out of sync as to the backchannel parameters. Mark
+ * the backchannel faulty and restart the RPC, but leak the slot
+ * so that it's no longer used.
+ */
+ nfsd4_mark_cb_fault(cb->cb_clp);
+ cb->cb_held_slot = -1;
goto retry_nowait;
- case -NFS4ERR_SEQ_MISORDERED:
- if (session->se_cb_seq_nr != 1) {
- session->se_cb_seq_nr = 1;
- goto retry_nowait;
- }
- break;
default:
nfsd4_mark_cb_fault(cb->cb_clp);
}
- nfsd41_cb_release_slot(cb);
-
trace_nfsd_cb_free_slot(task, cb);
-
- if (RPC_SIGNALLED(task))
- goto need_restart;
-out:
+ nfsd41_cb_release_slot(cb);
return ret;
retry_nowait:
- if (rpc_restart_call_prepare(task))
- ret = false;
- goto out;
-need_restart:
- if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
- trace_nfsd_cb_restart(clp, cb);
- task->tk_status = 0;
- cb->cb_need_restart = true;
+ /*
+ * RPC_SIGNALLED() means that the rpc_client is being torn down and
+ * (possibly) recreated. Requeue the call in that case.
+ */
+ if (!RPC_SIGNALLED(task)) {
+ if (rpc_restart_call_prepare(task))
+ return false;
}
+requeue:
+ nfsd41_cb_release_slot(cb);
+ nfsd4_requeue_cb(task, cb);
return false;
}
@@ -1337,11 +1549,28 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
struct nfsd4_callback *cb = calldata;
struct nfs4_client *clp = cb->cb_clp;
- if (!nfsd4_cb_sequence_done(task, cb))
+ trace_nfsd_cb_rpc_done(clp);
+
+ if (!clp->cl_minorversion) {
+ /*
+ * If the backchannel connection was shut down while this
+ * task was queued, we need to resubmit it after setting up
+ * a new backchannel connection.
+ *
+ * Note that if we lost our callback connection permanently
+ * the submission code will error out, so we don't need to
+ * handle that case here.
+ */
+ if (RPC_SIGNALLED(task))
+ nfsd4_requeue_cb(task, cb);
+ } else if (!nfsd4_cb_sequence_done(task, cb)) {
return;
+ }
if (cb->cb_status) {
- WARN_ON_ONCE(task->tk_status);
+ WARN_ONCE(task->tk_status,
+ "cb_status=%d tk_status=%d cb_opcode=%d",
+ cb->cb_status, task->tk_status, cb->cb_ops->opcode);
task->tk_status = cb->cb_status;
}
@@ -1367,7 +1596,9 @@ static void nfsd4_cb_release(void *calldata)
{
struct nfsd4_callback *cb = calldata;
- if (cb->cb_need_restart)
+ trace_nfsd_cb_rpc_release(cb->cb_clp);
+
+ if (test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags))
nfsd4_queue_cb(cb);
else
nfsd41_destroy_cb(cb);
@@ -1380,19 +1611,6 @@ static const struct rpc_call_ops nfsd4_cb_ops = {
.rpc_release = nfsd4_cb_release,
};
-int nfsd4_create_callback_queue(void)
-{
- callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0);
- if (!callback_wq)
- return -ENOMEM;
- return 0;
-}
-
-void nfsd4_destroy_callback_queue(void)
-{
- destroy_workqueue(callback_wq);
-}
-
/* must be called under the state lock */
void nfsd4_shutdown_callback(struct nfs4_client *clp)
{
@@ -1406,7 +1624,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
* client, destroy the rpc client, and stop:
*/
nfsd4_run_cb(&clp->cl_cb_null);
- flush_workqueue(callback_wq);
+ flush_workqueue(clp->cl_callback_wq);
nfsd41_cb_inflight_wait_complete(clp);
}
@@ -1428,9 +1646,9 @@ static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
/*
* Note there isn't a lot of locking in this code; instead we depend on
- * the fact that it is run from the callback_wq, which won't run two
- * work items at once. So, for example, callback_wq handles all access
- * of cl_cb_client and all calls to rpc_create or rpc_shutdown_client.
+ * the fact that it is run from clp->cl_callback_wq, which won't run two
+ * work items at once. So, for example, clp->cl_callback_wq handles all
+ * access of cl_cb_client and all calls to rpc_create or rpc_shutdown_client.
*/
static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
{
@@ -1490,10 +1708,10 @@ static void
nfsd4_run_cb_work(struct work_struct *work)
{
struct nfsd4_callback *cb =
- container_of(work, struct nfsd4_callback, cb_work.work);
+ container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
- int flags;
+ int flags, ret;
trace_nfsd_cb_start(clp);
@@ -1501,17 +1719,12 @@ nfsd4_run_cb_work(struct work_struct *work)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
- if (!clnt) {
- if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
- nfsd41_destroy_cb(cb);
- else {
- /*
- * XXX: Ideally, we could wait for the client to
- * reconnect, but I haven't figured out how
- * to do that yet.
- */
- nfsd4_queue_cb_delayed(cb, 25);
- }
+ if (!clnt || clp->cl_state == NFSD4_COURTESY) {
+ /*
+ * Callback channel broken, client killed or
+ * nfs4_client in courtesy state; give up.
+ */
+ nfsd41_destroy_cb(cb);
return;
}
@@ -1524,16 +1737,19 @@ nfsd4_run_cb_work(struct work_struct *work)
return;
}
- if (cb->cb_need_restart) {
- cb->cb_need_restart = false;
- } else {
+ if (!test_and_clear_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags)) {
if (cb->cb_ops && cb->cb_ops->prepare)
cb->cb_ops->prepare(cb);
}
+
cb->cb_msg.rpc_cred = clp->cl_cb_cred;
flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
- rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
- cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
+ ret = rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
+ cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
+ if (ret != 0) {
+ set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags);
+ nfsd4_queue_cb(cb);
+ }
}
void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
@@ -1543,11 +1759,13 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
+ cb->cb_flags = 0;
cb->cb_ops = ops;
- INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work);
+ INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
- cb->cb_need_restart = false;
- cb->cb_holds_slot = false;
+ cb->cb_held_slot = -1;
+ cb->cb_nr_referring_call_list = 0;
+ INIT_LIST_HEAD(&cb->cb_referring_call_list);
}
/**
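/*
 * Caller pattern for the new cb_flags word (sketch mirroring the
 * nfsd4_recall_file_layout() hunk elsewhere in this patch): take
 * NFSD4_CALLBACK_RUNNING before queueing so the same callback is never
 * queued twice, and let nfsd41_destroy_cb() clear the bit when the
 * callback completes.  The function name is hypothetical.
 */
static void example_queue_layout_recall(struct nfs4_layout_stateid *ls)
{
	struct nfsd4_callback *cb = &ls->ls_recall;

	if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags)) {
		refcount_inc(&ls->ls_stid.sc_count);
		nfsd4_run_cb(cb);
	}
}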
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 7a806ac13e31..8cca1329f348 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -581,6 +581,7 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr,
.id = id,
.type = type,
};
+ __be32 status = nfs_ok;
__be32 *p;
int ret;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -593,12 +594,16 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr,
return nfserrno(ret);
ret = strlen(item->name);
WARN_ON_ONCE(ret > IDMAP_NAMESZ);
+
p = xdr_reserve_space(xdr, ret + 4);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque(p, item->name, ret);
+ if (unlikely(!p)) {
+ status = nfserr_resource;
+ goto out_put;
+ }
+ xdr_encode_opaque(p, item->name, ret);
+out_put:
cache_put(&item->h, nn->idtoname_cache);
- return 0;
+ return status;
}
static bool
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 4f3072b5979a..290271ac4245 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -344,9 +344,10 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid);
- refcount_inc(&ls->ls_stid.sc_count);
- nfsd4_run_cb(&ls->ls_recall);
-
+ if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ls->ls_recall.cb_flags)) {
+ refcount_inc(&ls->ls_stid.sc_count);
+ nfsd4_run_cb(&ls->ls_recall);
+ }
out_unlock:
spin_unlock(&ls->ls_lock);
}
@@ -740,6 +741,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops = {
.prepare = nfsd4_cb_layout_prepare,
.done = nfsd4_cb_layout_done,
.release = nfsd4_cb_layout_release,
+ .opcode = OP_CB_LAYOUTRECALL,
};
static bool
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2927b1263f08..f13abbb13b38 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -57,6 +57,8 @@ module_param(inter_copy_offload_enable, bool, 0644);
MODULE_PARM_DESC(inter_copy_offload_enable,
"Enable inter server to server copy offload. Default: false");
+static void cleanup_async_copy(struct nfsd4_copy *copy);
+
#ifdef CONFIG_NFSD_V4_2_INTER_SSC
static int nfsd4_ssc_umount_timeout = 900000; /* default to 15 mins */
module_param(nfsd4_ssc_umount_timeout, int, 0644);
@@ -158,7 +160,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
return fh_verify(rqstp, current_fh, S_IFREG, accmode);
}
-static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
+static __be32 nfsd_check_obj_isreg(struct svc_fh *fh, u32 minor_version)
{
umode_t mode = d_inode(fh->fh_dentry)->i_mode;
@@ -166,14 +168,15 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
return nfs_ok;
if (S_ISDIR(mode))
return nfserr_isdir;
- /*
- * Using err_symlink as our catch-all case may look odd; but
- * there's no other obvious error for this case in 4.0, and we
- * happen to know that it will cause the linux v4 client to do
- * the right thing on attempts to open something other than a
- * regular file.
- */
- return nfserr_symlink;
+ if (S_ISLNK(mode))
+ return nfserr_symlink;
+
+ /* RFC 7530 - 16.16.6 */
+ if (minor_version == 0)
+ return nfserr_symlink;
+ else
+ return nfserr_wrong_type;
+
}
static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh *resfh)
@@ -263,7 +266,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
inode_lock_nested(inode, I_MUTEX_PARENT);
- child = lookup_one_len(open->op_fname, parent, open->op_fnamelen);
+ child = lookup_one(&nop_mnt_idmap,
+ &QSTR_LEN(open->op_fname, open->op_fnamelen),
+ parent);
if (IS_ERR(child)) {
status = nfserrno(PTR_ERR(child));
goto out;
@@ -466,7 +471,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
}
if (status)
goto out;
- status = nfsd_check_obj_isreg(*resfh);
+ status = nfsd_check_obj_isreg(*resfh, cstate->minorversion);
if (status)
goto out;
@@ -751,15 +756,6 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&access->ac_supported);
}
-static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
-{
- __be32 *verf = (__be32 *)verifier->data;
-
- BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
-
- nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id));
-}
-
static __be32
nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -882,6 +878,8 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_getattr *getattr = &u->getattr;
__be32 status;
+ trace_nfsd_vfs_getattr(rqstp, &cstate->current_fh);
+
status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
return status;
@@ -1004,6 +1002,9 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
u64 cookie = readdir->rd_cookie;
static const nfs4_verifier zeroverf;
+ trace_nfsd_vfs_readdir(rqstp, &cstate->current_fh,
+ readdir->rd_maxcount, readdir->rd_cookie);
+
/* no need to check permission - this will be done in nfsd_readdir() */
if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
@@ -1141,18 +1142,43 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
.na_iattr = &setattr->sa_iattr,
.na_seclabel = &setattr->sa_label,
};
+ bool save_no_wcc, deleg_attrs;
+ struct nfs4_stid *st = NULL;
struct inode *inode;
__be32 status = nfs_ok;
- bool save_no_wcc;
int err;
- if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
+ deleg_attrs = setattr->sa_bmval[2] & (FATTR4_WORD2_TIME_DELEG_ACCESS |
+ FATTR4_WORD2_TIME_DELEG_MODIFY);
+
+ if (deleg_attrs || (setattr->sa_iattr.ia_valid & ATTR_SIZE)) {
+ int flags = WR_STATE;
+
+ if (setattr->sa_bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)
+ flags |= RD_STATE;
+
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&cstate->current_fh, &setattr->sa_stateid,
- WR_STATE, NULL, NULL);
+ flags, NULL, &st);
if (status)
return status;
}
+
+ if (deleg_attrs) {
+ status = nfserr_bad_stateid;
+ if (st->sc_type & SC_TYPE_DELEG) {
+ struct nfs4_delegation *dp = delegstateid(st);
+
+ /* Only for *_ATTRS_DELEG flavors */
+ if (deleg_attrs_deleg(dp->dl_type))
+ status = nfs_ok;
+ }
+ }
+ if (st)
+ nfs4_put_stid(st);
+ if (status)
+ return status;
+
err = fh_want_write(&cstate->current_fh);
if (err)
return nfserrno(err);
@@ -1192,7 +1218,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd_file *nf = NULL;
__be32 status = nfs_ok;
unsigned long cnt;
- int nvecs;
if (write->wr_offset > (u64)OFFSET_MAX ||
write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX)
@@ -1207,13 +1232,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
write->wr_how_written = write->wr_stable_how;
-
- nvecs = svc_fill_write_vector(rqstp, &write->wr_payload);
- WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
-
status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
- write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
- write->wr_how_written,
+ write->wr_offset, &write->wr_payload,
+ &cnt, write->wr_how_written,
(__be32 *)write->wr_verifier.data);
nfsd_file_put(nf);
@@ -1284,6 +1305,71 @@ out:
return status;
}
+/**
+ * nfsd4_has_active_async_copies - Check for ongoing copy operations
+ * @clp: Client to be checked
+ *
+ * NFSD maintains state for async COPY operations after they complete,
+ * and this state remains in the nfs4_client's async_copies list.
+ * Ongoing copies should block the destruction of the nfs4_client, but
+ * completed copies should not.
+ *
+ * Return values:
+ * %true: At least one active async COPY is ongoing
+ * %false: No active async COPY operations were found
+ */
+bool nfsd4_has_active_async_copies(struct nfs4_client *clp)
+{
+ struct nfsd4_copy *copy;
+ bool result = false;
+
+ spin_lock(&clp->async_lock);
+ list_for_each_entry(copy, &clp->async_copies, copies) {
+ if (!test_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags) &&
+ !test_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags)) {
+ result = true;
+ break;
+ }
+ }
+ spin_unlock(&clp->async_lock);
+ return result;
+}
+
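/*
 * Sketch of a caller (an assumption: the real call sites live in the
 * client-expiry paths outside these hunks): an otherwise idle client
 * must not be torn down while an async COPY is still running.
 */
static bool example_client_still_busy(struct nfs4_client *clp)
{
	return nfsd4_has_active_async_copies(clp);
}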
+/**
+ * nfsd4_async_copy_reaper - Purge completed copies
+ * @nn: Network namespace with possible active copy information
+ */
+void nfsd4_async_copy_reaper(struct nfsd_net *nn)
+{
+ struct nfs4_client *clp;
+ struct nfsd4_copy *copy;
+ LIST_HEAD(reaplist);
+
+ spin_lock(&nn->client_lock);
+ list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+ struct list_head *pos, *next;
+
+ spin_lock(&clp->async_lock);
+ list_for_each_safe(pos, next, &clp->async_copies) {
+ copy = list_entry(pos, struct nfsd4_copy, copies);
+ if (test_bit(NFSD4_COPY_F_OFFLOAD_DONE, &copy->cp_flags)) {
+ if (--copy->cp_ttl) {
+ list_del_init(&copy->copies);
+ list_add(&copy->copies, &reaplist);
+ }
+ }
+ }
+ spin_unlock(&clp->async_lock);
+ }
+ spin_unlock(&nn->client_lock);
+
+ while (!list_empty(&reaplist)) {
+ copy = list_first_entry(&reaplist, struct nfsd4_copy, copies);
+ list_del_init(&copy->copies);
+ cleanup_async_copy(copy);
+ }
+}
+
static void nfs4_put_copy(struct nfsd4_copy *copy)
{
if (!refcount_dec_and_test(&copy->refcount))
@@ -1294,12 +1380,16 @@ static void nfs4_put_copy(struct nfsd4_copy *copy)
static void nfsd4_stop_copy(struct nfsd4_copy *copy)
{
- if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags))
+ trace_nfsd_copy_async_cancel(copy);
+ if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags)) {
kthread_stop(copy->copy_task);
+ copy->nfserr = nfs_ok;
+ set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
+ }
nfs4_put_copy(copy);
}
-static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp)
+static struct nfsd4_copy *nfsd4_unhash_copy(struct nfs4_client *clp)
{
struct nfsd4_copy *copy = NULL;
@@ -1308,6 +1398,9 @@ static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp)
copy = list_first_entry(&clp->async_copies, struct nfsd4_copy,
copies);
refcount_inc(&copy->refcount);
+ copy->cp_clp = NULL;
+ if (!list_empty(&copy->copies))
+ list_del_init(&copy->copies);
}
spin_unlock(&clp->async_lock);
return copy;
@@ -1317,7 +1410,7 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp)
{
struct nfsd4_copy *copy;
- while ((copy = nfsd4_get_copy(clp)) != NULL)
+ while ((copy = nfsd4_unhash_copy(clp)) != NULL)
nfsd4_stop_copy(copy);
}
#ifdef CONFIG_NFSD_V4_2_INTER_SSC
@@ -1605,8 +1698,10 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
{
struct nfsd4_cb_offload *cbo =
container_of(cb, struct nfsd4_cb_offload, co_cb);
+ struct nfsd4_copy *copy =
+ container_of(cbo, struct nfsd4_copy, cp_cb_offload);
- kfree(cbo);
+ set_bit(NFSD4_COPY_F_OFFLOAD_DONE, &copy->cp_flags);
}
static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
@@ -1616,12 +1711,21 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
container_of(cb, struct nfsd4_cb_offload, co_cb);
trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task);
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ if (cbo->co_retries--) {
+ rpc_delay(task, HZ / 5);
+ return 0;
+ }
+ }
+ nfsd41_cb_destroy_referring_call_list(cb);
return 1;
}
static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = {
.release = nfsd4_cb_offload_release,
- .done = nfsd4_cb_offload_done
+ .done = nfsd4_cb_offload_done,
+ .opcode = OP_CB_OFFLOAD,
};
static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
@@ -1630,7 +1734,6 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
test_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags) ?
NFS_FILE_SYNC : NFS_UNSTABLE;
nfsd4_copy_set_sync(copy, sync);
- gen_boot_verifier(&copy->cp_res.wr_verifier, copy->cp_clp->net);
}
static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
@@ -1737,23 +1840,23 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
nfs4_put_copy(copy);
}
-static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr)
+static void nfsd4_send_cb_offload(struct nfsd4_copy *copy)
{
- struct nfsd4_cb_offload *cbo;
-
- cbo = kzalloc(sizeof(*cbo), GFP_KERNEL);
- if (!cbo)
- return;
+ struct nfsd4_cb_offload *cbo = &copy->cp_cb_offload;
memcpy(&cbo->co_res, &copy->cp_res, sizeof(copy->cp_res));
memcpy(&cbo->co_fh, &copy->fh, sizeof(copy->fh));
- cbo->co_nfserr = nfserr;
+ cbo->co_nfserr = copy->nfserr;
+ cbo->co_retries = 5;
nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops,
NFSPROC4_CLNT_CB_OFFLOAD);
+ nfsd41_cb_referring_call(&cbo->co_cb, &cbo->co_referring_sessionid,
+ cbo->co_referring_slotid,
+ cbo->co_referring_seqno);
trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid,
- &cbo->co_fh, copy->cp_count, nfserr);
- nfsd4_run_cb(&cbo->co_cb);
+ &cbo->co_fh, copy->cp_count, copy->nfserr);
+ nfsd4_try_run_cb(&cbo->co_cb);
}
/**
@@ -1766,9 +1869,8 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr)
static int nfsd4_do_async_copy(void *data)
{
struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
- __be32 nfserr;
- trace_nfsd_copy_do_async(copy);
+ trace_nfsd_copy_async(copy);
if (nfsd4_ssc_is_inter(copy)) {
struct file *filp;
@@ -1777,25 +1879,31 @@ static int nfsd4_do_async_copy(void *data)
if (IS_ERR(filp)) {
switch (PTR_ERR(filp)) {
case -EBADF:
- nfserr = nfserr_wrong_type;
+ copy->nfserr = nfserr_wrong_type;
break;
default:
- nfserr = nfserr_offload_denied;
+ copy->nfserr = nfserr_offload_denied;
}
/* ss_mnt will be unmounted by the laundromat */
goto do_callback;
}
- nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file,
- false);
+ copy->nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file,
+ false);
nfsd4_cleanup_inter_ssc(copy->ss_nsui, filp, copy->nf_dst);
} else {
- nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
- copy->nf_dst->nf_file, false);
+ copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
+ copy->nf_dst->nf_file, false);
}
do_callback:
- nfsd4_send_cb_offload(copy, nfserr);
- cleanup_async_copy(copy);
+ /* The kthread exits forthwith. Ensure that a subsequent
+ * OFFLOAD_CANCEL won't try to kill it again. */
+ set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags);
+
+ set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
+ trace_nfsd_copy_async_done(copy);
+ nfsd4_send_cb_offload(copy);
+ atomic_dec(&copy->cp_nn->pending_async_copies);
return 0;
}
@@ -1803,9 +1911,21 @@ static __be32
nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfsd4_copy *async_copy = NULL;
struct nfsd4_copy *copy = &u->copy;
+ struct nfsd42_write_res *result;
__be32 status;
- struct nfsd4_copy *async_copy = NULL;
+
+ /*
+ * Currently, async COPY is not reliable. Force all COPY
+ * requests to be synchronous to avoid client application
+ * hangs waiting for COPY completion.
+ */
+ nfsd4_copy_set_sync(copy, true);
+
+ result = &copy->cp_res;
+ nfsd_copy_write_verifier((__be32 *)&result->wr_verifier.data, nn);
copy->cp_clp = cstate->clp;
if (nfsd4_ssc_is_inter(copy)) {
@@ -1831,26 +1951,34 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
memcpy(&copy->fh, &cstate->current_fh.fh_handle,
sizeof(struct knfsd_fh));
if (nfsd4_copy_is_async(copy)) {
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
-
- status = nfserrno(-ENOMEM);
async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
if (!async_copy)
goto out_err;
+ async_copy->cp_nn = nn;
INIT_LIST_HEAD(&async_copy->copies);
refcount_set(&async_copy->refcount, 1);
+ async_copy->cp_ttl = NFSD_COPY_INITIAL_TTL;
+ /* Arbitrary cap on number of pending async copy operations */
+ if (atomic_inc_return(&nn->pending_async_copies) >
+ (int)rqstp->rq_pool->sp_nrthreads)
+ goto out_dec_async_copy_err;
async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
if (!async_copy->cp_src)
- goto out_err;
+ goto out_dec_async_copy_err;
if (!nfs4_init_copy_state(nn, copy))
- goto out_err;
- memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid,
- sizeof(copy->cp_res.cb_stateid));
+ goto out_dec_async_copy_err;
+ memcpy(&result->cb_stateid, &copy->cp_stateid.cs_stid,
+ sizeof(result->cb_stateid));
dup_copy_fields(copy, async_copy);
+ memcpy(async_copy->cp_cb_offload.co_referring_sessionid.data,
+ cstate->session->se_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+ async_copy->cp_cb_offload.co_referring_slotid = cstate->slot->sl_index;
+ async_copy->cp_cb_offload.co_referring_seqno = cstate->slot->sl_seqid;
async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
async_copy, "%s", "copy thread");
if (IS_ERR(async_copy->copy_task))
- goto out_err;
+ goto out_dec_async_copy_err;
spin_lock(&async_copy->cp_clp->async_lock);
list_add(&async_copy->copies,
&async_copy->cp_clp->async_copies);
@@ -1865,6 +1993,9 @@ out:
trace_nfsd_copy_done(copy, status);
release_copy_files(copy);
return status;
+out_dec_async_copy_err:
+ if (async_copy)
+ atomic_dec(&nn->pending_async_copies);
out_err:
if (nfsd4_ssc_is_inter(copy)) {
/*
@@ -1876,7 +2007,7 @@ out_err:
}
if (async_copy)
cleanup_async_copy(async_copy);
- status = nfserrno(-ENOMEM);
+ status = nfserr_jukebox;
goto out;
}
@@ -1935,7 +2066,7 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_copy_notify *cn = &u->copy_notify;
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- struct nfs4_stid *stid;
+ struct nfs4_stid *stid = NULL;
struct nfs4_cpntf_state *cps;
struct nfs4_client *clp = cstate->clp;
@@ -1944,6 +2075,8 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&stid);
if (status)
return status;
+ if (!stid)
+ return nfserr_bad_stateid;
cn->cpn_lease_time.tv_sec = nn->nfsd4_lease;
cn->cpn_lease_time.tv_nsec = 0;
@@ -2003,11 +2136,16 @@ nfsd4_offload_status(struct svc_rqst *rqstp,
struct nfsd4_copy *copy;
struct nfs4_client *clp = cstate->clp;
+ os->completed = false;
spin_lock(&clp->async_lock);
copy = find_async_copy_locked(clp, &os->stateid);
- if (copy)
+ if (copy) {
os->count = copy->cp_res.wr_bytes_written;
- else
+ if (test_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags)) {
+ os->completed = true;
+ os->status = copy->nfserr;
+ }
+ } else
status = nfserr_bad_stateid;
spin_unlock(&clp->async_lock);
@@ -2154,6 +2292,29 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status == nfserr_same ? nfs_ok : status;
}
+static __be32
+nfsd4_get_dir_delegation(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation;
+
+ /*
+ * RFC 8881, section 18.39.3 says:
+ *
+ * "The server may refuse to grant the delegation. In that case, the
+ * server will return NFS4ERR_DIRDELEG_UNAVAIL."
+ *
+ * This is sub-optimal, since it means that the server would need to
+ * abort compound processing just because the delegation wasn't
+ * available. RFC8881bis should change this to allow the server to
+ * return NFS4_OK with a non-fatal status of GDD4_UNAVAIL in this
+ * situation.
+ */
+ gdd->gddrnf_status = GDD4_UNAVAIL;
+ return nfs_ok;
+}
+
#ifdef CONFIG_NFSD_PNFS
static const struct nfsd4_layout_ops *
nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
@@ -2196,7 +2357,9 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
return nfserr_noent;
}
- exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid);
+ exp = rqst_exp_find(&rqstp->rq_chandle, SVC_NET(rqstp),
+ rqstp->rq_client, rqstp->rq_gssclient,
+ map->fsid_type, map->fsid);
if (IS_ERR(exp)) {
dprintk("%s: could not find device id\n", __func__);
return nfserr_noent;
@@ -2234,7 +2397,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
const struct nfsd4_layout_ops *ops;
struct nfs4_layout_stateid *ls;
__be32 nfserr;
- int accmode = NFSD_MAY_READ_IF_EXEC;
+ int accmode = NFSD_MAY_READ_IF_EXEC | NFSD_MAY_OWNER_OVERRIDE;
switch (lgp->lg_seg.iomode) {
case IOMODE_READ:
@@ -2324,7 +2487,8 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
struct nfs4_layout_stateid *ls;
__be32 nfserr;
- nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE);
+ nfserr = fh_verify(rqstp, current_fh, 0,
+ NFSD_MAY_WRITE | NFSD_MAY_OWNER_OVERRIDE);
if (nfserr)
goto out;
@@ -2739,6 +2903,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (op->opdesc->op_get_currentstateid)
op->opdesc->op_get_currentstateid(cstate, &op->u);
op->status = op->opdesc->op_func(rqstp, cstate, &op->u);
+ trace_nfsd_compound_op_err(rqstp, op->opnum, op->status);
/* Only from SEQUENCE */
if (cstate->status == nfserr_replay_cache) {
@@ -2755,7 +2920,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (current_fh->fh_export &&
need_wrongsec_check(rqstp))
- op->status = check_nfsd_access(current_fh->fh_export, rqstp);
+ op->status = check_nfsd_access(current_fh->fh_export, rqstp, false);
}
encode_op:
if (op->status == nfserr_replay_me) {
@@ -3082,6 +3247,18 @@ static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp,
* sizeof(__be32);
}
+static u32 nfsd4_get_dir_delegation_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size +
+ 1 /* gddr_status */ +
+ op_encode_verifier_maxsz +
+ op_encode_stateid_maxsz +
+ 2 /* gddr_notification */ +
+ 2 /* gddr_child_attributes */ +
+ 2 /* gddr_dir_attributes */);
+}
+
#ifdef CONFIG_NFSD_PNFS
static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp,
const struct nfsd4_op *op)
@@ -3399,6 +3576,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
/* NFSv4.1 operations */
[OP_EXCHANGE_ID] = {
.op_func = nfsd4_exchange_id,
+ .op_release = nfsd4_exchange_id_release,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
| OP_MODIFIES_SOMETHING,
.op_name = "OP_EXCHANGE_ID",
@@ -3470,6 +3648,12 @@ static const struct nfsd4_operation nfsd4_ops[] = {
.op_get_currentstateid = nfsd4_get_freestateid,
.op_rsize_bop = nfsd4_only_status_rsize,
},
+ [OP_GET_DIR_DELEGATION] = {
+ .op_func = nfsd4_get_dir_delegation,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_GET_DIR_DELEGATION",
+ .op_rsize_bop = nfsd4_get_dir_delegation_rsize,
+ },
#ifdef CONFIG_NFSD_PNFS
[OP_GETDEVICEINFO] = {
.op_func = nfsd4_getdeviceinfo,
@@ -3596,7 +3780,8 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow;
u32 opiter;
- if (!cstate->minorversion)
+ if (rqstp->rq_procinfo != &nfsd_version4.vs_proc[NFSPROC4_COMPOUND] ||
+ cstate->minorversion == 0)
return false;
if (cstate->spo_must_allowed)
@@ -3662,7 +3847,7 @@ static const struct svc_procedure nfsd_procedures4[2] = {
.pc_ressize = sizeof(struct nfsd4_compoundres),
.pc_release = nfsd4_release_compoundargs,
.pc_cachetype = RC_NOCACHE,
- .pc_xdrressize = NFSD_BUFSIZE/4,
+ .pc_xdrressize = 3+NFSSVC_MAXBLKSIZE/4,
.pc_name = "COMPOUND",
},
};
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 2c060e0b1604..82785db730d9 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -33,6 +33,7 @@
*/
#include <crypto/hash.h>
+#include <crypto/sha2.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
@@ -82,14 +83,13 @@ nfs4_save_creds(const struct cred **original_creds)
new->fsuid = GLOBAL_ROOT_UID;
new->fsgid = GLOBAL_ROOT_GID;
*original_creds = override_creds(new);
- put_cred(new);
return 0;
}
static void
nfs4_reset_creds(const struct cred *original)
{
- revert_creds(original);
+ put_cred(revert_creds(original));
}
static void
@@ -219,7 +219,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
/* lock the parent */
inode_lock(d_inode(dir));
- dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
+ dentry = lookup_one(&nop_mnt_idmap, &QSTR(dname), dir);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
goto out_unlock;
@@ -234,9 +234,12 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
* as well be forgiving and just succeed silently.
*/
goto out_put;
- status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
+ dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU);
+ if (IS_ERR(dentry))
+ status = PTR_ERR(dentry);
out_put:
- dput(dentry);
+ if (!status)
+ dput(dentry);
out_unlock:
inode_unlock(d_inode(dir));
if (status == 0) {
@@ -314,7 +317,8 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
if (!status) {
struct dentry *dentry;
- dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
+ dentry = lookup_one(&nop_mnt_idmap,
+ &QSTR(entry->name), dir);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
break;
@@ -337,16 +341,16 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
}
static int
-nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
+nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
{
struct dentry *dir, *dentry;
int status;
- dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
+ dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name);
dir = nn->rec_file->f_path.dentry;
inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
- dentry = lookup_one_len(name, dir, namlen);
+ dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
goto out_unlock;
@@ -406,7 +410,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
if (status < 0)
goto out_drop_write;
- status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn);
+ status = nfsd4_unlink_clid_dir(dname, nn);
nfs4_reset_creds(original_cred);
if (status == 0) {
vfs_fsync(nn->rec_file, 0);
@@ -659,7 +663,8 @@ nfs4_reset_recoverydir(char *recdir)
return status;
status = -ENOTDIR;
if (d_is_dir(path.dentry)) {
- strcpy(user_recovery_dirname, recdir);
+ strscpy(user_recovery_dirname, recdir,
+ sizeof(user_recovery_dirname));
status = 0;
}
path_put(&path);
@@ -733,7 +738,6 @@ struct cld_net {
spinlock_t cn_lock;
struct list_head cn_list;
unsigned int cn_xid;
- struct crypto_shash *cn_tfm;
#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
bool cn_has_legacy;
#endif
@@ -809,6 +813,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
ci = &cmsg->cm_u.cm_clntinfo;
if (get_user(namelen, &ci->cc_name.cn_len))
return -EFAULT;
+ if (namelen == 0 || namelen > NFS4_OPAQUE_LIMIT) {
+ dprintk("%s: invalid namelen (%u)", __func__, namelen);
+ return -EINVAL;
+ }
name.data = memdup_user(&ci->cc_name.cn_id, namelen);
if (IS_ERR(name.data))
return PTR_ERR(name.data);
@@ -831,6 +839,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
cnm = &cmsg->cm_u.cm_name;
if (get_user(namelen, &cnm->cn_len))
return -EFAULT;
+ if (namelen == 0 || namelen > NFS4_OPAQUE_LIMIT) {
+ dprintk("%s: invalid namelen (%u)", __func__, namelen);
+ return -EINVAL;
+ }
name.data = memdup_user(&cnm->cn_id, namelen);
if (IS_ERR(name.data))
return PTR_ERR(name.data);
@@ -1051,8 +1063,6 @@ nfsd4_remove_cld_pipe(struct net *net)
nfsd4_cld_unregister_net(net, cn->cn_pipe);
rpc_destroy_pipe_data(cn->cn_pipe);
- if (cn->cn_tfm)
- crypto_free_shash(cn->cn_tfm);
kfree(nn->cld_net);
nn->cld_net = NULL;
}
@@ -1146,8 +1156,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp)
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
struct cld_net *cn = nn->cld_net;
struct cld_msg_v2 *cmsg;
- struct crypto_shash *tfm = cn->cn_tfm;
- struct xdr_netobj cksum;
char *principal = NULL;
/* Don't upcall if it's already stored */
@@ -1170,22 +1178,9 @@ nfsd4_cld_create_v2(struct nfs4_client *clp)
else if (clp->cl_cred.cr_principal)
principal = clp->cl_cred.cr_principal;
if (principal) {
- cksum.len = crypto_shash_digestsize(tfm);
- cksum.data = kmalloc(cksum.len, GFP_KERNEL);
- if (cksum.data == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- ret = crypto_shash_tfm_digest(tfm, principal, strlen(principal),
- cksum.data);
- if (ret) {
- kfree(cksum.data);
- goto out;
- }
- cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len;
- memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
- cksum.data, cksum.len);
- kfree(cksum.data);
+ sha256(principal, strlen(principal),
+ cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data);
+ cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = SHA256_DIGEST_SIZE;
} else
cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
@@ -1195,7 +1190,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp)
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
-out:
free_cld_upcall(cup);
out_err:
if (ret)
@@ -1334,12 +1328,11 @@ found:
static int
nfsd4_cld_check_v2(struct nfs4_client *clp)
{
- struct nfs4_client_reclaim *crp;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
struct cld_net *cn = nn->cld_net;
- int status;
- struct crypto_shash *tfm = cn->cn_tfm;
- struct xdr_netobj cksum;
+#endif
+ struct nfs4_client_reclaim *crp;
char *principal = NULL;
/* did we already find that this client is stable? */
@@ -1355,6 +1348,7 @@ nfsd4_cld_check_v2(struct nfs4_client *clp)
if (cn->cn_has_legacy) {
struct xdr_netobj name;
char dname[HEXDIR_LEN];
+ int status;
status = nfs4_make_rec_clidname(dname, &clp->cl_name);
if (status)
@@ -1377,28 +1371,18 @@ nfsd4_cld_check_v2(struct nfs4_client *clp)
return -ENOENT;
found:
if (crp->cr_princhash.len) {
+ u8 digest[SHA256_DIGEST_SIZE];
+
if (clp->cl_cred.cr_raw_principal)
principal = clp->cl_cred.cr_raw_principal;
else if (clp->cl_cred.cr_principal)
principal = clp->cl_cred.cr_principal;
if (principal == NULL)
return -ENOENT;
- cksum.len = crypto_shash_digestsize(tfm);
- cksum.data = kmalloc(cksum.len, GFP_KERNEL);
- if (cksum.data == NULL)
- return -ENOENT;
- status = crypto_shash_tfm_digest(tfm, principal,
- strlen(principal), cksum.data);
- if (status) {
- kfree(cksum.data);
+ sha256(principal, strlen(principal), digest);
+ if (memcmp(crp->cr_princhash.data, digest,
+ crp->cr_princhash.len))
return -ENOENT;
- }
- if (memcmp(crp->cr_princhash.data, cksum.data,
- crp->cr_princhash.len)) {
- kfree(cksum.data);
- return -ENOENT;
- }
- kfree(cksum.data);
}
crp->cr_clp = clp;
return 0;
@@ -1578,7 +1562,6 @@ nfsd4_cld_tracking_init(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
bool running;
int retries = 10;
- struct crypto_shash *tfm;
status = nfs4_cld_state_init(net);
if (status)
@@ -1603,12 +1586,6 @@ nfsd4_cld_tracking_init(struct net *net)
status = -ETIMEDOUT;
goto err_remove;
}
- tfm = crypto_alloc_shash("sha256", 0, 0);
- if (IS_ERR(tfm)) {
- status = PTR_ERR(tfm);
- goto err_remove;
- }
- nn->cld_net->cn_tfm = tfm;
status = nfsd4_cld_get_version(nn);
if (status == -EOPNOTSUPP)
@@ -1895,10 +1872,7 @@ nfsd4_cltrack_upcall_lock(struct nfs4_client *clp)
static void
nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp)
{
- smp_mb__before_atomic();
- clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags);
- smp_mb__after_atomic();
- wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK);
+ clear_and_wake_up_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags);
}
static void
@@ -2046,7 +2020,6 @@ static inline int check_for_legacy_methods(int status, struct net *net)
path_put(&path);
if (status)
return -ENOTDIR;
- status = nn->client_tracking_ops->init(net);
}
return status;
}
@@ -2086,8 +2059,8 @@ do_init:
status = nn->client_tracking_ops->init(net);
out:
if (status) {
- printk(KERN_WARNING "NFSD: Unable to initialize client "
- "recovery tracking! (%d)\n", status);
+ pr_warn("NFSD: Unable to initialize client recovery tracking! (%d)\n", status);
+ pr_warn("NFSD: Is nfsdcld running? If not, enable CONFIG_NFSD_LEGACY_CLIENT_TRACKING.\n");
nn->client_tracking_ops = NULL;
}
return status;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1a93c7fcf76c..d5694987f86f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -149,14 +149,14 @@ void nfsd4_destroy_laundry_wq(void)
static bool is_session_dead(struct nfsd4_session *ses)
{
- return ses->se_flags & NFS4_SESSION_DEAD;
+ return ses->se_dead;
}
static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me)
{
if (atomic_read(&ses->se_ref) > ref_held_by_me)
return nfserr_jukebox;
- ses->se_flags |= NFS4_SESSION_DEAD;
+ ses->se_dead = true;
return nfs_ok;
}
@@ -400,6 +400,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
.prepare = nfsd4_cb_notify_lock_prepare,
.done = nfsd4_cb_notify_lock_done,
.release = nfsd4_cb_notify_lock_release,
+ .opcode = OP_CB_NOTIFY_LOCK,
};
/*
@@ -541,7 +542,7 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner)
}
static struct nfs4_openowner *
-find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
+find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
struct nfs4_client *clp)
{
struct nfs4_stateowner *so;
@@ -558,18 +559,6 @@ find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
return NULL;
}
-static struct nfs4_openowner *
-find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
- struct nfs4_client *clp)
-{
- struct nfs4_openowner *oo;
-
- spin_lock(&clp->cl_lock);
- oo = find_openstateowner_str_locked(hashval, open, clp);
- spin_unlock(&clp->cl_lock);
- return oo;
-}
-
static inline u32
opaque_hashval(const void *ptr, int nbytes)
{
@@ -583,13 +572,6 @@ opaque_hashval(const void *ptr, int nbytes)
return x;
}
-static void nfsd4_free_file_rcu(struct rcu_head *rcu)
-{
- struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu);
-
- kmem_cache_free(file_slab, fp);
-}
-
void
put_nfs4_file(struct nfs4_file *fi)
{
@@ -597,7 +579,7 @@ put_nfs4_file(struct nfs4_file *fi)
nfsd4_file_hash_remove(fi);
WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
- call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
+ kfree_rcu(fi, fi_rcu);
}
}
@@ -964,15 +946,6 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
spin_lock_init(&stid->sc_lock);
INIT_LIST_HEAD(&stid->sc_cp_list);
- /*
- * It shouldn't be a problem to reuse an opaque stateid value.
- * I don't think it is for 4.1. But with 4.0 I worry that, for
- * example, a stray write retransmission could be accepted by
- * the server when it should have been rejected. Therefore,
- * adopt a trick from the sctp code to attempt to maximize the
- * amount of time until an id is reused, by ensuring they always
- * "increase" (mod INT_MAX):
- */
return stid;
out_free:
kmem_cache_free(slab, stid);
@@ -1068,6 +1041,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
return openlockstateid(stid);
}
+/*
+ * As the sc_free callback of deleg, this may be called by nfs4_put_stid
+ * in nfsd_break_one_deleg.
+ * Considering nfsd_break_one_deleg is called with the flc->flc_lock held,
+ * this function mustn't ever sleep.
+ */
static void nfs4_free_deleg(struct nfs4_stid *stid)
{
struct nfs4_delegation *dp = delegstateid(stid);
@@ -1089,7 +1068,8 @@ static void nfs4_free_deleg(struct nfs4_stid *stid)
* When a delegation is recalled, the filehandle is stored in the "new"
* filter.
* Every 30 seconds we swap the filters and clear the "new" one,
- * unless both are empty of course.
+ * unless both are empty of course. This results in delegations for a
+ * given filehandle being blocked for between 30 and 60 seconds.
*
* Each filter is 256 bits. We hash the filehandle to 32bit and use the
* low 3 bytes as hash-table indices.
@@ -1118,9 +1098,9 @@ static int delegation_blocked(struct knfsd_fh *fh)
if (ktime_get_seconds() - bd->swap_time > 30) {
bd->entries -= bd->old_entries;
bd->old_entries = bd->entries;
+ bd->new = 1-bd->new;
memset(bd->set[bd->new], 0,
sizeof(bd->set[0]));
- bd->new = 1-bd->new;
bd->swap_time = ktime_get_seconds();
}
spin_unlock(&blocked_delegations_lock);
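Editorial aside: the reordering in this hunk flips bd->new before the memset so that the freshly designated "new" filter is the one that gets cleared. A minimal userspace sketch of the two-filter scheme described in the comment above (names, sizes and the check/swap policy are illustrative only, not the kernel implementation):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <time.h>

/* Two 256-bit filters; an entry survives for one or two 30-second windows. */
struct blocked_filters {
	uint8_t set[2][32];
	int newidx;		/* which filter is currently the "new" one */
	time_t swap_time;
};

/* Bloom-style membership test: low three bytes of the hash are bit indices. */
static bool test_filter(const uint8_t set[32], uint32_t hash)
{
	for (int i = 0; i < 3; i++) {
		uint8_t idx = (hash >> (i * 8)) & 0xff;

		if (!(set[idx / 8] & (1u << (idx % 8))))
			return false;
	}
	return true;
}

static bool is_blocked(struct blocked_filters *bd, uint32_t fh_hash)
{
	bool hit = test_filter(bd->set[0], fh_hash) ||
		   test_filter(bd->set[1], fh_hash);

	if (!hit && time(NULL) - bd->swap_time > 30) {
		bd->newidx = 1 - bd->newidx;		/* flip first ...         */
		memset(bd->set[bd->newidx], 0, 32);	/* ... then clear the new */
		bd->swap_time = time(NULL);
	}
	return hit;
}

Because old entries are only dropped when the filter holding them is cleared, a filehandle stays blocked for at least 30 and at most 60 seconds, which is what the updated comment states.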
@@ -1194,7 +1174,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
&nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
dp->dl_cb_fattr.ncf_file_modified = false;
- dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
get_nfs4_file(fp);
dp->dl_stid.sc_file = fp;
return dp;
@@ -1369,21 +1348,48 @@ static void destroy_delegation(struct nfs4_delegation *dp)
destroy_unhashed_deleg(dp);
}
+/**
+ * revoke_delegation - perform nfs4 delegation structure cleanup
+ * @dp: pointer to the delegation
+ *
+ * This function assumes that it's called either from the administrative
+ * interface (nfsd4_revoke_states()) that's revoking a specific delegation
+ * stateid or it's called from a laundromat thread (nfsd4_laundromat()) that
+ * determined that this specific state has expired and needs to be revoked
+ * (both mark state with the appropriate stid sc_status mode). It is also
+ * assumed that a reference was taken on the @dp state.
+ *
+ * If this function finds that the @dp state is SC_STATUS_FREED it means
+ * that a FREE_STATEID operation for this stateid has been processed and
+ * we can proceed to removing it from recalled list. However, if @dp state
+ * isn't marked SC_STATUS_FREED, it means we need to place it on the cl_revoked
+ * list and wait for the FREE_STATEID to arrive from the client. At the same
+ * time, we need to mark it as SC_STATUS_FREEABLE to indicate to the
+ * nfsd4_free_stateid() function that this stateid has already been added
+ * to the cl_revoked list and that nfsd4_free_stateid() is now responsible
+ * for removing it from the list. Inspection of where the delegation state
+ * in the revocation process is protected by the clp->cl_lock.
+ */
static void revoke_delegation(struct nfs4_delegation *dp)
{
struct nfs4_client *clp = dp->dl_stid.sc_client;
WARN_ON(!list_empty(&dp->dl_recall_lru));
+ WARN_ON_ONCE(dp->dl_stid.sc_client->cl_minorversion > 0 &&
+ !(dp->dl_stid.sc_status &
+ (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED)));
trace_nfsd_stid_revoke(&dp->dl_stid);
- if (dp->dl_stid.sc_status &
- (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED)) {
- spin_lock(&clp->cl_lock);
- refcount_inc(&dp->dl_stid.sc_count);
- list_add(&dp->dl_recall_lru, &clp->cl_revoked);
- spin_unlock(&clp->cl_lock);
+ spin_lock(&clp->cl_lock);
+ if (dp->dl_stid.sc_status & SC_STATUS_FREED) {
+ list_del_init(&dp->dl_recall_lru);
+ goto out;
}
+ list_add(&dp->dl_recall_lru, &clp->cl_revoked);
+ dp->dl_stid.sc_status |= SC_STATUS_FREEABLE;
+out:
+ spin_unlock(&clp->cl_lock);
destroy_unhashed_deleg(dp);
}
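The two interleavings that the kernel-doc above describes can be hard to visualise from prose alone; a schematic trace (editorial annotation, not part of the patch):

/*
 * Ordering A: the laundromat (or admin revocation) gets there first.
 *   laundromat: unhash, mark SC_STATUS_REVOKED, call revoke_delegation()
 *               -> not SC_STATUS_FREED, so add to cl_revoked and set
 *                  SC_STATUS_FREEABLE
 *   client:     FREE_STATEID -> nfsd4_free_stateid() finds the stateid on
 *               cl_revoked and removes it
 *
 * Ordering B: the client's FREE_STATEID races ahead.
 *   client:     FREE_STATEID -> stateid is marked SC_STATUS_FREED
 *   laundromat: revoke_delegation() sees SC_STATUS_FREED and simply drops
 *               the entry from the recall list
 *
 * Both paths serialize on clp->cl_lock.
 */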
@@ -1409,11 +1415,16 @@ static void
recalculate_deny_mode(struct nfs4_file *fp)
{
struct nfs4_ol_stateid *stp;
+ u32 old_deny;
spin_lock(&fp->fi_lock);
+ old_deny = fp->fi_share_deny;
fp->fi_share_deny = 0;
- list_for_each_entry(stp, &fp->fi_stateids, st_perfile)
+ list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap);
+ if (fp->fi_share_deny == old_deny)
+ break;
+ }
spin_unlock(&fp->fi_lock);
}
@@ -1639,6 +1650,14 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
free_ol_stateid_reaplist(&reaplist);
}
+static bool nfs4_openowner_unhashed(struct nfs4_openowner *oo)
+{
+ lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
+
+ return list_empty(&oo->oo_owner.so_strhash) &&
+ list_empty(&oo->oo_perclient);
+}
+
static void unhash_openowner_locked(struct nfs4_openowner *oo)
{
struct nfs4_client *clp = oo->oo_owner.so_client;
@@ -1670,9 +1689,7 @@ static void release_openowner(struct nfs4_openowner *oo)
{
struct nfs4_ol_stateid *stp;
struct nfs4_client *clp = oo->oo_owner.so_client;
- struct list_head reaplist;
-
- INIT_LIST_HEAD(&reaplist);
+ LIST_HEAD(reaplist);
spin_lock(&clp->cl_lock);
unhash_openowner_locked(oo);
@@ -1787,6 +1804,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
mutex_unlock(&stp->st_mutex);
break;
case SC_TYPE_DELEG:
+ refcount_inc(&stid->sc_count);
dp = delegstateid(stid);
spin_lock(&state_lock);
if (!unhash_delegation_locked(
@@ -1889,113 +1907,145 @@ gen_sessionid(struct nfsd4_session *ses)
*/
#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
+static struct shrinker *nfsd_slot_shrinker;
+static DEFINE_SPINLOCK(nfsd_session_list_lock);
+static LIST_HEAD(nfsd_session_list);
+/* The sum of "target_slots-1" on every session. The shrinker can push this
+ * down, though it can take a little while for the memory to actually
+ * be freed. The "-1" is because we can never free slot 0 while the
+ * session is active.
+ */
+static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
+
static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
{
int i;
- for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
- free_svc_cred(&ses->se_slots[i]->sl_cred);
- kfree(ses->se_slots[i]);
+ if (from >= ses->se_fchannel.maxreqs)
+ return;
+
+ for (i = from; i < ses->se_fchannel.maxreqs; i++) {
+ struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+
+ /*
+ * Save the seqid in case we reactivate this slot.
+ * This will never require a memory allocation so GFP
+ * flag is irrelevant
+ */
+ xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
+ free_svc_cred(&slot->sl_cred);
+ kfree(slot);
+ }
+ ses->se_fchannel.maxreqs = from;
+ if (ses->se_target_maxslots > from) {
+ int new_target = from ?: 1;
+ atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots);
+ ses->se_target_maxslots = new_target;
}
}
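For reference, the xa_mk_value() store above and the slot-growth path added later in nfsd4_sequence() form a pair; a condensed sketch of that round trip (fragment only, not compilable in isolation):

/* Shrinking a slot: park its last seqid as a tagged integer, no allocation. */
xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
free_svc_cred(&slot->sl_cred);
kfree(slot);

/*
 * Re-growing the slot later: recover the parked seqid so replay detection
 * keeps working, and mark the slot so check_slot_seqid() accepts seqid 1.
 */
prev = xa_load(&ses->se_slots, i);
if (xa_is_value(prev) && slot) {
	slot->sl_seqid = xa_to_value(prev);
	slot->sl_flags |= NFSD4_SLOT_REUSED;
}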
-/*
- * We don't actually need to cache the rpc and session headers, so we
- * can allocate a little less for each slot:
+/**
+ * reduce_session_slots - reduce the target max-slots of a session if possible
+ * @ses: The session to affect
+ * @dec: how much to decrease the target by
+ *
+ * This interface can be used by a shrinker to reduce the target max-slots
+ * for a session so that some slots can eventually be freed.
+ * It uses spin_trylock() as it may be called in a context where another
+ * spinlock is held that has a dependency on client_lock. As shrinkers are
+ * best-effort, skipping a session if client_lock is already held has no
+ * great cost.
+ *
+ * Return value:
+ * The number of slots that the target was reduced by.
*/
-static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
+static int
+reduce_session_slots(struct nfsd4_session *ses, int dec)
{
- u32 size;
+ struct nfsd_net *nn = net_generic(ses->se_client->net,
+ nfsd_net_id);
+ int ret = 0;
- if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ)
- size = 0;
- else
- size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
- return size + sizeof(struct nfsd4_slot);
+ if (ses->se_target_maxslots <= 1)
+ return ret;
+ if (!spin_trylock(&nn->client_lock))
+ return ret;
+ ret = min(dec, ses->se_target_maxslots-1);
+ ses->se_target_maxslots -= ret;
+ atomic_sub(ret, &nfsd_total_target_slots);
+ ses->se_slot_gen += 1;
+ if (ses->se_slot_gen == 0) {
+ int i;
+ ses->se_slot_gen = 1;
+ for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+ struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+ slot->sl_generation = 0;
+ }
+ }
+ spin_unlock(&nn->client_lock);
+ return ret;
}
-/*
- * XXX: If we run out of reserved DRC memory we could (up to a point)
- * re-negotiate active sessions and reduce their slot usage to make
- * room for new connections. For now we just fail the create session.
- */
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
+static struct nfsd4_slot *nfsd4_alloc_slot(struct nfsd4_channel_attrs *fattrs,
+ int index, gfp_t gfp)
{
- u32 slotsize = slot_bytes(ca);
- u32 num = ca->maxreqs;
- unsigned long avail, total_avail;
- unsigned int scale_factor;
+ struct nfsd4_slot *slot;
+ size_t size;
- spin_lock(&nfsd_drc_lock);
- if (nfsd_drc_max_mem > nfsd_drc_mem_used)
- total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
- else
- /* We have handed out more space than we chose in
- * set_max_drc() to allow. That isn't really a
- * problem as long as that doesn't make us think we
- * have lots more due to integer overflow.
- */
- total_avail = 0;
- avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
/*
- * Never use more than a fraction of the remaining memory,
- * unless it's the only way to give this client a slot.
- * The chosen fraction is either 1/8 or 1/number of threads,
- * whichever is smaller. This ensures there are adequate
- * slots to support multiple clients per thread.
- * Give the client one slot even if that would require
- * over-allocation--it is better than failure.
+ * The RPC and NFS session headers are never saved in
+ * the slot reply cache buffer.
*/
- scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
-
- avail = clamp_t(unsigned long, avail, slotsize,
- total_avail/scale_factor);
- num = min_t(int, num, avail / slotsize);
- num = max_t(int, num, 1);
- nfsd_drc_mem_used += num * slotsize;
- spin_unlock(&nfsd_drc_lock);
-
- return num;
-}
-
-static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
-{
- int slotsize = slot_bytes(ca);
+ size = fattrs->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ ?
+ 0 : fattrs->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
- spin_lock(&nfsd_drc_lock);
- nfsd_drc_mem_used -= slotsize * ca->maxreqs;
- spin_unlock(&nfsd_drc_lock);
+ slot = kzalloc(struct_size(slot, sl_data, size), gfp);
+ if (!slot)
+ return NULL;
+ slot->sl_index = index;
+ return slot;
}
static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
struct nfsd4_channel_attrs *battrs)
{
int numslots = fattrs->maxreqs;
- int slotsize = slot_bytes(fattrs);
struct nfsd4_session *new;
+ struct nfsd4_slot *slot;
int i;
- BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
- > PAGE_SIZE);
-
- new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
return NULL;
- /* allocate each struct nfsd4_slot and data cache in one piece */
- for (i = 0; i < numslots; i++) {
- new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
- if (!new->se_slots[i])
- goto out_free;
- }
+ xa_init(&new->se_slots);
- memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
- memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs));
+ slot = nfsd4_alloc_slot(fattrs, 0, GFP_KERNEL);
+ if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL)))
+ goto out_free;
+ for (i = 1; i < numslots; i++) {
+ const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
+ slot = nfsd4_alloc_slot(fattrs, i, gfp);
+ if (!slot)
+ break;
+ if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) {
+ kfree(slot);
+ break;
+ }
+ }
+ fattrs->maxreqs = i;
+ memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+ new->se_target_maxslots = i;
+ atomic_add(i - 1, &nfsd_total_target_slots);
+ new->se_cb_slot_avail = ~0U;
+ new->se_cb_highest_slot = min(battrs->maxreqs - 1,
+ NFSD_BC_SLOT_TABLE_SIZE - 1);
+ spin_lock_init(&new->se_lock);
return new;
out_free:
- while (i--)
- kfree(new->se_slots[i]);
+ kfree(slot);
+ xa_destroy(&new->se_slots);
kfree(new);
return NULL;
}
@@ -2101,17 +2151,47 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
static void __free_session(struct nfsd4_session *ses)
{
- free_session_slots(ses);
+ free_session_slots(ses, 0);
+ xa_destroy(&ses->se_slots);
kfree(ses);
}
static void free_session(struct nfsd4_session *ses)
{
nfsd4_del_conns(ses);
- nfsd4_put_drc_mem(&ses->se_fchannel);
__free_session(ses);
}
+static unsigned long
+nfsd_slot_count(struct shrinker *s, struct shrink_control *sc)
+{
+ unsigned long cnt = atomic_read(&nfsd_total_target_slots);
+
+ return cnt ? cnt : SHRINK_EMPTY;
+}
+
+static unsigned long
+nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc)
+{
+ struct nfsd4_session *ses;
+ unsigned long scanned = 0;
+ unsigned long freed = 0;
+
+ spin_lock(&nfsd_session_list_lock);
+ list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) {
+ freed += reduce_session_slots(ses, 1);
+ scanned += 1;
+ if (scanned >= sc->nr_to_scan) {
+ /* Move starting point for next scan */
+ list_move(&nfsd_session_list, &ses->se_all_sessions);
+ break;
+ }
+ }
+ spin_unlock(&nfsd_session_list_lock);
+ sc->nr_scanned = scanned;
+ return freed;
+}
+
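nfsd_slot_count() and nfsd_slot_scan() are only the callbacks; the shrinker itself is presumably allocated and registered elsewhere in the patch, outside the hunks shown here. A sketch of the expected wiring with the current shrinker API, under that assumption:

	nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-slots");
	if (!nfsd_slot_shrinker)
		return -ENOMEM;
	nfsd_slot_shrinker->count_objects = nfsd_slot_count;
	nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
	shrinker_register(nfsd_slot_shrinker);

	/* ... and on shutdown: */
	shrinker_free(nfsd_slot_shrinker);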
static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
{
int idx;
@@ -2122,17 +2202,24 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
INIT_LIST_HEAD(&new->se_conns);
- new->se_cb_seq_nr = 1;
- new->se_flags = cses->flags;
+ atomic_set(&new->se_ref, 0);
+ new->se_dead = false;
new->se_cb_prog = cses->callback_prog;
new->se_cb_sec = cses->cb_sec;
- atomic_set(&new->se_ref, 0);
+
+ for (idx = 0; idx < NFSD_BC_SLOT_TABLE_SIZE; ++idx)
+ new->se_cb_seq_nr[idx] = 1;
+
idx = hash_sessionid(&new->se_sessionid);
list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
spin_lock(&clp->cl_lock);
list_add(&new->se_perclnt, &clp->cl_sessions);
spin_unlock(&clp->cl_lock);
+ spin_lock(&nfsd_session_list_lock);
+ list_add_tail(&new->se_all_sessions, &nfsd_session_list);
+ spin_unlock(&nfsd_session_list_lock);
+
{
struct sockaddr *sa = svc_addr(rqstp);
/*
@@ -2202,6 +2289,9 @@ unhash_session(struct nfsd4_session *ses)
spin_lock(&ses->se_client->cl_lock);
list_del(&ses->se_perclnt);
spin_unlock(&ses->se_client->cl_lock);
+ spin_lock(&nfsd_session_list_lock);
+ list_del(&ses->se_all_sessions);
+ spin_unlock(&nfsd_session_list_lock);
}
/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
@@ -2219,21 +2309,16 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
return 1;
}
-/*
- * XXX Should we use a slab cache ?
- * This type of memory management is somewhat inefficient, but we use it
- * anyway since SETCLIENTID is not a common operation.
- */
static struct nfs4_client *alloc_client(struct xdr_netobj name,
struct nfsd_net *nn)
{
struct nfs4_client *clp;
int i;
- if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) {
+ if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients &&
+ atomic_read(&nn->nfsd_courtesy_clients) > 0)
mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
- return NULL;
- }
+
clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
if (clp == NULL)
return NULL;
@@ -2245,6 +2330,10 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name,
GFP_KERNEL);
if (!clp->cl_ownerstr_hashtbl)
goto err_no_hashtbl;
+ clp->cl_callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0);
+ if (!clp->cl_callback_wq)
+ goto err_no_callback_wq;
+
for (i = 0; i < OWNER_HASH_SIZE; i++)
INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
INIT_LIST_HEAD(&clp->cl_sessions);
@@ -2267,6 +2356,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name,
spin_lock_init(&clp->cl_lock);
rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
return clp;
+err_no_callback_wq:
+ kfree(clp->cl_ownerstr_hashtbl);
err_no_hashtbl:
kfree(clp->cl_name.data);
err_no_name:
@@ -2280,6 +2371,7 @@ static void __free_client(struct kref *k)
struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs);
free_svc_cred(&clp->cl_cred);
+ destroy_workqueue(clp->cl_callback_wq);
kfree(clp->cl_ownerstr_hashtbl);
kfree(clp->cl_name.data);
kfree(clp->cl_nii_domain.data);
@@ -2335,8 +2427,12 @@ unhash_client_locked(struct nfs4_client *clp)
}
list_del_init(&clp->cl_lru);
spin_lock(&clp->cl_lock);
- list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ spin_lock(&nfsd_session_list_lock);
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) {
list_del_init(&ses->se_hash);
+ list_del_init(&ses->se_all_sessions);
+ }
+ spin_unlock(&nfsd_session_list_lock);
spin_unlock(&clp->cl_lock);
}
@@ -2352,7 +2448,11 @@ unhash_client(struct nfs4_client *clp)
static __be32 mark_client_expired_locked(struct nfs4_client *clp)
{
- if (atomic_read(&clp->cl_rpc_users))
+ int users = atomic_read(&clp->cl_rpc_users);
+
+ trace_nfsd_mark_client_expired(clp, users);
+
+ if (users)
return nfserr_jukebox;
unhash_client_locked(clp);
return nfs_ok;
@@ -2365,9 +2465,8 @@ __destroy_client(struct nfs4_client *clp)
int i;
struct nfs4_openowner *oo;
struct nfs4_delegation *dp;
- struct list_head reaplist;
+ LIST_HEAD(reaplist);
- INIT_LIST_HEAD(&reaplist);
spin_lock(&state_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
@@ -2655,6 +2754,7 @@ static const char *cb_state2str(int state)
static int client_info_show(struct seq_file *m, void *v)
{
struct inode *inode = file_inode(m->file);
+ struct nfsd4_session *ses;
struct nfs4_client *clp;
u64 clid;
@@ -2688,9 +2788,19 @@ static int client_info_show(struct seq_file *m, void *v)
clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
}
seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
- seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
+ seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
seq_printf(m, "admin-revoked states: %d\n",
atomic_read(&clp->cl_admin_revoked));
+ spin_lock(&clp->cl_lock);
+ seq_printf(m, "session slots:");
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+ seq_printf(m, "\nsession target slots:");
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ seq_printf(m, " %u", ses->se_target_maxslots);
+ spin_unlock(&clp->cl_lock);
+ seq_puts(m, "\n");
+
drop_client(clp);
return 0;
@@ -2785,15 +2895,18 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
- spin_lock(&nf->fi_lock);
- file = find_any_file_locked(nf);
- if (file) {
- nfs4_show_superblock(s, file);
- seq_puts(s, ", ");
- nfs4_show_fname(s, file);
- seq_puts(s, ", ");
- }
- spin_unlock(&nf->fi_lock);
+ if (nf) {
+ spin_lock(&nf->fi_lock);
+ file = find_any_file_locked(nf);
+ if (file) {
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ seq_puts(s, ", ");
+ }
+ spin_unlock(&nf->fi_lock);
+ } else
+ seq_puts(s, "closed, ");
nfs4_show_owner(s, oo);
if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
seq_puts(s, ", admin-revoked");
@@ -2840,6 +2953,21 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
return 0;
}
+static char *nfs4_show_deleg_type(u32 dl_type)
+{
+ switch (dl_type) {
+ case OPEN_DELEGATE_READ:
+ return "r";
+ case OPEN_DELEGATE_WRITE:
+ return "w";
+ case OPEN_DELEGATE_READ_ATTRS_DELEG:
+ return "ra";
+ case OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ return "wa";
+ }
+ return "?";
+}
+
static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
{
struct nfs4_delegation *ds;
@@ -2853,8 +2981,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
nfs4_show_stateid(s, &st->sc_stateid);
seq_puts(s, ": { type: deleg, ");
- seq_printf(s, "access: %s",
- ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
+ seq_printf(s, "access: %s", nfs4_show_deleg_type(ds->dl_type));
/* XXX: lease time, whether it's being recalled. */
@@ -3042,12 +3169,8 @@ static void
nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- spin_lock(&nn->client_lock);
- clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
- put_client_renew_locked(clp);
- spin_unlock(&nn->client_lock);
+ drop_client(clp);
}
static int
@@ -3055,7 +3178,10 @@ nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
struct nfs4_cb_fattr *ncf =
container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+ struct nfs4_delegation *dp =
+ container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
+ trace_nfsd_cb_getattr_done(&dp->dl_stid.sc_stateid, task);
ncf->ncf_cb_status = task->tk_status;
switch (task->tk_status) {
case -NFS4ERR_DELAY:
@@ -3075,18 +3201,18 @@ nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
nfs4_put_stid(&dp->dl_stid);
- clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
- wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
}
static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
.done = nfsd4_cb_recall_any_done,
.release = nfsd4_cb_recall_any_release,
+ .opcode = OP_CB_RECALL_ANY,
};
static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
.done = nfsd4_cb_getattr_done,
.release = nfsd4_cb_getattr_release,
+ .opcode = OP_CB_GETATTR,
};
static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
@@ -3094,11 +3220,15 @@ static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
struct nfs4_delegation *dp =
container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
- if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
+ if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ncf->ncf_getattr.cb_flags))
return;
+
/* set to proper status when nfsd4_cb_getattr_done runs */
ncf->ncf_cb_status = NFS4ERR_IO;
+ /* ensure that wake_bit is done when RUNNING is cleared */
+ set_bit(NFSD4_CALLBACK_WAKE, &ncf->ncf_getattr.cb_flags);
+
refcount_inc(&dp->dl_stid.sc_count);
nfsd4_run_cb(&ncf->ncf_getattr);
}
@@ -3126,7 +3256,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
kref_init(&clp->cl_nfsdfs.cl_ref);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
clp->cl_time = ktime_get_boottime_seconds();
- clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
clp->cl_cb_session = NULL;
@@ -3453,7 +3582,7 @@ static bool client_has_state(struct nfs4_client *clp)
#endif
|| !list_empty(&clp->cl_delegations)
|| !list_empty(&clp->cl_sessions)
- || !list_empty(&clp->async_copies);
+ || nfsd4_has_active_async_copies(clp);
}
static __be32 copy_impl_id(struct nfs4_client *clp,
@@ -3491,6 +3620,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__func__, rqstp, exid, exid->clname.len, exid->clname.data,
addr_str, exid->flags, exid->spa_how);
+ exid->server_impl_name = kasprintf(GFP_KERNEL, "%s %s %s %s",
+ utsname()->sysname, utsname()->release,
+ utsname()->version, utsname()->machine);
+ if (!exid->server_impl_name)
+ return nfserr_jukebox;
+
if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
return nfserr_inval;
@@ -3628,6 +3763,23 @@ out_copy:
exid->seqid = conf->cl_cs_slot.sl_seqid + 1;
nfsd4_set_ex_flags(conf, exid);
+ exid->nii_domain.len = sizeof("kernel.org") - 1;
+ exid->nii_domain.data = "kernel.org";
+
+ /*
+ * Note that RFC 8881 places no length limit on
+ * nii_name, but this implementation permits no
+ * more than NFS4_OPAQUE_LIMIT bytes.
+ */
+ exid->nii_name.len = strlen(exid->server_impl_name);
+ if (exid->nii_name.len > NFS4_OPAQUE_LIMIT)
+ exid->nii_name.len = NFS4_OPAQUE_LIMIT;
+ exid->nii_name.data = exid->server_impl_name;
+
+ /* just send zeros - the date is in nii_name */
+ exid->nii_time.tv_sec = 0;
+ exid->nii_time.tv_nsec = 0;
+
dprintk("nfsd4_exchange_id seqid %d flags %x\n",
conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags);
status = nfs_ok;
@@ -3644,14 +3796,18 @@ out_nolock:
return status;
}
-static __be32
-check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
+void
+nfsd4_exchange_id_release(union nfsd4_op_u *u)
{
- dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
- slot_seqid);
+ struct nfsd4_exchange_id *exid = &u->exchange_id;
+ kfree(exid->server_impl_name);
+}
+
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
+{
/* The slot is in use, and no response has been sent. */
- if (slot_inuse) {
+ if (flags & NFSD4_SLOT_INUSE) {
if (seqid == slot_seqid)
return nfserr_jukebox;
else
@@ -3660,6 +3816,8 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
/* Note unsigned 32-bit arithmetic handles wraparound: */
if (likely(seqid == slot_seqid + 1))
return nfs_ok;
+ if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+ return nfs_ok;
if (seqid == slot_seqid)
return nfserr_replay_cache;
return nfserr_seq_misordered;
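The rewritten check_slot_seqid() is easiest to read as a decision table; worked outcomes for a slot whose cached sl_seqid is 7 (editorial summary):

/*
 *   incoming seqid 8                -> nfs_ok               (next in order)
 *   incoming seqid 8, slot INUSE    -> nfserr_seq_misordered
 *   incoming seqid 7, slot INUSE    -> nfserr_jukebox       (original still running)
 *   incoming seqid 7                -> nfserr_replay_cache  (retransmission)
 *   incoming seqid 1, slot REUSED   -> nfs_ok               (slot re-grown after shrink)
 *   anything else                   -> nfserr_seq_misordered
 */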
@@ -3718,17 +3876,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
- /*
- * Note decreasing slot size below client's request may make it
- * difficult for client to function correctly, whereas
- * decreasing the number of slots will (just?) affect
- * performance. When short on memory we therefore prefer to
- * decrease number of slots instead of their size. Clients that
- * request larger slots than they need will get poor results:
- * Note that we always allow at least one slot, because our
- * accounting is soft and provides no guarantees either way.
- */
- ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
return nfs_ok;
}
@@ -3806,11 +3953,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
return status;
status = check_backchannel_attrs(&cr_ses->back_channel);
if (status)
- goto out_release_drc_mem;
+ goto out_err;
status = nfserr_jukebox;
new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
if (!new)
- goto out_release_drc_mem;
+ goto out_err;
conn = alloc_conn_from_crses(rqstp, cr_ses);
if (!conn)
goto out_free_session;
@@ -3826,20 +3973,28 @@ nfsd4_create_session(struct svc_rqst *rqstp,
}
/* RFC 8881 Section 18.36.4 Phase 2: Sequence ID processing. */
- if (conf)
+ if (conf) {
cs_slot = &conf->cl_cs_slot;
- else
+ trace_nfsd_slot_seqid_conf(conf, cr_ses);
+ } else {
cs_slot = &unconf->cl_cs_slot;
+ trace_nfsd_slot_seqid_unconf(unconf, cr_ses);
+ }
status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
- if (status) {
- if (status == nfserr_replay_cache) {
- status = nfsd4_replay_create_session(cr_ses, cs_slot);
- goto out_free_conn;
- }
+ switch (status) {
+ case nfs_ok:
+ cs_slot->sl_seqid++;
+ cr_ses->seqid = cs_slot->sl_seqid;
+ break;
+ case nfserr_replay_cache:
+ status = nfsd4_replay_create_session(cr_ses, cs_slot);
+ fallthrough;
+ case nfserr_jukebox:
+ /* The server MUST NOT cache NFS4ERR_DELAY */
+ goto out_free_conn;
+ default:
goto out_cache_error;
}
- cs_slot->sl_seqid++;
- cr_ses->seqid = cs_slot->sl_seqid;
/* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */
if (conf) {
@@ -3859,10 +4014,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
status = mark_client_expired_locked(old);
- if (status) {
- old = NULL;
- goto out_cache_error;
- }
+ if (status)
+ goto out_expired_error;
trace_nfsd_clid_replaced(&old->cl_clientid);
}
move_to_confirmed(unconf);
@@ -3875,6 +4028,8 @@ nfsd4_create_session(struct svc_rqst *rqstp,
cr_ses->flags &= ~SESSION4_PERSIST;
/* Upshifting from TCP to RDMA is not supported */
cr_ses->flags &= ~SESSION4_RDMA;
+ /* Report the correct number of backchannel slots */
+ cr_ses->back_channel.maxreqs = new->se_cb_highest_slot + 1;
init_session(rqstp, new, conf, cr_ses);
nfsd4_get_session_locked(new);
@@ -3894,17 +4049,24 @@ nfsd4_create_session(struct svc_rqst *rqstp,
expire_client(old);
return status;
+out_expired_error:
+ /*
+ * Revert the slot seq_nr change so the server will process
+ * the client's resend instead of returning a cached response.
+ */
+ if (status == nfserr_jukebox) {
+ cs_slot->sl_seqid--;
+ cr_ses->seqid = cs_slot->sl_seqid;
+ goto out_free_conn;
+ }
out_cache_error:
nfsd4_cache_create_session(cr_ses, cs_slot, status);
out_free_conn:
spin_unlock(&nn->client_lock);
free_conn(conn);
- if (old)
- expire_client(old);
out_free_session:
__free_session(new);
-out_release_drc_mem:
- nfsd4_put_drc_mem(&cr_ses->fore_channel);
+out_err:
return status;
}
@@ -4202,16 +4364,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (seq->slotid >= session->se_fchannel.maxreqs)
goto out_put_session;
- slot = session->se_slots[seq->slotid];
+ slot = xa_load(&session->se_slots, seq->slotid);
dprintk("%s: slotid %d\n", __func__, seq->slotid);
- /* We do not negotiate the number of slots yet, so set the
- * maxslots to the session maxreqs which is used to encode
- * sr_highest_slotid and the sr_target_slot id to maxslots */
- seq->maxslots = session->se_fchannel.maxreqs;
-
- status = check_slot_seqid(seq->seqid, slot->sl_seqid,
- slot->sl_flags & NFSD4_SLOT_INUSE);
+ trace_nfsd_slot_seqid_sequence(clp, seq, slot);
+ status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
if (status == nfserr_replay_cache) {
status = nfserr_seq_misordered;
if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4236,6 +4393,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out_put_session;
+ if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+ slot->sl_generation == session->se_slot_gen &&
+ seq->maxslots <= session->se_target_maxslots)
+ /* Client acknowledged our reduced maxreqs */
+ free_session_slots(session, session->se_target_maxslots);
+
buflen = (seq->cachethis) ?
session->se_fchannel.maxresp_cached :
session->se_fchannel.maxresp_sz;
@@ -4243,12 +4406,14 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfserr_rep_too_big;
if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
goto out_put_session;
- svc_reserve(rqstp, buflen);
+ svc_reserve_auth(rqstp, buflen);
status = nfs_ok;
- /* Success! bump slot seqid */
+ /* Success! accept new slot seqid */
slot->sl_seqid = seq->seqid;
+ slot->sl_flags &= ~NFSD4_SLOT_REUSED;
slot->sl_flags |= NFSD4_SLOT_INUSE;
+ slot->sl_generation = session->se_slot_gen;
if (seq->cachethis)
slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
else
@@ -4258,7 +4423,51 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->session = session;
cstate->clp = clp;
+ /*
+ * If the client ever uses the highest available slot,
+ * gently try to allocate another 20%. This allows
+ * fairly quick growth without grossly over-shooting what
+ * the client might use.
+ */
+ if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+ session->se_target_maxslots >= session->se_fchannel.maxreqs &&
+ session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
+ int s = session->se_fchannel.maxreqs;
+ int cnt = DIV_ROUND_UP(s, 5);
+ void *prev_slot;
+
+ do {
+ /*
+ * GFP_NOWAIT both allows allocation under a
+ * spinlock, and only succeeds if there is
+ * plenty of memory.
+ */
+ slot = nfsd4_alloc_slot(&session->se_fchannel, s,
+ GFP_NOWAIT);
+ prev_slot = xa_load(&session->se_slots, s);
+ if (xa_is_value(prev_slot) && slot) {
+ slot->sl_seqid = xa_to_value(prev_slot);
+ slot->sl_flags |= NFSD4_SLOT_REUSED;
+ }
+ if (slot &&
+ !xa_is_err(xa_store(&session->se_slots, s, slot,
+ GFP_NOWAIT))) {
+ s += 1;
+ session->se_fchannel.maxreqs = s;
+ atomic_add(s - session->se_target_maxslots,
+ &nfsd_total_target_slots);
+ session->se_target_maxslots = s;
+ } else {
+ kfree(slot);
+ slot = NULL;
+ }
+ } while (slot && --cnt > 0);
+ }
+
out:
+ seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+ seq->target_maxslots = session->se_target_maxslots;
+
switch (clp->cl_cb_state) {
case NFSD4_CB_DOWN:
seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
@@ -4610,8 +4819,8 @@ out:
static unsigned long
nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
- int count;
struct nfsd_net *nn = shrink->private_data;
+ long count;
count = atomic_read(&nn->nfsd_courtesy_clients);
if (!count)
@@ -4651,21 +4860,32 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
atomic_set(&nn->nfsd_courtesy_clients, 0);
}
+enum rp_lock {
+ RP_UNLOCKED,
+ RP_LOCKED,
+ RP_UNHASHED,
+};
+
static void init_nfs4_replay(struct nfs4_replay *rp)
{
rp->rp_status = nfserr_serverfault;
rp->rp_buflen = 0;
rp->rp_buf = rp->rp_ibuf;
- mutex_init(&rp->rp_mutex);
+ rp->rp_locked = RP_UNLOCKED;
}
-static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
- struct nfs4_stateowner *so)
+static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
+ struct nfs4_stateowner *so)
{
if (!nfsd4_has_session(cstate)) {
- mutex_lock(&so->so_replay.rp_mutex);
+ wait_var_event(&so->so_replay.rp_locked,
+ cmpxchg(&so->so_replay.rp_locked,
+ RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
+ if (so->so_replay.rp_locked == RP_UNHASHED)
+ return -EAGAIN;
cstate->replay_owner = nfs4_get_stateowner(so);
}
+ return 0;
}
void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
@@ -4674,7 +4894,7 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
if (so != NULL) {
cstate->replay_owner = NULL;
- mutex_unlock(&so->so_replay.rp_mutex);
+ store_release_wake_up(&so->so_replay.rp_locked, RP_UNLOCKED);
nfs4_put_stateowner(so);
}
}
@@ -4855,34 +5075,46 @@ nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
}
static struct nfs4_openowner *
-alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
- struct nfsd4_compound_state *cstate)
+find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
+ struct nfsd4_compound_state *cstate)
{
struct nfs4_client *clp = cstate->clp;
- struct nfs4_openowner *oo, *ret;
+ struct nfs4_openowner *oo, *new = NULL;
- oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
- if (!oo)
- return NULL;
- oo->oo_owner.so_ops = &openowner_ops;
- oo->oo_owner.so_is_open_owner = 1;
- oo->oo_owner.so_seqid = open->op_seqid;
- oo->oo_flags = 0;
- if (nfsd4_has_session(cstate))
- oo->oo_flags |= NFS4_OO_CONFIRMED;
- oo->oo_time = 0;
- oo->oo_last_closed_stid = NULL;
- INIT_LIST_HEAD(&oo->oo_close_lru);
+retry:
spin_lock(&clp->cl_lock);
- ret = find_openstateowner_str_locked(strhashval, open, clp);
- if (ret == NULL) {
- hash_openowner(oo, clp, strhashval);
- ret = oo;
- } else
- nfs4_free_stateowner(&oo->oo_owner);
-
+ oo = find_openstateowner_str(strhashval, open, clp);
+ if (!oo && new) {
+ hash_openowner(new, clp, strhashval);
+ spin_unlock(&clp->cl_lock);
+ return new;
+ }
spin_unlock(&clp->cl_lock);
- return ret;
+
+ if (oo && !(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+ /* Replace unconfirmed owners without checking for replay. */
+ release_openowner(oo);
+ oo = NULL;
+ }
+ if (oo) {
+ if (new)
+ nfs4_free_stateowner(&new->oo_owner);
+ return oo;
+ }
+
+ new = alloc_stateowner(openowner_slab, &open->op_owner, clp);
+ if (!new)
+ return NULL;
+ new->oo_owner.so_ops = &openowner_ops;
+ new->oo_owner.so_is_open_owner = 1;
+ new->oo_owner.so_seqid = open->op_seqid;
+ new->oo_flags = 0;
+ if (nfsd4_has_session(cstate))
+ new->oo_flags |= NFS4_OO_CONFIRMED;
+ new->oo_time = 0;
+ new->oo_last_closed_stid = NULL;
+ INIT_LIST_HEAD(&new->oo_close_lru);
+ goto retry;
}
static struct nfs4_ol_stateid *
@@ -4902,6 +5134,12 @@ retry:
spin_lock(&oo->oo_owner.so_client->cl_lock);
spin_lock(&fp->fi_lock);
+ if (nfs4_openowner_unhashed(oo)) {
+ mutex_unlock(&stp->st_mutex);
+ stp = NULL;
+ goto out_unlock;
+ }
+
retstp = nfsd4_find_existing_open(fp, open);
if (retstp)
goto out_unlock;
@@ -4958,7 +5196,10 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
* Wait for the refcount to drop to 2. Since it has been unhashed,
* there should be no danger of the refcount going back up again at
* this point.
+ * Some threads with a reference might be waiting for rp_locked,
+ * so tell them to stop waiting.
*/
+ store_release_wake_up(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
release_all_access(s);
@@ -5172,10 +5413,16 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
.prepare = nfsd4_cb_recall_prepare,
.done = nfsd4_cb_recall_done,
.release = nfsd4_cb_recall_release,
+ .opcode = OP_CB_RECALL,
};
static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{
+ bool queued;
+
+ if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags))
+ return;
+
/*
* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
@@ -5184,7 +5431,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
* we know it's safe to take a reference.
*/
refcount_inc(&dp->dl_stid.sc_count);
- WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
+ queued = nfsd4_run_cb(&dp->dl_recall);
+ WARN_ON_ONCE(!queued);
+ if (!queued)
+ refcount_dec(&dp->dl_stid.sc_count);
}
/* Called from break_lease() with flc_lock held. */
@@ -5231,11 +5481,8 @@ static bool nfsd_breaker_owns_lease(struct file_lease *fl)
struct svc_rqst *rqst;
struct nfs4_client *clp;
- if (!i_am_nfsd())
- return false;
- rqst = kthread_data(current);
- /* Note rq_prog == NFS_ACL_PROGRAM is also possible: */
- if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4)
+ rqst = nfsd_current_rqst();
+ if (!nfsd_v4client(rqst))
return false;
clp = *(rqst->rq_lease_breaker);
return dl->dl_stid.sc_client == clp;
@@ -5331,27 +5578,19 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
clp = cstate->clp;
strhashval = ownerstr_hashval(&open->op_owner);
- oo = find_openstateowner_str(strhashval, open, clp);
+retry:
+ oo = find_or_alloc_open_stateowner(strhashval, open, cstate);
open->op_openowner = oo;
- if (!oo) {
- goto new_owner;
- }
- if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
- /* Replace unconfirmed owners without checking for replay. */
- release_openowner(oo);
- open->op_openowner = NULL;
- goto new_owner;
+ if (!oo)
+ return nfserr_jukebox;
+ if (nfsd4_cstate_assign_replay(cstate, &oo->oo_owner) == -EAGAIN) {
+ nfs4_put_stateowner(&oo->oo_owner);
+ goto retry;
}
status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
if (status)
return status;
- goto alloc_stateid;
-new_owner:
- oo = alloc_init_open_stateowner(strhashval, open, cstate);
- if (oo == NULL)
- return nfserr_jukebox;
- open->op_openowner = oo;
-alloc_stateid:
+
open->op_stp = nfs4_alloc_open_stateid(clp);
if (!open->op_stp)
return nfserr_jukebox;
@@ -5369,7 +5608,7 @@ alloc_stateid:
static inline __be32
nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
{
- if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+ if (!(flags & RD_STATE) && deleg_is_read(dp->dl_type))
return nfserr_openmode;
else
return nfs_ok;
@@ -5601,8 +5840,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}
-static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
- int flag)
+static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp)
{
struct file_lease *fl;
@@ -5611,7 +5849,7 @@ static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
return NULL;
fl->fl_lmops = &nfsd_lease_mng_ops;
fl->c.flc_flags = FL_DELEG;
- fl->c.flc_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+ fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK;
fl->c.flc_owner = (fl_owner_t)dp;
fl->c.flc_pid = current->tgid;
fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
@@ -5722,17 +5960,30 @@ nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf)
return 0;
}
+#ifdef CONFIG_NFSD_V4_DELEG_TIMESTAMPS
+static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open)
+{
+ return open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS;
+}
+#else /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */
+static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open)
+{
+ return false;
+}
+#endif /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */
+
static struct nfs4_delegation *
nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *parent)
{
- int status = 0;
+ bool deleg_ts = nfsd4_want_deleg_timestamps(open);
struct nfs4_client *clp = stp->st_stid.sc_client;
struct nfs4_file *fp = stp->st_stid.sc_file;
struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
struct nfs4_delegation *dp;
struct nfsd_file *nf = NULL;
struct file_lease *fl;
+ int status = 0;
u32 dl_type;
/*
@@ -5757,7 +6008,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
*/
if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
nf = find_rw_file(fp);
- dl_type = NFS4_OPEN_DELEGATE_WRITE;
+ dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE;
}
/*
@@ -5766,12 +6017,21 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
*/
if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
nf = find_readable_file(fp);
- dl_type = NFS4_OPEN_DELEGATE_READ;
+ dl_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ;
}
if (!nf)
return ERR_PTR(-EAGAIN);
+ /*
+ * File delegations and associated locks cannot be recovered if the
+ * export is from an NFS proxy server.
+ */
+ if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) {
+ nfsd_file_put(nf);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
if (nfs4_delegation_exists(clp, fp))
@@ -5798,7 +6058,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (!dp)
goto out_delegees;
- fl = nfs4_alloc_init_lease(dp, dl_type);
+ fl = nfs4_alloc_init_lease(dp);
if (!fl)
goto out_clnt_odstate;
@@ -5821,7 +6081,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
/*
* Now that the deleg is set, check again to ensure that nothing
- * raced in and changed the mode while we weren't lookng.
+ * raced in and changed the mode while we weren't looking.
*/
status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file);
if (status)
@@ -5855,25 +6115,47 @@ out_delegees:
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
{
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
if (status == -EAGAIN)
open->op_why_no_deleg = WND4_CONTENTION;
else {
open->op_why_no_deleg = WND4_RESOURCE;
switch (open->op_deleg_want) {
- case NFS4_SHARE_WANT_READ_DELEG:
- case NFS4_SHARE_WANT_WRITE_DELEG:
- case NFS4_SHARE_WANT_ANY_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG:
break;
- case NFS4_SHARE_WANT_CANCEL:
+ case OPEN4_SHARE_ACCESS_WANT_CANCEL:
open->op_why_no_deleg = WND4_CANCELLED;
break;
- case NFS4_SHARE_WANT_NO_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_NO_DELEG:
WARN_ON_ONCE(1);
}
}
}
+static bool
+nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh,
+ struct kstat *stat)
+{
+ struct nfsd_file *nf = find_rw_file(dp->dl_stid.sc_file);
+ struct path path;
+ int rc;
+
+ if (!nf)
+ return false;
+
+ path.mnt = currentfh->fh_export->ex_path.mnt;
+ path.dentry = file_dentry(nf->nf_file);
+
+ rc = vfs_getattr(&path, stat,
+ (STATX_MODE | STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
+ AT_STATX_SYNC_AS_STAT);
+
+ nfsd_file_put(nf);
+ return rc == 0;
+}
+
/*
* The Linux NFS server does not offer write delegations to NFSv4.0
* clients in order to avoid conflicts between write delegations and
@@ -5902,14 +6184,14 @@ static void
nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *currentfh)
{
- struct nfs4_delegation *dp;
struct nfs4_openowner *oo = openowner(stp->st_stateowner);
+ bool deleg_ts = nfsd4_want_deleg_timestamps(open);
struct nfs4_client *clp = stp->st_stid.sc_client;
struct svc_fh *parent = NULL;
- int cb_up;
- int status = 0;
+ struct nfs4_delegation *dp;
struct kstat stat;
- struct path path;
+ int status = 0;
+ int cb_up;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
open->op_recall = false;
@@ -5945,30 +6227,27 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
- trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
- path.mnt = currentfh->fh_export->ex_path.mnt;
- path.dentry = currentfh->fh_dentry;
- if (vfs_getattr(&path, &stat,
- (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
- AT_STATX_SYNC_AS_STAT)) {
+ if (!nfs4_delegation_stat(dp, currentfh, &stat)) {
nfs4_put_stid(&dp->dl_stid);
destroy_delegation(dp);
goto out_no_deleg;
}
+ open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG :
+ OPEN_DELEGATE_WRITE;
dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
- dp->dl_cb_fattr.ncf_initial_cinfo =
- nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
+ dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat);
+ trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
} else {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG :
+ OPEN_DELEGATE_READ;
trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
}
nfs4_put_stid(&dp->dl_stid);
return;
out_no_deleg:
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
+ open->op_delegate_type = OPEN_DELEGATE_NONE;
if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
- open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) {
+ open->op_delegate_type != OPEN_DELEGATE_NONE) {
dprintk("NFSD: WARNING: refusing delegation reclaim\n");
open->op_recall = true;
}
@@ -5982,21 +6261,32 @@ out_no_deleg:
static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
struct nfs4_delegation *dp)
{
- if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
- dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
- open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
- } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
- dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
- open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+ if (deleg_is_write(dp->dl_type)) {
+ if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
+ open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
+ } else if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
+ open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+ }
}
/* Otherwise the client must be confused wanting a delegation
* it already has, therefore we don't return
- * NFS4_OPEN_DELEGATE_NONE_EXT and reason.
+ * OPEN_DELEGATE_NONE_EXT and reason.
*/
}
+/* Are we returning only a delegation stateid? */
+static bool open_xor_delegation(struct nfsd4_open *open)
+{
+ if (!(open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION))
+ return false;
+ /* Did we actually get a delegation? */
+ if (!deleg_is_read(open->op_delegate_type) && !deleg_is_write(open->op_delegate_type))
+ return false;
+ return true;
+}
+
/**
* nfsd4_process_open2 - finish open processing
* @rqstp: the RPC transaction being executed
@@ -6042,6 +6332,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (!stp) {
stp = init_open_stateid(fp, open);
+ if (!stp) {
+ status = nfserr_jukebox;
+ goto out;
+ }
+
if (!open->op_stp)
new_stp = true;
}
@@ -6077,8 +6372,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
mutex_unlock(&stp->st_mutex);
if (nfsd4_has_session(&resp->cstate)) {
- if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_NO_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
open->op_why_no_deleg = WND4_NOT_WANTED;
goto nodeleg;
}
@@ -6089,12 +6384,23 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
* OPEN succeeds even if we fail.
*/
nfs4_open_delegation(open, stp, &resp->cstate.current_fh);
+
+ /*
+ * If there is an existing open stateid, it must be updated and
+ * returned. Only respect WANT_OPEN_XOR_DELEGATION when a new
+ * open stateid would have to be created.
+ */
+ if (new_stp && open_xor_delegation(open)) {
+ memcpy(&open->op_stateid, &zero_stateid, sizeof(open->op_stateid));
+ open->op_rflags |= OPEN4_RESULT_NO_OPEN_STATEID;
+ release_open_stateid(stp);
+ }
nodeleg:
status = nfs_ok;
trace_nfsd_open(&stp->st_stid.sc_stateid);
out:
/* 4.1 client trying to upgrade/downgrade delegation? */
- if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
+ if (open->op_delegate_type == OPEN_DELEGATE_NONE && dp &&
open->op_deleg_want)
nfsd4_deleg_xgrade_none_ext(open, dp);
@@ -6105,7 +6411,7 @@ out:
/*
* To finish the open response, we just need to set the rflags.
*/
- open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
+ open->op_rflags |= NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
if (nfsd4_has_session(&resp->cstate))
open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK;
else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED))
@@ -6122,12 +6428,8 @@ out:
void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
struct nfsd4_open *open)
{
- if (open->op_openowner) {
- struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
-
- nfsd4_cstate_assign_replay(cstate, so);
- nfs4_put_stateowner(so);
- }
+ if (open->op_openowner)
+ nfs4_put_stateowner(&open->op_openowner->oo_owner);
if (open->op_file)
kmem_cache_free(file_slab, open->op_file);
if (open->op_stp)
@@ -6237,7 +6539,6 @@ void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list);
init_waitqueue_head(&nn->nfsd_ssc_waitq);
}
-EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work);
/*
* This is called when nfsd is being shutdown, after all inter_ssc
@@ -6482,6 +6783,7 @@ nfs4_laundromat(struct nfsd_net *nn)
_free_cpntf_state_locked(nn, cps);
}
spin_unlock(&nn->s2s_cp_lock);
+ nfsd4_async_copy_reaper(nn);
nfs4_get_client_reaplist(nn, &reaplist, &lt);
nfs4_process_client_reaplist(&reaplist);
@@ -6492,6 +6794,7 @@ nfs4_laundromat(struct nfsd_net *nn)
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
if (!state_expired(&lt, dp->dl_time))
break;
+ refcount_inc(&dp->dl_stid.sc_count);
unhash_delegation_locked(dp, SC_STATUS_REVOKED);
list_add(&dp->dl_recall_lru, &reaplist);
}
@@ -6585,40 +6888,34 @@ deleg_reaper(struct nfsd_net *nn)
{
struct list_head *pos, *next;
struct nfs4_client *clp;
- struct list_head cblist;
- INIT_LIST_HEAD(&cblist);
spin_lock(&nn->client_lock);
list_for_each_safe(pos, next, &nn->client_lru) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
- if (clp->cl_state != NFSD4_ACTIVE ||
- list_empty(&clp->cl_delegations) ||
- atomic_read(&clp->cl_delegs_in_recall) ||
- test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
- (ktime_get_boottime_seconds() -
- clp->cl_ra_time < 5)) {
+
+ if (clp->cl_state != NFSD4_ACTIVE)
+ continue;
+ if (list_empty(&clp->cl_delegations))
+ continue;
+ if (atomic_read(&clp->cl_delegs_in_recall))
+ continue;
+ if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &clp->cl_ra->ra_cb.cb_flags))
+ continue;
+ if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5)
+ continue;
+ if (clp->cl_cb_state != NFSD4_CB_UP)
continue;
- }
- list_add(&clp->cl_ra_cblist, &cblist);
/* release in nfsd4_cb_recall_any_release */
- atomic_inc(&clp->cl_rpc_users);
- set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ kref_get(&clp->cl_nfsdfs.cl_ref);
clp->cl_ra_time = ktime_get_boottime_seconds();
- }
- spin_unlock(&nn->client_lock);
-
- while (!list_empty(&cblist)) {
- clp = list_first_entry(&cblist, struct nfs4_client,
- cl_ra_cblist);
- list_del_init(&clp->cl_ra_cblist);
clp->cl_ra->ra_keep = 0;
- clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG);
clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
BIT(RCA4_TYPE_MASK_WDATA_DLG);
trace_nfsd_cb_recall_any(clp->cl_ra);
nfsd4_run_cb(&clp->cl_ra->ra_cb);
}
+ spin_unlock(&nn->client_lock);
}
static void
@@ -6783,7 +7080,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
*/
statusmask |= SC_STATUS_REVOKED;
- statusmask |= SC_STATUS_ADMIN_REVOKED;
+ statusmask |= SC_STATUS_ADMIN_REVOKED | SC_STATUS_FREEABLE;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
@@ -6858,7 +7155,8 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
nf = nfs4_find_file(s, flags);
if (nf) {
- status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+ status = nfsd_permission(&rqstp->rq_cred,
+ fhp->fh_export, fhp->fh_dentry,
acc | NFSD_MAY_OWNER_OVERRIDE);
if (status) {
nfsd_file_put(nf);
@@ -6989,11 +7287,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
*nfp = NULL;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
- if (cstid)
- status = nfserr_bad_stateid;
- else
- status = check_special_stateids(net, fhp, stateid,
- flags);
+ status = check_special_stateids(net, fhp, stateid, flags);
goto done;
}
@@ -7106,9 +7400,12 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
switch (s->sc_type) {
case SC_TYPE_DELEG:
if (s->sc_status & SC_STATUS_REVOKED) {
+ s->sc_status |= SC_STATUS_CLOSED;
spin_unlock(&s->sc_lock);
dp = delegstateid(s);
- list_del_init(&dp->dl_recall_lru);
+ if (s->sc_status & SC_STATUS_FREEABLE)
+ list_del_init(&dp->dl_recall_lru);
+ s->sc_status |= SC_STATUS_FREED;
spin_unlock(&cl->cl_lock);
nfs4_put_stid(s);
ret = nfs_ok;
@@ -7191,12 +7488,16 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
trace_nfsd_preprocess(seqid, stateid);
*stpp = NULL;
+retry:
status = nfsd4_lookup_stateid(cstate, stateid,
typemask, statusmask, &s, nn);
if (status)
return status;
stp = openlockstateid(s);
- nfsd4_cstate_assign_replay(cstate, stp->st_stateowner);
+ if (nfsd4_cstate_assign_replay(cstate, stp->st_stateowner) == -EAGAIN) {
+ nfs4_put_stateowner(stp->st_stateowner);
+ goto retry;
+ }
status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
if (!status)
@@ -7338,7 +7639,7 @@ out:
return status;
}
-static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
+static bool nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
{
struct nfs4_client *clp = s->st_stid.sc_client;
bool unhashed;
@@ -7355,11 +7656,11 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
list_for_each_entry(stp, &reaplist, st_locks)
nfs4_free_cpntf_statelist(clp->net, &stp->st_stid);
free_ol_stateid_reaplist(&reaplist);
+ return false;
} else {
spin_unlock(&clp->cl_lock);
free_ol_stateid_reaplist(&reaplist);
- if (unhashed)
- move_to_close_lru(s, clp->net);
+ return unhashed;
}
}
@@ -7375,6 +7676,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_ol_stateid *stp;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ bool need_move_to_close_list;
dprintk("NFSD: nfsd4_close on file %pd\n",
cstate->current_fh.fh_dentry);
@@ -7399,8 +7701,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
*/
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
- nfsd4_close_open_stateid(stp);
+ need_move_to_close_list = nfsd4_close_open_stateid(stp);
mutex_unlock(&stp->st_mutex);
+ if (need_move_to_close_list)
+ move_to_close_lru(stp, net);
/* v4.1+ suggests that we send a special stateid in here, since the
* clients should just ignore this anyway. Since this is not useful
@@ -7431,7 +7735,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
return status;
- status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, 0, &s, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED, &s, nn);
if (status)
goto out;
dp = delegstateid(s);
@@ -7440,8 +7744,9 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto put_stateid;
trace_nfsd_deleg_return(stateid);
- wake_up_var(d_inode(cstate->current_fh.fh_dentry));
destroy_delegation(dp);
+ smp_mb__after_atomic();
+ wake_up_var(d_inode(cstate->current_fh.fh_dentry));
put_stateid:
nfs4_put_stid(&dp->dl_stid);
out:
@@ -7538,7 +7843,7 @@ nfsd4_lm_notify(struct file_lock *fl)
if (queue) {
trace_nfsd_cb_notify_lock(lo, nbl);
- nfsd4_run_cb(&nbl->nbl_cb);
+ nfsd4_try_run_cb(&nbl->nbl_cb);
}
}
@@ -7837,7 +8142,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
- struct super_block *sb;
__be32 status = 0;
int lkflg;
int err;
@@ -7854,12 +8158,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (check_lock_length(lock->lk_offset, lock->lk_length))
return nfserr_inval;
- if ((status = fh_verify(rqstp, &cstate->current_fh,
- S_IFREG, NFSD_MAY_LOCK))) {
- dprintk("NFSD: nfsd4_lock: permission denied!\n");
+ status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
+ if (status != nfs_ok)
return status;
+ if (exportfs_cannot_lock(cstate->current_fh.fh_dentry->d_sb->s_export_op)) {
+ status = nfserr_notsupp;
+ goto out;
}
- sb = cstate->current_fh.fh_dentry->d_sb;
if (lock->lk_is_new) {
if (nfsd4_has_session(cstate))
@@ -7912,9 +8217,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
- if (nfsd4_has_session(cstate) ||
- exportfs_lock_op_is_async(sb->s_export_op))
- flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
spin_lock(&fp->fi_lock);
@@ -7925,9 +8227,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
- if (nfsd4_has_session(cstate) ||
- exportfs_lock_op_is_async(sb->s_export_op))
- flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
spin_lock(&fp->fi_lock);
@@ -7947,15 +8246,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
- /*
- * Most filesystems with their own ->lock operations will block
- * the nfsd thread waiting to acquire the lock. That leads to
- * deadlocks (we don't want every nfsd thread tied up waiting
- * for file locks), so don't attempt blocking lock notifications
- * on those filesystems:
- */
- if (!exportfs_lock_op_is_async(sb->s_export_op))
- flags &= ~FL_SLEEP;
+ if (lock->lk_type & (NFS4_READW_LT | NFS4_WRITEW_LT) &&
+ nfsd4_has_session(cstate) &&
+ locks_can_async_lock(nf->nf_file->f_op))
+ flags |= FL_SLEEP;
nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
if (!nbl) {
@@ -8210,6 +8504,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_lock_range;
goto put_stateid;
}
+ if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) {
+ status = nfserr_notsupp;
+ goto put_file;
+ }
+
file_lock = locks_alloc_lock();
if (!file_lock) {
dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
@@ -8297,7 +8596,7 @@ out:
* @cstate: NFSv4 COMPOUND state
* @u: RELEASE_LOCKOWNER arguments
*
- * Check if theree are any locks still held and if not - free the lockowner
+ * Check if there are any locks still held and if not, free the lockowner
* and any lock state that is owned.
*
* Return values:
@@ -8516,6 +8815,7 @@ static int nfs4_state_create_net(struct net *net)
spin_lock_init(&nn->client_lock);
spin_lock_init(&nn->s2s_cp_lock);
idr_init(&nn->s2s_cp_stateids);
+ atomic_set(&nn->pending_async_copies, 0);
spin_lock_init(&nn->blocked_locks_lock);
INIT_LIST_HEAD(&nn->blocked_locks_lru);
@@ -8604,7 +8904,6 @@ skip_grace:
}
/* initialization to perform when the nfsd service is started: */
-
int
nfs4_state_start(void)
{
@@ -8614,11 +8913,14 @@ nfs4_state_start(void)
if (ret)
return ret;
- ret = nfsd4_create_callback_queue();
- if (ret) {
+ nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot");
+ if (!nfsd_slot_shrinker) {
rhltable_destroy(&nfs4_file_rhltable);
- return ret;
+ return -ENOMEM;
}
+ nfsd_slot_shrinker->count_objects = nfsd_slot_count;
+ nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
+ shrinker_register(nfsd_slot_shrinker);
set_max_delegations();
return 0;
@@ -8632,7 +8934,7 @@ nfs4_state_shutdown_net(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
shrinker_free(nn->nfsd_client_shrinker);
- cancel_work(&nn->nfsd_shrinker_work);
+ cancel_work_sync(&nn->nfsd_shrinker_work);
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
@@ -8660,8 +8962,8 @@ nfs4_state_shutdown_net(struct net *net)
void
nfs4_state_shutdown(void)
{
- nfsd4_destroy_callback_queue();
rhltable_destroy(&nfs4_file_rhltable);
+ shrinker_free(nfsd_slot_shrinker);
}
static void
@@ -8779,11 +9081,82 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
}
/**
+ * set_cb_time - vet and set the timespec for a cb_getattr update
+ * @cb: timestamp from the CB_GETATTR response
+ * @orig: original timestamp in the inode
+ * @now: current time
+ *
+ * Given a timestamp in a CB_GETATTR response, check it against the
+ * current timestamp in the inode and the current time. Returns true
+ * if the inode's timestamp needs to be updated, and false otherwise.
+ * @cb may also be changed if the timestamp needs to be clamped.
+ */
+static bool set_cb_time(struct timespec64 *cb, const struct timespec64 *orig,
+ const struct timespec64 *now)
+{
+
+ /*
+ * "When the time presented is before the original time, then the
+ * update is ignored." Also no need to update if there is no change.
+ */
+ if (timespec64_compare(cb, orig) <= 0)
+ return false;
+
+ /*
+ * "When the time presented is in the future, the server can either
+ * clamp the new time to the current time, or it may
+ * return NFS4ERR_DELAY to the client, allowing it to retry."
+ */
+ if (timespec64_compare(cb, now) > 0) {
+ /* clamp it */
+ *cb = *now;
+ }
+
+ return true;
+}
+
+static int cb_getattr_update_times(struct dentry *dentry, struct nfs4_delegation *dp)
+{
+ struct inode *inode = d_inode(dentry);
+ struct timespec64 now = current_time(inode);
+ struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
+ struct iattr attrs = { };
+ int ret;
+
+ if (deleg_attrs_deleg(dp->dl_type)) {
+ struct timespec64 atime = inode_get_atime(inode);
+ struct timespec64 mtime = inode_get_mtime(inode);
+
+ attrs.ia_atime = ncf->ncf_cb_atime;
+ attrs.ia_mtime = ncf->ncf_cb_mtime;
+
+ if (set_cb_time(&attrs.ia_atime, &atime, &now))
+ attrs.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
+
+ if (set_cb_time(&attrs.ia_mtime, &mtime, &now)) {
+ attrs.ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET;
+ attrs.ia_ctime = attrs.ia_mtime;
+ }
+ } else {
+ attrs.ia_valid |= ATTR_MTIME | ATTR_CTIME;
+ attrs.ia_mtime = attrs.ia_ctime = now;
+ }
+
+ if (!attrs.ia_valid)
+ return 0;
+
+ attrs.ia_valid |= ATTR_DELEG;
+ inode_lock(inode);
+ ret = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL);
+ inode_unlock(inode);
+ return ret;
+}
+
+/**
* nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
* @rqstp: RPC transaction context
- * @inode: file to be checked for a conflict
- * @modified: return true if file was modified
- * @size: new size of file if modified is true
+ * @dentry: dentry of inode to be checked for a conflict
+ * @pdp: returned WRITE delegation, if one was found
*
* This function is called when there is a conflict between a write
* delegation and a change/size GETATTR from another client. The server
@@ -8793,83 +9166,90 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* 18.7.4.
*
* Returns 0 if there is no conflict; otherwise an nfs_stat
- * code is returned.
+ * code is returned. If @pdp is set to a non-NULL value, then the
+ * caller must put the reference.
*/
__be32
-nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
- bool *modified, u64 *size)
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
+ struct nfs4_delegation **pdp)
{
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file_lock_context *ctx;
+ struct nfs4_delegation *dp = NULL;
struct file_lease *fl;
- struct nfs4_delegation *dp;
- struct iattr attrs;
struct nfs4_cb_fattr *ncf;
+ struct inode *inode = d_inode(dentry);
- *modified = false;
ctx = locks_inode_context(inode);
if (!ctx)
- return 0;
+ return nfs_ok;
+
+#define NON_NFSD_LEASE ((void *)1)
+
spin_lock(&ctx->flc_lock);
for_each_file_lock(fl, &ctx->flc_lease) {
- unsigned char type = fl->c.flc_type;
-
if (fl->c.flc_flags == FL_LAYOUT)
continue;
- if (fl->fl_lmops != &nfsd_lease_mng_ops) {
- /*
- * non-nfs lease, if it's a lease with F_RDLCK then
- * we are done; there isn't any write delegation
- * on this inode
- */
- if (type == F_RDLCK)
- break;
- goto break_lease;
- }
- if (type == F_WRLCK) {
- dp = fl->c.flc_owner;
- if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
- spin_unlock(&ctx->flc_lock);
- return 0;
- }
-break_lease:
- nfsd_stats_wdeleg_getattr_inc(nn);
- dp = fl->c.flc_owner;
- ncf = &dp->dl_cb_fattr;
- nfs4_cb_getattr(&dp->dl_cb_fattr);
- spin_unlock(&ctx->flc_lock);
- wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY,
- TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
- if (ncf->ncf_cb_status) {
- /* Recall delegation only if client didn't respond */
- status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
- if (status != nfserr_jukebox ||
- !nfsd_wait_for_delegreturn(rqstp, inode))
- return status;
- }
- if (!ncf->ncf_file_modified &&
- (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
- ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
- ncf->ncf_file_modified = true;
- if (ncf->ncf_file_modified) {
- /*
- * Per section 10.4.3 of RFC 8881, the server would
- * not update the file's metadata with the client's
- * modified size
- */
- attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
- attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
- setattr_copy(&nop_mnt_idmap, inode, &attrs);
- mark_inode_dirty(inode);
- ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
- *size = ncf->ncf_cur_fsize;
- *modified = true;
- }
- return 0;
+ if (fl->c.flc_type == F_WRLCK) {
+ if (fl->fl_lmops == &nfsd_lease_mng_ops)
+ dp = fl->c.flc_owner;
+ else
+ dp = NON_NFSD_LEASE;
}
break;
}
+ if (dp == NULL || dp == NON_NFSD_LEASE ||
+ dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
+ spin_unlock(&ctx->flc_lock);
+ if (dp == NON_NFSD_LEASE) {
+ status = nfserrno(nfsd_open_break_lease(inode,
+ NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ return status;
+ }
+ return 0;
+ }
+
+ nfsd_stats_wdeleg_getattr_inc(nn);
+ refcount_inc(&dp->dl_stid.sc_count);
+ ncf = &dp->dl_cb_fattr;
+ nfs4_cb_getattr(&dp->dl_cb_fattr);
spin_unlock(&ctx->flc_lock);
- return 0;
+
+ wait_on_bit_timeout(&ncf->ncf_getattr.cb_flags, NFSD4_CALLBACK_RUNNING,
+ TASK_UNINTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
+ if (ncf->ncf_cb_status) {
+ /* Recall delegation only if client didn't respond */
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ goto out_status;
+ }
+ if (!ncf->ncf_file_modified &&
+ (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
+ ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
+ ncf->ncf_file_modified = true;
+ if (ncf->ncf_file_modified) {
+ int err;
+
+ /*
+ * Per section 10.4.3 of RFC 8881, the server would
+ * not update the file's metadata with the client's
+ * modified size
+ */
+ err = cb_getattr_update_times(dentry, dp);
+ if (err) {
+ status = nfserrno(err);
+ goto out_status;
+ }
+ ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
+ *pdp = dp;
+ return nfs_ok;
+ }
+ status = nfs_ok;
+out_status:
+ nfs4_put_stid(&dp->dl_stid);
+ return status;
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fac938f563ad..3afcdbed6e14 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -55,6 +55,7 @@
#include "netns.h"
#include "pnfs.h"
#include "filecache.h"
+#include "nfs4xdr_gen.h"
#include "trace.h"
@@ -118,11 +119,11 @@ static int zero_clientid(clientid_t *clid)
* operation described in @argp finishes.
*/
static void *
-svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
+svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, size_t len)
{
struct svcxdr_tmpbuf *tb;
- tb = kmalloc(sizeof(*tb) + len, GFP_KERNEL);
+ tb = kmalloc(struct_size(tb, buf, len), GFP_KERNEL);
if (!tb)
return NULL;
tb->next = argp->to_free;
@@ -138,9 +139,9 @@ svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
* buffer might end on a page boundary.
*/
static char *
-svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
+svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, size_t len)
{
- char *p = svcxdr_tmpalloc(argp, len + 1);
+ char *p = svcxdr_tmpalloc(argp, size_add(len, 1));
if (!p)
return NULL;
@@ -150,7 +151,7 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
}
static void *
-svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, u32 len)
+svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, size_t len)
{
__be32 *tmp;
@@ -520,6 +521,26 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
*umask = mask & S_IRWXUGO;
iattr->ia_valid |= ATTR_MODE;
}
+ if (bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) {
+ fattr4_time_deleg_access access;
+
+ if (!xdrgen_decode_fattr4_time_deleg_access(argp->xdr, &access))
+ return nfserr_bad_xdr;
+ iattr->ia_atime.tv_sec = access.seconds;
+ iattr->ia_atime.tv_nsec = access.nseconds;
+ iattr->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET | ATTR_DELEG;
+ }
+ if (bmval[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) {
+ fattr4_time_deleg_modify modify;
+
+ if (!xdrgen_decode_fattr4_time_deleg_modify(argp->xdr, &modify))
+ return nfserr_bad_xdr;
+ iattr->ia_mtime.tv_sec = modify.seconds;
+ iattr->ia_mtime.tv_nsec = modify.nseconds;
+ iattr->ia_ctime.tv_sec = modify.seconds;
+		iattr->ia_ctime.tv_nsec = modify.nseconds;
+ iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET | ATTR_DELEG;
+ }
/* request sanity: did attrlist4 contain the expected number of words? */
if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos)
@@ -1066,13 +1087,13 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh
return nfs_ok;
if (!argp->minorversion)
return nfserr_bad_xdr;
- switch (w & NFS4_SHARE_WANT_MASK) {
- case NFS4_SHARE_WANT_NO_PREFERENCE:
- case NFS4_SHARE_WANT_READ_DELEG:
- case NFS4_SHARE_WANT_WRITE_DELEG:
- case NFS4_SHARE_WANT_ANY_DELEG:
- case NFS4_SHARE_WANT_NO_DELEG:
- case NFS4_SHARE_WANT_CANCEL:
+ switch (w & NFS4_SHARE_WANT_TYPE_MASK) {
+ case OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE:
+ case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_NO_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_CANCEL:
break;
default:
return nfserr_bad_xdr;
@@ -1246,14 +1267,6 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
}
static __be32
-nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
-{
- if (argp->minorversion == 0)
- return nfs_ok;
- return nfserr_notsupp;
-}
-
-static __be32
nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
struct nfsd4_read *read = &u->read;
@@ -1732,6 +1745,35 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,
return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid);
}
+static __be32
+nfsd4_decode_get_dir_delegation(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation;
+ __be32 status;
+
+ memset(gdd, 0, sizeof(*gdd));
+
+ if (xdr_stream_decode_bool(argp->xdr, &gdd->gdda_signal_deleg_avail) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_bitmap4(argp, gdd->gdda_notification_types,
+ ARRAY_SIZE(gdd->gdda_notification_types));
+ if (status)
+ return status;
+ status = nfsd4_decode_nfstime4(argp, &gdd->gdda_child_attr_delay);
+ if (status)
+ return status;
+ status = nfsd4_decode_nfstime4(argp, &gdd->gdda_dir_attr_delay);
+ if (status)
+ return status;
+ status = nfsd4_decode_bitmap4(argp, gdd->gdda_child_attributes,
+ ARRAY_SIZE(gdd->gdda_child_attributes));
+ if (status)
+ return status;
+ return nfsd4_decode_bitmap4(argp, gdd->gdda_dir_attributes,
+ ARRAY_SIZE(gdd->gdda_dir_attributes));
+}
+
#ifdef CONFIG_NFSD_PNFS
static __be32
nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
@@ -1863,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
return nfserr_bad_xdr;
seq->seqid = be32_to_cpup(p++);
seq->slotid = be32_to_cpup(p++);
- seq->maxslots = be32_to_cpup(p++);
+ /* sa_highest_slotid counts from 0 but maxslots counts from 1 ... */
+ seq->maxslots = be32_to_cpup(p++) + 1;
seq->cachethis = be32_to_cpup(p);
seq->status_flags = 0;
@@ -2117,7 +2160,7 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
*/
static __be32
nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr,
- char **bufp, u32 buflen)
+ char **bufp, size_t buflen)
{
struct page **pages = xdr->pages;
struct kvec *head = xdr->head;
@@ -2345,7 +2388,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
[OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm,
[OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade,
[OP_PUTFH] = nfsd4_decode_putfh,
- [OP_PUTPUBFH] = nfsd4_decode_putpubfh,
+ [OP_PUTPUBFH] = nfsd4_decode_noop,
[OP_PUTROOTFH] = nfsd4_decode_noop,
[OP_READ] = nfsd4_decode_read,
[OP_READDIR] = nfsd4_decode_readdir,
@@ -2370,7 +2413,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
[OP_CREATE_SESSION] = nfsd4_decode_create_session,
[OP_DESTROY_SESSION] = nfsd4_decode_destroy_session,
[OP_FREE_STATEID] = nfsd4_decode_free_stateid,
- [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp,
+ [OP_GET_DIR_DELEGATION] = nfsd4_decode_get_dir_delegation,
#ifdef CONFIG_NFSD_PNFS
[OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo,
[OP_GETDEVICELIST] = nfsd4_decode_notsupp,
@@ -2521,7 +2564,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
/* Sessions make the DRC unnecessary: */
if (argp->minorversion)
cachethis = false;
- svc_reserve(argp->rqstp, max_reply + readbytes);
+ svc_reserve_auth(argp->rqstp, max_reply + readbytes);
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
argp->splice_ok = nfsd_read_splice_ok(argp->rqstp);
@@ -2631,13 +2674,10 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep,
strlen = end - str;
if (strlen) {
- p = xdr_reserve_space(xdr, strlen + 4);
- if (!p)
+ if (xdr_stream_encode_opaque(xdr, str, strlen) < 0)
return nfserr_resource;
- p = xdr_encode_opaque(p, str, strlen);
count++;
- }
- else
+ } else
end++;
if (found_esc)
end = next;
@@ -2678,7 +2718,6 @@ static __be32 nfsd4_encode_pathname4(struct xdr_stream *xdr,
const struct path *path)
{
struct path cur = *path;
- __be32 *p;
struct dentry **components = NULL;
unsigned int ncomponents = 0;
__be32 err = nfserr_jukebox;
@@ -2709,24 +2748,19 @@ static __be32 nfsd4_encode_pathname4(struct xdr_stream *xdr,
components[ncomponents++] = cur.dentry;
cur.dentry = dget_parent(cur.dentry);
}
+
err = nfserr_resource;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
+ if (xdr_stream_encode_u32(xdr, ncomponents) != XDR_UNIT)
goto out_free;
- *p++ = cpu_to_be32(ncomponents);
-
while (ncomponents) {
struct dentry *dentry = components[ncomponents - 1];
- unsigned int len;
spin_lock(&dentry->d_lock);
- len = dentry->d_name.len;
- p = xdr_reserve_space(xdr, len + 4);
- if (!p) {
+ if (xdr_stream_encode_opaque(xdr, dentry->d_name.name,
+ dentry->d_name.len) < 0) {
spin_unlock(&dentry->d_lock);
goto out_free;
}
- p = xdr_encode_opaque(p, dentry->d_name.name, len);
dprintk("/%pd", dentry);
spin_unlock(&dentry->d_lock);
dput(dentry);
@@ -2806,11 +2840,11 @@ static __be32 nfsd4_encode_nfsace4(struct xdr_stream *xdr, struct svc_rqst *rqst
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
static inline __be32
nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
- void *context, int len)
+ const struct lsm_context *context)
{
__be32 *p;
- p = xdr_reserve_space(xdr, len + 4 + 4 + 4);
+ p = xdr_reserve_space(xdr, context->len + 4 + 4 + 4);
if (!p)
return nfserr_resource;
@@ -2820,13 +2854,13 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
*/
*p++ = cpu_to_be32(0); /* lfs */
*p++ = cpu_to_be32(0); /* pi */
- p = xdr_encode_opaque(p, context, len);
+ p = xdr_encode_opaque(p, context->context, context->len);
return 0;
}
#else
static inline __be32
nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
- void *context, int len)
+ struct lsm_context *context)
{ return 0; }
#endif
@@ -2907,10 +2941,9 @@ struct nfsd4_fattr_args {
struct kstat stat;
struct kstatfs statfs;
struct nfs4_acl *acl;
- u64 size;
+ u64 change_attr;
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
- void *context;
- int contextlen;
+ struct lsm_context context;
#endif
u32 rdattr_err;
bool contextsupport;
@@ -3007,7 +3040,6 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr,
const struct nfsd4_fattr_args *args)
{
const struct svc_export *exp = args->exp;
- u64 c;
if (unlikely(exp->ex_flags & NFSEXP_V4ROOT)) {
u32 flush_time = convert_to_wallclock(exp->cd->flush_time);
@@ -3018,15 +3050,13 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr,
return nfserr_resource;
return nfs_ok;
}
-
- c = nfsd4_change_attribute(&args->stat, d_inode(args->dentry));
- return nfsd4_encode_changeid4(xdr, c);
+ return nfsd4_encode_changeid4(xdr, args->change_attr);
}
static __be32 nfsd4_encode_fattr4_size(struct xdr_stream *xdr,
const struct nfsd4_fattr_args *args)
{
- return nfsd4_encode_uint64_t(xdr, args->size);
+ return nfsd4_encode_uint64_t(xdr, args->stat.size);
}
static __be32 nfsd4_encode_fattr4_fsid(struct xdr_stream *xdr,
@@ -3361,12 +3391,28 @@ static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr,
return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]);
}
+/*
+ * Copied from generic_remap_checks/generic_remap_file_range_prep.
+ *
+ * These generic functions use the file system's s_blocksize, but
+ * individual file systems aren't required to use
+ * generic_remap_file_range_prep. Until there is a mechanism for
+ * determining a particular file system's (or file's) clone block
+ * size, this is the best NFSD can do.
+ */
+static __be32 nfsd4_encode_fattr4_clone_blksize(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ struct inode *inode = d_inode(args->dentry);
+
+ return nfsd4_encode_uint32_t(xdr, inode->i_sb->s_blocksize);
+}
+
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
static __be32 nfsd4_encode_fattr4_sec_label(struct xdr_stream *xdr,
const struct nfsd4_fattr_args *args)
{
- return nfsd4_encode_security_label(xdr, args->rqstp,
- args->context, args->contextlen);
+ return nfsd4_encode_security_label(xdr, args->rqstp, &args->context);
}
#endif
@@ -3378,6 +3424,56 @@ static __be32 nfsd4_encode_fattr4_xattr_support(struct xdr_stream *xdr,
return nfsd4_encode_bool(xdr, err == 0);
}
+#define NFSD_OA_SHARE_ACCESS (BIT(OPEN_ARGS_SHARE_ACCESS_READ) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WRITE) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_BOTH))
+
+#define NFSD_OA_SHARE_DENY (BIT(OPEN_ARGS_SHARE_DENY_NONE) | \
+ BIT(OPEN_ARGS_SHARE_DENY_READ) | \
+ BIT(OPEN_ARGS_SHARE_DENY_WRITE) | \
+ BIT(OPEN_ARGS_SHARE_DENY_BOTH))
+
+#define NFSD_OA_SHARE_ACCESS_WANT (BIT(OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION))
+
+#define NFSD_OA_OPEN_CLAIM (BIT(OPEN_ARGS_OPEN_CLAIM_NULL) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_PREVIOUS) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV)| \
+ BIT(OPEN_ARGS_OPEN_CLAIM_FH) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH))
+
+#define NFSD_OA_CREATE_MODE (BIT(OPEN_ARGS_CREATEMODE_UNCHECKED4) | \
+ BIT(OPEN_ARGS_CREATE_MODE_GUARDED) | \
+ BIT(OPEN_ARGS_CREATEMODE_EXCLUSIVE4) | \
+ BIT(OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1))
+
+static uint32_t oa_share_access = NFSD_OA_SHARE_ACCESS;
+static uint32_t oa_share_deny = NFSD_OA_SHARE_DENY;
+static uint32_t oa_share_access_want = NFSD_OA_SHARE_ACCESS_WANT;
+static uint32_t oa_open_claim = NFSD_OA_OPEN_CLAIM;
+static uint32_t oa_create_mode = NFSD_OA_CREATE_MODE;
+
+static const struct open_arguments4 nfsd_open_arguments = {
+ .oa_share_access = { .count = 1, .element = &oa_share_access },
+ .oa_share_deny = { .count = 1, .element = &oa_share_deny },
+ .oa_share_access_want = { .count = 1, .element = &oa_share_access_want },
+ .oa_open_claim = { .count = 1, .element = &oa_open_claim },
+ .oa_create_mode = { .count = 1, .element = &oa_create_mode },
+};
+
+static __be32 nfsd4_encode_fattr4_open_arguments(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ if (!xdrgen_encode_fattr4_open_arguments(xdr, &nfsd_open_arguments))
+ return nfserr_resource;
+ return nfs_ok;
+}
+
static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
[FATTR4_SUPPORTED_ATTRS] = nfsd4_encode_fattr4_supported_attrs,
[FATTR4_TYPE] = nfsd4_encode_fattr4_type,
@@ -3466,7 +3562,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
[FATTR4_MODE_SET_MASKED] = nfsd4_encode_fattr4__noop,
[FATTR4_SUPPATTR_EXCLCREAT] = nfsd4_encode_fattr4_suppattr_exclcreat,
[FATTR4_FS_CHARSET_CAP] = nfsd4_encode_fattr4__noop,
- [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4__noop,
+ [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4_clone_blksize,
[FATTR4_SPACE_FREED] = nfsd4_encode_fattr4__noop,
[FATTR4_CHANGE_ATTR_TYPE] = nfsd4_encode_fattr4__noop,
@@ -3478,6 +3574,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
[FATTR4_MODE_UMASK] = nfsd4_encode_fattr4__noop,
[FATTR4_XATTR_SUPPORT] = nfsd4_encode_fattr4_xattr_support,
+ [FATTR4_OPEN_ARGUMENTS] = nfsd4_encode_fattr4_open_arguments,
};
/*
@@ -3490,11 +3587,14 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
struct dentry *dentry, const u32 *bmval,
int ignore_crossmnt)
{
+ DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+ struct nfs4_delegation *dp = NULL;
struct nfsd4_fattr_args args;
struct svc_fh *tempfh = NULL;
int starting_len = xdr->buf->len;
- __be32 *attrlen_p, status;
- int attrlen_offset;
+ unsigned int attrlen_offset;
+ __be32 attrlen, status;
+ u32 attrmask[3];
int err;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
u32 minorversion = resp->cstate.minorversion;
@@ -3502,13 +3602,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
.mnt = exp->ex_path.mnt,
.dentry = dentry,
};
- union {
- u32 attrmask[3];
- unsigned long mask[2];
- } u;
unsigned long bit;
- bool file_modified = false;
- u64 size = 0;
WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
@@ -3517,26 +3611,31 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
args.exp = exp;
args.dentry = dentry;
args.ignore_crossmnt = (ignore_crossmnt != 0);
+ args.acl = NULL;
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+ args.context.context = NULL;
+#endif
/*
* Make a local copy of the attribute bitmap that can be modified.
*/
- memset(&u, 0, sizeof(u));
- u.attrmask[0] = bmval[0];
- u.attrmask[1] = bmval[1];
- u.attrmask[2] = bmval[2];
+ attrmask[0] = bmval[0];
+ attrmask[1] = bmval[1];
+ attrmask[2] = bmval[2];
args.rdattr_err = 0;
if (exp->ex_fslocs.migrated) {
- status = fattr_handle_absent_fs(&u.attrmask[0], &u.attrmask[1],
- &u.attrmask[2], &args.rdattr_err);
+ status = fattr_handle_absent_fs(&attrmask[0], &attrmask[1],
+ &attrmask[2], &args.rdattr_err);
if (status)
goto out;
}
- args.size = 0;
- if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
- status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
- &file_modified, &size);
+ if ((attrmask[0] & (FATTR4_WORD0_CHANGE |
+ FATTR4_WORD0_SIZE)) ||
+ (attrmask[1] & (FATTR4_WORD1_TIME_ACCESS |
+ FATTR4_WORD1_TIME_MODIFY |
+ FATTR4_WORD1_TIME_METADATA))) {
+ status = nfsd4_deleg_getattr_conflict(rqstp, dentry, &dp);
if (status)
goto out;
}
@@ -3544,25 +3643,40 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
err = vfs_getattr(&path, &args.stat,
STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
AT_STATX_SYNC_AS_STAT);
+ if (dp) {
+ struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
+
+ if (ncf->ncf_file_modified) {
+ ++ncf->ncf_initial_cinfo;
+ args.stat.size = ncf->ncf_cur_fsize;
+ if (!timespec64_is_epoch(&ncf->ncf_cb_mtime))
+ args.stat.mtime = ncf->ncf_cb_mtime;
+ }
+ args.change_attr = ncf->ncf_initial_cinfo;
+
+ if (!timespec64_is_epoch(&ncf->ncf_cb_atime))
+ args.stat.atime = ncf->ncf_cb_atime;
+
+ nfs4_put_stid(&dp->dl_stid);
+ } else {
+ args.change_attr = nfsd4_change_attribute(&args.stat);
+ }
+
if (err)
goto out_nfserr;
- if (file_modified)
- args.size = size;
- else
- args.size = args.stat.size;
if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */
- u.attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
- if ((u.attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+ attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
+ if ((attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
- (u.attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
+ (attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
FATTR4_WORD1_SPACE_TOTAL))) {
err = vfs_statfs(&path, &args.statfs);
if (err)
goto out_nfserr;
}
- if ((u.attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
+ if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
!fhp) {
tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
status = nfserr_jukebox;
@@ -3576,11 +3690,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
} else
args.fhp = fhp;
- args.acl = NULL;
- if (u.attrmask[0] & FATTR4_WORD0_ACL) {
+ if (attrmask[0] & FATTR4_WORD0_ACL) {
err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl);
if (err == -EOPNOTSUPP)
- u.attrmask[0] &= ~FATTR4_WORD0_ACL;
+ attrmask[0] &= ~FATTR4_WORD0_ACL;
else if (err == -EINVAL) {
status = nfserr_attrnotsupp;
goto out;
@@ -3591,18 +3704,17 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
args.contextsupport = false;
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
- args.context = NULL;
- if ((u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
- u.attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
+ attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
err = security_inode_getsecctx(d_inode(dentry),
- &args.context, &args.contextlen);
+ &args.context);
else
err = -EOPNOTSUPP;
args.contextsupport = (err == 0);
- if (u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
+ if (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
if (err == -EOPNOTSUPP)
- u.attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
else if (err)
goto out_nfserr;
}
@@ -3610,29 +3722,31 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
/* attrmask */
- status = nfsd4_encode_bitmap4(xdr, u.attrmask[0],
- u.attrmask[1], u.attrmask[2]);
+ status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1],
+ attrmask[2]);
if (status)
goto out;
/* attr_vals */
attrlen_offset = xdr->buf->len;
- attrlen_p = xdr_reserve_space(xdr, XDR_UNIT);
- if (!attrlen_p)
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
goto out_resource;
- for_each_set_bit(bit, (const unsigned long *)&u.mask,
+ bitmap_from_arr32(attr_bitmap, attrmask,
+ ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+ for_each_set_bit(bit, attr_bitmap,
ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) {
status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args);
if (status != nfs_ok)
goto out;
}
- *attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT);
+ attrlen = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT);
+ write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, XDR_UNIT);
status = nfs_ok;
out:
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
- if (args.context)
- security_release_secctx(args.context, args.contextlen);
+ if (args.context.context)
+ security_release_secctx(&args.context);
#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
kfree(args.acl);
if (tempfh) {
@@ -3715,7 +3829,9 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name,
__be32 nfserr;
int ignore_crossmnt = 0;
- dentry = lookup_positive_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
+ dentry = lookup_one_positive_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, namlen),
+ cd->rd_fhp->fh_dentry);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
@@ -3745,7 +3861,7 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name,
nfserr = nfserrno(err);
goto out_put;
}
- nfserr = check_nfsd_access(exp, cd->rd_rqstp);
+ nfserr = check_nfsd_access(exp, cd->rd_rqstp, false);
if (nfserr)
goto out_put;
@@ -4216,18 +4332,20 @@ nfsd4_encode_open_delegation4(struct xdr_stream *xdr, struct nfsd4_open *open)
if (xdr_stream_encode_u32(xdr, open->op_delegate_type) != XDR_UNIT)
return nfserr_resource;
switch (open->op_delegate_type) {
- case NFS4_OPEN_DELEGATE_NONE:
+ case OPEN_DELEGATE_NONE:
status = nfs_ok;
break;
- case NFS4_OPEN_DELEGATE_READ:
+ case OPEN_DELEGATE_READ:
+ case OPEN_DELEGATE_READ_ATTRS_DELEG:
/* read */
status = nfsd4_encode_open_read_delegation4(xdr, open);
break;
- case NFS4_OPEN_DELEGATE_WRITE:
+ case OPEN_DELEGATE_WRITE:
+ case OPEN_DELEGATE_WRITE_ATTRS_DELEG:
/* write */
status = nfsd4_encode_open_write_delegation4(xdr, open);
break;
- case NFS4_OPEN_DELEGATE_NONE_EXT:
+ case OPEN_DELEGATE_NONE_EXT:
/* od_whynone */
status = nfsd4_encode_open_none_delegation4(xdr, open);
break;
@@ -4304,6 +4422,15 @@ static __be32 nfsd4_encode_splice_read(
__be32 nfserr;
/*
+ * Splice read doesn't work if encoding has already wandered
+ * into the XDR buf's page array.
+ */
+ if (unlikely(xdr->buf->page_len)) {
+ WARN_ON_ONCE(1);
+ return nfserr_serverfault;
+ }
+
+ /*
* Make sure there is room at the end of buf->head for
* svcxdr_encode_opaque_pages() to create a tail buffer
* to XDR-pad the payload.
@@ -4385,25 +4512,23 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_compoundargs *argp = resp->rqstp->rq_argp;
struct nfsd4_read *read = &u->read;
struct xdr_stream *xdr = resp->xdr;
- int starting_len = xdr->buf->len;
bool splice_ok = argp->splice_ok;
+ unsigned int eof_offset;
unsigned long maxcount;
+ __be32 wire_data[2];
struct file *file;
- __be32 *p;
if (nfserr)
return nfserr;
+
+ eof_offset = xdr->buf->len;
file = read->rd_nf->nf_file;
- p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
- if (!p) {
+ /* Reserve space for the eof flag and byte count */
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2))) {
WARN_ON_ONCE(splice_ok);
return nfserr_resource;
}
- if (resp->xdr->buf->page_len && splice_ok) {
- WARN_ON_ONCE(1);
- return nfserr_serverfault;
- }
xdr_commit_encode(xdr);
maxcount = min_t(unsigned long, read->rd_length,
@@ -4414,12 +4539,13 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
else
nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
if (nfserr) {
- xdr_truncate_encode(xdr, starting_len);
+ xdr_truncate_encode(xdr, eof_offset);
return nfserr;
}
- p = xdr_encode_bool(p, read->rd_eof);
- *p = cpu_to_be32(read->rd_length);
+ wire_data[0] = read->rd_eof ? xdr_one : xdr_zero;
+ wire_data[1] = cpu_to_be32(read->rd_length);
+ write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2);
return nfs_ok;
}
@@ -4428,25 +4554,21 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
union nfsd4_op_u *u)
{
struct nfsd4_readlink *readlink = &u->readlink;
- __be32 *p, *maxcount_p, zero = xdr_zero;
+ __be32 *p, wire_count, zero = xdr_zero;
struct xdr_stream *xdr = resp->xdr;
- int length_offset = xdr->buf->len;
+ unsigned int length_offset;
int maxcount, status;
- maxcount_p = xdr_reserve_space(xdr, XDR_UNIT);
- if (!maxcount_p)
+ /* linktext4.count */
+ length_offset = xdr->buf->len;
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
return nfserr_resource;
- maxcount = PAGE_SIZE;
+ /* linktext4.data */
+ maxcount = PAGE_SIZE;
p = xdr_reserve_space(xdr, maxcount);
if (!p)
return nfserr_resource;
- /*
- * XXX: By default, vfs_readlink() will truncate symlinks if they
- * would overflow the buffer. Is this kosher in NFSv4? If not, one
- * easy fix is: if vfs_readlink() precisely fills the buffer, assume
- * that truncation occurred, and return NFS4ERR_RESOURCE.
- */
nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp,
(char *)p, &maxcount);
if (nfserr == nfserr_isdir)
@@ -4459,7 +4581,9 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
nfserr = nfserrno(status);
goto out_err;
}
- *maxcount_p = cpu_to_be32(maxcount);
+
+ wire_count = cpu_to_be32(maxcount);
+ write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, XDR_UNIT);
xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount));
write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero,
xdr_pad_size(maxcount));
@@ -4594,14 +4718,42 @@ nfsd4_encode_rpcsec_gss_info(struct xdr_stream *xdr,
}
static __be32
-nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
+nfsd4_encode_secinfo4(struct xdr_stream *xdr, rpc_authflavor_t pf,
+ u32 *supported)
+{
+ struct rpcsec_gss_info info;
+ __be32 status;
+
+ if (rpcauth_get_gssinfo(pf, &info) == 0) {
+ (*supported)++;
+
+ /* flavor */
+ status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS);
+ if (status != nfs_ok)
+ return status;
+ /* flavor_info */
+ status = nfsd4_encode_rpcsec_gss_info(xdr, &info);
+ if (status != nfs_ok)
+ return status;
+ } else if (pf < RPC_AUTH_MAXFLAVOR) {
+ (*supported)++;
+
+ /* flavor */
+ status = nfsd4_encode_uint32_t(xdr, pf);
+ if (status != nfs_ok)
+ return status;
+ }
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_SECINFO4resok(struct xdr_stream *xdr, struct svc_export *exp)
{
u32 i, nflavs, supported;
struct exp_flavor_info *flavs;
struct exp_flavor_info def_flavs[2];
- static bool report = true;
- __be32 *flavorsp;
- __be32 status;
+ unsigned int count_offset;
+ __be32 status, wire_count;
if (exp->ex_nflavors) {
flavs = exp->ex_flavors;
@@ -4623,43 +4775,20 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
}
}
- supported = 0;
- flavorsp = xdr_reserve_space(xdr, XDR_UNIT);
- if (!flavorsp)
+ count_offset = xdr->buf->len;
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
return nfserr_resource;
- for (i = 0; i < nflavs; i++) {
- rpc_authflavor_t pf = flavs[i].pseudoflavor;
- struct rpcsec_gss_info info;
-
- if (rpcauth_get_gssinfo(pf, &info) == 0) {
- supported++;
-
- /* flavor */
- status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS);
- if (status != nfs_ok)
- return status;
- /* flavor_info */
- status = nfsd4_encode_rpcsec_gss_info(xdr, &info);
- if (status != nfs_ok)
- return status;
- } else if (pf < RPC_AUTH_MAXFLAVOR) {
- supported++;
-
- /* flavor */
- status = nfsd4_encode_uint32_t(xdr, pf);
- if (status != nfs_ok)
- return status;
- } else {
- if (report)
- pr_warn("NFS: SECINFO: security flavor %u "
- "is not supported\n", pf);
- }
+ for (i = 0, supported = 0; i < nflavs; i++) {
+ status = nfsd4_encode_secinfo4(xdr, flavs[i].pseudoflavor,
+ &supported);
+ if (status != nfs_ok)
+ return status;
}
- if (nflavs != supported)
- report = false;
- *flavorsp = cpu_to_be32(supported);
+ wire_count = cpu_to_be32(supported);
+ write_bytes_to_xdr_buf(xdr->buf, count_offset, &wire_count,
+ XDR_UNIT);
return 0;
}
@@ -4670,7 +4799,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_secinfo *secinfo = &u->secinfo;
struct xdr_stream *xdr = resp->xdr;
- return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp);
+ return nfsd4_encode_SECINFO4resok(xdr, secinfo->si_exp);
}
static __be32
@@ -4680,7 +4809,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name;
struct xdr_stream *xdr = resp->xdr;
- return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp);
+ return nfsd4_encode_SECINFO4resok(xdr, secinfo->sin_exp);
}
static __be32
@@ -4804,6 +4933,25 @@ nfsd4_encode_server_owner4(struct xdr_stream *xdr, struct svc_rqst *rqstp)
}
static __be32
+nfsd4_encode_nfs_impl_id4(struct xdr_stream *xdr, struct nfsd4_exchange_id *exid)
+{
+ __be32 status;
+
+ /* nii_domain */
+ status = nfsd4_encode_opaque(xdr, exid->nii_domain.data,
+ exid->nii_domain.len);
+ if (status != nfs_ok)
+ return status;
+ /* nii_name */
+ status = nfsd4_encode_opaque(xdr, exid->nii_name.data,
+ exid->nii_name.len);
+ if (status != nfs_ok)
+ return status;
+ /* nii_time */
+ return nfsd4_encode_nfstime4(xdr, &exid->nii_time);
+}
+
+static __be32
nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
union nfsd4_op_u *u)
{
@@ -4837,8 +4985,11 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
if (nfserr != nfs_ok)
return nfserr;
/* eir_server_impl_id<1> */
- if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT)
return nfserr_resource;
+ nfserr = nfsd4_encode_nfs_impl_id4(xdr, exid);
+ if (nfserr != nfs_ok)
+ return nfserr;
return nfs_ok;
}
@@ -4933,7 +5084,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
if (nfserr != nfs_ok)
return nfserr;
/* sr_target_highest_slotid */
- nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+ nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
if (nfserr != nfs_ok)
return nfserr;
/* sr_status_flags */
@@ -4964,6 +5115,49 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfs_ok;
}
+static __be32
+nfsd4_encode_get_dir_delegation(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 status = nfserr_resource;
+
+	switch (gdd->gddrnf_status) {
+ case GDD4_OK:
+ if (xdr_stream_encode_u32(xdr, GDD4_OK) != XDR_UNIT)
+ break;
+ status = nfsd4_encode_verifier4(xdr, &gdd->gddr_cookieverf);
+ if (status)
+ break;
+ status = nfsd4_encode_stateid4(xdr, &gdd->gddr_stateid);
+ if (status)
+ break;
+ status = nfsd4_encode_bitmap4(xdr, gdd->gddr_notification[0], 0, 0);
+ if (status)
+ break;
+ status = nfsd4_encode_bitmap4(xdr, gdd->gddr_child_attributes[0],
+ gdd->gddr_child_attributes[1],
+ gdd->gddr_child_attributes[2]);
+ if (status)
+ break;
+ status = nfsd4_encode_bitmap4(xdr, gdd->gddr_dir_attributes[0],
+ gdd->gddr_dir_attributes[1],
+ gdd->gddr_dir_attributes[2]);
+ break;
+ default:
+ pr_warn("nfsd: bad gddrnf_status (%u)\n", gdd->gddrnf_status);
+ gdd->gddrnf_will_signal_deleg_avail = 0;
+ fallthrough;
+ case GDD4_UNAVAIL:
+ if (xdr_stream_encode_u32(xdr, GDD4_UNAVAIL) != XDR_UNIT)
+ break;
+ status = nfsd4_encode_bool(xdr, gdd->gddrnf_will_signal_deleg_avail);
+ break;
+ }
+ return status;
+}
+
#ifdef CONFIG_NFSD_PNFS
static __be32
nfsd4_encode_device_addr4(struct xdr_stream *xdr,
@@ -5200,7 +5394,12 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
if (nfserr != nfs_ok)
return nfserr;
/* osr_complete<1> */
- if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ if (os->completed) {
+ if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT)
+ return nfserr_resource;
+ if (xdr_stream_encode_be32(xdr, os->status) != XDR_UNIT)
+ return nfserr_resource;
+ } else if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
return nfserr_resource;
return nfs_ok;
}
@@ -5213,17 +5412,20 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
struct file *file = read->rd_nf->nf_file;
struct xdr_stream *xdr = resp->xdr;
bool splice_ok = argp->splice_ok;
+ unsigned int offset_offset;
+ __be32 nfserr, wire_count;
unsigned long maxcount;
- __be32 nfserr, *p;
+ __be64 wire_offset;
- /* Content type, offset, byte count */
- p = xdr_reserve_space(xdr, 4 + 8 + 4);
- if (!p)
+ if (xdr_stream_encode_u32(xdr, NFS4_CONTENT_DATA) != XDR_UNIT)
return nfserr_io;
- if (resp->xdr->buf->page_len && splice_ok) {
- WARN_ON_ONCE(splice_ok);
- return nfserr_serverfault;
- }
+
+ offset_offset = xdr->buf->len;
+
+ /* Reserve space for the byte offset and count */
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 3)))
+ return nfserr_io;
+ xdr_commit_encode(xdr);
maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
@@ -5235,10 +5437,12 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
if (nfserr)
return nfserr;
- *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
- p = xdr_encode_hyper(p, read->rd_offset);
- *p = cpu_to_be32(read->rd_length);
-
+ wire_offset = cpu_to_be64(read->rd_offset);
+ write_bytes_to_xdr_buf(xdr->buf, offset_offset, &wire_offset,
+ XDR_UNIT * 2);
+ wire_count = cpu_to_be32(read->rd_length);
+ write_bytes_to_xdr_buf(xdr->buf, offset_offset + XDR_UNIT * 2,
+ &wire_count, XDR_UNIT);
return nfs_ok;
}
@@ -5249,16 +5453,17 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_read *read = &u->read;
struct file *file = read->rd_nf->nf_file;
struct xdr_stream *xdr = resp->xdr;
- int starting_len = xdr->buf->len;
+ unsigned int eof_offset;
+ __be32 wire_data[2];
u32 segments = 0;
- __be32 *p;
if (nfserr)
return nfserr;
- /* eof flag, segment count */
- p = xdr_reserve_space(xdr, 4 + 4);
- if (!p)
+ eof_offset = xdr->buf->len;
+
+ /* Reserve space for the eof flag and segment count */
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2)))
return nfserr_io;
xdr_commit_encode(xdr);
@@ -5268,15 +5473,16 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
nfserr = nfsd4_encode_read_plus_data(resp, read);
if (nfserr) {
- xdr_truncate_encode(xdr, starting_len);
+ xdr_truncate_encode(xdr, eof_offset);
return nfserr;
}
segments++;
out:
- p = xdr_encode_bool(p, read->rd_eof);
- *p = cpu_to_be32(segments);
+ wire_data[0] = read->rd_eof ? xdr_one : xdr_zero;
+ wire_data[1] = cpu_to_be32(segments);
+ write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2);
return nfserr;
}
@@ -5580,7 +5786,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
[OP_CREATE_SESSION] = nfsd4_encode_create_session,
[OP_DESTROY_SESSION] = nfsd4_encode_noop,
[OP_FREE_STATEID] = nfsd4_encode_noop,
- [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop,
+ [OP_GET_DIR_DELEGATION] = nfsd4_encode_get_dir_delegation,
#ifdef CONFIG_NFSD_PNFS
[OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo,
[OP_GETDEVICELIST] = nfsd4_encode_noop,
@@ -5653,6 +5859,23 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize)
return nfserr_rep_too_big;
}
+static __be32 nfsd4_map_status(__be32 status, u32 minor)
+{
+ switch (status) {
+ case nfs_ok:
+ break;
+ case nfserr_wrong_type:
+ /* RFC 8881 - 15.1.2.9 */
+ if (minor == 0)
+ status = nfserr_inval;
+ break;
+ case nfserr_symlink_not_dir:
+ status = nfserr_symlink;
+ break;
+ }
+ return status;
+}
+
void
nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
{
@@ -5660,15 +5883,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
struct nfs4_stateowner *so = resp->cstate.replay_owner;
struct svc_rqst *rqstp = resp->rqstp;
const struct nfsd4_operation *opdesc = op->opdesc;
- int post_err_offset;
+ unsigned int op_status_offset;
nfsd4_enc encoder;
- __be32 *p;
- p = xdr_reserve_space(xdr, 8);
- if (!p)
+ if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT)
+ goto release;
+ op_status_offset = xdr->buf->len;
+ if (!xdr_reserve_space(xdr, XDR_UNIT))
goto release;
- *p++ = cpu_to_be32(op->opnum);
- post_err_offset = xdr->buf->len;
if (op->opnum == OP_ILLEGAL)
goto status;
@@ -5709,18 +5931,21 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
* bug if we had to do this on a non-idempotent op:
*/
warn_on_nonidempotent_op(op);
- xdr_truncate_encode(xdr, post_err_offset);
+ xdr_truncate_encode(xdr, op_status_offset + XDR_UNIT);
}
if (so) {
- int len = xdr->buf->len - post_err_offset;
+ int len = xdr->buf->len - (op_status_offset + XDR_UNIT);
so->so_replay.rp_status = op->status;
so->so_replay.rp_buflen = len;
- read_bytes_from_xdr_buf(xdr->buf, post_err_offset,
+ read_bytes_from_xdr_buf(xdr->buf, op_status_offset + XDR_UNIT,
so->so_replay.rp_buf, len);
}
status:
- *p = op->status;
+ op->status = nfsd4_map_status(op->status,
+ resp->cstate.minorversion);
+ write_bytes_to_xdr_buf(xdr->buf, op_status_offset,
+ &op->status, XDR_UNIT);
release:
if (opdesc && opdesc->op_release)
opdesc->op_release(&op->u);
diff --git a/fs/nfsd/nfs4xdr_gen.c b/fs/nfsd/nfs4xdr_gen.c
new file mode 100644
index 000000000000..a17b5d8e60b3
--- /dev/null
+++ b/fs/nfsd/nfs4xdr_gen.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+// Generated by xdrgen. Manual edits will be lost.
+// XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x
+// XDR specification modification time: Mon Oct 14 09:10:13 2024
+
+#include <linux/sunrpc/svc.h>
+
+#include "nfs4xdr_gen.h"
+
+static bool __maybe_unused
+xdrgen_decode_int64_t(struct xdr_stream *xdr, int64_t *ptr)
+{
+ return xdrgen_decode_hyper(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_uint32_t(struct xdr_stream *xdr, uint32_t *ptr)
+{
+ return xdrgen_decode_unsigned_int(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_bitmap4(struct xdr_stream *xdr, bitmap4 *ptr)
+{
+ if (xdr_stream_decode_u32(xdr, &ptr->count) < 0)
+ return false;
+ for (u32 i = 0; i < ptr->count; i++)
+ if (!xdrgen_decode_uint32_t(xdr, &ptr->element[i]))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_nfstime4(struct xdr_stream *xdr, struct nfstime4 *ptr)
+{
+ if (!xdrgen_decode_int64_t(xdr, &ptr->seconds))
+ return false;
+ if (!xdrgen_decode_uint32_t(xdr, &ptr->nseconds))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_fattr4_offline(struct xdr_stream *xdr, fattr4_offline *ptr)
+{
+ return xdrgen_decode_bool(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_open_arguments4(struct xdr_stream *xdr, struct open_arguments4 *ptr)
+{
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_deny))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access_want))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_open_claim))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_create_mode))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+bool
+xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr)
+{
+ return xdrgen_decode_open_arguments4(xdr, ptr);
+};
+
+bool
+xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr)
+{
+ return xdrgen_decode_nfstime4(xdr, ptr);
+};
+
+bool
+xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr)
+{
+ return xdrgen_decode_nfstime4(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_int64_t(struct xdr_stream *xdr, const int64_t value)
+{
+ return xdrgen_encode_hyper(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_uint32_t(struct xdr_stream *xdr, const uint32_t value)
+{
+ return xdrgen_encode_unsigned_int(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_bitmap4(struct xdr_stream *xdr, const bitmap4 value)
+{
+ if (xdr_stream_encode_u32(xdr, value.count) != XDR_UNIT)
+ return false;
+ for (u32 i = 0; i < value.count; i++)
+ if (!xdrgen_encode_uint32_t(xdr, value.element[i]))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_nfstime4(struct xdr_stream *xdr, const struct nfstime4 *value)
+{
+ if (!xdrgen_encode_int64_t(xdr, value->seconds))
+ return false;
+ if (!xdrgen_encode_uint32_t(xdr, value->nseconds))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_fattr4_offline(struct xdr_stream *xdr, const fattr4_offline value)
+{
+ return xdrgen_encode_bool(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_open_arguments4(struct xdr_stream *xdr, const struct open_arguments4 *value)
+{
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_share_deny))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access_want))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_open_claim))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_create_mode))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+bool
+xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value)
+{
+ return xdrgen_encode_open_arguments4(xdr, value);
+};
+
+bool
+xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value)
+{
+ return xdrgen_encode_nfstime4(xdr, value);
+};
+
+bool
+xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value)
+{
+ return xdrgen_encode_nfstime4(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
diff --git a/fs/nfsd/nfs4xdr_gen.h b/fs/nfsd/nfs4xdr_gen.h
new file mode 100644
index 000000000000..41a0033b7256
--- /dev/null
+++ b/fs/nfsd/nfs4xdr_gen.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Generated by xdrgen. Manual edits will be lost. */
+/* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
+/* XDR specification modification time: Mon Oct 14 09:10:13 2024 */
+
+#ifndef _LINUX_XDRGEN_NFS4_1_DECL_H
+#define _LINUX_XDRGEN_NFS4_1_DECL_H
+
+#include <linux/types.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/xdrgen/_defs.h>
+#include <linux/sunrpc/xdrgen/_builtins.h>
+#include <linux/sunrpc/xdrgen/nfs4_1.h>
+
+bool xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr);
+bool xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value);
+
+bool xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr);
+bool xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value);
+
+bool xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr);
+bool xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value);
+
+#endif /* _LINUX_XDRGEN_NFS4_1_DECL_H */
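The generated helpers above are thin wrappers around the xdr_stream primitives for the new open-arguments and delegated-timestamp attributes. As a minimal sketch (not part of the patch), this is one way a caller might encode a TIME_DELEG_ACCESS value; the example_* name and the timespec64 parameter are assumptions made here for illustration, and fattr4_time_deleg_access is the nfstime4-based typedef declared in nfs4xdr_gen.h.

#include <linux/time64.h>
#include "nfs4xdr_gen.h"

/* Hypothetical helper, for illustration only. */
static bool example_encode_deleg_access(struct xdr_stream *xdr,
					const struct timespec64 *ts)
{
	/* fattr4_time_deleg_access is an nfstime4: 64-bit seconds + nseconds */
	fattr4_time_deleg_access t = {
		.seconds  = ts->tv_sec,
		.nseconds = ts->tv_nsec,
	};

	return xdrgen_encode_fattr4_time_deleg_access(xdr, &t);
}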
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index ecd18bffeebc..6a42cc7a845a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -15,8 +15,10 @@
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/sunrpc/svc.h>
#include <linux/module.h>
#include <linux/fsnotify.h>
+#include <linux/nfslocalio.h>
#include "idmap.h"
#include "nfsd.h"
@@ -46,14 +48,11 @@ enum {
NFSD_Versions,
NFSD_Ports,
NFSD_MaxBlkSize,
- NFSD_MaxConnections,
NFSD_Filecache,
-#ifdef CONFIG_NFSD_V4
NFSD_Leasetime,
NFSD_Gracetime,
NFSD_RecoveryDir,
NFSD_V4EndGrace,
-#endif
NFSD_MaxReserved
};
@@ -68,7 +67,6 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
static ssize_t write_versions(struct file *file, char *buf, size_t size);
static ssize_t write_ports(struct file *file, char *buf, size_t size);
static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
-static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
#ifdef CONFIG_NFSD_V4
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
@@ -87,7 +85,6 @@ static ssize_t (*const write_op[])(struct file *, char *, size_t) = {
[NFSD_Versions] = write_versions,
[NFSD_Ports] = write_ports,
[NFSD_MaxBlkSize] = write_maxblksize,
- [NFSD_MaxConnections] = write_maxconn,
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = write_leasetime,
[NFSD_Gracetime] = write_gracetime,
@@ -175,6 +172,13 @@ static int export_features_show(struct seq_file *m, void *v)
DEFINE_SHOW_ATTRIBUTE(export_features);
+static int nfsd_pool_stats_open(struct inode *inode, struct file *file)
+{
+ struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
+
+ return svc_pool_stats_open(&nn->nfsd_info, file);
+}
+
static const struct file_operations pool_stats_operations = {
.open = nfsd_pool_stats_open,
.read = seq_read,
@@ -406,7 +410,9 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
if (newthreads < 0)
return -EINVAL;
trace_nfsd_ctl_threads(net, newthreads);
- rv = nfsd_svc(newthreads, net, file->f_cred);
+ mutex_lock(&nfsd_mutex);
+ rv = nfsd_svc(1, &newthreads, net, file->f_cred, NULL);
+ mutex_unlock(&nfsd_mutex);
if (rv < 0)
return rv;
} else
@@ -480,6 +486,14 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
goto out_free;
trace_nfsd_ctl_pool_threads(net, i, nthreads[i]);
}
+
+ /*
+ * There must always be a thread in pool 0; the admin
+ * can't shut down NFS completely using pool_threads.
+ */
+ if (nthreads[0] == 0)
+ nthreads[0] = 1;
+
rv = nfsd_set_nrthreads(i, nthreads, net);
if (rv)
goto out_free;
@@ -885,44 +899,6 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
nfsd_max_blksize);
}
-/*
- * write_maxconn - Set or report the current max number of connections
- *
- * Input:
- * buf: ignored
- * size: zero
- * OR
- *
- * Input:
- * buf: C string containing an unsigned
- * integer value representing the new
- * number of max connections
- * size: non-zero length of C string in @buf
- * Output:
- * On success: passed-in buffer filled with '\n'-terminated C string
- * containing numeric value of max_connections setting
- * for this net namespace;
- * return code is the size in bytes of the string
- * On error: return code is zero or a negative errno value
- */
-static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
-{
- char *mesg = buf;
- struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
- unsigned int maxconn = nn->max_connections;
-
- if (size > 0) {
- int rv = get_uint(&mesg, &maxconn);
-
- if (rv)
- return rv;
- trace_nfsd_ctl_maxconn(netns(file), maxconn);
- nn->max_connections = maxconn;
- }
-
- return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn);
-}
-
#ifdef CONFIG_NFSD_V4
static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
time64_t *time, struct nfsd_net *nn)
@@ -1355,12 +1331,13 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
- [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+#endif
[NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO},
#endif
/* last one */ {""}
@@ -1457,28 +1434,6 @@ static int create_proc_exports_entry(void)
unsigned int nfsd_net_id;
-/**
- * nfsd_nl_rpc_status_get_start - Prepare rpc_status_get dumpit
- * @cb: netlink metadata and command arguments
- *
- * Return values:
- * %0: The rpc_status_get command may proceed
- * %-ENODEV: There is no NFSD running in this namespace
- */
-int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb)
-{
- struct nfsd_net *nn = net_generic(sock_net(cb->skb->sk), nfsd_net_id);
- int ret = -ENODEV;
-
- mutex_lock(&nfsd_mutex);
- if (nn->nfsd_serv)
- ret = 0;
- else
- mutex_unlock(&nfsd_mutex);
-
- return ret;
-}
-
static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
struct netlink_callback *cb,
struct nfsd_genl_rqstp *rqstp)
@@ -1555,8 +1510,16 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct nfsd_net *nn = net_generic(sock_net(skb->sk), nfsd_net_id);
int i, ret, rqstp_index = 0;
+ struct nfsd_net *nn;
+
+ mutex_lock(&nfsd_mutex);
+
+ nn = net_generic(sock_net(skb->sk), nfsd_net_id);
+ if (!nn->nfsd_serv) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
rcu_read_lock();
@@ -1633,25 +1596,586 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
ret = skb->len;
out:
rcu_read_unlock();
+out_unlock:
+ mutex_unlock(&nfsd_mutex);
return ret;
}
/**
- * nfsd_nl_rpc_status_get_done - rpc_status_get dumpit post-processing
- * @cb: netlink metadata and command arguments
+ * nfsd_nl_threads_set_doit - set the number of running threads
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem;
+ struct net *net = genl_info_net(info);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ const struct nlattr *attr;
+ const char *scope = NULL;
+
+ if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_THREADS))
+ return -EINVAL;
+
+ /* count number of SERVER_THREADS values */
+ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
+ if (nla_type(attr) == NFSD_A_SERVER_THREADS)
+ nrpools++;
+ }
+
+ mutex_lock(&nfsd_mutex);
+
+ nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL);
+ if (!nthreads) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ i = 0;
+ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
+ if (nla_type(attr) == NFSD_A_SERVER_THREADS) {
+ nthreads[i++] = nla_get_u32(attr);
+ if (i >= nrpools)
+ break;
+ }
+ }
+
+ if (info->attrs[NFSD_A_SERVER_GRACETIME] ||
+ info->attrs[NFSD_A_SERVER_LEASETIME] ||
+ info->attrs[NFSD_A_SERVER_SCOPE]) {
+ ret = -EBUSY;
+ if (nn->nfsd_serv && nn->nfsd_serv->sv_nrthreads)
+ goto out_unlock;
+
+ ret = -EINVAL;
+ attr = info->attrs[NFSD_A_SERVER_GRACETIME];
+ if (attr) {
+ u32 gracetime = nla_get_u32(attr);
+
+ if (gracetime < 10 || gracetime > 3600)
+ goto out_unlock;
+
+ nn->nfsd4_grace = gracetime;
+ }
+
+ attr = info->attrs[NFSD_A_SERVER_LEASETIME];
+ if (attr) {
+ u32 leasetime = nla_get_u32(attr);
+
+ if (leasetime < 10 || leasetime > 3600)
+ goto out_unlock;
+
+ nn->nfsd4_lease = leasetime;
+ }
+
+ attr = info->attrs[NFSD_A_SERVER_SCOPE];
+ if (attr)
+ scope = nla_data(attr);
+ }
+
+ ret = nfsd_svc(nrpools, nthreads, net, get_current_cred(), scope);
+ if (ret > 0)
+ ret = 0;
+out_unlock:
+ mutex_unlock(&nfsd_mutex);
+ kfree(nthreads);
+ return ret;
+}
+
+/**
+ * nfsd_nl_threads_get_doit - get the number of running threads
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
*
- * Return values:
- * %0: Success
+ * Return 0 on success or a negative errno.
*/
-int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb)
+int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info)
{
+ struct net *net = genl_info_net(info);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ void *hdr;
+ int err;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_free_msg;
+ }
+
+ mutex_lock(&nfsd_mutex);
+
+ err = nla_put_u32(skb, NFSD_A_SERVER_GRACETIME,
+ nn->nfsd4_grace) ||
+ nla_put_u32(skb, NFSD_A_SERVER_LEASETIME,
+ nn->nfsd4_lease) ||
+ nla_put_string(skb, NFSD_A_SERVER_SCOPE,
+ nn->nfsd_name);
+ if (err)
+ goto err_unlock;
+
+ if (nn->nfsd_serv) {
+ int i;
+
+ for (i = 0; i < nfsd_nrpools(net); ++i) {
+ struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i];
+
+ err = nla_put_u32(skb, NFSD_A_SERVER_THREADS,
+ sp->sp_nrthreads);
+ if (err)
+ goto err_unlock;
+ }
+ } else {
+ err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, 0);
+ if (err)
+ goto err_unlock;
+ }
+
+ mutex_unlock(&nfsd_mutex);
+
+ genlmsg_end(skb, hdr);
+
+ return genlmsg_reply(skb, info);
+
+err_unlock:
+ mutex_unlock(&nfsd_mutex);
+err_free_msg:
+ nlmsg_free(skb);
+
+ return err;
+}
+
+/**
+ * nfsd_nl_version_set_doit - set the enabled NFS versions
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ const struct nlattr *attr;
+ struct nfsd_net *nn;
+ int i, rem;
+
+ if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_PROTO_VERSION))
+ return -EINVAL;
+
+ mutex_lock(&nfsd_mutex);
+
+ nn = net_generic(genl_info_net(info), nfsd_net_id);
+ if (nn->nfsd_serv) {
+ mutex_unlock(&nfsd_mutex);
+ return -EBUSY;
+ }
+
+ /* clear current supported versions. */
+ nfsd_vers(nn, 2, NFSD_CLEAR);
+ nfsd_vers(nn, 3, NFSD_CLEAR);
+ for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++)
+ nfsd_minorversion(nn, i, NFSD_CLEAR);
+
+ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
+ struct nlattr *tb[NFSD_A_VERSION_MAX + 1];
+ u32 major, minor = 0;
+ bool enabled;
+
+ if (nla_type(attr) != NFSD_A_SERVER_PROTO_VERSION)
+ continue;
+
+ if (nla_parse_nested(tb, NFSD_A_VERSION_MAX, attr,
+ nfsd_version_nl_policy, info->extack) < 0)
+ continue;
+
+ if (!tb[NFSD_A_VERSION_MAJOR])
+ continue;
+
+ major = nla_get_u32(tb[NFSD_A_VERSION_MAJOR]);
+ if (tb[NFSD_A_VERSION_MINOR])
+ minor = nla_get_u32(tb[NFSD_A_VERSION_MINOR]);
+
+ enabled = nla_get_flag(tb[NFSD_A_VERSION_ENABLED]);
+
+ switch (major) {
+ case 4:
+ nfsd_minorversion(nn, minor, enabled ? NFSD_SET : NFSD_CLEAR);
+ break;
+ case 3:
+ case 2:
+ if (!minor)
+ nfsd_vers(nn, major, enabled ? NFSD_SET : NFSD_CLEAR);
+ break;
+ default:
+ break;
+ }
+ }
+
mutex_unlock(&nfsd_mutex);
return 0;
}
/**
+ * nfsd_nl_version_get_doit - get the enabled status for all supported nfs versions
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nfsd_net *nn;
+ int i, err;
+ void *hdr;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_free_msg;
+ }
+
+ mutex_lock(&nfsd_mutex);
+ nn = net_generic(genl_info_net(info), nfsd_net_id);
+
+ for (i = 2; i <= 4; i++) {
+ int j;
+
+ for (j = 0; j <= NFSD_SUPPORTED_MINOR_VERSION; j++) {
+ struct nlattr *attr;
+
+ /* Don't record any versions the kernel doesn't have
+ * compiled in
+ */
+ if (!nfsd_support_version(i))
+ continue;
+
+ /* NFSv{2,3} does not support minor numbers */
+ if (i < 4 && j)
+ continue;
+
+ attr = nla_nest_start(skb,
+ NFSD_A_SERVER_PROTO_VERSION);
+ if (!attr) {
+ err = -EINVAL;
+ goto err_nfsd_unlock;
+ }
+
+ if (nla_put_u32(skb, NFSD_A_VERSION_MAJOR, i) ||
+ nla_put_u32(skb, NFSD_A_VERSION_MINOR, j)) {
+ err = -EINVAL;
+ goto err_nfsd_unlock;
+ }
+
+ /* Set the enabled flag if the version is enabled */
+ if (nfsd_vers(nn, i, NFSD_TEST) &&
+ (i < 4 || nfsd_minorversion(nn, j, NFSD_TEST)) &&
+ nla_put_flag(skb, NFSD_A_VERSION_ENABLED)) {
+ err = -EINVAL;
+ goto err_nfsd_unlock;
+ }
+
+ nla_nest_end(skb, attr);
+ }
+ }
+
+ mutex_unlock(&nfsd_mutex);
+ genlmsg_end(skb, hdr);
+
+ return genlmsg_reply(skb, info);
+
+err_nfsd_unlock:
+ mutex_unlock(&nfsd_mutex);
+err_free_msg:
+ nlmsg_free(skb);
+
+ return err;
+}
+
+/**
+ * nfsd_nl_listener_set_doit - set the NFS listener sockets
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct svc_xprt *xprt, *tmp;
+ const struct nlattr *attr;
+ struct svc_serv *serv;
+ LIST_HEAD(permsocks);
+ struct nfsd_net *nn;
+ bool delete = false;
+ int err, rem;
+
+ mutex_lock(&nfsd_mutex);
+
+ err = nfsd_create_serv(net);
+ if (err) {
+ mutex_unlock(&nfsd_mutex);
+ return err;
+ }
+
+ nn = net_generic(net, nfsd_net_id);
+ serv = nn->nfsd_serv;
+
+ spin_lock_bh(&serv->sv_lock);
+
+ /* Move all of the old listener sockets to a temp list */
+ list_splice_init(&serv->sv_permsocks, &permsocks);
+
+ /*
+ * Walk the list of server_socks from userland and move any that match
+ * back to sv_permsocks
+ */
+ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
+ struct nlattr *tb[NFSD_A_SOCK_MAX + 1];
+ const char *xcl_name;
+ struct sockaddr *sa;
+
+ if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR)
+ continue;
+
+ if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr,
+ nfsd_sock_nl_policy, info->extack) < 0)
+ continue;
+
+ if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME])
+ continue;
+
+ if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa))
+ continue;
+
+ xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]);
+ sa = nla_data(tb[NFSD_A_SOCK_ADDR]);
+
+ /* Put back any matching sockets */
+ list_for_each_entry_safe(xprt, tmp, &permsocks, xpt_list) {
+ /* This shouldn't be possible */
+ if (WARN_ON_ONCE(xprt->xpt_net != net)) {
+ list_move(&xprt->xpt_list, &serv->sv_permsocks);
+ continue;
+ }
+
+ /* If everything matches, put it back */
+ if (!strcmp(xprt->xpt_class->xcl_name, xcl_name) &&
+ rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local)) {
+ list_move(&xprt->xpt_list, &serv->sv_permsocks);
+ break;
+ }
+ }
+ }
+
+ /*
+ * If there are listener transports remaining on the permsocks list,
+ * it means we were asked to remove a listener.
+ */
+ if (!list_empty(&permsocks)) {
+ list_splice_init(&permsocks, &serv->sv_permsocks);
+ delete = true;
+ }
+ spin_unlock_bh(&serv->sv_lock);
+
+ /* Do not remove listeners while there are active threads. */
+ if (serv->sv_nrthreads) {
+ err = -EBUSY;
+ goto out_unlock_mtx;
+ }
+
+ /*
+ * Since we can't delete an arbitrary llist entry, destroy the
+ * remaining listeners and recreate the list.
+ */
+ if (delete)
+ svc_xprt_destroy_all(serv, net);
+
+ /* walk list of addrs again, open any that still don't exist */
+ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
+ struct nlattr *tb[NFSD_A_SOCK_MAX + 1];
+ const char *xcl_name;
+ struct sockaddr *sa;
+ int ret;
+
+ if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR)
+ continue;
+
+ if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr,
+ nfsd_sock_nl_policy, info->extack) < 0)
+ continue;
+
+ if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME])
+ continue;
+
+ if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa))
+ continue;
+
+ xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]);
+ sa = nla_data(tb[NFSD_A_SOCK_ADDR]);
+
+ xprt = svc_find_listener(serv, xcl_name, net, sa);
+ if (xprt) {
+ if (delete)
+ WARN_ONCE(1, "Transport type=%s already exists\n",
+ xcl_name);
+ svc_xprt_put(xprt);
+ continue;
+ }
+
+ ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0,
+ get_current_cred());
+ /* always save the latest error */
+ if (ret < 0)
+ err = ret;
+ }
+
+ if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks))
+ nfsd_destroy_serv(net);
+
+out_unlock_mtx:
+ mutex_unlock(&nfsd_mutex);
+
+ return err;
+}
+
+/**
+ * nfsd_nl_listener_get_doit - get the NFS listener sockets
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct svc_xprt *xprt;
+ struct svc_serv *serv;
+ struct nfsd_net *nn;
+ void *hdr;
+ int err;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_free_msg;
+ }
+
+ mutex_lock(&nfsd_mutex);
+ nn = net_generic(genl_info_net(info), nfsd_net_id);
+
+ /* no nfs server? Just send empty socket list */
+ if (!nn->nfsd_serv)
+ goto out_unlock_mtx;
+
+ serv = nn->nfsd_serv;
+ spin_lock_bh(&serv->sv_lock);
+ list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
+ struct nlattr *attr;
+
+ attr = nla_nest_start(skb, NFSD_A_SERVER_SOCK_ADDR);
+ if (!attr) {
+ err = -EINVAL;
+ goto err_serv_unlock;
+ }
+
+ if (nla_put_string(skb, NFSD_A_SOCK_TRANSPORT_NAME,
+ xprt->xpt_class->xcl_name) ||
+ nla_put(skb, NFSD_A_SOCK_ADDR,
+ sizeof(struct sockaddr_storage),
+ &xprt->xpt_local)) {
+ err = -EINVAL;
+ goto err_serv_unlock;
+ }
+
+ nla_nest_end(skb, attr);
+ }
+ spin_unlock_bh(&serv->sv_lock);
+out_unlock_mtx:
+ mutex_unlock(&nfsd_mutex);
+ genlmsg_end(skb, hdr);
+
+ return genlmsg_reply(skb, info);
+
+err_serv_unlock:
+ spin_unlock_bh(&serv->sv_lock);
+ mutex_unlock(&nfsd_mutex);
+err_free_msg:
+ nlmsg_free(skb);
+
+ return err;
+}
+
+/**
+ * nfsd_nl_pool_mode_set_doit - set the server's pool mode
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_pool_mode_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ const struct nlattr *attr;
+
+ if (GENL_REQ_ATTR_CHECK(info, NFSD_A_POOL_MODE_MODE))
+ return -EINVAL;
+
+ attr = info->attrs[NFSD_A_POOL_MODE_MODE];
+ return sunrpc_set_pool_mode(nla_data(attr));
+}
+
+/**
+ * nfsd_nl_pool_mode_get_doit - get info about pool_mode
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_pool_mode_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ char buf[16];
+ void *hdr;
+ int err;
+
+ if (sunrpc_get_pool_mode(buf, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
+ return -ERANGE;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ err = -EMSGSIZE;
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr)
+ goto err_free_msg;
+
+ err = nla_put_string(skb, NFSD_A_POOL_MODE_MODE, buf) |
+ nla_put_u32(skb, NFSD_A_POOL_MODE_NPOOLS, nfsd_nrpools(net));
+ if (err)
+ goto err_free_msg;
+
+ genlmsg_end(skb, hdr);
+ return genlmsg_reply(skb, info);
+
+err_free_msg:
+ nlmsg_free(skb);
+ return err;
+}
+
+/**
* nfsd_net_init - Prepare the nfsd_net portion of a new net namespace
* @net: a freshly-created network namespace
*
@@ -1663,8 +2187,9 @@ int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb)
*/
static __net_init int nfsd_net_init(struct net *net)
{
- int retval;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ int retval;
+ int i;
retval = nfsd_export_init(net);
if (retval)
@@ -1672,20 +2197,35 @@ static __net_init int nfsd_net_init(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
- retval = nfsd_stat_counters_init(nn);
+ retval = percpu_counter_init_many(nn->counter, 0, GFP_KERNEL,
+ NFSD_STATS_COUNTERS_NUM);
if (retval)
goto out_repcache_error;
+
memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats));
- nn->nfsd_svcstats.program = &nfsd_program;
- nn->nfsd_versions = NULL;
- nn->nfsd4_minorversions = NULL;
+ nn->nfsd_svcstats.program = &nfsd_programs[0];
+ if (!nfsd_proc_stat_init(net)) {
+ retval = -ENOMEM;
+ goto out_proc_error;
+ }
+
+ for (i = 0; i < sizeof(nn->nfsd_versions); i++)
+ nn->nfsd_versions[i] = nfsd_support_version(i);
+ for (i = 0; i < sizeof(nn->nfsd4_minorversions); i++)
+ nn->nfsd4_minorversions[i] = nfsd_support_version(4);
+ nn->nfsd_info.mutex = &nfsd_mutex;
+ nn->nfsd_serv = NULL;
nfsd4_init_leases_net(nn);
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock);
- nfsd_proc_stat_init(net);
-
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ spin_lock_init(&nn->local_clients_lock);
+ INIT_LIST_HEAD(&nn->local_clients);
+#endif
return 0;
+out_proc_error:
+ percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM);
out_repcache_error:
nfsd_idmap_shutdown(net);
out_idmap_error:
@@ -1694,6 +2234,23 @@ out_export_error:
return retval;
}
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+/**
+ * nfsd_net_pre_exit - Disconnect localio clients from net namespace
+ * @net: a network namespace that is about to be destroyed
+ *
+ * This invalidates ->net pointers held by localio clients
+ * while they can still safely access nn->counter.
+ */
+static __net_exit void nfsd_net_pre_exit(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfs_localio_invalidate_clients(&nn->local_clients,
+ &nn->local_clients_lock);
+}
+#endif
+
/**
* nfsd_net_exit - Release the nfsd_net portion of a net namespace
* @net: a network namespace that is about to be destroyed
@@ -1704,14 +2261,16 @@ static __net_exit void nfsd_net_exit(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
nfsd_proc_stat_shutdown(net);
- nfsd_stat_counters_destroy(nn);
+ percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
- nfsd_netns_free_versions(nn);
}
static struct pernet_operations nfsd_net_ops = {
.init = nfsd_net_init,
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ .pre_exit = nfsd_net_pre_exit,
+#endif
.exit = nfsd_net_exit,
.id = &nfsd_net_id,
.size = sizeof(struct nfsd_net),
@@ -1721,6 +2280,8 @@ static int __init init_nfsd(void)
{
int retval;
+ nfsd_debugfs_init();
+
retval = nfsd4_init_slabs();
if (retval)
return retval;
@@ -1731,12 +2292,9 @@ static int __init init_nfsd(void)
if (retval)
goto out_free_pnfs;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
- retval = create_proc_exports_entry();
- if (retval)
- goto out_free_lockd;
retval = register_pernet_subsys(&nfsd_net_ops);
if (retval < 0)
- goto out_free_exports;
+ goto out_free_lockd;
retval = register_cld_notifier();
if (retval)
goto out_free_subsys;
@@ -1745,21 +2303,26 @@ static int __init init_nfsd(void)
goto out_free_cld;
retval = register_filesystem(&nfsd_fs_type);
if (retval)
- goto out_free_all;
+ goto out_free_nfsd4;
retval = genl_register_family(&nfsd_nl_family);
if (retval)
+ goto out_free_filesystem;
+ retval = create_proc_exports_entry();
+ if (retval)
goto out_free_all;
+ nfsd_localio_ops_init();
return 0;
out_free_all:
+ genl_unregister_family(&nfsd_nl_family);
+out_free_filesystem:
+ unregister_filesystem(&nfsd_fs_type);
+out_free_nfsd4:
nfsd4_destroy_laundry_wq();
out_free_cld:
unregister_cld_notifier();
out_free_subsys:
unregister_pernet_subsys(&nfsd_net_ops);
-out_free_exports:
- remove_proc_entry("fs/nfs/exports", NULL);
- remove_proc_entry("fs/nfs", NULL);
out_free_lockd:
nfsd_lockd_shutdown();
nfsd_drc_slab_free();
@@ -1767,22 +2330,24 @@ out_free_pnfs:
nfsd4_exit_pnfs();
out_free_slabs:
nfsd4_free_slabs();
+ nfsd_debugfs_exit();
return retval;
}
static void __exit exit_nfsd(void)
{
+ remove_proc_entry("fs/nfs/exports", NULL);
+ remove_proc_entry("fs/nfs", NULL);
genl_unregister_family(&nfsd_nl_family);
unregister_filesystem(&nfsd_fs_type);
nfsd4_destroy_laundry_wq();
unregister_cld_notifier();
unregister_pernet_subsys(&nfsd_net_ops);
nfsd_drc_slab_free();
- remove_proc_entry("fs/nfs/exports", NULL);
- remove_proc_entry("fs/nfs", NULL);
nfsd_lockd_shutdown();
nfsd4_free_slabs();
nfsd4_exit_pnfs();
+ nfsd_debugfs_exit();
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 16c5a05f340e..1bfd0b4e9af7 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -23,9 +23,7 @@
#include <uapi/linux/nfsd/debug.h>
-#include "netns.h"
#include "export.h"
-#include "stats.h"
#undef ifdebug
#ifdef CONFIG_SUNRPC_DEBUG
@@ -37,26 +35,23 @@
/*
* nfsd version
*/
+#define NFSD_MINVERS 2
+#define NFSD_MAXVERS 4
#define NFSD_SUPPORTED_MINOR_VERSION 2
-/*
- * Maximum blocksizes supported by daemon under various circumstances.
- */
-#define NFSSVC_MAXBLKSIZE RPCSVC_MAXPAYLOAD
-/* NFSv2 is limited by the protocol specification, see RFC 1094 */
-#define NFSSVC_MAXBLKSIZE_V2 (8*1024)
+bool nfsd_support_version(int vers);
+#include "netns.h"
+#include "stats.h"
/*
- * Largest number of bytes we need to allocate for an NFS
- * call or reply. Used to control buffer sizes. We use
- * the length of v3 WRITE, READDIR and READDIR replies
- * which are an RPC header, up to 26 XDR units of reply
- * data, and some page data.
- *
- * Note that accuracy here doesn't matter too much as the
- * size is rounded up to a page size when allocating space.
+ * Default and maximum payload size (NFS READ or WRITE), in bytes.
+ * The default is historical, and the maximum is an implementation
+ * limit.
*/
-#define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE)
+enum {
+ NFSSVC_DEFBLKSIZE = 1 * 1024 * 1024,
+ NFSSVC_MAXBLKSIZE = RPCSVC_MAXPAYLOAD,
+};
struct readdir_cd {
__be32 err; /* 0, nfserr, or nfserr_eof */
@@ -80,12 +75,9 @@ struct nfsd_genl_rqstp {
u32 rq_opnum[NFSD_MAX_OPS_PER_COMPOUND];
};
-extern struct svc_program nfsd_program;
+extern struct svc_program nfsd_programs[];
extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
extern struct mutex nfsd_mutex;
-extern spinlock_t nfsd_drc_lock;
-extern unsigned long nfsd_drc_max_mem;
-extern unsigned long nfsd_drc_mem_used;
extern atomic_t nfsd_th_cnt; /* number of available threads */
extern const struct seq_operations nfs_exports_op;
@@ -103,18 +95,17 @@ bool nfssvc_encode_voidres(struct svc_rqst *rqstp,
/*
* Function prototypes.
*/
-int nfsd_svc(int nrservs, struct net *net, const struct cred *cred);
+int nfsd_svc(int n, int *nservers, struct net *net,
+ const struct cred *cred, const char *scope);
int nfsd_dispatch(struct svc_rqst *rqstp);
int nfsd_nrthreads(struct net *);
int nfsd_nrpools(struct net *);
int nfsd_get_nrthreads(int n, int *, struct net *);
int nfsd_set_nrthreads(int n, int *, struct net *);
-int nfsd_pool_stats_open(struct inode *, struct file *);
-int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_shutdown_threads(struct net *net);
-bool i_am_nfsd(void);
+struct svc_rqst *nfsd_current_rqst(void);
struct nfsdfs_client {
struct kref cl_ref;
@@ -142,6 +133,10 @@ extern const struct svc_version nfsd_acl_version3;
#endif
#endif
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+extern const struct svc_version localio_version1;
+#endif
+
struct nfsd_net;
enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
@@ -151,11 +146,21 @@ void nfsd_reset_versions(struct nfsd_net *nn);
int nfsd_create_serv(struct net *net);
void nfsd_destroy_serv(struct net *net);
+#ifdef CONFIG_DEBUG_FS
+void nfsd_debugfs_init(void);
+void nfsd_debugfs_exit(void);
+#else
+static inline void nfsd_debugfs_init(void) {}
+static inline void nfsd_debugfs_exit(void) {}
+#endif
+
+extern bool nfsd_disable_splice_read __read_mostly;
+
extern int nfsd_max_blksize;
static inline int nfsd_v4client(struct svc_rqst *rq)
{
- return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
+ return rq && rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
}
static inline struct user_namespace *
nfsd_user_namespace(const struct svc_rqst *rqstp)
@@ -230,7 +235,6 @@ void nfsd_lockd_shutdown(void);
#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC)
#define nfserr_rofs cpu_to_be32(NFSERR_ROFS)
#define nfserr_mlink cpu_to_be32(NFSERR_MLINK)
-#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP)
#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG)
#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY)
#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT)
@@ -327,17 +331,36 @@ void nfsd_lockd_shutdown(void);
#define nfserr_xattr2big cpu_to_be32(NFS4ERR_XATTR2BIG)
#define nfserr_noxattr cpu_to_be32(NFS4ERR_NOXATTR)
-/* error codes for internal use */
+/*
+ * Error codes for internal use. We use enum to choose numbers that are
+ * not already assigned, then convert to be32 resulting in a number that
+ * cannot conflict with any existing be32 nfserr value.
+ */
+enum {
+ NFSERR_DROPIT = NFS4ERR_FIRST_FREE,
/* if a request fails due to kmalloc failure, it gets dropped.
* Client should resend eventually
*/
-#define nfserr_dropit cpu_to_be32(30000)
+#define nfserr_dropit cpu_to_be32(NFSERR_DROPIT)
+
/* end-of-file indicator in readdir */
-#define nfserr_eof cpu_to_be32(30001)
+ NFSERR_EOF,
+#define nfserr_eof cpu_to_be32(NFSERR_EOF)
+
/* replay detected */
-#define nfserr_replay_me cpu_to_be32(11001)
+ NFSERR_REPLAY_ME,
+#define nfserr_replay_me cpu_to_be32(NFSERR_REPLAY_ME)
+
/* nfs41 replay detected */
-#define nfserr_replay_cache cpu_to_be32(11002)
+ NFSERR_REPLAY_CACHE,
+#define nfserr_replay_cache cpu_to_be32(NFSERR_REPLAY_CACHE)
+
+/* symlink found where dir expected - handled differently to
+ * other symlink found errors by NFSv3.
+ */
+ NFSERR_SYMLINK_NOT_DIR,
+#define nfserr_symlink_not_dir cpu_to_be32(NFSERR_SYMLINK_NOT_DIR)
+};
/* Check for dir entries '.' and '..' */
#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
@@ -432,7 +455,10 @@ void nfsd_lockd_shutdown(void);
(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
FATTR4_WORD2_MODE_UMASK | \
NFSD4_2_SECURITY_ATTRS | \
- FATTR4_WORD2_XATTR_SUPPORT)
+ FATTR4_WORD2_XATTR_SUPPORT | \
+ FATTR4_WORD2_TIME_DELEG_ACCESS | \
+ FATTR4_WORD2_TIME_DELEG_MODIFY | \
+ FATTR4_WORD2_OPEN_ARGUMENTS)
extern const u32 nfsd_suppattrs[3][3];
@@ -502,7 +528,10 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
#endif
#define NFSD_WRITEABLE_ATTRS_WORD2 \
(FATTR4_WORD2_MODE_UMASK \
- | MAYBE_FATTR4_WORD2_SECURITY_LABEL)
+ | MAYBE_FATTR4_WORD2_SECURITY_LABEL \
+ | FATTR4_WORD2_TIME_DELEG_ACCESS \
+ | FATTR4_WORD2_TIME_DELEG_MODIFY \
+ )
#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
NFSD_WRITEABLE_ATTRS_WORD0
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 40fecf7b224f..aef474f1b84b 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -62,8 +62,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
* the write call).
*/
static inline __be32
-nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry,
- umode_t requested)
+nfsd_mode_check(struct dentry *dentry, umode_t requested)
{
umode_t mode = d_inode(dentry)->i_mode & S_IFMT;
@@ -76,36 +75,36 @@ nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry,
}
return nfs_ok;
}
- /*
- * v4 has an error more specific than err_notdir which we should
- * return in preference to err_notdir:
- */
- if (rqstp->rq_vers == 4 && mode == S_IFLNK)
+ if (mode == S_IFLNK) {
+ if (requested == S_IFDIR)
+ return nfserr_symlink_not_dir;
return nfserr_symlink;
+ }
if (requested == S_IFDIR)
return nfserr_notdir;
if (mode == S_IFDIR)
return nfserr_isdir;
- return nfserr_inval;
+ return nfserr_wrong_type;
}
-static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, int flags)
+static bool nfsd_originating_port_ok(struct svc_rqst *rqstp,
+ struct svc_cred *cred,
+ struct svc_export *exp)
{
- if (flags & NFSEXP_INSECURE_PORT)
+ if (nfsexp_flags(cred, exp) & NFSEXP_INSECURE_PORT)
return true;
/* We don't require gss requests to use low ports: */
- if (rqstp->rq_cred.cr_flavor >= RPC_AUTH_GSS)
+ if (cred->cr_flavor >= RPC_AUTH_GSS)
return true;
return test_bit(RQ_SECURE, &rqstp->rq_flags);
}
static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
+ struct svc_cred *cred,
struct svc_export *exp)
{
- int flags = nfsexp_flags(rqstp, exp);
-
/* Check if the request originated from a secure port. */
- if (!nfsd_originating_port_ok(rqstp, flags)) {
+ if (rqstp && !nfsd_originating_port_ok(rqstp, cred, exp)) {
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("nfsd: request from insecure port %s!\n",
svc_print_addr(rqstp, buf, sizeof(buf)));
@@ -113,23 +112,15 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
}
/* Set user creds for this exportpoint */
- return nfserrno(nfsd_setuser(rqstp, exp));
+ return nfserrno(nfsd_setuser(cred, exp));
}
-static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
- struct dentry *dentry, struct svc_export *exp)
+static inline __be32 check_pseudo_root(struct dentry *dentry,
+ struct svc_export *exp)
{
if (!(exp->ex_flags & NFSEXP_V4ROOT))
return nfs_ok;
/*
- * v2/v3 clients have no need for the V4ROOT export--they use
- * the mount protocl instead; also, further V4ROOT checks may be
- * in v4-specific code, in which case v2/v3 clients could bypass
- * them.
- */
- if (!nfsd_v4client(rqstp))
- return nfserr_stale;
- /*
* We're exposing only the directories and symlinks that have to be
* traversed on the way to real exports:
*/
@@ -151,7 +142,11 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
* dentry. On success, the results are used to set fh_export and
* fh_dentry.
*/
-static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net,
+ struct svc_cred *cred,
+ struct auth_domain *client,
+ struct auth_domain *gssclient,
+ struct svc_fh *fhp)
{
struct knfsd_fh *fh = &fhp->fh_handle;
struct fid *fid = NULL;
@@ -162,10 +157,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
int len;
__be32 error;
- error = nfserr_stale;
- if (rqstp->rq_vers > 2)
- error = nfserr_badhandle;
- if (rqstp->rq_vers == 4 && fh->fh_size == 0)
+ error = nfserr_badhandle;
+ if (fh->fh_size == 0)
return nfserr_nofilehandle;
if (fh->fh_version != 1)
@@ -195,7 +188,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
data_left -= len;
if (data_left < 0)
return error;
- exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
+ exp = rqst_exp_find(rqstp ? &rqstp->rq_chandle : NULL,
+ net, client, gssclient,
+ fh->fh_fsid_type, fh->fh_fsid);
fid = (struct fid *)(fh->fh_fsid + len);
error = nfserr_stale;
@@ -227,9 +222,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
cap_raise_nfsd_set(new->cap_effective,
new->cap_permitted);
put_cred(override_creds(new));
- put_cred(new);
} else {
- error = nfsd_setuser_and_check_port(rqstp, exp);
+ error = nfsd_setuser_and_check_port(rqstp, cred, exp);
if (error)
goto out;
}
@@ -237,9 +231,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
/*
* Look up the dentry using the NFS file handle.
*/
- error = nfserr_stale;
- if (rqstp->rq_vers > 2)
- error = nfserr_badhandle;
+ error = nfserr_badhandle;
fileid_type = fh->fh_fileid_type;
@@ -247,7 +239,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
dentry = dget(exp->ex_path.dentry);
else {
dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid,
- data_left, fileid_type,
+ data_left, fileid_type, 0,
nfsd_acceptable, exp);
if (IS_ERR_OR_NULL(dentry)) {
trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp,
@@ -278,17 +270,25 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
fhp->fh_dentry = dentry;
fhp->fh_export = exp;
- switch (rqstp->rq_vers) {
- case 4:
+ switch (fhp->fh_maxsize) {
+ case NFS4_FHSIZE:
if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR)
fhp->fh_no_atomic_attr = true;
+ fhp->fh_64bit_cookies = true;
break;
- case 3:
+ case NFS3_FHSIZE:
if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC)
fhp->fh_no_wcc = true;
+ fhp->fh_64bit_cookies = true;
+ if (exp->ex_flags & NFSEXP_V4ROOT)
+ goto out;
break;
- case 2:
+ case NFS_FHSIZE:
fhp->fh_no_wcc = true;
+ if (EX_WGATHER(exp))
+ fhp->fh_use_wgather = true;
+ if (exp->ex_flags & NFSEXP_V4ROOT)
+ goto out;
}
return 0;
@@ -298,42 +298,34 @@ out:
}
/**
- * fh_verify - filehandle lookup and access checking
- * @rqstp: pointer to current rpc request
+ * __fh_verify - filehandle lookup and access checking
+ * @rqstp: RPC transaction context, or NULL
+ * @net: net namespace in which to perform the export lookup
+ * @cred: RPC user credential
+ * @client: RPC auth domain
+ * @gssclient: RPC GSS auth domain, or NULL
* @fhp: filehandle to be verified
* @type: expected type of object pointed to by filehandle
* @access: type of access needed to object
*
- * Look up a dentry from the on-the-wire filehandle, check the client's
- * access to the export, and set the current task's credentials.
- *
- * Regardless of success or failure of fh_verify(), fh_put() should be
- * called on @fhp when the caller is finished with the filehandle.
- *
- * fh_verify() may be called multiple times on a given filehandle, for
- * example, when processing an NFSv4 compound. The first call will look
- * up a dentry using the on-the-wire filehandle. Subsequent calls will
- * skip the lookup and just perform the other checks and possibly change
- * the current task's credentials.
- *
- * @type specifies the type of object expected using one of the S_IF*
- * constants defined in include/linux/stat.h. The caller may use zero
- * to indicate that it doesn't care, or a negative integer to indicate
- * that it expects something not of the given type.
- *
- * @access is formed from the NFSD_MAY_* constants defined in
- * fs/nfsd/vfs.h.
+ * See fh_verify() for further descriptions of @fhp, @type, and @access.
*/
-__be32
-fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+static __be32
+__fh_verify(struct svc_rqst *rqstp,
+ struct net *net, struct svc_cred *cred,
+ struct auth_domain *client,
+ struct auth_domain *gssclient,
+ struct svc_fh *fhp, umode_t type, int access)
{
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct svc_export *exp = NULL;
+ bool may_bypass_gss = false;
struct dentry *dentry;
__be32 error;
if (!fhp->fh_dentry) {
- error = nfsd_set_fh_dentry(rqstp, fhp);
+ error = nfsd_set_fh_dentry(rqstp, net, cred, client,
+ gssclient, fhp);
if (error)
goto out;
}
@@ -358,25 +350,24 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
* (for example, if different id-squashing options are in
* effect on the new filesystem).
*/
- error = check_pseudo_root(rqstp, dentry, exp);
+ error = check_pseudo_root(dentry, exp);
if (error)
goto out;
- error = nfsd_setuser_and_check_port(rqstp, exp);
+ error = nfsd_setuser_and_check_port(rqstp, cred, exp);
if (error)
goto out;
- error = nfsd_mode_check(rqstp, dentry, type);
+ error = nfsd_mode_check(dentry, type);
if (error)
goto out;
- /*
- * pseudoflavor restrictions are not enforced on NLM,
- * which clients virtually always use auth_sys for,
- * even while using RPCSEC_GSS for NFS.
- */
- if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS)
- goto skip_pseudoflavor_check;
+ if ((access & NFSD_MAY_NLM) && (exp->ex_flags & NFSEXP_NOAUTHNLM))
+ /* NLM is allowed to fully bypass authentication */
+ goto out;
+
+ if (access & NFSD_MAY_BYPASS_GSS)
+ may_bypass_gss = true;
/*
* Clients may expect to be able to use auth_sys during mount,
* even if they use gss for everything else; see section 2.3.2
@@ -384,15 +375,17 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
*/
if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT
&& exp->ex_path.dentry == dentry)
- goto skip_pseudoflavor_check;
+ may_bypass_gss = true;
- error = check_nfsd_access(exp, rqstp);
+ error = check_nfsd_access(exp, rqstp, may_bypass_gss);
if (error)
goto out;
+ /* During a LOCALIO call, fh_verify is called with a NULL rqstp */
+ if (rqstp)
+ svc_xprt_set_valid(rqstp->rq_xprt);
-skip_pseudoflavor_check:
/* Finally, check access permissions. */
- error = nfsd_permission(rqstp, exp, dentry, access);
+ error = nfsd_permission(cred, exp, dentry, access);
out:
trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
if (error == nfserr_stale)
@@ -400,6 +393,63 @@ out:
return error;
}
+/**
+ * fh_verify_local - filehandle lookup and access checking
+ * @net: net namespace in which to perform the export lookup
+ * @cred: RPC user credential
+ * @client: RPC auth domain
+ * @fhp: filehandle to be verified
+ * @type: expected type of object pointed to by filehandle
+ * @access: type of access needed to object
+ *
+ * This API can be used by callers who do not have an RPC
+ * transaction context (ie are not running in an nfsd thread).
+ *
+ * See fh_verify() for further descriptions of @fhp, @type, and @access.
+ */
+__be32
+fh_verify_local(struct net *net, struct svc_cred *cred,
+ struct auth_domain *client, struct svc_fh *fhp,
+ umode_t type, int access)
+{
+ return __fh_verify(NULL, net, cred, client, NULL,
+ fhp, type, access);
+}
+
+/**
+ * fh_verify - filehandle lookup and access checking
+ * @rqstp: pointer to current rpc request
+ * @fhp: filehandle to be verified
+ * @type: expected type of object pointed to by filehandle
+ * @access: type of access needed to object
+ *
+ * Look up a dentry from the on-the-wire filehandle, check the client's
+ * access to the export, and set the current task's credentials.
+ *
+ * Regardless of success or failure of fh_verify(), fh_put() should be
+ * called on @fhp when the caller is finished with the filehandle.
+ *
+ * fh_verify() may be called multiple times on a given filehandle, for
+ * example, when processing an NFSv4 compound. The first call will look
+ * up a dentry using the on-the-wire filehandle. Subsequent calls will
+ * skip the lookup and just perform the other checks and possibly change
+ * the current task's credentials.
+ *
+ * @type specifies the type of object expected using one of the S_IF*
+ * constants defined in include/linux/stat.h. The caller may use zero
+ * to indicate that it doesn't care, or a negative integer to indicate
+ * that it expects something not of the given type.
+ *
+ * @access is formed from the NFSD_MAY_* constants defined in
+ * fs/nfsd/vfs.h.
+ */
+__be32
+fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+{
+ return __fh_verify(rqstp, SVC_NET(rqstp), &rqstp->rq_cred,
+ rqstp->rq_client, rqstp->rq_gssclient,
+ fhp, type, access);
+}
/*
* Compose a file handle for an NFS reply.
@@ -573,7 +623,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
_fh_update(fhp, exp, dentry);
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
fh_put(fhp);
- return nfserr_opnotsupp;
+ return nfserr_stale;
}
return 0;
@@ -599,7 +649,7 @@ fh_update(struct svc_fh *fhp)
_fh_update(fhp, fhp->fh_export, dentry);
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
- return nfserr_opnotsupp;
+ return nfserr_stale;
return 0;
out_bad:
printk(KERN_ERR "fh_update: fh not verified!\n");
@@ -618,20 +668,18 @@ out_negative:
__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp)
{
bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
- struct inode *inode;
struct kstat stat;
__be32 err;
if (fhp->fh_no_wcc || fhp->fh_pre_saved)
return nfs_ok;
- inode = d_inode(fhp->fh_dentry);
err = fh_getattr(fhp, &stat);
if (err)
return err;
if (v4)
- fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
+ fhp->fh_pre_change = nfsd4_change_attribute(&stat);
fhp->fh_pre_mtime = stat.mtime;
fhp->fh_pre_ctime = stat.ctime;
@@ -648,7 +696,6 @@ __be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp)
__be32 fh_fill_post_attrs(struct svc_fh *fhp)
{
bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
- struct inode *inode = d_inode(fhp->fh_dentry);
__be32 err;
if (fhp->fh_no_wcc)
@@ -664,7 +711,7 @@ __be32 fh_fill_post_attrs(struct svc_fh *fhp)
fhp->fh_post_saved = true;
if (v4)
fhp->fh_post_change =
- nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+ nfsd4_change_attribute(&fhp->fh_post_attr);
return nfs_ok;
}
@@ -721,7 +768,7 @@ char * SVCFH_fmt(struct svc_fh *fhp)
struct knfsd_fh *fh = &fhp->fh_handle;
static char buf[2+1+1+64*3+1];
- if (fh->fh_size < 0 || fh->fh_size> 64)
+ if (fh->fh_size > 64)
return "bad-fh";
sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw);
return buf;
@@ -755,7 +802,14 @@ enum fsid_source fsid_source(const struct svc_fh *fhp)
return FSIDSOURCE_DEV;
}
-/*
+/**
+ * nfsd4_change_attribute - Generate an NFSv4 change_attribute value
+ * @stat: inode attributes
+ *
+ * Caller must fill in @stat before calling, typically by invoking
+ * vfs_getattr() with STATX_MODE, STATX_CTIME, and STATX_CHANGE_COOKIE.
+ * Returns an unsigned 64-bit changeid4 value (RFC 8881 Section 3.2).
+ *
* We could use i_version alone as the change attribute. However, i_version
* can go backwards on a regular file after an unclean shutdown. On its own
* that doesn't necessarily cause a problem, but if i_version goes backwards
@@ -772,13 +826,13 @@ enum fsid_source fsid_source(const struct svc_fh *fhp)
* assume that the new change attr is always logged to stable storage in some
* fashion before the results can be seen.
*/
-u64 nfsd4_change_attribute(const struct kstat *stat, const struct inode *inode)
+u64 nfsd4_change_attribute(const struct kstat *stat)
{
u64 chattr;
if (stat->result_mask & STATX_CHANGE_COOKIE) {
chattr = stat->change_cookie;
- if (S_ISREG(inode->i_mode) &&
+ if (S_ISREG(stat->mode) &&
!(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
chattr += (u64)stat->ctime.tv_sec << 30;
chattr += stat->ctime.tv_nsec;
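The comment above explains why ctime is folded into the change cookie when the filesystem cannot guarantee a monotonic value. A standalone sketch of that fallback follows (not part of the patch; the example_* name is an assumption made for illustration): mixing in ctime makes it unlikely that an i_version which went backwards after an unclean shutdown repeats a change attribute the client has already seen.

#include <linux/time64.h>
#include <linux/types.h>

/* Hypothetical helper, for illustration only. */
static u64 example_change_attr(u64 change_cookie,
			       const struct timespec64 *ctime)
{
	/*
	 * Fold ctime into the cookie: even if change_cookie moved
	 * backwards across an unclean shutdown, the ctime component
	 * keeps the combined value from repeating an earlier one.
	 */
	return change_cookie +
	       ((u64)ctime->tv_sec << 30) +
	       (u64)ctime->tv_nsec;
}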
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 6ebdf7ea27bf..5103c2f4d225 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -88,6 +88,8 @@ typedef struct svc_fh {
* wcc data is not atomic with
* operation
*/
+ bool fh_use_wgather; /* NFSv2 wgather option */
+ bool fh_64bit_cookies;/* readdir cookie size */
int fh_flags; /* FH flags */
bool fh_post_saved; /* post-op attrs saved */
bool fh_pre_saved; /* pre-op attrs saved */
@@ -215,6 +217,8 @@ extern char * SVCFH_fmt(struct svc_fh *fhp);
* Function prototypes
*/
__be32 fh_verify(struct svc_rqst *, struct svc_fh *, umode_t, int);
+__be32 fh_verify_local(struct net *, struct svc_cred *, struct auth_domain *,
+ struct svc_fh *, umode_t, int);
__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
__be32 fh_update(struct svc_fh *);
void fh_put(struct svc_fh *);
@@ -263,7 +267,6 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
return true;
}
-#ifdef CONFIG_CRC32
/**
* knfsd_fh_hash - calculate the crc32 hash for the filehandle
* @fh - pointer to filehandle
@@ -275,12 +278,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
{
return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size);
}
-#else
-static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
-{
- return 0;
-}
-#endif
/**
* fh_clear_pre_post_attrs - Reset pre/post attributes
@@ -293,8 +290,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
fhp->fh_pre_saved = false;
}
-u64 nfsd4_change_attribute(const struct kstat *stat,
- const struct inode *inode);
+u64 nfsd4_change_attribute(const struct kstat *stat);
__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp);
__be32 fh_fill_post_attrs(struct svc_fh *fhp);
__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 36370b957b63..c10fa8128a8a 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -10,9 +10,35 @@
#include "cache.h"
#include "xdr.h"
#include "vfs.h"
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
+static __be32 nfsd_map_status(__be32 status)
+{
+ switch (status) {
+ case nfs_ok:
+ break;
+ case nfserr_nofilehandle:
+ case nfserr_badhandle:
+ status = nfserr_stale;
+ break;
+ case nfserr_wrongsec:
+ case nfserr_xdev:
+ case nfserr_file_open:
+ status = nfserr_acces;
+ break;
+ case nfserr_symlink_not_dir:
+ status = nfserr_notdir;
+ break;
+ case nfserr_symlink:
+ case nfserr_wrong_type:
+ status = nfserr_inval;
+ break;
+ }
+ return status;
+}
+
static __be32
nfsd_proc_null(struct svc_rqst *rqstp)
{
@@ -29,7 +55,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp)
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd_attrstat *resp = rqstp->rq_resp;
- dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
+ trace_nfsd_vfs_getattr(rqstp, &argp->fh);
fh_copy(&resp->fh, &argp->fh);
resp->status = fh_verify(rqstp, &resp->fh, 0,
@@ -38,6 +64,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp)
goto out;
resp->status = fh_getattr(&resp->fh, &resp->stat);
out:
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -109,6 +136,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
resp->status = fh_getattr(&resp->fh, &resp->stat);
out:
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -143,6 +171,7 @@ nfsd_proc_lookup(struct svc_rqst *rqstp)
resp->status = fh_getattr(&resp->fh, &resp->stat);
out:
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -164,6 +193,7 @@ nfsd_proc_readlink(struct svc_rqst *rqstp)
page_address(resp->page), &resp->len);
fh_put(&argp->fh);
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -182,7 +212,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
SVCFH_fmt(&argp->fh),
argp->count, argp->offset);
- argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2);
+ argp->count = min_t(u32, argp->count, NFS_MAXDATA);
argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
resp->pages = rqstp->rq_next_page;
@@ -200,6 +230,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
set_bit(RQ_DROPME, &rqstp->rq_flags);
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -220,21 +251,19 @@ nfsd_proc_write(struct svc_rqst *rqstp)
struct nfsd_writeargs *argp = rqstp->rq_argp;
struct nfsd_attrstat *resp = rqstp->rq_resp;
unsigned long cnt = argp->len;
- unsigned int nvecs;
dprintk("nfsd: WRITE %s %u bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
- nvecs = svc_fill_write_vector(rqstp, &argp->payload);
-
- resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
- argp->offset, rqstp->rq_vec, nvecs,
- &cnt, NFS_DATA_SYNC, NULL);
+ fh_copy(&resp->fh, &argp->fh);
+ resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
+ &argp->payload, &cnt, NFS_DATA_SYNC, NULL);
if (resp->status == nfs_ok)
resp->status = fh_getattr(&resp->fh, &resp->stat);
else if (resp->status == nfserr_jukebox)
set_bit(RQ_DROPME, &rqstp->rq_flags);
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -261,9 +290,6 @@ nfsd_proc_create(struct svc_rqst *rqstp)
int hosterr;
dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size);
- dprintk("nfsd: CREATE %s %.*s\n",
- SVCFH_fmt(dirfhp), argp->len, argp->name);
-
/* First verify the parent file handle */
resp->status = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC);
if (resp->status != nfs_ok)
@@ -281,7 +307,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
}
inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT);
- dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
+ dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(argp->name, argp->len),
+ dirfhp->fh_dentry);
if (IS_ERR(dchild)) {
resp->status = nfserrno(PTR_ERR(dchild));
goto out_unlock;
@@ -300,7 +327,7 @@ nfsd_proc_create(struct svc_rqst *rqstp)
*/
resp->status = nfserr_acces;
if (!newfhp->fh_dentry) {
- printk(KERN_WARNING
+ printk(KERN_WARNING
"nfsd_proc_create: file handle not verified\n");
goto out_unlock;
}
@@ -331,10 +358,11 @@ nfsd_proc_create(struct svc_rqst *rqstp)
* echo thing > device-special-file-or-pipe
* by doing a CREATE with type==0
*/
- resp->status = nfsd_permission(rqstp,
- newfhp->fh_export,
- newfhp->fh_dentry,
- NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS);
+ resp->status = nfsd_permission(
+ &rqstp->rq_cred,
+ newfhp->fh_export,
+ newfhp->fh_dentry,
+ NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS);
if (resp->status && resp->status != nfserr_rofs)
goto out_unlock;
}
@@ -403,6 +431,7 @@ done:
goto out;
resp->status = fh_getattr(&resp->fh, &resp->stat);
out:
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -412,13 +441,11 @@ nfsd_proc_remove(struct svc_rqst *rqstp)
struct nfsd_diropargs *argp = rqstp->rq_argp;
struct nfsd_stat *resp = rqstp->rq_resp;
- dprintk("nfsd: REMOVE %s %.*s\n", SVCFH_fmt(&argp->fh),
- argp->len, argp->name);
-
/* Unlink. -SIFDIR means file must not be a directory */
resp->status = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR,
argp->name, argp->len);
fh_put(&argp->fh);
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -428,15 +455,11 @@ nfsd_proc_rename(struct svc_rqst *rqstp)
struct nfsd_renameargs *argp = rqstp->rq_argp;
struct nfsd_stat *resp = rqstp->rq_resp;
- dprintk("nfsd: RENAME %s %.*s -> \n",
- SVCFH_fmt(&argp->ffh), argp->flen, argp->fname);
- dprintk("nfsd: -> %s %.*s\n",
- SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname);
-
resp->status = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen,
&argp->tfh, argp->tname, argp->tlen);
fh_put(&argp->ffh);
fh_put(&argp->tfh);
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -446,17 +469,11 @@ nfsd_proc_link(struct svc_rqst *rqstp)
struct nfsd_linkargs *argp = rqstp->rq_argp;
struct nfsd_stat *resp = rqstp->rq_resp;
- dprintk("nfsd: LINK %s ->\n",
- SVCFH_fmt(&argp->ffh));
- dprintk("nfsd: %s %.*s\n",
- SVCFH_fmt(&argp->tfh),
- argp->tlen,
- argp->tname);
-
resp->status = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen,
&argp->ffh);
fh_put(&argp->ffh);
fh_put(&argp->tfh);
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -483,10 +500,6 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
goto out;
}
- dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n",
- SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
- argp->tlen, argp->tname);
-
fh_init(&newfh, NFS_FHSIZE);
resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
argp->tname, &attrs, &newfh);
@@ -495,6 +508,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp)
fh_put(&argp->ffh);
fh_put(&newfh);
out:
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -511,8 +525,6 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp)
.na_iattr = &argp->attrs,
};
- dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
-
if (resp->fh.fh_dentry) {
printk(KERN_WARNING
"nfsd_proc_mkdir: response already verified??\n");
@@ -528,6 +540,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp)
resp->status = fh_getattr(&resp->fh, &resp->stat);
out:
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -540,11 +553,10 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp)
struct nfsd_diropargs *argp = rqstp->rq_argp;
struct nfsd_stat *resp = rqstp->rq_resp;
- dprintk("nfsd: RMDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
-
resp->status = nfsd_unlink(rqstp, &argp->fh, S_IFDIR,
argp->name, argp->len);
fh_put(&argp->fh);
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -576,9 +588,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp)
struct nfsd_readdirres *resp = rqstp->rq_resp;
loff_t offset;
- dprintk("nfsd: READDIR %s %d bytes at %d\n",
- SVCFH_fmt(&argp->fh),
- argp->count, argp->cookie);
+ trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie);
nfsd_init_dirlist_pages(rqstp, resp, argp->count);
@@ -590,6 +600,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp)
nfssvc_encode_nfscookie(resp, offset);
fh_put(&argp->fh);
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -602,11 +613,10 @@ nfsd_proc_statfs(struct svc_rqst *rqstp)
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd_statfsres *resp = rqstp->rq_resp;
- dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh));
-
resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats,
NFSD_MAY_BYPASS_GSS_ON_ROOT);
fh_put(&argp->fh);
+ resp->status = nfsd_map_status(resp->status);
return rpc_success;
}
@@ -698,7 +708,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_argzero = sizeof(struct nfsd_readargs),
.pc_ressize = sizeof(struct nfsd_readres),
.pc_cachetype = RC_NOCACHE,
- .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
+ .pc_xdrressize = ST+AT+1+NFS_MAXDATA/4,
.pc_name = "READ",
},
[NFSPROC_WRITECACHE] = {
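nfsd_map_status() squashes status codes that NFSv2 cannot represent (nfserr_wrongsec, nfserr_badhandle, and so on) down to errors a v2 client understands, and every procedure handler above now routes its result through it just before returning. A hedged sketch of that pattern; the handler name is illustrative.

/* Sketch: the common shape of a v2 proc handler after this change. */
static __be32 example_proc_handler(struct svc_rqst *rqstp)
{
	struct nfsd_fhandle *argp = rqstp->rq_argp;
	struct nfsd_attrstat *resp = rqstp->rq_resp;

	resp->status = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
	/* ...operation-specific work updates resp->status... */

	/* e.g. nfserr_wrongsec becomes nfserr_acces for the v2 client */
	resp->status = nfsd_map_status(resp->status);
	return rpc_success;
}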
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index c0d17b92b249..82b0111ac469 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -19,6 +19,7 @@
#include <linux/sunrpc/svc_xprt.h>
#include <linux/lockd/bind.h>
#include <linux/nfsacl.h>
+#include <linux/nfslocalio.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>
@@ -35,7 +36,6 @@
#define NFSDDBG_FACILITY NFSDDBG_SVC
atomic_t nfsd_th_cnt = ATOMIC_INIT(0);
-extern struct svc_program nfsd_program;
static int nfsd(void *vrqstp);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static int nfsd_acl_rpcbind_set(struct net *,
@@ -70,15 +70,14 @@ static __be32 nfsd_init_request(struct svc_rqst *,
*/
DEFINE_MUTEX(nfsd_mutex);
-/*
- * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
- * nfsd_drc_max_pages limits the total amount of memory available for
- * version 4.1 DRC caches.
- * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
- */
-DEFINE_SPINLOCK(nfsd_drc_lock);
-unsigned long nfsd_drc_max_mem;
-unsigned long nfsd_drc_mem_used;
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+static const struct svc_version *localio_versions[] = {
+ [1] = &localio_version1,
+};
+
+#define NFSD_LOCALIO_NRVERS ARRAY_SIZE(localio_versions)
+
+#endif /* CONFIG_NFS_LOCALIO */
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static const struct svc_version *nfsd_acl_version[] = {
@@ -90,23 +89,12 @@ static const struct svc_version *nfsd_acl_version[] = {
# endif
};
-#define NFSD_ACL_MINVERS 2
+#define NFSD_ACL_MINVERS 2
#define NFSD_ACL_NRVERS ARRAY_SIZE(nfsd_acl_version)
-static struct svc_program nfsd_acl_program = {
- .pg_prog = NFS_ACL_PROGRAM,
- .pg_nvers = NFSD_ACL_NRVERS,
- .pg_vers = nfsd_acl_version,
- .pg_name = "nfsacl",
- .pg_class = "nfsd",
- .pg_authenticate = &svc_set_client,
- .pg_init_request = nfsd_acl_init_request,
- .pg_rpcbind_set = nfsd_acl_rpcbind_set,
-};
-
#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
-static const struct svc_version *nfsd_version[] = {
+static const struct svc_version *nfsd_version[NFSD_MAXVERS+1] = {
#if defined(CONFIG_NFSD_V2)
[2] = &nfsd_version2,
#endif
@@ -116,98 +104,63 @@ static const struct svc_version *nfsd_version[] = {
#endif
};
-#define NFSD_MINVERS 2
-#define NFSD_NRVERS ARRAY_SIZE(nfsd_version)
-
-struct svc_program nfsd_program = {
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- .pg_next = &nfsd_acl_program,
-#endif
+struct svc_program nfsd_programs[] = {
+ {
.pg_prog = NFS_PROGRAM, /* program number */
- .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */
+ .pg_nvers = NFSD_MAXVERS+1, /* nr of entries in nfsd_version */
.pg_vers = nfsd_version, /* version table */
.pg_name = "nfsd", /* program name */
.pg_class = "nfsd", /* authentication class */
- .pg_authenticate = &svc_set_client, /* export authentication */
+ .pg_authenticate = svc_set_client, /* export authentication */
.pg_init_request = nfsd_init_request,
.pg_rpcbind_set = nfsd_rpcbind_set,
+ },
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+ {
+ .pg_prog = NFS_ACL_PROGRAM,
+ .pg_nvers = NFSD_ACL_NRVERS,
+ .pg_vers = nfsd_acl_version,
+ .pg_name = "nfsacl",
+ .pg_class = "nfsd",
+ .pg_authenticate = svc_set_client,
+ .pg_init_request = nfsd_acl_init_request,
+ .pg_rpcbind_set = nfsd_acl_rpcbind_set,
+ },
+#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ {
+ .pg_prog = NFS_LOCALIO_PROGRAM,
+ .pg_nvers = NFSD_LOCALIO_NRVERS,
+ .pg_vers = localio_versions,
+ .pg_name = "nfslocalio",
+ .pg_class = "nfsd",
+ .pg_authenticate = svc_set_client,
+ .pg_init_request = svc_generic_init_request,
+ .pg_rpcbind_set = svc_generic_rpcbind_set,
+ }
+#endif /* CONFIG_NFS_LOCALIO */
};
-static bool
-nfsd_support_version(int vers)
+bool nfsd_support_version(int vers)
{
- if (vers >= NFSD_MINVERS && vers < NFSD_NRVERS)
+ if (vers >= NFSD_MINVERS && vers <= NFSD_MAXVERS)
return nfsd_version[vers] != NULL;
return false;
}
-static bool *
-nfsd_alloc_versions(void)
-{
- bool *vers = kmalloc_array(NFSD_NRVERS, sizeof(bool), GFP_KERNEL);
- unsigned i;
-
- if (vers) {
- /* All compiled versions are enabled by default */
- for (i = 0; i < NFSD_NRVERS; i++)
- vers[i] = nfsd_support_version(i);
- }
- return vers;
-}
-
-static bool *
-nfsd_alloc_minorversions(void)
-{
- bool *vers = kmalloc_array(NFSD_SUPPORTED_MINOR_VERSION + 1,
- sizeof(bool), GFP_KERNEL);
- unsigned i;
-
- if (vers) {
- /* All minor versions are enabled by default */
- for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++)
- vers[i] = nfsd_support_version(4);
- }
- return vers;
-}
-
-void
-nfsd_netns_free_versions(struct nfsd_net *nn)
-{
- kfree(nn->nfsd_versions);
- kfree(nn->nfsd4_minorversions);
- nn->nfsd_versions = NULL;
- nn->nfsd4_minorversions = NULL;
-}
-
-static void
-nfsd_netns_init_versions(struct nfsd_net *nn)
-{
- if (!nn->nfsd_versions) {
- nn->nfsd_versions = nfsd_alloc_versions();
- nn->nfsd4_minorversions = nfsd_alloc_minorversions();
- if (!nn->nfsd_versions || !nn->nfsd4_minorversions)
- nfsd_netns_free_versions(nn);
- }
-}
-
int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change)
{
- if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
+ if (vers < NFSD_MINVERS || vers > NFSD_MAXVERS)
return 0;
switch(change) {
case NFSD_SET:
- if (nn->nfsd_versions)
- nn->nfsd_versions[vers] = nfsd_support_version(vers);
+ nn->nfsd_versions[vers] = nfsd_support_version(vers);
break;
case NFSD_CLEAR:
- nfsd_netns_init_versions(nn);
- if (nn->nfsd_versions)
- nn->nfsd_versions[vers] = false;
+ nn->nfsd_versions[vers] = false;
break;
case NFSD_TEST:
- if (nn->nfsd_versions)
- return nn->nfsd_versions[vers];
- fallthrough;
+ return nn->nfsd_versions[vers];
case NFSD_AVAIL:
return nfsd_support_version(vers);
}
@@ -234,23 +187,16 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change
switch(change) {
case NFSD_SET:
- if (nn->nfsd4_minorversions) {
- nfsd_vers(nn, 4, NFSD_SET);
- nn->nfsd4_minorversions[minorversion] =
- nfsd_vers(nn, 4, NFSD_TEST);
- }
+ nfsd_vers(nn, 4, NFSD_SET);
+ nn->nfsd4_minorversions[minorversion] =
+ nfsd_vers(nn, 4, NFSD_TEST);
break;
case NFSD_CLEAR:
- nfsd_netns_init_versions(nn);
- if (nn->nfsd4_minorversions) {
- nn->nfsd4_minorversions[minorversion] = false;
- nfsd_adjust_nfsd_versions4(nn);
- }
+ nn->nfsd4_minorversions[minorversion] = false;
+ nfsd_adjust_nfsd_versions4(nn);
break;
case NFSD_TEST:
- if (nn->nfsd4_minorversions)
- return nn->nfsd4_minorversions[minorversion];
- return nfsd_vers(nn, 4, NFSD_TEST);
+ return nn->nfsd4_minorversions[minorversion];
case NFSD_AVAIL:
return minorversion <= NFSD_SUPPORTED_MINOR_VERSION &&
nfsd_vers(nn, 4, NFSD_AVAIL);
@@ -258,6 +204,34 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change
return 0;
}
+bool nfsd_net_try_get(struct net *net) __must_hold(rcu)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ return (nn && percpu_ref_tryget_live(&nn->nfsd_net_ref));
+}
+
+void nfsd_net_put(struct net *net) __must_hold(rcu)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ percpu_ref_put(&nn->nfsd_net_ref);
+}
+
+static void nfsd_net_done(struct percpu_ref *ref)
+{
+ struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref);
+
+ complete(&nn->nfsd_net_confirm_done);
+}
+
+static void nfsd_net_free(struct percpu_ref *ref)
+{
+ struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref);
+
+ complete(&nn->nfsd_net_free_done);
+}
+
/*
* Maximum number of nfsd processes
*/
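The new nfsd_net_ref gives each namespace's nfsd instance a two-stage teardown: percpu_ref_kill_and_confirm() guarantees that nfsd_net_try_get() can no longer succeed, and the final put fires nfsd_net_free() to signal that all outstanding references are gone. A minimal sketch of that handshake using the standard percpu_ref API; the example_* wrappers are illustrative and error handling is trimmed.

#include <linux/percpu-refcount.h>
#include <linux/completion.h>

/* Sketch only: the init/kill/free sequence behind nn->nfsd_net_ref. */
static int example_setup(struct nfsd_net *nn)
{
	int err;

	err = percpu_ref_init(&nn->nfsd_net_ref, nfsd_net_free, 0, GFP_KERNEL);
	if (err)
		return err;
	init_completion(&nn->nfsd_net_confirm_done);
	init_completion(&nn->nfsd_net_free_done);
	return 0;
}

static void example_teardown(struct nfsd_net *nn)
{
	/* Stop new tryget_live() users; nfsd_net_done() runs once the
	 * switch to atomic mode is visible on all CPUs. */
	percpu_ref_kill_and_confirm(&nn->nfsd_net_ref, nfsd_net_done);
	wait_for_completion(&nn->nfsd_net_confirm_done);

	/* ...drain the subsystems that may still hold references... */

	/* nfsd_net_free() completes this when the refcount reaches zero. */
	wait_for_completion(&nn->nfsd_net_free_done);
	percpu_ref_exit(&nn->nfsd_net_ref);
}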
@@ -422,13 +396,13 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred)
if (ret)
goto out_filecache;
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ nfsd4_ssc_init_umount_work(nn);
+#endif
ret = nfs4_state_start_net(net);
if (ret)
goto out_reply_cache;
-#ifdef CONFIG_NFSD_V4_2_INTER_SSC
- nfsd4_ssc_init_umount_work(nn);
-#endif
nn->nfsd_net_up = true;
return 0;
@@ -450,6 +424,13 @@ static void nfsd_shutdown_net(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ if (!nn->nfsd_net_up)
+ return;
+
+ percpu_ref_kill_and_confirm(&nn->nfsd_net_ref, nfsd_net_done);
+ wait_for_completion(&nn->nfsd_net_confirm_done);
+
+ nfsd_export_flush(net);
nfs4_state_shutdown_net(net);
nfsd_reply_cache_shutdown(nn);
nfsd_file_cache_shutdown_net(net);
@@ -457,6 +438,10 @@ static void nfsd_shutdown_net(struct net *net)
lockd_down(net);
nn->lockd_up = false;
}
+
+ wait_for_completion(&nn->nfsd_net_free_done);
+ percpu_ref_exit(&nn->nfsd_net_ref);
+
nn->nfsd_net_up = false;
nfsd_shutdown_generic();
}
@@ -536,6 +521,8 @@ void nfsd_destroy_serv(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct svc_serv *serv = nn->nfsd_serv;
+ lockdep_assert_held(&nfsd_mutex);
+
spin_lock(&nfsd_notifier_lock);
nn->nfsd_serv = NULL;
spin_unlock(&nfsd_notifier_lock);
@@ -557,11 +544,8 @@ void nfsd_destroy_serv(struct net *net)
* other initialization has been done except the rpcb information.
*/
svc_rpcb_cleanup(serv, net);
- if (!nn->nfsd_net_up)
- return;
nfsd_shutdown_net(net);
- nfsd_export_flush(net);
svc_destroy(&serv);
}
@@ -569,11 +553,11 @@ void nfsd_reset_versions(struct nfsd_net *nn)
{
int i;
- for (i = 0; i < NFSD_NRVERS; i++)
+ for (i = 0; i <= NFSD_MAXVERS; i++)
if (nfsd_vers(nn, i, NFSD_TEST))
return;
- for (i = 0; i < NFSD_NRVERS; i++)
+ for (i = 0; i <= NFSD_MAXVERS; i++)
if (i != 4)
nfsd_vers(nn, i, NFSD_SET);
else {
@@ -583,27 +567,6 @@ void nfsd_reset_versions(struct nfsd_net *nn)
}
}
-/*
- * Each session guarantees a negotiated per slot memory cache for replies
- * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
- * NFSv4.1 server might want to use more memory for a DRC than a machine
- * with mutiple services.
- *
- * Impose a hard limit on the number of pages for the DRC which varies
- * according to the machines free pages. This is of course only a default.
- *
- * For now this is a #defined shift which could be under admin control
- * in the future.
- */
-static void set_max_drc(void)
-{
- #define NFSD_DRC_SIZE_SHIFT 7
- nfsd_drc_max_mem = (nr_free_buffer_pages()
- >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
- nfsd_drc_mem_used = 0;
- dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
-}
-
static int nfsd_get_default_max_blksize(void)
{
struct sysinfo i;
@@ -619,7 +582,7 @@ static int nfsd_get_default_max_blksize(void)
*/
target >>= 12;
- ret = NFSSVC_MAXBLKSIZE;
+ ret = NFSSVC_DEFBLKSIZE;
while (ret > target && ret >= 8*1024*2)
ret /= 2;
return ret;
@@ -643,9 +606,11 @@ void nfsd_shutdown_threads(struct net *net)
mutex_unlock(&nfsd_mutex);
}
-bool i_am_nfsd(void)
+struct svc_rqst *nfsd_current_rqst(void)
{
- return kthread_func(current) == nfsd;
+ if (kthread_func(current) == nfsd)
+ return kthread_data(current);
+ return NULL;
}
int nfsd_create_serv(struct net *net)
@@ -658,26 +623,31 @@ int nfsd_create_serv(struct net *net)
if (nn->nfsd_serv)
return 0;
+ error = percpu_ref_init(&nn->nfsd_net_ref, nfsd_net_free,
+ 0, GFP_KERNEL);
+ if (error)
+ return error;
+ init_completion(&nn->nfsd_net_free_done);
+ init_completion(&nn->nfsd_net_confirm_done);
+
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
nfsd_reset_versions(nn);
- serv = svc_create_pooled(&nfsd_program, &nn->nfsd_svcstats,
+ serv = svc_create_pooled(nfsd_programs, ARRAY_SIZE(nfsd_programs),
+ &nn->nfsd_svcstats,
nfsd_max_blksize, nfsd);
if (serv == NULL)
return -ENOMEM;
- serv->sv_maxconn = nn->max_connections;
error = svc_bind(serv, net);
if (error < 0) {
svc_destroy(&serv);
return error;
}
spin_lock(&nfsd_notifier_lock);
- nn->nfsd_info.mutex = &nfsd_mutex;
nn->nfsd_serv = serv;
spin_unlock(&nfsd_notifier_lock);
- set_max_drc();
/* check if the notifier is already set */
if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
register_inetaddr_notifier(&nfsd_inetaddr_notifier);
@@ -707,10 +677,23 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
if (serv)
for (i = 0; i < serv->sv_nrpools && i < n; i++)
- nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads);
+ nthreads[i] = serv->sv_pools[i].sp_nrthreads;
return 0;
}
+/**
+ * nfsd_set_nrthreads - set the number of running threads in the net's service
+ * @n: number of array members in @nthreads
+ * @nthreads: array of thread counts for each pool
+ * @net: network namespace to operate within
+ *
+ * This function alters the number of running threads for the given network
+ * namespace in each pool. If passed an array longer then the number of pools
+ * the extra pool settings are ignored. If passed an array shorter than the
+ * number of pools, the missing values are interpreted as 0's.
+ *
+ * Returns 0 on success or a negative errno on error.
+ */
int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
{
int i = 0;
@@ -718,11 +701,18 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
int err = 0;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- WARN_ON(!mutex_is_locked(&nfsd_mutex));
+ lockdep_assert_held(&nfsd_mutex);
if (nn->nfsd_serv == NULL || n <= 0)
return 0;
+ /*
+ * Special case: When n == 1, pass in NULL for the pool, so that the
+ * change is distributed equally among all the pools.
+ */
+ if (n == 1)
+ return svc_set_num_threads(nn->nfsd_serv, NULL, nthreads[0]);
+
if (n > nn->nfsd_serv->sv_nrpools)
n = nn->nfsd_serv->sv_nrpools;
@@ -745,47 +735,50 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
}
}
- /*
- * There must always be a thread in pool 0; the admin
- * can't shut down NFS completely using pool_threads.
- */
- if (nthreads[0] == 0)
- nthreads[0] = 1;
-
/* apply the new numbers */
for (i = 0; i < n; i++) {
err = svc_set_num_threads(nn->nfsd_serv,
&nn->nfsd_serv->sv_pools[i],
nthreads[i]);
if (err)
- break;
+ goto out;
}
+
+ /* Anything undefined in the array is treated as 0 */
+ for (i = n; i < nn->nfsd_serv->sv_nrpools; ++i) {
+ err = svc_set_num_threads(nn->nfsd_serv,
+ &nn->nfsd_serv->sv_pools[i],
+ 0);
+ if (err)
+ goto out;
+ }
+out:
return err;
}
-/*
- * Adjust the number of threads and return the new number of threads.
- * This is also the function that starts the server if necessary, if
- * this is the first time nrservs is nonzero.
+/**
+ * nfsd_svc: start up or shut down the nfsd server
+ * @n: number of array members in @nthreads
+ * @nthreads: array of thread counts for each pool
+ * @net: network namespace to operate within
+ * @cred: credentials to use for xprt creation
+ * @scope: server scope value (defaults to nodename)
+ *
+ * Adjust the number of threads in each pool and return the new
+ * total number of threads in the service.
*/
int
-nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
+nfsd_svc(int n, int *nthreads, struct net *net, const struct cred *cred, const char *scope)
{
int error;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct svc_serv *serv;
- mutex_lock(&nfsd_mutex);
- dprintk("nfsd: creating service\n");
+ lockdep_assert_held(&nfsd_mutex);
- nrservs = max(nrservs, 0);
- nrservs = min(nrservs, NFSD_MAXSERVS);
- error = 0;
-
- if (nrservs == 0 && nn->nfsd_serv == NULL)
- goto out;
+ dprintk("nfsd: creating service\n");
- strscpy(nn->nfsd_name, utsname()->nodename,
+ strscpy(nn->nfsd_name, scope ? scope : utsname()->nodename,
sizeof(nn->nfsd_name));
error = nfsd_create_serv(net);
@@ -796,7 +789,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
error = nfsd_startup_net(net, cred);
if (error)
goto out_put;
- error = svc_set_num_threads(serv, NULL, nrservs);
+ error = nfsd_set_nrthreads(n, nthreads, net);
if (error)
goto out_put;
error = serv->sv_nrthreads;
@@ -804,7 +797,6 @@ out_put:
if (serv->sv_nrthreads == 0)
nfsd_destroy_serv(net);
out:
- mutex_unlock(&nfsd_mutex);
return error;
}
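nfsd_svc() now takes an array of per-pool thread counts instead of a single total, is called with nfsd_mutex already held, and lets an explicit scope string override the nodename. A hedged sketch of a caller; the thread counts and the wrapper name are made up for illustration.

/* Sketch only: starting nfsd with explicit per-pool thread counts. */
static int example_start_nfsd(struct net *net, const struct cred *cred)
{
	int nthreads[] = { 8, 8, 4 };	/* one entry per pool (hypothetical) */
	int ret;

	mutex_lock(&nfsd_mutex);
	ret = nfsd_svc(ARRAY_SIZE(nthreads), nthreads, net, cred, NULL);
	mutex_unlock(&nfsd_mutex);

	/* On success ret is the new total thread count; otherwise a negative errno. */
	return ret < 0 ? ret : 0;
}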
@@ -885,17 +877,17 @@ nfsd_init_request(struct svc_rqst *rqstp,
if (likely(nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST)))
return svc_generic_init_request(rqstp, progp, ret);
- ret->mismatch.lovers = NFSD_NRVERS;
- for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
+ ret->mismatch.lovers = NFSD_MAXVERS + 1;
+ for (i = NFSD_MINVERS; i <= NFSD_MAXVERS; i++) {
if (nfsd_vers(nn, i, NFSD_TEST)) {
ret->mismatch.lovers = i;
break;
}
}
- if (ret->mismatch.lovers == NFSD_NRVERS)
+ if (ret->mismatch.lovers > NFSD_MAXVERS)
return rpc_prog_unavail;
ret->mismatch.hivers = NFSD_MINVERS;
- for (i = NFSD_NRVERS - 1; i >= NFSD_MINVERS; i--) {
+ for (i = NFSD_MAXVERS; i >= NFSD_MINVERS; i--) {
if (nfsd_vers(nn, i, NFSD_TEST)) {
ret->mismatch.hivers = i;
break;
@@ -917,11 +909,9 @@ nfsd(void *vrqstp)
/* At this point, the thread shares current->fs
* with the init process. We need to create files with the
- * umask as defined by the client instead of init's umask. */
- if (unshare_fs_struct() < 0) {
- printk("Unable to start nfsd thread: out of memory\n");
- goto out;
- }
+ * umask as defined by the client instead of init's umask.
+ */
+ svc_thread_init_status(rqstp, unshare_fs_struct());
current->fs->umask = 0;
@@ -933,24 +923,19 @@ nfsd(void *vrqstp)
* The main request loop
*/
while (!svc_thread_should_stop(rqstp)) {
- /* Update sv_maxconn if it has changed */
- rqstp->rq_server->sv_maxconn = nn->max_connections;
-
svc_recv(rqstp);
-
nfsd_file_net_dispose(nn);
}
atomic_dec(&nfsd_th_cnt);
-out:
/* Release the thread */
svc_exit_thread(rqstp);
return 0;
}
/**
- * nfsd_dispatch - Process an NFS or NFSACL Request
+ * nfsd_dispatch - Process an NFS or NFSACL or LOCALIO Request
* @rqstp: incoming request
*
* This RPC dispatcher integrates the NFS server's duplicate reply cache.
@@ -1064,10 +1049,3 @@ bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
return true;
}
-
-int nfsd_pool_stats_open(struct inode *inode, struct file *file)
-{
- struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
-
- return svc_pool_stats_open(&nn->nfsd_info, file);
-}
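With the NFSD_NRVERS constant gone, the mismatch path in nfsd_init_request() scans the fixed NFSD_MINVERS..NFSD_MAXVERS range for the lowest and highest versions still enabled in the namespace. A small sketch of that scan, using the same nfsd_vers() helper; the function itself is illustrative.

/* Sketch: computing the version range advertised in an RPC PROG_MISMATCH reply. */
static void example_version_range(struct nfsd_net *nn, u32 *lo, u32 *hi)
{
	int i;

	*lo = NFSD_MAXVERS + 1;		/* sentinel: no version enabled */
	*hi = NFSD_MINVERS;

	for (i = NFSD_MINVERS; i <= NFSD_MAXVERS; i++)
		if (nfsd_vers(nn, i, NFSD_TEST)) {
			*lo = i;
			break;
		}
	for (i = NFSD_MAXVERS; i >= NFSD_MINVERS; i--)
		if (nfsd_vers(nn, i, NFSD_TEST)) {
			*hi = i;
			break;
		}
}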
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 5777f40c7353..fc262ceafca9 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -336,7 +336,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
/* opaque data */
if (xdr_stream_decode_u32(xdr, &args->len) < 0)
return false;
- if (args->len > NFSSVC_MAXBLKSIZE_V2)
+ if (args->len > NFS_MAXDATA)
return false;
return xdr_stream_subsegment(xdr, &args->payload, args->len);
@@ -540,7 +540,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
p = xdr_reserve_space(xdr, XDR_UNIT * 5);
if (!p)
return false;
- *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2);
+ *p++ = cpu_to_be32(NFS_MAXDATA);
*p++ = cpu_to_be32(stat->f_bsize);
*p++ = cpu_to_be32(stat->f_blocks);
*p++ = cpu_to_be32(stat->f_bfree);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 01c6f3445646..1995bca158b8 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -64,21 +64,43 @@ typedef struct {
refcount_t cs_count;
} copy_stateid_t;
+struct nfsd4_referring_call {
+ struct list_head __list;
+
+ u32 rc_sequenceid;
+ u32 rc_slotid;
+};
+
+struct nfsd4_referring_call_list {
+ struct list_head __list;
+
+ struct nfs4_sessionid rcl_sessionid;
+ int __nr_referring_calls;
+ struct list_head rcl_referring_calls;
+};
+
struct nfsd4_callback {
struct nfs4_client *cb_clp;
struct rpc_message cb_msg;
+#define NFSD4_CALLBACK_RUNNING (0)
+#define NFSD4_CALLBACK_WAKE (1)
+#define NFSD4_CALLBACK_REQUEUE (2)
+ unsigned long cb_flags;
const struct nfsd4_callback_ops *cb_ops;
- struct delayed_work cb_work;
+ struct work_struct cb_work;
int cb_seq_status;
int cb_status;
- bool cb_need_restart;
- bool cb_holds_slot;
+ int cb_held_slot;
+
+ int cb_nr_referring_call_list;
+ struct list_head cb_referring_call_list;
};
struct nfsd4_callback_ops {
void (*prepare)(struct nfsd4_callback *);
int (*done)(struct nfsd4_callback *, struct rpc_task *);
void (*release)(struct nfsd4_callback *);
+ uint32_t opcode;
};
/*
@@ -113,6 +135,8 @@ struct nfs4_stid {
/* For a deleg stateid kept around only to process free_stateid's: */
#define SC_STATUS_REVOKED BIT(1)
#define SC_STATUS_ADMIN_REVOKED BIT(2)
+#define SC_STATUS_FREEABLE BIT(3)
+#define SC_STATUS_FREED BIT(4)
unsigned short sc_status;
struct list_head sc_cp_list;
@@ -134,24 +158,36 @@ struct nfs4_cpntf_state {
time64_t cpntf_time; /* last time stateid used */
};
+/*
+ * RFC 7862 Section 4.8 states:
+ *
+ * | A copy offload stateid will be valid until either (A) the client
+ * | or server restarts or (B) the client returns the resource by
+ * | issuing an OFFLOAD_CANCEL operation or the client replies to a
+ * | CB_OFFLOAD operation.
+ *
+ * Because a client might not reply to a CB_OFFLOAD, or a reply
+ * might get lost due to connection loss, NFSD purges async copy
+ * state after a short period to prevent it from accumulating
+ * over time.
+ */
+#define NFSD_COPY_INITIAL_TTL 10
+
struct nfs4_cb_fattr {
struct nfsd4_callback ncf_getattr;
u32 ncf_cb_status;
- u32 ncf_cb_bmap[1];
/* from CB_GETATTR reply */
u64 ncf_cb_change;
u64 ncf_cb_fsize;
+ struct timespec64 ncf_cb_mtime;
+ struct timespec64 ncf_cb_atime;
- unsigned long ncf_cb_flags;
bool ncf_file_modified;
u64 ncf_initial_cinfo;
u64 ncf_cur_fsize;
};
-/* bits for ncf_cb_flags */
-#define CB_GETATTR_BUSY 0
-
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@@ -179,8 +215,8 @@ struct nfs4_delegation {
struct list_head dl_perclnt;
struct list_head dl_recall_lru; /* delegation recalled */
struct nfs4_clnt_odstate *dl_clnt_odstate;
- u32 dl_type;
time64_t dl_time;
+ u32 dl_type;
/* For recall: */
int dl_retries;
struct nfsd4_callback dl_recall;
@@ -190,6 +226,22 @@ struct nfs4_delegation {
struct nfs4_cb_fattr dl_cb_fattr;
};
+static inline bool deleg_is_read(u32 dl_type)
+{
+ return (dl_type == OPEN_DELEGATE_READ || dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG);
+}
+
+static inline bool deleg_is_write(u32 dl_type)
+{
+ return (dl_type == OPEN_DELEGATE_WRITE || dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG);
+}
+
+static inline bool deleg_attrs_deleg(u32 dl_type)
+{
+ return dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG ||
+ dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG;
+}
+
#define cb_to_delegation(cb) \
container_of(cb, struct nfs4_delegation, dl_recall)
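The deleg_is_read(), deleg_is_write() and deleg_attrs_deleg() helpers classify the new OPEN_DELEGATE_*_ATTRS_DELEG delegation types without callers open-coding the constants. A brief hedged usage sketch; the function and the pr_debug strings are illustrative.

/* Sketch only: classifying a delegation by its dl_type. */
static void example_classify(const struct nfs4_delegation *dp)
{
	if (deleg_is_write(dp->dl_type))
		pr_debug("write delegation\n");
	else if (deleg_is_read(dp->dl_type))
		pr_debug("read delegation\n");

	if (deleg_attrs_deleg(dp->dl_type))
		pr_debug("timestamps are delegated to the client\n");
}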
@@ -210,8 +262,11 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
return container_of(s, struct nfs4_delegation, dl_stid);
}
-/* Maximum number of slots per session. 160 is useful for long haul TCP */
-#define NFSD_MAX_SLOTS_PER_SESSION 160
+/* Maximum number of slots per session. This is a sanity check only;
+ * it could be increased if we had a mechanism to shut down misbehaving clients.
+ * A large number can be needed to get good throughput on high-latency servers.
+ */
+#define NFSD_MAX_SLOTS_PER_SESSION 2048
/* Maximum session per slot cache size */
#define NFSD_SLOT_CACHE_SIZE 2048
/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
@@ -223,12 +278,15 @@ struct nfsd4_slot {
u32 sl_seqid;
__be32 sl_status;
struct svc_cred sl_cred;
+ u32 sl_index;
u32 sl_datalen;
u16 sl_opcnt;
+ u16 sl_generation;
#define NFSD4_SLOT_INUSE (1 << 0)
#define NFSD4_SLOT_CACHETHIS (1 << 1)
#define NFSD4_SLOT_INITIALIZED (1 << 2)
#define NFSD4_SLOT_CACHED (1 << 3)
+#define NFSD4_SLOT_REUSED (1 << 4)
u8 sl_flags;
char sl_data[];
};
@@ -287,6 +345,9 @@ struct nfsd4_conn {
unsigned char cn_flags;
};
+/* Maximum number of slots that nfsd will use in the backchannel */
+#define NFSD_BC_SLOT_TABLE_SIZE (sizeof(u32) * 8)
+
/*
* Representation of a v4.1+ session. These are refcounted in a similar fashion
* to the nfs4_client. References are only taken when the server is actively
@@ -294,20 +355,23 @@ struct nfsd4_conn {
*/
struct nfsd4_session {
atomic_t se_ref;
+ spinlock_t se_lock;
+ u32 se_cb_slot_avail; /* bitmap of available slots */
+ u32 se_cb_highest_slot; /* highest slot client wants */
+ u32 se_cb_prog;
struct list_head se_hash; /* hash by sessionid */
struct list_head se_perclnt;
-/* See SESSION4_PERSIST, etc. for standard flags; this is internal-only: */
-#define NFS4_SESSION_DEAD 0x010
- u32 se_flags;
+ struct list_head se_all_sessions;/* global list of sessions */
struct nfs4_client *se_client;
struct nfs4_sessionid se_sessionid;
struct nfsd4_channel_attrs se_fchannel;
- struct nfsd4_channel_attrs se_bchannel;
struct nfsd4_cb_sec se_cb_sec;
struct list_head se_conns;
- u32 se_cb_prog;
- u32 se_cb_seq_nr;
- struct nfsd4_slot *se_slots[]; /* forward channel slots */
+ u32 se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
+ struct xarray se_slots; /* forward channel slots */
+ u16 se_slot_gen;
+ bool se_dead;
+ u32 se_target_maxslots;
};
/* formatted contents of nfs4_sessionid */
@@ -406,8 +470,9 @@ struct nfs4_client {
#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL)
-#define NFSD4_CLIENT_CB_RECALL_ANY (6)
unsigned long cl_flags;
+
+ struct workqueue_struct *cl_callback_wq;
const struct cred *cl_cb_cred;
struct rpc_clnt *cl_cb_client;
u32 cl_cb_ident;
@@ -438,9 +503,6 @@ struct nfs4_client {
*/
struct dentry *cl_nfsd_info_dentry;
- /* for nfs41 callbacks */
- /* We currently support a single back channel with a single slot */
- unsigned long cl_cb_slot_busy;
struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
/* wait here for slots */
struct net *net;
@@ -453,7 +515,6 @@ struct nfs4_client {
struct nfsd4_cb_recall_any *cl_ra;
time64_t cl_ra_time;
- struct list_head cl_ra_cblist;
};
/* struct nfs4_client_reset
@@ -486,7 +547,7 @@ struct nfs4_replay {
unsigned int rp_buflen;
char *rp_buf;
struct knfsd_fh rp_openfh;
- struct mutex rp_mutex;
+ int rp_locked;
char rp_ibuf[NFSD4_REPLAY_ISIZE];
};
@@ -732,13 +793,24 @@ extern __be32 nfs4_check_open_reclaim(struct nfs4_client *);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
+extern void nfsd41_cb_referring_call(struct nfsd4_callback *cb,
+ struct nfs4_sessionid *sessionid,
+ u32 slotid, u32 seqno);
+extern void nfsd41_cb_destroy_referring_call_list(struct nfsd4_callback *cb);
extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
extern bool nfsd4_run_cb(struct nfsd4_callback *cb);
-extern int nfsd4_create_callback_queue(void);
-extern void nfsd4_destroy_callback_queue(void);
+
+static inline void nfsd4_try_run_cb(struct nfsd4_callback *cb)
+{
+ if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags))
+ WARN_ON_ONCE(!nfsd4_run_cb(cb));
+}
+
extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
+void nfsd4_async_copy_reaper(struct nfsd_net *nn);
+bool nfsd4_has_active_async_copies(struct nfs4_client *clp);
extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
struct xdr_netobj princhash, struct nfsd_net *nn);
extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
@@ -781,5 +853,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
}
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
- struct inode *inode, bool *file_modified, u64 *size);
+ struct dentry *dentry, struct nfs4_delegation **pdp);
#endif /* NFSD4_STATE_H */
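NFSD_BC_SLOT_TABLE_SIZE is sized to the width of se_cb_slot_avail, so backchannel slot management reduces to bitmap operations under se_lock: pick a set bit no higher than se_cb_highest_slot, clear it while the slot is in use, and set it again on release. A hedged sketch of that allocation; the helper names are hypothetical, not the nfs4callback.c implementation.

/* Sketch only: bitmap-based backchannel slot allocation. */
static int example_get_bc_slot(struct nfsd4_session *ses)
{
	int slot = -1;
	u32 avail;

	spin_lock(&ses->se_lock);
	avail = ses->se_cb_slot_avail &
		GENMASK(min_t(u32, ses->se_cb_highest_slot,
			      NFSD_BC_SLOT_TABLE_SIZE - 1), 0);
	if (avail) {
		slot = ffs(avail) - 1;			/* lowest free slot */
		ses->se_cb_slot_avail &= ~BIT(slot);
	}
	spin_unlock(&ses->se_lock);
	return slot;					/* -1: no slot available */
}

static void example_put_bc_slot(struct nfsd4_session *ses, int slot)
{
	spin_lock(&ses->se_lock);
	ses->se_cb_slot_avail |= BIT(slot);
	spin_unlock(&ses->se_lock);
}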
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index be52fb1e928e..f7eaf95e20fc 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -73,53 +73,11 @@ static int nfsd_show(struct seq_file *seq, void *v)
DEFINE_PROC_SHOW_ATTRIBUTE(nfsd);
-int nfsd_percpu_counters_init(struct percpu_counter *counters, int num)
-{
- int i, err = 0;
-
- for (i = 0; !err && i < num; i++)
- err = percpu_counter_init(&counters[i], 0, GFP_KERNEL);
-
- if (!err)
- return 0;
-
- for (; i > 0; i--)
- percpu_counter_destroy(&counters[i-1]);
-
- return err;
-}
-
-void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num)
-{
- int i;
-
- for (i = 0; i < num; i++)
- percpu_counter_set(&counters[i], 0);
-}
-
-void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
-{
- int i;
-
- for (i = 0; i < num; i++)
- percpu_counter_destroy(&counters[i]);
-}
-
-int nfsd_stat_counters_init(struct nfsd_net *nn)
-{
- return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM);
-}
-
-void nfsd_stat_counters_destroy(struct nfsd_net *nn)
-{
- nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM);
-}
-
-void nfsd_proc_stat_init(struct net *net)
+struct proc_dir_entry *nfsd_proc_stat_init(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
+ return svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
}
void nfsd_proc_stat_shutdown(struct net *net)
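nfsd_proc_stat_init() now hands the proc_dir_entry back to the caller, so per-net setup can fail cleanly when the stats file cannot be registered instead of silently continuing. A short hedged sketch; the wrapper name is illustrative.

/* Sketch only: failing per-net init when the proc stats entry is missing. */
static int example_net_init(struct net *net)
{
	if (!nfsd_proc_stat_init(net))
		return -ENOMEM;
	return 0;
}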
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index d2753e975dfd..e4efb0e4e56d 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -10,12 +10,7 @@
#include <uapi/linux/nfsd/stats.h>
#include <linux/percpu_counter.h>
-int nfsd_percpu_counters_init(struct percpu_counter *counters, int num);
-void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num);
-void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num);
-int nfsd_stat_counters_init(struct nfsd_net *nn);
-void nfsd_stat_counters_destroy(struct nfsd_net *nn);
-void nfsd_proc_stat_init(struct net *net);
+struct proc_dir_entry *nfsd_proc_stat_init(struct net *net);
void nfsd_proc_stat_shutdown(struct net *net);
static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn)
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 1cd2076210b1..3c5505ef5e3a 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -11,6 +11,7 @@
#include <linux/tracepoint.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
+#include <trace/misc/fs.h>
#include <trace/misc/nfs.h>
#include <trace/misc/sunrpc.h>
@@ -18,22 +19,40 @@
#include "nfsfh.h"
#include "xdr4.h"
-#define NFSD_TRACE_PROC_RES_FIELDS \
+#define NFSD_TRACE_PROC_CALL_FIELDS(r) \
+ __field(unsigned int, netns_ino) \
+ __field(u32, xid) \
+ __sockaddr(server, (r)->rq_xprt->xpt_locallen) \
+ __sockaddr(client, (r)->rq_xprt->xpt_remotelen)
+
+#define NFSD_TRACE_PROC_CALL_ASSIGNMENTS(r) \
+ do { \
+ struct svc_xprt *xprt = (r)->rq_xprt; \
+ __entry->netns_ino = SVC_NET(r)->ns.inum; \
+ __entry->xid = be32_to_cpu((r)->rq_xid); \
+ __assign_sockaddr(server, &xprt->xpt_local, \
+ xprt->xpt_locallen); \
+ __assign_sockaddr(client, &xprt->xpt_remote, \
+ xprt->xpt_remotelen); \
+ } while (0)
+
+#define NFSD_TRACE_PROC_RES_FIELDS(r) \
__field(unsigned int, netns_ino) \
__field(u32, xid) \
__field(unsigned long, status) \
- __array(unsigned char, server, sizeof(struct sockaddr_in6)) \
- __array(unsigned char, client, sizeof(struct sockaddr_in6))
+ __sockaddr(server, (r)->rq_xprt->xpt_locallen) \
+ __sockaddr(client, (r)->rq_xprt->xpt_remotelen)
-#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(error) \
+#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(r, error) \
do { \
- __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \
- __entry->xid = be32_to_cpu(rqstp->rq_xid); \
+ struct svc_xprt *xprt = (r)->rq_xprt; \
+ __entry->netns_ino = SVC_NET(r)->ns.inum; \
+ __entry->xid = be32_to_cpu((r)->rq_xid); \
__entry->status = be32_to_cpu(error); \
- memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \
- rqstp->rq_xprt->xpt_locallen); \
- memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \
- rqstp->rq_xprt->xpt_remotelen); \
+ __assign_sockaddr(server, &xprt->xpt_local, \
+ xprt->xpt_locallen); \
+ __assign_sockaddr(client, &xprt->xpt_remote, \
+ xprt->xpt_remotelen); \
} while (0);
DECLARE_EVENT_CLASS(nfsd_xdr_err_class,
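The reworked field macros replace fixed sockaddr_in6 arrays with the dynamic __sockaddr()/__assign_sockaddr() helpers, sized from the transport's actual address lengths, and the CALL variants capture the same endpoints for events that fire before a status is known. A hedged sketch of an event built on them; nfsd_example_call is not a real tracepoint.

/* Sketch only: an illustrative event using the call-side field macros. */
TRACE_EVENT(nfsd_example_call,
	TP_PROTO(const struct svc_rqst *rqstp),
	TP_ARGS(rqstp),
	TP_STRUCT__entry(
		NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
	),
	TP_fast_assign(
		NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
	),
	TP_printk("xid=0x%08x client=%pISpc",
		__entry->xid, __get_sockaddr(client)
	)
);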
@@ -79,14 +98,15 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
{ NFSD_MAY_READ, "READ" }, \
{ NFSD_MAY_SATTR, "SATTR" }, \
{ NFSD_MAY_TRUNC, "TRUNC" }, \
- { NFSD_MAY_LOCK, "LOCK" }, \
+ { NFSD_MAY_NLM, "NLM" }, \
{ NFSD_MAY_OWNER_OVERRIDE, "OWNER_OVERRIDE" }, \
{ NFSD_MAY_LOCAL_ACCESS, "LOCAL_ACCESS" }, \
{ NFSD_MAY_BYPASS_GSS_ON_ROOT, "BYPASS_GSS_ON_ROOT" }, \
{ NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }, \
{ NFSD_MAY_BYPASS_GSS, "BYPASS_GSS" }, \
{ NFSD_MAY_READ_IF_EXEC, "READ_IF_EXEC" }, \
- { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" })
+ { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }, \
+ { NFSD_MAY_LOCALIO, "LOCALIO" })
TRACE_EVENT(nfsd_compound,
TP_PROTO(
@@ -104,7 +124,7 @@ TRACE_EVENT(nfsd_compound,
TP_fast_assign(
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->opcnt = opcnt;
- __assign_str(tag, tag);
+ __assign_str(tag);
),
TP_printk("xid=0x%08x opcnt=%u tag=%s",
__entry->xid, __entry->opcnt, __get_str(tag)
@@ -127,7 +147,7 @@ TRACE_EVENT(nfsd_compound_status,
__entry->args_opcnt = args_opcnt;
__entry->resp_opcnt = resp_opcnt;
__entry->status = be32_to_cpu(status);
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("op=%u/%u %s status=%d",
__entry->resp_opcnt, __entry->args_opcnt,
@@ -144,14 +164,14 @@ TRACE_EVENT(nfsd_compound_decode_err,
),
TP_ARGS(rqstp, args_opcnt, resp_opcnt, opnum, status),
TP_STRUCT__entry(
- NFSD_TRACE_PROC_RES_FIELDS
+ NFSD_TRACE_PROC_RES_FIELDS(rqstp)
__field(u32, args_opcnt)
__field(u32, resp_opcnt)
__field(u32, opnum)
),
TP_fast_assign(
- NFSD_TRACE_PROC_RES_ASSIGNMENTS(status)
+ NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status)
__entry->args_opcnt = args_opcnt;
__entry->resp_opcnt = resp_opcnt;
@@ -162,7 +182,7 @@ TRACE_EVENT(nfsd_compound_decode_err,
__entry->opnum, __entry->status)
);
-TRACE_EVENT(nfsd_compound_encode_err,
+DECLARE_EVENT_CLASS(nfsd_compound_err_class,
TP_PROTO(
const struct svc_rqst *rqstp,
u32 opnum,
@@ -170,12 +190,12 @@ TRACE_EVENT(nfsd_compound_encode_err,
),
TP_ARGS(rqstp, opnum, status),
TP_STRUCT__entry(
- NFSD_TRACE_PROC_RES_FIELDS
+ NFSD_TRACE_PROC_RES_FIELDS(rqstp)
__field(u32, opnum)
),
TP_fast_assign(
- NFSD_TRACE_PROC_RES_ASSIGNMENTS(status)
+ NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status)
__entry->opnum = opnum;
),
@@ -183,6 +203,18 @@ TRACE_EVENT(nfsd_compound_encode_err,
__entry->opnum, __entry->status)
);
+#define DEFINE_NFSD_COMPOUND_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_compound_err_class, nfsd_compound_##name##_err, \
+ TP_PROTO( \
+ const struct svc_rqst *rqstp, \
+ u32 opnum, \
+ __be32 status \
+ ), \
+ TP_ARGS(rqstp, opnum, status))
+
+DEFINE_NFSD_COMPOUND_ERR_EVENT(op);
+DEFINE_NFSD_COMPOUND_ERR_EVENT(encode);
+
#define show_fs_file_type(x) \
__print_symbolic(x, \
{ S_IFLNK, "LNK" }, \
@@ -193,7 +225,7 @@ TRACE_EVENT(nfsd_compound_encode_err,
{ S_IFIFO, "FIFO" }, \
{ S_IFSOCK, "SOCK" })
-TRACE_EVENT(nfsd_fh_verify,
+TRACE_EVENT_CONDITION(nfsd_fh_verify,
TP_PROTO(
const struct svc_rqst *rqstp,
const struct svc_fh *fhp,
@@ -201,6 +233,7 @@ TRACE_EVENT(nfsd_fh_verify,
int access
),
TP_ARGS(rqstp, fhp, type, access),
+ TP_CONDITION(rqstp != NULL),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__sockaddr(server, rqstp->rq_xprt->xpt_remotelen)
@@ -239,7 +272,7 @@ TRACE_EVENT_CONDITION(nfsd_fh_verify_err,
__be32 error
),
TP_ARGS(rqstp, fhp, type, access, error),
- TP_CONDITION(error),
+ TP_CONDITION(rqstp != NULL && error),
TP_STRUCT__entry(
__field(unsigned int, netns_ino)
__sockaddr(server, rqstp->rq_xprt->xpt_remotelen)
@@ -295,12 +328,13 @@ DECLARE_EVENT_CLASS(nfsd_fh_err_class,
__entry->status)
)
-#define DEFINE_NFSD_FH_ERR_EVENT(name) \
-DEFINE_EVENT(nfsd_fh_err_class, nfsd_##name, \
- TP_PROTO(struct svc_rqst *rqstp, \
- struct svc_fh *fhp, \
- int status), \
- TP_ARGS(rqstp, fhp, status))
+#define DEFINE_NFSD_FH_ERR_EVENT(name) \
+DEFINE_EVENT_CONDITION(nfsd_fh_err_class, nfsd_##name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *fhp, \
+ int status), \
+ TP_ARGS(rqstp, fhp, status), \
+ TP_CONDITION(rqstp != NULL))
DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badexport);
DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badhandle);
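Turning the filehandle error events into DEFINE_EVENT_CONDITION instances guarded by rqstp != NULL lets callers that have no svc_rqst (the fh_verify_local() path, for instance) trace through the same code without the tracepoint dereferencing a NULL pointer. A hedged sketch of defining one more event in the same style; the event name is illustrative.

/* Sketch only: a conditional event that is skipped when rqstp is NULL. */
DEFINE_EVENT_CONDITION(nfsd_fh_err_class, nfsd_example_fh_err,
	TP_PROTO(struct svc_rqst *rqstp,
		 struct svc_fh *fhp,
		 int status),
	TP_ARGS(rqstp, fhp, status),
	TP_CONDITION(rqstp != NULL));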
@@ -318,7 +352,7 @@ TRACE_EVENT(nfsd_exp_find_key,
TP_fast_assign(
__entry->fsidtype = key->ek_fsidtype;
memcpy(__entry->fsid, key->ek_fsid, 4*6);
- __assign_str(auth_domain, key->ek_client->name);
+ __assign_str(auth_domain);
__entry->status = status;
),
TP_printk("fsid=%x::%s domain=%s status=%d",
@@ -342,8 +376,8 @@ TRACE_EVENT(nfsd_expkey_update,
TP_fast_assign(
__entry->fsidtype = key->ek_fsidtype;
memcpy(__entry->fsid, key->ek_fsid, 4*6);
- __assign_str(auth_domain, key->ek_client->name);
- __assign_str(path, exp_path);
+ __assign_str(auth_domain);
+ __assign_str(path);
__entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags);
),
TP_printk("fsid=%x::%s domain=%s path=%s cache=%s",
@@ -365,8 +399,8 @@ TRACE_EVENT(nfsd_exp_get_by_name,
__field(int, status)
),
TP_fast_assign(
- __assign_str(path, key->ex_path.dentry->d_name.name);
- __assign_str(auth_domain, key->ex_client->name);
+ __assign_str(path);
+ __assign_str(auth_domain);
__entry->status = status;
),
TP_printk("path=%s domain=%s status=%d",
@@ -385,8 +419,8 @@ TRACE_EVENT(nfsd_export_update,
__field(bool, cache)
),
TP_fast_assign(
- __assign_str(path, key->ex_path.dentry->d_name.name);
- __assign_str(auth_domain, key->ex_client->name);
+ __assign_str(path);
+ __assign_str(auth_domain);
__entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags);
),
TP_printk("path=%s domain=%s cache=%s",
@@ -436,6 +470,8 @@ DEFINE_NFSD_IO_EVENT(write_start);
DEFINE_NFSD_IO_EVENT(write_opened);
DEFINE_NFSD_IO_EVENT(write_io_done);
DEFINE_NFSD_IO_EVENT(write_done);
+DEFINE_NFSD_IO_EVENT(commit_start);
+DEFINE_NFSD_IO_EVENT(commit_done);
DECLARE_EVENT_CLASS(nfsd_err_class,
TP_PROTO(struct svc_rqst *rqstp,
@@ -485,7 +521,7 @@ TRACE_EVENT(nfsd_dirent,
TP_fast_assign(
__entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0;
__entry->ino = ino;
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("fh_hash=0x%08x ino=%llu name=%s",
__entry->fh_hash, __entry->ino, __get_str(name)
@@ -611,7 +647,6 @@ DEFINE_STATEID_EVENT(open);
DEFINE_STATEID_EVENT(deleg_read);
DEFINE_STATEID_EVENT(deleg_write);
DEFINE_STATEID_EVENT(deleg_return);
-DEFINE_STATEID_EVENT(deleg_recall);
DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
TP_PROTO(u32 seqid, const stateid_t *stp),
@@ -749,6 +784,87 @@ TRACE_EVENT_CONDITION(nfsd_seq4_status,
)
);
+DECLARE_EVENT_CLASS(nfsd_cs_slot_class,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct nfsd4_create_session *cs
+ ),
+ TP_ARGS(clp, cs),
+ TP_STRUCT__entry(
+ __field(u32, seqid)
+ __field(u32, slot_seqid)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ const struct nfsd4_clid_slot *slot = &clp->cl_cs_slot;
+
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
+ __entry->seqid = cs->seqid;
+ __entry->slot_seqid = slot->sl_seqid;
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x seqid=%u slot_seqid=%u",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->seqid, __entry->slot_seqid
+ )
+);
+
+#define DEFINE_CS_SLOT_EVENT(name) \
+DEFINE_EVENT(nfsd_cs_slot_class, nfsd_##name, \
+ TP_PROTO( \
+ const struct nfs4_client *clp, \
+ const struct nfsd4_create_session *cs \
+ ), \
+ TP_ARGS(clp, cs))
+
+DEFINE_CS_SLOT_EVENT(slot_seqid_conf);
+DEFINE_CS_SLOT_EVENT(slot_seqid_unconf);
+
+#define show_nfs_slot_flags(val) \
+ __print_flags(val, "|", \
+ { NFSD4_SLOT_INUSE, "INUSE" }, \
+ { NFSD4_SLOT_CACHETHIS, "CACHETHIS" }, \
+ { NFSD4_SLOT_INITIALIZED, "INITIALIZED" }, \
+ { NFSD4_SLOT_CACHED, "CACHED" }, \
+ { NFSD4_SLOT_REUSED, "REUSED" })
+
+TRACE_EVENT(nfsd_slot_seqid_sequence,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct nfsd4_sequence *seq,
+ const struct nfsd4_slot *slot
+ ),
+ TP_ARGS(clp, seq, slot),
+ TP_STRUCT__entry(
+ __field(u32, seqid)
+ __field(u32, slot_seqid)
+ __field(u32, slot_index)
+ __field(unsigned long, slot_flags)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
+ __entry->seqid = seq->seqid;
+ __entry->slot_seqid = slot->sl_seqid;
+ __entry->slot_index = seq->slotid;
+ __entry->slot_flags = slot->sl_flags;
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x idx=%u seqid=%u slot_seqid=%u flags=%s",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->slot_index, __entry->seqid, __entry->slot_seqid,
+ show_nfs_slot_flags(__entry->slot_flags)
+ )
+);
+
DECLARE_EVENT_CLASS(nfsd_clientid_class,
TP_PROTO(const clientid_t *clid),
TP_ARGS(clid),
@@ -778,6 +894,30 @@ DEFINE_CLIENTID_EVENT(purged);
DEFINE_CLIENTID_EVENT(renew);
DEFINE_CLIENTID_EVENT(stale);
+TRACE_EVENT(nfsd_mark_client_expired,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ int cl_rpc_users
+ ),
+ TP_ARGS(clp, cl_rpc_users),
+ TP_STRUCT__entry(
+ __field(int, cl_rpc_users)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_rpc_users = cl_rpc_users;
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x cl_rpc_users=%d",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->cl_rpc_users)
+);
+
DECLARE_EVENT_CLASS(nfsd_net_class,
TP_PROTO(const struct nfsd_net *nn),
TP_ARGS(nn),
@@ -906,7 +1046,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class,
__entry->flavor = clp->cl_cred.cr_flavor;
memcpy(__entry->verifier, (void *)&clp->cl_verifier,
NFS4_VERIFIER_SIZE);
- __assign_str(name, clp->cl_name.data);
+ __assign_str(name);
),
TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x",
__entry->addr, __get_str(name),
@@ -931,6 +1071,7 @@ DEFINE_CLID_EVENT(confirmed_r);
{ 1 << NFSD_FILE_HASHED, "HASHED" }, \
{ 1 << NFSD_FILE_PENDING, "PENDING" }, \
{ 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \
+ { 1 << NFSD_FILE_RECENT, "RECENT" }, \
{ 1 << NFSD_FILE_GC, "GC" })
DECLARE_EVENT_CLASS(nfsd_file_class,
@@ -1016,7 +1157,7 @@ TRACE_EVENT(nfsd_file_acquire,
),
TP_fast_assign(
- __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0;
__entry->inode = inode;
__entry->may_flags = may_flags;
__entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0;
@@ -1050,7 +1191,7 @@ TRACE_EVENT(nfsd_file_insert_err,
__field(long, error)
),
TP_fast_assign(
- __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0;
__entry->inode = inode;
__entry->may_flags = may_flags;
__entry->error = error;
@@ -1080,7 +1221,7 @@ TRACE_EVENT(nfsd_file_cons_err,
__field(const void *, nf_file)
),
TP_fast_assign(
- __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0;
__entry->inode = inode;
__entry->may_flags = may_flags;
__entry->nf_ref = refcount_read(&nf->nf_ref);
@@ -1209,6 +1350,7 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced);
+DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_aged);
DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed);
DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class,
@@ -1238,6 +1380,7 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \
TP_ARGS(removed, remaining))
DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed);
+DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_recent);
DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed);
TRACE_EVENT(nfsd_file_close,
@@ -1392,6 +1535,9 @@ DEFINE_NFSD_CB_EVENT(new_state);
DEFINE_NFSD_CB_EVENT(probe);
DEFINE_NFSD_CB_EVENT(lost);
DEFINE_NFSD_CB_EVENT(shutdown);
+DEFINE_NFSD_CB_EVENT(rpc_prepare);
+DEFINE_NFSD_CB_EVENT(rpc_done);
+DEFINE_NFSD_CB_EVENT(rpc_release);
TRACE_DEFINE_ENUM(RPC_AUTH_NULL);
TRACE_DEFINE_ENUM(RPC_AUTH_UNIX);
@@ -1425,7 +1571,7 @@ TRACE_EVENT(nfsd_cb_setup,
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
- __assign_str(netid, netid);
+ __assign_str(netid);
__entry->authflavor = authflavor;
__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
clp->cl_cb_conn.cb_addrlen)
@@ -1459,6 +1605,19 @@ TRACE_EVENT(nfsd_cb_setup_err,
__entry->error)
);
+/* Not a real opcode, but there is no 0 operation. */
+#define _CB_NULL 0
+
+#define show_nfsd_cb_opcode(val) \
+ __print_symbolic(val, \
+ { _CB_NULL, "CB_NULL" }, \
+ { OP_CB_GETATTR, "CB_GETATTR" }, \
+ { OP_CB_RECALL, "CB_RECALL" }, \
+ { OP_CB_LAYOUTRECALL, "CB_LAYOUTRECALL" }, \
+ { OP_CB_RECALL_ANY, "CB_RECALL_ANY" }, \
+ { OP_CB_NOTIFY_LOCK, "CB_NOTIFY_LOCK" }, \
+ { OP_CB_OFFLOAD, "CB_OFFLOAD" })
+
DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class,
TP_PROTO(
const struct nfs4_client *clp,
@@ -1469,6 +1628,7 @@ DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class,
__field(u32, cl_boot)
__field(u32, cl_id)
__field(const void *, cb)
+ __field(unsigned long, opcode)
__field(bool, need_restart)
__sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
),
@@ -1476,14 +1636,15 @@ DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class,
__entry->cl_boot = clp->cl_clientid.cl_boot;
__entry->cl_id = clp->cl_clientid.cl_id;
__entry->cb = cb;
- __entry->need_restart = cb->cb_need_restart;
+ __entry->opcode = cb->cb_ops ? cb->cb_ops->opcode : _CB_NULL;
+ __entry->need_restart = test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags);
__assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
clp->cl_cb_conn.cb_addrlen)
),
- TP_printk("addr=%pISpc client %08x:%08x cb=%p%s",
- __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
- __entry->cb, __entry->need_restart ?
- " (need restart)" : " (first try)"
+ TP_printk("addr=%pISpc client %08x:%08x cb=%p%s opcode=%s",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, __entry->cb,
+ __entry->need_restart ? " (need restart)" : " (first try)",
+ show_nfsd_cb_opcode(__entry->opcode)
)
);
@@ -1534,7 +1695,7 @@ TRACE_EVENT(nfsd_cb_seq_status,
__entry->seq_status = cb->cb_seq_status;
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
- " sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d\n",
+ " sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d",
__entry->task_id, __entry->client_id,
__entry->cl_boot, __entry->cl_id,
__entry->seqno, __entry->reserved,
@@ -1570,10 +1731,10 @@ TRACE_EVENT(nfsd_cb_free_slot,
__entry->cl_id = sid->clientid.cl_id;
__entry->seqno = sid->sequence;
__entry->reserved = sid->reserved;
- __entry->slot_seqno = session->se_cb_seq_nr;
+ __entry->slot_seqno = session->se_cb_seq_nr[cb->cb_held_slot];
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
- " sessionid=%08x:%08x:%08x:%08x new slot seqno=%u\n",
+ " sessionid=%08x:%08x:%08x:%08x new slot seqno=%u",
__entry->task_id, __entry->client_id,
__entry->cl_boot, __entry->cl_id,
__entry->seqno, __entry->reserved,
@@ -1736,6 +1897,7 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done);
DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_getattr_done);
TRACE_EVENT(nfsd_cb_recall_any_done,
TP_PROTO(
@@ -1770,7 +1932,7 @@ TRACE_EVENT(nfsd_ctl_unlock_ip,
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
- __assign_str(address, address);
+ __assign_str(address);
),
TP_printk("address=%s",
__get_str(address)
@@ -1789,7 +1951,7 @@ TRACE_EVENT(nfsd_ctl_unlock_fs,
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
- __assign_str(path, path);
+ __assign_str(path);
),
TP_printk("path=%s",
__get_str(path)
@@ -1813,8 +1975,8 @@ TRACE_EVENT(nfsd_ctl_filehandle,
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->maxsize = maxsize;
- __assign_str(domain, domain);
- __assign_str(path, path);
+ __assign_str(domain);
+ __assign_str(path);
),
TP_printk("domain=%s path=%s maxsize=%d",
__get_str(domain), __get_str(path), __entry->maxsize
@@ -1874,7 +2036,7 @@ TRACE_EVENT(nfsd_ctl_version,
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
- __assign_str(mesg, mesg);
+ __assign_str(mesg);
),
TP_printk("%s",
__get_str(mesg)
@@ -1915,7 +2077,7 @@ TRACE_EVENT(nfsd_ctl_ports_addxprt,
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->port = port;
- __assign_str(transport, transport);
+ __assign_str(transport);
),
TP_printk("transport=%s port=%d",
__get_str(transport), __entry->port
@@ -1976,9 +2138,9 @@ TRACE_EVENT(nfsd_ctl_time,
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
__entry->time = time;
- __assign_str(name, name);
+ __assign_str(name);
),
- TP_printk("file=%s time=%d\n",
+ TP_printk("file=%s time=%d",
__get_str(name), __entry->time
)
);
@@ -1995,7 +2157,7 @@ TRACE_EVENT(nfsd_ctl_recoverydir,
),
TP_fast_assign(
__entry->netns_ino = net->ns.inum;
- __assign_str(recdir, recdir);
+ __assign_str(recdir);
),
TP_printk("recdir=%s",
__get_str(recdir)
@@ -2033,6 +2195,10 @@ DECLARE_EVENT_CLASS(nfsd_copy_class,
__field(u32, dst_cl_id)
__field(u32, dst_so_id)
__field(u32, dst_si_generation)
+ __field(u32, cb_cl_boot)
+ __field(u32, cb_cl_id)
+ __field(u32, cb_so_id)
+ __field(u32, cb_si_generation)
__field(u64, src_cp_pos)
__field(u64, dst_cp_pos)
__field(u64, cp_count)
@@ -2041,6 +2207,7 @@ DECLARE_EVENT_CLASS(nfsd_copy_class,
TP_fast_assign(
const stateid_t *src_stp = &copy->cp_src_stateid;
const stateid_t *dst_stp = &copy->cp_dst_stateid;
+ const stateid_t *cb_stp = &copy->cp_res.cb_stateid;
__entry->intra = test_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
__entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
@@ -2052,6 +2219,10 @@ DECLARE_EVENT_CLASS(nfsd_copy_class,
__entry->dst_cl_id = dst_stp->si_opaque.so_clid.cl_id;
__entry->dst_so_id = dst_stp->si_opaque.so_id;
__entry->dst_si_generation = dst_stp->si_generation;
+ __entry->cb_cl_boot = cb_stp->si_opaque.so_clid.cl_boot;
+ __entry->cb_cl_id = cb_stp->si_opaque.so_clid.cl_id;
+ __entry->cb_so_id = cb_stp->si_opaque.so_id;
+ __entry->cb_si_generation = cb_stp->si_generation;
__entry->src_cp_pos = copy->cp_src_pos;
__entry->dst_cp_pos = copy->cp_dst_pos;
__entry->cp_count = copy->cp_count;
@@ -2059,14 +2230,17 @@ DECLARE_EVENT_CLASS(nfsd_copy_class,
sizeof(struct sockaddr_in6));
),
TP_printk("client=%pISpc intra=%d async=%d "
- "src_stateid[si_generation:0x%x cl_boot:0x%x cl_id:0x%x so_id:0x%x] "
- "dst_stateid[si_generation:0x%x cl_boot:0x%x cl_id:0x%x so_id:0x%x] "
+ "src_client %08x:%08x src_stateid %08x:%08x "
+ "dst_client %08x:%08x dst_stateid %08x:%08x "
+ "cb_client %08x:%08x cb_stateid %08x:%08x "
"cp_src_pos=%llu cp_dst_pos=%llu cp_count=%llu",
__get_sockaddr(addr), __entry->intra, __entry->async,
- __entry->src_si_generation, __entry->src_cl_boot,
- __entry->src_cl_id, __entry->src_so_id,
- __entry->dst_si_generation, __entry->dst_cl_boot,
- __entry->dst_cl_id, __entry->dst_so_id,
+ __entry->src_cl_boot, __entry->src_cl_id,
+ __entry->src_so_id, __entry->src_si_generation,
+ __entry->dst_cl_boot, __entry->dst_cl_id,
+ __entry->dst_so_id, __entry->dst_si_generation,
+ __entry->cb_cl_boot, __entry->cb_cl_id,
+ __entry->cb_so_id, __entry->cb_si_generation,
__entry->src_cp_pos, __entry->dst_cp_pos, __entry->cp_count
)
);
@@ -2078,7 +2252,7 @@ DEFINE_EVENT(nfsd_copy_class, nfsd_copy_##name, \
DEFINE_COPY_EVENT(inter);
DEFINE_COPY_EVENT(intra);
-DEFINE_COPY_EVENT(do_async);
+DEFINE_COPY_EVENT(async);
TRACE_EVENT(nfsd_copy_done,
TP_PROTO(
@@ -2099,11 +2273,342 @@ TRACE_EVENT(nfsd_copy_done,
__assign_sockaddr(addr, &copy->cp_clp->cl_addr,
sizeof(struct sockaddr_in6));
),
- TP_printk("addr=%pISpc status=%d intra=%d async=%d ",
+ TP_printk("addr=%pISpc status=%d intra=%d async=%d",
__get_sockaddr(addr), __entry->status, __entry->intra, __entry->async
)
);
+DECLARE_EVENT_CLASS(nfsd_copy_async_done_class,
+ TP_PROTO(
+ const struct nfsd4_copy *copy
+ ),
+ TP_ARGS(copy),
+ TP_STRUCT__entry(
+ __field(int, status)
+ __field(bool, intra)
+ __field(bool, async)
+ __field(u32, src_cl_boot)
+ __field(u32, src_cl_id)
+ __field(u32, src_so_id)
+ __field(u32, src_si_generation)
+ __field(u32, dst_cl_boot)
+ __field(u32, dst_cl_id)
+ __field(u32, dst_so_id)
+ __field(u32, dst_si_generation)
+ __field(u32, cb_cl_boot)
+ __field(u32, cb_cl_id)
+ __field(u32, cb_so_id)
+ __field(u32, cb_si_generation)
+ __field(u64, src_cp_pos)
+ __field(u64, dst_cp_pos)
+ __field(u64, cp_count)
+ __sockaddr(addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ const stateid_t *src_stp = &copy->cp_src_stateid;
+ const stateid_t *dst_stp = &copy->cp_dst_stateid;
+ const stateid_t *cb_stp = &copy->cp_res.cb_stateid;
+
+ __entry->status = be32_to_cpu(copy->nfserr);
+ __entry->intra = test_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
+ __entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
+ __entry->src_cl_boot = src_stp->si_opaque.so_clid.cl_boot;
+ __entry->src_cl_id = src_stp->si_opaque.so_clid.cl_id;
+ __entry->src_so_id = src_stp->si_opaque.so_id;
+ __entry->src_si_generation = src_stp->si_generation;
+ __entry->dst_cl_boot = dst_stp->si_opaque.so_clid.cl_boot;
+ __entry->dst_cl_id = dst_stp->si_opaque.so_clid.cl_id;
+ __entry->dst_so_id = dst_stp->si_opaque.so_id;
+ __entry->dst_si_generation = dst_stp->si_generation;
+ __entry->cb_cl_boot = cb_stp->si_opaque.so_clid.cl_boot;
+ __entry->cb_cl_id = cb_stp->si_opaque.so_clid.cl_id;
+ __entry->cb_so_id = cb_stp->si_opaque.so_id;
+ __entry->cb_si_generation = cb_stp->si_generation;
+ __entry->src_cp_pos = copy->cp_src_pos;
+ __entry->dst_cp_pos = copy->cp_dst_pos;
+ __entry->cp_count = copy->cp_count;
+ __assign_sockaddr(addr, &copy->cp_clp->cl_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("client=%pISpc status=%d intra=%d async=%d "
+ "src_client %08x:%08x src_stateid %08x:%08x "
+ "dst_client %08x:%08x dst_stateid %08x:%08x "
+ "cb_client %08x:%08x cb_stateid %08x:%08x "
+ "cp_src_pos=%llu cp_dst_pos=%llu cp_count=%llu",
+ __get_sockaddr(addr),
+ __entry->status, __entry->intra, __entry->async,
+ __entry->src_cl_boot, __entry->src_cl_id,
+ __entry->src_so_id, __entry->src_si_generation,
+ __entry->dst_cl_boot, __entry->dst_cl_id,
+ __entry->dst_so_id, __entry->dst_si_generation,
+ __entry->cb_cl_boot, __entry->cb_cl_id,
+ __entry->cb_so_id, __entry->cb_si_generation,
+ __entry->src_cp_pos, __entry->dst_cp_pos, __entry->cp_count
+ )
+);
+
+#define DEFINE_COPY_ASYNC_DONE_EVENT(name) \
+DEFINE_EVENT(nfsd_copy_async_done_class, \
+ nfsd_copy_async_##name, \
+ TP_PROTO(const struct nfsd4_copy *copy), \
+ TP_ARGS(copy))
+
+DEFINE_COPY_ASYNC_DONE_EVENT(done);
+DEFINE_COPY_ASYNC_DONE_EVENT(cancel);
+
+TRACE_EVENT(nfsd_vfs_setattr,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const struct iattr *iap,
+ const struct timespec64 *guardtime
+ ),
+ TP_ARGS(rqstp, fhp, iap, guardtime),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __field(s64, gtime_tv_sec)
+ __field(u32, gtime_tv_nsec)
+ __field(unsigned int, ia_valid)
+ __field(loff_t, ia_size)
+ __field(uid_t, ia_uid)
+ __field(gid_t, ia_gid)
+ __field(umode_t, ia_mode)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->gtime_tv_sec = guardtime ? guardtime->tv_sec : 0;
+ __entry->gtime_tv_nsec = guardtime ? guardtime->tv_nsec : 0;
+ __entry->ia_valid = iap->ia_valid;
+ __entry->ia_size = iap->ia_size;
+ __entry->ia_uid = __kuid_val(iap->ia_uid);
+ __entry->ia_gid = __kgid_val(iap->ia_gid);
+ __entry->ia_mode = iap->ia_mode;
+ ),
+ TP_printk(
+ "xid=0x%08x fh_hash=0x%08x ia_valid=%s ia_size=%llu ia_mode=0%o ia_uid=%u ia_gid=%u guard_time=%lld.%u",
+ __entry->xid, __entry->fh_hash, show_ia_valid_flags(__entry->ia_valid),
+ __entry->ia_size, __entry->ia_mode, __entry->ia_uid, __entry->ia_gid,
+ __entry->gtime_tv_sec, __entry->gtime_tv_nsec
+ )
+)
+
+TRACE_EVENT(nfsd_vfs_lookup,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const char *name,
+ unsigned int len
+ ),
+ TP_ARGS(rqstp, fhp, name, len),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __string_len(name, name, len)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x name=%s",
+ __entry->xid, __entry->fh_hash, __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_create,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ umode_t type,
+ const char *name,
+ unsigned int len
+ ),
+ TP_ARGS(rqstp, fhp, type, name, len),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __field(umode_t, type)
+ __string_len(name, name, len)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->type = type;
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x type=%s name=%s",
+ __entry->xid, __entry->fh_hash,
+ show_fs_file_type(__entry->type), __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_symlink,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const char *name,
+ unsigned int namelen,
+ const char *target
+ ),
+ TP_ARGS(rqstp, fhp, name, namelen, target),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __string_len(name, name, namelen)
+ __string(target, target)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __assign_str(name);
+ __assign_str(target);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x name=%s target=%s",
+ __entry->xid, __entry->fh_hash,
+ __get_str(name), __get_str(target)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_link,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *sfhp,
+ const struct svc_fh *tfhp,
+ const char *name,
+ unsigned int namelen
+ ),
+ TP_ARGS(rqstp, sfhp, tfhp, name, namelen),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, sfh_hash)
+ __field(u32, tfh_hash)
+ __string_len(name, name, namelen)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->sfh_hash = knfsd_fh_hash(&sfhp->fh_handle);
+ __entry->tfh_hash = knfsd_fh_hash(&tfhp->fh_handle);
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x src_fh=0x%08x tgt_fh=0x%08x name=%s",
+ __entry->xid, __entry->sfh_hash, __entry->tfh_hash,
+ __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_unlink,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const char *name,
+ unsigned int len
+ ),
+ TP_ARGS(rqstp, fhp, name, len),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __string_len(name, name, len)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x name=%s",
+ __entry->xid, __entry->fh_hash,
+ __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_rename,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *sfhp,
+ const struct svc_fh *tfhp,
+ const char *source,
+ unsigned int sourcelen,
+ const char *target,
+ unsigned int targetlen
+ ),
+ TP_ARGS(rqstp, sfhp, tfhp, source, sourcelen, target, targetlen),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, sfh_hash)
+ __field(u32, tfh_hash)
+ __string_len(source, source, sourcelen)
+ __string_len(target, target, targetlen)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->sfh_hash = knfsd_fh_hash(&sfhp->fh_handle);
+ __entry->tfh_hash = knfsd_fh_hash(&tfhp->fh_handle);
+ __assign_str(source);
+ __assign_str(target);
+ ),
+ TP_printk("xid=0x%08x sfh_hash=0x%08x tfh_hash=0x%08x source=%s target=%s",
+ __entry->xid, __entry->sfh_hash, __entry->tfh_hash,
+ __get_str(source), __get_str(target)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_readdir,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ u32 count,
+ u64 offset
+ ),
+ TP_ARGS(rqstp, fhp, count, offset),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __field(u32, count)
+ __field(u64, offset)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->count = count;
+ __entry->offset = offset;
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu count=%u",
+ __entry->xid, __entry->fh_hash,
+ __entry->offset, __entry->count
+ )
+);
+
+DECLARE_EVENT_CLASS(nfsd_vfs_getattr_class,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp
+ ),
+ TP_ARGS(rqstp, fhp),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x",
+ __entry->xid, __entry->fh_hash
+ )
+);
+
+#define DEFINE_NFSD_VFS_GETATTR_EVENT(__name) \
+DEFINE_EVENT(nfsd_vfs_getattr_class, __name, \
+ TP_PROTO( \
+ const struct svc_rqst *rqstp, \
+ const struct svc_fh *fhp \
+ ), \
+ TP_ARGS(rqstp, fhp))
+
+DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_getattr);
+DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_statfs);
+
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6a9464262fae..cd689df2ca5d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -31,11 +31,11 @@
#include <linux/exportfs.h>
#include <linux/writeback.h>
#include <linux/security.h>
+#include <linux/sunrpc/xdr.h>
#include "xdr3.h"
#ifdef CONFIG_NFSD_V4
-#include "../internal.h"
#include "acl.h"
#include "idmap.h"
#include "xdr4.h"
@@ -48,6 +48,8 @@
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
+bool nfsd_disable_splice_read __read_mostly;
+
/**
* nfserrno - Map Linux errnos to NFS errnos
* @errno: POSIX(-ish) error code to be mapped
@@ -72,7 +74,6 @@ nfserrno (int errno)
{ nfserr_acces, -EACCES },
{ nfserr_exist, -EEXIST },
{ nfserr_xdev, -EXDEV },
- { nfserr_mlink, -EMLINK },
{ nfserr_nodev, -ENODEV },
{ nfserr_notdir, -ENOTDIR },
{ nfserr_isdir, -EISDIR },
@@ -100,6 +101,7 @@ nfserrno (int errno)
{ nfserr_io, -EUCLEAN },
{ nfserr_perm, -ENOKEY },
{ nfserr_no_grace, -ENOGRACE},
+ { nfserr_io, -EBADMSG },
};
int i;
@@ -245,7 +247,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry;
int host_err;
- dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
+ trace_nfsd_vfs_lookup(rqstp, fhp, name, len);
dparent = fhp->fh_dentry;
exp = exp_get(fhp->fh_export);
@@ -265,7 +267,8 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_nfserr;
}
} else {
- dentry = lookup_one_len_unlocked(name, dparent, len);
+ dentry = lookup_one_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, len), dparent);
host_err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_nfserr;
@@ -320,7 +323,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
if (err)
return err;
- err = check_nfsd_access(exp, rqstp);
+ err = check_nfsd_access(exp, rqstp, false);
if (err)
goto out;
/*
@@ -421,8 +424,9 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (iap->ia_size < inode->i_size) {
__be32 err;
- err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
- NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
+ err = nfsd_permission(&rqstp->rq_cred,
+ fhp->fh_export, fhp->fh_dentry,
+ NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
if (err)
return err;
}
@@ -499,6 +503,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
bool size_change = (iap->ia_valid & ATTR_SIZE);
int retries;
+ trace_nfsd_vfs_setattr(rqstp, fhp, iap, guardtime);
+
if (iap->ia_valid & ATTR_SIZE) {
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
ftype = S_IFREG;
@@ -814,7 +820,8 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
sresult |= map->access;
- err2 = nfsd_permission(rqstp, export, dentry, map->how);
+ err2 = nfsd_permission(&rqstp->rq_cred, export,
+ dentry, map->how);
switch (err2) {
case nfs_ok:
result |= map->access;
@@ -858,8 +865,7 @@ int nfsd_open_break_lease(struct inode *inode, int access)
* N.B. After this call fhp needs an fh_put
*/
static int
-__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
- int may_flags, struct file **filp)
+__nfsd_open(struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp)
{
struct path path;
struct inode *inode;
@@ -900,11 +906,6 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
goto out;
}
- if (may_flags & NFSD_MAY_64BIT_COOKIE)
- file->f_mode |= FMODE_64BITHASH;
- else
- file->f_mode |= FMODE_32BITHASH;
-
*filp = file;
out:
return host_err;
@@ -927,14 +928,14 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
* directories, but we never have and it doesn't seem to have
* caused anyone a problem. If we were to change this, note
* also that our filldir callbacks would need a variant of
- * lookup_one_len that doesn't check permissions.
+ * lookup_one_positive_unlocked() that doesn't check permissions.
*/
if (type == S_IFREG)
may_flags |= NFSD_MAY_OWNER_OVERRIDE;
retry:
err = fh_verify(rqstp, fhp, type, may_flags);
if (!err) {
- host_err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ host_err = __nfsd_open(fhp, type, may_flags, filp);
if (host_err == -EOPENSTALE && !retried) {
retried = true;
fh_put(fhp);
@@ -947,7 +948,6 @@ retry:
/**
* nfsd_open_verified - Open a regular file for the filecache
- * @rqstp: RPC request
* @fhp: NFS filehandle of the file to open
* @may_flags: internal permission flags
* @filp: OUT: open "struct file *"
@@ -955,10 +955,9 @@ retry:
* Returns zero on success, or a negative errno value.
*/
int
-nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
- struct file **filp)
+nfsd_open_verified(struct svc_fh *fhp, int may_flags, struct file **filp)
{
- return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
+ return __nfsd_open(fhp, S_IFREG, may_flags, filp);
}
/*
@@ -1088,23 +1087,23 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned long v, total;
struct iov_iter iter;
loff_t ppos = offset;
- struct page *page;
ssize_t host_err;
+ size_t len;
v = 0;
total = *count;
while (total) {
- page = *(rqstp->rq_next_page++);
- rqstp->rq_vec[v].iov_base = page_address(page) + base;
- rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base);
- total -= rqstp->rq_vec[v].iov_len;
+ len = min_t(size_t, total, PAGE_SIZE - base);
+ bvec_set_page(&rqstp->rq_bvec[v], *(rqstp->rq_next_page++),
+ len, base);
+ total -= len;
++v;
base = 0;
}
- WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec));
+ WARN_ON_ONCE(v > rqstp->rq_maxpages);
trace_nfsd_read_vector(rqstp, fhp, offset, *count);
- iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count);
+ iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count);
host_err = vfs_iter_read(file, &iter, &ppos, 0);
return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
@@ -1146,11 +1145,27 @@ static int wait_for_concurrent_writes(struct file *file)
return err;
}
+/**
+ * nfsd_vfs_write - write data to an already-open file
+ * @rqstp: RPC execution context
+ * @fhp: File handle of file to write into
+ * @nf: An open file matching @fhp
+ * @offset: Byte offset of start
+ * @payload: xdr_buf containing the write payload
+ * @cnt: IN: number of bytes to write, OUT: number of bytes actually written
+ * @stable: An NFS stable_how value
+ * @verf: NFS WRITE verifier
+ *
+ * Upon return, caller must invoke fh_put on @fhp.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
+ */
__be32
-nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
- loff_t offset, struct kvec *vec, int vlen,
- unsigned long *cnt, int stable,
- __be32 *verf)
+nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file *nf, loff_t offset,
+ const struct xdr_buf *payload, unsigned long *cnt,
+ int stable, __be32 *verf)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file *file = nf->nf_file;
@@ -1160,12 +1175,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
errseq_t since;
__be32 nfserr;
int host_err;
- int use_wgather;
loff_t pos = offset;
unsigned long exp_op_flags = 0;
unsigned int pflags = current->flags;
rwf_t flags = 0;
bool restore_flags = false;
+ unsigned int nvecs;
trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
@@ -1186,15 +1201,15 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
}
exp = fhp->fh_export;
- use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
if (!EX_ISSYNC(exp))
stable = NFS_UNSTABLE;
- if (stable && !use_wgather)
+ if (stable && !fhp->fh_use_wgather)
flags |= RWF_SYNC;
- iov_iter_kvec(&iter, ITER_SOURCE, vec, vlen, *cnt);
+ nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload);
+ iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt);
since = READ_ONCE(file->f_wb_err);
if (verf)
nfsd_copy_write_verifier(verf, nn);
@@ -1210,7 +1225,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
if (host_err < 0)
goto out_nfserr;
- if (stable && use_wgather) {
+ if (stable && fhp->fh_use_wgather) {
host_err = wait_for_concurrent_writes(file);
if (host_err < 0)
commit_reset_write_verifier(nn, rqstp, host_err);
@@ -1245,6 +1260,8 @@ out_nfserr:
*/
bool nfsd_read_splice_ok(struct svc_rqst *rqstp)
{
+ if (nfsd_disable_splice_read)
+ return false;
switch (svc_auth_flavor(rqstp)) {
case RPC_AUTH_GSS_KRB5I:
case RPC_AUTH_GSS_KRB5P:
@@ -1292,14 +1309,24 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
return err;
}
-/*
- * Write data to a file.
- * The stable flag requests synchronous writes.
- * N.B. After this call fhp needs an fh_put
+/**
+ * nfsd_write - open a file and write data to it
+ * @rqstp: RPC execution context
+ * @fhp: File handle of file to write into; nfsd_write() may modify it
+ * @offset: Byte offset of start
+ * @payload: xdr_buf containing the write payload
+ * @cnt: IN: number of bytes to write, OUT: number of bytes actually written
+ * @stable: An NFS stable_how value
+ * @verf: NFS WRITE verifier
+ *
+ * Upon return, caller must invoke fh_put on @fhp.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
*/
__be32
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *cnt, int stable,
+ const struct xdr_buf *payload, unsigned long *cnt, int stable,
__be32 *verf)
{
struct nfsd_file *nf;
@@ -1311,8 +1338,8 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
if (err)
goto out;
- err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec,
- vlen, cnt, stable, verf);
+ err = nfsd_vfs_write(rqstp, fhp, nf, offset, payload, cnt,
+ stable, verf);
nfsd_file_put(nf);
out:
trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
@@ -1348,6 +1375,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
loff_t start, end;
struct nfsd_net *nn;
+ trace_nfsd_commit_start(rqstp, fhp, offset, count);
+
/*
* Convert the client-provided (offset, count) range to a
* (start, end) range. If the client-provided range falls
@@ -1386,6 +1415,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
} else
nfsd_copy_write_verifier(verf, nn);
+ trace_nfsd_commit_done(rqstp, fhp, offset, count);
return err;
}
@@ -1422,7 +1452,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
* Callers expect new file metadata to be committed even
* if the attributes have not changed.
*/
- if (iap->ia_valid)
+ if (nfsd_attrs_valid(attrs))
status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
else
status = nfserrno(commit_metadata(resfhp));
@@ -1469,13 +1499,14 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct inode *dirp;
struct iattr *iap = attrs->na_iattr;
__be32 err;
- int host_err;
+ int host_err = 0;
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
dchild = dget(resfhp->fh_dentry);
- err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE);
+ err = nfsd_permission(&rqstp->rq_cred, fhp->fh_export, dentry,
+ NFSD_MAY_CREATE);
if (err)
goto out;
@@ -1495,28 +1526,15 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_check_ignore_resizing(iap);
break;
case S_IFDIR:
- host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
- if (!host_err && unlikely(d_unhashed(dchild))) {
- struct dentry *d;
- d = lookup_one_len(dchild->d_name.name,
- dchild->d_parent,
- dchild->d_name.len);
- if (IS_ERR(d)) {
- host_err = PTR_ERR(d);
- break;
- }
- if (unlikely(d_is_negative(d))) {
- dput(d);
- err = nfserr_serverfault;
- goto out;
- }
+ dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
+ if (IS_ERR(dchild)) {
+ host_err = PTR_ERR(dchild);
+ } else if (d_is_negative(dchild)) {
+ err = nfserr_serverfault;
+ goto out;
+ } else if (unlikely(dchild != resfhp->fh_dentry)) {
dput(resfhp->fh_dentry);
- resfhp->fh_dentry = dget(d);
- err = fh_update(resfhp);
- dput(dchild);
- dchild = d;
- if (err)
- goto out;
+ resfhp->fh_dentry = dget(dchild);
}
break;
case S_IFCHR:
@@ -1537,7 +1555,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
out:
- dput(dchild);
+ if (!IS_ERR(dchild))
+ dput(dchild);
return err;
out_nfserr:
@@ -1560,6 +1579,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 err;
int host_err;
+ trace_nfsd_vfs_create(rqstp, fhp, type, fname, flen);
+
if (isdotent(fname, flen))
return nfserr_exist;
@@ -1574,7 +1595,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
return nfserrno(host_err);
inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
- dchild = lookup_one_len(fname, dentry, flen);
+ dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
host_err = PTR_ERR(dchild);
if (IS_ERR(dchild)) {
err = nfserrno(host_err);
@@ -1660,6 +1681,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32 err, cerr;
int host_err;
+ trace_nfsd_vfs_symlink(rqstp, fhp, fname, flen, path);
+
err = nfserr_noent;
if (!flen || path[0] == '\0')
goto out;
@@ -1679,7 +1702,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
- dnew = lookup_one_len(fname, dentry, flen);
+ dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
if (IS_ERR(dnew)) {
err = nfserrno(PTR_ERR(dnew));
inode_unlock(dentry->d_inode);
@@ -1706,9 +1729,17 @@ out:
return err;
}
-/*
- * Create a hardlink
- * N.B. After this call _both_ ffhp and tfhp need an fh_put
+/**
+ * nfsd_link - create a link
+ * @rqstp: RPC transaction context
+ * @ffhp: the file handle of the directory where the new link is to be created
+ * @name: the filename of the new link
+ * @len: the length of @name in octets
+ * @tfhp: the file handle of an existing file object
+ *
+ * After this call _both_ ffhp and tfhp need an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
*/
__be32
nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
@@ -1716,9 +1747,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
{
struct dentry *ddir, *dnew, *dold;
struct inode *dirp;
+ int type;
__be32 err;
int host_err;
+ trace_nfsd_vfs_link(rqstp, ffhp, tfhp, name, len);
+
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
@@ -1735,19 +1769,19 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
if (isdotent(name, len))
goto out;
+ err = nfs_ok;
+ type = d_inode(tfhp->fh_dentry)->i_mode & S_IFMT;
host_err = fh_want_write(tfhp);
- if (host_err) {
- err = nfserrno(host_err);
+ if (host_err)
goto out;
- }
ddir = ffhp->fh_dentry;
dirp = d_inode(ddir);
inode_lock_nested(dirp, I_MUTEX_PARENT);
- dnew = lookup_one_len(name, ddir, len);
+ dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(name, len), ddir);
if (IS_ERR(dnew)) {
- err = nfserrno(PTR_ERR(dnew));
+ host_err = PTR_ERR(dnew);
goto out_unlock;
}
@@ -1763,20 +1797,26 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
fh_fill_post_attrs(ffhp);
inode_unlock(dirp);
if (!host_err) {
- err = nfserrno(commit_metadata(ffhp));
- if (!err)
- err = nfserrno(commit_metadata(tfhp));
- } else {
- if (host_err == -EXDEV && rqstp->rq_vers == 2)
- err = nfserr_acces;
- else
- err = nfserrno(host_err);
+ host_err = commit_metadata(ffhp);
+ if (!host_err)
+ host_err = commit_metadata(tfhp);
}
+
dput(dnew);
out_drop_write:
fh_drop_write(tfhp);
+ if (host_err == -EBUSY) {
+ /*
+ * See RFC 8881 Section 18.9.4 para 1-2: NFSv4 LINK
+ * wants a status unique to the object type.
+ */
+ if (type != S_IFDIR)
+ err = nfserr_file_open;
+ else
+ err = nfserr_acces;
+ }
out:
- return err;
+ return err != nfs_ok ? err : nfserrno(host_err);
out_dput:
dput(dnew);
@@ -1805,9 +1845,19 @@ nfsd_has_cached_files(struct dentry *dentry)
return ret;
}
-/*
- * Rename a file
- * N.B. After this call _both_ ffhp and tfhp need an fh_put
+/**
+ * nfsd_rename - rename a directory entry
+ * @rqstp: RPC transaction context
+ * @ffhp: the file handle of parent directory containing the entry to be renamed
+ * @fname: the filename of directory entry to be renamed
+ * @flen: the length of @fname in octets
+ * @tfhp: the file handle of parent directory to contain the renamed entry
+ * @tname: the filename of the new entry
+ * @tlen: the length of @tname in octets
+ *
+ * After this call _both_ ffhp and tfhp need an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
*/
__be32
nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
@@ -1815,10 +1865,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
{
struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap;
struct inode *fdir, *tdir;
+ int type = S_IFDIR;
__be32 err;
int host_err;
bool close_cached = false;
+ trace_nfsd_vfs_rename(rqstp, ffhp, tfhp, fname, flen, tname, tlen);
+
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err)
goto out;
@@ -1836,7 +1889,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
goto out;
- err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
+ err = nfserr_xdev;
if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
goto out;
if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
@@ -1851,8 +1904,8 @@ retry:
trap = lock_rename(tdentry, fdentry);
if (IS_ERR(trap)) {
- err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
- goto out;
+ err = nfserr_xdev;
+ goto out_want_write;
}
err = fh_fill_pre_attrs(ffhp);
if (err != nfs_ok)
@@ -1861,7 +1914,7 @@ retry:
if (err != nfs_ok)
goto out_unlock;
- odentry = lookup_one_len(fname, fdentry, flen);
+ odentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), fdentry);
host_err = PTR_ERR(odentry);
if (IS_ERR(odentry))
goto out_nfserr;
@@ -1872,11 +1925,14 @@ retry:
host_err = -EINVAL;
if (odentry == trap)
goto out_dput_old;
+ type = d_inode(odentry)->i_mode & S_IFMT;
- ndentry = lookup_one_len(tname, tdentry, tlen);
+ ndentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(tname, tlen), tdentry);
host_err = PTR_ERR(ndentry);
if (IS_ERR(ndentry))
goto out_dput_old;
+ if (d_inode(ndentry))
+ type = d_inode(ndentry)->i_mode & S_IFMT;
host_err = -ENOTEMPTY;
if (ndentry == trap)
goto out_dput_new;
@@ -1914,7 +1970,18 @@ retry:
out_dput_old:
dput(odentry);
out_nfserr:
- err = nfserrno(host_err);
+ if (host_err == -EBUSY) {
+ /*
+ * See RFC 8881 Section 18.26.4 para 1-3: NFSv4 RENAME
+ * wants a status unique to the object type.
+ */
+ if (type != S_IFDIR)
+ err = nfserr_file_open;
+ else
+ err = nfserr_acces;
+ } else {
+ err = nfserrno(host_err);
+ }
if (!close_cached) {
fh_fill_post_attrs(ffhp);
@@ -1922,6 +1989,7 @@ retry:
}
out_unlock:
unlock_rename(tdentry, fdentry);
+out_want_write:
fh_drop_write(ffhp);
/*
@@ -1940,9 +2008,17 @@ out:
return err;
}
-/*
- * Unlink a file or directory
- * N.B. After this call fhp needs an fh_put
+/**
+ * nfsd_unlink - remove a directory entry
+ * @rqstp: RPC transaction context
+ * @fhp: the file handle of the parent directory to be modified
+ * @type: enforced file type of the object to be removed
+ * @fname: the name of directory entry to be removed
+ * @flen: length of @fname in octets
+ *
+ * After this call fhp needs an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
*/
__be32
nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
@@ -1954,6 +2030,8 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
__be32 err;
int host_err;
+ trace_nfsd_vfs_unlink(rqstp, fhp, fname, flen);
+
err = nfserr_acces;
if (!flen || isdotent(fname, flen))
goto out;
@@ -1969,7 +2047,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
dirp = d_inode(dentry);
inode_lock_nested(dirp, I_MUTEX_PARENT);
- rdentry = lookup_one_len(fname, dentry, flen);
+ rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry);
host_err = PTR_ERR(rdentry);
if (IS_ERR(rdentry))
goto out_unlock;
@@ -2016,18 +2094,17 @@ out_drop_write:
fh_drop_write(fhp);
out_nfserr:
if (host_err == -EBUSY) {
- /* name is mounted-on. There is no perfect
- * error status.
+ /*
+ * See RFC 8881 Section 18.25.4 para 4: NFSv4 REMOVE
+ * wants a status unique to the object type.
*/
- if (nfsd_v4client(rqstp))
+ if (type != S_IFDIR)
err = nfserr_file_open;
else
err = nfserr_acces;
- } else {
- err = nfserrno(host_err);
}
out:
- return err;
+ return err != nfs_ok ? err : nfserrno(host_err);
out_unlock:
inode_unlock(dirp);
goto out_drop_write;
@@ -2177,14 +2254,15 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
loff_t offset = *offsetp;
int may_flags = NFSD_MAY_READ;
- /* NFSv2 only supports 32 bit cookies */
- if (rqstp->rq_vers > 2)
- may_flags |= NFSD_MAY_64BIT_COOKIE;
-
err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
if (err)
goto out;
+ if (fhp->fh_64bit_cookies)
+ file->f_mode |= FMODE_64BITHASH;
+ else
+ file->f_mode |= FMODE_32BITHASH;
+
offset = vfs_llseek(file, offset, SEEK_SET);
if (offset < 0) {
err = nfserrno((int)offset);
@@ -2242,6 +2320,8 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, in
{
__be32 err;
+ trace_nfsd_vfs_statfs(rqstp, fhp);
+
err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
if (!err) {
struct path path = {
@@ -2254,9 +2334,9 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, in
return err;
}
-static int exp_rdonly(struct svc_rqst *rqstp, struct svc_export *exp)
+static int exp_rdonly(struct svc_cred *cred, struct svc_export *exp)
{
- return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY;
+ return nfsexp_flags(cred, exp) & NFSEXP_READONLY;
}
#ifdef CONFIG_NFSD_V4
@@ -2500,8 +2580,8 @@ out_unlock:
* Check for a user's access permissions to this inode.
*/
__be32
-nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
- struct dentry *dentry, int acc)
+nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
+ struct dentry *dentry, int acc)
{
struct inode *inode = d_inode(dentry);
int err;
@@ -2516,7 +2596,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
(acc & NFSD_MAY_EXEC)? " exec" : "",
(acc & NFSD_MAY_SATTR)? " sattr" : "",
(acc & NFSD_MAY_TRUNC)? " trunc" : "",
- (acc & NFSD_MAY_LOCK)? " lock" : "",
+ (acc & NFSD_MAY_NLM)? " nlm" : "",
(acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
inode->i_mode,
IS_IMMUTABLE(inode)? " immut" : "",
@@ -2532,7 +2612,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
*/
if (!(acc & NFSD_MAY_LOCAL_ACCESS))
if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) {
- if (exp_rdonly(rqstp, exp) ||
+ if (exp_rdonly(cred, exp) ||
__mnt_is_readonly(exp->ex_path.mnt))
return nfserr_rofs;
if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
@@ -2541,16 +2621,6 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
return nfserr_perm;
- if (acc & NFSD_MAY_LOCK) {
- /* If we cannot rely on authentication in NLM requests,
- * just allow locks, otherwise require read permission, or
- * ownership
- */
- if (exp->ex_flags & NFSEXP_NOAUTHNLM)
- return 0;
- else
- acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
- }
/*
* The file owner always gets access permission for accesses that
* would normally be checked at open time. This is to make
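The vfs.c hunks above map -EBUSY to an NFSv4 status that depends on the type of the object involved in REMOVE, RENAME, or LINK, per RFC 8881. A condensed sketch of that mapping, using a hypothetical helper name (the patch open-codes this test in nfsd_link(), nfsd_rename(), and nfsd_unlink()):

/* Sketch only: the object type is sampled before the VFS call, then used
 * to pick a status when the operation fails with -EBUSY (typically because
 * the name is a mount point or the file is held open).
 */
static __be32 nfsd_busy_to_nfserr(umode_t type)
{
	/* RFC 8881: non-directories report that the object is open;
	 * directories fall back to NFS4ERR_ACCESS.
	 */
	if ((type & S_IFMT) != S_IFDIR)
		return nfserr_file_open;
	return nfserr_acces;
}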
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index c60fdb6200fd..eff04959606f 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -20,7 +20,7 @@
#define NFSD_MAY_READ 0x004 /* == MAY_READ */
#define NFSD_MAY_SATTR 0x008
#define NFSD_MAY_TRUNC 0x010
-#define NFSD_MAY_LOCK 0x020
+#define NFSD_MAY_NLM 0x020 /* request is from lockd */
#define NFSD_MAY_MASK 0x03f
/* extra hints to permission and open routines: */
@@ -33,6 +33,8 @@
#define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */
+#define NFSD_MAY_LOCALIO 0x2000 /* for tracing, reflects when localio used */
+
#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
@@ -60,6 +62,14 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs)
posix_acl_release(attrs->na_dpacl);
}
+static inline bool nfsd_attrs_valid(struct nfsd_attrs *attrs)
+{
+ struct iattr *iap = attrs->na_iattr;
+
+ return (iap->ia_valid || (attrs->na_seclabel &&
+ attrs->na_seclabel->len));
+}
+
__be32 nfserrno (int errno);
int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct svc_export **expp);
@@ -104,8 +114,8 @@ __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
-int nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp,
- int may_flags, struct file **filp);
+int nfsd_open_verified(struct svc_fh *fhp, int may_flags,
+ struct file **filp);
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
unsigned long *count,
@@ -118,13 +128,13 @@ bool nfsd_read_splice_ok(struct svc_rqst *rqstp);
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, unsigned long *count,
u32 *eof);
-__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
- struct kvec *, int, unsigned long *,
- int stable, __be32 *verf);
+__be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ loff_t offset, const struct xdr_buf *payload,
+ unsigned long *cnt, int stable, __be32 *verf);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct nfsd_file *nf, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *cnt,
- int stable, __be32 *verf);
+ const struct xdr_buf *payload,
+ unsigned long *cnt, int stable, __be32 *verf);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
@@ -145,8 +155,8 @@ __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
struct kstatfs *, int access);
-__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
- struct dentry *, int);
+__be32 nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
+ struct dentry *dentry, int acc);
void nfsd_filp_close(struct file *fp);
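With the prototypes above, WRITE handlers hand the decoded payload to nfsd_write()/nfsd_vfs_write() as an xdr_buf instead of assembling a kvec array. A minimal caller sketch, assuming a hypothetical argument structure (the real NFSv3/NFSv4 argument structs are not part of this diff):

/* Illustrative only: 'struct example_writeargs' is not an nfsd type. */
struct example_writeargs {
	loff_t		offset;
	unsigned long	len;
	int		stable;
	struct xdr_buf	payload;	/* decoded WRITE data */
};

static __be32 example_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
			    struct example_writeargs *argp, __be32 *verf)
{
	unsigned long cnt = argp->len;

	/* nfsd_write() opens the file, calls nfsd_vfs_write(), and
	 * returns the byte count actually written through 'cnt'. */
	return nfsd_write(rqstp, fhp, argp->offset, &argp->payload,
			  &cnt, argp->stable, verf);
}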
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 415516c1b27e..aa2a356da784 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -518,6 +518,24 @@ struct nfsd4_free_stateid {
stateid_t fr_stateid; /* request */
};
+struct nfsd4_get_dir_delegation {
+ /* request */
+ u32 gdda_signal_deleg_avail;
+ u32 gdda_notification_types[1];
+ struct timespec64 gdda_child_attr_delay;
+ struct timespec64 gdda_dir_attr_delay;
+ u32 gdda_child_attributes[3];
+ u32 gdda_dir_attributes[3];
+ /* response */
+ u32 gddrnf_status;
+ nfs4_verifier gddr_cookieverf;
+ stateid_t gddr_stateid;
+ u32 gddr_notification[1];
+ u32 gddr_child_attributes[3];
+ u32 gddr_dir_attributes[3];
+ bool gddrnf_will_signal_deleg_avail;
+};
+
/* also used for NVERIFY */
struct nfsd4_verify {
u32 ve_bmval[3]; /* request */
@@ -549,6 +567,7 @@ struct nfsd4_exchange_id {
struct xdr_netobj nii_domain;
struct xdr_netobj nii_name;
struct timespec64 nii_time;
+ char *server_impl_name;
};
struct nfsd4_sequence {
@@ -557,9 +576,7 @@ struct nfsd4_sequence {
u32 slotid; /* request/response */
u32 maxslots; /* request/response */
u32 cachethis; /* request */
-#if 0
u32 target_maxslots; /* response */
-#endif /* not yet */
u32 status_flags; /* response */
};
@@ -657,7 +674,12 @@ struct nfsd4_cb_offload {
struct nfsd4_callback co_cb;
struct nfsd42_write_res co_res;
__be32 co_nfserr;
+ unsigned int co_retries;
struct knfsd_fh co_fh;
+
+ struct nfs4_sessionid co_referring_sessionid;
+ u32 co_referring_slotid;
+ u32 co_referring_seqno;
};
struct nfsd4_copy {
@@ -674,11 +696,17 @@ struct nfsd4_copy {
#define NFSD4_COPY_F_INTRA (1)
#define NFSD4_COPY_F_SYNCHRONOUS (2)
#define NFSD4_COPY_F_COMMITTED (3)
+#define NFSD4_COPY_F_COMPLETED (4)
+#define NFSD4_COPY_F_OFFLOAD_DONE (5)
/* response */
+ __be32 nfserr;
struct nfsd42_write_res cp_res;
struct knfsd_fh fh;
+ /* offload callback */
+ struct nfsd4_cb_offload cp_cb_offload;
+
struct nfs4_client *cp_clp;
struct nfsd_file *nf_src;
@@ -689,10 +717,12 @@ struct nfsd4_copy {
struct list_head copies;
struct task_struct *copy_task;
refcount_t refcount;
+ unsigned int cp_ttl;
struct nfsd4_ssc_umount_item *ss_nsui;
struct nfs_fh c_fh;
nfs4_stateid stateid;
+ struct nfsd_net *cp_nn;
};
static inline void nfsd4_copy_set_sync(struct nfsd4_copy *copy, bool sync)
@@ -735,7 +765,8 @@ struct nfsd4_offload_status {
/* response */
u64 count;
- u32 status;
+ __be32 status;
+ bool completed;
};
struct nfsd4_copy_notify {
@@ -797,6 +828,7 @@ struct nfsd4_op {
struct nfsd4_reclaim_complete reclaim_complete;
struct nfsd4_test_stateid test_stateid;
struct nfsd4_free_stateid free_stateid;
+ struct nfsd4_get_dir_delegation get_dir_delegation;
struct nfsd4_getdeviceinfo getdeviceinfo;
struct nfsd4_layoutget layoutget;
struct nfsd4_layoutcommit layoutcommit;
@@ -907,6 +939,7 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *u);
extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *u);
+void nfsd4_exchange_id_release(union nfsd4_op_u *u);
extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *u);
extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *,
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index e8b00309c449..f4e29c0c701c 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -6,8 +6,11 @@
#define cb_compound_enc_hdr_sz 4
#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2))
#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2)
+#define enc_referring_call4_sz (1 + 1)
+#define enc_referring_call_list4_sz (sessionid_sz + 1 + \
+ enc_referring_call4_sz)
#define cb_sequence_enc_sz (sessionid_sz + 4 + \
- 1 /* no referring calls list yet */)
+ enc_referring_call_list4_sz)
#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4)
#define op_enc_sz 1
@@ -59,16 +62,20 @@
* 1: CB_GETATTR opcode (32-bit)
* N: file_handle
* 1: number of entry in attribute array (32-bit)
- * 1: entry 0 in attribute array (32-bit)
+ * 3: entry 0-2 in attribute array (32-bit * 3)
*/
#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \
cb_sequence_enc_sz + \
- 1 + enc_nfs4_fh_sz + 1 + 1)
+ 1 + enc_nfs4_fh_sz + 1 + 3)
/*
* 4: fattr_bitmap_maxsz
* 1: attribute array len
* 2: change attr (64-bit)
* 2: size (64-bit)
+ * 2: atime.seconds (64-bit)
+ * 1: atime.nanoseconds (32-bit)
+ * 2: mtime.seconds (64-bit)
+ * 1: mtime.nanoseconds (32-bit)
*/
#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \
- cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz)
+ cb_sequence_dec_sz + 4 + 1 + 2 + 2 + 2 + 1 + 2 + 1 + op_dec_sz)
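The enlarged decode size reflects that a CB_GETATTR reply may now carry atime and mtime in addition to the change attribute and size. A quick sanity check of the per-attribute word counts restated from the comment above (a sketch, not part of the patch):

/* XDR words in the CB_GETATTR attribute payload (sketch only). */
#define EX_BITMAP_WORDS		4	/* fattr_bitmap_maxsz */
#define EX_ATTRLEN_WORDS	1	/* attribute array length */
#define EX_U64_WORDS		2	/* change attribute or size */
#define EX_TIME_WORDS		3	/* 64-bit seconds + 32-bit nanoseconds */

_Static_assert(EX_BITMAP_WORDS + EX_ATTRLEN_WORDS +
	       2 * EX_U64_WORDS + 2 * EX_TIME_WORDS == 15,
	       "change + size + atime + mtime need 15 XDR words");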