diff options
Diffstat (limited to 'fs/nfsd')
46 files changed, 5063 insertions, 1906 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 272ab8d5c4d7..879e0b104d1c 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -4,9 +4,11 @@ config NFSD depends on INET depends on FILE_LOCKING depends on FSNOTIFY + select CRC32 select LOCKD select SUNRPC select EXPORTFS + select NFS_COMMON select NFS_ACL_SUPPORT if NFSD_V2_ACL select NFS_ACL_SUPPORT if NFSD_V3_ACL depends on MULTIUSER @@ -75,8 +77,8 @@ config NFSD_V4 select FS_POSIX_ACL select RPCSEC_GSS_KRB5 select CRYPTO + select CRYPTO_LIB_SHA256 select CRYPTO_MD5 - select CRYPTO_SHA256 select GRACE_PERIOD select NFS_V4_2_SSC_HELPER if NFS_V4_2 help @@ -162,7 +164,7 @@ config NFSD_V4_SECURITY_LABEL config NFSD_LEGACY_CLIENT_TRACKING bool "Support legacy NFSv4 client tracking methods (DEPRECATED)" depends on NFSD_V4 - default n + default y help The NFSv4 server needs to store a small amount of information on stable storage in order to handle state recovery after reboot. Most @@ -171,6 +173,16 @@ config NFSD_LEGACY_CLIENT_TRACKING recoverydir, or spawn a process directly using a usermodehelper upcall. - These legacy client tracking methods have proven to be probelmatic + These legacy client tracking methods have proven to be problematic and will be removed in the future. Say Y here if you need support for them in the interim. + +config NFSD_V4_DELEG_TIMESTAMPS + bool "Support delegated timestamps" + depends on NFSD_V4 + default n + help + NFSD implements delegated timestamps according to + draft-ietf-nfsv4-delstid-08 "Extending the Opening of Files". This + is currently an experimental feature and is therefore left disabled + by default. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index b8736a82e57c..55744bb786c9 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -18,8 +18,24 @@ nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ - nfs4acl.o nfs4callback.o nfs4recover.o + nfs4acl.o nfs4callback.o nfs4recover.o nfs4xdr_gen.o nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o +nfsd-$(CONFIG_NFS_LOCALIO) += localio.o +nfsd-$(CONFIG_DEBUG_FS) += debugfs.o + + +.PHONY: xdrgen + +xdrgen: ../../include/linux/sunrpc/xdrgen/nfs4_1.h nfs4xdr_gen.h nfs4xdr_gen.c + +../../include/linux/sunrpc/xdrgen/nfs4_1.h: ../../Documentation/sunrpc/xdr/nfs4_1.x + ../../tools/net/sunrpc/xdrgen/xdrgen definitions $< > $@ + +nfs4xdr_gen.h: ../../Documentation/sunrpc/xdr/nfs4_1.x + ../../tools/net/sunrpc/xdrgen/xdrgen declarations $< > $@ + +nfs4xdr_gen.c: ../../Documentation/sunrpc/xdr/nfs4_1.x + ../../tools/net/sunrpc/xdrgen/xdrgen source $< > $@ diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index e6beaaf4f170..4dc327e02456 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -5,37 +5,37 @@ #include "nfsd.h" #include "auth.h" -int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) +int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp) { struct exp_flavor_info *f; struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; for (f = exp->ex_flavors; f < end; f++) { - if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) + if (f->pseudoflavor == cred->cr_flavor) return f->flags; } return exp->ex_flags; } -int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) +int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp) { struct group_info *rqgi; struct group_info *gi; struct cred *new; int i; - int flags = nfsexp_flags(rqstp, exp); + int flags = nfsexp_flags(cred, exp); /* discard any old override before preparing the new set */ - revert_creds(get_cred(current_real_cred())); + put_cred(revert_creds(get_cred(current_real_cred()))); new = prepare_creds(); if (!new) return -ENOMEM; - new->fsuid = rqstp->rq_cred.cr_uid; - new->fsgid = rqstp->rq_cred.cr_gid; + new->fsuid = cred->cr_uid; + new->fsgid = cred->cr_gid; - rqgi = rqstp->rq_cred.cr_group_info; + rqgi = cred->cr_group_info; if (flags & NFSEXP_ALLSQUASH) { new->fsuid = exp->ex_anon_uid; @@ -80,7 +80,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); put_cred(override_creds(new)); - put_cred(new); return 0; oom: diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h index dbd66424f600..8c5031bbbcee 100644 --- a/fs/nfsd/auth.h +++ b/fs/nfsd/auth.h @@ -12,6 +12,6 @@ * Set the current process's fsuid/fsgid etc to those of the NFS * client user */ -int nfsd_setuser(struct svc_rqst *, struct svc_export *); +int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp); #endif /* LINUX_NFSD_AUTH_H */ diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 3c040c81c77d..08a20e5bcf7f 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -147,8 +147,7 @@ nfsd4_block_get_device_info_simple(struct super_block *sb, struct pnfs_block_deviceaddr *dev; struct pnfs_block_volume *b; - dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + - sizeof(struct pnfs_block_volume), GFP_KERNEL); + dev = kzalloc(struct_size(dev, volumes, 1), GFP_KERNEL); if (!dev) return -ENOMEM; gdp->gd_device = dev; @@ -255,8 +254,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb, const struct pr_ops *ops; int ret; - dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + - sizeof(struct pnfs_block_volume), GFP_KERNEL); + dev = kzalloc(struct_size(dev, volumes, 1), GFP_KERNEL); if (!dev) return -ENOMEM; gdp->gd_device = dev; diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h index b0361e8aa9a7..4e28ac8f1127 100644 --- a/fs/nfsd/blocklayoutxdr.h +++ b/fs/nfsd/blocklayoutxdr.h @@ -47,7 +47,7 @@ struct pnfs_block_volume { struct pnfs_block_deviceaddr { u32 nr_volumes; - struct pnfs_block_volume volumes[]; + struct pnfs_block_volume volumes[] __counted_by(nr_volumes); }; __be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr, diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 66a05fefae98..bb7addef4a31 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -10,7 +10,7 @@ #define NFSCACHE_H #include <linux/sunrpc/svc.h> -#include "netns.h" +#include "nfsd.h" /* * Representation of a reply cache entry. diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c new file mode 100644 index 000000000000..84b0c8b559dc --- /dev/null +++ b/fs/nfsd/debugfs.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/debugfs.h> + +#include "nfsd.h" + +static struct dentry *nfsd_top_dir __read_mostly; + +/* + * /sys/kernel/debug/nfsd/disable-splice-read + * + * Contents: + * %0: NFS READ is allowed to use page splicing + * %1: NFS READ uses only iov iter read + * + * The default value of this setting is zero (page splicing is + * allowed). This setting takes immediate effect for all NFS + * versions, all exports, and in all NFSD net namespaces. + */ + +static int nfsd_dsr_get(void *data, u64 *val) +{ + *val = nfsd_disable_splice_read ? 1 : 0; + return 0; +} + +static int nfsd_dsr_set(void *data, u64 val) +{ + nfsd_disable_splice_read = (val > 0) ? true : false; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dsr_fops, nfsd_dsr_get, nfsd_dsr_set, "%llu\n"); + +void nfsd_debugfs_exit(void) +{ + debugfs_remove_recursive(nfsd_top_dir); + nfsd_top_dir = NULL; +} + +void nfsd_debugfs_init(void) +{ + nfsd_top_dir = debugfs_create_dir("nfsd", NULL); + + debugfs_create_file("disable-splice-read", S_IWUSR | S_IRUGO, + nfsd_top_dir, NULL, &nfsd_dsr_fops); +} diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 7b641095a665..88ae410b4113 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -334,33 +334,46 @@ static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) static int export_stats_init(struct export_stats *stats) { stats->start_time = ktime_get_seconds(); - return nfsd_percpu_counters_init(stats->counter, EXP_STATS_COUNTERS_NUM); + return percpu_counter_init_many(stats->counter, 0, GFP_KERNEL, + EXP_STATS_COUNTERS_NUM); } static void export_stats_reset(struct export_stats *stats) { - if (stats) - nfsd_percpu_counters_reset(stats->counter, - EXP_STATS_COUNTERS_NUM); + if (stats) { + int i; + + for (i = 0; i < EXP_STATS_COUNTERS_NUM; i++) + percpu_counter_set(&stats->counter[i], 0); + } } static void export_stats_destroy(struct export_stats *stats) { if (stats) - nfsd_percpu_counters_destroy(stats->counter, - EXP_STATS_COUNTERS_NUM); + percpu_counter_destroy_many(stats->counter, + EXP_STATS_COUNTERS_NUM); } -static void svc_export_put(struct kref *ref) +static void svc_export_release(struct rcu_head *rcu_head) { - struct svc_export *exp = container_of(ref, struct svc_export, h.ref); - path_put(&exp->ex_path); - auth_domain_put(exp->ex_client); + struct svc_export *exp = container_of(rcu_head, struct svc_export, + ex_rcu); + nfsd4_fslocs_free(&exp->ex_fslocs); export_stats_destroy(exp->ex_stats); kfree(exp->ex_stats); kfree(exp->ex_uuid); - kfree_rcu(exp, ex_rcu); + kfree(exp); +} + +static void svc_export_put(struct kref *ref) +{ + struct svc_export *exp = container_of(ref, struct svc_export, h.ref); + + path_put(&exp->ex_path); + auth_domain_put(exp->ex_client); + call_rcu(&exp->ex_rcu, svc_export_release); } static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) @@ -1070,10 +1083,32 @@ static struct svc_export *exp_find(struct cache_detail *cd, return exp; } -__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) +/** + * check_nfsd_access - check if access to export is allowed. + * @exp: svc_export that is being accessed. + * @rqstp: svc_rqst attempting to access @exp (will be NULL for LOCALIO). + * @may_bypass_gss: reduce strictness of authorization check + * + * Return values: + * %nfs_ok if access is granted, or + * %nfserr_wrongsec if access is denied + */ +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp, + bool may_bypass_gss) { struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors; - struct svc_xprt *xprt = rqstp->rq_xprt; + struct svc_xprt *xprt; + + /* + * If rqstp is NULL, this is a LOCALIO request which will only + * ever use a filehandle/credential pair for which access has + * been affirmed (by ACCESS or OPEN NFS requests) over the + * wire. So there is no need for further checks here. + */ + if (!rqstp) + return nfs_ok; + + xprt = rqstp->rq_xprt; if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) { if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) @@ -1089,22 +1124,23 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) test_bit(XPT_PEER_AUTH, &xprt->xpt_flags)) goto ok; } - goto denied; + if (!may_bypass_gss) + goto denied; ok: /* legacy gss-only clients are always OK: */ if (exp->ex_client == rqstp->rq_gssclient) - return 0; + return nfs_ok; /* ip-address based client; check sec= export option: */ for (f = exp->ex_flavors; f < end; f++) { if (f->pseudoflavor == rqstp->rq_cred.cr_flavor) - return 0; + return nfs_ok; } /* defaults in absence of sec= options: */ if (exp->ex_nflavors == 0) { if (rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL || rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX) - return 0; + return nfs_ok; } /* If the compound op contains a spo_must_allowed op, @@ -1114,10 +1150,27 @@ ok: */ if (nfsd4_spo_must_allow(rqstp)) - return 0; + return nfs_ok; + + /* Some calls may be processed without authentication + * on GSS exports. For example NFS2/3 calls on root + * directory, see section 2.3.2 of rfc 2623. + * For "may_bypass_gss" check that export has really + * enabled some flavor with authentication (GSS or any + * other) and also check that the used auth flavor is + * without authentication (none or sys). + */ + if (may_bypass_gss && ( + rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL || + rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)) { + for (f = exp->ex_flavors; f < end; f++) { + if (f->pseudoflavor >= RPC_AUTH_DES) + return 0; + } + } denied: - return rqstp->rq_vers < 4 ? nfserr_acces : nfserr_wrongsec; + return nfserr_wrongsec; } /* @@ -1160,19 +1213,35 @@ gss: return gssexp; } +/** + * rqst_exp_find - Find an svc_export in the context of a rqst or similar + * @reqp: The handle to be used to suspend the request if a cache-upcall is needed + * If NULL, missing in-cache information will result in failure. + * @net: The network namespace in which the request exists + * @cl: default auth_domain to use for looking up the export + * @gsscl: an alternate auth_domain defined using deprecated gss/krb5 format. + * @fsid_type: The type of fsid to look for + * @fsidv: The actual fsid to look up in the context of either client. + * + * Perform a lookup for @cl/@fsidv in the given @net for an export. If + * none found and @gsscl specified, repeat the lookup. + * + * Returns an export, or an error pointer. + */ struct svc_export * -rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) +rqst_exp_find(struct cache_req *reqp, struct net *net, + struct auth_domain *cl, struct auth_domain *gsscl, + int fsid_type, u32 *fsidv) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT); - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct cache_detail *cd = nn->svc_export_cache; - if (rqstp->rq_client == NULL) + if (!cl) goto gss; /* First try the auth_unix client: */ - exp = exp_find(cd, rqstp->rq_client, fsid_type, - fsidv, &rqstp->rq_chandle); + exp = exp_find(cd, cl, fsid_type, fsidv, reqp); if (PTR_ERR(exp) == -ENOENT) goto gss; if (IS_ERR(exp)) @@ -1182,10 +1251,9 @@ rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv) return exp; gss: /* Otherwise, try falling back on gss client */ - if (rqstp->rq_gssclient == NULL) + if (!gsscl) return exp; - gssexp = exp_find(cd, rqstp->rq_gssclient, fsid_type, fsidv, - &rqstp->rq_chandle); + gssexp = exp_find(cd, gsscl, fsid_type, fsidv, reqp); if (PTR_ERR(gssexp) == -ENOENT) return exp; if (!IS_ERR(exp)) @@ -1216,7 +1284,9 @@ struct svc_export *rqst_find_fsidzero_export(struct svc_rqst *rqstp) mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); - return rqst_exp_find(rqstp, FSID_NUM, fsidv); + return rqst_exp_find(&rqstp->rq_chandle, SVC_NET(rqstp), + rqstp->rq_client, rqstp->rq_gssclient, + FSID_NUM, fsidv); } /* @@ -1365,10 +1435,9 @@ static int e_show(struct seq_file *m, void *p) return 0; } - exp_get(exp); - if (cache_check(cd, &exp->h, NULL)) + if (cache_check_rcu(cd, &exp->h, NULL)) return 0; - exp_put(exp); + return svc_export_show(m, cd, cp); } diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index ca9dc230ae3d..4d92b99c1ffd 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -99,8 +99,10 @@ struct svc_expkey { #define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE) #define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES) -int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp); -__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); +struct svc_cred; +int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp); +__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp, + bool may_bypass_gss); /* * Function declarations @@ -127,6 +129,8 @@ static inline struct svc_export *exp_get(struct svc_export *exp) cache_get(&exp->h); return exp; } -struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); +struct svc_export *rqst_exp_find(struct cache_req *reqp, struct net *net, + struct auth_domain *cl, struct auth_domain *gsscl, + int fsid_type, u32 *fsidv); #endif /* NFSD_EXPORT_H */ diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ddd3e0d9cfa6..e108b6c705b4 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -39,6 +39,7 @@ #include <linux/fsnotify.h> #include <linux/seq_file.h> #include <linux/rhashtable.h> +#include <linux/nfslocalio.h> #include "vfs.h" #include "nfsd.h" @@ -52,10 +53,11 @@ #define NFSD_FILE_CACHE_UP (0) /* We only care about NFSD_MAY_READ/WRITE for this cache */ -#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) +#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE|NFSD_MAY_LOCALIO) static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); +static DEFINE_PER_CPU(unsigned long, nfsd_file_allocations); static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); @@ -111,7 +113,7 @@ static void nfsd_file_schedule_laundrette(void) { if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags)) - queue_delayed_work(system_wq, &nfsd_filecache_laundrette, + queue_delayed_work(system_unbound_wq, &nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY); } @@ -151,7 +153,7 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm) } static struct nfsd_file_mark * -nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) +nfsd_file_mark_find_or_create(struct inode *inode) { int err; struct fsnotify_mark *mark; @@ -159,8 +161,8 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) do { fsnotify_group_lock(nfsd_file_fsnotify_group); - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, - nfsd_file_fsnotify_group); + mark = fsnotify_find_inode_mark(inode, + nfsd_file_fsnotify_group); if (mark) { nfm = nfsd_file_mark_get(container_of(mark, struct nfsd_file_mark, @@ -215,7 +217,9 @@ nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need, if (unlikely(!nf)) return NULL; + this_cpu_inc(nfsd_file_allocations); INIT_LIST_HEAD(&nf->nf_lru); + INIT_LIST_HEAD(&nf->nf_gc); nf->nf_birthtime = ktime_get(); nf->nf_file = NULL; nf->nf_cred = get_current_cred(); @@ -315,15 +319,14 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } - -static bool nfsd_file_lru_add(struct nfsd_file *nf) +static void nfsd_file_lru_add(struct nfsd_file *nf) { - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) { + refcount_inc(&nf->nf_ref); + if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru)) trace_nfsd_file_lru_add(nf); - return true; - } - return false; + else + WARN_ON(1); + nfsd_file_schedule_laundrette(); } static bool nfsd_file_lru_remove(struct nfsd_file *nf) @@ -359,42 +362,76 @@ nfsd_file_put(struct nfsd_file *nf) if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - /* - * If this is the last reference (nf_ref == 1), then try to - * transfer it to the LRU. - */ - if (refcount_dec_not_one(&nf->nf_ref)) - return; - - /* Try to add it to the LRU. If that fails, decrement. */ - if (nfsd_file_lru_add(nf)) { - /* If it's still hashed, we're done */ - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_schedule_laundrette(); - return; - } - - /* - * We're racing with unhashing, so try to remove it from - * the LRU. If removal fails, then someone else already - * has our reference. - */ - if (!nfsd_file_lru_remove(nf)) - return; - } + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + set_bit(NFSD_FILE_RECENT, &nf->nf_flags); } + if (refcount_dec_and_test(&nf->nf_ref)) nfsd_file_free(nf); } +/** + * nfsd_file_put_local - put nfsd_file reference and arm nfsd_net_put in caller + * @nf: nfsd_file of which to put the reference + * + * First save the associated net to return to caller, then put + * the reference of the nfsd_file. + */ +struct net * +nfsd_file_put_local(struct nfsd_file __rcu **pnf) +{ + struct nfsd_file *nf; + struct net *net = NULL; + + nf = unrcu_pointer(xchg(pnf, NULL)); + if (nf) { + net = nf->nf_net; + nfsd_file_put(nf); + } + return net; +} + +/** + * nfsd_file_get_local - get nfsd_file reference and reference to net + * @nf: nfsd_file of which to put the reference + * + * Get reference to both the nfsd_file and nf->nf_net. + */ +struct nfsd_file * +nfsd_file_get_local(struct nfsd_file *nf) +{ + struct net *net = nf->nf_net; + + if (nfsd_net_try_get(net)) { + nf = nfsd_file_get(nf); + if (!nf) + nfsd_net_put(net); + } else { + nf = NULL; + } + return nf; +} + +/** + * nfsd_file_file - get the backing file of an nfsd_file + * @nf: nfsd_file of which to access the backing file. + * + * Return backing file for @nf. + */ +struct file * +nfsd_file_file(struct nfsd_file *nf) +{ + return nf->nf_file; +} + static void nfsd_file_dispose_list(struct list_head *dispose) { struct nfsd_file *nf; while (!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); + nf = list_first_entry(dispose, struct nfsd_file, nf_gc); + list_del_init(&nf->nf_gc); nfsd_file_free(nf); } } @@ -411,14 +448,23 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) { while(!list_empty(dispose)) { struct nfsd_file *nf = list_first_entry(dispose, - struct nfsd_file, nf_lru); + struct nfsd_file, nf_gc); struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); struct nfsd_fcache_disposal *l = nn->fcache_disposal; + struct svc_serv *serv; spin_lock(&l->lock); - list_move_tail(&nf->nf_lru, &l->freeme); + list_move_tail(&nf->nf_gc, &l->freeme); spin_unlock(&l->lock); - svc_wake_up(nn->nfsd_serv); + + /* + * The filecache laundrette is shut down after the + * nn->nfsd_serv pointer is cleared, but before the + * svc_serv is freed. + */ + serv = nn->nfsd_serv; + if (serv) + svc_wake_up(serv); } } @@ -456,7 +502,6 @@ void nfsd_file_net_dispose(struct nfsd_net *nn) * nfsd_file_lru_cb - Examine an entry on the LRU list * @item: LRU entry to examine * @lru: controlling LRU - * @lock: LRU list lock (unused) * @arg: dispose list * * Return values: @@ -466,9 +511,7 @@ void nfsd_file_net_dispose(struct nfsd_net *nn) */ static enum lru_status nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, - spinlock_t *lock, void *arg) - __releases(lock) - __acquires(lock) + void *arg) { struct list_head *head = arg; struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); @@ -492,31 +535,71 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, } /* - * Put the reference held on behalf of the LRU. If it wasn't the last - * one, then just remove it from the LRU and ignore it. + * Put the reference held on behalf of the LRU if it is the last + * reference, else rotate. */ - if (!refcount_dec_and_test(&nf->nf_ref)) { + if (!refcount_dec_if_one(&nf->nf_ref)) { trace_nfsd_file_gc_in_use(nf); - list_lru_isolate(lru, &nf->nf_lru); - return LRU_REMOVED; + return LRU_ROTATE; } /* Refcount went to zero. Unhash it and queue it to the dispose list */ nfsd_file_unhash(nf); - list_lru_isolate_move(lru, &nf->nf_lru, head); + list_lru_isolate(lru, &nf->nf_lru); + list_add(&nf->nf_gc, head); this_cpu_inc(nfsd_file_evictions); trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; } +static enum lru_status +nfsd_file_gc_cb(struct list_head *item, struct list_lru_one *lru, + void *arg) +{ + struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); + + if (test_and_clear_bit(NFSD_FILE_RECENT, &nf->nf_flags)) { + /* + * "REFERENCED" really means "should be at the end of the + * LRU. As we are putting it there we can clear the flag. + */ + clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + trace_nfsd_file_gc_aged(nf); + return LRU_ROTATE; + } + return nfsd_file_lru_cb(item, lru, arg); +} + +/* If the shrinker runs between calls to list_lru_walk_node() in + * nfsd_file_gc(), the "remaining" count will be wrong. This could + * result in premature freeing of some files. This may not matter much + * but is easy to fix with this spinlock which temporarily disables + * the shrinker. + */ +static DEFINE_SPINLOCK(nfsd_gc_lock); static void nfsd_file_gc(void) { + unsigned long ret = 0; LIST_HEAD(dispose); - unsigned long ret; + int nid; - ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, - &dispose, list_lru_count(&nfsd_file_lru)); + spin_lock(&nfsd_gc_lock); + for_each_node_state(nid, N_NORMAL_MEMORY) { + unsigned long remaining = list_lru_count_node(&nfsd_file_lru, nid); + + while (remaining > 0) { + unsigned long nr = min(remaining, NFSD_FILE_GC_BATCH); + + remaining -= nr; + ret += list_lru_walk_node(&nfsd_file_lru, nid, nfsd_file_gc_cb, + &dispose, &nr); + if (nr) + /* walk aborted early */ + remaining = 0; + } + } + spin_unlock(&nfsd_gc_lock); trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_dispose_list_delayed(&dispose); } @@ -524,9 +607,9 @@ nfsd_file_gc(void) static void nfsd_file_gc_worker(struct work_struct *work) { - nfsd_file_gc(); if (list_lru_count(&nfsd_file_lru)) - nfsd_file_schedule_laundrette(); + nfsd_file_gc(); + nfsd_file_schedule_laundrette(); } static unsigned long @@ -541,8 +624,12 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) LIST_HEAD(dispose); unsigned long ret; + if (!spin_trylock(&nfsd_gc_lock)) + return SHRINK_STOP; + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); + spin_unlock(&nfsd_gc_lock); trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); nfsd_file_dispose_list_delayed(&dispose); return ret; @@ -578,7 +665,7 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) /* If refcount goes to 0, then put on the dispose list */ if (refcount_sub_and_test(decrement, &nf->nf_ref)) { - list_add(&nf->nf_lru, dispose); + list_add(&nf->nf_gc, dispose); trace_nfsd_file_closing(nf); } } @@ -647,24 +734,19 @@ nfsd_file_close_inode(struct inode *inode) void nfsd_file_close_inode_sync(struct inode *inode) { - struct nfsd_file *nf; LIST_HEAD(dispose); trace_nfsd_file_close(inode); nfsd_file_queue_for_close(inode, &dispose); - while (!list_empty(&dispose)) { - nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); - nfsd_file_free(nf); - } + nfsd_file_dispose_list(&dispose); } static int nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, void *data) { - struct file_lock *fl = data; + struct file_lease *fl = data; /* Only close files for F_SETLEASE leases */ if (fl->c.flc_flags & FL_LEASE) @@ -719,7 +801,7 @@ nfsd_file_cache_init(void) ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params); if (ret) - return ret; + goto out; ret = -ENOMEM; nfsd_file_slab = KMEM_CACHE(nfsd_file, 0); @@ -760,7 +842,7 @@ nfsd_file_cache_init(void) } nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, - FSNOTIFY_GROUP_NOFS); + 0); if (IS_ERR(nfsd_file_fsnotify_group)) { pr_err("nfsd: unable to create fsnotify group: %ld\n", PTR_ERR(nfsd_file_fsnotify_group)); @@ -771,6 +853,8 @@ nfsd_file_cache_init(void) INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); out: + if (ret) + clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags); return ret; out_notifier: lease_unregister_notifier(&nfsd_file_lease_notifier); @@ -802,6 +886,14 @@ __nfsd_file_cache_purge(struct net *net) struct nfsd_file *nf; LIST_HEAD(dispose); +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + if (net) { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + nfs_localio_invalidate_clients(&nn->local_clients, + &nn->local_clients_lock); + } +#endif + rhltable_walk_enter(&nfsd_file_rhltable, &iter); do { rhashtable_walk_start(&iter); @@ -909,6 +1001,7 @@ nfsd_file_cache_shutdown(void) for_each_possible_cpu(i) { per_cpu(nfsd_file_cache_hits, i) = 0; per_cpu(nfsd_file_acquisitions, i) = 0; + per_cpu(nfsd_file_allocations, i) = 0; per_cpu(nfsd_file_releases, i) = 0; per_cpu(nfsd_file_total_age, i) = 0; per_cpu(nfsd_file_evictions, i) = 0; @@ -977,12 +1070,14 @@ nfsd_file_is_cached(struct inode *inode) } static __be32 -nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, +nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net, + struct svc_cred *cred, + struct auth_domain *client, + struct svc_fh *fhp, unsigned int may_flags, struct file *file, struct nfsd_file **pnf, bool want_gc) { unsigned char need = may_flags & NFSD_FILE_MAY_MASK; - struct net *net = SVC_NET(rqstp); struct nfsd_file *new, *nf; bool stale_retry = true; bool open_retry = true; @@ -991,8 +1086,13 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, int ret; retry: - status = fh_verify(rqstp, fhp, S_IFREG, - may_flags|NFSD_MAY_OWNER_OVERRIDE); + if (rqstp) { + status = fh_verify(rqstp, fhp, S_IFREG, + may_flags|NFSD_MAY_OWNER_OVERRIDE); + } else { + status = fh_verify_local(net, cred, client, fhp, S_IFREG, + may_flags|NFSD_MAY_OWNER_OVERRIDE); + } if (status != nfs_ok) return status; inode = d_inode(fhp->fh_dentry); @@ -1001,16 +1101,8 @@ retry: nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc); rcu_read_unlock(); - if (nf) { - /* - * If the nf is on the LRU then it holds an extra reference - * that must be put if it's removed. It had better not be - * the last one however, since we should hold another. - */ - if (nfsd_file_lru_remove(nf)) - WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); + if (nf) goto wait_for_construction; - } new = nfsd_file_alloc(net, inode, need, want_gc); if (!new) { @@ -1024,7 +1116,7 @@ retry: if (unlikely(nf)) { spin_unlock(&inode->i_lock); rcu_read_unlock(); - nfsd_file_slab_free(&new->nf_rcu); + nfsd_file_free(new); goto wait_for_construction; } nf = new; @@ -1035,8 +1127,6 @@ retry: if (likely(ret == 0)) goto open_file; - if (ret == -EEXIST) - goto retry; trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret); status = nfserr_jukebox; goto construction_err; @@ -1051,6 +1141,7 @@ wait_for_construction: status = nfserr_jukebox; goto construction_err; } + nfsd_file_put(nf); open_retry = false; fh_put(fhp); goto retry; @@ -1074,7 +1165,7 @@ out: open_file: trace_nfsd_file_alloc(nf); - nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode); + nf->nf_mark = nfsd_file_mark_find_or_create(inode); if (nf->nf_mark) { if (file) { get_file(file); @@ -1082,8 +1173,7 @@ open_file: status = nfs_ok; trace_nfsd_file_opened(nf, status); } else { - ret = nfsd_open_verified(rqstp, fhp, may_flags, - &nf->nf_file); + ret = nfsd_open_verified(fhp, may_flags, &nf->nf_file); if (ret == -EOPENSTALE && stale_retry) { stale_retry = false; nfsd_file_unhash(nf); @@ -1106,6 +1196,9 @@ open_file: */ if (status != nfs_ok || inode->i_nlink == 0) nfsd_file_unhash(nf); + else if (want_gc) + nfsd_file_lru_add(nf); + clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags); if (status == nfs_ok) goto out; @@ -1139,7 +1232,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); + return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, + fhp, may_flags, NULL, pnf, true); } /** @@ -1163,7 +1257,54 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); + return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, + fhp, may_flags, NULL, pnf, false); +} + +/** + * nfsd_file_acquire_local - Get a struct nfsd_file with an open file for localio + * @net: The network namespace in which to perform a lookup + * @cred: the user credential with which to validate access + * @client: the auth_domain for LOCALIO lookup + * @fhp: the NFS filehandle of the file to be opened + * @may_flags: NFSD_MAY_ settings for the file + * @pnf: OUT: new or found "struct nfsd_file" object + * + * This file lookup interface provide access to a file given the + * filehandle and credential. No connection-based authorisation + * is performed and in that way it is quite different to other + * file access mediated by nfsd. It allows a kernel module such as the NFS + * client to reach across network and filesystem namespaces to access + * a file. The security implications of this should be carefully + * considered before use. + * + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. The object is unhashed after the + * final nfsd_file_put(). + * + * Return values: + * %nfs_ok - @pnf points to an nfsd_file with its reference + * count boosted. + * + * On error, an nfsstat value in network byte order is returned. + */ +__be32 +nfsd_file_acquire_local(struct net *net, struct svc_cred *cred, + struct auth_domain *client, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf) +{ + /* + * Save creds before calling nfsd_file_do_acquire() (which calls + * nfsd_setuser). Important because caller (LOCALIO) is from + * client context. + */ + const struct cred *save_cred = get_current_cred(); + __be32 beres; + + beres = nfsd_file_do_acquire(NULL, net, cred, client, + fhp, may_flags, NULL, pnf, false); + put_cred(revert_creds(save_cred)); + return beres; } /** @@ -1189,7 +1330,8 @@ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct file *file, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); + return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, + fhp, may_flags, file, pnf, false); } /* @@ -1199,7 +1341,7 @@ nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, */ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned long releases = 0, evictions = 0; + unsigned long allocations = 0, releases = 0, evictions = 0; unsigned long hits = 0, acquisitions = 0; unsigned int i, count = 0, buckets = 0; unsigned long lru = 0, total_age = 0; @@ -1224,6 +1366,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) for_each_possible_cpu(i) { hits += per_cpu(nfsd_file_cache_hits, i); acquisitions += per_cpu(nfsd_file_acquisitions, i); + allocations += per_cpu(nfsd_file_allocations, i); releases += per_cpu(nfsd_file_releases, i); total_age += per_cpu(nfsd_file_total_age, i); evictions += per_cpu(nfsd_file_evictions, i); @@ -1234,6 +1377,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); seq_printf(m, "acquisitions: %lu\n", acquisitions); + seq_printf(m, "allocations: %lu\n", allocations); seq_printf(m, "releases: %lu\n", releases); seq_printf(m, "evictions: %lu\n", evictions); if (releases) diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index c61884def906..722b26c71e45 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -4,6 +4,12 @@ #include <linux/fsnotify_backend.h> /* + * Limit the time that the list_lru_one lock is held during + * an LRU scan. + */ +#define NFSD_FILE_GC_BATCH (16UL) + +/* * This is the fsnotify_mark container that nfsd attaches to the files that it * is holding open. Note that we have a separate refcount here aside from the * one in the fsnotify_mark. We only want a single fsnotify_mark attached to @@ -38,12 +44,14 @@ struct nfsd_file { #define NFSD_FILE_PENDING (1) #define NFSD_FILE_REFERENCED (2) #define NFSD_FILE_GC (3) +#define NFSD_FILE_RECENT (4) unsigned long nf_flags; refcount_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; struct list_head nf_lru; + struct list_head nf_gc; struct rcu_head nf_rcu; ktime_t nf_birthtime; }; @@ -54,7 +62,10 @@ void nfsd_file_cache_shutdown(void); int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); +struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf); +struct nfsd_file *nfsd_file_get_local(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); +struct file *nfsd_file_file(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); void nfsd_file_net_dispose(struct nfsd_net *nn); bool nfsd_file_is_cached(struct inode *inode); @@ -65,5 +76,8 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct file *file, struct nfsd_file **nfp); +__be32 nfsd_file_acquire_local(struct net *net, struct svc_cred *cred, + struct auth_domain *client, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf); int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c new file mode 100644 index 000000000000..80d9ff6608a7 --- /dev/null +++ b/fs/nfsd/localio.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * NFS server support for local clients to bypass network stack + * + * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com> + * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com> + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + * Copyright (C) 2024 NeilBrown <neilb@suse.de> + */ + +#include <linux/exportfs.h> +#include <linux/sunrpc/svcauth.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs_common.h> +#include <linux/nfslocalio.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_xdr.h> +#include <linux/string.h> + +#include "nfsd.h" +#include "vfs.h" +#include "netns.h" +#include "filecache.h" +#include "cache.h" + +/** + * nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file + * + * @net: 'struct net' to get the proper nfsd_net required for LOCALIO access + * @dom: 'struct auth_domain' required for LOCALIO access + * @rpc_clnt: rpc_clnt that the client established + * @cred: cred that the client established + * @nfs_fh: filehandle to lookup + * @nfp: place to find the nfsd_file, or store it if it was non-NULL + * @fmode: fmode_t to use for open + * + * This function maps a local fh to a path on a local filesystem. + * This is useful when the nfs client has the local server mounted - it can + * avoid all the NFS overhead with reads, writes and commits. + * + * On successful return, returned nfsd_file will have its nf_net member + * set. Caller (NFS client) is responsible for calling nfsd_net_put and + * nfsd_file_put (via nfs_to_nfsd_file_put_local). + */ +static struct nfsd_file * +nfsd_open_local_fh(struct net *net, struct auth_domain *dom, + struct rpc_clnt *rpc_clnt, const struct cred *cred, + const struct nfs_fh *nfs_fh, struct nfsd_file __rcu **pnf, + const fmode_t fmode) +{ + int mayflags = NFSD_MAY_LOCALIO; + struct svc_cred rq_cred; + struct svc_fh fh; + struct nfsd_file *localio; + __be32 beres; + + if (nfs_fh->size > NFS4_FHSIZE) + return ERR_PTR(-EINVAL); + + if (!nfsd_net_try_get(net)) + return ERR_PTR(-ENXIO); + + rcu_read_lock(); + localio = nfsd_file_get(rcu_dereference(*pnf)); + rcu_read_unlock(); + if (localio) + return localio; + + /* nfs_fh -> svc_fh */ + fh_init(&fh, NFS4_FHSIZE); + fh.fh_handle.fh_size = nfs_fh->size; + memcpy(fh.fh_handle.fh_raw, nfs_fh->data, nfs_fh->size); + + if (fmode & FMODE_READ) + mayflags |= NFSD_MAY_READ; + if (fmode & FMODE_WRITE) + mayflags |= NFSD_MAY_WRITE; + + svcauth_map_clnt_to_svc_cred_local(rpc_clnt, cred, &rq_cred); + + beres = nfsd_file_acquire_local(net, &rq_cred, dom, + &fh, mayflags, &localio); + if (beres) + localio = ERR_PTR(nfs_stat_to_errno(be32_to_cpu(beres))); + + fh_put(&fh); + if (rq_cred.cr_group_info) + put_group_info(rq_cred.cr_group_info); + + if (!IS_ERR(localio)) { + struct nfsd_file *new; + if (!nfsd_net_try_get(net)) { + nfsd_file_put(localio); + nfsd_net_put(net); + return ERR_PTR(-ENXIO); + } + nfsd_file_get(localio); + again: + new = unrcu_pointer(cmpxchg(pnf, NULL, RCU_INITIALIZER(localio))); + if (new) { + /* Some other thread installed an nfsd_file */ + if (nfsd_file_get(new) == NULL) + goto again; + /* + * Drop the ref we were going to install and the + * one we were going to return. + */ + nfsd_file_put(localio); + nfsd_file_put(localio); + localio = new; + } + } else + nfsd_net_put(net); + + return localio; +} + +static const struct nfsd_localio_operations nfsd_localio_ops = { + .nfsd_net_try_get = nfsd_net_try_get, + .nfsd_net_put = nfsd_net_put, + .nfsd_open_local_fh = nfsd_open_local_fh, + .nfsd_file_put_local = nfsd_file_put_local, + .nfsd_file_get_local = nfsd_file_get_local, + .nfsd_file_file = nfsd_file_file, +}; + +void nfsd_localio_ops_init(void) +{ + nfs_to = &nfsd_localio_ops; +} + +/* + * UUID_IS_LOCAL XDR functions + */ + +static __be32 localio_proc_null(struct svc_rqst *rqstp) +{ + return rpc_success; +} + +struct localio_uuidarg { + uuid_t uuid; +}; + +static __be32 localio_proc_uuid_is_local(struct svc_rqst *rqstp) +{ + struct localio_uuidarg *argp = rqstp->rq_argp; + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs_uuid_is_local(&argp->uuid, &nn->local_clients, + &nn->local_clients_lock, + net, rqstp->rq_client, THIS_MODULE); + + return rpc_success; +} + +static bool localio_decode_uuidarg(struct svc_rqst *rqstp, + struct xdr_stream *xdr) +{ + struct localio_uuidarg *argp = rqstp->rq_argp; + u8 uuid[UUID_SIZE]; + + if (decode_opaque_fixed(xdr, uuid, UUID_SIZE)) + return false; + import_uuid(&argp->uuid, uuid); + + return true; +} + +static const struct svc_procedure localio_procedures1[] = { + [LOCALIOPROC_NULL] = { + .pc_func = localio_proc_null, + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = 0, + .pc_name = "NULL", + }, + [LOCALIOPROC_UUID_IS_LOCAL] = { + .pc_func = localio_proc_uuid_is_local, + .pc_decode = localio_decode_uuidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct localio_uuidarg), + .pc_argzero = sizeof(struct localio_uuidarg), + .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_name = "UUID_IS_LOCAL", + }, +}; + +#define LOCALIO_NR_PROCEDURES ARRAY_SIZE(localio_procedures1) +static DEFINE_PER_CPU_ALIGNED(unsigned long, + localio_count[LOCALIO_NR_PROCEDURES]); +const struct svc_version localio_version1 = { + .vs_vers = 1, + .vs_nproc = LOCALIO_NR_PROCEDURES, + .vs_proc = localio_procedures1, + .vs_dispatch = nfsd_dispatch, + .vs_count = localio_count, + .vs_xdrsize = XDR_QUADLEN(UUID_SIZE), + .vs_hidden = true, +}; diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 46a7f9b813e5..edc9f75dc75c 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -38,11 +38,20 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp, memcpy(&fh.fh_handle.fh_raw, f->data, f->size); fh.fh_export = NULL; + /* + * Allow BYPASS_GSS as some client implementations use AUTH_SYS + * for NLM even when GSS is used for NFS. + * Allow OWNER_OVERRIDE as permission might have been changed + * after the file was opened. + * Pass MAY_NLM so that authentication can be completely bypassed + * if NFSEXP_NOAUTHNLM is set. Some older clients use AUTH_NULL + * for NLM requests. + */ access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ; - access |= NFSD_MAY_LOCK; + access |= NFSD_MAY_NLM | NFSD_MAY_OWNER_OVERRIDE | NFSD_MAY_BYPASS_GSS; nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp); fh_put(&fh); - /* We return nlm error codes as nlm doesn't know + /* We return nlm error codes as nlm doesn't know * about nfsd, but nfsd does know about nlm.. */ switch (nfserr) { diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c index 0e1d635ec5f9..ca54aa583530 100644 --- a/fs/nfsd/netlink.c +++ b/fs/nfsd/netlink.c @@ -10,15 +10,96 @@ #include <uapi/linux/nfsd_netlink.h> +/* Common nested types */ +const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1] = { + [NFSD_A_SOCK_ADDR] = { .type = NLA_BINARY, }, + [NFSD_A_SOCK_TRANSPORT_NAME] = { .type = NLA_NUL_STRING, }, +}; + +const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1] = { + [NFSD_A_VERSION_MAJOR] = { .type = NLA_U32, }, + [NFSD_A_VERSION_MINOR] = { .type = NLA_U32, }, + [NFSD_A_VERSION_ENABLED] = { .type = NLA_FLAG, }, +}; + +/* NFSD_CMD_THREADS_SET - do */ +static const struct nla_policy nfsd_threads_set_nl_policy[NFSD_A_SERVER_SCOPE + 1] = { + [NFSD_A_SERVER_THREADS] = { .type = NLA_U32, }, + [NFSD_A_SERVER_GRACETIME] = { .type = NLA_U32, }, + [NFSD_A_SERVER_LEASETIME] = { .type = NLA_U32, }, + [NFSD_A_SERVER_SCOPE] = { .type = NLA_NUL_STRING, }, +}; + +/* NFSD_CMD_VERSION_SET - do */ +static const struct nla_policy nfsd_version_set_nl_policy[NFSD_A_SERVER_PROTO_VERSION + 1] = { + [NFSD_A_SERVER_PROTO_VERSION] = NLA_POLICY_NESTED(nfsd_version_nl_policy), +}; + +/* NFSD_CMD_LISTENER_SET - do */ +static const struct nla_policy nfsd_listener_set_nl_policy[NFSD_A_SERVER_SOCK_ADDR + 1] = { + [NFSD_A_SERVER_SOCK_ADDR] = NLA_POLICY_NESTED(nfsd_sock_nl_policy), +}; + +/* NFSD_CMD_POOL_MODE_SET - do */ +static const struct nla_policy nfsd_pool_mode_set_nl_policy[NFSD_A_POOL_MODE_MODE + 1] = { + [NFSD_A_POOL_MODE_MODE] = { .type = NLA_NUL_STRING, }, +}; + /* Ops table for nfsd */ static const struct genl_split_ops nfsd_nl_ops[] = { { .cmd = NFSD_CMD_RPC_STATUS_GET, - .start = nfsd_nl_rpc_status_get_start, .dumpit = nfsd_nl_rpc_status_get_dumpit, - .done = nfsd_nl_rpc_status_get_done, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = NFSD_CMD_THREADS_SET, + .doit = nfsd_nl_threads_set_doit, + .policy = nfsd_threads_set_nl_policy, + .maxattr = NFSD_A_SERVER_SCOPE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_THREADS_GET, + .doit = nfsd_nl_threads_get_doit, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_VERSION_SET, + .doit = nfsd_nl_version_set_doit, + .policy = nfsd_version_set_nl_policy, + .maxattr = NFSD_A_SERVER_PROTO_VERSION, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_VERSION_GET, + .doit = nfsd_nl_version_get_doit, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_LISTENER_SET, + .doit = nfsd_nl_listener_set_doit, + .policy = nfsd_listener_set_nl_policy, + .maxattr = NFSD_A_SERVER_SOCK_ADDR, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_LISTENER_GET, + .doit = nfsd_nl_listener_get_doit, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_POOL_MODE_SET, + .doit = nfsd_nl_pool_mode_set_doit, + .policy = nfsd_pool_mode_set_nl_policy, + .maxattr = NFSD_A_POOL_MODE_MODE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_POOL_MODE_GET, + .doit = nfsd_nl_pool_mode_get_doit, + .flags = GENL_CMD_CAP_DO, + }, }; struct genl_family nfsd_nl_family __ro_after_init = { diff --git a/fs/nfsd/netlink.h b/fs/nfsd/netlink.h index d83dd6bdee92..8eb903f24c41 100644 --- a/fs/nfsd/netlink.h +++ b/fs/nfsd/netlink.h @@ -11,11 +11,20 @@ #include <uapi/linux/nfsd_netlink.h> -int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb); -int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb); +/* Common nested types */ +extern const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1]; +extern const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1]; int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_pool_mode_set_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_pool_mode_get_doit(struct sk_buff *skb, struct genl_info *info); extern struct genl_family nfsd_nl_family; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index d4be519b5734..3e2d0fde80a7 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -13,6 +13,7 @@ #include <linux/filelock.h> #include <linux/nfs4.h> #include <linux/percpu_counter.h> +#include <linux/percpu-refcount.h> #include <linux/siphash.h> #include <linux/sunrpc/stats.h> @@ -127,12 +128,6 @@ struct nfsd_net { seqlock_t writeverf_lock; unsigned char writeverf[8]; - /* - * Max number of connections this nfsd container will allow. Defaults - * to '0' which is means that it bases this on the number of threads. - */ - unsigned int max_connections; - u32 clientid_base; u32 clientid_counter; u32 clverifier_counter; @@ -140,6 +135,9 @@ struct nfsd_net { struct svc_info nfsd_info; #define nfsd_serv nfsd_info.serv + struct percpu_ref nfsd_net_ref; + struct completion nfsd_net_confirm_done; + struct completion nfsd_net_free_done; /* * clientid and stateid data for construction of net unique COPY @@ -148,12 +146,13 @@ struct nfsd_net { u32 s2s_cp_cl_id; struct idr s2s_cp_stateids; spinlock_t s2s_cp_lock; + atomic_t pending_async_copies; /* * Version information */ - bool *nfsd_versions; - bool *nfsd4_minorversions; + bool nfsd_versions[NFSD_MAXVERS + 1]; + bool nfsd4_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1]; /* * Duplicate reply cache @@ -213,15 +212,22 @@ struct nfsd_net { /* last time an admin-revoke happened for NFSv4.0 */ time64_t nfs40_last_revoke; +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + /* Local clients to be invalidated when net is shut down */ + spinlock_t local_clients_lock; + struct list_head local_clients; +#endif }; /* Simple check to find out if a given net was properly initialized */ #define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl) -extern void nfsd_netns_free_versions(struct nfsd_net *nn); - +extern bool nfsd_support_version(int vers); extern unsigned int nfsd_net_id; +bool nfsd_net_try_get(struct net *net); +void nfsd_net_put(struct net *net); + void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn); void nfsd_reset_write_verifier(struct nfsd_net *nn); #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 12b2b9bc07bf..5fb202acb0fd 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -84,6 +84,8 @@ out: fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); + resp->acl_access = NULL; + resp->acl_default = NULL; goto out; } @@ -308,8 +310,6 @@ static void nfsaclsvc_release_access(struct svc_rqst *rqstp) fh_put(&resp->fh); } -struct nfsd3_voidargs { int dummy; }; - #define ST 1 /* status*/ #define AT 21 /* attributes */ #define pAT (1+AT) /* post attributes - conditional */ diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 73adca47d373..7b5433bd3019 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -76,6 +76,8 @@ out: fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); + resp->acl_access = NULL; + resp->acl_default = NULL; goto out; } @@ -221,8 +223,6 @@ static void nfs3svc_release_getacl(struct svc_rqst *rqstp) posix_acl_release(resp->acl_default); } -struct nfsd3_voidargs { int dummy; }; - #define ST 1 /* status*/ #define AT 21 /* attributes */ #define pAT (1+AT) /* post attributes - conditional */ diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index dfcc957e460d..a817d8485d21 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -14,6 +14,7 @@ #include "xdr3.h" #include "vfs.h" #include "filecache.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -28,6 +29,29 @@ static int nfs3_ftypes[] = { S_IFIFO, /* NF3FIFO */ }; +static __be32 nfsd3_map_status(__be32 status) +{ + switch (status) { + case nfs_ok: + break; + case nfserr_nofilehandle: + status = nfserr_badhandle; + break; + case nfserr_wrongsec: + case nfserr_file_open: + status = nfserr_acces; + break; + case nfserr_symlink_not_dir: + status = nfserr_notdir; + break; + case nfserr_symlink: + case nfserr_wrong_type: + status = nfserr_inval; + break; + } + return status; +} + /* * NULL call. */ @@ -46,8 +70,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - dprintk("nfsd: GETATTR(3) %s\n", - SVCFH_fmt(&argp->fh)); + trace_nfsd_vfs_getattr(rqstp, &argp->fh); fh_copy(&resp->fh, &argp->fh); resp->status = fh_verify(rqstp, &resp->fh, 0, @@ -57,6 +80,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp) resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -80,6 +104,7 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp) if (argp->check_guard) guardtime = &argp->guardtime; resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, guardtime); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -103,6 +128,7 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp) resp->status = nfsd_lookup(rqstp, &resp->dirfh, argp->name, argp->len, &resp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -122,6 +148,7 @@ nfsd3_proc_access(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->access = argp->access; resp->status = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -142,6 +169,7 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp) resp->pages = rqstp->rq_next_page++; resp->status = nfsd_readlink(rqstp, &resp->fh, page_address(*resp->pages), &resp->len); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -179,6 +207,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, &resp->count, &resp->eof); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -191,7 +220,6 @@ nfsd3_proc_write(struct svc_rqst *rqstp) struct nfsd3_writeargs *argp = rqstp->rq_argp; struct nfsd3_writeres *resp = rqstp->rq_resp; unsigned long cnt = argp->len; - unsigned int nvecs; dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n", SVCFH_fmt(&argp->fh), @@ -206,12 +234,11 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nvecs = svc_fill_write_vector(rqstp, &argp->payload); - resp->status = nfsd_write(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, nvecs, &cnt, + &argp->payload, &cnt, resp->committed, resp->verf); resp->count = cnt; + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -236,6 +263,8 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 status; int host_err; + trace_nfsd_vfs_create(rqstp, fhp, S_IFREG, argp->name, argp->len); + if (isdotent(argp->name, argp->len)) return nfserr_exist; if (!(iap->ia_valid & ATTR_MODE)) @@ -254,7 +283,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, inode_lock_nested(inode, I_MUTEX_PARENT); - child = lookup_one_len(argp->name, parent, argp->len); + child = lookup_one(&nop_mnt_idmap, + &QSTR_LEN(argp->name, argp->len), + parent); if (IS_ERR(child)) { status = nfserrno(PTR_ERR(child)); goto out; @@ -350,15 +381,11 @@ nfsd3_proc_create(struct svc_rqst *rqstp) struct nfsd3_diropres *resp = rqstp->rq_resp; svc_fh *dirfhp, *newfhp; - dprintk("nfsd: CREATE(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - dirfhp = fh_copy(&resp->dirfh, &argp->fh); newfhp = fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -374,16 +401,12 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) .na_iattr = &argp->attrs, }; - dprintk("nfsd: MKDIR(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - argp->attrs.ia_valid &= ~ATTR_SIZE; fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, &attrs, S_IFDIR, 0, &resp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -413,17 +436,13 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) goto out; } - dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n", - SVCFH_fmt(&argp->ffh), - argp->flen, argp->fname, - argp->tlen, argp->tname); - fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, argp->tname, &attrs, &resp->fh); kfree(argp->tname); out: + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -441,11 +460,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) int type; dev_t rdev = 0; - dprintk("nfsd: MKNOD(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); @@ -465,6 +479,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, &attrs, type, rdev, &resp->fh); out: + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -477,15 +492,11 @@ nfsd3_proc_remove(struct svc_rqst *rqstp) struct nfsd3_diropargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - dprintk("nfsd: REMOVE(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - /* Unlink. -S_IFDIR means file must not be a directory */ fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -498,14 +509,10 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp) struct nfsd3_diropargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - dprintk("nfsd: RMDIR(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -515,19 +522,11 @@ nfsd3_proc_rename(struct svc_rqst *rqstp) struct nfsd3_renameargs *argp = rqstp->rq_argp; struct nfsd3_renameres *resp = rqstp->rq_resp; - dprintk("nfsd: RENAME(3) %s %.*s ->\n", - SVCFH_fmt(&argp->ffh), - argp->flen, - argp->fname); - dprintk("nfsd: -> %s %.*s\n", - SVCFH_fmt(&argp->tfh), - argp->tlen, - argp->tname); - fh_copy(&resp->ffh, &argp->ffh); fh_copy(&resp->tfh, &argp->tfh); resp->status = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen, &resp->tfh, argp->tname, argp->tlen); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -537,17 +536,11 @@ nfsd3_proc_link(struct svc_rqst *rqstp) struct nfsd3_linkargs *argp = rqstp->rq_argp; struct nfsd3_linkres *resp = rqstp->rq_resp; - dprintk("nfsd: LINK(3) %s ->\n", - SVCFH_fmt(&argp->ffh)); - dprintk("nfsd: -> %s %.*s\n", - SVCFH_fmt(&argp->tfh), - argp->tlen, - argp->tname); - fh_copy(&resp->fh, &argp->ffh); fh_copy(&resp->tfh, &argp->tfh); resp->status = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen, &resp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -581,9 +574,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) struct nfsd3_readdirres *resp = rqstp->rq_resp; loff_t offset; - dprintk("nfsd: READDIR(3) %s %d bytes at %d\n", - SVCFH_fmt(&argp->fh), - argp->count, (u32) argp->cookie); + trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie); nfsd3_init_dirlist_pages(rqstp, resp, argp->count); @@ -600,6 +591,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) /* Recycle only pages that were part of the reply */ rqstp->rq_next_page = resp->xdr.page_ptr + 1; + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -614,9 +606,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) struct nfsd3_readdirres *resp = rqstp->rq_resp; loff_t offset; - dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", - SVCFH_fmt(&argp->fh), - argp->count, (u32) argp->cookie); + trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie); nfsd3_init_dirlist_pages(rqstp, resp, argp->count); @@ -644,6 +634,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) rqstp->rq_next_page = resp->xdr.page_ptr + 1; out: + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -656,11 +647,9 @@ nfsd3_proc_fsstat(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_fsstatres *resp = rqstp->rq_resp; - dprintk("nfsd: FSSTAT(3) %s\n", - SVCFH_fmt(&argp->fh)); - resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); fh_put(&argp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -704,6 +693,7 @@ nfsd3_proc_fsinfo(struct svc_rqst *rqstp) } fh_put(&argp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -746,6 +736,7 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp) } fh_put(&argp->fh); + resp->status = nfsd3_map_status(resp->status); return rpc_success; } @@ -773,6 +764,7 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) argp->count, resp->verf); nfsd_file_put(nf); out: + resp->status = nfsd3_map_status(resp->status); return rpc_success; } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index a7a07470c1f8..ef4971d71ac4 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -1001,7 +1001,9 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, } else dchild = dget(dparent); } else - dchild = lookup_positive_unlocked(name, dparent, namlen); + dchild = lookup_one_positive_unlocked(&nop_mnt_idmap, + &QSTR_LEN(name, namlen), + dparent); if (IS_ERR(dchild)) return rv; if (d_mountpoint(dchild)) diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 96e786b5e544..936ea1ad9586 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -198,8 +198,6 @@ summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas) memset(pas, 0, sizeof(*pas)); pas->mask = 07; - pe = acl->a_entries + acl->a_count; - FOREACH_ACL_ENTRY(pa, acl, pe) { switch (pa->e_tag) { case ACL_USER_OBJ: diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 87c9547989f6..e00b2aea8da2 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -42,11 +42,10 @@ #include "trace.h" #include "xdr4cb.h" #include "xdr4.h" +#include "nfs4xdr_gen.h" #define NFSDDBG_FACILITY NFSDDBG_PROC -static void nfsd4_mark_cb_fault(struct nfs4_client *clp); - #define NFSPROC4_CB_NULL 0 #define NFSPROC4_CB_COMPOUND 1 @@ -93,12 +92,35 @@ static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap, { fattr->ncf_cb_change = 0; fattr->ncf_cb_fsize = 0; + fattr->ncf_cb_atime.tv_sec = 0; + fattr->ncf_cb_atime.tv_nsec = 0; + fattr->ncf_cb_mtime.tv_sec = 0; + fattr->ncf_cb_mtime.tv_nsec = 0; + if (bitmap[0] & FATTR4_WORD0_CHANGE) if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0) - return -NFSERR_BAD_XDR; + return -EIO; if (bitmap[0] & FATTR4_WORD0_SIZE) if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0) - return -NFSERR_BAD_XDR; + return -EIO; + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) { + fattr4_time_deleg_access access; + + if (!xdrgen_decode_fattr4_time_deleg_access(xdr, &access)) + return -EIO; + fattr->ncf_cb_atime.tv_sec = access.seconds; + fattr->ncf_cb_atime.tv_nsec = access.nseconds; + + } + if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) { + fattr4_time_deleg_modify modify; + + if (!xdrgen_decode_fattr4_time_deleg_modify(xdr, &modify)) + return -EIO; + fattr->ncf_cb_mtime.tv_sec = modify.seconds; + fattr->ncf_cb_mtime.tv_nsec = modify.nseconds; + + } return 0; } @@ -287,17 +309,17 @@ static int decode_cb_compound4res(struct xdr_stream *xdr, u32 length; __be32 *p; - p = xdr_inline_decode(xdr, 4 + 4); + p = xdr_inline_decode(xdr, XDR_UNIT); if (unlikely(p == NULL)) goto out_overflow; - hdr->status = be32_to_cpup(p++); + hdr->status = be32_to_cpup(p); /* Ignore the tag */ - length = be32_to_cpup(p++); - p = xdr_inline_decode(xdr, length + 4); - if (unlikely(p == NULL)) + if (xdr_stream_decode_u32(xdr, &length) < 0) + goto out_overflow; + if (xdr_inline_decode(xdr, length) == NULL) + goto out_overflow; + if (xdr_stream_decode_u32(xdr, &hdr->nops) < 0) goto out_overflow; - p += XDR_QUADLEN(length); - hdr->nops = be32_to_cpup(p); return 0; out_overflow: return -EIO; @@ -361,16 +383,63 @@ static void encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr, struct nfs4_cb_fattr *fattr) { - struct nfs4_delegation *dp = - container_of(fattr, struct nfs4_delegation, dl_cb_fattr); + struct nfs4_delegation *dp = container_of(fattr, struct nfs4_delegation, dl_cb_fattr); struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle; - + struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr; + u32 bmap_size = 1; + u32 bmap[3]; + + bmap[0] = FATTR4_WORD0_SIZE; + if (!ncf->ncf_file_modified) + bmap[0] |= FATTR4_WORD0_CHANGE; + + if (deleg_attrs_deleg(dp->dl_type)) { + bmap[1] = 0; + bmap[2] = FATTR4_WORD2_TIME_DELEG_ACCESS | FATTR4_WORD2_TIME_DELEG_MODIFY; + bmap_size = 3; + } encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR); encode_nfs_fh4(xdr, fh); - encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap)); + encode_bitmap4(xdr, bmap, bmap_size); hdr->nops++; } +static u32 highest_slotid(struct nfsd4_session *ses) +{ + u32 idx; + + spin_lock(&ses->se_lock); + idx = fls(~ses->se_cb_slot_avail); + if (idx > 0) + --idx; + idx = max(idx, ses->se_cb_highest_slot); + spin_unlock(&ses->se_lock); + return idx; +} + +static void +encode_referring_call4(struct xdr_stream *xdr, + const struct nfsd4_referring_call *rc) +{ + encode_uint32(xdr, rc->rc_sequenceid); + encode_uint32(xdr, rc->rc_slotid); +} + +static void +encode_referring_call_list4(struct xdr_stream *xdr, + const struct nfsd4_referring_call_list *rcl) +{ + struct nfsd4_referring_call *rc; + __be32 *p; + + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN); + xdr_encode_opaque_fixed(p, rcl->rcl_sessionid.data, + NFS4_MAX_SESSIONID_LEN); + encode_uint32(xdr, rcl->__nr_referring_calls); + list_for_each_entry(rc, &rcl->rcl_referring_calls, __list) + encode_referring_call4(xdr, rc); +} + /* * CB_SEQUENCE4args * @@ -388,6 +457,7 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) { struct nfsd4_session *session = cb->cb_clp->cl_cb_session; + struct nfsd4_referring_call_list *rcl; __be32 *p; if (hdr->minorversion == 0) @@ -396,16 +466,45 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr, encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE); encode_sessionid4(xdr, session); - p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4); - *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */ - *p++ = xdr_zero; /* csa_slotid */ - *p++ = xdr_zero; /* csa_highest_slotid */ + p = xdr_reserve_space(xdr, XDR_UNIT * 4); + *p++ = cpu_to_be32(session->se_cb_seq_nr[cb->cb_held_slot]); /* csa_sequenceid */ + *p++ = cpu_to_be32(cb->cb_held_slot); /* csa_slotid */ + *p++ = cpu_to_be32(highest_slotid(session)); /* csa_highest_slotid */ *p++ = xdr_zero; /* csa_cachethis */ - xdr_encode_empty_array(p); /* csa_referring_call_lists */ + + /* csa_referring_call_lists */ + encode_uint32(xdr, cb->cb_nr_referring_call_list); + list_for_each_entry(rcl, &cb->cb_referring_call_list, __list) + encode_referring_call_list4(xdr, rcl); hdr->nops++; } +static void update_cb_slot_table(struct nfsd4_session *ses, u32 target) +{ + /* No need to do anything if nothing changed */ + if (likely(target == READ_ONCE(ses->se_cb_highest_slot))) + return; + + spin_lock(&ses->se_lock); + if (target > ses->se_cb_highest_slot) { + int i; + + target = min(target, NFSD_BC_SLOT_TABLE_SIZE - 1); + + /* + * Growing the slot table. Reset any new sequences to 1. + * + * NB: There is some debate about whether the RFC requires this, + * but the Linux client expects it. + */ + for (i = ses->se_cb_highest_slot + 1; i <= target; ++i) + ses->se_cb_seq_nr[i] = 1; + } + ses->se_cb_highest_slot = target; + spin_unlock(&ses->se_lock); +} + /* * CB_SEQUENCE4resok * @@ -433,7 +532,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr, struct nfsd4_session *session = cb->cb_clp->cl_cb_session; int status = -ESERVERFAULT; __be32 *p; - u32 dummy; + u32 seqid, slotid, target; /* * If the server returns different values for sessionID, slotID or @@ -449,21 +548,22 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr, } p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); - dummy = be32_to_cpup(p++); - if (dummy != session->se_cb_seq_nr) { + seqid = be32_to_cpup(p++); + if (seqid != session->se_cb_seq_nr[cb->cb_held_slot]) { dprintk("NFS: %s Invalid sequence number\n", __func__); goto out; } - dummy = be32_to_cpup(p++); - if (dummy != 0) { + slotid = be32_to_cpup(p++); + if (slotid != cb->cb_held_slot) { dprintk("NFS: %s Invalid slotid\n", __func__); goto out; } - /* - * FIXME: process highest slotid and target highest slotid - */ + p++; // ignore current highest slot value + + target = be32_to_cpup(p++); + update_cb_slot_table(session, target); status = 0; out: cb->cb_seq_status = status; @@ -592,7 +692,7 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp, struct nfs4_cb_compound_hdr hdr; int status; u32 bitmap[3] = {0}; - u32 attrlen; + u32 attrlen, maxlen; struct nfs4_cb_fattr *ncf = container_of(cb, struct nfs4_cb_fattr, ncf_getattr); @@ -605,14 +705,18 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp, return status; status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status); - if (status) + if (unlikely(status || cb->cb_status)) return status; if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0) - return -NFSERR_BAD_XDR; + return -EIO; if (xdr_stream_decode_u32(xdr, &attrlen) < 0) - return -NFSERR_BAD_XDR; - if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize))) - return -NFSERR_BAD_XDR; + return -EIO; + maxlen = sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize); + if (bitmap[2] != 0) + maxlen += (sizeof(ncf->ncf_cb_mtime.tv_sec) + + sizeof(ncf->ncf_cb_mtime.tv_nsec)) * 2; + if (attrlen > maxlen) + return -EIO; status = decode_cb_fattr4(xdr, bitmap, ncf); return status; } @@ -978,20 +1082,23 @@ static int max_cb_time(struct net *net) return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ; } -static struct workqueue_struct *callback_wq; - static bool nfsd4_queue_cb(struct nfsd4_callback *cb) { - trace_nfsd_cb_queue(cb->cb_clp, cb); - return queue_delayed_work(callback_wq, &cb->cb_work, 0); + struct nfs4_client *clp = cb->cb_clp; + + trace_nfsd_cb_queue(clp, cb); + return queue_work(clp->cl_callback_wq, &cb->cb_work); } -static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb, - unsigned long msecs) +static void nfsd4_requeue_cb(struct rpc_task *task, struct nfsd4_callback *cb) { - trace_nfsd_cb_queue(cb->cb_clp, cb); - queue_delayed_work(callback_wq, &cb->cb_work, - msecs_to_jiffies(msecs)); + struct nfs4_client *clp = cb->cb_clp; + + if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) { + trace_nfsd_cb_restart(clp, cb); + task->tk_status = 0; + set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags); + } } static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) @@ -1002,8 +1109,7 @@ static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) static void nfsd41_cb_inflight_end(struct nfs4_client *clp) { - if (atomic_dec_and_test(&clp->cl_cb_inflight)) - wake_up_var(&clp->cl_cb_inflight); + atomic_dec_and_wake_up(&clp->cl_cb_inflight); } static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp) @@ -1066,7 +1172,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.authflavor = clp->cl_cred.cr_flavor; clp->cl_cb_ident = conn->cb_ident; } else { - if (!conn->cb_xprt) + if (!conn->cb_xprt || !ses) return -EINVAL; clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; @@ -1161,7 +1267,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp) void nfsd4_probe_callback_sync(struct nfs4_client *clp) { nfsd4_probe_callback(clp); - flush_workqueue(callback_wq); + flush_workqueue(clp->cl_callback_wq); } void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) @@ -1172,6 +1278,22 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) spin_unlock(&clp->cl_lock); } +static int grab_slot(struct nfsd4_session *ses) +{ + int idx; + + spin_lock(&ses->se_lock); + idx = ffs(ses->se_cb_slot_avail) - 1; + if (idx < 0 || idx > ses->se_cb_highest_slot) { + spin_unlock(&ses->se_lock); + return -1; + } + /* clear the bit for the slot */ + ses->se_cb_slot_avail &= ~BIT(idx); + spin_unlock(&ses->se_lock); + return idx; +} + /* * There's currently a single callback channel slot. * If the slot is available, then mark it busy. Otherwise, set the @@ -1180,28 +1302,32 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task) { struct nfs4_client *clp = cb->cb_clp; + struct nfsd4_session *ses = clp->cl_cb_session; - if (!cb->cb_holds_slot && - test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + if (cb->cb_held_slot >= 0) + return true; + cb->cb_held_slot = grab_slot(ses); + if (cb->cb_held_slot < 0) { rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); /* Race breaker */ - if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { - dprintk("%s slot is busy\n", __func__); + cb->cb_held_slot = grab_slot(ses); + if (cb->cb_held_slot < 0) return false; - } rpc_wake_up_queued_task(&clp->cl_cb_waitq, task); } - cb->cb_holds_slot = true; return true; } static void nfsd41_cb_release_slot(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; + struct nfsd4_session *ses = clp->cl_cb_session; - if (cb->cb_holds_slot) { - cb->cb_holds_slot = false; - clear_bit(0, &clp->cl_cb_slot_busy); + if (cb->cb_held_slot >= 0) { + spin_lock(&ses->se_lock); + ses->se_cb_slot_avail |= BIT(cb->cb_held_slot); + spin_unlock(&ses->se_lock); + cb->cb_held_slot = -1; rpc_wake_up_next(&clp->cl_cb_waitq); } } @@ -1212,15 +1338,113 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb) trace_nfsd_cb_destroy(clp, cb); nfsd41_cb_release_slot(cb); + if (test_bit(NFSD4_CALLBACK_WAKE, &cb->cb_flags)) + clear_and_wake_up_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags); + else + clear_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags); + if (cb->cb_ops && cb->cb_ops->release) cb->cb_ops->release(cb); nfsd41_cb_inflight_end(clp); } -/* - * TODO: cb_sequence should support referring call lists, cachethis, multiple - * slots, and mark callback channel down on communication errors. +/** + * nfsd41_cb_referring_call - add a referring call to a callback operation + * @cb: context of callback to add the rc to + * @sessionid: referring call's session ID + * @slotid: referring call's session slot index + * @seqno: referring call's slot sequence number + * + * Caller serializes access to @cb. + * + * NB: If memory allocation fails, the referring call is not added. */ +void nfsd41_cb_referring_call(struct nfsd4_callback *cb, + struct nfs4_sessionid *sessionid, + u32 slotid, u32 seqno) +{ + struct nfsd4_referring_call_list *rcl; + struct nfsd4_referring_call *rc; + bool found; + + might_sleep(); + + found = false; + list_for_each_entry(rcl, &cb->cb_referring_call_list, __list) { + if (!memcmp(rcl->rcl_sessionid.data, sessionid->data, + NFS4_MAX_SESSIONID_LEN)) { + found = true; + break; + } + } + if (!found) { + rcl = kmalloc(sizeof(*rcl), GFP_KERNEL); + if (!rcl) + return; + memcpy(rcl->rcl_sessionid.data, sessionid->data, + NFS4_MAX_SESSIONID_LEN); + rcl->__nr_referring_calls = 0; + INIT_LIST_HEAD(&rcl->rcl_referring_calls); + list_add(&rcl->__list, &cb->cb_referring_call_list); + cb->cb_nr_referring_call_list++; + } + + found = false; + list_for_each_entry(rc, &rcl->rcl_referring_calls, __list) { + if (rc->rc_sequenceid == seqno && rc->rc_slotid == slotid) { + found = true; + break; + } + } + if (!found) { + rc = kmalloc(sizeof(*rc), GFP_KERNEL); + if (!rc) + goto out; + rc->rc_sequenceid = seqno; + rc->rc_slotid = slotid; + rcl->__nr_referring_calls++; + list_add(&rc->__list, &rcl->rcl_referring_calls); + } + +out: + if (!rcl->__nr_referring_calls) { + cb->cb_nr_referring_call_list--; + list_del(&rcl->__list); + kfree(rcl); + } +} + +/** + * nfsd41_cb_destroy_referring_call_list - release referring call info + * @cb: context of a callback that has completed + * + * Callers who allocate referring calls using nfsd41_cb_referring_call() must + * release those resources by calling nfsd41_cb_destroy_referring_call_list. + * + * Caller serializes access to @cb. + */ +void nfsd41_cb_destroy_referring_call_list(struct nfsd4_callback *cb) +{ + struct nfsd4_referring_call_list *rcl; + struct nfsd4_referring_call *rc; + + while (!list_empty(&cb->cb_referring_call_list)) { + rcl = list_first_entry(&cb->cb_referring_call_list, + struct nfsd4_referring_call_list, + __list); + + while (!list_empty(&rcl->rcl_referring_calls)) { + rc = list_first_entry(&rcl->rcl_referring_calls, + struct nfsd4_referring_call, + __list); + list_del(&rc->__list); + kfree(rc); + } + list_del(&rcl->__list); + kfree(rcl); + } +} + static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) { struct nfsd4_callback *cb = calldata; @@ -1231,6 +1455,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) * cb_seq_status is only set in decode_cb_sequence4res, * and so will remain 1 if an rpc level failure occurs. */ + trace_nfsd_cb_rpc_prepare(clp); cb->cb_seq_status = 1; cb->cb_status = 0; if (minorversion && !nfsd41_cb_get_slot(cb, task)) @@ -1238,30 +1463,14 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) rpc_call_start(task); } +/* Returns true if CB_COMPOUND processing should continue */ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb) { - struct nfs4_client *clp = cb->cb_clp; - struct nfsd4_session *session = clp->cl_cb_session; - bool ret = true; - - if (!clp->cl_minorversion) { - /* - * If the backchannel connection was shut down while this - * task was queued, we need to resubmit it after setting up - * a new backchannel connection. - * - * Note that if we lost our callback connection permanently - * the submission code will error out, so we don't need to - * handle that case here. - */ - if (RPC_SIGNALLED(task)) - goto need_restart; - - return true; - } + struct nfsd4_session *session = cb->cb_clp->cl_cb_session; + bool ret = false; - if (!cb->cb_holds_slot) - goto need_restart; + if (cb->cb_held_slot < 0) + goto requeue; /* This is the operation status code for CB_SEQUENCE */ trace_nfsd_cb_seq_status(task, cb); @@ -1274,12 +1483,17 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback * If CB_SEQUENCE returns an error, then the state of the slot * (sequence ID, cached reply) MUST NOT change. */ - ++session->se_cb_seq_nr; + ++session->se_cb_seq_nr[cb->cb_held_slot]; + ret = true; break; case -ESERVERFAULT: - ++session->se_cb_seq_nr; + /* + * Call succeeded, but the session, slot index, or slot + * sequence number in the response do not match the same + * in the server's call. The sequence information is thus + * untrustworthy. + */ nfsd4_mark_cb_fault(cb->cb_clp); - ret = false; break; case 1: /* @@ -1291,44 +1505,42 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback fallthrough; case -NFS4ERR_BADSESSION: nfsd4_mark_cb_fault(cb->cb_clp); - ret = false; - goto need_restart; + goto requeue; case -NFS4ERR_DELAY: cb->cb_seq_status = 1; - if (!rpc_restart_call(task)) - goto out; - + if (RPC_SIGNALLED(task) || !rpc_restart_call(task)) + goto requeue; rpc_delay(task, 2 * HZ); return false; + case -NFS4ERR_SEQ_MISORDERED: case -NFS4ERR_BADSLOT: + /* + * A SEQ_MISORDERED or BADSLOT error means that the client and + * server are out of sync as to the backchannel parameters. Mark + * the backchannel faulty and restart the RPC, but leak the slot + * so that it's no longer used. + */ + nfsd4_mark_cb_fault(cb->cb_clp); + cb->cb_held_slot = -1; goto retry_nowait; - case -NFS4ERR_SEQ_MISORDERED: - if (session->se_cb_seq_nr != 1) { - session->se_cb_seq_nr = 1; - goto retry_nowait; - } - break; default: nfsd4_mark_cb_fault(cb->cb_clp); } - nfsd41_cb_release_slot(cb); - trace_nfsd_cb_free_slot(task, cb); - - if (RPC_SIGNALLED(task)) - goto need_restart; -out: + nfsd41_cb_release_slot(cb); return ret; retry_nowait: - if (rpc_restart_call_prepare(task)) - ret = false; - goto out; -need_restart: - if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) { - trace_nfsd_cb_restart(clp, cb); - task->tk_status = 0; - cb->cb_need_restart = true; + /* + * RPC_SIGNALLED() means that the rpc_client is being torn down and + * (possibly) recreated. Requeue the call in that case. + */ + if (!RPC_SIGNALLED(task)) { + if (rpc_restart_call_prepare(task)) + return false; } +requeue: + nfsd41_cb_release_slot(cb); + nfsd4_requeue_cb(task, cb); return false; } @@ -1337,11 +1549,28 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) struct nfsd4_callback *cb = calldata; struct nfs4_client *clp = cb->cb_clp; - if (!nfsd4_cb_sequence_done(task, cb)) + trace_nfsd_cb_rpc_done(clp); + + if (!clp->cl_minorversion) { + /* + * If the backchannel connection was shut down while this + * task was queued, we need to resubmit it after setting up + * a new backchannel connection. + * + * Note that if we lost our callback connection permanently + * the submission code will error out, so we don't need to + * handle that case here. + */ + if (RPC_SIGNALLED(task)) + nfsd4_requeue_cb(task, cb); + } else if (!nfsd4_cb_sequence_done(task, cb)) { return; + } if (cb->cb_status) { - WARN_ON_ONCE(task->tk_status); + WARN_ONCE(task->tk_status, + "cb_status=%d tk_status=%d cb_opcode=%d", + cb->cb_status, task->tk_status, cb->cb_ops->opcode); task->tk_status = cb->cb_status; } @@ -1367,7 +1596,9 @@ static void nfsd4_cb_release(void *calldata) { struct nfsd4_callback *cb = calldata; - if (cb->cb_need_restart) + trace_nfsd_cb_rpc_release(cb->cb_clp); + + if (test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags)) nfsd4_queue_cb(cb); else nfsd41_destroy_cb(cb); @@ -1380,19 +1611,6 @@ static const struct rpc_call_ops nfsd4_cb_ops = { .rpc_release = nfsd4_cb_release, }; -int nfsd4_create_callback_queue(void) -{ - callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0); - if (!callback_wq) - return -ENOMEM; - return 0; -} - -void nfsd4_destroy_callback_queue(void) -{ - destroy_workqueue(callback_wq); -} - /* must be called under the state lock */ void nfsd4_shutdown_callback(struct nfs4_client *clp) { @@ -1406,7 +1624,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) * client, destroy the rpc client, and stop: */ nfsd4_run_cb(&clp->cl_cb_null); - flush_workqueue(callback_wq); + flush_workqueue(clp->cl_callback_wq); nfsd41_cb_inflight_wait_complete(clp); } @@ -1428,9 +1646,9 @@ static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) /* * Note there isn't a lot of locking in this code; instead we depend on - * the fact that it is run from the callback_wq, which won't run two - * work items at once. So, for example, callback_wq handles all access - * of cl_cb_client and all calls to rpc_create or rpc_shutdown_client. + * the fact that it is run from clp->cl_callback_wq, which won't run two + * work items at once. So, for example, clp->cl_callback_wq handles all + * access of cl_cb_client and all calls to rpc_create or rpc_shutdown_client. */ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) { @@ -1490,10 +1708,10 @@ static void nfsd4_run_cb_work(struct work_struct *work) { struct nfsd4_callback *cb = - container_of(work, struct nfsd4_callback, cb_work.work); + container_of(work, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; - int flags; + int flags, ret; trace_nfsd_cb_start(clp); @@ -1501,17 +1719,12 @@ nfsd4_run_cb_work(struct work_struct *work) nfsd4_process_cb_update(cb); clnt = clp->cl_cb_client; - if (!clnt) { - if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) - nfsd41_destroy_cb(cb); - else { - /* - * XXX: Ideally, we could wait for the client to - * reconnect, but I haven't figured out how - * to do that yet. - */ - nfsd4_queue_cb_delayed(cb, 25); - } + if (!clnt || clp->cl_state == NFSD4_COURTESY) { + /* + * Callback channel broken, client killed or + * nfs4_client in courtesy state; give up. + */ + nfsd41_destroy_cb(cb); return; } @@ -1524,16 +1737,19 @@ nfsd4_run_cb_work(struct work_struct *work) return; } - if (cb->cb_need_restart) { - cb->cb_need_restart = false; - } else { + if (!test_and_clear_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags)) { if (cb->cb_ops && cb->cb_ops->prepare) cb->cb_ops->prepare(cb); } + cb->cb_msg.rpc_cred = clp->cl_cb_cred; flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN; - rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, - cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); + ret = rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, + cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); + if (ret != 0) { + set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags); + nfsd4_queue_cb(cb); + } } void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, @@ -1543,11 +1759,13 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op]; cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_resp = cb; + cb->cb_flags = 0; cb->cb_ops = ops; - INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work); + INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); cb->cb_status = 0; - cb->cb_need_restart = false; - cb->cb_holds_slot = false; + cb->cb_held_slot = -1; + cb->cb_nr_referring_call_list = 0; + INIT_LIST_HEAD(&cb->cb_referring_call_list); } /** diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 7a806ac13e31..8cca1329f348 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -581,6 +581,7 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr, .id = id, .type = type, }; + __be32 status = nfs_ok; __be32 *p; int ret; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); @@ -593,12 +594,16 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr, return nfserrno(ret); ret = strlen(item->name); WARN_ON_ONCE(ret > IDMAP_NAMESZ); + p = xdr_reserve_space(xdr, ret + 4); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque(p, item->name, ret); + if (unlikely(!p)) { + status = nfserr_resource; + goto out_put; + } + xdr_encode_opaque(p, item->name, ret); +out_put: cache_put(&item->h, nn->idtoname_cache); - return 0; + return status; } static bool diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 4f3072b5979a..290271ac4245 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -344,9 +344,10 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls); trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid); - refcount_inc(&ls->ls_stid.sc_count); - nfsd4_run_cb(&ls->ls_recall); - + if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ls->ls_recall.cb_flags)) { + refcount_inc(&ls->ls_stid.sc_count); + nfsd4_run_cb(&ls->ls_recall); + } out_unlock: spin_unlock(&ls->ls_lock); } @@ -740,6 +741,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops = { .prepare = nfsd4_cb_layout_prepare, .done = nfsd4_cb_layout_done, .release = nfsd4_cb_layout_release, + .opcode = OP_CB_LAYOUTRECALL, }; static bool diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2927b1263f08..f13abbb13b38 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -57,6 +57,8 @@ module_param(inter_copy_offload_enable, bool, 0644); MODULE_PARM_DESC(inter_copy_offload_enable, "Enable inter server to server copy offload. Default: false"); +static void cleanup_async_copy(struct nfsd4_copy *copy); + #ifdef CONFIG_NFSD_V4_2_INTER_SSC static int nfsd4_ssc_umount_timeout = 900000; /* default to 15 mins */ module_param(nfsd4_ssc_umount_timeout, int, 0644); @@ -158,7 +160,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs return fh_verify(rqstp, current_fh, S_IFREG, accmode); } -static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) +static __be32 nfsd_check_obj_isreg(struct svc_fh *fh, u32 minor_version) { umode_t mode = d_inode(fh->fh_dentry)->i_mode; @@ -166,14 +168,15 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) return nfs_ok; if (S_ISDIR(mode)) return nfserr_isdir; - /* - * Using err_symlink as our catch-all case may look odd; but - * there's no other obvious error for this case in 4.0, and we - * happen to know that it will cause the linux v4 client to do - * the right thing on attempts to open something other than a - * regular file. - */ - return nfserr_symlink; + if (S_ISLNK(mode)) + return nfserr_symlink; + + /* RFC 7530 - 16.16.6 */ + if (minor_version == 0) + return nfserr_symlink; + else + return nfserr_wrong_type; + } static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh *resfh) @@ -263,7 +266,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, inode_lock_nested(inode, I_MUTEX_PARENT); - child = lookup_one_len(open->op_fname, parent, open->op_fnamelen); + child = lookup_one(&nop_mnt_idmap, + &QSTR_LEN(open->op_fname, open->op_fnamelen), + parent); if (IS_ERR(child)) { status = nfserrno(PTR_ERR(child)); goto out; @@ -466,7 +471,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru } if (status) goto out; - status = nfsd_check_obj_isreg(*resfh); + status = nfsd_check_obj_isreg(*resfh, cstate->minorversion); if (status) goto out; @@ -751,15 +756,6 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &access->ac_supported); } -static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) -{ - __be32 *verf = (__be32 *)verifier->data; - - BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data)); - - nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id)); -} - static __be32 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -882,6 +878,8 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_getattr *getattr = &u->getattr; __be32 status; + trace_nfsd_vfs_getattr(rqstp, &cstate->current_fh); + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); if (status) return status; @@ -1004,6 +1002,9 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, u64 cookie = readdir->rd_cookie; static const nfs4_verifier zeroverf; + trace_nfsd_vfs_readdir(rqstp, &cstate->current_fh, + readdir->rd_maxcount, readdir->rd_cookie); + /* no need to check permission - this will be done in nfsd_readdir() */ if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) @@ -1141,18 +1142,43 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, .na_iattr = &setattr->sa_iattr, .na_seclabel = &setattr->sa_label, }; + bool save_no_wcc, deleg_attrs; + struct nfs4_stid *st = NULL; struct inode *inode; __be32 status = nfs_ok; - bool save_no_wcc; int err; - if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { + deleg_attrs = setattr->sa_bmval[2] & (FATTR4_WORD2_TIME_DELEG_ACCESS | + FATTR4_WORD2_TIME_DELEG_MODIFY); + + if (deleg_attrs || (setattr->sa_iattr.ia_valid & ATTR_SIZE)) { + int flags = WR_STATE; + + if (setattr->sa_bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) + flags |= RD_STATE; + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, &setattr->sa_stateid, - WR_STATE, NULL, NULL); + flags, NULL, &st); if (status) return status; } + + if (deleg_attrs) { + status = nfserr_bad_stateid; + if (st->sc_type & SC_TYPE_DELEG) { + struct nfs4_delegation *dp = delegstateid(st); + + /* Only for *_ATTRS_DELEG flavors */ + if (deleg_attrs_deleg(dp->dl_type)) + status = nfs_ok; + } + } + if (st) + nfs4_put_stid(st); + if (status) + return status; + err = fh_want_write(&cstate->current_fh); if (err) return nfserrno(err); @@ -1192,7 +1218,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd_file *nf = NULL; __be32 status = nfs_ok; unsigned long cnt; - int nvecs; if (write->wr_offset > (u64)OFFSET_MAX || write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX) @@ -1207,13 +1232,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; write->wr_how_written = write->wr_stable_how; - - nvecs = svc_fill_write_vector(rqstp, &write->wr_payload); - WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); - status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, - write->wr_offset, rqstp->rq_vec, nvecs, &cnt, - write->wr_how_written, + write->wr_offset, &write->wr_payload, + &cnt, write->wr_how_written, (__be32 *)write->wr_verifier.data); nfsd_file_put(nf); @@ -1284,6 +1305,71 @@ out: return status; } +/** + * nfsd4_has_active_async_copies - Check for ongoing copy operations + * @clp: Client to be checked + * + * NFSD maintains state for async COPY operations after they complete, + * and this state remains in the nfs4_client's async_copies list. + * Ongoing copies should block the destruction of the nfs4_client, but + * completed copies should not. + * + * Return values: + * %true: At least one active async COPY is ongoing + * %false: No active async COPY operations were found + */ +bool nfsd4_has_active_async_copies(struct nfs4_client *clp) +{ + struct nfsd4_copy *copy; + bool result = false; + + spin_lock(&clp->async_lock); + list_for_each_entry(copy, &clp->async_copies, copies) { + if (!test_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags) && + !test_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) { + result = true; + break; + } + } + spin_unlock(&clp->async_lock); + return result; +} + +/** + * nfsd4_async_copy_reaper - Purge completed copies + * @nn: Network namespace with possible active copy information + */ +void nfsd4_async_copy_reaper(struct nfsd_net *nn) +{ + struct nfs4_client *clp; + struct nfsd4_copy *copy; + LIST_HEAD(reaplist); + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + struct list_head *pos, *next; + + spin_lock(&clp->async_lock); + list_for_each_safe(pos, next, &clp->async_copies) { + copy = list_entry(pos, struct nfsd4_copy, copies); + if (test_bit(NFSD4_COPY_F_OFFLOAD_DONE, ©->cp_flags)) { + if (--copy->cp_ttl) { + list_del_init(©->copies); + list_add(©->copies, &reaplist); + } + } + } + spin_unlock(&clp->async_lock); + } + spin_unlock(&nn->client_lock); + + while (!list_empty(&reaplist)) { + copy = list_first_entry(&reaplist, struct nfsd4_copy, copies); + list_del_init(©->copies); + cleanup_async_copy(copy); + } +} + static void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(©->refcount)) @@ -1294,12 +1380,16 @@ static void nfs4_put_copy(struct nfsd4_copy *copy) static void nfsd4_stop_copy(struct nfsd4_copy *copy) { - if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) + trace_nfsd_copy_async_cancel(copy); + if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) { kthread_stop(copy->copy_task); + copy->nfserr = nfs_ok; + set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); + } nfs4_put_copy(copy); } -static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp) +static struct nfsd4_copy *nfsd4_unhash_copy(struct nfs4_client *clp) { struct nfsd4_copy *copy = NULL; @@ -1308,6 +1398,9 @@ static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp) copy = list_first_entry(&clp->async_copies, struct nfsd4_copy, copies); refcount_inc(©->refcount); + copy->cp_clp = NULL; + if (!list_empty(©->copies)) + list_del_init(©->copies); } spin_unlock(&clp->async_lock); return copy; @@ -1317,7 +1410,7 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp) { struct nfsd4_copy *copy; - while ((copy = nfsd4_get_copy(clp)) != NULL) + while ((copy = nfsd4_unhash_copy(clp)) != NULL) nfsd4_stop_copy(copy); } #ifdef CONFIG_NFSD_V4_2_INTER_SSC @@ -1605,8 +1698,10 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { struct nfsd4_cb_offload *cbo = container_of(cb, struct nfsd4_cb_offload, co_cb); + struct nfsd4_copy *copy = + container_of(cbo, struct nfsd4_copy, cp_cb_offload); - kfree(cbo); + set_bit(NFSD4_COPY_F_OFFLOAD_DONE, ©->cp_flags); } static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, @@ -1616,12 +1711,21 @@ static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, container_of(cb, struct nfsd4_cb_offload, co_cb); trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task); + switch (task->tk_status) { + case -NFS4ERR_DELAY: + if (cbo->co_retries--) { + rpc_delay(task, HZ / 5); + return 0; + } + } + nfsd41_cb_destroy_referring_call_list(cb); return 1; } static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = { .release = nfsd4_cb_offload_release, - .done = nfsd4_cb_offload_done + .done = nfsd4_cb_offload_done, + .opcode = OP_CB_OFFLOAD, }; static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) @@ -1630,7 +1734,6 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) test_bit(NFSD4_COPY_F_COMMITTED, ©->cp_flags) ? NFS_FILE_SYNC : NFS_UNSTABLE; nfsd4_copy_set_sync(copy, sync); - gen_boot_verifier(©->cp_res.wr_verifier, copy->cp_clp->net); } static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy, @@ -1737,23 +1840,23 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) nfs4_put_copy(copy); } -static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr) +static void nfsd4_send_cb_offload(struct nfsd4_copy *copy) { - struct nfsd4_cb_offload *cbo; - - cbo = kzalloc(sizeof(*cbo), GFP_KERNEL); - if (!cbo) - return; + struct nfsd4_cb_offload *cbo = ©->cp_cb_offload; memcpy(&cbo->co_res, ©->cp_res, sizeof(copy->cp_res)); memcpy(&cbo->co_fh, ©->fh, sizeof(copy->fh)); - cbo->co_nfserr = nfserr; + cbo->co_nfserr = copy->nfserr; + cbo->co_retries = 5; nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); + nfsd41_cb_referring_call(&cbo->co_cb, &cbo->co_referring_sessionid, + cbo->co_referring_slotid, + cbo->co_referring_seqno); trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid, - &cbo->co_fh, copy->cp_count, nfserr); - nfsd4_run_cb(&cbo->co_cb); + &cbo->co_fh, copy->cp_count, copy->nfserr); + nfsd4_try_run_cb(&cbo->co_cb); } /** @@ -1766,9 +1869,8 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr) static int nfsd4_do_async_copy(void *data) { struct nfsd4_copy *copy = (struct nfsd4_copy *)data; - __be32 nfserr; - trace_nfsd_copy_do_async(copy); + trace_nfsd_copy_async(copy); if (nfsd4_ssc_is_inter(copy)) { struct file *filp; @@ -1777,25 +1879,31 @@ static int nfsd4_do_async_copy(void *data) if (IS_ERR(filp)) { switch (PTR_ERR(filp)) { case -EBADF: - nfserr = nfserr_wrong_type; + copy->nfserr = nfserr_wrong_type; break; default: - nfserr = nfserr_offload_denied; + copy->nfserr = nfserr_offload_denied; } /* ss_mnt will be unmounted by the laundromat */ goto do_callback; } - nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, - false); + copy->nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, + false); nfsd4_cleanup_inter_ssc(copy->ss_nsui, filp, copy->nf_dst); } else { - nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, - copy->nf_dst->nf_file, false); + copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, false); } do_callback: - nfsd4_send_cb_offload(copy, nfserr); - cleanup_async_copy(copy); + /* The kthread exits forthwith. Ensure that a subsequent + * OFFLOAD_CANCEL won't try to kill it again. */ + set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags); + + set_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags); + trace_nfsd_copy_async_done(copy); + nfsd4_send_cb_offload(copy); + atomic_dec(©->cp_nn->pending_async_copies); return 0; } @@ -1803,9 +1911,21 @@ static __be32 nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfsd4_copy *async_copy = NULL; struct nfsd4_copy *copy = &u->copy; + struct nfsd42_write_res *result; __be32 status; - struct nfsd4_copy *async_copy = NULL; + + /* + * Currently, async COPY is not reliable. Force all COPY + * requests to be synchronous to avoid client application + * hangs waiting for COPY completion. + */ + nfsd4_copy_set_sync(copy, true); + + result = ©->cp_res; + nfsd_copy_write_verifier((__be32 *)&result->wr_verifier.data, nn); copy->cp_clp = cstate->clp; if (nfsd4_ssc_is_inter(copy)) { @@ -1831,26 +1951,34 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, memcpy(©->fh, &cstate->current_fh.fh_handle, sizeof(struct knfsd_fh)); if (nfsd4_copy_is_async(copy)) { - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - - status = nfserrno(-ENOMEM); async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) goto out_err; + async_copy->cp_nn = nn; INIT_LIST_HEAD(&async_copy->copies); refcount_set(&async_copy->refcount, 1); + async_copy->cp_ttl = NFSD_COPY_INITIAL_TTL; + /* Arbitrary cap on number of pending async copy operations */ + if (atomic_inc_return(&nn->pending_async_copies) > + (int)rqstp->rq_pool->sp_nrthreads) + goto out_dec_async_copy_err; async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); if (!async_copy->cp_src) - goto out_err; + goto out_dec_async_copy_err; if (!nfs4_init_copy_state(nn, copy)) - goto out_err; - memcpy(©->cp_res.cb_stateid, ©->cp_stateid.cs_stid, - sizeof(copy->cp_res.cb_stateid)); + goto out_dec_async_copy_err; + memcpy(&result->cb_stateid, ©->cp_stateid.cs_stid, + sizeof(result->cb_stateid)); dup_copy_fields(copy, async_copy); + memcpy(async_copy->cp_cb_offload.co_referring_sessionid.data, + cstate->session->se_sessionid.data, + NFS4_MAX_SESSIONID_LEN); + async_copy->cp_cb_offload.co_referring_slotid = cstate->slot->sl_index; + async_copy->cp_cb_offload.co_referring_seqno = cstate->slot->sl_seqid; async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, "%s", "copy thread"); if (IS_ERR(async_copy->copy_task)) - goto out_err; + goto out_dec_async_copy_err; spin_lock(&async_copy->cp_clp->async_lock); list_add(&async_copy->copies, &async_copy->cp_clp->async_copies); @@ -1865,6 +1993,9 @@ out: trace_nfsd_copy_done(copy, status); release_copy_files(copy); return status; +out_dec_async_copy_err: + if (async_copy) + atomic_dec(&nn->pending_async_copies); out_err: if (nfsd4_ssc_is_inter(copy)) { /* @@ -1876,7 +2007,7 @@ out_err: } if (async_copy) cleanup_async_copy(async_copy); - status = nfserrno(-ENOMEM); + status = nfserr_jukebox; goto out; } @@ -1935,7 +2066,7 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_copy_notify *cn = &u->copy_notify; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - struct nfs4_stid *stid; + struct nfs4_stid *stid = NULL; struct nfs4_cpntf_state *cps; struct nfs4_client *clp = cstate->clp; @@ -1944,6 +2075,8 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &stid); if (status) return status; + if (!stid) + return nfserr_bad_stateid; cn->cpn_lease_time.tv_sec = nn->nfsd4_lease; cn->cpn_lease_time.tv_nsec = 0; @@ -2003,11 +2136,16 @@ nfsd4_offload_status(struct svc_rqst *rqstp, struct nfsd4_copy *copy; struct nfs4_client *clp = cstate->clp; + os->completed = false; spin_lock(&clp->async_lock); copy = find_async_copy_locked(clp, &os->stateid); - if (copy) + if (copy) { os->count = copy->cp_res.wr_bytes_written; - else + if (test_bit(NFSD4_COPY_F_COMPLETED, ©->cp_flags)) { + os->completed = true; + os->status = copy->nfserr; + } + } else status = nfserr_bad_stateid; spin_unlock(&clp->async_lock); @@ -2154,6 +2292,29 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status == nfserr_same ? nfs_ok : status; } +static __be32 +nfsd4_get_dir_delegation(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation; + + /* + * RFC 8881, section 18.39.3 says: + * + * "The server may refuse to grant the delegation. In that case, the + * server will return NFS4ERR_DIRDELEG_UNAVAIL." + * + * This is sub-optimal, since it means that the server would need to + * abort compound processing just because the delegation wasn't + * available. RFC8881bis should change this to allow the server to + * return NFS4_OK with a non-fatal status of GDD4_UNAVAIL in this + * situation. + */ + gdd->gddrnf_status = GDD4_UNAVAIL; + return nfs_ok; +} + #ifdef CONFIG_NFSD_PNFS static const struct nfsd4_layout_ops * nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type) @@ -2196,7 +2357,9 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp, return nfserr_noent; } - exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid); + exp = rqst_exp_find(&rqstp->rq_chandle, SVC_NET(rqstp), + rqstp->rq_client, rqstp->rq_gssclient, + map->fsid_type, map->fsid); if (IS_ERR(exp)) { dprintk("%s: could not find device id\n", __func__); return nfserr_noent; @@ -2234,7 +2397,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp, const struct nfsd4_layout_ops *ops; struct nfs4_layout_stateid *ls; __be32 nfserr; - int accmode = NFSD_MAY_READ_IF_EXEC; + int accmode = NFSD_MAY_READ_IF_EXEC | NFSD_MAY_OWNER_OVERRIDE; switch (lgp->lg_seg.iomode) { case IOMODE_READ: @@ -2324,7 +2487,8 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp, struct nfs4_layout_stateid *ls; __be32 nfserr; - nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE); + nfserr = fh_verify(rqstp, current_fh, 0, + NFSD_MAY_WRITE | NFSD_MAY_OWNER_OVERRIDE); if (nfserr) goto out; @@ -2739,6 +2903,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (op->opdesc->op_get_currentstateid) op->opdesc->op_get_currentstateid(cstate, &op->u); op->status = op->opdesc->op_func(rqstp, cstate, &op->u); + trace_nfsd_compound_op_err(rqstp, op->opnum, op->status); /* Only from SEQUENCE */ if (cstate->status == nfserr_replay_cache) { @@ -2755,7 +2920,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (current_fh->fh_export && need_wrongsec_check(rqstp)) - op->status = check_nfsd_access(current_fh->fh_export, rqstp); + op->status = check_nfsd_access(current_fh->fh_export, rqstp, false); } encode_op: if (op->status == nfserr_replay_me) { @@ -3082,6 +3247,18 @@ static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp, * sizeof(__be32); } +static u32 nfsd4_get_dir_delegation_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) +{ + return (op_encode_hdr_size + + 1 /* gddr_status */ + + op_encode_verifier_maxsz + + op_encode_stateid_maxsz + + 2 /* gddr_notification */ + + 2 /* gddr_child_attributes */ + + 2 /* gddr_dir_attributes */); +} + #ifdef CONFIG_NFSD_PNFS static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp, const struct nfsd4_op *op) @@ -3399,6 +3576,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { /* NFSv4.1 operations */ [OP_EXCHANGE_ID] = { .op_func = nfsd4_exchange_id, + .op_release = nfsd4_exchange_id_release, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP | OP_MODIFIES_SOMETHING, .op_name = "OP_EXCHANGE_ID", @@ -3470,6 +3648,12 @@ static const struct nfsd4_operation nfsd4_ops[] = { .op_get_currentstateid = nfsd4_get_freestateid, .op_rsize_bop = nfsd4_only_status_rsize, }, + [OP_GET_DIR_DELEGATION] = { + .op_func = nfsd4_get_dir_delegation, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_GET_DIR_DELEGATION", + .op_rsize_bop = nfsd4_get_dir_delegation_rsize, + }, #ifdef CONFIG_NFSD_PNFS [OP_GETDEVICEINFO] = { .op_func = nfsd4_getdeviceinfo, @@ -3596,7 +3780,8 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow; u32 opiter; - if (!cstate->minorversion) + if (rqstp->rq_procinfo != &nfsd_version4.vs_proc[NFSPROC4_COMPOUND] || + cstate->minorversion == 0) return false; if (cstate->spo_must_allowed) @@ -3662,7 +3847,7 @@ static const struct svc_procedure nfsd_procedures4[2] = { .pc_ressize = sizeof(struct nfsd4_compoundres), .pc_release = nfsd4_release_compoundargs, .pc_cachetype = RC_NOCACHE, - .pc_xdrressize = NFSD_BUFSIZE/4, + .pc_xdrressize = 3+NFSSVC_MAXBLKSIZE/4, .pc_name = "COMPOUND", }, }; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 2c060e0b1604..82785db730d9 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -33,6 +33,7 @@ */ #include <crypto/hash.h> +#include <crypto/sha2.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/namei.h> @@ -82,14 +83,13 @@ nfs4_save_creds(const struct cred **original_creds) new->fsuid = GLOBAL_ROOT_UID; new->fsgid = GLOBAL_ROOT_GID; *original_creds = override_creds(new); - put_cred(new); return 0; } static void nfs4_reset_creds(const struct cred *original) { - revert_creds(original); + put_cred(revert_creds(original)); } static void @@ -219,7 +219,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) /* lock the parent */ inode_lock(d_inode(dir)); - dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1); + dentry = lookup_one(&nop_mnt_idmap, &QSTR(dname), dir); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); goto out_unlock; @@ -234,9 +234,12 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); + if (IS_ERR(dentry)) + status = PTR_ERR(dentry); out_put: - dput(dentry); + if (!status) + dput(dentry); out_unlock: inode_unlock(d_inode(dir)); if (status == 0) { @@ -314,7 +317,8 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) list_for_each_entry_safe(entry, tmp, &ctx.names, list) { if (!status) { struct dentry *dentry; - dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); + dentry = lookup_one(&nop_mnt_idmap, + &QSTR(entry->name), dir); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); break; @@ -337,16 +341,16 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) } static int -nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn) +nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn) { struct dentry *dir, *dentry; int status; - dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); + dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name); dir = nn->rec_file->f_path.dentry; inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); - dentry = lookup_one_len(name, dir, namlen); + dentry = lookup_one(&nop_mnt_idmap, &QSTR(name), dir); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); goto out_unlock; @@ -406,7 +410,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) if (status < 0) goto out_drop_write; - status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn); + status = nfsd4_unlink_clid_dir(dname, nn); nfs4_reset_creds(original_cred); if (status == 0) { vfs_fsync(nn->rec_file, 0); @@ -659,7 +663,8 @@ nfs4_reset_recoverydir(char *recdir) return status; status = -ENOTDIR; if (d_is_dir(path.dentry)) { - strcpy(user_recovery_dirname, recdir); + strscpy(user_recovery_dirname, recdir, + sizeof(user_recovery_dirname)); status = 0; } path_put(&path); @@ -733,7 +738,6 @@ struct cld_net { spinlock_t cn_lock; struct list_head cn_list; unsigned int cn_xid; - struct crypto_shash *cn_tfm; #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING bool cn_has_legacy; #endif @@ -809,6 +813,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, ci = &cmsg->cm_u.cm_clntinfo; if (get_user(namelen, &ci->cc_name.cn_len)) return -EFAULT; + if (namelen == 0 || namelen > NFS4_OPAQUE_LIMIT) { + dprintk("%s: invalid namelen (%u)", __func__, namelen); + return -EINVAL; + } name.data = memdup_user(&ci->cc_name.cn_id, namelen); if (IS_ERR(name.data)) return PTR_ERR(name.data); @@ -831,6 +839,10 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, cnm = &cmsg->cm_u.cm_name; if (get_user(namelen, &cnm->cn_len)) return -EFAULT; + if (namelen == 0 || namelen > NFS4_OPAQUE_LIMIT) { + dprintk("%s: invalid namelen (%u)", __func__, namelen); + return -EINVAL; + } name.data = memdup_user(&cnm->cn_id, namelen); if (IS_ERR(name.data)) return PTR_ERR(name.data); @@ -1051,8 +1063,6 @@ nfsd4_remove_cld_pipe(struct net *net) nfsd4_cld_unregister_net(net, cn->cn_pipe); rpc_destroy_pipe_data(cn->cn_pipe); - if (cn->cn_tfm) - crypto_free_shash(cn->cn_tfm); kfree(nn->cld_net); nn->cld_net = NULL; } @@ -1146,8 +1156,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; struct cld_msg_v2 *cmsg; - struct crypto_shash *tfm = cn->cn_tfm; - struct xdr_netobj cksum; char *principal = NULL; /* Don't upcall if it's already stored */ @@ -1170,22 +1178,9 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) else if (clp->cl_cred.cr_principal) principal = clp->cl_cred.cr_principal; if (principal) { - cksum.len = crypto_shash_digestsize(tfm); - cksum.data = kmalloc(cksum.len, GFP_KERNEL); - if (cksum.data == NULL) { - ret = -ENOMEM; - goto out; - } - ret = crypto_shash_tfm_digest(tfm, principal, strlen(principal), - cksum.data); - if (ret) { - kfree(cksum.data); - goto out; - } - cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len; - memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data, - cksum.data, cksum.len); - kfree(cksum.data); + sha256(principal, strlen(principal), + cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data); + cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = SHA256_DIGEST_SIZE; } else cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0; @@ -1195,7 +1190,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); } -out: free_cld_upcall(cup); out_err: if (ret) @@ -1334,12 +1328,11 @@ found: static int nfsd4_cld_check_v2(struct nfs4_client *clp) { - struct nfs4_client_reclaim *crp; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING struct cld_net *cn = nn->cld_net; - int status; - struct crypto_shash *tfm = cn->cn_tfm; - struct xdr_netobj cksum; +#endif + struct nfs4_client_reclaim *crp; char *principal = NULL; /* did we already find that this client is stable? */ @@ -1355,6 +1348,7 @@ nfsd4_cld_check_v2(struct nfs4_client *clp) if (cn->cn_has_legacy) { struct xdr_netobj name; char dname[HEXDIR_LEN]; + int status; status = nfs4_make_rec_clidname(dname, &clp->cl_name); if (status) @@ -1377,28 +1371,18 @@ nfsd4_cld_check_v2(struct nfs4_client *clp) return -ENOENT; found: if (crp->cr_princhash.len) { + u8 digest[SHA256_DIGEST_SIZE]; + if (clp->cl_cred.cr_raw_principal) principal = clp->cl_cred.cr_raw_principal; else if (clp->cl_cred.cr_principal) principal = clp->cl_cred.cr_principal; if (principal == NULL) return -ENOENT; - cksum.len = crypto_shash_digestsize(tfm); - cksum.data = kmalloc(cksum.len, GFP_KERNEL); - if (cksum.data == NULL) - return -ENOENT; - status = crypto_shash_tfm_digest(tfm, principal, - strlen(principal), cksum.data); - if (status) { - kfree(cksum.data); + sha256(principal, strlen(principal), digest); + if (memcmp(crp->cr_princhash.data, digest, + crp->cr_princhash.len)) return -ENOENT; - } - if (memcmp(crp->cr_princhash.data, cksum.data, - crp->cr_princhash.len)) { - kfree(cksum.data); - return -ENOENT; - } - kfree(cksum.data); } crp->cr_clp = clp; return 0; @@ -1578,7 +1562,6 @@ nfsd4_cld_tracking_init(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); bool running; int retries = 10; - struct crypto_shash *tfm; status = nfs4_cld_state_init(net); if (status) @@ -1603,12 +1586,6 @@ nfsd4_cld_tracking_init(struct net *net) status = -ETIMEDOUT; goto err_remove; } - tfm = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(tfm)) { - status = PTR_ERR(tfm); - goto err_remove; - } - nn->cld_net->cn_tfm = tfm; status = nfsd4_cld_get_version(nn); if (status == -EOPNOTSUPP) @@ -1895,10 +1872,7 @@ nfsd4_cltrack_upcall_lock(struct nfs4_client *clp) static void nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp) { - smp_mb__before_atomic(); - clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags); - smp_mb__after_atomic(); - wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK); + clear_and_wake_up_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags); } static void @@ -2046,7 +2020,6 @@ static inline int check_for_legacy_methods(int status, struct net *net) path_put(&path); if (status) return -ENOTDIR; - status = nn->client_tracking_ops->init(net); } return status; } @@ -2086,8 +2059,8 @@ do_init: status = nn->client_tracking_ops->init(net); out: if (status) { - printk(KERN_WARNING "NFSD: Unable to initialize client " - "recovery tracking! (%d)\n", status); + pr_warn("NFSD: Unable to initialize client recovery tracking! (%d)\n", status); + pr_warn("NFSD: Is nfsdcld running? If not, enable CONFIG_NFSD_LEGACY_CLIENT_TRACKING.\n"); nn->client_tracking_ops = NULL; } return status; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1a93c7fcf76c..d5694987f86f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -149,14 +149,14 @@ void nfsd4_destroy_laundry_wq(void) static bool is_session_dead(struct nfsd4_session *ses) { - return ses->se_flags & NFS4_SESSION_DEAD; + return ses->se_dead; } static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) { if (atomic_read(&ses->se_ref) > ref_held_by_me) return nfserr_jukebox; - ses->se_flags |= NFS4_SESSION_DEAD; + ses->se_dead = true; return nfs_ok; } @@ -400,6 +400,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { .prepare = nfsd4_cb_notify_lock_prepare, .done = nfsd4_cb_notify_lock_done, .release = nfsd4_cb_notify_lock_release, + .opcode = OP_CB_NOTIFY_LOCK, }; /* @@ -541,7 +542,7 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner) } static struct nfs4_openowner * -find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, struct nfs4_client *clp) { struct nfs4_stateowner *so; @@ -558,18 +559,6 @@ find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, return NULL; } -static struct nfs4_openowner * -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, - struct nfs4_client *clp) -{ - struct nfs4_openowner *oo; - - spin_lock(&clp->cl_lock); - oo = find_openstateowner_str_locked(hashval, open, clp); - spin_unlock(&clp->cl_lock); - return oo; -} - static inline u32 opaque_hashval(const void *ptr, int nbytes) { @@ -583,13 +572,6 @@ opaque_hashval(const void *ptr, int nbytes) return x; } -static void nfsd4_free_file_rcu(struct rcu_head *rcu) -{ - struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu); - - kmem_cache_free(file_slab, fp); -} - void put_nfs4_file(struct nfs4_file *fi) { @@ -597,7 +579,7 @@ put_nfs4_file(struct nfs4_file *fi) nfsd4_file_hash_remove(fi); WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); - call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); + kfree_rcu(fi, fi_rcu); } } @@ -964,15 +946,6 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla spin_lock_init(&stid->sc_lock); INIT_LIST_HEAD(&stid->sc_cp_list); - /* - * It shouldn't be a problem to reuse an opaque stateid value. - * I don't think it is for 4.1. But with 4.0 I worry that, for - * example, a stray write retransmission could be accepted by - * the server when it should have been rejected. Therefore, - * adopt a trick from the sctp code to attempt to maximize the - * amount of time until an id is reused, by ensuring they always - * "increase" (mod INT_MAX): - */ return stid; out_free: kmem_cache_free(slab, stid); @@ -1068,6 +1041,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) return openlockstateid(stid); } +/* + * As the sc_free callback of deleg, this may be called by nfs4_put_stid + * in nfsd_break_one_deleg. + * Considering nfsd_break_one_deleg is called with the flc->flc_lock held, + * this function mustn't ever sleep. + */ static void nfs4_free_deleg(struct nfs4_stid *stid) { struct nfs4_delegation *dp = delegstateid(stid); @@ -1089,7 +1068,8 @@ static void nfs4_free_deleg(struct nfs4_stid *stid) * When a delegation is recalled, the filehandle is stored in the "new" * filter. * Every 30 seconds we swap the filters and clear the "new" one, - * unless both are empty of course. + * unless both are empty of course. This results in delegations for a + * given filehandle being blocked for between 30 and 60 seconds. * * Each filter is 256 bits. We hash the filehandle to 32bit and use the * low 3 bytes as hash-table indices. @@ -1118,9 +1098,9 @@ static int delegation_blocked(struct knfsd_fh *fh) if (ktime_get_seconds() - bd->swap_time > 30) { bd->entries -= bd->old_entries; bd->old_entries = bd->entries; + bd->new = 1-bd->new; memset(bd->set[bd->new], 0, sizeof(bd->set[0])); - bd->new = 1-bd->new; bd->swap_time = ktime_get_seconds(); } spin_unlock(&blocked_delegations_lock); @@ -1194,7 +1174,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client, &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR); dp->dl_cb_fattr.ncf_file_modified = false; - dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE; get_nfs4_file(fp); dp->dl_stid.sc_file = fp; return dp; @@ -1369,21 +1348,48 @@ static void destroy_delegation(struct nfs4_delegation *dp) destroy_unhashed_deleg(dp); } +/** + * revoke_delegation - perform nfs4 delegation structure cleanup + * @dp: pointer to the delegation + * + * This function assumes that it's called either from the administrative + * interface (nfsd4_revoke_states()) that's revoking a specific delegation + * stateid or it's called from a laundromat thread (nfsd4_landromat()) that + * determined that this specific state has expired and needs to be revoked + * (both mark state with the appropriate stid sc_status mode). It is also + * assumed that a reference was taken on the @dp state. + * + * If this function finds that the @dp state is SC_STATUS_FREED it means + * that a FREE_STATEID operation for this stateid has been processed and + * we can proceed to removing it from recalled list. However, if @dp state + * isn't marked SC_STATUS_FREED, it means we need place it on the cl_revoked + * list and wait for the FREE_STATEID to arrive from the client. At the same + * time, we need to mark it as SC_STATUS_FREEABLE to indicate to the + * nfsd4_free_stateid() function that this stateid has already been added + * to the cl_revoked list and that nfsd4_free_stateid() is now responsible + * for removing it from the list. Inspection of where the delegation state + * in the revocation process is protected by the clp->cl_lock. + */ static void revoke_delegation(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_stid.sc_client; WARN_ON(!list_empty(&dp->dl_recall_lru)); + WARN_ON_ONCE(dp->dl_stid.sc_client->cl_minorversion > 0 && + !(dp->dl_stid.sc_status & + (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED))); trace_nfsd_stid_revoke(&dp->dl_stid); - if (dp->dl_stid.sc_status & - (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED)) { - spin_lock(&clp->cl_lock); - refcount_inc(&dp->dl_stid.sc_count); - list_add(&dp->dl_recall_lru, &clp->cl_revoked); - spin_unlock(&clp->cl_lock); + spin_lock(&clp->cl_lock); + if (dp->dl_stid.sc_status & SC_STATUS_FREED) { + list_del_init(&dp->dl_recall_lru); + goto out; } + list_add(&dp->dl_recall_lru, &clp->cl_revoked); + dp->dl_stid.sc_status |= SC_STATUS_FREEABLE; +out: + spin_unlock(&clp->cl_lock); destroy_unhashed_deleg(dp); } @@ -1409,11 +1415,16 @@ static void recalculate_deny_mode(struct nfs4_file *fp) { struct nfs4_ol_stateid *stp; + u32 old_deny; spin_lock(&fp->fi_lock); + old_deny = fp->fi_share_deny; fp->fi_share_deny = 0; - list_for_each_entry(stp, &fp->fi_stateids, st_perfile) + list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap); + if (fp->fi_share_deny == old_deny) + break; + } spin_unlock(&fp->fi_lock); } @@ -1639,6 +1650,14 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp) free_ol_stateid_reaplist(&reaplist); } +static bool nfs4_openowner_unhashed(struct nfs4_openowner *oo) +{ + lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); + + return list_empty(&oo->oo_owner.so_strhash) && + list_empty(&oo->oo_perclient); +} + static void unhash_openowner_locked(struct nfs4_openowner *oo) { struct nfs4_client *clp = oo->oo_owner.so_client; @@ -1670,9 +1689,7 @@ static void release_openowner(struct nfs4_openowner *oo) { struct nfs4_ol_stateid *stp; struct nfs4_client *clp = oo->oo_owner.so_client; - struct list_head reaplist; - - INIT_LIST_HEAD(&reaplist); + LIST_HEAD(reaplist); spin_lock(&clp->cl_lock); unhash_openowner_locked(oo); @@ -1787,6 +1804,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb) mutex_unlock(&stp->st_mutex); break; case SC_TYPE_DELEG: + refcount_inc(&stid->sc_count); dp = delegstateid(stid); spin_lock(&state_lock); if (!unhash_delegation_locked( @@ -1889,113 +1907,145 @@ gen_sessionid(struct nfsd4_session *ses) */ #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) +static struct shrinker *nfsd_slot_shrinker; +static DEFINE_SPINLOCK(nfsd_session_list_lock); +static LIST_HEAD(nfsd_session_list); +/* The sum of "target_slots-1" on every session. The shrinker can push this + * down, though it can take a little while for the memory to actually + * be freed. The "-1" is because we can never free slot 0 while the + * session is active. + */ +static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0); + static void -free_session_slots(struct nfsd4_session *ses) +free_session_slots(struct nfsd4_session *ses, int from) { int i; - for (i = 0; i < ses->se_fchannel.maxreqs; i++) { - free_svc_cred(&ses->se_slots[i]->sl_cred); - kfree(ses->se_slots[i]); + if (from >= ses->se_fchannel.maxreqs) + return; + + for (i = from; i < ses->se_fchannel.maxreqs; i++) { + struct nfsd4_slot *slot = xa_load(&ses->se_slots, i); + + /* + * Save the seqid in case we reactivate this slot. + * This will never require a memory allocation so GFP + * flag is irrelevant + */ + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0); + free_svc_cred(&slot->sl_cred); + kfree(slot); + } + ses->se_fchannel.maxreqs = from; + if (ses->se_target_maxslots > from) { + int new_target = from ?: 1; + atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots); + ses->se_target_maxslots = new_target; } } -/* - * We don't actually need to cache the rpc and session headers, so we - * can allocate a little less for each slot: +/** + * reduce_session_slots - reduce the target max-slots of a session if possible + * @ses: The session to affect + * @dec: how much to decrease the target by + * + * This interface can be used by a shrinker to reduce the target max-slots + * for a session so that some slots can eventually be freed. + * It uses spin_trylock() as it may be called in a context where another + * spinlock is held that has a dependency on client_lock. As shrinkers are + * best-effort, skiping a session is client_lock is already held has no + * great coast + * + * Return value: + * The number of slots that the target was reduced by. */ -static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) +static int +reduce_session_slots(struct nfsd4_session *ses, int dec) { - u32 size; + struct nfsd_net *nn = net_generic(ses->se_client->net, + nfsd_net_id); + int ret = 0; - if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ) - size = 0; - else - size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; - return size + sizeof(struct nfsd4_slot); + if (ses->se_target_maxslots <= 1) + return ret; + if (!spin_trylock(&nn->client_lock)) + return ret; + ret = min(dec, ses->se_target_maxslots-1); + ses->se_target_maxslots -= ret; + atomic_sub(ret, &nfsd_total_target_slots); + ses->se_slot_gen += 1; + if (ses->se_slot_gen == 0) { + int i; + ses->se_slot_gen = 1; + for (i = 0; i < ses->se_fchannel.maxreqs; i++) { + struct nfsd4_slot *slot = xa_load(&ses->se_slots, i); + slot->sl_generation = 0; + } + } + spin_unlock(&nn->client_lock); + return ret; } -/* - * XXX: If we run out of reserved DRC memory we could (up to a point) - * re-negotiate active sessions and reduce their slot usage to make - * room for new connections. For now we just fail the create session. - */ -static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) +static struct nfsd4_slot *nfsd4_alloc_slot(struct nfsd4_channel_attrs *fattrs, + int index, gfp_t gfp) { - u32 slotsize = slot_bytes(ca); - u32 num = ca->maxreqs; - unsigned long avail, total_avail; - unsigned int scale_factor; + struct nfsd4_slot *slot; + size_t size; - spin_lock(&nfsd_drc_lock); - if (nfsd_drc_max_mem > nfsd_drc_mem_used) - total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used; - else - /* We have handed out more space than we chose in - * set_max_drc() to allow. That isn't really a - * problem as long as that doesn't make us think we - * have lots more due to integer overflow. - */ - total_avail = 0; - avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail); /* - * Never use more than a fraction of the remaining memory, - * unless it's the only way to give this client a slot. - * The chosen fraction is either 1/8 or 1/number of threads, - * whichever is smaller. This ensures there are adequate - * slots to support multiple clients per thread. - * Give the client one slot even if that would require - * over-allocation--it is better than failure. + * The RPC and NFS session headers are never saved in + * the slot reply cache buffer. */ - scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads); - - avail = clamp_t(unsigned long, avail, slotsize, - total_avail/scale_factor); - num = min_t(int, num, avail / slotsize); - num = max_t(int, num, 1); - nfsd_drc_mem_used += num * slotsize; - spin_unlock(&nfsd_drc_lock); - - return num; -} - -static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) -{ - int slotsize = slot_bytes(ca); + size = fattrs->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ ? + 0 : fattrs->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; - spin_lock(&nfsd_drc_lock); - nfsd_drc_mem_used -= slotsize * ca->maxreqs; - spin_unlock(&nfsd_drc_lock); + slot = kzalloc(struct_size(slot, sl_data, size), gfp); + if (!slot) + return NULL; + slot->sl_index = index; + return slot; } static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, struct nfsd4_channel_attrs *battrs) { int numslots = fattrs->maxreqs; - int slotsize = slot_bytes(fattrs); struct nfsd4_session *new; + struct nfsd4_slot *slot; int i; - BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION) - > PAGE_SIZE); - - new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL); + new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return NULL; - /* allocate each struct nfsd4_slot and data cache in one piece */ - for (i = 0; i < numslots; i++) { - new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL); - if (!new->se_slots[i]) - goto out_free; - } + xa_init(&new->se_slots); - memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); - memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs)); + slot = nfsd4_alloc_slot(fattrs, 0, GFP_KERNEL); + if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL))) + goto out_free; + for (i = 1; i < numslots; i++) { + const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; + slot = nfsd4_alloc_slot(fattrs, i, gfp); + if (!slot) + break; + if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) { + kfree(slot); + break; + } + } + fattrs->maxreqs = i; + memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); + new->se_target_maxslots = i; + atomic_add(i - 1, &nfsd_total_target_slots); + new->se_cb_slot_avail = ~0U; + new->se_cb_highest_slot = min(battrs->maxreqs - 1, + NFSD_BC_SLOT_TABLE_SIZE - 1); + spin_lock_init(&new->se_lock); return new; out_free: - while (i--) - kfree(new->se_slots[i]); + kfree(slot); + xa_destroy(&new->se_slots); kfree(new); return NULL; } @@ -2101,17 +2151,47 @@ static void nfsd4_del_conns(struct nfsd4_session *s) static void __free_session(struct nfsd4_session *ses) { - free_session_slots(ses); + free_session_slots(ses, 0); + xa_destroy(&ses->se_slots); kfree(ses); } static void free_session(struct nfsd4_session *ses) { nfsd4_del_conns(ses); - nfsd4_put_drc_mem(&ses->se_fchannel); __free_session(ses); } +static unsigned long +nfsd_slot_count(struct shrinker *s, struct shrink_control *sc) +{ + unsigned long cnt = atomic_read(&nfsd_total_target_slots); + + return cnt ? cnt : SHRINK_EMPTY; +} + +static unsigned long +nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc) +{ + struct nfsd4_session *ses; + unsigned long scanned = 0; + unsigned long freed = 0; + + spin_lock(&nfsd_session_list_lock); + list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) { + freed += reduce_session_slots(ses, 1); + scanned += 1; + if (scanned >= sc->nr_to_scan) { + /* Move starting point for next scan */ + list_move(&nfsd_session_list, &ses->se_all_sessions); + break; + } + } + spin_unlock(&nfsd_session_list_lock); + sc->nr_scanned = scanned; + return freed; +} + static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) { int idx; @@ -2122,17 +2202,24 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru INIT_LIST_HEAD(&new->se_conns); - new->se_cb_seq_nr = 1; - new->se_flags = cses->flags; + atomic_set(&new->se_ref, 0); + new->se_dead = false; new->se_cb_prog = cses->callback_prog; new->se_cb_sec = cses->cb_sec; - atomic_set(&new->se_ref, 0); + + for (idx = 0; idx < NFSD_BC_SLOT_TABLE_SIZE; ++idx) + new->se_cb_seq_nr[idx] = 1; + idx = hash_sessionid(&new->se_sessionid); list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); spin_lock(&clp->cl_lock); list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); + spin_lock(&nfsd_session_list_lock); + list_add_tail(&new->se_all_sessions, &nfsd_session_list); + spin_unlock(&nfsd_session_list_lock); + { struct sockaddr *sa = svc_addr(rqstp); /* @@ -2202,6 +2289,9 @@ unhash_session(struct nfsd4_session *ses) spin_lock(&ses->se_client->cl_lock); list_del(&ses->se_perclnt); spin_unlock(&ses->se_client->cl_lock); + spin_lock(&nfsd_session_list_lock); + list_del(&ses->se_all_sessions); + spin_unlock(&nfsd_session_list_lock); } /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ @@ -2219,21 +2309,16 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) return 1; } -/* - * XXX Should we use a slab cache ? - * This type of memory management is somewhat inefficient, but we use it - * anyway since SETCLIENTID is not a common operation. - */ static struct nfs4_client *alloc_client(struct xdr_netobj name, struct nfsd_net *nn) { struct nfs4_client *clp; int i; - if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) { + if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients && + atomic_read(&nn->nfsd_courtesy_clients) > 0) mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); - return NULL; - } + clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; @@ -2245,6 +2330,10 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name, GFP_KERNEL); if (!clp->cl_ownerstr_hashtbl) goto err_no_hashtbl; + clp->cl_callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0); + if (!clp->cl_callback_wq) + goto err_no_callback_wq; + for (i = 0; i < OWNER_HASH_SIZE; i++) INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]); INIT_LIST_HEAD(&clp->cl_sessions); @@ -2267,6 +2356,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name, spin_lock_init(&clp->cl_lock); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; +err_no_callback_wq: + kfree(clp->cl_ownerstr_hashtbl); err_no_hashtbl: kfree(clp->cl_name.data); err_no_name: @@ -2280,6 +2371,7 @@ static void __free_client(struct kref *k) struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs); free_svc_cred(&clp->cl_cred); + destroy_workqueue(clp->cl_callback_wq); kfree(clp->cl_ownerstr_hashtbl); kfree(clp->cl_name.data); kfree(clp->cl_nii_domain.data); @@ -2335,8 +2427,12 @@ unhash_client_locked(struct nfs4_client *clp) } list_del_init(&clp->cl_lru); spin_lock(&clp->cl_lock); - list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + spin_lock(&nfsd_session_list_lock); + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) { list_del_init(&ses->se_hash); + list_del_init(&ses->se_all_sessions); + } + spin_unlock(&nfsd_session_list_lock); spin_unlock(&clp->cl_lock); } @@ -2352,7 +2448,11 @@ unhash_client(struct nfs4_client *clp) static __be32 mark_client_expired_locked(struct nfs4_client *clp) { - if (atomic_read(&clp->cl_rpc_users)) + int users = atomic_read(&clp->cl_rpc_users); + + trace_nfsd_mark_client_expired(clp, users); + + if (users) return nfserr_jukebox; unhash_client_locked(clp); return nfs_ok; @@ -2365,9 +2465,8 @@ __destroy_client(struct nfs4_client *clp) int i; struct nfs4_openowner *oo; struct nfs4_delegation *dp; - struct list_head reaplist; + LIST_HEAD(reaplist); - INIT_LIST_HEAD(&reaplist); spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); @@ -2655,6 +2754,7 @@ static const char *cb_state2str(int state) static int client_info_show(struct seq_file *m, void *v) { struct inode *inode = file_inode(m->file); + struct nfsd4_session *ses; struct nfs4_client *clp; u64 clid; @@ -2688,9 +2788,19 @@ static int client_info_show(struct seq_file *m, void *v) clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec); } seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state)); - seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr); + seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr); seq_printf(m, "admin-revoked states: %d\n", atomic_read(&clp->cl_admin_revoked)); + spin_lock(&clp->cl_lock); + seq_printf(m, "session slots:"); + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + seq_printf(m, " %u", ses->se_fchannel.maxreqs); + seq_printf(m, "\nsession target slots:"); + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + seq_printf(m, " %u", ses->se_target_maxslots); + spin_unlock(&clp->cl_lock); + seq_puts(m, "\n"); + drop_client(clp); return 0; @@ -2785,15 +2895,18 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) deny & NFS4_SHARE_ACCESS_READ ? "r" : "-", deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-"); - spin_lock(&nf->fi_lock); - file = find_any_file_locked(nf); - if (file) { - nfs4_show_superblock(s, file); - seq_puts(s, ", "); - nfs4_show_fname(s, file); - seq_puts(s, ", "); - } - spin_unlock(&nf->fi_lock); + if (nf) { + spin_lock(&nf->fi_lock); + file = find_any_file_locked(nf); + if (file) { + nfs4_show_superblock(s, file); + seq_puts(s, ", "); + nfs4_show_fname(s, file); + seq_puts(s, ", "); + } + spin_unlock(&nf->fi_lock); + } else + seq_puts(s, "closed, "); nfs4_show_owner(s, oo); if (st->sc_status & SC_STATUS_ADMIN_REVOKED) seq_puts(s, ", admin-revoked"); @@ -2840,6 +2953,21 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) return 0; } +static char *nfs4_show_deleg_type(u32 dl_type) +{ + switch (dl_type) { + case OPEN_DELEGATE_READ: + return "r"; + case OPEN_DELEGATE_WRITE: + return "w"; + case OPEN_DELEGATE_READ_ATTRS_DELEG: + return "ra"; + case OPEN_DELEGATE_WRITE_ATTRS_DELEG: + return "wa"; + } + return "?"; +} + static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) { struct nfs4_delegation *ds; @@ -2853,8 +2981,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) nfs4_show_stateid(s, &st->sc_stateid); seq_puts(s, ": { type: deleg, "); - seq_printf(s, "access: %s", - ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w"); + seq_printf(s, "access: %s", nfs4_show_deleg_type(ds->dl_type)); /* XXX: lease time, whether it's being recalled. */ @@ -3042,12 +3169,8 @@ static void nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - spin_lock(&nn->client_lock); - clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); - put_client_renew_locked(clp); - spin_unlock(&nn->client_lock); + drop_client(clp); } static int @@ -3055,7 +3178,10 @@ nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task) { struct nfs4_cb_fattr *ncf = container_of(cb, struct nfs4_cb_fattr, ncf_getattr); + struct nfs4_delegation *dp = + container_of(ncf, struct nfs4_delegation, dl_cb_fattr); + trace_nfsd_cb_getattr_done(&dp->dl_stid.sc_stateid, task); ncf->ncf_cb_status = task->tk_status; switch (task->tk_status) { case -NFS4ERR_DELAY: @@ -3075,18 +3201,18 @@ nfsd4_cb_getattr_release(struct nfsd4_callback *cb) container_of(ncf, struct nfs4_delegation, dl_cb_fattr); nfs4_put_stid(&dp->dl_stid); - clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags); - wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY); } static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { .done = nfsd4_cb_recall_any_done, .release = nfsd4_cb_recall_any_release, + .opcode = OP_CB_RECALL_ANY, }; static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = { .done = nfsd4_cb_getattr_done, .release = nfsd4_cb_getattr_release, + .opcode = OP_CB_GETATTR, }; static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf) @@ -3094,11 +3220,15 @@ static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf) struct nfs4_delegation *dp = container_of(ncf, struct nfs4_delegation, dl_cb_fattr); - if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags)) + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ncf->ncf_getattr.cb_flags)) return; + /* set to proper status when nfsd4_cb_getattr_done runs */ ncf->ncf_cb_status = NFS4ERR_IO; + /* ensure that wake_bit is done when RUNNING is cleared */ + set_bit(NFSD4_CALLBACK_WAKE, &ncf->ncf_getattr.cb_flags); + refcount_inc(&dp->dl_stid.sc_count); nfsd4_run_cb(&ncf->ncf_getattr); } @@ -3126,7 +3256,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, kref_init(&clp->cl_nfsdfs.cl_ref); nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL); clp->cl_time = ktime_get_boottime_seconds(); - clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); clp->cl_cb_session = NULL; @@ -3453,7 +3582,7 @@ static bool client_has_state(struct nfs4_client *clp) #endif || !list_empty(&clp->cl_delegations) || !list_empty(&clp->cl_sessions) - || !list_empty(&clp->async_copies); + || nfsd4_has_active_async_copies(clp); } static __be32 copy_impl_id(struct nfs4_client *clp, @@ -3491,6 +3620,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __func__, rqstp, exid, exid->clname.len, exid->clname.data, addr_str, exid->flags, exid->spa_how); + exid->server_impl_name = kasprintf(GFP_KERNEL, "%s %s %s %s", + utsname()->sysname, utsname()->release, + utsname()->version, utsname()->machine); + if (!exid->server_impl_name) + return nfserr_jukebox; + if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; @@ -3628,6 +3763,23 @@ out_copy: exid->seqid = conf->cl_cs_slot.sl_seqid + 1; nfsd4_set_ex_flags(conf, exid); + exid->nii_domain.len = sizeof("kernel.org") - 1; + exid->nii_domain.data = "kernel.org"; + + /* + * Note that RFC 8881 places no length limit on + * nii_name, but this implementation permits no + * more than NFS4_OPAQUE_LIMIT bytes. + */ + exid->nii_name.len = strlen(exid->server_impl_name); + if (exid->nii_name.len > NFS4_OPAQUE_LIMIT) + exid->nii_name.len = NFS4_OPAQUE_LIMIT; + exid->nii_name.data = exid->server_impl_name; + + /* just send zeros - the date is in nii_name */ + exid->nii_time.tv_sec = 0; + exid->nii_time.tv_nsec = 0; + dprintk("nfsd4_exchange_id seqid %d flags %x\n", conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags); status = nfs_ok; @@ -3644,14 +3796,18 @@ out_nolock: return status; } -static __be32 -check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) +void +nfsd4_exchange_id_release(union nfsd4_op_u *u) { - dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid, - slot_seqid); + struct nfsd4_exchange_id *exid = &u->exchange_id; + kfree(exid->server_impl_name); +} + +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags) +{ /* The slot is in use, and no response has been sent. */ - if (slot_inuse) { + if (flags & NFSD4_SLOT_INUSE) { if (seqid == slot_seqid) return nfserr_jukebox; else @@ -3660,6 +3816,8 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) /* Note unsigned 32-bit arithmetic handles wraparound: */ if (likely(seqid == slot_seqid + 1)) return nfs_ok; + if ((flags & NFSD4_SLOT_REUSED) && seqid == 1) + return nfs_ok; if (seqid == slot_seqid) return nfserr_replay_cache; return nfserr_seq_misordered; @@ -3718,17 +3876,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs ca->maxresp_cached = min_t(u32, ca->maxresp_cached, NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); - /* - * Note decreasing slot size below client's request may make it - * difficult for client to function correctly, whereas - * decreasing the number of slots will (just?) affect - * performance. When short on memory we therefore prefer to - * decrease number of slots instead of their size. Clients that - * request larger slots than they need will get poor results: - * Note that we always allow at least one slot, because our - * accounting is soft and provides no guarantees either way. - */ - ca->maxreqs = nfsd4_get_drc_mem(ca, nn); return nfs_ok; } @@ -3806,11 +3953,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, return status; status = check_backchannel_attrs(&cr_ses->back_channel); if (status) - goto out_release_drc_mem; + goto out_err; status = nfserr_jukebox; new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel); if (!new) - goto out_release_drc_mem; + goto out_err; conn = alloc_conn_from_crses(rqstp, cr_ses); if (!conn) goto out_free_session; @@ -3826,20 +3973,28 @@ nfsd4_create_session(struct svc_rqst *rqstp, } /* RFC 8881 Section 18.36.4 Phase 2: Sequence ID processing. */ - if (conf) + if (conf) { cs_slot = &conf->cl_cs_slot; - else + trace_nfsd_slot_seqid_conf(conf, cr_ses); + } else { cs_slot = &unconf->cl_cs_slot; + trace_nfsd_slot_seqid_unconf(unconf, cr_ses); + } status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); - if (status) { - if (status == nfserr_replay_cache) { - status = nfsd4_replay_create_session(cr_ses, cs_slot); - goto out_free_conn; - } + switch (status) { + case nfs_ok: + cs_slot->sl_seqid++; + cr_ses->seqid = cs_slot->sl_seqid; + break; + case nfserr_replay_cache: + status = nfsd4_replay_create_session(cr_ses, cs_slot); + fallthrough; + case nfserr_jukebox: + /* The server MUST NOT cache NFS4ERR_DELAY */ + goto out_free_conn; + default: goto out_cache_error; } - cs_slot->sl_seqid++; - cr_ses->seqid = cs_slot->sl_seqid; /* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */ if (conf) { @@ -3859,10 +4014,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = mark_client_expired_locked(old); - if (status) { - old = NULL; - goto out_cache_error; - } + if (status) + goto out_expired_error; trace_nfsd_clid_replaced(&old->cl_clientid); } move_to_confirmed(unconf); @@ -3875,6 +4028,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, cr_ses->flags &= ~SESSION4_PERSIST; /* Upshifting from TCP to RDMA is not supported */ cr_ses->flags &= ~SESSION4_RDMA; + /* Report the correct number of backchannel slots */ + cr_ses->back_channel.maxreqs = new->se_cb_highest_slot + 1; init_session(rqstp, new, conf, cr_ses); nfsd4_get_session_locked(new); @@ -3894,17 +4049,24 @@ nfsd4_create_session(struct svc_rqst *rqstp, expire_client(old); return status; +out_expired_error: + /* + * Revert the slot seq_nr change so the server will process + * the client's resend instead of returning a cached response. + */ + if (status == nfserr_jukebox) { + cs_slot->sl_seqid--; + cr_ses->seqid = cs_slot->sl_seqid; + goto out_free_conn; + } out_cache_error: nfsd4_cache_create_session(cr_ses, cs_slot, status); out_free_conn: spin_unlock(&nn->client_lock); free_conn(conn); - if (old) - expire_client(old); out_free_session: __free_session(new); -out_release_drc_mem: - nfsd4_put_drc_mem(&cr_ses->fore_channel); +out_err: return status; } @@ -4202,16 +4364,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (seq->slotid >= session->se_fchannel.maxreqs) goto out_put_session; - slot = session->se_slots[seq->slotid]; + slot = xa_load(&session->se_slots, seq->slotid); dprintk("%s: slotid %d\n", __func__, seq->slotid); - /* We do not negotiate the number of slots yet, so set the - * maxslots to the session maxreqs which is used to encode - * sr_highest_slotid and the sr_target_slot id to maxslots */ - seq->maxslots = session->se_fchannel.maxreqs; - - status = check_slot_seqid(seq->seqid, slot->sl_seqid, - slot->sl_flags & NFSD4_SLOT_INUSE); + trace_nfsd_slot_seqid_sequence(clp, seq, slot); + status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags); if (status == nfserr_replay_cache) { status = nfserr_seq_misordered; if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) @@ -4236,6 +4393,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out_put_session; + if (session->se_target_maxslots < session->se_fchannel.maxreqs && + slot->sl_generation == session->se_slot_gen && + seq->maxslots <= session->se_target_maxslots) + /* Client acknowledged our reduce maxreqs */ + free_session_slots(session, session->se_target_maxslots); + buflen = (seq->cachethis) ? session->se_fchannel.maxresp_cached : session->se_fchannel.maxresp_sz; @@ -4243,12 +4406,14 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfserr_rep_too_big; if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack)) goto out_put_session; - svc_reserve(rqstp, buflen); + svc_reserve_auth(rqstp, buflen); status = nfs_ok; - /* Success! bump slot seqid */ + /* Success! accept new slot seqid */ slot->sl_seqid = seq->seqid; + slot->sl_flags &= ~NFSD4_SLOT_REUSED; slot->sl_flags |= NFSD4_SLOT_INUSE; + slot->sl_generation = session->se_slot_gen; if (seq->cachethis) slot->sl_flags |= NFSD4_SLOT_CACHETHIS; else @@ -4258,7 +4423,51 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cstate->session = session; cstate->clp = clp; + /* + * If the client ever uses the highest available slot, + * gently try to allocate another 20%. This allows + * fairly quick growth without grossly over-shooting what + * the client might use. + */ + if (seq->slotid == session->se_fchannel.maxreqs - 1 && + session->se_target_maxslots >= session->se_fchannel.maxreqs && + session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) { + int s = session->se_fchannel.maxreqs; + int cnt = DIV_ROUND_UP(s, 5); + void *prev_slot; + + do { + /* + * GFP_NOWAIT both allows allocation under a + * spinlock, and only succeeds if there is + * plenty of memory. + */ + slot = nfsd4_alloc_slot(&session->se_fchannel, s, + GFP_NOWAIT); + prev_slot = xa_load(&session->se_slots, s); + if (xa_is_value(prev_slot) && slot) { + slot->sl_seqid = xa_to_value(prev_slot); + slot->sl_flags |= NFSD4_SLOT_REUSED; + } + if (slot && + !xa_is_err(xa_store(&session->se_slots, s, slot, + GFP_NOWAIT))) { + s += 1; + session->se_fchannel.maxreqs = s; + atomic_add(s - session->se_target_maxslots, + &nfsd_total_target_slots); + session->se_target_maxslots = s; + } else { + kfree(slot); + slot = NULL; + } + } while (slot && --cnt > 0); + } + out: + seq->maxslots = max(session->se_target_maxslots, seq->maxslots); + seq->target_maxslots = session->se_target_maxslots; + switch (clp->cl_cb_state) { case NFSD4_CB_DOWN: seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; @@ -4610,8 +4819,8 @@ out: static unsigned long nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) { - int count; struct nfsd_net *nn = shrink->private_data; + long count; count = atomic_read(&nn->nfsd_courtesy_clients); if (!count) @@ -4651,21 +4860,32 @@ nfsd4_init_leases_net(struct nfsd_net *nn) atomic_set(&nn->nfsd_courtesy_clients, 0); } +enum rp_lock { + RP_UNLOCKED, + RP_LOCKED, + RP_UNHASHED, +}; + static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; rp->rp_buflen = 0; rp->rp_buf = rp->rp_ibuf; - mutex_init(&rp->rp_mutex); + rp->rp_locked = RP_UNLOCKED; } -static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, - struct nfs4_stateowner *so) +static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, + struct nfs4_stateowner *so) { if (!nfsd4_has_session(cstate)) { - mutex_lock(&so->so_replay.rp_mutex); + wait_var_event(&so->so_replay.rp_locked, + cmpxchg(&so->so_replay.rp_locked, + RP_UNLOCKED, RP_LOCKED) != RP_LOCKED); + if (so->so_replay.rp_locked == RP_UNHASHED) + return -EAGAIN; cstate->replay_owner = nfs4_get_stateowner(so); } + return 0; } void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate) @@ -4674,7 +4894,7 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate) if (so != NULL) { cstate->replay_owner = NULL; - mutex_unlock(&so->so_replay.rp_mutex); + store_release_wake_up(&so->so_replay.rp_locked, RP_UNLOCKED); nfs4_put_stateowner(so); } } @@ -4855,34 +5075,46 @@ nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) } static struct nfs4_openowner * -alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, - struct nfsd4_compound_state *cstate) +find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, + struct nfsd4_compound_state *cstate) { struct nfs4_client *clp = cstate->clp; - struct nfs4_openowner *oo, *ret; + struct nfs4_openowner *oo, *new = NULL; - oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); - if (!oo) - return NULL; - oo->oo_owner.so_ops = &openowner_ops; - oo->oo_owner.so_is_open_owner = 1; - oo->oo_owner.so_seqid = open->op_seqid; - oo->oo_flags = 0; - if (nfsd4_has_session(cstate)) - oo->oo_flags |= NFS4_OO_CONFIRMED; - oo->oo_time = 0; - oo->oo_last_closed_stid = NULL; - INIT_LIST_HEAD(&oo->oo_close_lru); +retry: spin_lock(&clp->cl_lock); - ret = find_openstateowner_str_locked(strhashval, open, clp); - if (ret == NULL) { - hash_openowner(oo, clp, strhashval); - ret = oo; - } else - nfs4_free_stateowner(&oo->oo_owner); - + oo = find_openstateowner_str(strhashval, open, clp); + if (!oo && new) { + hash_openowner(new, clp, strhashval); + spin_unlock(&clp->cl_lock); + return new; + } spin_unlock(&clp->cl_lock); - return ret; + + if (oo && !(oo->oo_flags & NFS4_OO_CONFIRMED)) { + /* Replace unconfirmed owners without checking for replay. */ + release_openowner(oo); + oo = NULL; + } + if (oo) { + if (new) + nfs4_free_stateowner(&new->oo_owner); + return oo; + } + + new = alloc_stateowner(openowner_slab, &open->op_owner, clp); + if (!new) + return NULL; + new->oo_owner.so_ops = &openowner_ops; + new->oo_owner.so_is_open_owner = 1; + new->oo_owner.so_seqid = open->op_seqid; + new->oo_flags = 0; + if (nfsd4_has_session(cstate)) + new->oo_flags |= NFS4_OO_CONFIRMED; + new->oo_time = 0; + new->oo_last_closed_stid = NULL; + INIT_LIST_HEAD(&new->oo_close_lru); + goto retry; } static struct nfs4_ol_stateid * @@ -4902,6 +5134,12 @@ retry: spin_lock(&oo->oo_owner.so_client->cl_lock); spin_lock(&fp->fi_lock); + if (nfs4_openowner_unhashed(oo)) { + mutex_unlock(&stp->st_mutex); + stp = NULL; + goto out_unlock; + } + retstp = nfsd4_find_existing_open(fp, open); if (retstp) goto out_unlock; @@ -4958,7 +5196,10 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) * Wait for the refcount to drop to 2. Since it has been unhashed, * there should be no danger of the refcount going back up again at * this point. + * Some threads with a reference might be waiting for rp_locked, + * so tell them to stop waiting. */ + store_release_wake_up(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED); wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2); release_all_access(s); @@ -5172,10 +5413,16 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = { .prepare = nfsd4_cb_recall_prepare, .done = nfsd4_cb_recall_done, .release = nfsd4_cb_recall_release, + .opcode = OP_CB_RECALL, }; static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { + bool queued; + + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags)) + return; + /* * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease @@ -5184,7 +5431,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * we know it's safe to take a reference. */ refcount_inc(&dp->dl_stid.sc_count); - WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall)); + queued = nfsd4_run_cb(&dp->dl_recall); + WARN_ON_ONCE(!queued); + if (!queued) + refcount_dec(&dp->dl_stid.sc_count); } /* Called from break_lease() with flc_lock held. */ @@ -5231,11 +5481,8 @@ static bool nfsd_breaker_owns_lease(struct file_lease *fl) struct svc_rqst *rqst; struct nfs4_client *clp; - if (!i_am_nfsd()) - return false; - rqst = kthread_data(current); - /* Note rq_prog == NFS_ACL_PROGRAM is also possible: */ - if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4) + rqst = nfsd_current_rqst(); + if (!nfsd_v4client(rqst)) return false; clp = *(rqst->rq_lease_breaker); return dl->dl_stid.sc_client == clp; @@ -5331,27 +5578,19 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, clp = cstate->clp; strhashval = ownerstr_hashval(&open->op_owner); - oo = find_openstateowner_str(strhashval, open, clp); +retry: + oo = find_or_alloc_open_stateowner(strhashval, open, cstate); open->op_openowner = oo; - if (!oo) { - goto new_owner; - } - if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { - /* Replace unconfirmed owners without checking for replay. */ - release_openowner(oo); - open->op_openowner = NULL; - goto new_owner; + if (!oo) + return nfserr_jukebox; + if (nfsd4_cstate_assign_replay(cstate, &oo->oo_owner) == -EAGAIN) { + nfs4_put_stateowner(&oo->oo_owner); + goto retry; } status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); if (status) return status; - goto alloc_stateid; -new_owner: - oo = alloc_init_open_stateowner(strhashval, open, cstate); - if (oo == NULL) - return nfserr_jukebox; - open->op_openowner = oo; -alloc_stateid: + open->op_stp = nfs4_alloc_open_stateid(clp); if (!open->op_stp) return nfserr_jukebox; @@ -5369,7 +5608,7 @@ alloc_stateid: static inline __be32 nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) { - if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ)) + if (!(flags & RD_STATE) && deleg_is_read(dp->dl_type)) return nfserr_openmode; else return nfs_ok; @@ -5601,8 +5840,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; } -static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp, - int flag) +static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp) { struct file_lease *fl; @@ -5611,7 +5849,7 @@ static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp, return NULL; fl->fl_lmops = &nfsd_lease_mng_ops; fl->c.flc_flags = FL_DELEG; - fl->c.flc_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; + fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK; fl->c.flc_owner = (fl_owner_t)dp; fl->c.flc_pid = current->tgid; fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file; @@ -5722,17 +5960,30 @@ nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf) return 0; } +#ifdef CONFIG_NFSD_V4_DELEG_TIMESTAMPS +static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open) +{ + return open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS; +} +#else /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */ +static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open) +{ + return false; +} +#endif /* CONFIG NFSD_V4_DELEG_TIMESTAMPS */ + static struct nfs4_delegation * nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *parent) { - int status = 0; + bool deleg_ts = nfsd4_want_deleg_timestamps(open); struct nfs4_client *clp = stp->st_stid.sc_client; struct nfs4_file *fp = stp->st_stid.sc_file; struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; struct nfs4_delegation *dp; struct nfsd_file *nf = NULL; struct file_lease *fl; + int status = 0; u32 dl_type; /* @@ -5757,7 +6008,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, */ if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) { nf = find_rw_file(fp); - dl_type = NFS4_OPEN_DELEGATE_WRITE; + dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE; } /* @@ -5766,12 +6017,21 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, */ if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) { nf = find_readable_file(fp); - dl_type = NFS4_OPEN_DELEGATE_READ; + dl_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ; } if (!nf) return ERR_PTR(-EAGAIN); + /* + * File delegations and associated locks cannot be recovered if the + * export is from an NFS proxy server. + */ + if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) { + nfsd_file_put(nf); + return ERR_PTR(-EOPNOTSUPP); + } + spin_lock(&state_lock); spin_lock(&fp->fi_lock); if (nfs4_delegation_exists(clp, fp)) @@ -5798,7 +6058,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (!dp) goto out_delegees; - fl = nfs4_alloc_init_lease(dp, dl_type); + fl = nfs4_alloc_init_lease(dp); if (!fl) goto out_clnt_odstate; @@ -5821,7 +6081,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, /* * Now that the deleg is set, check again to ensure that nothing - * raced in and changed the mode while we weren't lookng. + * raced in and changed the mode while we weren't looking. */ status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file); if (status) @@ -5855,25 +6115,47 @@ out_delegees: static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; if (status == -EAGAIN) open->op_why_no_deleg = WND4_CONTENTION; else { open->op_why_no_deleg = WND4_RESOURCE; switch (open->op_deleg_want) { - case NFS4_SHARE_WANT_READ_DELEG: - case NFS4_SHARE_WANT_WRITE_DELEG: - case NFS4_SHARE_WANT_ANY_DELEG: + case OPEN4_SHARE_ACCESS_WANT_READ_DELEG: + case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG: + case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG: break; - case NFS4_SHARE_WANT_CANCEL: + case OPEN4_SHARE_ACCESS_WANT_CANCEL: open->op_why_no_deleg = WND4_CANCELLED; break; - case NFS4_SHARE_WANT_NO_DELEG: + case OPEN4_SHARE_ACCESS_WANT_NO_DELEG: WARN_ON_ONCE(1); } } } +static bool +nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh, + struct kstat *stat) +{ + struct nfsd_file *nf = find_rw_file(dp->dl_stid.sc_file); + struct path path; + int rc; + + if (!nf) + return false; + + path.mnt = currentfh->fh_export->ex_path.mnt; + path.dentry = file_dentry(nf->nf_file); + + rc = vfs_getattr(&path, stat, + (STATX_MODE | STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE), + AT_STATX_SYNC_AS_STAT); + + nfsd_file_put(nf); + return rc == 0; +} + /* * The Linux NFS server does not offer write delegations to NFSv4.0 * clients in order to avoid conflicts between write delegations and @@ -5902,14 +6184,14 @@ static void nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *currentfh) { - struct nfs4_delegation *dp; struct nfs4_openowner *oo = openowner(stp->st_stateowner); + bool deleg_ts = nfsd4_want_deleg_timestamps(open); struct nfs4_client *clp = stp->st_stid.sc_client; struct svc_fh *parent = NULL; - int cb_up; - int status = 0; + struct nfs4_delegation *dp; struct kstat stat; - struct path path; + int status = 0; + int cb_up; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); open->op_recall = false; @@ -5945,30 +6227,27 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE; - trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid); - path.mnt = currentfh->fh_export->ex_path.mnt; - path.dentry = currentfh->fh_dentry; - if (vfs_getattr(&path, &stat, - (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE), - AT_STATX_SYNC_AS_STAT)) { + if (!nfs4_delegation_stat(dp, currentfh, &stat)) { nfs4_put_stid(&dp->dl_stid); destroy_delegation(dp); goto out_no_deleg; } + open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : + OPEN_DELEGATE_WRITE; dp->dl_cb_fattr.ncf_cur_fsize = stat.size; - dp->dl_cb_fattr.ncf_initial_cinfo = - nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry)); + dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat); + trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid); } else { - open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; + open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : + OPEN_DELEGATE_READ; trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid); } nfs4_put_stid(&dp->dl_stid); return; out_no_deleg: - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; + open->op_delegate_type = OPEN_DELEGATE_NONE; if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && - open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { + open->op_delegate_type != OPEN_DELEGATE_NONE) { dprintk("NFSD: WARNING: refusing delegation reclaim\n"); open->op_recall = true; } @@ -5982,21 +6261,32 @@ out_no_deleg: static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, struct nfs4_delegation *dp) { - if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG && - dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; - open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE; - } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG && - dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; - open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE; + if (deleg_is_write(dp->dl_type)) { + if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) { + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE; + } else if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) { + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE; + } } /* Otherwise the client must be confused wanting a delegation * it already has, therefore we don't return - * NFS4_OPEN_DELEGATE_NONE_EXT and reason. + * OPEN_DELEGATE_NONE_EXT and reason. */ } +/* Are we returning only a delegation stateid? */ +static bool open_xor_delegation(struct nfsd4_open *open) +{ + if (!(open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION)) + return false; + /* Did we actually get a delegation? */ + if (!deleg_is_read(open->op_delegate_type) && !deleg_is_write(open->op_delegate_type)) + return false; + return true; +} + /** * nfsd4_process_open2 - finish open processing * @rqstp: the RPC transaction being executed @@ -6042,6 +6332,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (!stp) { stp = init_open_stateid(fp, open); + if (!stp) { + status = nfserr_jukebox; + goto out; + } + if (!open->op_stp) new_stp = true; } @@ -6077,8 +6372,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf mutex_unlock(&stp->st_mutex); if (nfsd4_has_session(&resp->cstate)) { - if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { - open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_NO_DELEG) { + open->op_delegate_type = OPEN_DELEGATE_NONE_EXT; open->op_why_no_deleg = WND4_NOT_WANTED; goto nodeleg; } @@ -6089,12 +6384,23 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * OPEN succeeds even if we fail. */ nfs4_open_delegation(open, stp, &resp->cstate.current_fh); + + /* + * If there is an existing open stateid, it must be updated and + * returned. Only respect WANT_OPEN_XOR_DELEGATION when a new + * open stateid would have to be created. + */ + if (new_stp && open_xor_delegation(open)) { + memcpy(&open->op_stateid, &zero_stateid, sizeof(open->op_stateid)); + open->op_rflags |= OPEN4_RESULT_NO_OPEN_STATEID; + release_open_stateid(stp); + } nodeleg: status = nfs_ok; trace_nfsd_open(&stp->st_stid.sc_stateid); out: /* 4.1 client trying to upgrade/downgrade delegation? */ - if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp && + if (open->op_delegate_type == OPEN_DELEGATE_NONE && dp && open->op_deleg_want) nfsd4_deleg_xgrade_none_ext(open, dp); @@ -6105,7 +6411,7 @@ out: /* * To finish the open response, we just need to set the rflags. */ - open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; + open->op_rflags |= NFS4_OPEN_RESULT_LOCKTYPE_POSIX; if (nfsd4_has_session(&resp->cstate)) open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK; else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED)) @@ -6122,12 +6428,8 @@ out: void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { - if (open->op_openowner) { - struct nfs4_stateowner *so = &open->op_openowner->oo_owner; - - nfsd4_cstate_assign_replay(cstate, so); - nfs4_put_stateowner(so); - } + if (open->op_openowner) + nfs4_put_stateowner(&open->op_openowner->oo_owner); if (open->op_file) kmem_cache_free(file_slab, open->op_file); if (open->op_stp) @@ -6237,7 +6539,6 @@ void nfsd4_ssc_init_umount_work(struct nfsd_net *nn) INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list); init_waitqueue_head(&nn->nfsd_ssc_waitq); } -EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work); /* * This is called when nfsd is being shutdown, after all inter_ssc @@ -6482,6 +6783,7 @@ nfs4_laundromat(struct nfsd_net *nn) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); + nfsd4_async_copy_reaper(nn); nfs4_get_client_reaplist(nn, &reaplist, <); nfs4_process_client_reaplist(&reaplist); @@ -6492,6 +6794,7 @@ nfs4_laundromat(struct nfsd_net *nn) dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); if (!state_expired(<, dp->dl_time)) break; + refcount_inc(&dp->dl_stid.sc_count); unhash_delegation_locked(dp, SC_STATUS_REVOKED); list_add(&dp->dl_recall_lru, &reaplist); } @@ -6585,40 +6888,34 @@ deleg_reaper(struct nfsd_net *nn) { struct list_head *pos, *next; struct nfs4_client *clp; - struct list_head cblist; - INIT_LIST_HEAD(&cblist); spin_lock(&nn->client_lock); list_for_each_safe(pos, next, &nn->client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); - if (clp->cl_state != NFSD4_ACTIVE || - list_empty(&clp->cl_delegations) || - atomic_read(&clp->cl_delegs_in_recall) || - test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) || - (ktime_get_boottime_seconds() - - clp->cl_ra_time < 5)) { + + if (clp->cl_state != NFSD4_ACTIVE) + continue; + if (list_empty(&clp->cl_delegations)) + continue; + if (atomic_read(&clp->cl_delegs_in_recall)) + continue; + if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &clp->cl_ra->ra_cb.cb_flags)) + continue; + if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5) + continue; + if (clp->cl_cb_state != NFSD4_CB_UP) continue; - } - list_add(&clp->cl_ra_cblist, &cblist); /* release in nfsd4_cb_recall_any_release */ - atomic_inc(&clp->cl_rpc_users); - set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags); + kref_get(&clp->cl_nfsdfs.cl_ref); clp->cl_ra_time = ktime_get_boottime_seconds(); - } - spin_unlock(&nn->client_lock); - - while (!list_empty(&cblist)) { - clp = list_first_entry(&cblist, struct nfs4_client, - cl_ra_cblist); - list_del_init(&clp->cl_ra_cblist); clp->cl_ra->ra_keep = 0; - clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG); clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) | BIT(RCA4_TYPE_MASK_WDATA_DLG); trace_nfsd_cb_recall_any(clp->cl_ra); nfsd4_run_cb(&clp->cl_ra->ra_cb); } + spin_unlock(&nn->client_lock); } static void @@ -6783,7 +7080,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, */ statusmask |= SC_STATUS_REVOKED; - statusmask |= SC_STATUS_ADMIN_REVOKED; + statusmask |= SC_STATUS_ADMIN_REVOKED | SC_STATUS_FREEABLE; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) @@ -6858,7 +7155,8 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s, nf = nfs4_find_file(s, flags); if (nf) { - status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + status = nfsd_permission(&rqstp->rq_cred, + fhp->fh_export, fhp->fh_dentry, acc | NFSD_MAY_OWNER_OVERRIDE); if (status) { nfsd_file_put(nf); @@ -6989,11 +7287,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, *nfp = NULL; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { - if (cstid) - status = nfserr_bad_stateid; - else - status = check_special_stateids(net, fhp, stateid, - flags); + status = check_special_stateids(net, fhp, stateid, flags); goto done; } @@ -7106,9 +7400,12 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (s->sc_type) { case SC_TYPE_DELEG: if (s->sc_status & SC_STATUS_REVOKED) { + s->sc_status |= SC_STATUS_CLOSED; spin_unlock(&s->sc_lock); dp = delegstateid(s); - list_del_init(&dp->dl_recall_lru); + if (s->sc_status & SC_STATUS_FREEABLE) + list_del_init(&dp->dl_recall_lru); + s->sc_status |= SC_STATUS_FREED; spin_unlock(&cl->cl_lock); nfs4_put_stid(s); ret = nfs_ok; @@ -7191,12 +7488,16 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, trace_nfsd_preprocess(seqid, stateid); *stpp = NULL; +retry: status = nfsd4_lookup_stateid(cstate, stateid, typemask, statusmask, &s, nn); if (status) return status; stp = openlockstateid(s); - nfsd4_cstate_assign_replay(cstate, stp->st_stateowner); + if (nfsd4_cstate_assign_replay(cstate, stp->st_stateowner) == -EAGAIN) { + nfs4_put_stateowner(stp->st_stateowner); + goto retry; + } status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); if (!status) @@ -7338,7 +7639,7 @@ out: return status; } -static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) +static bool nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { struct nfs4_client *clp = s->st_stid.sc_client; bool unhashed; @@ -7355,11 +7656,11 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) list_for_each_entry(stp, &reaplist, st_locks) nfs4_free_cpntf_statelist(clp->net, &stp->st_stid); free_ol_stateid_reaplist(&reaplist); + return false; } else { spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); - if (unhashed) - move_to_close_lru(s, clp->net); + return unhashed; } } @@ -7375,6 +7676,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *stp; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); + bool need_move_to_close_list; dprintk("NFSD: nfsd4_close on file %pd\n", cstate->current_fh.fh_dentry); @@ -7399,8 +7701,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, */ nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid); - nfsd4_close_open_stateid(stp); + need_move_to_close_list = nfsd4_close_open_stateid(stp); mutex_unlock(&stp->st_mutex); + if (need_move_to_close_list) + move_to_close_lru(stp, net); /* v4.1+ suggests that we send a special stateid in here, since the * clients should just ignore this anyway. Since this is not useful @@ -7431,7 +7735,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, 0, &s, nn); + status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED, &s, nn); if (status) goto out; dp = delegstateid(s); @@ -7440,8 +7744,9 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto put_stateid; trace_nfsd_deleg_return(stateid); - wake_up_var(d_inode(cstate->current_fh.fh_dentry)); destroy_delegation(dp); + smp_mb__after_atomic(); + wake_up_var(d_inode(cstate->current_fh.fh_dentry)); put_stateid: nfs4_put_stid(&dp->dl_stid); out: @@ -7538,7 +7843,7 @@ nfsd4_lm_notify(struct file_lock *fl) if (queue) { trace_nfsd_cb_notify_lock(lo, nbl); - nfsd4_run_cb(&nbl->nbl_cb); + nfsd4_try_run_cb(&nbl->nbl_cb); } } @@ -7837,7 +8142,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_blocked_lock *nbl = NULL; struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; - struct super_block *sb; __be32 status = 0; int lkflg; int err; @@ -7854,12 +8158,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_lock_length(lock->lk_offset, lock->lk_length)) return nfserr_inval; - if ((status = fh_verify(rqstp, &cstate->current_fh, - S_IFREG, NFSD_MAY_LOCK))) { - dprintk("NFSD: nfsd4_lock: permission denied!\n"); + status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0); + if (status != nfs_ok) return status; + if (exportfs_cannot_lock(cstate->current_fh.fh_dentry->d_sb->s_export_op)) { + status = nfserr_notsupp; + goto out; } - sb = cstate->current_fh.fh_dentry->d_sb; if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) @@ -7912,9 +8217,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fp = lock_stp->st_stid.sc_file; switch (lock->lk_type) { case NFS4_READW_LT: - if (nfsd4_has_session(cstate) || - exportfs_lock_op_is_async(sb->s_export_op)) - flags |= FL_SLEEP; fallthrough; case NFS4_READ_LT: spin_lock(&fp->fi_lock); @@ -7925,9 +8227,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, type = F_RDLCK; break; case NFS4_WRITEW_LT: - if (nfsd4_has_session(cstate) || - exportfs_lock_op_is_async(sb->s_export_op)) - flags |= FL_SLEEP; fallthrough; case NFS4_WRITE_LT: spin_lock(&fp->fi_lock); @@ -7947,15 +8246,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - /* - * Most filesystems with their own ->lock operations will block - * the nfsd thread waiting to acquire the lock. That leads to - * deadlocks (we don't want every nfsd thread tied up waiting - * for file locks), so don't attempt blocking lock notifications - * on those filesystems: - */ - if (!exportfs_lock_op_is_async(sb->s_export_op)) - flags &= ~FL_SLEEP; + if (lock->lk_type & (NFS4_READW_LT | NFS4_WRITEW_LT) && + nfsd4_has_session(cstate) && + locks_can_async_lock(nf->nf_file->f_op)) + flags |= FL_SLEEP; nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); if (!nbl) { @@ -8210,6 +8504,11 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_lock_range; goto put_stateid; } + if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) { + status = nfserr_notsupp; + goto put_file; + } + file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); @@ -8297,7 +8596,7 @@ out: * @cstate: NFSv4 COMPOUND state * @u: RELEASE_LOCKOWNER arguments * - * Check if theree are any locks still held and if not - free the lockowner + * Check if there are any locks still held and if not, free the lockowner * and any lock state that is owned. * * Return values: @@ -8516,6 +8815,7 @@ static int nfs4_state_create_net(struct net *net) spin_lock_init(&nn->client_lock); spin_lock_init(&nn->s2s_cp_lock); idr_init(&nn->s2s_cp_stateids); + atomic_set(&nn->pending_async_copies, 0); spin_lock_init(&nn->blocked_locks_lock); INIT_LIST_HEAD(&nn->blocked_locks_lru); @@ -8604,7 +8904,6 @@ skip_grace: } /* initialization to perform when the nfsd service is started: */ - int nfs4_state_start(void) { @@ -8614,11 +8913,14 @@ nfs4_state_start(void) if (ret) return ret; - ret = nfsd4_create_callback_queue(); - if (ret) { + nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot"); + if (!nfsd_slot_shrinker) { rhltable_destroy(&nfs4_file_rhltable); - return ret; + return -ENOMEM; } + nfsd_slot_shrinker->count_objects = nfsd_slot_count; + nfsd_slot_shrinker->scan_objects = nfsd_slot_scan; + shrinker_register(nfsd_slot_shrinker); set_max_delegations(); return 0; @@ -8632,7 +8934,7 @@ nfs4_state_shutdown_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); shrinker_free(nn->nfsd_client_shrinker); - cancel_work(&nn->nfsd_shrinker_work); + cancel_work_sync(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); @@ -8660,8 +8962,8 @@ nfs4_state_shutdown_net(struct net *net) void nfs4_state_shutdown(void) { - nfsd4_destroy_callback_queue(); rhltable_destroy(&nfs4_file_rhltable); + shrinker_free(nfsd_slot_shrinker); } static void @@ -8779,11 +9081,82 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, } /** + * set_cb_time - vet and set the timespec for a cb_getattr update + * @cb: timestamp from the CB_GETATTR response + * @orig: original timestamp in the inode + * @now: current time + * + * Given a timestamp in a CB_GETATTR response, check it against the + * current timestamp in the inode and the current time. Returns true + * if the inode's timestamp needs to be updated, and false otherwise. + * @cb may also be changed if the timestamp needs to be clamped. + */ +static bool set_cb_time(struct timespec64 *cb, const struct timespec64 *orig, + const struct timespec64 *now) +{ + + /* + * "When the time presented is before the original time, then the + * update is ignored." Also no need to update if there is no change. + */ + if (timespec64_compare(cb, orig) <= 0) + return false; + + /* + * "When the time presented is in the future, the server can either + * clamp the new time to the current time, or it may + * return NFS4ERR_DELAY to the client, allowing it to retry." + */ + if (timespec64_compare(cb, now) > 0) { + /* clamp it */ + *cb = *now; + } + + return true; +} + +static int cb_getattr_update_times(struct dentry *dentry, struct nfs4_delegation *dp) +{ + struct inode *inode = d_inode(dentry); + struct timespec64 now = current_time(inode); + struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr; + struct iattr attrs = { }; + int ret; + + if (deleg_attrs_deleg(dp->dl_type)) { + struct timespec64 atime = inode_get_atime(inode); + struct timespec64 mtime = inode_get_mtime(inode); + + attrs.ia_atime = ncf->ncf_cb_atime; + attrs.ia_mtime = ncf->ncf_cb_mtime; + + if (set_cb_time(&attrs.ia_atime, &atime, &now)) + attrs.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; + + if (set_cb_time(&attrs.ia_mtime, &mtime, &now)) { + attrs.ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET; + attrs.ia_ctime = attrs.ia_mtime; + } + } else { + attrs.ia_valid |= ATTR_MTIME | ATTR_CTIME; + attrs.ia_mtime = attrs.ia_ctime = now; + } + + if (!attrs.ia_valid) + return 0; + + attrs.ia_valid |= ATTR_DELEG; + inode_lock(inode); + ret = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL); + inode_unlock(inode); + return ret; +} + +/** * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict * @rqstp: RPC transaction context - * @inode: file to be checked for a conflict - * @modified: return true if file was modified - * @size: new size of file if modified is true + * @dentry: dentry of inode to be checked for a conflict + * @pdp: returned WRITE delegation, if one was found * * This function is called when there is a conflict between a write * delegation and a change/size GETATTR from another client. The server @@ -8793,83 +9166,90 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, * 18.7.4. * * Returns 0 if there is no conflict; otherwise an nfs_stat - * code is returned. + * code is returned. If @pdp is set to a non-NULL value, then the + * caller must put the reference. */ __be32 -nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode, - bool *modified, u64 *size) +nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry, + struct nfs4_delegation **pdp) { __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file_lock_context *ctx; + struct nfs4_delegation *dp = NULL; struct file_lease *fl; - struct nfs4_delegation *dp; - struct iattr attrs; struct nfs4_cb_fattr *ncf; + struct inode *inode = d_inode(dentry); - *modified = false; ctx = locks_inode_context(inode); if (!ctx) - return 0; + return nfs_ok; + +#define NON_NFSD_LEASE ((void *)1) + spin_lock(&ctx->flc_lock); for_each_file_lock(fl, &ctx->flc_lease) { - unsigned char type = fl->c.flc_type; - if (fl->c.flc_flags == FL_LAYOUT) continue; - if (fl->fl_lmops != &nfsd_lease_mng_ops) { - /* - * non-nfs lease, if it's a lease with F_RDLCK then - * we are done; there isn't any write delegation - * on this inode - */ - if (type == F_RDLCK) - break; - goto break_lease; - } - if (type == F_WRLCK) { - dp = fl->c.flc_owner; - if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) { - spin_unlock(&ctx->flc_lock); - return 0; - } -break_lease: - nfsd_stats_wdeleg_getattr_inc(nn); - dp = fl->c.flc_owner; - ncf = &dp->dl_cb_fattr; - nfs4_cb_getattr(&dp->dl_cb_fattr); - spin_unlock(&ctx->flc_lock); - wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, - TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); - if (ncf->ncf_cb_status) { - /* Recall delegation only if client didn't respond */ - status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); - if (status != nfserr_jukebox || - !nfsd_wait_for_delegreturn(rqstp, inode)) - return status; - } - if (!ncf->ncf_file_modified && - (ncf->ncf_initial_cinfo != ncf->ncf_cb_change || - ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) - ncf->ncf_file_modified = true; - if (ncf->ncf_file_modified) { - /* - * Per section 10.4.3 of RFC 8881, the server would - * not update the file's metadata with the client's - * modified size - */ - attrs.ia_mtime = attrs.ia_ctime = current_time(inode); - attrs.ia_valid = ATTR_MTIME | ATTR_CTIME; - setattr_copy(&nop_mnt_idmap, inode, &attrs); - mark_inode_dirty(inode); - ncf->ncf_cur_fsize = ncf->ncf_cb_fsize; - *size = ncf->ncf_cur_fsize; - *modified = true; - } - return 0; + if (fl->c.flc_type == F_WRLCK) { + if (fl->fl_lmops == &nfsd_lease_mng_ops) + dp = fl->c.flc_owner; + else + dp = NON_NFSD_LEASE; } break; } + if (dp == NULL || dp == NON_NFSD_LEASE || + dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) { + spin_unlock(&ctx->flc_lock); + if (dp == NON_NFSD_LEASE) { + status = nfserrno(nfsd_open_break_lease(inode, + NFSD_MAY_READ)); + if (status != nfserr_jukebox || + !nfsd_wait_for_delegreturn(rqstp, inode)) + return status; + } + return 0; + } + + nfsd_stats_wdeleg_getattr_inc(nn); + refcount_inc(&dp->dl_stid.sc_count); + ncf = &dp->dl_cb_fattr; + nfs4_cb_getattr(&dp->dl_cb_fattr); spin_unlock(&ctx->flc_lock); - return 0; + + wait_on_bit_timeout(&ncf->ncf_getattr.cb_flags, NFSD4_CALLBACK_RUNNING, + TASK_UNINTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); + if (ncf->ncf_cb_status) { + /* Recall delegation only if client didn't respond */ + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); + if (status != nfserr_jukebox || + !nfsd_wait_for_delegreturn(rqstp, inode)) + goto out_status; + } + if (!ncf->ncf_file_modified && + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change || + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) + ncf->ncf_file_modified = true; + if (ncf->ncf_file_modified) { + int err; + + /* + * Per section 10.4.3 of RFC 8881, the server would + * not update the file's metadata with the client's + * modified size + */ + err = cb_getattr_update_times(dentry, dp); + if (err) { + status = nfserrno(err); + goto out_status; + } + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize; + *pdp = dp; + return nfs_ok; + } + status = nfs_ok; +out_status: + nfs4_put_stid(&dp->dl_stid); + return status; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fac938f563ad..3afcdbed6e14 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -55,6 +55,7 @@ #include "netns.h" #include "pnfs.h" #include "filecache.h" +#include "nfs4xdr_gen.h" #include "trace.h" @@ -118,11 +119,11 @@ static int zero_clientid(clientid_t *clid) * operation described in @argp finishes. */ static void * -svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len) +svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, size_t len) { struct svcxdr_tmpbuf *tb; - tb = kmalloc(sizeof(*tb) + len, GFP_KERNEL); + tb = kmalloc(struct_size(tb, buf, len), GFP_KERNEL); if (!tb) return NULL; tb->next = argp->to_free; @@ -138,9 +139,9 @@ svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len) * buffer might end on a page boundary. */ static char * -svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) +svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, size_t len) { - char *p = svcxdr_tmpalloc(argp, len + 1); + char *p = svcxdr_tmpalloc(argp, size_add(len, 1)); if (!p) return NULL; @@ -150,7 +151,7 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) } static void * -svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, u32 len) +svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, size_t len) { __be32 *tmp; @@ -520,6 +521,26 @@ nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, *umask = mask & S_IRWXUGO; iattr->ia_valid |= ATTR_MODE; } + if (bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) { + fattr4_time_deleg_access access; + + if (!xdrgen_decode_fattr4_time_deleg_access(argp->xdr, &access)) + return nfserr_bad_xdr; + iattr->ia_atime.tv_sec = access.seconds; + iattr->ia_atime.tv_nsec = access.nseconds; + iattr->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET | ATTR_DELEG; + } + if (bmval[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) { + fattr4_time_deleg_modify modify; + + if (!xdrgen_decode_fattr4_time_deleg_modify(argp->xdr, &modify)) + return nfserr_bad_xdr; + iattr->ia_mtime.tv_sec = modify.seconds; + iattr->ia_mtime.tv_nsec = modify.nseconds; + iattr->ia_ctime.tv_sec = modify.seconds; + iattr->ia_ctime.tv_nsec = modify.seconds; + iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME | ATTR_MTIME_SET | ATTR_DELEG; + } /* request sanity: did attrlist4 contain the expected number of words? */ if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos) @@ -1066,13 +1087,13 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh return nfs_ok; if (!argp->minorversion) return nfserr_bad_xdr; - switch (w & NFS4_SHARE_WANT_MASK) { - case NFS4_SHARE_WANT_NO_PREFERENCE: - case NFS4_SHARE_WANT_READ_DELEG: - case NFS4_SHARE_WANT_WRITE_DELEG: - case NFS4_SHARE_WANT_ANY_DELEG: - case NFS4_SHARE_WANT_NO_DELEG: - case NFS4_SHARE_WANT_CANCEL: + switch (w & NFS4_SHARE_WANT_TYPE_MASK) { + case OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE: + case OPEN4_SHARE_ACCESS_WANT_READ_DELEG: + case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG: + case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG: + case OPEN4_SHARE_ACCESS_WANT_NO_DELEG: + case OPEN4_SHARE_ACCESS_WANT_CANCEL: break; default: return nfserr_bad_xdr; @@ -1246,14 +1267,6 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) } static __be32 -nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p) -{ - if (argp->minorversion == 0) - return nfs_ok; - return nfserr_notsupp; -} - -static __be32 nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) { struct nfsd4_read *read = &u->read; @@ -1732,6 +1745,35 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } +static __be32 +nfsd4_decode_get_dir_delegation(struct nfsd4_compoundargs *argp, + union nfsd4_op_u *u) +{ + struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation; + __be32 status; + + memset(gdd, 0, sizeof(*gdd)); + + if (xdr_stream_decode_bool(argp->xdr, &gdd->gdda_signal_deleg_avail) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_bitmap4(argp, gdd->gdda_notification_types, + ARRAY_SIZE(gdd->gdda_notification_types)); + if (status) + return status; + status = nfsd4_decode_nfstime4(argp, &gdd->gdda_child_attr_delay); + if (status) + return status; + status = nfsd4_decode_nfstime4(argp, &gdd->gdda_dir_attr_delay); + if (status) + return status; + status = nfsd4_decode_bitmap4(argp, gdd->gdda_child_attributes, + ARRAY_SIZE(gdd->gdda_child_attributes)); + if (status) + return status; + return nfsd4_decode_bitmap4(argp, gdd->gdda_dir_attributes, + ARRAY_SIZE(gdd->gdda_dir_attributes)); +} + #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, @@ -1863,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, return nfserr_bad_xdr; seq->seqid = be32_to_cpup(p++); seq->slotid = be32_to_cpup(p++); - seq->maxslots = be32_to_cpup(p++); + /* sa_highest_slotid counts from 0 but maxslots counts from 1 ... */ + seq->maxslots = be32_to_cpup(p++) + 1; seq->cachethis = be32_to_cpup(p); seq->status_flags = 0; @@ -2117,7 +2160,7 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u) */ static __be32 nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr, - char **bufp, u32 buflen) + char **bufp, size_t buflen) { struct page **pages = xdr->pages; struct kvec *head = xdr->head; @@ -2345,7 +2388,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm, [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade, [OP_PUTFH] = nfsd4_decode_putfh, - [OP_PUTPUBFH] = nfsd4_decode_putpubfh, + [OP_PUTPUBFH] = nfsd4_decode_noop, [OP_PUTROOTFH] = nfsd4_decode_noop, [OP_READ] = nfsd4_decode_read, [OP_READDIR] = nfsd4_decode_readdir, @@ -2370,7 +2413,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { [OP_CREATE_SESSION] = nfsd4_decode_create_session, [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session, [OP_FREE_STATEID] = nfsd4_decode_free_stateid, - [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp, + [OP_GET_DIR_DELEGATION] = nfsd4_decode_get_dir_delegation, #ifdef CONFIG_NFSD_PNFS [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo, [OP_GETDEVICELIST] = nfsd4_decode_notsupp, @@ -2521,7 +2564,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) /* Sessions make the DRC unnecessary: */ if (argp->minorversion) cachethis = false; - svc_reserve(argp->rqstp, max_reply + readbytes); + svc_reserve_auth(argp->rqstp, max_reply + readbytes); argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; argp->splice_ok = nfsd_read_splice_ok(argp->rqstp); @@ -2631,13 +2674,10 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, strlen = end - str; if (strlen) { - p = xdr_reserve_space(xdr, strlen + 4); - if (!p) + if (xdr_stream_encode_opaque(xdr, str, strlen) < 0) return nfserr_resource; - p = xdr_encode_opaque(p, str, strlen); count++; - } - else + } else end++; if (found_esc) end = next; @@ -2678,7 +2718,6 @@ static __be32 nfsd4_encode_pathname4(struct xdr_stream *xdr, const struct path *path) { struct path cur = *path; - __be32 *p; struct dentry **components = NULL; unsigned int ncomponents = 0; __be32 err = nfserr_jukebox; @@ -2709,24 +2748,19 @@ static __be32 nfsd4_encode_pathname4(struct xdr_stream *xdr, components[ncomponents++] = cur.dentry; cur.dentry = dget_parent(cur.dentry); } + err = nfserr_resource; - p = xdr_reserve_space(xdr, 4); - if (!p) + if (xdr_stream_encode_u32(xdr, ncomponents) != XDR_UNIT) goto out_free; - *p++ = cpu_to_be32(ncomponents); - while (ncomponents) { struct dentry *dentry = components[ncomponents - 1]; - unsigned int len; spin_lock(&dentry->d_lock); - len = dentry->d_name.len; - p = xdr_reserve_space(xdr, len + 4); - if (!p) { + if (xdr_stream_encode_opaque(xdr, dentry->d_name.name, + dentry->d_name.len) < 0) { spin_unlock(&dentry->d_lock); goto out_free; } - p = xdr_encode_opaque(p, dentry->d_name.name, len); dprintk("/%pd", dentry); spin_unlock(&dentry->d_lock); dput(dentry); @@ -2806,11 +2840,11 @@ static __be32 nfsd4_encode_nfsace4(struct xdr_stream *xdr, struct svc_rqst *rqst #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, - void *context, int len) + const struct lsm_context *context) { __be32 *p; - p = xdr_reserve_space(xdr, len + 4 + 4 + 4); + p = xdr_reserve_space(xdr, context->len + 4 + 4 + 4); if (!p) return nfserr_resource; @@ -2820,13 +2854,13 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, */ *p++ = cpu_to_be32(0); /* lfs */ *p++ = cpu_to_be32(0); /* pi */ - p = xdr_encode_opaque(p, context, len); + p = xdr_encode_opaque(p, context->context, context->len); return 0; } #else static inline __be32 nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, - void *context, int len) + struct lsm_context *context) { return 0; } #endif @@ -2907,10 +2941,9 @@ struct nfsd4_fattr_args { struct kstat stat; struct kstatfs statfs; struct nfs4_acl *acl; - u64 size; + u64 change_attr; #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - void *context; - int contextlen; + struct lsm_context context; #endif u32 rdattr_err; bool contextsupport; @@ -3007,7 +3040,6 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) { const struct svc_export *exp = args->exp; - u64 c; if (unlikely(exp->ex_flags & NFSEXP_V4ROOT)) { u32 flush_time = convert_to_wallclock(exp->cd->flush_time); @@ -3018,15 +3050,13 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr, return nfserr_resource; return nfs_ok; } - - c = nfsd4_change_attribute(&args->stat, d_inode(args->dentry)); - return nfsd4_encode_changeid4(xdr, c); + return nfsd4_encode_changeid4(xdr, args->change_attr); } static __be32 nfsd4_encode_fattr4_size(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) { - return nfsd4_encode_uint64_t(xdr, args->size); + return nfsd4_encode_uint64_t(xdr, args->stat.size); } static __be32 nfsd4_encode_fattr4_fsid(struct xdr_stream *xdr, @@ -3361,12 +3391,28 @@ static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr, return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]); } +/* + * Copied from generic_remap_checks/generic_remap_file_range_prep. + * + * These generic functions use the file system's s_blocksize, but + * individual file systems aren't required to use + * generic_remap_file_range_prep. Until there is a mechanism for + * determining a particular file system's (or file's) clone block + * size, this is the best NFSD can do. + */ +static __be32 nfsd4_encode_fattr4_clone_blksize(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + struct inode *inode = d_inode(args->dentry); + + return nfsd4_encode_uint32_t(xdr, inode->i_sb->s_blocksize); +} + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static __be32 nfsd4_encode_fattr4_sec_label(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) { - return nfsd4_encode_security_label(xdr, args->rqstp, - args->context, args->contextlen); + return nfsd4_encode_security_label(xdr, args->rqstp, &args->context); } #endif @@ -3378,6 +3424,56 @@ static __be32 nfsd4_encode_fattr4_xattr_support(struct xdr_stream *xdr, return nfsd4_encode_bool(xdr, err == 0); } +#define NFSD_OA_SHARE_ACCESS (BIT(OPEN_ARGS_SHARE_ACCESS_READ) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_WRITE) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_BOTH)) + +#define NFSD_OA_SHARE_DENY (BIT(OPEN_ARGS_SHARE_DENY_NONE) | \ + BIT(OPEN_ARGS_SHARE_DENY_READ) | \ + BIT(OPEN_ARGS_SHARE_DENY_WRITE) | \ + BIT(OPEN_ARGS_SHARE_DENY_BOTH)) + +#define NFSD_OA_SHARE_ACCESS_WANT (BIT(OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS) | \ + BIT(OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION)) + +#define NFSD_OA_OPEN_CLAIM (BIT(OPEN_ARGS_OPEN_CLAIM_NULL) | \ + BIT(OPEN_ARGS_OPEN_CLAIM_PREVIOUS) | \ + BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR) | \ + BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV)| \ + BIT(OPEN_ARGS_OPEN_CLAIM_FH) | \ + BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH) | \ + BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH)) + +#define NFSD_OA_CREATE_MODE (BIT(OPEN_ARGS_CREATEMODE_UNCHECKED4) | \ + BIT(OPEN_ARGS_CREATE_MODE_GUARDED) | \ + BIT(OPEN_ARGS_CREATEMODE_EXCLUSIVE4) | \ + BIT(OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1)) + +static uint32_t oa_share_access = NFSD_OA_SHARE_ACCESS; +static uint32_t oa_share_deny = NFSD_OA_SHARE_DENY; +static uint32_t oa_share_access_want = NFSD_OA_SHARE_ACCESS_WANT; +static uint32_t oa_open_claim = NFSD_OA_OPEN_CLAIM; +static uint32_t oa_create_mode = NFSD_OA_CREATE_MODE; + +static const struct open_arguments4 nfsd_open_arguments = { + .oa_share_access = { .count = 1, .element = &oa_share_access }, + .oa_share_deny = { .count = 1, .element = &oa_share_deny }, + .oa_share_access_want = { .count = 1, .element = &oa_share_access_want }, + .oa_open_claim = { .count = 1, .element = &oa_open_claim }, + .oa_create_mode = { .count = 1, .element = &oa_create_mode }, +}; + +static __be32 nfsd4_encode_fattr4_open_arguments(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + if (!xdrgen_encode_fattr4_open_arguments(xdr, &nfsd_open_arguments)) + return nfserr_resource; + return nfs_ok; +} + static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_SUPPORTED_ATTRS] = nfsd4_encode_fattr4_supported_attrs, [FATTR4_TYPE] = nfsd4_encode_fattr4_type, @@ -3466,7 +3562,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_MODE_SET_MASKED] = nfsd4_encode_fattr4__noop, [FATTR4_SUPPATTR_EXCLCREAT] = nfsd4_encode_fattr4_suppattr_exclcreat, [FATTR4_FS_CHARSET_CAP] = nfsd4_encode_fattr4__noop, - [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4__noop, + [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4_clone_blksize, [FATTR4_SPACE_FREED] = nfsd4_encode_fattr4__noop, [FATTR4_CHANGE_ATTR_TYPE] = nfsd4_encode_fattr4__noop, @@ -3478,6 +3574,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_MODE_UMASK] = nfsd4_encode_fattr4__noop, [FATTR4_XATTR_SUPPORT] = nfsd4_encode_fattr4_xattr_support, + [FATTR4_OPEN_ARGUMENTS] = nfsd4_encode_fattr4_open_arguments, }; /* @@ -3490,11 +3587,14 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct dentry *dentry, const u32 *bmval, int ignore_crossmnt) { + DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)); + struct nfs4_delegation *dp = NULL; struct nfsd4_fattr_args args; struct svc_fh *tempfh = NULL; int starting_len = xdr->buf->len; - __be32 *attrlen_p, status; - int attrlen_offset; + unsigned int attrlen_offset; + __be32 attrlen, status; + u32 attrmask[3]; int err; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; @@ -3502,13 +3602,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, .mnt = exp->ex_path.mnt, .dentry = dentry, }; - union { - u32 attrmask[3]; - unsigned long mask[2]; - } u; unsigned long bit; - bool file_modified = false; - u64 size = 0; WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1); WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval)); @@ -3517,26 +3611,31 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.exp = exp; args.dentry = dentry; args.ignore_crossmnt = (ignore_crossmnt != 0); + args.acl = NULL; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + args.context.context = NULL; +#endif /* * Make a local copy of the attribute bitmap that can be modified. */ - memset(&u, 0, sizeof(u)); - u.attrmask[0] = bmval[0]; - u.attrmask[1] = bmval[1]; - u.attrmask[2] = bmval[2]; + attrmask[0] = bmval[0]; + attrmask[1] = bmval[1]; + attrmask[2] = bmval[2]; args.rdattr_err = 0; if (exp->ex_fslocs.migrated) { - status = fattr_handle_absent_fs(&u.attrmask[0], &u.attrmask[1], - &u.attrmask[2], &args.rdattr_err); + status = fattr_handle_absent_fs(&attrmask[0], &attrmask[1], + &attrmask[2], &args.rdattr_err); if (status) goto out; } - args.size = 0; - if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { - status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry), - &file_modified, &size); + if ((attrmask[0] & (FATTR4_WORD0_CHANGE | + FATTR4_WORD0_SIZE)) || + (attrmask[1] & (FATTR4_WORD1_TIME_ACCESS | + FATTR4_WORD1_TIME_MODIFY | + FATTR4_WORD1_TIME_METADATA))) { + status = nfsd4_deleg_getattr_conflict(rqstp, dentry, &dp); if (status) goto out; } @@ -3544,25 +3643,40 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, err = vfs_getattr(&path, &args.stat, STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE, AT_STATX_SYNC_AS_STAT); + if (dp) { + struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr; + + if (ncf->ncf_file_modified) { + ++ncf->ncf_initial_cinfo; + args.stat.size = ncf->ncf_cur_fsize; + if (!timespec64_is_epoch(&ncf->ncf_cb_mtime)) + args.stat.mtime = ncf->ncf_cb_mtime; + } + args.change_attr = ncf->ncf_initial_cinfo; + + if (!timespec64_is_epoch(&ncf->ncf_cb_atime)) + args.stat.atime = ncf->ncf_cb_atime; + + nfs4_put_stid(&dp->dl_stid); + } else { + args.change_attr = nfsd4_change_attribute(&args.stat); + } + if (err) goto out_nfserr; - if (file_modified) - args.size = size; - else - args.size = args.stat.size; if (!(args.stat.result_mask & STATX_BTIME)) /* underlying FS does not offer btime so we can't share it */ - u.attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE; - if ((u.attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | + attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE; + if ((attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || - (u.attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | + (attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL))) { err = vfs_statfs(&path, &args.statfs); if (err) goto out_nfserr; } - if ((u.attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && + if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); status = nfserr_jukebox; @@ -3576,11 +3690,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, } else args.fhp = fhp; - args.acl = NULL; - if (u.attrmask[0] & FATTR4_WORD0_ACL) { + if (attrmask[0] & FATTR4_WORD0_ACL) { err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl); if (err == -EOPNOTSUPP) - u.attrmask[0] &= ~FATTR4_WORD0_ACL; + attrmask[0] &= ~FATTR4_WORD0_ACL; else if (err == -EINVAL) { status = nfserr_attrnotsupp; goto out; @@ -3591,18 +3704,17 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.contextsupport = false; #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - args.context = NULL; - if ((u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) || - u.attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) || + attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { if (exp->ex_flags & NFSEXP_SECURITY_LABEL) err = security_inode_getsecctx(d_inode(dentry), - &args.context, &args.contextlen); + &args.context); else err = -EOPNOTSUPP; args.contextsupport = (err == 0); - if (u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) { + if (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) { if (err == -EOPNOTSUPP) - u.attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; else if (err) goto out_nfserr; } @@ -3610,29 +3722,31 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ /* attrmask */ - status = nfsd4_encode_bitmap4(xdr, u.attrmask[0], - u.attrmask[1], u.attrmask[2]); + status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1], + attrmask[2]); if (status) goto out; /* attr_vals */ attrlen_offset = xdr->buf->len; - attrlen_p = xdr_reserve_space(xdr, XDR_UNIT); - if (!attrlen_p) + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT))) goto out_resource; - for_each_set_bit(bit, (const unsigned long *)&u.mask, + bitmap_from_arr32(attr_bitmap, attrmask, + ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)); + for_each_set_bit(bit, attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) { status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args); if (status != nfs_ok) goto out; } - *attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT); + attrlen = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT); + write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, XDR_UNIT); status = nfs_ok; out: #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if (args.context) - security_release_secctx(args.context, args.contextlen); + if (args.context.context) + security_release_secctx(&args.context); #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ kfree(args.acl); if (tempfh) { @@ -3715,7 +3829,9 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name, __be32 nfserr; int ignore_crossmnt = 0; - dentry = lookup_positive_unlocked(name, cd->rd_fhp->fh_dentry, namlen); + dentry = lookup_one_positive_unlocked(&nop_mnt_idmap, + &QSTR_LEN(name, namlen), + cd->rd_fhp->fh_dentry); if (IS_ERR(dentry)) return nfserrno(PTR_ERR(dentry)); @@ -3745,7 +3861,7 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name, nfserr = nfserrno(err); goto out_put; } - nfserr = check_nfsd_access(exp, cd->rd_rqstp); + nfserr = check_nfsd_access(exp, cd->rd_rqstp, false); if (nfserr) goto out_put; @@ -4216,18 +4332,20 @@ nfsd4_encode_open_delegation4(struct xdr_stream *xdr, struct nfsd4_open *open) if (xdr_stream_encode_u32(xdr, open->op_delegate_type) != XDR_UNIT) return nfserr_resource; switch (open->op_delegate_type) { - case NFS4_OPEN_DELEGATE_NONE: + case OPEN_DELEGATE_NONE: status = nfs_ok; break; - case NFS4_OPEN_DELEGATE_READ: + case OPEN_DELEGATE_READ: + case OPEN_DELEGATE_READ_ATTRS_DELEG: /* read */ status = nfsd4_encode_open_read_delegation4(xdr, open); break; - case NFS4_OPEN_DELEGATE_WRITE: + case OPEN_DELEGATE_WRITE: + case OPEN_DELEGATE_WRITE_ATTRS_DELEG: /* write */ status = nfsd4_encode_open_write_delegation4(xdr, open); break; - case NFS4_OPEN_DELEGATE_NONE_EXT: + case OPEN_DELEGATE_NONE_EXT: /* od_whynone */ status = nfsd4_encode_open_none_delegation4(xdr, open); break; @@ -4304,6 +4422,15 @@ static __be32 nfsd4_encode_splice_read( __be32 nfserr; /* + * Splice read doesn't work if encoding has already wandered + * into the XDR buf's page array. + */ + if (unlikely(xdr->buf->page_len)) { + WARN_ON_ONCE(1); + return nfserr_serverfault; + } + + /* * Make sure there is room at the end of buf->head for * svcxdr_encode_opaque_pages() to create a tail buffer * to XDR-pad the payload. @@ -4385,25 +4512,23 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_compoundargs *argp = resp->rqstp->rq_argp; struct nfsd4_read *read = &u->read; struct xdr_stream *xdr = resp->xdr; - int starting_len = xdr->buf->len; bool splice_ok = argp->splice_ok; + unsigned int eof_offset; unsigned long maxcount; + __be32 wire_data[2]; struct file *file; - __be32 *p; if (nfserr) return nfserr; + + eof_offset = xdr->buf->len; file = read->rd_nf->nf_file; - p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ - if (!p) { + /* Reserve space for the eof flag and byte count */ + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2))) { WARN_ON_ONCE(splice_ok); return nfserr_resource; } - if (resp->xdr->buf->page_len && splice_ok) { - WARN_ON_ONCE(1); - return nfserr_serverfault; - } xdr_commit_encode(xdr); maxcount = min_t(unsigned long, read->rd_length, @@ -4414,12 +4539,13 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, else nfserr = nfsd4_encode_readv(resp, read, file, maxcount); if (nfserr) { - xdr_truncate_encode(xdr, starting_len); + xdr_truncate_encode(xdr, eof_offset); return nfserr; } - p = xdr_encode_bool(p, read->rd_eof); - *p = cpu_to_be32(read->rd_length); + wire_data[0] = read->rd_eof ? xdr_one : xdr_zero; + wire_data[1] = cpu_to_be32(read->rd_length); + write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2); return nfs_ok; } @@ -4428,25 +4554,21 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { struct nfsd4_readlink *readlink = &u->readlink; - __be32 *p, *maxcount_p, zero = xdr_zero; + __be32 *p, wire_count, zero = xdr_zero; struct xdr_stream *xdr = resp->xdr; - int length_offset = xdr->buf->len; + unsigned int length_offset; int maxcount, status; - maxcount_p = xdr_reserve_space(xdr, XDR_UNIT); - if (!maxcount_p) + /* linktext4.count */ + length_offset = xdr->buf->len; + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT))) return nfserr_resource; - maxcount = PAGE_SIZE; + /* linktext4.data */ + maxcount = PAGE_SIZE; p = xdr_reserve_space(xdr, maxcount); if (!p) return nfserr_resource; - /* - * XXX: By default, vfs_readlink() will truncate symlinks if they - * would overflow the buffer. Is this kosher in NFSv4? If not, one - * easy fix is: if vfs_readlink() precisely fills the buffer, assume - * that truncation occurred, and return NFS4ERR_RESOURCE. - */ nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, (char *)p, &maxcount); if (nfserr == nfserr_isdir) @@ -4459,7 +4581,9 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, nfserr = nfserrno(status); goto out_err; } - *maxcount_p = cpu_to_be32(maxcount); + + wire_count = cpu_to_be32(maxcount); + write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, XDR_UNIT); xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount)); write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero, xdr_pad_size(maxcount)); @@ -4594,14 +4718,42 @@ nfsd4_encode_rpcsec_gss_info(struct xdr_stream *xdr, } static __be32 -nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) +nfsd4_encode_secinfo4(struct xdr_stream *xdr, rpc_authflavor_t pf, + u32 *supported) +{ + struct rpcsec_gss_info info; + __be32 status; + + if (rpcauth_get_gssinfo(pf, &info) == 0) { + (*supported)++; + + /* flavor */ + status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS); + if (status != nfs_ok) + return status; + /* flavor_info */ + status = nfsd4_encode_rpcsec_gss_info(xdr, &info); + if (status != nfs_ok) + return status; + } else if (pf < RPC_AUTH_MAXFLAVOR) { + (*supported)++; + + /* flavor */ + status = nfsd4_encode_uint32_t(xdr, pf); + if (status != nfs_ok) + return status; + } + return nfs_ok; +} + +static __be32 +nfsd4_encode_SECINFO4resok(struct xdr_stream *xdr, struct svc_export *exp) { u32 i, nflavs, supported; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; - static bool report = true; - __be32 *flavorsp; - __be32 status; + unsigned int count_offset; + __be32 status, wire_count; if (exp->ex_nflavors) { flavs = exp->ex_flavors; @@ -4623,43 +4775,20 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) } } - supported = 0; - flavorsp = xdr_reserve_space(xdr, XDR_UNIT); - if (!flavorsp) + count_offset = xdr->buf->len; + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT))) return nfserr_resource; - for (i = 0; i < nflavs; i++) { - rpc_authflavor_t pf = flavs[i].pseudoflavor; - struct rpcsec_gss_info info; - - if (rpcauth_get_gssinfo(pf, &info) == 0) { - supported++; - - /* flavor */ - status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS); - if (status != nfs_ok) - return status; - /* flavor_info */ - status = nfsd4_encode_rpcsec_gss_info(xdr, &info); - if (status != nfs_ok) - return status; - } else if (pf < RPC_AUTH_MAXFLAVOR) { - supported++; - - /* flavor */ - status = nfsd4_encode_uint32_t(xdr, pf); - if (status != nfs_ok) - return status; - } else { - if (report) - pr_warn("NFS: SECINFO: security flavor %u " - "is not supported\n", pf); - } + for (i = 0, supported = 0; i < nflavs; i++) { + status = nfsd4_encode_secinfo4(xdr, flavs[i].pseudoflavor, + &supported); + if (status != nfs_ok) + return status; } - if (nflavs != supported) - report = false; - *flavorsp = cpu_to_be32(supported); + wire_count = cpu_to_be32(supported); + write_bytes_to_xdr_buf(xdr->buf, count_offset, &wire_count, + XDR_UNIT); return 0; } @@ -4670,7 +4799,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo *secinfo = &u->secinfo; struct xdr_stream *xdr = resp->xdr; - return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); + return nfsd4_encode_SECINFO4resok(xdr, secinfo->si_exp); } static __be32 @@ -4680,7 +4809,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name; struct xdr_stream *xdr = resp->xdr; - return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); + return nfsd4_encode_SECINFO4resok(xdr, secinfo->sin_exp); } static __be32 @@ -4804,6 +4933,25 @@ nfsd4_encode_server_owner4(struct xdr_stream *xdr, struct svc_rqst *rqstp) } static __be32 +nfsd4_encode_nfs_impl_id4(struct xdr_stream *xdr, struct nfsd4_exchange_id *exid) +{ + __be32 status; + + /* nii_domain */ + status = nfsd4_encode_opaque(xdr, exid->nii_domain.data, + exid->nii_domain.len); + if (status != nfs_ok) + return status; + /* nii_name */ + status = nfsd4_encode_opaque(xdr, exid->nii_name.data, + exid->nii_name.len); + if (status != nfs_ok) + return status; + /* nii_time */ + return nfsd4_encode_nfstime4(xdr, &exid->nii_time); +} + +static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, union nfsd4_op_u *u) { @@ -4837,8 +4985,11 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr != nfs_ok) return nfserr; /* eir_server_impl_id<1> */ - if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) + if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT) return nfserr_resource; + nfserr = nfsd4_encode_nfs_impl_id4(xdr, exid); + if (nfserr != nfs_ok) + return nfserr; return nfs_ok; } @@ -4933,7 +5084,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr != nfs_ok) return nfserr; /* sr_target_highest_slotid */ - nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1); + nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1); if (nfserr != nfs_ok) return nfserr; /* sr_status_flags */ @@ -4964,6 +5115,49 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, return nfs_ok; } +static __be32 +nfsd4_encode_get_dir_delegation(struct nfsd4_compoundres *resp, __be32 nfserr, + union nfsd4_op_u *u) +{ + struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation; + struct xdr_stream *xdr = resp->xdr; + __be32 status = nfserr_resource; + + switch(gdd->gddrnf_status) { + case GDD4_OK: + if (xdr_stream_encode_u32(xdr, GDD4_OK) != XDR_UNIT) + break; + status = nfsd4_encode_verifier4(xdr, &gdd->gddr_cookieverf); + if (status) + break; + status = nfsd4_encode_stateid4(xdr, &gdd->gddr_stateid); + if (status) + break; + status = nfsd4_encode_bitmap4(xdr, gdd->gddr_notification[0], 0, 0); + if (status) + break; + status = nfsd4_encode_bitmap4(xdr, gdd->gddr_child_attributes[0], + gdd->gddr_child_attributes[1], + gdd->gddr_child_attributes[2]); + if (status) + break; + status = nfsd4_encode_bitmap4(xdr, gdd->gddr_dir_attributes[0], + gdd->gddr_dir_attributes[1], + gdd->gddr_dir_attributes[2]); + break; + default: + pr_warn("nfsd: bad gddrnf_status (%u)\n", gdd->gddrnf_status); + gdd->gddrnf_will_signal_deleg_avail = 0; + fallthrough; + case GDD4_UNAVAIL: + if (xdr_stream_encode_u32(xdr, GDD4_UNAVAIL) != XDR_UNIT) + break; + status = nfsd4_encode_bool(xdr, gdd->gddrnf_will_signal_deleg_avail); + break; + } + return status; +} + #ifdef CONFIG_NFSD_PNFS static __be32 nfsd4_encode_device_addr4(struct xdr_stream *xdr, @@ -5200,7 +5394,12 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr != nfs_ok) return nfserr; /* osr_complete<1> */ - if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) + if (os->completed) { + if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT) + return nfserr_resource; + if (xdr_stream_encode_be32(xdr, os->status) != XDR_UNIT) + return nfserr_resource; + } else if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT) return nfserr_resource; return nfs_ok; } @@ -5213,17 +5412,20 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; bool splice_ok = argp->splice_ok; + unsigned int offset_offset; + __be32 nfserr, wire_count; unsigned long maxcount; - __be32 nfserr, *p; + __be64 wire_offset; - /* Content type, offset, byte count */ - p = xdr_reserve_space(xdr, 4 + 8 + 4); - if (!p) + if (xdr_stream_encode_u32(xdr, NFS4_CONTENT_DATA) != XDR_UNIT) return nfserr_io; - if (resp->xdr->buf->page_len && splice_ok) { - WARN_ON_ONCE(splice_ok); - return nfserr_serverfault; - } + + offset_offset = xdr->buf->len; + + /* Reserve space for the byte offset and count */ + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 3))) + return nfserr_io; + xdr_commit_encode(xdr); maxcount = min_t(unsigned long, read->rd_length, (xdr->buf->buflen - xdr->buf->len)); @@ -5235,10 +5437,12 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, if (nfserr) return nfserr; - *p++ = cpu_to_be32(NFS4_CONTENT_DATA); - p = xdr_encode_hyper(p, read->rd_offset); - *p = cpu_to_be32(read->rd_length); - + wire_offset = cpu_to_be64(read->rd_offset); + write_bytes_to_xdr_buf(xdr->buf, offset_offset, &wire_offset, + XDR_UNIT * 2); + wire_count = cpu_to_be32(read->rd_length); + write_bytes_to_xdr_buf(xdr->buf, offset_offset + XDR_UNIT * 2, + &wire_count, XDR_UNIT); return nfs_ok; } @@ -5249,16 +5453,17 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read = &u->read; struct file *file = read->rd_nf->nf_file; struct xdr_stream *xdr = resp->xdr; - int starting_len = xdr->buf->len; + unsigned int eof_offset; + __be32 wire_data[2]; u32 segments = 0; - __be32 *p; if (nfserr) return nfserr; - /* eof flag, segment count */ - p = xdr_reserve_space(xdr, 4 + 4); - if (!p) + eof_offset = xdr->buf->len; + + /* Reserve space for the eof flag and segment count */ + if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2))) return nfserr_io; xdr_commit_encode(xdr); @@ -5268,15 +5473,16 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, nfserr = nfsd4_encode_read_plus_data(resp, read); if (nfserr) { - xdr_truncate_encode(xdr, starting_len); + xdr_truncate_encode(xdr, eof_offset); return nfserr; } segments++; out: - p = xdr_encode_bool(p, read->rd_eof); - *p = cpu_to_be32(segments); + wire_data[0] = read->rd_eof ? xdr_one : xdr_zero; + wire_data[1] = cpu_to_be32(segments); + write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2); return nfserr; } @@ -5580,7 +5786,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = { [OP_CREATE_SESSION] = nfsd4_encode_create_session, [OP_DESTROY_SESSION] = nfsd4_encode_noop, [OP_FREE_STATEID] = nfsd4_encode_noop, - [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop, + [OP_GET_DIR_DELEGATION] = nfsd4_encode_get_dir_delegation, #ifdef CONFIG_NFSD_PNFS [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo, [OP_GETDEVICELIST] = nfsd4_encode_noop, @@ -5653,6 +5859,23 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) return nfserr_rep_too_big; } +static __be32 nfsd4_map_status(__be32 status, u32 minor) +{ + switch (status) { + case nfs_ok: + break; + case nfserr_wrong_type: + /* RFC 8881 - 15.1.2.9 */ + if (minor == 0) + status = nfserr_inval; + break; + case nfserr_symlink_not_dir: + status = nfserr_symlink; + break; + } + return status; +} + void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { @@ -5660,15 +5883,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) struct nfs4_stateowner *so = resp->cstate.replay_owner; struct svc_rqst *rqstp = resp->rqstp; const struct nfsd4_operation *opdesc = op->opdesc; - int post_err_offset; + unsigned int op_status_offset; nfsd4_enc encoder; - __be32 *p; - p = xdr_reserve_space(xdr, 8); - if (!p) + if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT) + goto release; + op_status_offset = xdr->buf->len; + if (!xdr_reserve_space(xdr, XDR_UNIT)) goto release; - *p++ = cpu_to_be32(op->opnum); - post_err_offset = xdr->buf->len; if (op->opnum == OP_ILLEGAL) goto status; @@ -5709,18 +5931,21 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) * bug if we had to do this on a non-idempotent op: */ warn_on_nonidempotent_op(op); - xdr_truncate_encode(xdr, post_err_offset); + xdr_truncate_encode(xdr, op_status_offset + XDR_UNIT); } if (so) { - int len = xdr->buf->len - post_err_offset; + int len = xdr->buf->len - (op_status_offset + XDR_UNIT); so->so_replay.rp_status = op->status; so->so_replay.rp_buflen = len; - read_bytes_from_xdr_buf(xdr->buf, post_err_offset, + read_bytes_from_xdr_buf(xdr->buf, op_status_offset + XDR_UNIT, so->so_replay.rp_buf, len); } status: - *p = op->status; + op->status = nfsd4_map_status(op->status, + resp->cstate.minorversion); + write_bytes_to_xdr_buf(xdr->buf, op_status_offset, + &op->status, XDR_UNIT); release: if (opdesc && opdesc->op_release) opdesc->op_release(&op->u); diff --git a/fs/nfsd/nfs4xdr_gen.c b/fs/nfsd/nfs4xdr_gen.c new file mode 100644 index 000000000000..a17b5d8e60b3 --- /dev/null +++ b/fs/nfsd/nfs4xdr_gen.c @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: GPL-2.0 +// Generated by xdrgen. Manual edits will be lost. +// XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x +// XDR specification modification time: Mon Oct 14 09:10:13 2024 + +#include <linux/sunrpc/svc.h> + +#include "nfs4xdr_gen.h" + +static bool __maybe_unused +xdrgen_decode_int64_t(struct xdr_stream *xdr, int64_t *ptr) +{ + return xdrgen_decode_hyper(xdr, ptr); +}; + +static bool __maybe_unused +xdrgen_decode_uint32_t(struct xdr_stream *xdr, uint32_t *ptr) +{ + return xdrgen_decode_unsigned_int(xdr, ptr); +}; + +static bool __maybe_unused +xdrgen_decode_bitmap4(struct xdr_stream *xdr, bitmap4 *ptr) +{ + if (xdr_stream_decode_u32(xdr, &ptr->count) < 0) + return false; + for (u32 i = 0; i < ptr->count; i++) + if (!xdrgen_decode_uint32_t(xdr, &ptr->element[i])) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_decode_nfstime4(struct xdr_stream *xdr, struct nfstime4 *ptr) +{ + if (!xdrgen_decode_int64_t(xdr, &ptr->seconds)) + return false; + if (!xdrgen_decode_uint32_t(xdr, &ptr->nseconds)) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_decode_fattr4_offline(struct xdr_stream *xdr, fattr4_offline *ptr) +{ + return xdrgen_decode_bool(xdr, ptr); +}; + +static bool __maybe_unused +xdrgen_decode_open_arguments4(struct xdr_stream *xdr, struct open_arguments4 *ptr) +{ + if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access)) + return false; + if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_deny)) + return false; + if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access_want)) + return false; + if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_open_claim)) + return false; + if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_create_mode)) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_decode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused +xdrgen_decode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused +xdrgen_decode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused +xdrgen_decode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused +xdrgen_decode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +bool +xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr) +{ + return xdrgen_decode_open_arguments4(xdr, ptr); +}; + +bool +xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr) +{ + return xdrgen_decode_nfstime4(xdr, ptr); +}; + +bool +xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr) +{ + return xdrgen_decode_nfstime4(xdr, ptr); +}; + +static bool __maybe_unused +xdrgen_decode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused +xdrgen_encode_int64_t(struct xdr_stream *xdr, const int64_t value) +{ + return xdrgen_encode_hyper(xdr, value); +}; + +static bool __maybe_unused +xdrgen_encode_uint32_t(struct xdr_stream *xdr, const uint32_t value) +{ + return xdrgen_encode_unsigned_int(xdr, value); +}; + +static bool __maybe_unused +xdrgen_encode_bitmap4(struct xdr_stream *xdr, const bitmap4 value) +{ + if (xdr_stream_encode_u32(xdr, value.count) != XDR_UNIT) + return false; + for (u32 i = 0; i < value.count; i++) + if (!xdrgen_encode_uint32_t(xdr, value.element[i])) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_encode_nfstime4(struct xdr_stream *xdr, const struct nfstime4 *value) +{ + if (!xdrgen_encode_int64_t(xdr, value->seconds)) + return false; + if (!xdrgen_encode_uint32_t(xdr, value->nseconds)) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_encode_fattr4_offline(struct xdr_stream *xdr, const fattr4_offline value) +{ + return xdrgen_encode_bool(xdr, value); +}; + +static bool __maybe_unused +xdrgen_encode_open_arguments4(struct xdr_stream *xdr, const struct open_arguments4 *value) +{ + if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access)) + return false; + if (!xdrgen_encode_bitmap4(xdr, value->oa_share_deny)) + return false; + if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access_want)) + return false; + if (!xdrgen_encode_bitmap4(xdr, value->oa_open_claim)) + return false; + if (!xdrgen_encode_bitmap4(xdr, value->oa_create_mode)) + return false; + return true; +}; + +static bool __maybe_unused +xdrgen_encode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +static bool __maybe_unused +xdrgen_encode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +static bool __maybe_unused +xdrgen_encode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +static bool __maybe_unused +xdrgen_encode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +static bool __maybe_unused +xdrgen_encode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +bool +xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value) +{ + return xdrgen_encode_open_arguments4(xdr, value); +}; + +bool +xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value) +{ + return xdrgen_encode_nfstime4(xdr, value); +}; + +bool +xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value) +{ + return xdrgen_encode_nfstime4(xdr, value); +}; + +static bool __maybe_unused +xdrgen_encode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} diff --git a/fs/nfsd/nfs4xdr_gen.h b/fs/nfsd/nfs4xdr_gen.h new file mode 100644 index 000000000000..41a0033b7256 --- /dev/null +++ b/fs/nfsd/nfs4xdr_gen.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Generated by xdrgen. Manual edits will be lost. */ +/* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */ +/* XDR specification modification time: Mon Oct 14 09:10:13 2024 */ + +#ifndef _LINUX_XDRGEN_NFS4_1_DECL_H +#define _LINUX_XDRGEN_NFS4_1_DECL_H + +#include <linux/types.h> + +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/xdrgen/_defs.h> +#include <linux/sunrpc/xdrgen/_builtins.h> +#include <linux/sunrpc/xdrgen/nfs4_1.h> + +bool xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr); +bool xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value); + +bool xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr); +bool xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value); + +bool xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr); +bool xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value); + +#endif /* _LINUX_XDRGEN_NFS4_1_DECL_H */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index ecd18bffeebc..6a42cc7a845a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -15,8 +15,10 @@ #include <linux/sunrpc/addr.h> #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include <linux/sunrpc/svc.h> #include <linux/module.h> #include <linux/fsnotify.h> +#include <linux/nfslocalio.h> #include "idmap.h" #include "nfsd.h" @@ -46,14 +48,11 @@ enum { NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, - NFSD_MaxConnections, NFSD_Filecache, -#ifdef CONFIG_NFSD_V4 NFSD_Leasetime, NFSD_Gracetime, NFSD_RecoveryDir, NFSD_V4EndGrace, -#endif NFSD_MaxReserved }; @@ -68,7 +67,6 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); static ssize_t write_versions(struct file *file, char *buf, size_t size); static ssize_t write_ports(struct file *file, char *buf, size_t size); static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); -static ssize_t write_maxconn(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_V4 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t write_gracetime(struct file *file, char *buf, size_t size); @@ -87,7 +85,6 @@ static ssize_t (*const write_op[])(struct file *, char *, size_t) = { [NFSD_Versions] = write_versions, [NFSD_Ports] = write_ports, [NFSD_MaxBlkSize] = write_maxblksize, - [NFSD_MaxConnections] = write_maxconn, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = write_leasetime, [NFSD_Gracetime] = write_gracetime, @@ -175,6 +172,13 @@ static int export_features_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(export_features); +static int nfsd_pool_stats_open(struct inode *inode, struct file *file) +{ + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); + + return svc_pool_stats_open(&nn->nfsd_info, file); +} + static const struct file_operations pool_stats_operations = { .open = nfsd_pool_stats_open, .read = seq_read, @@ -406,7 +410,9 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) if (newthreads < 0) return -EINVAL; trace_nfsd_ctl_threads(net, newthreads); - rv = nfsd_svc(newthreads, net, file->f_cred); + mutex_lock(&nfsd_mutex); + rv = nfsd_svc(1, &newthreads, net, file->f_cred, NULL); + mutex_unlock(&nfsd_mutex); if (rv < 0) return rv; } else @@ -480,6 +486,14 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) goto out_free; trace_nfsd_ctl_pool_threads(net, i, nthreads[i]); } + + /* + * There must always be a thread in pool 0; the admin + * can't shut down NFS completely using pool_threads. + */ + if (nthreads[0] == 0) + nthreads[0] = 1; + rv = nfsd_set_nrthreads(i, nthreads, net); if (rv) goto out_free; @@ -885,44 +899,6 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) nfsd_max_blksize); } -/* - * write_maxconn - Set or report the current max number of connections - * - * Input: - * buf: ignored - * size: zero - * OR - * - * Input: - * buf: C string containing an unsigned - * integer value representing the new - * number of max connections - * size: non-zero length of C string in @buf - * Output: - * On success: passed-in buffer filled with '\n'-terminated C string - * containing numeric value of max_connections setting - * for this net namespace; - * return code is the size in bytes of the string - * On error: return code is zero or a negative errno value - */ -static ssize_t write_maxconn(struct file *file, char *buf, size_t size) -{ - char *mesg = buf; - struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id); - unsigned int maxconn = nn->max_connections; - - if (size > 0) { - int rv = get_uint(&mesg, &maxconn); - - if (rv) - return rv; - trace_nfsd_ctl_maxconn(netns(file), maxconn); - nn->max_connections = maxconn; - } - - return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn); -} - #ifdef CONFIG_NFSD_V4 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time64_t *time, struct nfsd_net *nn) @@ -1355,12 +1331,13 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, - [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO}, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, +#endif [NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO}, #endif /* last one */ {""} @@ -1457,28 +1434,6 @@ static int create_proc_exports_entry(void) unsigned int nfsd_net_id; -/** - * nfsd_nl_rpc_status_get_start - Prepare rpc_status_get dumpit - * @cb: netlink metadata and command arguments - * - * Return values: - * %0: The rpc_status_get command may proceed - * %-ENODEV: There is no NFSD running in this namespace - */ -int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb) -{ - struct nfsd_net *nn = net_generic(sock_net(cb->skb->sk), nfsd_net_id); - int ret = -ENODEV; - - mutex_lock(&nfsd_mutex); - if (nn->nfsd_serv) - ret = 0; - else - mutex_unlock(&nfsd_mutex); - - return ret; -} - static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, struct netlink_callback *cb, struct nfsd_genl_rqstp *rqstp) @@ -1555,8 +1510,16 @@ static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { - struct nfsd_net *nn = net_generic(sock_net(skb->sk), nfsd_net_id); int i, ret, rqstp_index = 0; + struct nfsd_net *nn; + + mutex_lock(&nfsd_mutex); + + nn = net_generic(sock_net(skb->sk), nfsd_net_id); + if (!nn->nfsd_serv) { + ret = -ENODEV; + goto out_unlock; + } rcu_read_lock(); @@ -1633,25 +1596,586 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, ret = skb->len; out: rcu_read_unlock(); +out_unlock: + mutex_unlock(&nfsd_mutex); return ret; } /** - * nfsd_nl_rpc_status_get_done - rpc_status_get dumpit post-processing - * @cb: netlink metadata and command arguments + * nfsd_nl_threads_set_doit - set the number of running threads + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem; + struct net *net = genl_info_net(info); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + const struct nlattr *attr; + const char *scope = NULL; + + if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_THREADS)) + return -EINVAL; + + /* count number of SERVER_THREADS values */ + nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { + if (nla_type(attr) == NFSD_A_SERVER_THREADS) + nrpools++; + } + + mutex_lock(&nfsd_mutex); + + nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL); + if (!nthreads) { + ret = -ENOMEM; + goto out_unlock; + } + + i = 0; + nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { + if (nla_type(attr) == NFSD_A_SERVER_THREADS) { + nthreads[i++] = nla_get_u32(attr); + if (i >= nrpools) + break; + } + } + + if (info->attrs[NFSD_A_SERVER_GRACETIME] || + info->attrs[NFSD_A_SERVER_LEASETIME] || + info->attrs[NFSD_A_SERVER_SCOPE]) { + ret = -EBUSY; + if (nn->nfsd_serv && nn->nfsd_serv->sv_nrthreads) + goto out_unlock; + + ret = -EINVAL; + attr = info->attrs[NFSD_A_SERVER_GRACETIME]; + if (attr) { + u32 gracetime = nla_get_u32(attr); + + if (gracetime < 10 || gracetime > 3600) + goto out_unlock; + + nn->nfsd4_grace = gracetime; + } + + attr = info->attrs[NFSD_A_SERVER_LEASETIME]; + if (attr) { + u32 leasetime = nla_get_u32(attr); + + if (leasetime < 10 || leasetime > 3600) + goto out_unlock; + + nn->nfsd4_lease = leasetime; + } + + attr = info->attrs[NFSD_A_SERVER_SCOPE]; + if (attr) + scope = nla_data(attr); + } + + ret = nfsd_svc(nrpools, nthreads, net, get_current_cred(), scope); + if (ret > 0) + ret = 0; +out_unlock: + mutex_unlock(&nfsd_mutex); + kfree(nthreads); + return ret; +} + +/** + * nfsd_nl_threads_get_doit - get the number of running threads + * @skb: reply buffer + * @info: netlink metadata and command arguments * - * Return values: - * %0: Success + * Return 0 on success or a negative errno. */ -int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb) +int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + void *hdr; + int err; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_iput(skb, info); + if (!hdr) { + err = -EMSGSIZE; + goto err_free_msg; + } + + mutex_lock(&nfsd_mutex); + + err = nla_put_u32(skb, NFSD_A_SERVER_GRACETIME, + nn->nfsd4_grace) || + nla_put_u32(skb, NFSD_A_SERVER_LEASETIME, + nn->nfsd4_lease) || + nla_put_string(skb, NFSD_A_SERVER_SCOPE, + nn->nfsd_name); + if (err) + goto err_unlock; + + if (nn->nfsd_serv) { + int i; + + for (i = 0; i < nfsd_nrpools(net); ++i) { + struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i]; + + err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, + sp->sp_nrthreads); + if (err) + goto err_unlock; + } + } else { + err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, 0); + if (err) + goto err_unlock; + } + + mutex_unlock(&nfsd_mutex); + + genlmsg_end(skb, hdr); + + return genlmsg_reply(skb, info); + +err_unlock: + mutex_unlock(&nfsd_mutex); +err_free_msg: + nlmsg_free(skb); + + return err; +} + +/** + * nfsd_nl_version_set_doit - set the nfs enabled versions + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + const struct nlattr *attr; + struct nfsd_net *nn; + int i, rem; + + if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_PROTO_VERSION)) + return -EINVAL; + + mutex_lock(&nfsd_mutex); + + nn = net_generic(genl_info_net(info), nfsd_net_id); + if (nn->nfsd_serv) { + mutex_unlock(&nfsd_mutex); + return -EBUSY; + } + + /* clear current supported versions. */ + nfsd_vers(nn, 2, NFSD_CLEAR); + nfsd_vers(nn, 3, NFSD_CLEAR); + for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) + nfsd_minorversion(nn, i, NFSD_CLEAR); + + nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { + struct nlattr *tb[NFSD_A_VERSION_MAX + 1]; + u32 major, minor = 0; + bool enabled; + + if (nla_type(attr) != NFSD_A_SERVER_PROTO_VERSION) + continue; + + if (nla_parse_nested(tb, NFSD_A_VERSION_MAX, attr, + nfsd_version_nl_policy, info->extack) < 0) + continue; + + if (!tb[NFSD_A_VERSION_MAJOR]) + continue; + + major = nla_get_u32(tb[NFSD_A_VERSION_MAJOR]); + if (tb[NFSD_A_VERSION_MINOR]) + minor = nla_get_u32(tb[NFSD_A_VERSION_MINOR]); + + enabled = nla_get_flag(tb[NFSD_A_VERSION_ENABLED]); + + switch (major) { + case 4: + nfsd_minorversion(nn, minor, enabled ? NFSD_SET : NFSD_CLEAR); + break; + case 3: + case 2: + if (!minor) + nfsd_vers(nn, major, enabled ? NFSD_SET : NFSD_CLEAR); + break; + default: + break; + } + } + mutex_unlock(&nfsd_mutex); return 0; } /** + * nfsd_nl_version_get_doit - get the enabled status for all supported nfs versions + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct nfsd_net *nn; + int i, err; + void *hdr; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_iput(skb, info); + if (!hdr) { + err = -EMSGSIZE; + goto err_free_msg; + } + + mutex_lock(&nfsd_mutex); + nn = net_generic(genl_info_net(info), nfsd_net_id); + + for (i = 2; i <= 4; i++) { + int j; + + for (j = 0; j <= NFSD_SUPPORTED_MINOR_VERSION; j++) { + struct nlattr *attr; + + /* Don't record any versions the kernel doesn't have + * compiled in + */ + if (!nfsd_support_version(i)) + continue; + + /* NFSv{2,3} does not support minor numbers */ + if (i < 4 && j) + continue; + + attr = nla_nest_start(skb, + NFSD_A_SERVER_PROTO_VERSION); + if (!attr) { + err = -EINVAL; + goto err_nfsd_unlock; + } + + if (nla_put_u32(skb, NFSD_A_VERSION_MAJOR, i) || + nla_put_u32(skb, NFSD_A_VERSION_MINOR, j)) { + err = -EINVAL; + goto err_nfsd_unlock; + } + + /* Set the enabled flag if the version is enabled */ + if (nfsd_vers(nn, i, NFSD_TEST) && + (i < 4 || nfsd_minorversion(nn, j, NFSD_TEST)) && + nla_put_flag(skb, NFSD_A_VERSION_ENABLED)) { + err = -EINVAL; + goto err_nfsd_unlock; + } + + nla_nest_end(skb, attr); + } + } + + mutex_unlock(&nfsd_mutex); + genlmsg_end(skb, hdr); + + return genlmsg_reply(skb, info); + +err_nfsd_unlock: + mutex_unlock(&nfsd_mutex); +err_free_msg: + nlmsg_free(skb); + + return err; +} + +/** + * nfsd_nl_listener_set_doit - set the nfs running sockets + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct svc_xprt *xprt, *tmp; + const struct nlattr *attr; + struct svc_serv *serv; + LIST_HEAD(permsocks); + struct nfsd_net *nn; + bool delete = false; + int err, rem; + + mutex_lock(&nfsd_mutex); + + err = nfsd_create_serv(net); + if (err) { + mutex_unlock(&nfsd_mutex); + return err; + } + + nn = net_generic(net, nfsd_net_id); + serv = nn->nfsd_serv; + + spin_lock_bh(&serv->sv_lock); + + /* Move all of the old listener sockets to a temp list */ + list_splice_init(&serv->sv_permsocks, &permsocks); + + /* + * Walk the list of server_socks from userland and move any that match + * back to sv_permsocks + */ + nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { + struct nlattr *tb[NFSD_A_SOCK_MAX + 1]; + const char *xcl_name; + struct sockaddr *sa; + + if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR) + continue; + + if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr, + nfsd_sock_nl_policy, info->extack) < 0) + continue; + + if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME]) + continue; + + if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa)) + continue; + + xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]); + sa = nla_data(tb[NFSD_A_SOCK_ADDR]); + + /* Put back any matching sockets */ + list_for_each_entry_safe(xprt, tmp, &permsocks, xpt_list) { + /* This shouldn't be possible */ + if (WARN_ON_ONCE(xprt->xpt_net != net)) { + list_move(&xprt->xpt_list, &serv->sv_permsocks); + continue; + } + + /* If everything matches, put it back */ + if (!strcmp(xprt->xpt_class->xcl_name, xcl_name) && + rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local)) { + list_move(&xprt->xpt_list, &serv->sv_permsocks); + break; + } + } + } + + /* + * If there are listener transports remaining on the permsocks list, + * it means we were asked to remove a listener. + */ + if (!list_empty(&permsocks)) { + list_splice_init(&permsocks, &serv->sv_permsocks); + delete = true; + } + spin_unlock_bh(&serv->sv_lock); + + /* Do not remove listeners while there are active threads. */ + if (serv->sv_nrthreads) { + err = -EBUSY; + goto out_unlock_mtx; + } + + /* + * Since we can't delete an arbitrary llist entry, destroy the + * remaining listeners and recreate the list. + */ + if (delete) + svc_xprt_destroy_all(serv, net); + + /* walk list of addrs again, open any that still don't exist */ + nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { + struct nlattr *tb[NFSD_A_SOCK_MAX + 1]; + const char *xcl_name; + struct sockaddr *sa; + int ret; + + if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR) + continue; + + if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr, + nfsd_sock_nl_policy, info->extack) < 0) + continue; + + if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME]) + continue; + + if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa)) + continue; + + xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]); + sa = nla_data(tb[NFSD_A_SOCK_ADDR]); + + xprt = svc_find_listener(serv, xcl_name, net, sa); + if (xprt) { + if (delete) + WARN_ONCE(1, "Transport type=%s already exists\n", + xcl_name); + svc_xprt_put(xprt); + continue; + } + + ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0, + get_current_cred()); + /* always save the latest error */ + if (ret < 0) + err = ret; + } + + if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks)) + nfsd_destroy_serv(net); + +out_unlock_mtx: + mutex_unlock(&nfsd_mutex); + + return err; +} + +/** + * nfsd_nl_listener_get_doit - get the nfs running listeners + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct svc_xprt *xprt; + struct svc_serv *serv; + struct nfsd_net *nn; + void *hdr; + int err; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_iput(skb, info); + if (!hdr) { + err = -EMSGSIZE; + goto err_free_msg; + } + + mutex_lock(&nfsd_mutex); + nn = net_generic(genl_info_net(info), nfsd_net_id); + + /* no nfs server? Just send empty socket list */ + if (!nn->nfsd_serv) + goto out_unlock_mtx; + + serv = nn->nfsd_serv; + spin_lock_bh(&serv->sv_lock); + list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { + struct nlattr *attr; + + attr = nla_nest_start(skb, NFSD_A_SERVER_SOCK_ADDR); + if (!attr) { + err = -EINVAL; + goto err_serv_unlock; + } + + if (nla_put_string(skb, NFSD_A_SOCK_TRANSPORT_NAME, + xprt->xpt_class->xcl_name) || + nla_put(skb, NFSD_A_SOCK_ADDR, + sizeof(struct sockaddr_storage), + &xprt->xpt_local)) { + err = -EINVAL; + goto err_serv_unlock; + } + + nla_nest_end(skb, attr); + } + spin_unlock_bh(&serv->sv_lock); +out_unlock_mtx: + mutex_unlock(&nfsd_mutex); + genlmsg_end(skb, hdr); + + return genlmsg_reply(skb, info); + +err_serv_unlock: + spin_unlock_bh(&serv->sv_lock); + mutex_unlock(&nfsd_mutex); +err_free_msg: + nlmsg_free(skb); + + return err; +} + +/** + * nfsd_nl_pool_mode_set_doit - set the number of running threads + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_pool_mode_set_doit(struct sk_buff *skb, struct genl_info *info) +{ + const struct nlattr *attr; + + if (GENL_REQ_ATTR_CHECK(info, NFSD_A_POOL_MODE_MODE)) + return -EINVAL; + + attr = info->attrs[NFSD_A_POOL_MODE_MODE]; + return sunrpc_set_pool_mode(nla_data(attr)); +} + +/** + * nfsd_nl_pool_mode_get_doit - get info about pool_mode + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_pool_mode_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + char buf[16]; + void *hdr; + int err; + + if (sunrpc_get_pool_mode(buf, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf)) + return -ERANGE; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + err = -EMSGSIZE; + hdr = genlmsg_iput(skb, info); + if (!hdr) + goto err_free_msg; + + err = nla_put_string(skb, NFSD_A_POOL_MODE_MODE, buf) | + nla_put_u32(skb, NFSD_A_POOL_MODE_NPOOLS, nfsd_nrpools(net)); + if (err) + goto err_free_msg; + + genlmsg_end(skb, hdr); + return genlmsg_reply(skb, info); + +err_free_msg: + nlmsg_free(skb); + return err; +} + +/** * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace * @net: a freshly-created network namespace * @@ -1663,8 +2187,9 @@ int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb) */ static __net_init int nfsd_net_init(struct net *net) { - int retval; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + int retval; + int i; retval = nfsd_export_init(net); if (retval) @@ -1672,20 +2197,35 @@ static __net_init int nfsd_net_init(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; - retval = nfsd_stat_counters_init(nn); + retval = percpu_counter_init_many(nn->counter, 0, GFP_KERNEL, + NFSD_STATS_COUNTERS_NUM); if (retval) goto out_repcache_error; + memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats)); - nn->nfsd_svcstats.program = &nfsd_program; - nn->nfsd_versions = NULL; - nn->nfsd4_minorversions = NULL; + nn->nfsd_svcstats.program = &nfsd_programs[0]; + if (!nfsd_proc_stat_init(net)) { + retval = -ENOMEM; + goto out_proc_error; + } + + for (i = 0; i < sizeof(nn->nfsd_versions); i++) + nn->nfsd_versions[i] = nfsd_support_version(i); + for (i = 0; i < sizeof(nn->nfsd4_minorversions); i++) + nn->nfsd4_minorversions[i] = nfsd_support_version(4); + nn->nfsd_info.mutex = &nfsd_mutex; + nn->nfsd_serv = NULL; nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); - nfsd_proc_stat_init(net); - +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + spin_lock_init(&nn->local_clients_lock); + INIT_LIST_HEAD(&nn->local_clients); +#endif return 0; +out_proc_error: + percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); out_repcache_error: nfsd_idmap_shutdown(net); out_idmap_error: @@ -1694,6 +2234,23 @@ out_export_error: return retval; } +#if IS_ENABLED(CONFIG_NFS_LOCALIO) +/** + * nfsd_net_pre_exit - Disconnect localio clients from net namespace + * @net: a network namespace that is about to be destroyed + * + * This invalidates ->net pointers held by localio clients + * while they can still safely access nn->counter. + */ +static __net_exit void nfsd_net_pre_exit(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + nfs_localio_invalidate_clients(&nn->local_clients, + &nn->local_clients_lock); +} +#endif + /** * nfsd_net_exit - Release the nfsd_net portion of a net namespace * @net: a network namespace that is about to be destroyed @@ -1704,14 +2261,16 @@ static __net_exit void nfsd_net_exit(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfsd_proc_stat_shutdown(net); - nfsd_stat_counters_destroy(nn); + percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); - nfsd_netns_free_versions(nn); } static struct pernet_operations nfsd_net_ops = { .init = nfsd_net_init, +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + .pre_exit = nfsd_net_pre_exit, +#endif .exit = nfsd_net_exit, .id = &nfsd_net_id, .size = sizeof(struct nfsd_net), @@ -1721,6 +2280,8 @@ static int __init init_nfsd(void) { int retval; + nfsd_debugfs_init(); + retval = nfsd4_init_slabs(); if (retval) return retval; @@ -1731,12 +2292,9 @@ static int __init init_nfsd(void) if (retval) goto out_free_pnfs; nfsd_lockd_init(); /* lockd->nfsd callbacks */ - retval = create_proc_exports_entry(); - if (retval) - goto out_free_lockd; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) - goto out_free_exports; + goto out_free_lockd; retval = register_cld_notifier(); if (retval) goto out_free_subsys; @@ -1745,21 +2303,26 @@ static int __init init_nfsd(void) goto out_free_cld; retval = register_filesystem(&nfsd_fs_type); if (retval) - goto out_free_all; + goto out_free_nfsd4; retval = genl_register_family(&nfsd_nl_family); if (retval) + goto out_free_filesystem; + retval = create_proc_exports_entry(); + if (retval) goto out_free_all; + nfsd_localio_ops_init(); return 0; out_free_all: + genl_unregister_family(&nfsd_nl_family); +out_free_filesystem: + unregister_filesystem(&nfsd_fs_type); +out_free_nfsd4: nfsd4_destroy_laundry_wq(); out_free_cld: unregister_cld_notifier(); out_free_subsys: unregister_pernet_subsys(&nfsd_net_ops); -out_free_exports: - remove_proc_entry("fs/nfs/exports", NULL); - remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); nfsd_drc_slab_free(); @@ -1767,22 +2330,24 @@ out_free_pnfs: nfsd4_exit_pnfs(); out_free_slabs: nfsd4_free_slabs(); + nfsd_debugfs_exit(); return retval; } static void __exit exit_nfsd(void) { + remove_proc_entry("fs/nfs/exports", NULL); + remove_proc_entry("fs/nfs", NULL); genl_unregister_family(&nfsd_nl_family); unregister_filesystem(&nfsd_fs_type); nfsd4_destroy_laundry_wq(); unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); - remove_proc_entry("fs/nfs/exports", NULL); - remove_proc_entry("fs/nfs", NULL); nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs(); + nfsd_debugfs_exit(); } MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 16c5a05f340e..1bfd0b4e9af7 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -23,9 +23,7 @@ #include <uapi/linux/nfsd/debug.h> -#include "netns.h" #include "export.h" -#include "stats.h" #undef ifdebug #ifdef CONFIG_SUNRPC_DEBUG @@ -37,26 +35,23 @@ /* * nfsd version */ +#define NFSD_MINVERS 2 +#define NFSD_MAXVERS 4 #define NFSD_SUPPORTED_MINOR_VERSION 2 -/* - * Maximum blocksizes supported by daemon under various circumstances. - */ -#define NFSSVC_MAXBLKSIZE RPCSVC_MAXPAYLOAD -/* NFSv2 is limited by the protocol specification, see RFC 1094 */ -#define NFSSVC_MAXBLKSIZE_V2 (8*1024) +bool nfsd_support_version(int vers); +#include "netns.h" +#include "stats.h" /* - * Largest number of bytes we need to allocate for an NFS - * call or reply. Used to control buffer sizes. We use - * the length of v3 WRITE, READDIR and READDIR replies - * which are an RPC header, up to 26 XDR units of reply - * data, and some page data. - * - * Note that accuracy here doesn't matter too much as the - * size is rounded up to a page size when allocating space. + * Default and maximum payload size (NFS READ or WRITE), in bytes. + * The default is historical, and the maximum is an implementation + * limit. */ -#define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE) +enum { + NFSSVC_DEFBLKSIZE = 1 * 1024 * 1024, + NFSSVC_MAXBLKSIZE = RPCSVC_MAXPAYLOAD, +}; struct readdir_cd { __be32 err; /* 0, nfserr, or nfserr_eof */ @@ -80,12 +75,9 @@ struct nfsd_genl_rqstp { u32 rq_opnum[NFSD_MAX_OPS_PER_COMPOUND]; }; -extern struct svc_program nfsd_program; +extern struct svc_program nfsd_programs[]; extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct mutex nfsd_mutex; -extern spinlock_t nfsd_drc_lock; -extern unsigned long nfsd_drc_max_mem; -extern unsigned long nfsd_drc_mem_used; extern atomic_t nfsd_th_cnt; /* number of available threads */ extern const struct seq_operations nfs_exports_op; @@ -103,18 +95,17 @@ bool nfssvc_encode_voidres(struct svc_rqst *rqstp, /* * Function prototypes. */ -int nfsd_svc(int nrservs, struct net *net, const struct cred *cred); +int nfsd_svc(int n, int *nservers, struct net *net, + const struct cred *cred, const char *scope); int nfsd_dispatch(struct svc_rqst *rqstp); int nfsd_nrthreads(struct net *); int nfsd_nrpools(struct net *); int nfsd_get_nrthreads(int n, int *, struct net *); int nfsd_set_nrthreads(int n, int *, struct net *); -int nfsd_pool_stats_open(struct inode *, struct file *); -int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_shutdown_threads(struct net *net); -bool i_am_nfsd(void); +struct svc_rqst *nfsd_current_rqst(void); struct nfsdfs_client { struct kref cl_ref; @@ -142,6 +133,10 @@ extern const struct svc_version nfsd_acl_version3; #endif #endif +#if IS_ENABLED(CONFIG_NFS_LOCALIO) +extern const struct svc_version localio_version1; +#endif + struct nfsd_net; enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; @@ -151,11 +146,21 @@ void nfsd_reset_versions(struct nfsd_net *nn); int nfsd_create_serv(struct net *net); void nfsd_destroy_serv(struct net *net); +#ifdef CONFIG_DEBUG_FS +void nfsd_debugfs_init(void); +void nfsd_debugfs_exit(void); +#else +static inline void nfsd_debugfs_init(void) {} +static inline void nfsd_debugfs_exit(void) {} +#endif + +extern bool nfsd_disable_splice_read __read_mostly; + extern int nfsd_max_blksize; static inline int nfsd_v4client(struct svc_rqst *rq) { - return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4; + return rq && rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4; } static inline struct user_namespace * nfsd_user_namespace(const struct svc_rqst *rqstp) @@ -230,7 +235,6 @@ void nfsd_lockd_shutdown(void); #define nfserr_nospc cpu_to_be32(NFSERR_NOSPC) #define nfserr_rofs cpu_to_be32(NFSERR_ROFS) #define nfserr_mlink cpu_to_be32(NFSERR_MLINK) -#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP) #define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG) #define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY) #define nfserr_dquot cpu_to_be32(NFSERR_DQUOT) @@ -327,17 +331,36 @@ void nfsd_lockd_shutdown(void); #define nfserr_xattr2big cpu_to_be32(NFS4ERR_XATTR2BIG) #define nfserr_noxattr cpu_to_be32(NFS4ERR_NOXATTR) -/* error codes for internal use */ +/* + * Error codes for internal use. We use enum to choose numbers that are + * not already assigned, then covert to be32 resulting in a number that + * cannot conflict with any existing be32 nfserr value. + */ +enum { + NFSERR_DROPIT = NFS4ERR_FIRST_FREE, /* if a request fails due to kmalloc failure, it gets dropped. * Client should resend eventually */ -#define nfserr_dropit cpu_to_be32(30000) +#define nfserr_dropit cpu_to_be32(NFSERR_DROPIT) + /* end-of-file indicator in readdir */ -#define nfserr_eof cpu_to_be32(30001) + NFSERR_EOF, +#define nfserr_eof cpu_to_be32(NFSERR_EOF) + /* replay detected */ -#define nfserr_replay_me cpu_to_be32(11001) + NFSERR_REPLAY_ME, +#define nfserr_replay_me cpu_to_be32(NFSERR_REPLAY_ME) + /* nfs41 replay detected */ -#define nfserr_replay_cache cpu_to_be32(11002) + NFSERR_REPLAY_CACHE, +#define nfserr_replay_cache cpu_to_be32(NFSERR_REPLAY_CACHE) + +/* symlink found where dir expected - handled differently to + * other symlink found errors by NFSv3. + */ + NFSERR_SYMLINK_NOT_DIR, +#define nfserr_symlink_not_dir cpu_to_be32(NFSERR_SYMLINK_NOT_DIR) +}; /* Check for dir entries '.' and '..' */ #define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.')) @@ -432,7 +455,10 @@ void nfsd_lockd_shutdown(void); (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ FATTR4_WORD2_MODE_UMASK | \ NFSD4_2_SECURITY_ATTRS | \ - FATTR4_WORD2_XATTR_SUPPORT) + FATTR4_WORD2_XATTR_SUPPORT | \ + FATTR4_WORD2_TIME_DELEG_ACCESS | \ + FATTR4_WORD2_TIME_DELEG_MODIFY | \ + FATTR4_WORD2_OPEN_ARGUMENTS) extern const u32 nfsd_suppattrs[3][3]; @@ -502,7 +528,10 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) #endif #define NFSD_WRITEABLE_ATTRS_WORD2 \ (FATTR4_WORD2_MODE_UMASK \ - | MAYBE_FATTR4_WORD2_SECURITY_LABEL) + | MAYBE_FATTR4_WORD2_SECURITY_LABEL \ + | FATTR4_WORD2_TIME_DELEG_ACCESS \ + | FATTR4_WORD2_TIME_DELEG_MODIFY \ + ) #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ NFSD_WRITEABLE_ATTRS_WORD0 diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 40fecf7b224f..aef474f1b84b 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -62,8 +62,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) * the write call). */ static inline __be32 -nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry, - umode_t requested) +nfsd_mode_check(struct dentry *dentry, umode_t requested) { umode_t mode = d_inode(dentry)->i_mode & S_IFMT; @@ -76,36 +75,36 @@ nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry, } return nfs_ok; } - /* - * v4 has an error more specific than err_notdir which we should - * return in preference to err_notdir: - */ - if (rqstp->rq_vers == 4 && mode == S_IFLNK) + if (mode == S_IFLNK) { + if (requested == S_IFDIR) + return nfserr_symlink_not_dir; return nfserr_symlink; + } if (requested == S_IFDIR) return nfserr_notdir; if (mode == S_IFDIR) return nfserr_isdir; - return nfserr_inval; + return nfserr_wrong_type; } -static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, int flags) +static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, + struct svc_cred *cred, + struct svc_export *exp) { - if (flags & NFSEXP_INSECURE_PORT) + if (nfsexp_flags(cred, exp) & NFSEXP_INSECURE_PORT) return true; /* We don't require gss requests to use low ports: */ - if (rqstp->rq_cred.cr_flavor >= RPC_AUTH_GSS) + if (cred->cr_flavor >= RPC_AUTH_GSS) return true; return test_bit(RQ_SECURE, &rqstp->rq_flags); } static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, + struct svc_cred *cred, struct svc_export *exp) { - int flags = nfsexp_flags(rqstp, exp); - /* Check if the request originated from a secure port. */ - if (!nfsd_originating_port_ok(rqstp, flags)) { + if (rqstp && !nfsd_originating_port_ok(rqstp, cred, exp)) { RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); dprintk("nfsd: request from insecure port %s!\n", svc_print_addr(rqstp, buf, sizeof(buf))); @@ -113,23 +112,15 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, } /* Set user creds for this exportpoint */ - return nfserrno(nfsd_setuser(rqstp, exp)); + return nfserrno(nfsd_setuser(cred, exp)); } -static inline __be32 check_pseudo_root(struct svc_rqst *rqstp, - struct dentry *dentry, struct svc_export *exp) +static inline __be32 check_pseudo_root(struct dentry *dentry, + struct svc_export *exp) { if (!(exp->ex_flags & NFSEXP_V4ROOT)) return nfs_ok; /* - * v2/v3 clients have no need for the V4ROOT export--they use - * the mount protocl instead; also, further V4ROOT checks may be - * in v4-specific code, in which case v2/v3 clients could bypass - * them. - */ - if (!nfsd_v4client(rqstp)) - return nfserr_stale; - /* * We're exposing only the directories and symlinks that have to be * traversed on the way to real exports: */ @@ -151,7 +142,11 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp, * dentry. On success, the results are used to set fh_export and * fh_dentry. */ -static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) +static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, + struct svc_cred *cred, + struct auth_domain *client, + struct auth_domain *gssclient, + struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; struct fid *fid = NULL; @@ -162,10 +157,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) int len; __be32 error; - error = nfserr_stale; - if (rqstp->rq_vers > 2) - error = nfserr_badhandle; - if (rqstp->rq_vers == 4 && fh->fh_size == 0) + error = nfserr_badhandle; + if (fh->fh_size == 0) return nfserr_nofilehandle; if (fh->fh_version != 1) @@ -195,7 +188,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) data_left -= len; if (data_left < 0) return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + exp = rqst_exp_find(rqstp ? &rqstp->rq_chandle : NULL, + net, client, gssclient, + fh->fh_fsid_type, fh->fh_fsid); fid = (struct fid *)(fh->fh_fsid + len); error = nfserr_stale; @@ -227,9 +222,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); put_cred(override_creds(new)); - put_cred(new); } else { - error = nfsd_setuser_and_check_port(rqstp, exp); + error = nfsd_setuser_and_check_port(rqstp, cred, exp); if (error) goto out; } @@ -237,9 +231,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) /* * Look up the dentry using the NFS file handle. */ - error = nfserr_stale; - if (rqstp->rq_vers > 2) - error = nfserr_badhandle; + error = nfserr_badhandle; fileid_type = fh->fh_fileid_type; @@ -247,7 +239,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) dentry = dget(exp->ex_path.dentry); else { dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid, - data_left, fileid_type, + data_left, fileid_type, 0, nfsd_acceptable, exp); if (IS_ERR_OR_NULL(dentry)) { trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp, @@ -278,17 +270,25 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) fhp->fh_dentry = dentry; fhp->fh_export = exp; - switch (rqstp->rq_vers) { - case 4: + switch (fhp->fh_maxsize) { + case NFS4_FHSIZE: if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR) fhp->fh_no_atomic_attr = true; + fhp->fh_64bit_cookies = true; break; - case 3: + case NFS3_FHSIZE: if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC) fhp->fh_no_wcc = true; + fhp->fh_64bit_cookies = true; + if (exp->ex_flags & NFSEXP_V4ROOT) + goto out; break; - case 2: + case NFS_FHSIZE: fhp->fh_no_wcc = true; + if (EX_WGATHER(exp)) + fhp->fh_use_wgather = true; + if (exp->ex_flags & NFSEXP_V4ROOT) + goto out; } return 0; @@ -298,42 +298,34 @@ out: } /** - * fh_verify - filehandle lookup and access checking - * @rqstp: pointer to current rpc request + * __fh_verify - filehandle lookup and access checking + * @rqstp: RPC transaction context, or NULL + * @net: net namespace in which to perform the export lookup + * @cred: RPC user credential + * @client: RPC auth domain + * @gssclient: RPC GSS auth domain, or NULL * @fhp: filehandle to be verified * @type: expected type of object pointed to by filehandle * @access: type of access needed to object * - * Look up a dentry from the on-the-wire filehandle, check the client's - * access to the export, and set the current task's credentials. - * - * Regardless of success or failure of fh_verify(), fh_put() should be - * called on @fhp when the caller is finished with the filehandle. - * - * fh_verify() may be called multiple times on a given filehandle, for - * example, when processing an NFSv4 compound. The first call will look - * up a dentry using the on-the-wire filehandle. Subsequent calls will - * skip the lookup and just perform the other checks and possibly change - * the current task's credentials. - * - * @type specifies the type of object expected using one of the S_IF* - * constants defined in include/linux/stat.h. The caller may use zero - * to indicate that it doesn't care, or a negative integer to indicate - * that it expects something not of the given type. - * - * @access is formed from the NFSD_MAY_* constants defined in - * fs/nfsd/vfs.h. + * See fh_verify() for further descriptions of @fhp, @type, and @access. */ -__be32 -fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) +static __be32 +__fh_verify(struct svc_rqst *rqstp, + struct net *net, struct svc_cred *cred, + struct auth_domain *client, + struct auth_domain *gssclient, + struct svc_fh *fhp, umode_t type, int access) { - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_export *exp = NULL; + bool may_bypass_gss = false; struct dentry *dentry; __be32 error; if (!fhp->fh_dentry) { - error = nfsd_set_fh_dentry(rqstp, fhp); + error = nfsd_set_fh_dentry(rqstp, net, cred, client, + gssclient, fhp); if (error) goto out; } @@ -358,25 +350,24 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) * (for example, if different id-squashing options are in * effect on the new filesystem). */ - error = check_pseudo_root(rqstp, dentry, exp); + error = check_pseudo_root(dentry, exp); if (error) goto out; - error = nfsd_setuser_and_check_port(rqstp, exp); + error = nfsd_setuser_and_check_port(rqstp, cred, exp); if (error) goto out; - error = nfsd_mode_check(rqstp, dentry, type); + error = nfsd_mode_check(dentry, type); if (error) goto out; - /* - * pseudoflavor restrictions are not enforced on NLM, - * which clients virtually always use auth_sys for, - * even while using RPCSEC_GSS for NFS. - */ - if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) - goto skip_pseudoflavor_check; + if ((access & NFSD_MAY_NLM) && (exp->ex_flags & NFSEXP_NOAUTHNLM)) + /* NLM is allowed to fully bypass authentication */ + goto out; + + if (access & NFSD_MAY_BYPASS_GSS) + may_bypass_gss = true; /* * Clients may expect to be able to use auth_sys during mount, * even if they use gss for everything else; see section 2.3.2 @@ -384,15 +375,17 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) */ if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT && exp->ex_path.dentry == dentry) - goto skip_pseudoflavor_check; + may_bypass_gss = true; - error = check_nfsd_access(exp, rqstp); + error = check_nfsd_access(exp, rqstp, may_bypass_gss); if (error) goto out; + /* During LOCALIO call to fh_verify will be called with a NULL rqstp */ + if (rqstp) + svc_xprt_set_valid(rqstp->rq_xprt); -skip_pseudoflavor_check: /* Finally, check access permissions. */ - error = nfsd_permission(rqstp, exp, dentry, access); + error = nfsd_permission(cred, exp, dentry, access); out: trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error); if (error == nfserr_stale) @@ -400,6 +393,63 @@ out: return error; } +/** + * fh_verify_local - filehandle lookup and access checking + * @net: net namespace in which to perform the export lookup + * @cred: RPC user credential + * @client: RPC auth domain + * @fhp: filehandle to be verified + * @type: expected type of object pointed to by filehandle + * @access: type of access needed to object + * + * This API can be used by callers who do not have an RPC + * transaction context (ie are not running in an nfsd thread). + * + * See fh_verify() for further descriptions of @fhp, @type, and @access. + */ +__be32 +fh_verify_local(struct net *net, struct svc_cred *cred, + struct auth_domain *client, struct svc_fh *fhp, + umode_t type, int access) +{ + return __fh_verify(NULL, net, cred, client, NULL, + fhp, type, access); +} + +/** + * fh_verify - filehandle lookup and access checking + * @rqstp: pointer to current rpc request + * @fhp: filehandle to be verified + * @type: expected type of object pointed to by filehandle + * @access: type of access needed to object + * + * Look up a dentry from the on-the-wire filehandle, check the client's + * access to the export, and set the current task's credentials. + * + * Regardless of success or failure of fh_verify(), fh_put() should be + * called on @fhp when the caller is finished with the filehandle. + * + * fh_verify() may be called multiple times on a given filehandle, for + * example, when processing an NFSv4 compound. The first call will look + * up a dentry using the on-the-wire filehandle. Subsequent calls will + * skip the lookup and just perform the other checks and possibly change + * the current task's credentials. + * + * @type specifies the type of object expected using one of the S_IF* + * constants defined in include/linux/stat.h. The caller may use zero + * to indicate that it doesn't care, or a negative integer to indicate + * that it expects something not of the given type. + * + * @access is formed from the NFSD_MAY_* constants defined in + * fs/nfsd/vfs.h. + */ +__be32 +fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) +{ + return __fh_verify(rqstp, SVC_NET(rqstp), &rqstp->rq_cred, + rqstp->rq_client, rqstp->rq_gssclient, + fhp, type, access); +} /* * Compose a file handle for an NFS reply. @@ -573,7 +623,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, _fh_update(fhp, exp, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { fh_put(fhp); - return nfserr_opnotsupp; + return nfserr_stale; } return 0; @@ -599,7 +649,7 @@ fh_update(struct svc_fh *fhp) _fh_update(fhp, fhp->fh_export, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) - return nfserr_opnotsupp; + return nfserr_stale; return 0; out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); @@ -618,20 +668,18 @@ out_negative: __be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp) { bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - struct inode *inode; struct kstat stat; __be32 err; if (fhp->fh_no_wcc || fhp->fh_pre_saved) return nfs_ok; - inode = d_inode(fhp->fh_dentry); err = fh_getattr(fhp, &stat); if (err) return err; if (v4) - fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); + fhp->fh_pre_change = nfsd4_change_attribute(&stat); fhp->fh_pre_mtime = stat.mtime; fhp->fh_pre_ctime = stat.ctime; @@ -648,7 +696,6 @@ __be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp) __be32 fh_fill_post_attrs(struct svc_fh *fhp) { bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); - struct inode *inode = d_inode(fhp->fh_dentry); __be32 err; if (fhp->fh_no_wcc) @@ -664,7 +711,7 @@ __be32 fh_fill_post_attrs(struct svc_fh *fhp) fhp->fh_post_saved = true; if (v4) fhp->fh_post_change = - nfsd4_change_attribute(&fhp->fh_post_attr, inode); + nfsd4_change_attribute(&fhp->fh_post_attr); return nfs_ok; } @@ -721,7 +768,7 @@ char * SVCFH_fmt(struct svc_fh *fhp) struct knfsd_fh *fh = &fhp->fh_handle; static char buf[2+1+1+64*3+1]; - if (fh->fh_size < 0 || fh->fh_size> 64) + if (fh->fh_size > 64) return "bad-fh"; sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw); return buf; @@ -755,7 +802,14 @@ enum fsid_source fsid_source(const struct svc_fh *fhp) return FSIDSOURCE_DEV; } -/* +/** + * nfsd4_change_attribute - Generate an NFSv4 change_attribute value + * @stat: inode attributes + * + * Caller must fill in @stat before calling, typically by invoking + * vfs_getattr() with STATX_MODE, STATX_CTIME, and STATX_CHANGE_COOKIE. + * Returns an unsigned 64-bit changeid4 value (RFC 8881 Section 3.2). + * * We could use i_version alone as the change attribute. However, i_version * can go backwards on a regular file after an unclean shutdown. On its own * that doesn't necessarily cause a problem, but if i_version goes backwards @@ -772,13 +826,13 @@ enum fsid_source fsid_source(const struct svc_fh *fhp) * assume that the new change attr is always logged to stable storage in some * fashion before the results can be seen. */ -u64 nfsd4_change_attribute(const struct kstat *stat, const struct inode *inode) +u64 nfsd4_change_attribute(const struct kstat *stat) { u64 chattr; if (stat->result_mask & STATX_CHANGE_COOKIE) { chattr = stat->change_cookie; - if (S_ISREG(inode->i_mode) && + if (S_ISREG(stat->mode) && !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) { chattr += (u64)stat->ctime.tv_sec << 30; chattr += stat->ctime.tv_nsec; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 6ebdf7ea27bf..5103c2f4d225 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -88,6 +88,8 @@ typedef struct svc_fh { * wcc data is not atomic with * operation */ + bool fh_use_wgather; /* NFSv2 wgather option */ + bool fh_64bit_cookies;/* readdir cookie size */ int fh_flags; /* FH flags */ bool fh_post_saved; /* post-op attrs saved */ bool fh_pre_saved; /* pre-op attrs saved */ @@ -215,6 +217,8 @@ extern char * SVCFH_fmt(struct svc_fh *fhp); * Function prototypes */ __be32 fh_verify(struct svc_rqst *, struct svc_fh *, umode_t, int); +__be32 fh_verify_local(struct net *, struct svc_cred *, struct auth_domain *, + struct svc_fh *, umode_t, int); __be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *); __be32 fh_update(struct svc_fh *); void fh_put(struct svc_fh *); @@ -263,7 +267,6 @@ static inline bool fh_fsid_match(const struct knfsd_fh *fh1, return true; } -#ifdef CONFIG_CRC32 /** * knfsd_fh_hash - calculate the crc32 hash for the filehandle * @fh - pointer to filehandle @@ -275,12 +278,6 @@ static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) { return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size); } -#else -static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) -{ - return 0; -} -#endif /** * fh_clear_pre_post_attrs - Reset pre/post attributes @@ -293,8 +290,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) fhp->fh_pre_saved = false; } -u64 nfsd4_change_attribute(const struct kstat *stat, - const struct inode *inode); +u64 nfsd4_change_attribute(const struct kstat *stat); __be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp); __be32 fh_fill_post_attrs(struct svc_fh *fhp); __be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 36370b957b63..c10fa8128a8a 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -10,9 +10,35 @@ #include "cache.h" #include "xdr.h" #include "vfs.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_PROC +static __be32 nfsd_map_status(__be32 status) +{ + switch (status) { + case nfs_ok: + break; + case nfserr_nofilehandle: + case nfserr_badhandle: + status = nfserr_stale; + break; + case nfserr_wrongsec: + case nfserr_xdev: + case nfserr_file_open: + status = nfserr_acces; + break; + case nfserr_symlink_not_dir: + status = nfserr_notdir; + break; + case nfserr_symlink: + case nfserr_wrong_type: + status = nfserr_inval; + break; + } + return status; +} + static __be32 nfsd_proc_null(struct svc_rqst *rqstp) { @@ -29,7 +55,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; - dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); + trace_nfsd_vfs_getattr(rqstp, &argp->fh); fh_copy(&resp->fh, &argp->fh); resp->status = fh_verify(rqstp, &resp->fh, 0, @@ -38,6 +64,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp) goto out; resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -109,6 +136,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -143,6 +171,7 @@ nfsd_proc_lookup(struct svc_rqst *rqstp) resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -164,6 +193,7 @@ nfsd_proc_readlink(struct svc_rqst *rqstp) page_address(resp->page), &resp->len); fh_put(&argp->fh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -182,7 +212,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) SVCFH_fmt(&argp->fh), argp->count, argp->offset); - argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); + argp->count = min_t(u32, argp->count, NFS_MAXDATA); argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); resp->pages = rqstp->rq_next_page; @@ -200,6 +230,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) set_bit(RQ_DROPME, &rqstp->rq_flags); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -220,21 +251,19 @@ nfsd_proc_write(struct svc_rqst *rqstp) struct nfsd_writeargs *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; unsigned long cnt = argp->len; - unsigned int nvecs; dprintk("nfsd: WRITE %s %u bytes at %d\n", SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nvecs = svc_fill_write_vector(rqstp, &argp->payload); - - resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), - argp->offset, rqstp->rq_vec, nvecs, - &cnt, NFS_DATA_SYNC, NULL); + fh_copy(&resp->fh, &argp->fh); + resp->status = nfsd_write(rqstp, &resp->fh, argp->offset, + &argp->payload, &cnt, NFS_DATA_SYNC, NULL); if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) set_bit(RQ_DROPME, &rqstp->rq_flags); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -261,9 +290,6 @@ nfsd_proc_create(struct svc_rqst *rqstp) int hosterr; dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); - dprintk("nfsd: CREATE %s %.*s\n", - SVCFH_fmt(dirfhp), argp->len, argp->name); - /* First verify the parent file handle */ resp->status = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC); if (resp->status != nfs_ok) @@ -281,7 +307,8 @@ nfsd_proc_create(struct svc_rqst *rqstp) } inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT); - dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); + dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(argp->name, argp->len), + dirfhp->fh_dentry); if (IS_ERR(dchild)) { resp->status = nfserrno(PTR_ERR(dchild)); goto out_unlock; @@ -300,7 +327,7 @@ nfsd_proc_create(struct svc_rqst *rqstp) */ resp->status = nfserr_acces; if (!newfhp->fh_dentry) { - printk(KERN_WARNING + printk(KERN_WARNING "nfsd_proc_create: file handle not verified\n"); goto out_unlock; } @@ -331,10 +358,11 @@ nfsd_proc_create(struct svc_rqst *rqstp) * echo thing > device-special-file-or-pipe * by doing a CREATE with type==0 */ - resp->status = nfsd_permission(rqstp, - newfhp->fh_export, - newfhp->fh_dentry, - NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS); + resp->status = nfsd_permission( + &rqstp->rq_cred, + newfhp->fh_export, + newfhp->fh_dentry, + NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS); if (resp->status && resp->status != nfserr_rofs) goto out_unlock; } @@ -403,6 +431,7 @@ done: goto out; resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -412,13 +441,11 @@ nfsd_proc_remove(struct svc_rqst *rqstp) struct nfsd_diropargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: REMOVE %s %.*s\n", SVCFH_fmt(&argp->fh), - argp->len, argp->name); - /* Unlink. -SIFDIR means file must not be a directory */ resp->status = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR, argp->name, argp->len); fh_put(&argp->fh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -428,15 +455,11 @@ nfsd_proc_rename(struct svc_rqst *rqstp) struct nfsd_renameargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: RENAME %s %.*s -> \n", - SVCFH_fmt(&argp->ffh), argp->flen, argp->fname); - dprintk("nfsd: -> %s %.*s\n", - SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname); - resp->status = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen, &argp->tfh, argp->tname, argp->tlen); fh_put(&argp->ffh); fh_put(&argp->tfh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -446,17 +469,11 @@ nfsd_proc_link(struct svc_rqst *rqstp) struct nfsd_linkargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: LINK %s ->\n", - SVCFH_fmt(&argp->ffh)); - dprintk("nfsd: %s %.*s\n", - SVCFH_fmt(&argp->tfh), - argp->tlen, - argp->tname); - resp->status = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen, &argp->ffh); fh_put(&argp->ffh); fh_put(&argp->tfh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -483,10 +500,6 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) goto out; } - dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n", - SVCFH_fmt(&argp->ffh), argp->flen, argp->fname, - argp->tlen, argp->tname); - fh_init(&newfh, NFS_FHSIZE); resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, argp->tname, &attrs, &newfh); @@ -495,6 +508,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) fh_put(&argp->ffh); fh_put(&newfh); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -511,8 +525,6 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) .na_iattr = &argp->attrs, }; - dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); - if (resp->fh.fh_dentry) { printk(KERN_WARNING "nfsd_proc_mkdir: response already verified??\n"); @@ -528,6 +540,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) resp->status = fh_getattr(&resp->fh, &resp->stat); out: + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -540,11 +553,10 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp) struct nfsd_diropargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: RMDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); - resp->status = nfsd_unlink(rqstp, &argp->fh, S_IFDIR, argp->name, argp->len); fh_put(&argp->fh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -576,9 +588,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) struct nfsd_readdirres *resp = rqstp->rq_resp; loff_t offset; - dprintk("nfsd: READDIR %s %d bytes at %d\n", - SVCFH_fmt(&argp->fh), - argp->count, argp->cookie); + trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie); nfsd_init_dirlist_pages(rqstp, resp, argp->count); @@ -590,6 +600,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) nfssvc_encode_nfscookie(resp, offset); fh_put(&argp->fh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -602,11 +613,10 @@ nfsd_proc_statfs(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_statfsres *resp = rqstp->rq_resp; - dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); - resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, NFSD_MAY_BYPASS_GSS_ON_ROOT); fh_put(&argp->fh); + resp->status = nfsd_map_status(resp->status); return rpc_success; } @@ -698,7 +708,7 @@ static const struct svc_procedure nfsd_procedures2[18] = { .pc_argzero = sizeof(struct nfsd_readargs), .pc_ressize = sizeof(struct nfsd_readres), .pc_cachetype = RC_NOCACHE, - .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, + .pc_xdrressize = ST+AT+1+NFS_MAXDATA/4, .pc_name = "READ", }, [NFSPROC_WRITECACHE] = { diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c0d17b92b249..82b0111ac469 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -19,6 +19,7 @@ #include <linux/sunrpc/svc_xprt.h> #include <linux/lockd/bind.h> #include <linux/nfsacl.h> +#include <linux/nfslocalio.h> #include <linux/seq_file.h> #include <linux/inetdevice.h> #include <net/addrconf.h> @@ -35,7 +36,6 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC atomic_t nfsd_th_cnt = ATOMIC_INIT(0); -extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static int nfsd_acl_rpcbind_set(struct net *, @@ -70,15 +70,14 @@ static __be32 nfsd_init_request(struct svc_rqst *, */ DEFINE_MUTEX(nfsd_mutex); -/* - * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. - * nfsd_drc_max_pages limits the total amount of memory available for - * version 4.1 DRC caches. - * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. - */ -DEFINE_SPINLOCK(nfsd_drc_lock); -unsigned long nfsd_drc_max_mem; -unsigned long nfsd_drc_mem_used; +#if IS_ENABLED(CONFIG_NFS_LOCALIO) +static const struct svc_version *localio_versions[] = { + [1] = &localio_version1, +}; + +#define NFSD_LOCALIO_NRVERS ARRAY_SIZE(localio_versions) + +#endif /* CONFIG_NFS_LOCALIO */ #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static const struct svc_version *nfsd_acl_version[] = { @@ -90,23 +89,12 @@ static const struct svc_version *nfsd_acl_version[] = { # endif }; -#define NFSD_ACL_MINVERS 2 +#define NFSD_ACL_MINVERS 2 #define NFSD_ACL_NRVERS ARRAY_SIZE(nfsd_acl_version) -static struct svc_program nfsd_acl_program = { - .pg_prog = NFS_ACL_PROGRAM, - .pg_nvers = NFSD_ACL_NRVERS, - .pg_vers = nfsd_acl_version, - .pg_name = "nfsacl", - .pg_class = "nfsd", - .pg_authenticate = &svc_set_client, - .pg_init_request = nfsd_acl_init_request, - .pg_rpcbind_set = nfsd_acl_rpcbind_set, -}; - #endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ -static const struct svc_version *nfsd_version[] = { +static const struct svc_version *nfsd_version[NFSD_MAXVERS+1] = { #if defined(CONFIG_NFSD_V2) [2] = &nfsd_version2, #endif @@ -116,98 +104,63 @@ static const struct svc_version *nfsd_version[] = { #endif }; -#define NFSD_MINVERS 2 -#define NFSD_NRVERS ARRAY_SIZE(nfsd_version) - -struct svc_program nfsd_program = { -#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) - .pg_next = &nfsd_acl_program, -#endif +struct svc_program nfsd_programs[] = { + { .pg_prog = NFS_PROGRAM, /* program number */ - .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */ + .pg_nvers = NFSD_MAXVERS+1, /* nr of entries in nfsd_version */ .pg_vers = nfsd_version, /* version table */ .pg_name = "nfsd", /* program name */ .pg_class = "nfsd", /* authentication class */ - .pg_authenticate = &svc_set_client, /* export authentication */ + .pg_authenticate = svc_set_client, /* export authentication */ .pg_init_request = nfsd_init_request, .pg_rpcbind_set = nfsd_rpcbind_set, + }, +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) + { + .pg_prog = NFS_ACL_PROGRAM, + .pg_nvers = NFSD_ACL_NRVERS, + .pg_vers = nfsd_acl_version, + .pg_name = "nfsacl", + .pg_class = "nfsd", + .pg_authenticate = svc_set_client, + .pg_init_request = nfsd_acl_init_request, + .pg_rpcbind_set = nfsd_acl_rpcbind_set, + }, +#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ +#if IS_ENABLED(CONFIG_NFS_LOCALIO) + { + .pg_prog = NFS_LOCALIO_PROGRAM, + .pg_nvers = NFSD_LOCALIO_NRVERS, + .pg_vers = localio_versions, + .pg_name = "nfslocalio", + .pg_class = "nfsd", + .pg_authenticate = svc_set_client, + .pg_init_request = svc_generic_init_request, + .pg_rpcbind_set = svc_generic_rpcbind_set, + } +#endif /* CONFIG_NFS_LOCALIO */ }; -static bool -nfsd_support_version(int vers) +bool nfsd_support_version(int vers) { - if (vers >= NFSD_MINVERS && vers < NFSD_NRVERS) + if (vers >= NFSD_MINVERS && vers <= NFSD_MAXVERS) return nfsd_version[vers] != NULL; return false; } -static bool * -nfsd_alloc_versions(void) -{ - bool *vers = kmalloc_array(NFSD_NRVERS, sizeof(bool), GFP_KERNEL); - unsigned i; - - if (vers) { - /* All compiled versions are enabled by default */ - for (i = 0; i < NFSD_NRVERS; i++) - vers[i] = nfsd_support_version(i); - } - return vers; -} - -static bool * -nfsd_alloc_minorversions(void) -{ - bool *vers = kmalloc_array(NFSD_SUPPORTED_MINOR_VERSION + 1, - sizeof(bool), GFP_KERNEL); - unsigned i; - - if (vers) { - /* All minor versions are enabled by default */ - for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) - vers[i] = nfsd_support_version(4); - } - return vers; -} - -void -nfsd_netns_free_versions(struct nfsd_net *nn) -{ - kfree(nn->nfsd_versions); - kfree(nn->nfsd4_minorversions); - nn->nfsd_versions = NULL; - nn->nfsd4_minorversions = NULL; -} - -static void -nfsd_netns_init_versions(struct nfsd_net *nn) -{ - if (!nn->nfsd_versions) { - nn->nfsd_versions = nfsd_alloc_versions(); - nn->nfsd4_minorversions = nfsd_alloc_minorversions(); - if (!nn->nfsd_versions || !nn->nfsd4_minorversions) - nfsd_netns_free_versions(nn); - } -} - int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change) { - if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) + if (vers < NFSD_MINVERS || vers > NFSD_MAXVERS) return 0; switch(change) { case NFSD_SET: - if (nn->nfsd_versions) - nn->nfsd_versions[vers] = nfsd_support_version(vers); + nn->nfsd_versions[vers] = nfsd_support_version(vers); break; case NFSD_CLEAR: - nfsd_netns_init_versions(nn); - if (nn->nfsd_versions) - nn->nfsd_versions[vers] = false; + nn->nfsd_versions[vers] = false; break; case NFSD_TEST: - if (nn->nfsd_versions) - return nn->nfsd_versions[vers]; - fallthrough; + return nn->nfsd_versions[vers]; case NFSD_AVAIL: return nfsd_support_version(vers); } @@ -234,23 +187,16 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change switch(change) { case NFSD_SET: - if (nn->nfsd4_minorversions) { - nfsd_vers(nn, 4, NFSD_SET); - nn->nfsd4_minorversions[minorversion] = - nfsd_vers(nn, 4, NFSD_TEST); - } + nfsd_vers(nn, 4, NFSD_SET); + nn->nfsd4_minorversions[minorversion] = + nfsd_vers(nn, 4, NFSD_TEST); break; case NFSD_CLEAR: - nfsd_netns_init_versions(nn); - if (nn->nfsd4_minorversions) { - nn->nfsd4_minorversions[minorversion] = false; - nfsd_adjust_nfsd_versions4(nn); - } + nn->nfsd4_minorversions[minorversion] = false; + nfsd_adjust_nfsd_versions4(nn); break; case NFSD_TEST: - if (nn->nfsd4_minorversions) - return nn->nfsd4_minorversions[minorversion]; - return nfsd_vers(nn, 4, NFSD_TEST); + return nn->nfsd4_minorversions[minorversion]; case NFSD_AVAIL: return minorversion <= NFSD_SUPPORTED_MINOR_VERSION && nfsd_vers(nn, 4, NFSD_AVAIL); @@ -258,6 +204,34 @@ int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change return 0; } +bool nfsd_net_try_get(struct net *net) __must_hold(rcu) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + return (nn && percpu_ref_tryget_live(&nn->nfsd_net_ref)); +} + +void nfsd_net_put(struct net *net) __must_hold(rcu) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + percpu_ref_put(&nn->nfsd_net_ref); +} + +static void nfsd_net_done(struct percpu_ref *ref) +{ + struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref); + + complete(&nn->nfsd_net_confirm_done); +} + +static void nfsd_net_free(struct percpu_ref *ref) +{ + struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref); + + complete(&nn->nfsd_net_free_done); +} + /* * Maximum number of nfsd processes */ @@ -422,13 +396,13 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred) if (ret) goto out_filecache; +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + nfsd4_ssc_init_umount_work(nn); +#endif ret = nfs4_state_start_net(net); if (ret) goto out_reply_cache; -#ifdef CONFIG_NFSD_V4_2_INTER_SSC - nfsd4_ssc_init_umount_work(nn); -#endif nn->nfsd_net_up = true; return 0; @@ -450,6 +424,13 @@ static void nfsd_shutdown_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); + if (!nn->nfsd_net_up) + return; + + percpu_ref_kill_and_confirm(&nn->nfsd_net_ref, nfsd_net_done); + wait_for_completion(&nn->nfsd_net_confirm_done); + + nfsd_export_flush(net); nfs4_state_shutdown_net(net); nfsd_reply_cache_shutdown(nn); nfsd_file_cache_shutdown_net(net); @@ -457,6 +438,10 @@ static void nfsd_shutdown_net(struct net *net) lockd_down(net); nn->lockd_up = false; } + + wait_for_completion(&nn->nfsd_net_free_done); + percpu_ref_exit(&nn->nfsd_net_ref); + nn->nfsd_net_up = false; nfsd_shutdown_generic(); } @@ -536,6 +521,8 @@ void nfsd_destroy_serv(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv = nn->nfsd_serv; + lockdep_assert_held(&nfsd_mutex); + spin_lock(&nfsd_notifier_lock); nn->nfsd_serv = NULL; spin_unlock(&nfsd_notifier_lock); @@ -557,11 +544,8 @@ void nfsd_destroy_serv(struct net *net) * other initialization has been done except the rpcb information. */ svc_rpcb_cleanup(serv, net); - if (!nn->nfsd_net_up) - return; nfsd_shutdown_net(net); - nfsd_export_flush(net); svc_destroy(&serv); } @@ -569,11 +553,11 @@ void nfsd_reset_versions(struct nfsd_net *nn) { int i; - for (i = 0; i < NFSD_NRVERS; i++) + for (i = 0; i <= NFSD_MAXVERS; i++) if (nfsd_vers(nn, i, NFSD_TEST)) return; - for (i = 0; i < NFSD_NRVERS; i++) + for (i = 0; i <= NFSD_MAXVERS; i++) if (i != 4) nfsd_vers(nn, i, NFSD_SET); else { @@ -583,27 +567,6 @@ void nfsd_reset_versions(struct nfsd_net *nn) } } -/* - * Each session guarantees a negotiated per slot memory cache for replies - * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated - * NFSv4.1 server might want to use more memory for a DRC than a machine - * with mutiple services. - * - * Impose a hard limit on the number of pages for the DRC which varies - * according to the machines free pages. This is of course only a default. - * - * For now this is a #defined shift which could be under admin control - * in the future. - */ -static void set_max_drc(void) -{ - #define NFSD_DRC_SIZE_SHIFT 7 - nfsd_drc_max_mem = (nr_free_buffer_pages() - >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; - nfsd_drc_mem_used = 0; - dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); -} - static int nfsd_get_default_max_blksize(void) { struct sysinfo i; @@ -619,7 +582,7 @@ static int nfsd_get_default_max_blksize(void) */ target >>= 12; - ret = NFSSVC_MAXBLKSIZE; + ret = NFSSVC_DEFBLKSIZE; while (ret > target && ret >= 8*1024*2) ret /= 2; return ret; @@ -643,9 +606,11 @@ void nfsd_shutdown_threads(struct net *net) mutex_unlock(&nfsd_mutex); } -bool i_am_nfsd(void) +struct svc_rqst *nfsd_current_rqst(void) { - return kthread_func(current) == nfsd; + if (kthread_func(current) == nfsd) + return kthread_data(current); + return NULL; } int nfsd_create_serv(struct net *net) @@ -658,26 +623,31 @@ int nfsd_create_serv(struct net *net) if (nn->nfsd_serv) return 0; + error = percpu_ref_init(&nn->nfsd_net_ref, nfsd_net_free, + 0, GFP_KERNEL); + if (error) + return error; + init_completion(&nn->nfsd_net_free_done); + init_completion(&nn->nfsd_net_confirm_done); + if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(nn); - serv = svc_create_pooled(&nfsd_program, &nn->nfsd_svcstats, + serv = svc_create_pooled(nfsd_programs, ARRAY_SIZE(nfsd_programs), + &nn->nfsd_svcstats, nfsd_max_blksize, nfsd); if (serv == NULL) return -ENOMEM; - serv->sv_maxconn = nn->max_connections; error = svc_bind(serv, net); if (error < 0) { svc_destroy(&serv); return error; } spin_lock(&nfsd_notifier_lock); - nn->nfsd_info.mutex = &nfsd_mutex; nn->nfsd_serv = serv; spin_unlock(&nfsd_notifier_lock); - set_max_drc(); /* check if the notifier is already set */ if (atomic_inc_return(&nfsd_notifier_refcount) == 1) { register_inetaddr_notifier(&nfsd_inetaddr_notifier); @@ -707,10 +677,23 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) if (serv) for (i = 0; i < serv->sv_nrpools && i < n; i++) - nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads); + nthreads[i] = serv->sv_pools[i].sp_nrthreads; return 0; } +/** + * nfsd_set_nrthreads - set the number of running threads in the net's service + * @n: number of array members in @nthreads + * @nthreads: array of thread counts for each pool + * @net: network namespace to operate within + * + * This function alters the number of running threads for the given network + * namespace in each pool. If passed an array longer then the number of pools + * the extra pool settings are ignored. If passed an array shorter than the + * number of pools, the missing values are interpreted as 0's. + * + * Returns 0 on success or a negative errno on error. + */ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) { int i = 0; @@ -718,11 +701,18 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) int err = 0; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - WARN_ON(!mutex_is_locked(&nfsd_mutex)); + lockdep_assert_held(&nfsd_mutex); if (nn->nfsd_serv == NULL || n <= 0) return 0; + /* + * Special case: When n == 1, pass in NULL for the pool, so that the + * change is distributed equally among them. + */ + if (n == 1) + return svc_set_num_threads(nn->nfsd_serv, NULL, nthreads[0]); + if (n > nn->nfsd_serv->sv_nrpools) n = nn->nfsd_serv->sv_nrpools; @@ -745,47 +735,50 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) } } - /* - * There must always be a thread in pool 0; the admin - * can't shut down NFS completely using pool_threads. - */ - if (nthreads[0] == 0) - nthreads[0] = 1; - /* apply the new numbers */ for (i = 0; i < n; i++) { err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i], nthreads[i]); if (err) - break; + goto out; } + + /* Anything undefined in array is considered to be 0 */ + for (i = n; i < nn->nfsd_serv->sv_nrpools; ++i) { + err = svc_set_num_threads(nn->nfsd_serv, + &nn->nfsd_serv->sv_pools[i], + 0); + if (err) + goto out; + } +out: return err; } -/* - * Adjust the number of threads and return the new number of threads. - * This is also the function that starts the server if necessary, if - * this is the first time nrservs is nonzero. +/** + * nfsd_svc: start up or shut down the nfsd server + * @n: number of array members in @nthreads + * @nthreads: array of thread counts for each pool + * @net: network namespace to operate within + * @cred: credentials to use for xprt creation + * @scope: server scope value (defaults to nodename) + * + * Adjust the number of threads in each pool and return the new + * total number of threads in the service. */ int -nfsd_svc(int nrservs, struct net *net, const struct cred *cred) +nfsd_svc(int n, int *nthreads, struct net *net, const struct cred *cred, const char *scope) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct svc_serv *serv; - mutex_lock(&nfsd_mutex); - dprintk("nfsd: creating service\n"); + lockdep_assert_held(&nfsd_mutex); - nrservs = max(nrservs, 0); - nrservs = min(nrservs, NFSD_MAXSERVS); - error = 0; - - if (nrservs == 0 && nn->nfsd_serv == NULL) - goto out; + dprintk("nfsd: creating service\n"); - strscpy(nn->nfsd_name, utsname()->nodename, + strscpy(nn->nfsd_name, scope ? scope : utsname()->nodename, sizeof(nn->nfsd_name)); error = nfsd_create_serv(net); @@ -796,7 +789,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) error = nfsd_startup_net(net, cred); if (error) goto out_put; - error = svc_set_num_threads(serv, NULL, nrservs); + error = nfsd_set_nrthreads(n, nthreads, net); if (error) goto out_put; error = serv->sv_nrthreads; @@ -804,7 +797,6 @@ out_put: if (serv->sv_nrthreads == 0) nfsd_destroy_serv(net); out: - mutex_unlock(&nfsd_mutex); return error; } @@ -885,17 +877,17 @@ nfsd_init_request(struct svc_rqst *rqstp, if (likely(nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST))) return svc_generic_init_request(rqstp, progp, ret); - ret->mismatch.lovers = NFSD_NRVERS; - for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) { + ret->mismatch.lovers = NFSD_MAXVERS + 1; + for (i = NFSD_MINVERS; i <= NFSD_MAXVERS; i++) { if (nfsd_vers(nn, i, NFSD_TEST)) { ret->mismatch.lovers = i; break; } } - if (ret->mismatch.lovers == NFSD_NRVERS) + if (ret->mismatch.lovers > NFSD_MAXVERS) return rpc_prog_unavail; ret->mismatch.hivers = NFSD_MINVERS; - for (i = NFSD_NRVERS - 1; i >= NFSD_MINVERS; i--) { + for (i = NFSD_MAXVERS; i >= NFSD_MINVERS; i--) { if (nfsd_vers(nn, i, NFSD_TEST)) { ret->mismatch.hivers = i; break; @@ -917,11 +909,9 @@ nfsd(void *vrqstp) /* At this point, the thread shares current->fs * with the init process. We need to create files with the - * umask as defined by the client instead of init's umask. */ - if (unshare_fs_struct() < 0) { - printk("Unable to start nfsd thread: out of memory\n"); - goto out; - } + * umask as defined by the client instead of init's umask. + */ + svc_thread_init_status(rqstp, unshare_fs_struct()); current->fs->umask = 0; @@ -933,24 +923,19 @@ nfsd(void *vrqstp) * The main request loop */ while (!svc_thread_should_stop(rqstp)) { - /* Update sv_maxconn if it has changed */ - rqstp->rq_server->sv_maxconn = nn->max_connections; - svc_recv(rqstp); - nfsd_file_net_dispose(nn); } atomic_dec(&nfsd_th_cnt); -out: /* Release the thread */ svc_exit_thread(rqstp); return 0; } /** - * nfsd_dispatch - Process an NFS or NFSACL Request + * nfsd_dispatch - Process an NFS or NFSACL or LOCALIO Request * @rqstp: incoming request * * This RPC dispatcher integrates the NFS server's duplicate reply cache. @@ -1064,10 +1049,3 @@ bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { return true; } - -int nfsd_pool_stats_open(struct inode *inode, struct file *file) -{ - struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); - - return svc_pool_stats_open(&nn->nfsd_info, file); -} diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 5777f40c7353..fc262ceafca9 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -336,7 +336,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) /* opaque data */ if (xdr_stream_decode_u32(xdr, &args->len) < 0) return false; - if (args->len > NFSSVC_MAXBLKSIZE_V2) + if (args->len > NFS_MAXDATA) return false; return xdr_stream_subsegment(xdr, &args->payload, args->len); @@ -540,7 +540,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr) p = xdr_reserve_space(xdr, XDR_UNIT * 5); if (!p) return false; - *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2); + *p++ = cpu_to_be32(NFS_MAXDATA); *p++ = cpu_to_be32(stat->f_bsize); *p++ = cpu_to_be32(stat->f_blocks); *p++ = cpu_to_be32(stat->f_bfree); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 01c6f3445646..1995bca158b8 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -64,21 +64,43 @@ typedef struct { refcount_t cs_count; } copy_stateid_t; +struct nfsd4_referring_call { + struct list_head __list; + + u32 rc_sequenceid; + u32 rc_slotid; +}; + +struct nfsd4_referring_call_list { + struct list_head __list; + + struct nfs4_sessionid rcl_sessionid; + int __nr_referring_calls; + struct list_head rcl_referring_calls; +}; + struct nfsd4_callback { struct nfs4_client *cb_clp; struct rpc_message cb_msg; +#define NFSD4_CALLBACK_RUNNING (0) +#define NFSD4_CALLBACK_WAKE (1) +#define NFSD4_CALLBACK_REQUEUE (2) + unsigned long cb_flags; const struct nfsd4_callback_ops *cb_ops; - struct delayed_work cb_work; + struct work_struct cb_work; int cb_seq_status; int cb_status; - bool cb_need_restart; - bool cb_holds_slot; + int cb_held_slot; + + int cb_nr_referring_call_list; + struct list_head cb_referring_call_list; }; struct nfsd4_callback_ops { void (*prepare)(struct nfsd4_callback *); int (*done)(struct nfsd4_callback *, struct rpc_task *); void (*release)(struct nfsd4_callback *); + uint32_t opcode; }; /* @@ -113,6 +135,8 @@ struct nfs4_stid { /* For a deleg stateid kept around only to process free_stateid's: */ #define SC_STATUS_REVOKED BIT(1) #define SC_STATUS_ADMIN_REVOKED BIT(2) +#define SC_STATUS_FREEABLE BIT(3) +#define SC_STATUS_FREED BIT(4) unsigned short sc_status; struct list_head sc_cp_list; @@ -134,24 +158,36 @@ struct nfs4_cpntf_state { time64_t cpntf_time; /* last time stateid used */ }; +/* + * RFC 7862 Section 4.8 states: + * + * | A copy offload stateid will be valid until either (A) the client + * | or server restarts or (B) the client returns the resource by + * | issuing an OFFLOAD_CANCEL operation or the client replies to a + * | CB_OFFLOAD operation. + * + * Because a client might not reply to a CB_OFFLOAD, or a reply + * might get lost due to connection loss, NFSD purges async copy + * state after a short period to prevent it from accumulating + * over time. + */ +#define NFSD_COPY_INITIAL_TTL 10 + struct nfs4_cb_fattr { struct nfsd4_callback ncf_getattr; u32 ncf_cb_status; - u32 ncf_cb_bmap[1]; /* from CB_GETATTR reply */ u64 ncf_cb_change; u64 ncf_cb_fsize; + struct timespec64 ncf_cb_mtime; + struct timespec64 ncf_cb_atime; - unsigned long ncf_cb_flags; bool ncf_file_modified; u64 ncf_initial_cinfo; u64 ncf_cur_fsize; }; -/* bits for ncf_cb_flags */ -#define CB_GETATTR_BUSY 0 - /* * Represents a delegation stateid. The nfs4_client holds references to these * and they are put when it is being destroyed or when the delegation is @@ -179,8 +215,8 @@ struct nfs4_delegation { struct list_head dl_perclnt; struct list_head dl_recall_lru; /* delegation recalled */ struct nfs4_clnt_odstate *dl_clnt_odstate; - u32 dl_type; time64_t dl_time; + u32 dl_type; /* For recall: */ int dl_retries; struct nfsd4_callback dl_recall; @@ -190,6 +226,22 @@ struct nfs4_delegation { struct nfs4_cb_fattr dl_cb_fattr; }; +static inline bool deleg_is_read(u32 dl_type) +{ + return (dl_type == OPEN_DELEGATE_READ || dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG); +} + +static inline bool deleg_is_write(u32 dl_type) +{ + return (dl_type == OPEN_DELEGATE_WRITE || dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG); +} + +static inline bool deleg_attrs_deleg(u32 dl_type) +{ + return dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG || + dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG; +} + #define cb_to_delegation(cb) \ container_of(cb, struct nfs4_delegation, dl_recall) @@ -210,8 +262,11 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) return container_of(s, struct nfs4_delegation, dl_stid); } -/* Maximum number of slots per session. 160 is useful for long haul TCP */ -#define NFSD_MAX_SLOTS_PER_SESSION 160 +/* Maximum number of slots per session. This is for sanity-check only. + * It could be increased if we had a mechanism to shutdown misbehaving clients. + * A large number can be needed to get good throughput on high-latency servers. + */ +#define NFSD_MAX_SLOTS_PER_SESSION 2048 /* Maximum session per slot cache size */ #define NFSD_SLOT_CACHE_SIZE 2048 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ @@ -223,12 +278,15 @@ struct nfsd4_slot { u32 sl_seqid; __be32 sl_status; struct svc_cred sl_cred; + u32 sl_index; u32 sl_datalen; u16 sl_opcnt; + u16 sl_generation; #define NFSD4_SLOT_INUSE (1 << 0) #define NFSD4_SLOT_CACHETHIS (1 << 1) #define NFSD4_SLOT_INITIALIZED (1 << 2) #define NFSD4_SLOT_CACHED (1 << 3) +#define NFSD4_SLOT_REUSED (1 << 4) u8 sl_flags; char sl_data[]; }; @@ -287,6 +345,9 @@ struct nfsd4_conn { unsigned char cn_flags; }; +/* Maximum number of slots that nfsd will use in the backchannel */ +#define NFSD_BC_SLOT_TABLE_SIZE (sizeof(u32) * 8) + /* * Representation of a v4.1+ session. These are refcounted in a similar fashion * to the nfs4_client. References are only taken when the server is actively @@ -294,20 +355,23 @@ struct nfsd4_conn { */ struct nfsd4_session { atomic_t se_ref; + spinlock_t se_lock; + u32 se_cb_slot_avail; /* bitmap of available slots */ + u32 se_cb_highest_slot; /* highest slot client wants */ + u32 se_cb_prog; struct list_head se_hash; /* hash by sessionid */ struct list_head se_perclnt; -/* See SESSION4_PERSIST, etc. for standard flags; this is internal-only: */ -#define NFS4_SESSION_DEAD 0x010 - u32 se_flags; + struct list_head se_all_sessions;/* global list of sessions */ struct nfs4_client *se_client; struct nfs4_sessionid se_sessionid; struct nfsd4_channel_attrs se_fchannel; - struct nfsd4_channel_attrs se_bchannel; struct nfsd4_cb_sec se_cb_sec; struct list_head se_conns; - u32 se_cb_prog; - u32 se_cb_seq_nr; - struct nfsd4_slot *se_slots[]; /* forward channel slots */ + u32 se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE]; + struct xarray se_slots; /* forward channel slots */ + u16 se_slot_gen; + bool se_dead; + u32 se_target_maxslots; }; /* formatted contents of nfs4_sessionid */ @@ -406,8 +470,9 @@ struct nfs4_client { #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ 1 << NFSD4_CLIENT_CB_KILL) -#define NFSD4_CLIENT_CB_RECALL_ANY (6) unsigned long cl_flags; + + struct workqueue_struct *cl_callback_wq; const struct cred *cl_cb_cred; struct rpc_clnt *cl_cb_client; u32 cl_cb_ident; @@ -438,9 +503,6 @@ struct nfs4_client { */ struct dentry *cl_nfsd_info_dentry; - /* for nfs41 callbacks */ - /* We currently support a single back channel with a single slot */ - unsigned long cl_cb_slot_busy; struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ /* wait here for slots */ struct net *net; @@ -453,7 +515,6 @@ struct nfs4_client { struct nfsd4_cb_recall_any *cl_ra; time64_t cl_ra_time; - struct list_head cl_ra_cblist; }; /* struct nfs4_client_reset @@ -486,7 +547,7 @@ struct nfs4_replay { unsigned int rp_buflen; char *rp_buf; struct knfsd_fh rp_openfh; - struct mutex rp_mutex; + int rp_locked; char rp_ibuf[NFSD4_REPLAY_ISIZE]; }; @@ -732,13 +793,24 @@ extern __be32 nfs4_check_open_reclaim(struct nfs4_client *); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); +extern void nfsd41_cb_referring_call(struct nfsd4_callback *cb, + struct nfs4_sessionid *sessionid, + u32 slotid, u32 seqno); +extern void nfsd41_cb_destroy_referring_call_list(struct nfsd4_callback *cb); extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); extern bool nfsd4_run_cb(struct nfsd4_callback *cb); -extern int nfsd4_create_callback_queue(void); -extern void nfsd4_destroy_callback_queue(void); + +static inline void nfsd4_try_run_cb(struct nfsd4_callback *cb) +{ + if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags)) + WARN_ON_ONCE(!nfsd4_run_cb(cb)); +} + extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); +void nfsd4_async_copy_reaper(struct nfsd_net *nn); +bool nfsd4_has_active_async_copies(struct nfs4_client *clp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); @@ -781,5 +853,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) } extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, - struct inode *inode, bool *file_modified, u64 *size); + struct dentry *dentry, struct nfs4_delegation **pdp); #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index be52fb1e928e..f7eaf95e20fc 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -73,53 +73,11 @@ static int nfsd_show(struct seq_file *seq, void *v) DEFINE_PROC_SHOW_ATTRIBUTE(nfsd); -int nfsd_percpu_counters_init(struct percpu_counter *counters, int num) -{ - int i, err = 0; - - for (i = 0; !err && i < num; i++) - err = percpu_counter_init(&counters[i], 0, GFP_KERNEL); - - if (!err) - return 0; - - for (; i > 0; i--) - percpu_counter_destroy(&counters[i-1]); - - return err; -} - -void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num) -{ - int i; - - for (i = 0; i < num; i++) - percpu_counter_set(&counters[i], 0); -} - -void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num) -{ - int i; - - for (i = 0; i < num; i++) - percpu_counter_destroy(&counters[i]); -} - -int nfsd_stat_counters_init(struct nfsd_net *nn) -{ - return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM); -} - -void nfsd_stat_counters_destroy(struct nfsd_net *nn) -{ - nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM); -} - -void nfsd_proc_stat_init(struct net *net) +struct proc_dir_entry *nfsd_proc_stat_init(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops); + return svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops); } void nfsd_proc_stat_shutdown(struct net *net) diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h index d2753e975dfd..e4efb0e4e56d 100644 --- a/fs/nfsd/stats.h +++ b/fs/nfsd/stats.h @@ -10,12 +10,7 @@ #include <uapi/linux/nfsd/stats.h> #include <linux/percpu_counter.h> -int nfsd_percpu_counters_init(struct percpu_counter *counters, int num); -void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num); -void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num); -int nfsd_stat_counters_init(struct nfsd_net *nn); -void nfsd_stat_counters_destroy(struct nfsd_net *nn); -void nfsd_proc_stat_init(struct net *net); +struct proc_dir_entry *nfsd_proc_stat_init(struct net *net); void nfsd_proc_stat_shutdown(struct net *net); static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 1cd2076210b1..3c5505ef5e3a 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -11,6 +11,7 @@ #include <linux/tracepoint.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/xprt.h> +#include <trace/misc/fs.h> #include <trace/misc/nfs.h> #include <trace/misc/sunrpc.h> @@ -18,22 +19,40 @@ #include "nfsfh.h" #include "xdr4.h" -#define NFSD_TRACE_PROC_RES_FIELDS \ +#define NFSD_TRACE_PROC_CALL_FIELDS(r) \ + __field(unsigned int, netns_ino) \ + __field(u32, xid) \ + __sockaddr(server, (r)->rq_xprt->xpt_locallen) \ + __sockaddr(client, (r)->rq_xprt->xpt_remotelen) + +#define NFSD_TRACE_PROC_CALL_ASSIGNMENTS(r) \ + do { \ + struct svc_xprt *xprt = (r)->rq_xprt; \ + __entry->netns_ino = SVC_NET(r)->ns.inum; \ + __entry->xid = be32_to_cpu((r)->rq_xid); \ + __assign_sockaddr(server, &xprt->xpt_local, \ + xprt->xpt_locallen); \ + __assign_sockaddr(client, &xprt->xpt_remote, \ + xprt->xpt_remotelen); \ + } while (0) + +#define NFSD_TRACE_PROC_RES_FIELDS(r) \ __field(unsigned int, netns_ino) \ __field(u32, xid) \ __field(unsigned long, status) \ - __array(unsigned char, server, sizeof(struct sockaddr_in6)) \ - __array(unsigned char, client, sizeof(struct sockaddr_in6)) + __sockaddr(server, (r)->rq_xprt->xpt_locallen) \ + __sockaddr(client, (r)->rq_xprt->xpt_remotelen) -#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(error) \ +#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(r, error) \ do { \ - __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \ - __entry->xid = be32_to_cpu(rqstp->rq_xid); \ + struct svc_xprt *xprt = (r)->rq_xprt; \ + __entry->netns_ino = SVC_NET(r)->ns.inum; \ + __entry->xid = be32_to_cpu((r)->rq_xid); \ __entry->status = be32_to_cpu(error); \ - memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \ - rqstp->rq_xprt->xpt_locallen); \ - memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \ - rqstp->rq_xprt->xpt_remotelen); \ + __assign_sockaddr(server, &xprt->xpt_local, \ + xprt->xpt_locallen); \ + __assign_sockaddr(client, &xprt->xpt_remote, \ + xprt->xpt_remotelen); \ } while (0); DECLARE_EVENT_CLASS(nfsd_xdr_err_class, @@ -79,14 +98,15 @@ DEFINE_NFSD_XDR_ERR_EVENT(cant_encode); { NFSD_MAY_READ, "READ" }, \ { NFSD_MAY_SATTR, "SATTR" }, \ { NFSD_MAY_TRUNC, "TRUNC" }, \ - { NFSD_MAY_LOCK, "LOCK" }, \ + { NFSD_MAY_NLM, "NLM" }, \ { NFSD_MAY_OWNER_OVERRIDE, "OWNER_OVERRIDE" }, \ { NFSD_MAY_LOCAL_ACCESS, "LOCAL_ACCESS" }, \ { NFSD_MAY_BYPASS_GSS_ON_ROOT, "BYPASS_GSS_ON_ROOT" }, \ { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }, \ { NFSD_MAY_BYPASS_GSS, "BYPASS_GSS" }, \ { NFSD_MAY_READ_IF_EXEC, "READ_IF_EXEC" }, \ - { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }) + { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }, \ + { NFSD_MAY_LOCALIO, "LOCALIO" }) TRACE_EVENT(nfsd_compound, TP_PROTO( @@ -104,7 +124,7 @@ TRACE_EVENT(nfsd_compound, TP_fast_assign( __entry->xid = be32_to_cpu(rqst->rq_xid); __entry->opcnt = opcnt; - __assign_str(tag, tag); + __assign_str(tag); ), TP_printk("xid=0x%08x opcnt=%u tag=%s", __entry->xid, __entry->opcnt, __get_str(tag) @@ -127,7 +147,7 @@ TRACE_EVENT(nfsd_compound_status, __entry->args_opcnt = args_opcnt; __entry->resp_opcnt = resp_opcnt; __entry->status = be32_to_cpu(status); - __assign_str(name, name); + __assign_str(name); ), TP_printk("op=%u/%u %s status=%d", __entry->resp_opcnt, __entry->args_opcnt, @@ -144,14 +164,14 @@ TRACE_EVENT(nfsd_compound_decode_err, ), TP_ARGS(rqstp, args_opcnt, resp_opcnt, opnum, status), TP_STRUCT__entry( - NFSD_TRACE_PROC_RES_FIELDS + NFSD_TRACE_PROC_RES_FIELDS(rqstp) __field(u32, args_opcnt) __field(u32, resp_opcnt) __field(u32, opnum) ), TP_fast_assign( - NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) + NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status) __entry->args_opcnt = args_opcnt; __entry->resp_opcnt = resp_opcnt; @@ -162,7 +182,7 @@ TRACE_EVENT(nfsd_compound_decode_err, __entry->opnum, __entry->status) ); -TRACE_EVENT(nfsd_compound_encode_err, +DECLARE_EVENT_CLASS(nfsd_compound_err_class, TP_PROTO( const struct svc_rqst *rqstp, u32 opnum, @@ -170,12 +190,12 @@ TRACE_EVENT(nfsd_compound_encode_err, ), TP_ARGS(rqstp, opnum, status), TP_STRUCT__entry( - NFSD_TRACE_PROC_RES_FIELDS + NFSD_TRACE_PROC_RES_FIELDS(rqstp) __field(u32, opnum) ), TP_fast_assign( - NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) + NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status) __entry->opnum = opnum; ), @@ -183,6 +203,18 @@ TRACE_EVENT(nfsd_compound_encode_err, __entry->opnum, __entry->status) ); +#define DEFINE_NFSD_COMPOUND_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_compound_err_class, nfsd_compound_##name##_err, \ + TP_PROTO( \ + const struct svc_rqst *rqstp, \ + u32 opnum, \ + __be32 status \ + ), \ + TP_ARGS(rqstp, opnum, status)) + +DEFINE_NFSD_COMPOUND_ERR_EVENT(op); +DEFINE_NFSD_COMPOUND_ERR_EVENT(encode); + #define show_fs_file_type(x) \ __print_symbolic(x, \ { S_IFLNK, "LNK" }, \ @@ -193,7 +225,7 @@ TRACE_EVENT(nfsd_compound_encode_err, { S_IFIFO, "FIFO" }, \ { S_IFSOCK, "SOCK" }) -TRACE_EVENT(nfsd_fh_verify, +TRACE_EVENT_CONDITION(nfsd_fh_verify, TP_PROTO( const struct svc_rqst *rqstp, const struct svc_fh *fhp, @@ -201,6 +233,7 @@ TRACE_EVENT(nfsd_fh_verify, int access ), TP_ARGS(rqstp, fhp, type, access), + TP_CONDITION(rqstp != NULL), TP_STRUCT__entry( __field(unsigned int, netns_ino) __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) @@ -239,7 +272,7 @@ TRACE_EVENT_CONDITION(nfsd_fh_verify_err, __be32 error ), TP_ARGS(rqstp, fhp, type, access, error), - TP_CONDITION(error), + TP_CONDITION(rqstp != NULL && error), TP_STRUCT__entry( __field(unsigned int, netns_ino) __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) @@ -295,12 +328,13 @@ DECLARE_EVENT_CLASS(nfsd_fh_err_class, __entry->status) ) -#define DEFINE_NFSD_FH_ERR_EVENT(name) \ -DEFINE_EVENT(nfsd_fh_err_class, nfsd_##name, \ - TP_PROTO(struct svc_rqst *rqstp, \ - struct svc_fh *fhp, \ - int status), \ - TP_ARGS(rqstp, fhp, status)) +#define DEFINE_NFSD_FH_ERR_EVENT(name) \ +DEFINE_EVENT_CONDITION(nfsd_fh_err_class, nfsd_##name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *fhp, \ + int status), \ + TP_ARGS(rqstp, fhp, status), \ + TP_CONDITION(rqstp != NULL)) DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badexport); DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badhandle); @@ -318,7 +352,7 @@ TRACE_EVENT(nfsd_exp_find_key, TP_fast_assign( __entry->fsidtype = key->ek_fsidtype; memcpy(__entry->fsid, key->ek_fsid, 4*6); - __assign_str(auth_domain, key->ek_client->name); + __assign_str(auth_domain); __entry->status = status; ), TP_printk("fsid=%x::%s domain=%s status=%d", @@ -342,8 +376,8 @@ TRACE_EVENT(nfsd_expkey_update, TP_fast_assign( __entry->fsidtype = key->ek_fsidtype; memcpy(__entry->fsid, key->ek_fsid, 4*6); - __assign_str(auth_domain, key->ek_client->name); - __assign_str(path, exp_path); + __assign_str(auth_domain); + __assign_str(path); __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags); ), TP_printk("fsid=%x::%s domain=%s path=%s cache=%s", @@ -365,8 +399,8 @@ TRACE_EVENT(nfsd_exp_get_by_name, __field(int, status) ), TP_fast_assign( - __assign_str(path, key->ex_path.dentry->d_name.name); - __assign_str(auth_domain, key->ex_client->name); + __assign_str(path); + __assign_str(auth_domain); __entry->status = status; ), TP_printk("path=%s domain=%s status=%d", @@ -385,8 +419,8 @@ TRACE_EVENT(nfsd_export_update, __field(bool, cache) ), TP_fast_assign( - __assign_str(path, key->ex_path.dentry->d_name.name); - __assign_str(auth_domain, key->ex_client->name); + __assign_str(path); + __assign_str(auth_domain); __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags); ), TP_printk("path=%s domain=%s cache=%s", @@ -436,6 +470,8 @@ DEFINE_NFSD_IO_EVENT(write_start); DEFINE_NFSD_IO_EVENT(write_opened); DEFINE_NFSD_IO_EVENT(write_io_done); DEFINE_NFSD_IO_EVENT(write_done); +DEFINE_NFSD_IO_EVENT(commit_start); +DEFINE_NFSD_IO_EVENT(commit_done); DECLARE_EVENT_CLASS(nfsd_err_class, TP_PROTO(struct svc_rqst *rqstp, @@ -485,7 +521,7 @@ TRACE_EVENT(nfsd_dirent, TP_fast_assign( __entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0; __entry->ino = ino; - __assign_str(name, name); + __assign_str(name); ), TP_printk("fh_hash=0x%08x ino=%llu name=%s", __entry->fh_hash, __entry->ino, __get_str(name) @@ -611,7 +647,6 @@ DEFINE_STATEID_EVENT(open); DEFINE_STATEID_EVENT(deleg_read); DEFINE_STATEID_EVENT(deleg_write); DEFINE_STATEID_EVENT(deleg_return); -DEFINE_STATEID_EVENT(deleg_recall); DECLARE_EVENT_CLASS(nfsd_stateseqid_class, TP_PROTO(u32 seqid, const stateid_t *stp), @@ -749,6 +784,87 @@ TRACE_EVENT_CONDITION(nfsd_seq4_status, ) ); +DECLARE_EVENT_CLASS(nfsd_cs_slot_class, + TP_PROTO( + const struct nfs4_client *clp, + const struct nfsd4_create_session *cs + ), + TP_ARGS(clp, cs), + TP_STRUCT__entry( + __field(u32, seqid) + __field(u32, slot_seqid) + __field(u32, cl_boot) + __field(u32, cl_id) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + const struct nfsd4_clid_slot *slot = &clp->cl_cs_slot; + + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen); + __entry->seqid = cs->seqid; + __entry->slot_seqid = slot->sl_seqid; + ), + TP_printk("addr=%pISpc client %08x:%08x seqid=%u slot_seqid=%u", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->seqid, __entry->slot_seqid + ) +); + +#define DEFINE_CS_SLOT_EVENT(name) \ +DEFINE_EVENT(nfsd_cs_slot_class, nfsd_##name, \ + TP_PROTO( \ + const struct nfs4_client *clp, \ + const struct nfsd4_create_session *cs \ + ), \ + TP_ARGS(clp, cs)) + +DEFINE_CS_SLOT_EVENT(slot_seqid_conf); +DEFINE_CS_SLOT_EVENT(slot_seqid_unconf); + +#define show_nfs_slot_flags(val) \ + __print_flags(val, "|", \ + { NFSD4_SLOT_INUSE, "INUSE" }, \ + { NFSD4_SLOT_CACHETHIS, "CACHETHIS" }, \ + { NFSD4_SLOT_INITIALIZED, "INITIALIZED" }, \ + { NFSD4_SLOT_CACHED, "CACHED" }, \ + { NFSD4_SLOT_REUSED, "REUSED" }) + +TRACE_EVENT(nfsd_slot_seqid_sequence, + TP_PROTO( + const struct nfs4_client *clp, + const struct nfsd4_sequence *seq, + const struct nfsd4_slot *slot + ), + TP_ARGS(clp, seq, slot), + TP_STRUCT__entry( + __field(u32, seqid) + __field(u32, slot_seqid) + __field(u32, slot_index) + __field(unsigned long, slot_flags) + __field(u32, cl_boot) + __field(u32, cl_id) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen); + __entry->seqid = seq->seqid; + __entry->slot_seqid = slot->sl_seqid; + __entry->slot_index = seq->slotid; + __entry->slot_flags = slot->sl_flags; + ), + TP_printk("addr=%pISpc client %08x:%08x idx=%u seqid=%u slot_seqid=%u flags=%s", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->slot_index, __entry->seqid, __entry->slot_seqid, + show_nfs_slot_flags(__entry->slot_flags) + ) +); + DECLARE_EVENT_CLASS(nfsd_clientid_class, TP_PROTO(const clientid_t *clid), TP_ARGS(clid), @@ -778,6 +894,30 @@ DEFINE_CLIENTID_EVENT(purged); DEFINE_CLIENTID_EVENT(renew); DEFINE_CLIENTID_EVENT(stale); +TRACE_EVENT(nfsd_mark_client_expired, + TP_PROTO( + const struct nfs4_client *clp, + int cl_rpc_users + ), + TP_ARGS(clp, cl_rpc_users), + TP_STRUCT__entry( + __field(int, cl_rpc_users) + __field(u32, cl_boot) + __field(u32, cl_id) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->cl_rpc_users = cl_rpc_users; + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) + ), + TP_printk("addr=%pISpc client %08x:%08x cl_rpc_users=%d", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->cl_rpc_users) +); + DECLARE_EVENT_CLASS(nfsd_net_class, TP_PROTO(const struct nfsd_net *nn), TP_ARGS(nn), @@ -906,7 +1046,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class, __entry->flavor = clp->cl_cred.cr_flavor; memcpy(__entry->verifier, (void *)&clp->cl_verifier, NFS4_VERIFIER_SIZE); - __assign_str(name, clp->cl_name.data); + __assign_str(name); ), TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x", __entry->addr, __get_str(name), @@ -931,6 +1071,7 @@ DEFINE_CLID_EVENT(confirmed_r); { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \ + { 1 << NFSD_FILE_RECENT, "RECENT" }, \ { 1 << NFSD_FILE_GC, "GC" }) DECLARE_EVENT_CLASS(nfsd_file_class, @@ -1016,7 +1157,7 @@ TRACE_EVENT(nfsd_file_acquire, ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0; __entry->inode = inode; __entry->may_flags = may_flags; __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0; @@ -1050,7 +1191,7 @@ TRACE_EVENT(nfsd_file_insert_err, __field(long, error) ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0; __entry->inode = inode; __entry->may_flags = may_flags; __entry->error = error; @@ -1080,7 +1221,7 @@ TRACE_EVENT(nfsd_file_cons_err, __field(const void *, nf_file) ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0; __entry->inode = inode; __entry->may_flags = may_flags; __entry->nf_ref = refcount_read(&nf->nf_ref); @@ -1209,6 +1350,7 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_aged); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, @@ -1238,6 +1380,7 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ TP_ARGS(removed, remaining)) DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_recent); DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); TRACE_EVENT(nfsd_file_close, @@ -1392,6 +1535,9 @@ DEFINE_NFSD_CB_EVENT(new_state); DEFINE_NFSD_CB_EVENT(probe); DEFINE_NFSD_CB_EVENT(lost); DEFINE_NFSD_CB_EVENT(shutdown); +DEFINE_NFSD_CB_EVENT(rpc_prepare); +DEFINE_NFSD_CB_EVENT(rpc_done); +DEFINE_NFSD_CB_EVENT(rpc_release); TRACE_DEFINE_ENUM(RPC_AUTH_NULL); TRACE_DEFINE_ENUM(RPC_AUTH_UNIX); @@ -1425,7 +1571,7 @@ TRACE_EVENT(nfsd_cb_setup, TP_fast_assign( __entry->cl_boot = clp->cl_clientid.cl_boot; __entry->cl_id = clp->cl_clientid.cl_id; - __assign_str(netid, netid); + __assign_str(netid); __entry->authflavor = authflavor; __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, clp->cl_cb_conn.cb_addrlen) @@ -1459,6 +1605,19 @@ TRACE_EVENT(nfsd_cb_setup_err, __entry->error) ); +/* Not a real opcode, but there is no 0 operation. */ +#define _CB_NULL 0 + +#define show_nfsd_cb_opcode(val) \ + __print_symbolic(val, \ + { _CB_NULL, "CB_NULL" }, \ + { OP_CB_GETATTR, "CB_GETATTR" }, \ + { OP_CB_RECALL, "CB_RECALL" }, \ + { OP_CB_LAYOUTRECALL, "CB_LAYOUTRECALL" }, \ + { OP_CB_RECALL_ANY, "CB_RECALL_ANY" }, \ + { OP_CB_NOTIFY_LOCK, "CB_NOTIFY_LOCK" }, \ + { OP_CB_OFFLOAD, "CB_OFFLOAD" }) + DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class, TP_PROTO( const struct nfs4_client *clp, @@ -1469,6 +1628,7 @@ DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class, __field(u32, cl_boot) __field(u32, cl_id) __field(const void *, cb) + __field(unsigned long, opcode) __field(bool, need_restart) __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) ), @@ -1476,14 +1636,15 @@ DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class, __entry->cl_boot = clp->cl_clientid.cl_boot; __entry->cl_id = clp->cl_clientid.cl_id; __entry->cb = cb; - __entry->need_restart = cb->cb_need_restart; + __entry->opcode = cb->cb_ops ? cb->cb_ops->opcode : _CB_NULL; + __entry->need_restart = test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags); __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, clp->cl_cb_conn.cb_addrlen) ), - TP_printk("addr=%pISpc client %08x:%08x cb=%p%s", - __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, - __entry->cb, __entry->need_restart ? - " (need restart)" : " (first try)" + TP_printk("addr=%pISpc client %08x:%08x cb=%p%s opcode=%s", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, __entry->cb, + __entry->need_restart ? " (need restart)" : " (first try)", + show_nfsd_cb_opcode(__entry->opcode) ) ); @@ -1534,7 +1695,7 @@ TRACE_EVENT(nfsd_cb_seq_status, __entry->seq_status = cb->cb_seq_status; ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d\n", + " sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d", __entry->task_id, __entry->client_id, __entry->cl_boot, __entry->cl_id, __entry->seqno, __entry->reserved, @@ -1570,10 +1731,10 @@ TRACE_EVENT(nfsd_cb_free_slot, __entry->cl_id = sid->clientid.cl_id; __entry->seqno = sid->sequence; __entry->reserved = sid->reserved; - __entry->slot_seqno = session->se_cb_seq_nr; + __entry->slot_seqno = session->se_cb_seq_nr[cb->cb_held_slot]; ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER - " sessionid=%08x:%08x:%08x:%08x new slot seqno=%u\n", + " sessionid=%08x:%08x:%08x:%08x new slot seqno=%u", __entry->task_id, __entry->client_id, __entry->cl_boot, __entry->cl_id, __entry->seqno, __entry->reserved, @@ -1736,6 +1897,7 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done); DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done); +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_getattr_done); TRACE_EVENT(nfsd_cb_recall_any_done, TP_PROTO( @@ -1770,7 +1932,7 @@ TRACE_EVENT(nfsd_ctl_unlock_ip, ), TP_fast_assign( __entry->netns_ino = net->ns.inum; - __assign_str(address, address); + __assign_str(address); ), TP_printk("address=%s", __get_str(address) @@ -1789,7 +1951,7 @@ TRACE_EVENT(nfsd_ctl_unlock_fs, ), TP_fast_assign( __entry->netns_ino = net->ns.inum; - __assign_str(path, path); + __assign_str(path); ), TP_printk("path=%s", __get_str(path) @@ -1813,8 +1975,8 @@ TRACE_EVENT(nfsd_ctl_filehandle, TP_fast_assign( __entry->netns_ino = net->ns.inum; __entry->maxsize = maxsize; - __assign_str(domain, domain); - __assign_str(path, path); + __assign_str(domain); + __assign_str(path); ), TP_printk("domain=%s path=%s maxsize=%d", __get_str(domain), __get_str(path), __entry->maxsize @@ -1874,7 +2036,7 @@ TRACE_EVENT(nfsd_ctl_version, ), TP_fast_assign( __entry->netns_ino = net->ns.inum; - __assign_str(mesg, mesg); + __assign_str(mesg); ), TP_printk("%s", __get_str(mesg) @@ -1915,7 +2077,7 @@ TRACE_EVENT(nfsd_ctl_ports_addxprt, TP_fast_assign( __entry->netns_ino = net->ns.inum; __entry->port = port; - __assign_str(transport, transport); + __assign_str(transport); ), TP_printk("transport=%s port=%d", __get_str(transport), __entry->port @@ -1976,9 +2138,9 @@ TRACE_EVENT(nfsd_ctl_time, TP_fast_assign( __entry->netns_ino = net->ns.inum; __entry->time = time; - __assign_str(name, name); + __assign_str(name); ), - TP_printk("file=%s time=%d\n", + TP_printk("file=%s time=%d", __get_str(name), __entry->time ) ); @@ -1995,7 +2157,7 @@ TRACE_EVENT(nfsd_ctl_recoverydir, ), TP_fast_assign( __entry->netns_ino = net->ns.inum; - __assign_str(recdir, recdir); + __assign_str(recdir); ), TP_printk("recdir=%s", __get_str(recdir) @@ -2033,6 +2195,10 @@ DECLARE_EVENT_CLASS(nfsd_copy_class, __field(u32, dst_cl_id) __field(u32, dst_so_id) __field(u32, dst_si_generation) + __field(u32, cb_cl_boot) + __field(u32, cb_cl_id) + __field(u32, cb_so_id) + __field(u32, cb_si_generation) __field(u64, src_cp_pos) __field(u64, dst_cp_pos) __field(u64, cp_count) @@ -2041,6 +2207,7 @@ DECLARE_EVENT_CLASS(nfsd_copy_class, TP_fast_assign( const stateid_t *src_stp = ©->cp_src_stateid; const stateid_t *dst_stp = ©->cp_dst_stateid; + const stateid_t *cb_stp = ©->cp_res.cb_stateid; __entry->intra = test_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); __entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); @@ -2052,6 +2219,10 @@ DECLARE_EVENT_CLASS(nfsd_copy_class, __entry->dst_cl_id = dst_stp->si_opaque.so_clid.cl_id; __entry->dst_so_id = dst_stp->si_opaque.so_id; __entry->dst_si_generation = dst_stp->si_generation; + __entry->cb_cl_boot = cb_stp->si_opaque.so_clid.cl_boot; + __entry->cb_cl_id = cb_stp->si_opaque.so_clid.cl_id; + __entry->cb_so_id = cb_stp->si_opaque.so_id; + __entry->cb_si_generation = cb_stp->si_generation; __entry->src_cp_pos = copy->cp_src_pos; __entry->dst_cp_pos = copy->cp_dst_pos; __entry->cp_count = copy->cp_count; @@ -2059,14 +2230,17 @@ DECLARE_EVENT_CLASS(nfsd_copy_class, sizeof(struct sockaddr_in6)); ), TP_printk("client=%pISpc intra=%d async=%d " - "src_stateid[si_generation:0x%x cl_boot:0x%x cl_id:0x%x so_id:0x%x] " - "dst_stateid[si_generation:0x%x cl_boot:0x%x cl_id:0x%x so_id:0x%x] " + "src_client %08x:%08x src_stateid %08x:%08x " + "dst_client %08x:%08x dst_stateid %08x:%08x " + "cb_client %08x:%08x cb_stateid %08x:%08x " "cp_src_pos=%llu cp_dst_pos=%llu cp_count=%llu", __get_sockaddr(addr), __entry->intra, __entry->async, - __entry->src_si_generation, __entry->src_cl_boot, - __entry->src_cl_id, __entry->src_so_id, - __entry->dst_si_generation, __entry->dst_cl_boot, - __entry->dst_cl_id, __entry->dst_so_id, + __entry->src_cl_boot, __entry->src_cl_id, + __entry->src_so_id, __entry->src_si_generation, + __entry->dst_cl_boot, __entry->dst_cl_id, + __entry->dst_so_id, __entry->dst_si_generation, + __entry->cb_cl_boot, __entry->cb_cl_id, + __entry->cb_so_id, __entry->cb_si_generation, __entry->src_cp_pos, __entry->dst_cp_pos, __entry->cp_count ) ); @@ -2078,7 +2252,7 @@ DEFINE_EVENT(nfsd_copy_class, nfsd_copy_##name, \ DEFINE_COPY_EVENT(inter); DEFINE_COPY_EVENT(intra); -DEFINE_COPY_EVENT(do_async); +DEFINE_COPY_EVENT(async); TRACE_EVENT(nfsd_copy_done, TP_PROTO( @@ -2099,11 +2273,342 @@ TRACE_EVENT(nfsd_copy_done, __assign_sockaddr(addr, ©->cp_clp->cl_addr, sizeof(struct sockaddr_in6)); ), - TP_printk("addr=%pISpc status=%d intra=%d async=%d ", + TP_printk("addr=%pISpc status=%d intra=%d async=%d", __get_sockaddr(addr), __entry->status, __entry->intra, __entry->async ) ); +DECLARE_EVENT_CLASS(nfsd_copy_async_done_class, + TP_PROTO( + const struct nfsd4_copy *copy + ), + TP_ARGS(copy), + TP_STRUCT__entry( + __field(int, status) + __field(bool, intra) + __field(bool, async) + __field(u32, src_cl_boot) + __field(u32, src_cl_id) + __field(u32, src_so_id) + __field(u32, src_si_generation) + __field(u32, dst_cl_boot) + __field(u32, dst_cl_id) + __field(u32, dst_so_id) + __field(u32, dst_si_generation) + __field(u32, cb_cl_boot) + __field(u32, cb_cl_id) + __field(u32, cb_so_id) + __field(u32, cb_si_generation) + __field(u64, src_cp_pos) + __field(u64, dst_cp_pos) + __field(u64, cp_count) + __sockaddr(addr, sizeof(struct sockaddr_in6)) + ), + TP_fast_assign( + const stateid_t *src_stp = ©->cp_src_stateid; + const stateid_t *dst_stp = ©->cp_dst_stateid; + const stateid_t *cb_stp = ©->cp_res.cb_stateid; + + __entry->status = be32_to_cpu(copy->nfserr); + __entry->intra = test_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); + __entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); + __entry->src_cl_boot = src_stp->si_opaque.so_clid.cl_boot; + __entry->src_cl_id = src_stp->si_opaque.so_clid.cl_id; + __entry->src_so_id = src_stp->si_opaque.so_id; + __entry->src_si_generation = src_stp->si_generation; + __entry->dst_cl_boot = dst_stp->si_opaque.so_clid.cl_boot; + __entry->dst_cl_id = dst_stp->si_opaque.so_clid.cl_id; + __entry->dst_so_id = dst_stp->si_opaque.so_id; + __entry->dst_si_generation = dst_stp->si_generation; + __entry->cb_cl_boot = cb_stp->si_opaque.so_clid.cl_boot; + __entry->cb_cl_id = cb_stp->si_opaque.so_clid.cl_id; + __entry->cb_so_id = cb_stp->si_opaque.so_id; + __entry->cb_si_generation = cb_stp->si_generation; + __entry->src_cp_pos = copy->cp_src_pos; + __entry->dst_cp_pos = copy->cp_dst_pos; + __entry->cp_count = copy->cp_count; + __assign_sockaddr(addr, ©->cp_clp->cl_addr, + sizeof(struct sockaddr_in6)); + ), + TP_printk("client=%pISpc status=%d intra=%d async=%d " + "src_client %08x:%08x src_stateid %08x:%08x " + "dst_client %08x:%08x dst_stateid %08x:%08x " + "cb_client %08x:%08x cb_stateid %08x:%08x " + "cp_src_pos=%llu cp_dst_pos=%llu cp_count=%llu", + __get_sockaddr(addr), + __entry->status, __entry->intra, __entry->async, + __entry->src_cl_boot, __entry->src_cl_id, + __entry->src_so_id, __entry->src_si_generation, + __entry->dst_cl_boot, __entry->dst_cl_id, + __entry->dst_so_id, __entry->dst_si_generation, + __entry->cb_cl_boot, __entry->cb_cl_id, + __entry->cb_so_id, __entry->cb_si_generation, + __entry->src_cp_pos, __entry->dst_cp_pos, __entry->cp_count + ) +); + +#define DEFINE_COPY_ASYNC_DONE_EVENT(name) \ +DEFINE_EVENT(nfsd_copy_async_done_class, \ + nfsd_copy_async_##name, \ + TP_PROTO(const struct nfsd4_copy *copy), \ + TP_ARGS(copy)) + +DEFINE_COPY_ASYNC_DONE_EVENT(done); +DEFINE_COPY_ASYNC_DONE_EVENT(cancel); + +TRACE_EVENT(nfsd_vfs_setattr, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + const struct iattr *iap, + const struct timespec64 *guardtime + ), + TP_ARGS(rqstp, fhp, iap, guardtime), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __field(s64, gtime_tv_sec) + __field(u32, gtime_tv_nsec) + __field(unsigned int, ia_valid) + __field(loff_t, ia_size) + __field(uid_t, ia_uid) + __field(gid_t, ia_gid) + __field(umode_t, ia_mode) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->gtime_tv_sec = guardtime ? guardtime->tv_sec : 0; + __entry->gtime_tv_nsec = guardtime ? guardtime->tv_nsec : 0; + __entry->ia_valid = iap->ia_valid; + __entry->ia_size = iap->ia_size; + __entry->ia_uid = __kuid_val(iap->ia_uid); + __entry->ia_gid = __kgid_val(iap->ia_gid); + __entry->ia_mode = iap->ia_mode; + ), + TP_printk( + "xid=0x%08x fh_hash=0x%08x ia_valid=%s ia_size=%llu ia_mode=0%o ia_uid=%u ia_gid=%u guard_time=%lld.%u", + __entry->xid, __entry->fh_hash, show_ia_valid_flags(__entry->ia_valid), + __entry->ia_size, __entry->ia_mode, __entry->ia_uid, __entry->ia_gid, + __entry->gtime_tv_sec, __entry->gtime_tv_nsec + ) +) + +TRACE_EVENT(nfsd_vfs_lookup, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + const char *name, + unsigned int len + ), + TP_ARGS(rqstp, fhp, name, len), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __string_len(name, name, len) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __assign_str(name); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x name=%s", + __entry->xid, __entry->fh_hash, __get_str(name) + ) +); + +TRACE_EVENT(nfsd_vfs_create, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + umode_t type, + const char *name, + unsigned int len + ), + TP_ARGS(rqstp, fhp, type, name, len), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __field(umode_t, type) + __string_len(name, name, len) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->type = type; + __assign_str(name); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x type=%s name=%s", + __entry->xid, __entry->fh_hash, + show_fs_file_type(__entry->type), __get_str(name) + ) +); + +TRACE_EVENT(nfsd_vfs_symlink, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + const char *name, + unsigned int namelen, + const char *target + ), + TP_ARGS(rqstp, fhp, name, namelen, target), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __string_len(name, name, namelen) + __string(target, target) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __assign_str(name); + __assign_str(target); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x name=%s target=%s", + __entry->xid, __entry->fh_hash, + __get_str(name), __get_str(target) + ) +); + +TRACE_EVENT(nfsd_vfs_link, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *sfhp, + const struct svc_fh *tfhp, + const char *name, + unsigned int namelen + ), + TP_ARGS(rqstp, sfhp, tfhp, name, namelen), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, sfh_hash) + __field(u32, tfh_hash) + __string_len(name, name, namelen) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->sfh_hash = knfsd_fh_hash(&sfhp->fh_handle); + __entry->tfh_hash = knfsd_fh_hash(&tfhp->fh_handle); + __assign_str(name); + ), + TP_printk("xid=0x%08x src_fh=0x%08x tgt_fh=0x%08x name=%s", + __entry->xid, __entry->sfh_hash, __entry->tfh_hash, + __get_str(name) + ) +); + +TRACE_EVENT(nfsd_vfs_unlink, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + const char *name, + unsigned int len + ), + TP_ARGS(rqstp, fhp, name, len), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __string_len(name, name, len) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __assign_str(name); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x name=%s", + __entry->xid, __entry->fh_hash, + __get_str(name) + ) +); + +TRACE_EVENT(nfsd_vfs_rename, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *sfhp, + const struct svc_fh *tfhp, + const char *source, + unsigned int sourcelen, + const char *target, + unsigned int targetlen + ), + TP_ARGS(rqstp, sfhp, tfhp, source, sourcelen, target, targetlen), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, sfh_hash) + __field(u32, tfh_hash) + __string_len(source, source, sourcelen) + __string_len(target, target, targetlen) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->sfh_hash = knfsd_fh_hash(&sfhp->fh_handle); + __entry->tfh_hash = knfsd_fh_hash(&tfhp->fh_handle); + __assign_str(source); + __assign_str(target); + ), + TP_printk("xid=0x%08x sfh_hash=0x%08x tfh_hash=0x%08x source=%s target=%s", + __entry->xid, __entry->sfh_hash, __entry->tfh_hash, + __get_str(source), __get_str(target) + ) +); + +TRACE_EVENT(nfsd_vfs_readdir, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + u32 count, + u64 offset + ), + TP_ARGS(rqstp, fhp, count, offset), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __field(u32, count) + __field(u64, offset) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->count = count; + __entry->offset = offset; + ), + TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu count=%u", + __entry->xid, __entry->fh_hash, + __entry->offset, __entry->count + ) +); + +DECLARE_EVENT_CLASS(nfsd_vfs_getattr_class, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp + ), + TP_ARGS(rqstp, fhp), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x", + __entry->xid, __entry->fh_hash + ) +); + +#define DEFINE_NFSD_VFS_GETATTR_EVENT(__name) \ +DEFINE_EVENT(nfsd_vfs_getattr_class, __name, \ + TP_PROTO( \ + const struct svc_rqst *rqstp, \ + const struct svc_fh *fhp \ + ), \ + TP_ARGS(rqstp, fhp)) + +DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_getattr); +DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_statfs); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6a9464262fae..cd689df2ca5d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -31,11 +31,11 @@ #include <linux/exportfs.h> #include <linux/writeback.h> #include <linux/security.h> +#include <linux/sunrpc/xdr.h> #include "xdr3.h" #ifdef CONFIG_NFSD_V4 -#include "../internal.h" #include "acl.h" #include "idmap.h" #include "xdr4.h" @@ -48,6 +48,8 @@ #define NFSDDBG_FACILITY NFSDDBG_FILEOP +bool nfsd_disable_splice_read __read_mostly; + /** * nfserrno - Map Linux errnos to NFS errnos * @errno: POSIX(-ish) error code to be mapped @@ -72,7 +74,6 @@ nfserrno (int errno) { nfserr_acces, -EACCES }, { nfserr_exist, -EEXIST }, { nfserr_xdev, -EXDEV }, - { nfserr_mlink, -EMLINK }, { nfserr_nodev, -ENODEV }, { nfserr_notdir, -ENOTDIR }, { nfserr_isdir, -EISDIR }, @@ -100,6 +101,7 @@ nfserrno (int errno) { nfserr_io, -EUCLEAN }, { nfserr_perm, -ENOKEY }, { nfserr_no_grace, -ENOGRACE}, + { nfserr_io, -EBADMSG }, }; int i; @@ -245,7 +247,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, struct dentry *dentry; int host_err; - dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); + trace_nfsd_vfs_lookup(rqstp, fhp, name, len); dparent = fhp->fh_dentry; exp = exp_get(fhp->fh_export); @@ -265,7 +267,8 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } } else { - dentry = lookup_one_len_unlocked(name, dparent, len); + dentry = lookup_one_unlocked(&nop_mnt_idmap, + &QSTR_LEN(name, len), dparent); host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_nfserr; @@ -320,7 +323,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); if (err) return err; - err = check_nfsd_access(exp, rqstp); + err = check_nfsd_access(exp, rqstp, false); if (err) goto out; /* @@ -421,8 +424,9 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, if (iap->ia_size < inode->i_size) { __be32 err; - err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, - NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); + err = nfsd_permission(&rqstp->rq_cred, + fhp->fh_export, fhp->fh_dentry, + NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); if (err) return err; } @@ -499,6 +503,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, bool size_change = (iap->ia_valid & ATTR_SIZE); int retries; + trace_nfsd_vfs_setattr(rqstp, fhp, iap, guardtime); + if (iap->ia_valid & ATTR_SIZE) { accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; ftype = S_IFREG; @@ -814,7 +820,8 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor sresult |= map->access; - err2 = nfsd_permission(rqstp, export, dentry, map->how); + err2 = nfsd_permission(&rqstp->rq_cred, export, + dentry, map->how); switch (err2) { case nfs_ok: result |= map->access; @@ -858,8 +865,7 @@ int nfsd_open_break_lease(struct inode *inode, int access) * N.B. After this call fhp needs an fh_put */ static int -__nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, - int may_flags, struct file **filp) +__nfsd_open(struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { struct path path; struct inode *inode; @@ -900,11 +906,6 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, goto out; } - if (may_flags & NFSD_MAY_64BIT_COOKIE) - file->f_mode |= FMODE_64BITHASH; - else - file->f_mode |= FMODE_32BITHASH; - *filp = file; out: return host_err; @@ -927,14 +928,14 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, * directories, but we never have and it doesn't seem to have * caused anyone a problem. If we were to change this, note * also that our filldir callbacks would need a variant of - * lookup_one_len that doesn't check permissions. + * lookup_one_positive_unlocked() that doesn't check permissions. */ if (type == S_IFREG) may_flags |= NFSD_MAY_OWNER_OVERRIDE; retry: err = fh_verify(rqstp, fhp, type, may_flags); if (!err) { - host_err = __nfsd_open(rqstp, fhp, type, may_flags, filp); + host_err = __nfsd_open(fhp, type, may_flags, filp); if (host_err == -EOPENSTALE && !retried) { retried = true; fh_put(fhp); @@ -947,7 +948,6 @@ retry: /** * nfsd_open_verified - Open a regular file for the filecache - * @rqstp: RPC request * @fhp: NFS filehandle of the file to open * @may_flags: internal permission flags * @filp: OUT: open "struct file *" @@ -955,10 +955,9 @@ retry: * Returns zero on success, or a negative errno value. */ int -nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, - struct file **filp) +nfsd_open_verified(struct svc_fh *fhp, int may_flags, struct file **filp) { - return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); + return __nfsd_open(fhp, S_IFREG, may_flags, filp); } /* @@ -1088,23 +1087,23 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned long v, total; struct iov_iter iter; loff_t ppos = offset; - struct page *page; ssize_t host_err; + size_t len; v = 0; total = *count; while (total) { - page = *(rqstp->rq_next_page++); - rqstp->rq_vec[v].iov_base = page_address(page) + base; - rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base); - total -= rqstp->rq_vec[v].iov_len; + len = min_t(size_t, total, PAGE_SIZE - base); + bvec_set_page(&rqstp->rq_bvec[v], *(rqstp->rq_next_page++), + len, base); + total -= len; ++v; base = 0; } - WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec)); + WARN_ON_ONCE(v > rqstp->rq_maxpages); trace_nfsd_read_vector(rqstp, fhp, offset, *count); - iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count); + iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count); host_err = vfs_iter_read(file, &iter, &ppos, 0); return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } @@ -1146,11 +1145,27 @@ static int wait_for_concurrent_writes(struct file *file) return err; } +/** + * nfsd_vfs_write - write data to an already-open file + * @rqstp: RPC execution context + * @fhp: File handle of file to write into + * @nf: An open file matching @fhp + * @offset: Byte offset of start + * @payload: xdr_buf containing the write payload + * @cnt: IN: number of bytes to write, OUT: number of bytes actually written + * @stable: An NFS stable_how value + * @verf: NFS WRITE verifier + * + * Upon return, caller must invoke fh_put on @fhp. + * + * Return values: + * An nfsstat value in network byte order. + */ __be32 -nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, - loff_t offset, struct kvec *vec, int vlen, - unsigned long *cnt, int stable, - __be32 *verf) +nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_file *nf, loff_t offset, + const struct xdr_buf *payload, unsigned long *cnt, + int stable, __be32 *verf) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file *file = nf->nf_file; @@ -1160,12 +1175,12 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, errseq_t since; __be32 nfserr; int host_err; - int use_wgather; loff_t pos = offset; unsigned long exp_op_flags = 0; unsigned int pflags = current->flags; rwf_t flags = 0; bool restore_flags = false; + unsigned int nvecs; trace_nfsd_write_opened(rqstp, fhp, offset, *cnt); @@ -1186,15 +1201,15 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, } exp = fhp->fh_export; - use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); if (!EX_ISSYNC(exp)) stable = NFS_UNSTABLE; - if (stable && !use_wgather) + if (stable && !fhp->fh_use_wgather) flags |= RWF_SYNC; - iov_iter_kvec(&iter, ITER_SOURCE, vec, vlen, *cnt); + nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload); + iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt); since = READ_ONCE(file->f_wb_err); if (verf) nfsd_copy_write_verifier(verf, nn); @@ -1210,7 +1225,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, if (host_err < 0) goto out_nfserr; - if (stable && use_wgather) { + if (stable && fhp->fh_use_wgather) { host_err = wait_for_concurrent_writes(file); if (host_err < 0) commit_reset_write_verifier(nn, rqstp, host_err); @@ -1245,6 +1260,8 @@ out_nfserr: */ bool nfsd_read_splice_ok(struct svc_rqst *rqstp) { + if (nfsd_disable_splice_read) + return false; switch (svc_auth_flavor(rqstp)) { case RPC_AUTH_GSS_KRB5I: case RPC_AUTH_GSS_KRB5P: @@ -1292,14 +1309,24 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; } -/* - * Write data to a file. - * The stable flag requests synchronous writes. - * N.B. After this call fhp needs an fh_put +/** + * nfsd_write - open a file and write data to it + * @rqstp: RPC execution context + * @fhp: File handle of file to write into; nfsd_write() may modify it + * @offset: Byte offset of start + * @payload: xdr_buf containing the write payload + * @cnt: IN: number of bytes to write, OUT: number of bytes actually written + * @stable: An NFS stable_how value + * @verf: NFS WRITE verifier + * + * Upon return, caller must invoke fh_put on @fhp. + * + * Return values: + * An nfsstat value in network byte order. */ __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, - struct kvec *vec, int vlen, unsigned long *cnt, int stable, + const struct xdr_buf *payload, unsigned long *cnt, int stable, __be32 *verf) { struct nfsd_file *nf; @@ -1311,8 +1338,8 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, if (err) goto out; - err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec, - vlen, cnt, stable, verf); + err = nfsd_vfs_write(rqstp, fhp, nf, offset, payload, cnt, + stable, verf); nfsd_file_put(nf); out: trace_nfsd_write_done(rqstp, fhp, offset, *cnt); @@ -1348,6 +1375,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t start, end; struct nfsd_net *nn; + trace_nfsd_commit_start(rqstp, fhp, offset, count); + /* * Convert the client-provided (offset, count) range to a * (start, end) range. If the client-provided range falls @@ -1386,6 +1415,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, } else nfsd_copy_write_verifier(verf, nn); + trace_nfsd_commit_done(rqstp, fhp, offset, count); return err; } @@ -1422,7 +1452,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, * Callers expect new file metadata to be committed even * if the attributes have not changed. */ - if (iap->ia_valid) + if (nfsd_attrs_valid(attrs)) status = nfsd_setattr(rqstp, resfhp, attrs, NULL); else status = nfserrno(commit_metadata(resfhp)); @@ -1469,13 +1499,14 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, struct inode *dirp; struct iattr *iap = attrs->na_iattr; __be32 err; - int host_err; + int host_err = 0; dentry = fhp->fh_dentry; dirp = d_inode(dentry); dchild = dget(resfhp->fh_dentry); - err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE); + err = nfsd_permission(&rqstp->rq_cred, fhp->fh_export, dentry, + NFSD_MAY_CREATE); if (err) goto out; @@ -1495,28 +1526,15 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_check_ignore_resizing(iap); break; case S_IFDIR: - host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); - if (!host_err && unlikely(d_unhashed(dchild))) { - struct dentry *d; - d = lookup_one_len(dchild->d_name.name, - dchild->d_parent, - dchild->d_name.len); - if (IS_ERR(d)) { - host_err = PTR_ERR(d); - break; - } - if (unlikely(d_is_negative(d))) { - dput(d); - err = nfserr_serverfault; - goto out; - } + dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); + if (IS_ERR(dchild)) { + host_err = PTR_ERR(dchild); + } else if (d_is_negative(dchild)) { + err = nfserr_serverfault; + goto out; + } else if (unlikely(dchild != resfhp->fh_dentry)) { dput(resfhp->fh_dentry); - resfhp->fh_dentry = dget(d); - err = fh_update(resfhp); - dput(dchild); - dchild = d; - if (err) - goto out; + resfhp->fh_dentry = dget(dchild); } break; case S_IFCHR: @@ -1537,7 +1555,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs); out: - dput(dchild); + if (!IS_ERR(dchild)) + dput(dchild); return err; out_nfserr: @@ -1560,6 +1579,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 err; int host_err; + trace_nfsd_vfs_create(rqstp, fhp, type, fname, flen); + if (isdotent(fname, flen)) return nfserr_exist; @@ -1574,7 +1595,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, return nfserrno(host_err); inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); - dchild = lookup_one_len(fname, dentry, flen); + dchild = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry); host_err = PTR_ERR(dchild); if (IS_ERR(dchild)) { err = nfserrno(host_err); @@ -1660,6 +1681,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 err, cerr; int host_err; + trace_nfsd_vfs_symlink(rqstp, fhp, fname, flen, path); + err = nfserr_noent; if (!flen || path[0] == '\0') goto out; @@ -1679,7 +1702,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, dentry = fhp->fh_dentry; inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); - dnew = lookup_one_len(fname, dentry, flen); + dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry); if (IS_ERR(dnew)) { err = nfserrno(PTR_ERR(dnew)); inode_unlock(dentry->d_inode); @@ -1706,9 +1729,17 @@ out: return err; } -/* - * Create a hardlink - * N.B. After this call _both_ ffhp and tfhp need an fh_put +/** + * nfsd_link - create a link + * @rqstp: RPC transaction context + * @ffhp: the file handle of the directory where the new link is to be created + * @name: the filename of the new link + * @len: the length of @name in octets + * @tfhp: the file handle of an existing file object + * + * After this call _both_ ffhp and tfhp need an fh_put. + * + * Returns a generic NFS status code in network byte-order. */ __be32 nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, @@ -1716,9 +1747,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, { struct dentry *ddir, *dnew, *dold; struct inode *dirp; + int type; __be32 err; int host_err; + trace_nfsd_vfs_link(rqstp, ffhp, tfhp, name, len); + err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; @@ -1735,19 +1769,19 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (isdotent(name, len)) goto out; + err = nfs_ok; + type = d_inode(tfhp->fh_dentry)->i_mode & S_IFMT; host_err = fh_want_write(tfhp); - if (host_err) { - err = nfserrno(host_err); + if (host_err) goto out; - } ddir = ffhp->fh_dentry; dirp = d_inode(ddir); inode_lock_nested(dirp, I_MUTEX_PARENT); - dnew = lookup_one_len(name, ddir, len); + dnew = lookup_one(&nop_mnt_idmap, &QSTR_LEN(name, len), ddir); if (IS_ERR(dnew)) { - err = nfserrno(PTR_ERR(dnew)); + host_err = PTR_ERR(dnew); goto out_unlock; } @@ -1763,20 +1797,26 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, fh_fill_post_attrs(ffhp); inode_unlock(dirp); if (!host_err) { - err = nfserrno(commit_metadata(ffhp)); - if (!err) - err = nfserrno(commit_metadata(tfhp)); - } else { - if (host_err == -EXDEV && rqstp->rq_vers == 2) - err = nfserr_acces; - else - err = nfserrno(host_err); + host_err = commit_metadata(ffhp); + if (!host_err) + host_err = commit_metadata(tfhp); } + dput(dnew); out_drop_write: fh_drop_write(tfhp); + if (host_err == -EBUSY) { + /* + * See RFC 8881 Section 18.9.4 para 1-2: NFSv4 LINK + * wants a status unique to the object type. + */ + if (type != S_IFDIR) + err = nfserr_file_open; + else + err = nfserr_acces; + } out: - return err; + return err != nfs_ok ? err : nfserrno(host_err); out_dput: dput(dnew); @@ -1805,9 +1845,19 @@ nfsd_has_cached_files(struct dentry *dentry) return ret; } -/* - * Rename a file - * N.B. After this call _both_ ffhp and tfhp need an fh_put +/** + * nfsd_rename - rename a directory entry + * @rqstp: RPC transaction context + * @ffhp: the file handle of parent directory containing the entry to be renamed + * @fname: the filename of directory entry to be renamed + * @flen: the length of @fname in octets + * @tfhp: the file handle of parent directory to contain the renamed entry + * @tname: the filename of the new entry + * @tlen: the length of @tlen in octets + * + * After this call _both_ ffhp and tfhp need an fh_put. + * + * Returns a generic NFS status code in network byte-order. */ __be32 nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, @@ -1815,10 +1865,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, { struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap; struct inode *fdir, *tdir; + int type = S_IFDIR; __be32 err; int host_err; bool close_cached = false; + trace_nfsd_vfs_rename(rqstp, ffhp, tfhp, fname, flen, tname, tlen); + err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); if (err) goto out; @@ -1836,7 +1889,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) goto out; - err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; + err = nfserr_xdev; if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) goto out; if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) @@ -1851,8 +1904,8 @@ retry: trap = lock_rename(tdentry, fdentry); if (IS_ERR(trap)) { - err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; - goto out; + err = nfserr_xdev; + goto out_want_write; } err = fh_fill_pre_attrs(ffhp); if (err != nfs_ok) @@ -1861,7 +1914,7 @@ retry: if (err != nfs_ok) goto out_unlock; - odentry = lookup_one_len(fname, fdentry, flen); + odentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), fdentry); host_err = PTR_ERR(odentry); if (IS_ERR(odentry)) goto out_nfserr; @@ -1872,11 +1925,14 @@ retry: host_err = -EINVAL; if (odentry == trap) goto out_dput_old; + type = d_inode(odentry)->i_mode & S_IFMT; - ndentry = lookup_one_len(tname, tdentry, tlen); + ndentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(tname, tlen), tdentry); host_err = PTR_ERR(ndentry); if (IS_ERR(ndentry)) goto out_dput_old; + if (d_inode(ndentry)) + type = d_inode(ndentry)->i_mode & S_IFMT; host_err = -ENOTEMPTY; if (ndentry == trap) goto out_dput_new; @@ -1914,7 +1970,18 @@ retry: out_dput_old: dput(odentry); out_nfserr: - err = nfserrno(host_err); + if (host_err == -EBUSY) { + /* + * See RFC 8881 Section 18.26.4 para 1-3: NFSv4 RENAME + * wants a status unique to the object type. + */ + if (type != S_IFDIR) + err = nfserr_file_open; + else + err = nfserr_acces; + } else { + err = nfserrno(host_err); + } if (!close_cached) { fh_fill_post_attrs(ffhp); @@ -1922,6 +1989,7 @@ retry: } out_unlock: unlock_rename(tdentry, fdentry); +out_want_write: fh_drop_write(ffhp); /* @@ -1940,9 +2008,17 @@ out: return err; } -/* - * Unlink a file or directory - * N.B. After this call fhp needs an fh_put +/** + * nfsd_unlink - remove a directory entry + * @rqstp: RPC transaction context + * @fhp: the file handle of the parent directory to be modified + * @type: enforced file type of the object to be removed + * @fname: the name of directory entry to be removed + * @flen: length of @fname in octets + * + * After this call fhp needs an fh_put. + * + * Returns a generic NFS status code in network byte-order. */ __be32 nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, @@ -1954,6 +2030,8 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, __be32 err; int host_err; + trace_nfsd_vfs_unlink(rqstp, fhp, fname, flen); + err = nfserr_acces; if (!flen || isdotent(fname, flen)) goto out; @@ -1969,7 +2047,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, dirp = d_inode(dentry); inode_lock_nested(dirp, I_MUTEX_PARENT); - rdentry = lookup_one_len(fname, dentry, flen); + rdentry = lookup_one(&nop_mnt_idmap, &QSTR_LEN(fname, flen), dentry); host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) goto out_unlock; @@ -2016,18 +2094,17 @@ out_drop_write: fh_drop_write(fhp); out_nfserr: if (host_err == -EBUSY) { - /* name is mounted-on. There is no perfect - * error status. + /* + * See RFC 8881 Section 18.25.4 para 4: NFSv4 REMOVE + * wants a status unique to the object type. */ - if (nfsd_v4client(rqstp)) + if (type != S_IFDIR) err = nfserr_file_open; else err = nfserr_acces; - } else { - err = nfserrno(host_err); } out: - return err; + return err != nfs_ok ? err : nfserrno(host_err); out_unlock: inode_unlock(dirp); goto out_drop_write; @@ -2177,14 +2254,15 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, loff_t offset = *offsetp; int may_flags = NFSD_MAY_READ; - /* NFSv2 only supports 32 bit cookies */ - if (rqstp->rq_vers > 2) - may_flags |= NFSD_MAY_64BIT_COOKIE; - err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file); if (err) goto out; + if (fhp->fh_64bit_cookies) + file->f_mode |= FMODE_64BITHASH; + else + file->f_mode |= FMODE_32BITHASH; + offset = vfs_llseek(file, offset, SEEK_SET); if (offset < 0) { err = nfserrno((int)offset); @@ -2242,6 +2320,8 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, in { __be32 err; + trace_nfsd_vfs_statfs(rqstp, fhp); + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); if (!err) { struct path path = { @@ -2254,9 +2334,9 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, in return err; } -static int exp_rdonly(struct svc_rqst *rqstp, struct svc_export *exp) +static int exp_rdonly(struct svc_cred *cred, struct svc_export *exp) { - return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY; + return nfsexp_flags(cred, exp) & NFSEXP_READONLY; } #ifdef CONFIG_NFSD_V4 @@ -2500,8 +2580,8 @@ out_unlock: * Check for a user's access permissions to this inode. */ __be32 -nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, - struct dentry *dentry, int acc) +nfsd_permission(struct svc_cred *cred, struct svc_export *exp, + struct dentry *dentry, int acc) { struct inode *inode = d_inode(dentry); int err; @@ -2516,7 +2596,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, (acc & NFSD_MAY_EXEC)? " exec" : "", (acc & NFSD_MAY_SATTR)? " sattr" : "", (acc & NFSD_MAY_TRUNC)? " trunc" : "", - (acc & NFSD_MAY_LOCK)? " lock" : "", + (acc & NFSD_MAY_NLM)? " nlm" : "", (acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "", inode->i_mode, IS_IMMUTABLE(inode)? " immut" : "", @@ -2532,7 +2612,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, */ if (!(acc & NFSD_MAY_LOCAL_ACCESS)) if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) { - if (exp_rdonly(rqstp, exp) || + if (exp_rdonly(cred, exp) || __mnt_is_readonly(exp->ex_path.mnt)) return nfserr_rofs; if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode)) @@ -2541,16 +2621,6 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode)) return nfserr_perm; - if (acc & NFSD_MAY_LOCK) { - /* If we cannot rely on authentication in NLM requests, - * just allow locks, otherwise require read permission, or - * ownership - */ - if (exp->ex_flags & NFSEXP_NOAUTHNLM) - return 0; - else - acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE; - } /* * The file owner always gets access permission for accesses that * would normally be checked at open time. This is to make diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index c60fdb6200fd..eff04959606f 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -20,7 +20,7 @@ #define NFSD_MAY_READ 0x004 /* == MAY_READ */ #define NFSD_MAY_SATTR 0x008 #define NFSD_MAY_TRUNC 0x010 -#define NFSD_MAY_LOCK 0x020 +#define NFSD_MAY_NLM 0x020 /* request is from lockd */ #define NFSD_MAY_MASK 0x03f /* extra hints to permission and open routines: */ @@ -33,6 +33,8 @@ #define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */ +#define NFSD_MAY_LOCALIO 0x2000 /* for tracing, reflects when localio used */ + #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) @@ -60,6 +62,14 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) posix_acl_release(attrs->na_dpacl); } +static inline bool nfsd_attrs_valid(struct nfsd_attrs *attrs) +{ + struct iattr *iap = attrs->na_iattr; + + return (iap->ia_valid || (attrs->na_seclabel && + attrs->na_seclabel->len)); +} + __be32 nfserrno (int errno); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); @@ -104,8 +114,8 @@ __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, int nfsd_open_break_lease(struct inode *, int); __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); -int nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, - int may_flags, struct file **filp); +int nfsd_open_verified(struct svc_fh *fhp, int may_flags, + struct file **filp); __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, unsigned long *count, @@ -118,13 +128,13 @@ bool nfsd_read_splice_ok(struct svc_rqst *rqstp); __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, unsigned long *count, u32 *eof); -__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, - struct kvec *, int, unsigned long *, - int stable, __be32 *verf); +__be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, const struct xdr_buf *payload, + unsigned long *cnt, int stable, __be32 *verf); __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, - struct kvec *vec, int vlen, unsigned long *cnt, - int stable, __be32 *verf); + const struct xdr_buf *payload, + unsigned long *cnt, int stable, __be32 *verf); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, @@ -145,8 +155,8 @@ __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *, int access); -__be32 nfsd_permission(struct svc_rqst *, struct svc_export *, - struct dentry *, int); +__be32 nfsd_permission(struct svc_cred *cred, struct svc_export *exp, + struct dentry *dentry, int acc); void nfsd_filp_close(struct file *fp); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 415516c1b27e..aa2a356da784 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -518,6 +518,24 @@ struct nfsd4_free_stateid { stateid_t fr_stateid; /* request */ }; +struct nfsd4_get_dir_delegation { + /* request */ + u32 gdda_signal_deleg_avail; + u32 gdda_notification_types[1]; + struct timespec64 gdda_child_attr_delay; + struct timespec64 gdda_dir_attr_delay; + u32 gdda_child_attributes[3]; + u32 gdda_dir_attributes[3]; + /* response */ + u32 gddrnf_status; + nfs4_verifier gddr_cookieverf; + stateid_t gddr_stateid; + u32 gddr_notification[1]; + u32 gddr_child_attributes[3]; + u32 gddr_dir_attributes[3]; + bool gddrnf_will_signal_deleg_avail; +}; + /* also used for NVERIFY */ struct nfsd4_verify { u32 ve_bmval[3]; /* request */ @@ -549,6 +567,7 @@ struct nfsd4_exchange_id { struct xdr_netobj nii_domain; struct xdr_netobj nii_name; struct timespec64 nii_time; + char *server_impl_name; }; struct nfsd4_sequence { @@ -557,9 +576,7 @@ struct nfsd4_sequence { u32 slotid; /* request/response */ u32 maxslots; /* request/response */ u32 cachethis; /* request */ -#if 0 u32 target_maxslots; /* response */ -#endif /* not yet */ u32 status_flags; /* response */ }; @@ -657,7 +674,12 @@ struct nfsd4_cb_offload { struct nfsd4_callback co_cb; struct nfsd42_write_res co_res; __be32 co_nfserr; + unsigned int co_retries; struct knfsd_fh co_fh; + + struct nfs4_sessionid co_referring_sessionid; + u32 co_referring_slotid; + u32 co_referring_seqno; }; struct nfsd4_copy { @@ -674,11 +696,17 @@ struct nfsd4_copy { #define NFSD4_COPY_F_INTRA (1) #define NFSD4_COPY_F_SYNCHRONOUS (2) #define NFSD4_COPY_F_COMMITTED (3) +#define NFSD4_COPY_F_COMPLETED (4) +#define NFSD4_COPY_F_OFFLOAD_DONE (5) /* response */ + __be32 nfserr; struct nfsd42_write_res cp_res; struct knfsd_fh fh; + /* offload callback */ + struct nfsd4_cb_offload cp_cb_offload; + struct nfs4_client *cp_clp; struct nfsd_file *nf_src; @@ -689,10 +717,12 @@ struct nfsd4_copy { struct list_head copies; struct task_struct *copy_task; refcount_t refcount; + unsigned int cp_ttl; struct nfsd4_ssc_umount_item *ss_nsui; struct nfs_fh c_fh; nfs4_stateid stateid; + struct nfsd_net *cp_nn; }; static inline void nfsd4_copy_set_sync(struct nfsd4_copy *copy, bool sync) @@ -735,7 +765,8 @@ struct nfsd4_offload_status { /* response */ u64 count; - u32 status; + __be32 status; + bool completed; }; struct nfsd4_copy_notify { @@ -797,6 +828,7 @@ struct nfsd4_op { struct nfsd4_reclaim_complete reclaim_complete; struct nfsd4_test_stateid test_stateid; struct nfsd4_free_stateid free_stateid; + struct nfsd4_get_dir_delegation get_dir_delegation; struct nfsd4_getdeviceinfo getdeviceinfo; struct nfsd4_layoutget layoutget; struct nfsd4_layoutcommit layoutcommit; @@ -907,6 +939,7 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); +void nfsd4_exchange_id_release(union nfsd4_op_u *u); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *u); extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index e8b00309c449..f4e29c0c701c 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -6,8 +6,11 @@ #define cb_compound_enc_hdr_sz 4 #define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) #define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2) +#define enc_referring_call4_sz (1 + 1) +#define enc_referring_call_list4_sz (sessionid_sz + 1 + \ + enc_referring_call4_sz) #define cb_sequence_enc_sz (sessionid_sz + 4 + \ - 1 /* no referring calls list yet */) + enc_referring_call_list4_sz) #define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4) #define op_enc_sz 1 @@ -59,16 +62,20 @@ * 1: CB_GETATTR opcode (32-bit) * N: file_handle * 1: number of entry in attribute array (32-bit) - * 1: entry 0 in attribute array (32-bit) + * 3: entry 0-2 in attribute array (32-bit * 3) */ #define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \ cb_sequence_enc_sz + \ - 1 + enc_nfs4_fh_sz + 1 + 1) + 1 + enc_nfs4_fh_sz + 1 + 3) /* * 4: fattr_bitmap_maxsz * 1: attribute array len * 2: change attr (64-bit) * 2: size (64-bit) + * 2: atime.seconds (64-bit) + * 1: atime.nanoseconds (32-bit) + * 2: mtime.seconds (64-bit) + * 1: mtime.nanoseconds (32-bit) */ #define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \ - cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz) + cb_sequence_dec_sz + 4 + 1 + 2 + 2 + 2 + 1 + 2 + 1 + op_dec_sz) |