summaryrefslogtreecommitdiff
path: root/fs/nfsd
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfsd')
-rw-r--r--fs/nfsd/Kconfig87
-rw-r--r--fs/nfsd/Makefile27
-rw-r--r--fs/nfsd/acl.h14
-rw-r--r--fs/nfsd/auth.c21
-rw-r--r--fs/nfsd/auth.h2
-rw-r--r--fs/nfsd/blocklayout.c381
-rw-r--r--fs/nfsd/blocklayoutxdr.c221
-rw-r--r--fs/nfsd/blocklayoutxdr.h28
-rw-r--r--fs/nfsd/cache.h17
-rw-r--r--fs/nfsd/debugfs.c143
-rw-r--r--fs/nfsd/export.c406
-rw-r--r--fs/nfsd/export.h36
-rw-r--r--fs/nfsd/fault_inject.c150
-rw-r--r--fs/nfsd/filecache.c1430
-rw-r--r--fs/nfsd/filecache.h88
-rw-r--r--fs/nfsd/flexfilelayout.c17
-rw-r--r--fs/nfsd/flexfilelayoutxdr.c18
-rw-r--r--fs/nfsd/flexfilelayoutxdr.h4
-rw-r--r--fs/nfsd/localio.c217
-rw-r--r--fs/nfsd/lockd.c36
-rw-r--r--fs/nfsd/netlink.c114
-rw-r--r--fs/nfsd/netlink.h32
-rw-r--r--fs/nfsd/netns.h149
-rw-r--r--fs/nfsd/nfs2acl.c347
-rw-r--r--fs/nfsd/nfs3acl.c241
-rw-r--r--fs/nfsd/nfs3proc.c676
-rw-r--r--fs/nfsd/nfs3xdr.c1752
-rw-r--r--fs/nfsd/nfs4acl.c85
-rw-r--r--fs/nfsd/nfs4callback.c952
-rw-r--r--fs/nfsd/nfs4idmap.c56
-rw-r--r--fs/nfsd/nfs4layouts.c138
-rw-r--r--fs/nfsd/nfs4proc.c2091
-rw-r--r--fs/nfsd/nfs4recover.c1028
-rw-r--r--fs/nfsd/nfs4state.c5638
-rw-r--r--fs/nfsd/nfs4xdr.c7053
-rw-r--r--fs/nfsd/nfs4xdr_gen.c256
-rw-r--r--fs/nfsd/nfs4xdr_gen.h25
-rw-r--r--fs/nfsd/nfscache.c574
-rw-r--r--fs/nfsd/nfsctl.c1449
-rw-r--r--fs/nfsd/nfsd.h242
-rw-r--r--fs/nfsd/nfsfh.c617
-rw-r--r--fs/nfsd/nfsfh.h242
-rw-r--r--fs/nfsd/nfsproc.c589
-rw-r--r--fs/nfsd/nfssvc.c999
-rw-r--r--fs/nfsd/nfsxdr.c842
-rw-r--r--fs/nfsd/pnfs.h19
-rw-r--r--fs/nfsd/state.h377
-rw-r--r--fs/nfsd/stats.c96
-rw-r--r--fs/nfsd/stats.h84
-rw-r--r--fs/nfsd/trace.c1
-rw-r--r--fs/nfsd/trace.h2520
-rw-r--r--fs/nfsd/vfs.c2385
-rw-r--r--fs/nfsd/vfs.h168
-rw-r--r--fs/nfsd/xdr.h88
-rw-r--r--fs/nfsd/xdr3.h121
-rw-r--r--fs/nfsd/xdr4.h476
-rw-r--r--fs/nfsd/xdr4cb.h33
57 files changed, 24912 insertions, 10986 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 20b1c17320d5..0b5c1a0bf1cf 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -1,11 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
config NFSD
tristate "NFS server support"
depends on INET
depends on FILE_LOCKING
+ depends on FSNOTIFY
+ select CRC32
+ select CRYPTO_LIB_MD5 if NFSD_LEGACY_CLIENT_TRACKING
+ select CRYPTO_LIB_SHA256 if NFSD_V4
select LOCKD
select SUNRPC
select EXPORTFS
+ select NFS_COMMON
select NFS_ACL_SUPPORT if NFSD_V2_ACL
+ select NFS_ACL_SUPPORT if NFSD_V3_ACL
depends on MULTIUSER
help
Choose Y here if you want to allow other computers to access
@@ -24,28 +31,29 @@ config NFSD
Below you can choose which versions of the NFS protocol are
available to clients mounting the NFS server on this system.
- Support for NFS version 2 (RFC 1094) is always available when
+ Support for NFS version 3 (RFC 1813) is always available when
CONFIG_NFSD is selected.
If unsure, say N.
-config NFSD_V2_ACL
- bool
- depends on NFSD
-
-config NFSD_V3
- bool "NFS server support for NFS version 3"
+config NFSD_V2
+ bool "NFS server support for NFS version 2 (DEPRECATED)"
depends on NFSD
+ default n
help
- This option enables support in your system's NFS server for
- version 3 of the NFS protocol (RFC 1813).
+ NFSv2 (RFC 1094) was the first publicly-released version of NFS.
+ Unless you are hosting ancient (1990's era) NFS clients, you don't
+ need this.
- If unsure, say Y.
+ If unsure, say N.
+
+config NFSD_V2_ACL
+ bool "NFS server support for the NFSv2 ACL protocol extension"
+ depends on NFSD_V2
config NFSD_V3_ACL
bool "NFS server support for the NFSv3 ACL protocol extension"
- depends on NFSD_V3
- select NFSD_V2_ACL
+ depends on NFSD
help
Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
never became an official part of the NFS version 3 protocol.
@@ -68,11 +76,11 @@ config NFSD_V3_ACL
config NFSD_V4
bool "NFS server support for NFS version 4"
depends on NFSD && PROC_FS
- select NFSD_V3
select FS_POSIX_ACL
- select SUNRPC_GSS
- select CRYPTO
+ select RPCSEC_GSS_KRB5
+ select CRYPTO # required by RPCSEC_GSS_KRB5
select GRACE_PERIOD
+ select NFS_V4_2_SSC_HELPER if NFS_V4_2
help
This option enables support in your system's NFS server for
version 4 of the NFS protocol (RFC 3530).
@@ -94,7 +102,7 @@ config NFSD_BLOCKLAYOUT
help
This option enables support for the exporting pNFS block layouts
in the kernel's NFS server. The pNFS block layout enables NFS
- clients to directly perform I/O to block devices accesible to both
+ clients to directly perform I/O to block devices accessible to both
the server and the clients. See RFC 5663 for more details.
If unsure, say N.
@@ -104,11 +112,10 @@ config NFSD_SCSILAYOUT
depends on NFSD_V4 && BLOCK
select NFSD_PNFS
select EXPORTFS_BLOCK_OPS
- select BLK_SCSI_REQUEST
help
This option enables support for the exporting pNFS SCSI layouts
in the kernel's NFS server. The pNFS SCSI layout enables NFS
- clients to directly perform I/O to SCSI devices accesible to both
+ clients to directly perform I/O to SCSI devices accessible to both
the server and the clients. See draft-ietf-nfsv4-scsi-layout for
more details.
@@ -122,7 +129,7 @@ config NFSD_FLEXFILELAYOUT
This option enables support for the exporting pNFS Flex File
layouts in the kernel's NFS server. The pNFS Flex File layout
enables NFS clients to directly perform I/O to NFSv3 devices
- accesible to both the server and the clients. See
+ accessible to both the server and the clients. See
draft-ietf-nfsv4-flex-files for more details.
Warning, this server implements the bare minimum functionality
@@ -131,6 +138,16 @@ config NFSD_FLEXFILELAYOUT
If unsure, say N.
+config NFSD_V4_2_INTER_SSC
+ bool "NFSv4.2 inter server to server COPY"
+ depends on NFSD_V4 && NFS_V4_2
+ help
+ This option enables support for NFSv4.2 inter server to
+ server copy where the destination server calls the NFSv4.2
+ client to read the data to copy from the source server.
+
+ If unsure, say N.
+
config NFSD_V4_SECURITY_LABEL
bool "Provide Security Label support for NFSv4 server"
depends on NFSD_V4 && SECURITY
@@ -144,12 +161,28 @@ config NFSD_V4_SECURITY_LABEL
If you do not wish to enable fine-grained security labels SELinux or
Smack policies on NFSv4 files, say N.
-config NFSD_FAULT_INJECTION
- bool "NFS server manual fault injection"
- depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS
+config NFSD_LEGACY_CLIENT_TRACKING
+ bool "Support legacy NFSv4 client tracking methods (DEPRECATED)"
+ depends on NFSD_V4
+ default n
help
- This option enables support for manually injecting faults
- into the NFS server. This is intended to be used for
- testing error recovery on the NFS client.
-
- If unsure, say N.
+ The NFSv4 server needs to store a small amount of information on
+ stable storage in order to handle state recovery after reboot. Most
+ modern deployments upcall to a userland daemon for this (nfsdcld),
+ but older NFS servers may store information directly in a
+ recoverydir, or spawn a process directly using a usermodehelper
+ upcall.
+
+ These legacy client tracking methods have proven to be problematic
+ and will be removed in the future. Say Y here if you need support
+ for them in the interim.
+
+config NFSD_V4_DELEG_TIMESTAMPS
+ bool "Support delegated timestamps"
+ depends on NFSD_V4
+ default n
+ help
+ NFSD implements delegated timestamps according to
+ draft-ietf-nfsv4-delstid-08 "Extending the Opening of Files". This
+ is currently an experimental feature and is therefore left disabled
+ by default.
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 2bfb58eefad1..55744bb786c9 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -10,15 +10,32 @@ obj-$(CONFIG_NFSD) += nfsd.o
# this one should be compiled first, as the tracing macros can easily blow up
nfsd-y += trace.o
-nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
- export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
-nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
+nfsd-y += nfssvc.o nfsctl.o nfsfh.o vfs.o \
+ export.o auth.o lockd.o nfscache.o \
+ stats.o filecache.o nfs3proc.o nfs3xdr.o \
+ netlink.o
+nfsd-$(CONFIG_NFSD_V2) += nfsproc.o nfsxdr.o
nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
-nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
- nfs4acl.o nfs4callback.o nfs4recover.o
+ nfs4acl.o nfs4callback.o nfs4recover.o nfs4xdr_gen.o
nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o
+nfsd-$(CONFIG_NFS_LOCALIO) += localio.o
+nfsd-$(CONFIG_DEBUG_FS) += debugfs.o
+
+
+.PHONY: xdrgen
+
+xdrgen: ../../include/linux/sunrpc/xdrgen/nfs4_1.h nfs4xdr_gen.h nfs4xdr_gen.c
+
+../../include/linux/sunrpc/xdrgen/nfs4_1.h: ../../Documentation/sunrpc/xdr/nfs4_1.x
+ ../../tools/net/sunrpc/xdrgen/xdrgen definitions $< > $@
+
+nfs4xdr_gen.h: ../../Documentation/sunrpc/xdr/nfs4_1.x
+ ../../tools/net/sunrpc/xdrgen/xdrgen declarations $< > $@
+
+nfs4xdr_gen.c: ../../Documentation/sunrpc/xdr/nfs4_1.x
+ ../../tools/net/sunrpc/xdrgen/xdrgen source $< > $@
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index 4cd7c69a6cb9..4b7324458a94 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -38,14 +38,8 @@
struct nfs4_acl;
struct svc_fh;
struct svc_rqst;
-
-/*
- * Maximum ACL we'll accept from a client; chosen (somewhat
- * arbitrarily) so that kmalloc'ing the ACL shouldn't require a
- * high-order allocation. This allows 204 ACEs on x86_64:
- */
-#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
- / sizeof(struct nfs4_ace))
+struct nfsd_attrs;
+enum nfs_ftype4;
int nfs4_acl_bytes(int entries);
int nfs4_acl_get_whotype(char *, u32);
@@ -53,7 +47,7 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
struct nfs4_acl **acl);
-__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct nfs4_acl *acl);
+__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl,
+ struct nfsd_attrs *attr);
#endif /* LINUX_NFS4_ACL_H */
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index fdf2aad73470..4dc327e02456 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -5,39 +5,37 @@
#include "nfsd.h"
#include "auth.h"
-int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
+int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp)
{
struct exp_flavor_info *f;
struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
for (f = exp->ex_flavors; f < end; f++) {
- if (f->pseudoflavor == rqstp->rq_cred.cr_flavor)
+ if (f->pseudoflavor == cred->cr_flavor)
return f->flags;
}
return exp->ex_flags;
}
-int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
+int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp)
{
struct group_info *rqgi;
struct group_info *gi;
struct cred *new;
int i;
- int flags = nfsexp_flags(rqstp, exp);
-
- validate_process_creds();
+ int flags = nfsexp_flags(cred, exp);
/* discard any old override before preparing the new set */
- revert_creds(get_cred(current_real_cred()));
+ put_cred(revert_creds(get_cred(current_real_cred())));
new = prepare_creds();
if (!new)
return -ENOMEM;
- new->fsuid = rqstp->rq_cred.cr_uid;
- new->fsgid = rqstp->rq_cred.cr_gid;
+ new->fsuid = cred->cr_uid;
+ new->fsgid = cred->cr_gid;
- rqgi = rqstp->rq_cred.cr_group_info;
+ rqgi = cred->cr_group_info;
if (flags & NFSEXP_ALLSQUASH) {
new->fsuid = exp->ex_anon_uid;
@@ -81,10 +79,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
else
new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
new->cap_permitted);
- validate_process_creds();
put_cred(override_creds(new));
- put_cred(new);
- validate_process_creds();
return 0;
oom:
diff --git a/fs/nfsd/auth.h b/fs/nfsd/auth.h
index dbd66424f600..8c5031bbbcee 100644
--- a/fs/nfsd/auth.h
+++ b/fs/nfsd/auth.h
@@ -12,6 +12,6 @@
* Set the current process's fsuid/fsgid etc to those of the NFS
* client user
*/
-int nfsd_setuser(struct svc_rqst *, struct svc_export *);
+int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp);
#endif /* LINUX_NFSD_AUTH_H */
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 4fb1f72a25fb..afa16d7a8013 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -4,136 +4,194 @@
*/
#include <linux/exportfs.h>
#include <linux/iomap.h>
-#include <linux/genhd.h>
#include <linux/slab.h>
#include <linux/pr.h>
#include <linux/nfsd/debug.h>
-#include <scsi/scsi_proto.h>
-#include <scsi/scsi_common.h>
-#include <scsi/scsi_request.h>
#include "blocklayoutxdr.h"
#include "pnfs.h"
+#include "filecache.h"
+#include "vfs.h"
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_PNFS
+/*
+ * Get an extent from the file system that starts at offset or below
+ * and may be shorter than the requested length.
+ */
static __be32
-nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
- struct nfsd4_layoutget *args)
+nfsd4_block_map_extent(struct inode *inode, const struct svc_fh *fhp,
+ u64 offset, u64 length, u32 iomode, u64 minlength,
+ struct pnfs_block_extent *bex)
{
- struct nfsd4_layout_seg *seg = &args->lg_seg;
struct super_block *sb = inode->i_sb;
- u32 block_size = i_blocksize(inode);
- struct pnfs_block_extent *bex;
struct iomap iomap;
u32 device_generation = 0;
int error;
- if (seg->offset & (block_size - 1)) {
- dprintk("pnfsd: I/O misaligned\n");
- goto out_layoutunavailable;
- }
-
- /*
- * Some clients barf on non-zero block numbers for NONE or INVALID
- * layouts, so make sure to zero the whole structure.
- */
- error = -ENOMEM;
- bex = kzalloc(sizeof(*bex), GFP_KERNEL);
- if (!bex)
- goto out_error;
- args->lg_content = bex;
-
- error = sb->s_export_op->map_blocks(inode, seg->offset, seg->length,
- &iomap, seg->iomode != IOMODE_READ,
- &device_generation);
+ error = sb->s_export_op->map_blocks(inode, offset, length, &iomap,
+ iomode != IOMODE_READ, &device_generation);
if (error) {
if (error == -ENXIO)
- goto out_layoutunavailable;
- goto out_error;
- }
-
- if (iomap.length < args->lg_minlength) {
- dprintk("pnfsd: extent smaller than minlength\n");
- goto out_layoutunavailable;
+ return nfserr_layoutunavailable;
+ return nfserrno(error);
}
switch (iomap.type) {
case IOMAP_MAPPED:
- if (seg->iomode == IOMODE_READ)
+ if (iomode == IOMODE_READ)
bex->es = PNFS_BLOCK_READ_DATA;
else
bex->es = PNFS_BLOCK_READWRITE_DATA;
bex->soff = iomap.addr;
break;
case IOMAP_UNWRITTEN:
- if (seg->iomode & IOMODE_RW) {
+ if (iomode & IOMODE_RW) {
/*
* Crack monkey special case from section 2.3.1.
*/
- if (args->lg_minlength == 0) {
+ if (minlength == 0) {
dprintk("pnfsd: no soup for you!\n");
- goto out_layoutunavailable;
+ return nfserr_layoutunavailable;
}
bex->es = PNFS_BLOCK_INVALID_DATA;
bex->soff = iomap.addr;
break;
}
- /*FALLTHRU*/
+ fallthrough;
case IOMAP_HOLE:
- if (seg->iomode == IOMODE_READ) {
+ if (iomode == IOMODE_READ) {
bex->es = PNFS_BLOCK_NONE_DATA;
break;
}
- /*FALLTHRU*/
+ fallthrough;
case IOMAP_DELALLOC:
default:
WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type);
- goto out_layoutunavailable;
+ return nfserr_layoutunavailable;
}
error = nfsd4_set_deviceid(&bex->vol_id, fhp, device_generation);
if (error)
- goto out_error;
+ return nfserrno(error);
+
bex->foff = iomap.offset;
bex->len = iomap.length;
+ return nfs_ok;
+}
- seg->offset = iomap.offset;
- seg->length = iomap.length;
+static __be32
+nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode,
+ const struct svc_fh *fhp, struct nfsd4_layoutget *args)
+{
+ struct nfsd4_layout_seg *seg = &args->lg_seg;
+ struct pnfs_block_layout *bl;
+ struct pnfs_block_extent *first_bex, *last_bex;
+ u64 offset = seg->offset, length = seg->length;
+ u32 i, nr_extents_max, block_size = i_blocksize(inode);
+ __be32 nfserr;
- dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es);
- return 0;
+ if (locks_in_grace(SVC_NET(rqstp)))
+ return nfserr_grace;
+
+ nfserr = nfserr_layoutunavailable;
+ if (seg->offset & (block_size - 1)) {
+ dprintk("pnfsd: I/O misaligned\n");
+ goto out_error;
+ }
+
+ /*
+ * RFC 8881, section 3.3.17:
+ * The layout4 data type defines a layout for a file.
+ *
+ * RFC 8881, section 18.43.3:
+ * The loga_maxcount field specifies the maximum layout size
+ * (in bytes) that the client can handle. If the size of the
+ * layout structure exceeds the size specified by maxcount,
+ * the metadata server will return the NFS4ERR_TOOSMALL error.
+ */
+ nfserr = nfserr_toosmall;
+ if (args->lg_maxcount < PNFS_BLOCK_LAYOUT4_SIZE +
+ PNFS_BLOCK_EXTENT_SIZE)
+ goto out_error;
+
+ /*
+ * Limit the maximum layout size to avoid allocating
+ * a large buffer on the server for each layout request.
+ */
+ nr_extents_max = (min(args->lg_maxcount, PAGE_SIZE) -
+ PNFS_BLOCK_LAYOUT4_SIZE) / PNFS_BLOCK_EXTENT_SIZE;
+
+ /*
+ * Some clients barf on non-zero block numbers for NONE or INVALID
+ * layouts, so make sure to zero the whole structure.
+ */
+ nfserr = nfserrno(-ENOMEM);
+ bl = kzalloc(struct_size(bl, extents, nr_extents_max), GFP_KERNEL);
+ if (!bl)
+ goto out_error;
+ bl->nr_extents = nr_extents_max;
+ args->lg_content = bl;
+
+ for (i = 0; i < bl->nr_extents; i++) {
+ struct pnfs_block_extent *bex = bl->extents + i;
+ u64 bex_length;
+
+ nfserr = nfsd4_block_map_extent(inode, fhp, offset, length,
+ seg->iomode, args->lg_minlength, bex);
+ if (nfserr != nfs_ok)
+ goto out_error;
+
+ bex_length = bex->len - (offset - bex->foff);
+ if (bex_length >= length) {
+ bl->nr_extents = i + 1;
+ break;
+ }
+
+ offset = bex->foff + bex->len;
+ length -= bex_length;
+ }
+
+ first_bex = bl->extents;
+ last_bex = bl->extents + bl->nr_extents - 1;
+
+ nfserr = nfserr_layoutunavailable;
+ length = last_bex->foff + last_bex->len - seg->offset;
+ if (length < args->lg_minlength) {
+ dprintk("pnfsd: extent smaller than minlength\n");
+ goto out_error;
+ }
+
+ seg->offset = first_bex->foff;
+ seg->length = last_bex->foff - first_bex->foff + last_bex->len;
+ return nfs_ok;
out_error:
seg->length = 0;
- return nfserrno(error);
-out_layoutunavailable:
- seg->length = 0;
- return nfserr_layoutunavailable;
+ return nfserr;
}
static __be32
nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
struct iomap *iomaps, int nr_iomaps)
{
- loff_t new_size = lcp->lc_last_wr + 1;
+ struct timespec64 mtime = inode_get_mtime(inode);
struct iattr iattr = { .ia_valid = 0 };
- struct timespec ts;
int error;
- ts = timespec64_to_timespec(inode->i_mtime);
if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
- timespec_compare(&lcp->lc_mtime, &ts) < 0)
- lcp->lc_mtime = timespec64_to_timespec(current_time(inode));
+ timespec64_compare(&lcp->lc_mtime, &mtime) < 0)
+ lcp->lc_mtime = current_time(inode);
iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
- iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = timespec_to_timespec64(lcp->lc_mtime);
+ iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
- if (new_size > i_size_read(inode)) {
+ if (lcp->lc_size_chg) {
iattr.ia_valid |= ATTR_SIZE;
- iattr.ia_size = new_size;
+ iattr.ia_size = lcp->lc_newsize;
}
error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps,
@@ -150,8 +208,7 @@ nfsd4_block_get_device_info_simple(struct super_block *sb,
struct pnfs_block_deviceaddr *dev;
struct pnfs_block_volume *b;
- dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
- sizeof(struct pnfs_block_volume), GFP_KERNEL);
+ dev = kzalloc(struct_size(dev, volumes, 1), GFP_KERNEL);
if (!dev)
return -ENOMEM;
gdp->gd_device = dev;
@@ -171,22 +228,26 @@ nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdp)
{
- if (sb->s_bdev != sb->s_bdev->bd_contains)
+ if (bdev_is_partition(sb->s_bdev))
return nfserr_inval;
return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
}
static __be32
-nfsd4_block_proc_layoutcommit(struct inode *inode,
+nfsd4_block_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
struct nfsd4_layoutcommit *lcp)
{
struct iomap *iomaps;
int nr_iomaps;
+ __be32 nfserr;
- nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, i_blocksize(inode));
- if (nr_iomaps < 0)
- return nfserrno(nr_iomaps);
+ rqstp->rq_arg = lcp->lc_up_layout;
+ svcxdr_init_decode(rqstp);
+
+ nfserr = nfsd4_block_decode_layoutupdate(&rqstp->rq_arg_stream,
+ &iomaps, &nr_iomaps, i_blocksize(inode));
+ if (nfserr != nfs_ok)
+ return nfserr;
return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
}
@@ -212,109 +273,6 @@ const struct nfsd4_layout_ops bl_layout_ops = {
#endif /* CONFIG_NFSD_BLOCKLAYOUT */
#ifdef CONFIG_NFSD_SCSILAYOUT
-static int nfsd4_scsi_identify_device(struct block_device *bdev,
- struct pnfs_block_volume *b)
-{
- struct request_queue *q = bdev->bd_disk->queue;
- struct request *rq;
- struct scsi_request *req;
- /*
- * The allocation length (passed in bytes 3 and 4 of the INQUIRY
- * command descriptor block) specifies the number of bytes that have
- * been allocated for the data-in buffer.
- * 252 is the highest one-byte value that is a multiple of 4.
- * 65532 is the highest two-byte value that is a multiple of 4.
- */
- size_t bufflen = 252, maxlen = 65532, len, id_len;
- u8 *buf, *d, type, assoc;
- int retries = 1, error;
-
- if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q)))
- return -EINVAL;
-
-again:
- buf = kzalloc(bufflen, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
- if (IS_ERR(rq)) {
- error = -ENOMEM;
- goto out_free_buf;
- }
- req = scsi_req(rq);
-
- error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
- if (error)
- goto out_put_request;
-
- req->cmd[0] = INQUIRY;
- req->cmd[1] = 1;
- req->cmd[2] = 0x83;
- req->cmd[3] = bufflen >> 8;
- req->cmd[4] = bufflen & 0xff;
- req->cmd_len = COMMAND_SIZE(INQUIRY);
-
- blk_execute_rq(rq->q, NULL, rq, 1);
- if (req->result) {
- pr_err("pNFS: INQUIRY 0x83 failed with: %x\n",
- req->result);
- error = -EIO;
- goto out_put_request;
- }
-
- len = (buf[2] << 8) + buf[3] + 4;
- if (len > bufflen) {
- if (len <= maxlen && retries--) {
- blk_put_request(rq);
- kfree(buf);
- bufflen = len;
- goto again;
- }
- pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n",
- len);
- goto out_put_request;
- }
-
- d = buf + 4;
- for (d = buf + 4; d < buf + len; d += id_len + 4) {
- id_len = d[3];
- type = d[1] & 0xf;
- assoc = (d[1] >> 4) & 0x3;
-
- /*
- * We only care about a EUI-64 and NAA designator types
- * with LU association.
- */
- if (assoc != 0x00)
- continue;
- if (type != 0x02 && type != 0x03)
- continue;
- if (id_len != 8 && id_len != 12 && id_len != 16)
- continue;
-
- b->scsi.code_set = PS_CODE_SET_BINARY;
- b->scsi.designator_type = type == 0x02 ?
- PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA;
- b->scsi.designator_len = id_len;
- memcpy(b->scsi.designator, d + 4, id_len);
-
- /*
- * If we found a 8 or 12 byte descriptor continue on to
- * see if a 16 byte one is available. If we find a
- * 16 byte descriptor we're done.
- */
- if (id_len == 16)
- break;
- }
-
-out_put_request:
- blk_put_request(rq);
-out_free_buf:
- kfree(buf);
- return error;
-}
-
#define NFSD_MDS_PR_KEY 0x0100000000000000ULL
/*
@@ -326,6 +284,31 @@ static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp)
return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id;
}
+static const u8 designator_types[] = {
+ PS_DESIGNATOR_EUI64,
+ PS_DESIGNATOR_NAA,
+};
+
+static int
+nfsd4_block_get_unique_id(struct gendisk *disk, struct pnfs_block_volume *b)
+{
+ int ret, i;
+
+ for (i = 0; i < ARRAY_SIZE(designator_types); i++) {
+ u8 type = designator_types[i];
+
+ ret = disk->fops->get_unique_id(disk, b->scsi.designator, type);
+ if (ret > 0) {
+ b->scsi.code_set = PS_CODE_SET_BINARY;
+ b->scsi.designator_type = type;
+ b->scsi.designator_len = ret;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
static int
nfsd4_block_get_device_info_scsi(struct super_block *sb,
struct nfs4_client *clp,
@@ -334,10 +317,9 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
struct pnfs_block_deviceaddr *dev;
struct pnfs_block_volume *b;
const struct pr_ops *ops;
- int error;
+ int ret;
- dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
- sizeof(struct pnfs_block_volume), GFP_KERNEL);
+ dev = kzalloc(struct_size(dev, volumes, 1), GFP_KERNEL);
if (!dev)
return -ENOMEM;
gdp->gd_device = dev;
@@ -348,33 +330,39 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
b->type = PNFS_BLOCK_VOLUME_SCSI;
b->scsi.pr_key = nfsd4_scsi_pr_key(clp);
- error = nfsd4_scsi_identify_device(sb->s_bdev, b);
- if (error)
- return error;
+ ret = nfsd4_block_get_unique_id(sb->s_bdev->bd_disk, b);
+ if (ret < 0)
+ goto out_free_dev;
+ ret = -EINVAL;
ops = sb->s_bdev->bd_disk->fops->pr_ops;
if (!ops) {
pr_err("pNFS: device %s does not support PRs.\n",
sb->s_id);
- return -EINVAL;
+ goto out_free_dev;
}
- error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
- if (error) {
+ ret = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
+ if (ret) {
pr_err("pNFS: failed to register key for device %s.\n",
sb->s_id);
- return -EINVAL;
+ goto out_free_dev;
}
- error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
+ ret = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
PR_EXCLUSIVE_ACCESS_REG_ONLY, 0);
- if (error) {
+ if (ret) {
pr_err("pNFS: failed to reserve device %s.\n",
sb->s_id);
- return -EINVAL;
+ goto out_free_dev;
}
return 0;
+
+out_free_dev:
+ kfree(dev);
+ gdp->gd_device = NULL;
+ return ret;
}
static __be32
@@ -383,33 +371,40 @@ nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdp)
{
- if (sb->s_bdev != sb->s_bdev->bd_contains)
+ if (bdev_is_partition(sb->s_bdev))
return nfserr_inval;
return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
}
static __be32
-nfsd4_scsi_proc_layoutcommit(struct inode *inode,
+nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
struct nfsd4_layoutcommit *lcp)
{
struct iomap *iomaps;
int nr_iomaps;
+ __be32 nfserr;
+
+ rqstp->rq_arg = lcp->lc_up_layout;
+ svcxdr_init_decode(rqstp);
- nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, i_blocksize(inode));
- if (nr_iomaps < 0)
- return nfserrno(nr_iomaps);
+ nfserr = nfsd4_scsi_decode_layoutupdate(&rqstp->rq_arg_stream,
+ &iomaps, &nr_iomaps, i_blocksize(inode));
+ if (nfserr != nfs_ok)
+ return nfserr;
return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
}
static void
-nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
+nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
- struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+ struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
+ int status;
- bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
- nfsd4_scsi_pr_key(clp), 0, true);
+ status = bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
+ nfsd4_scsi_pr_key(clp),
+ PR_EXCLUSIVE_ACCESS_REG_ONLY, true);
+ trace_nfsd_pnfs_fence(clp, bdev->bd_disk->disk_name, status);
}
const struct nfsd4_layout_ops scsi_layout_ops = {
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 442543304930..196ef4245604 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -9,16 +9,30 @@
#include "nfsd.h"
#include "blocklayoutxdr.h"
+#include "vfs.h"
#define NFSDDBG_FACILITY NFSDDBG_PNFS
+/**
+ * nfsd4_block_encode_layoutget - encode block/scsi layout extent array
+ * @xdr: stream for data encoding
+ * @lgp: layoutget content, actually an array of extents to encode
+ *
+ * Encode the opaque loc_body field in the layoutget response. Since the
+ * pnfs_block_layout4 and pnfs_scsi_layout4 structures on the wire are
+ * the same, this function is used by both layout drivers.
+ *
+ * Return values:
+ * %nfs_ok: Success, all extents encoded into @xdr
+ * %nfserr_toosmall: Not enough space in @xdr to encode all the data
+ */
__be32
nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
- struct nfsd4_layoutget *lgp)
+ const struct nfsd4_layoutget *lgp)
{
- struct pnfs_block_extent *b = lgp->lg_content;
- int len = sizeof(__be32) + 5 * sizeof(__be64) + sizeof(__be32);
+ const struct pnfs_block_layout *bl = lgp->lg_content;
+ u32 i, len = sizeof(__be32) + bl->nr_extents * PNFS_BLOCK_EXTENT_SIZE;
__be32 *p;
p = xdr_reserve_space(xdr, sizeof(__be32) + len);
@@ -26,15 +40,19 @@ nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
return nfserr_toosmall;
*p++ = cpu_to_be32(len);
- *p++ = cpu_to_be32(1); /* we always return a single extent */
-
- p = xdr_encode_opaque_fixed(p, &b->vol_id,
- sizeof(struct nfsd4_deviceid));
- p = xdr_encode_hyper(p, b->foff);
- p = xdr_encode_hyper(p, b->len);
- p = xdr_encode_hyper(p, b->soff);
- *p++ = cpu_to_be32(b->es);
- return 0;
+ *p++ = cpu_to_be32(bl->nr_extents);
+
+ for (i = 0; i < bl->nr_extents; i++) {
+ const struct pnfs_block_extent *bex = bl->extents + i;
+
+ p = svcxdr_encode_deviceid4(p, &bex->vol_id);
+ p = xdr_encode_hyper(p, bex->foff);
+ p = xdr_encode_hyper(p, bex->len);
+ p = xdr_encode_hyper(p, bex->soff);
+ *p++ = cpu_to_be32(bex->es);
+ }
+
+ return nfs_ok;
}
static int
@@ -76,12 +94,21 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
__be32
nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
- struct nfsd4_getdeviceinfo *gdp)
+ const struct nfsd4_getdeviceinfo *gdp)
{
struct pnfs_block_deviceaddr *dev = gdp->gd_device;
int len = sizeof(__be32), ret, i;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
p = xdr_reserve_space(xdr, len + sizeof(__be32));
if (!p)
return nfserr_resource;
@@ -102,64 +129,86 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
return 0;
}
-int
-nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size)
+/**
+ * nfsd4_block_decode_layoutupdate - decode the block layout extent array
+ * @xdr: subbuf set to the encoded array
+ * @iomapp: pointer to store the decoded extent array
+ * @nr_iomapsp: pointer to store the number of extents
+ * @block_size: alignment of extent offset and length
+ *
+ * This function decodes the opaque field of the layoutupdate4 structure
+ * in a layoutcommit request for the block layout driver. The field is
+ * actually an array of extents sent by the client. It also checks that
+ * the file offset, storage offset and length of each extent are aligned
+ * by @block_size.
+ *
+ * Return values:
+ * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
+ * %nfserr_bad_xdr: The encoded array in @xdr is invalid
+ * %nfserr_inval: An unaligned extent found
+ * %nfserr_delay: Failed to allocate memory for @iomapp
+ */
+__be32
+nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
+ int *nr_iomapsp, u32 block_size)
{
struct iomap *iomaps;
- u32 nr_iomaps, i;
+ u32 nr_iomaps, expected, len, i;
+ __be32 nfserr;
- if (len < sizeof(u32)) {
- dprintk("%s: extent array too small: %u\n", __func__, len);
- return -EINVAL;
- }
- len -= sizeof(u32);
- if (len % PNFS_BLOCK_EXTENT_SIZE) {
- dprintk("%s: extent array invalid: %u\n", __func__, len);
- return -EINVAL;
- }
+ if (xdr_stream_decode_u32(xdr, &nr_iomaps))
+ return nfserr_bad_xdr;
- nr_iomaps = be32_to_cpup(p++);
- if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
- dprintk("%s: extent array size mismatch: %u/%u\n",
- __func__, len, nr_iomaps);
- return -EINVAL;
- }
+ len = sizeof(__be32) + xdr_stream_remaining(xdr);
+ expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
+ if (len != expected)
+ return nfserr_bad_xdr;
iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
- if (!iomaps) {
- dprintk("%s: failed to allocate extent array\n", __func__);
- return -ENOMEM;
- }
+ if (!iomaps)
+ return nfserr_delay;
for (i = 0; i < nr_iomaps; i++) {
struct pnfs_block_extent bex;
- memcpy(&bex.vol_id, p, sizeof(struct nfsd4_deviceid));
- p += XDR_QUADLEN(sizeof(struct nfsd4_deviceid));
+ if (nfsd4_decode_deviceid4(xdr, &bex.vol_id)) {
+ nfserr = nfserr_bad_xdr;
+ goto fail;
+ }
- p = xdr_decode_hyper(p, &bex.foff);
+ if (xdr_stream_decode_u64(xdr, &bex.foff)) {
+ nfserr = nfserr_bad_xdr;
+ goto fail;
+ }
if (bex.foff & (block_size - 1)) {
- dprintk("%s: unaligned offset 0x%llx\n",
- __func__, bex.foff);
+ nfserr = nfserr_inval;
+ goto fail;
+ }
+
+ if (xdr_stream_decode_u64(xdr, &bex.len)) {
+ nfserr = nfserr_bad_xdr;
goto fail;
}
- p = xdr_decode_hyper(p, &bex.len);
if (bex.len & (block_size - 1)) {
- dprintk("%s: unaligned length 0x%llx\n",
- __func__, bex.foff);
+ nfserr = nfserr_inval;
+ goto fail;
+ }
+
+ if (xdr_stream_decode_u64(xdr, &bex.soff)) {
+ nfserr = nfserr_bad_xdr;
goto fail;
}
- p = xdr_decode_hyper(p, &bex.soff);
if (bex.soff & (block_size - 1)) {
- dprintk("%s: unaligned disk offset 0x%llx\n",
- __func__, bex.soff);
+ nfserr = nfserr_inval;
+ goto fail;
+ }
+
+ if (xdr_stream_decode_u32(xdr, &bex.es)) {
+ nfserr = nfserr_bad_xdr;
goto fail;
}
- bex.es = be32_to_cpup(p++);
if (bex.es != PNFS_BLOCK_READWRITE_DATA) {
- dprintk("%s: incorrect extent state %d\n",
- __func__, bex.es);
+ nfserr = nfserr_inval;
goto fail;
}
@@ -168,59 +217,79 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
}
*iomapp = iomaps;
- return nr_iomaps;
+ *nr_iomapsp = nr_iomaps;
+ return nfs_ok;
fail:
kfree(iomaps);
- return -EINVAL;
+ return nfserr;
}
-int
-nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size)
+/**
+ * nfsd4_scsi_decode_layoutupdate - decode the scsi layout extent array
+ * @xdr: subbuf set to the encoded array
+ * @iomapp: pointer to store the decoded extent array
+ * @nr_iomapsp: pointer to store the number of extents
+ * @block_size: alignment of extent offset and length
+ *
+ * This function decodes the opaque field of the layoutupdate4 structure
+ * in a layoutcommit request for the scsi layout driver. The field is
+ * actually an array of extents sent by the client. It also checks that
+ * the offset and length of each extent are aligned by @block_size.
+ *
+ * Return values:
+ * %nfs_ok: Successful decoding, @iomapp and @nr_iomapsp are valid
+ * %nfserr_bad_xdr: The encoded array in @xdr is invalid
+ * %nfserr_inval: An unaligned extent found
+ * %nfserr_delay: Failed to allocate memory for @iomapp
+ */
+__be32
+nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr, struct iomap **iomapp,
+ int *nr_iomapsp, u32 block_size)
{
struct iomap *iomaps;
- u32 nr_iomaps, expected, i;
+ u32 nr_iomaps, expected, len, i;
+ __be32 nfserr;
- if (len < sizeof(u32)) {
- dprintk("%s: extent array too small: %u\n", __func__, len);
- return -EINVAL;
- }
+ if (xdr_stream_decode_u32(xdr, &nr_iomaps))
+ return nfserr_bad_xdr;
- nr_iomaps = be32_to_cpup(p++);
+ len = sizeof(__be32) + xdr_stream_remaining(xdr);
expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
- if (len != expected) {
- dprintk("%s: extent array size mismatch: %u/%u\n",
- __func__, len, expected);
- return -EINVAL;
- }
+ if (len != expected)
+ return nfserr_bad_xdr;
iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
- if (!iomaps) {
- dprintk("%s: failed to allocate extent array\n", __func__);
- return -ENOMEM;
- }
+ if (!iomaps)
+ return nfserr_delay;
for (i = 0; i < nr_iomaps; i++) {
u64 val;
- p = xdr_decode_hyper(p, &val);
+ if (xdr_stream_decode_u64(xdr, &val)) {
+ nfserr = nfserr_bad_xdr;
+ goto fail;
+ }
if (val & (block_size - 1)) {
- dprintk("%s: unaligned offset 0x%llx\n", __func__, val);
+ nfserr = nfserr_inval;
goto fail;
}
iomaps[i].offset = val;
- p = xdr_decode_hyper(p, &val);
+ if (xdr_stream_decode_u64(xdr, &val)) {
+ nfserr = nfserr_bad_xdr;
+ goto fail;
+ }
if (val & (block_size - 1)) {
- dprintk("%s: unaligned length 0x%llx\n", __func__, val);
+ nfserr = nfserr_inval;
goto fail;
}
iomaps[i].length = val;
}
*iomapp = iomaps;
- return nr_iomaps;
+ *nr_iomapsp = nr_iomaps;
+ return nfs_ok;
fail:
kfree(iomaps);
- return -EINVAL;
+ return nfserr;
}
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
index bc5166bfe46b..2e0c6c7d2b42 100644
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -8,6 +8,15 @@
struct iomap;
struct xdr_stream;
+/* On the wire size of the layout4 struct with zero number of extents */
+#define PNFS_BLOCK_LAYOUT4_SIZE \
+ (sizeof(__be32) * 2 + /* offset4 */ \
+ sizeof(__be32) * 2 + /* length4 */ \
+ sizeof(__be32) + /* layoutiomode4 */ \
+ sizeof(__be32) + /* layouttype4 */ \
+ sizeof(__be32) + /* number of bytes */ \
+ sizeof(__be32)) /* number of extents */
+
struct pnfs_block_extent {
struct nfsd4_deviceid vol_id;
u64 foff;
@@ -21,6 +30,11 @@ struct pnfs_block_range {
u64 len;
};
+struct pnfs_block_layout {
+ u32 nr_extents;
+ struct pnfs_block_extent extents[] __counted_by(nr_extents);
+};
+
/*
* Random upper cap for the uuid length to avoid unbounded allocation.
* Not actually limited by the protocol.
@@ -47,16 +61,16 @@ struct pnfs_block_volume {
struct pnfs_block_deviceaddr {
u32 nr_volumes;
- struct pnfs_block_volume volumes[];
+ struct pnfs_block_volume volumes[] __counted_by(nr_volumes);
};
__be32 nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
- struct nfsd4_getdeviceinfo *gdp);
+ const struct nfsd4_getdeviceinfo *gdp);
__be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
- struct nfsd4_layoutget *lgp);
-int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size);
-int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
- u32 block_size);
+ const struct nfsd4_layoutget *lgp);
+__be32 nfsd4_block_decode_layoutupdate(struct xdr_stream *xdr,
+ struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
+__be32 nfsd4_scsi_decode_layoutupdate(struct xdr_stream *xdr,
+ struct iomap **iomapp, int *nr_iomapsp, u32 block_size);
#endif /* _NFSD_BLOCKLAYOUTXDR_H */
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 4a98537efb0f..bb7addef4a31 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -10,6 +10,7 @@
#define NFSCACHE_H
#include <linux/sunrpc/svc.h>
+#include "nfsd.h"
/*
* Representation of a reply cache entry.
@@ -18,7 +19,7 @@
* typical sockaddr_storage. This is for space reasons, since sockaddr_storage
* is much larger than a sockaddr_in6.
*/
-struct svc_cacherep {
+struct nfsd_cacherep {
struct {
/* Keep often-read xid, csum in the same cache line: */
__be32 k_xid;
@@ -77,10 +78,14 @@ enum {
/* Checksum this amount of the request */
#define RC_CSUMLEN (256U)
-int nfsd_reply_cache_init(void);
-void nfsd_reply_cache_shutdown(void);
-int nfsd_cache_lookup(struct svc_rqst *);
-void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
-int nfsd_reply_cache_stats_open(struct inode *, struct file *);
+int nfsd_drc_slab_create(void);
+void nfsd_drc_slab_free(void);
+int nfsd_reply_cache_init(struct nfsd_net *);
+void nfsd_reply_cache_shutdown(struct nfsd_net *);
+int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ unsigned int len, struct nfsd_cacherep **cacherep);
+void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
+ int cachetype, __be32 *statp);
+int nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c
new file mode 100644
index 000000000000..7f44689e0a53
--- /dev/null
+++ b/fs/nfsd/debugfs.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/debugfs.h>
+
+#include "nfsd.h"
+
+static struct dentry *nfsd_top_dir __read_mostly;
+
+/*
+ * /sys/kernel/debug/nfsd/disable-splice-read
+ *
+ * Contents:
+ * %0: NFS READ is allowed to use page splicing
+ * %1: NFS READ uses only iov iter read
+ *
+ * The default value of this setting is zero (page splicing is
+ * allowed). This setting takes immediate effect for all NFS
+ * versions, all exports, and in all NFSD net namespaces.
+ */
+
+static int nfsd_dsr_get(void *data, u64 *val)
+{
+ *val = nfsd_disable_splice_read ? 1 : 0;
+ return 0;
+}
+
+static int nfsd_dsr_set(void *data, u64 val)
+{
+ nfsd_disable_splice_read = (val > 0);
+ if (!nfsd_disable_splice_read) {
+ /*
+ * Must use buffered I/O if splice_read is enabled.
+ */
+ nfsd_io_cache_read = NFSD_IO_BUFFERED;
+ }
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(nfsd_dsr_fops, nfsd_dsr_get, nfsd_dsr_set, "%llu\n");
+
+/*
+ * /sys/kernel/debug/nfsd/io_cache_read
+ *
+ * Contents:
+ * %0: NFS READ will use buffered IO
+ * %1: NFS READ will use dontcache (buffered IO w/ dropbehind)
+ * %2: NFS READ will use direct IO
+ *
+ * This setting takes immediate effect for all NFS versions,
+ * all exports, and in all NFSD net namespaces.
+ */
+
+static int nfsd_io_cache_read_get(void *data, u64 *val)
+{
+ *val = nfsd_io_cache_read;
+ return 0;
+}
+
+static int nfsd_io_cache_read_set(void *data, u64 val)
+{
+ int ret = 0;
+
+ switch (val) {
+ case NFSD_IO_BUFFERED:
+ nfsd_io_cache_read = NFSD_IO_BUFFERED;
+ break;
+ case NFSD_IO_DONTCACHE:
+ case NFSD_IO_DIRECT:
+ /*
+ * Must disable splice_read when enabling
+ * NFSD_IO_DONTCACHE.
+ */
+ nfsd_disable_splice_read = true;
+ nfsd_io_cache_read = val;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(nfsd_io_cache_read_fops, nfsd_io_cache_read_get,
+ nfsd_io_cache_read_set, "%llu\n");
+
+/*
+ * /sys/kernel/debug/nfsd/io_cache_write
+ *
+ * Contents:
+ * %0: NFS WRITE will use buffered IO
+ * %1: NFS WRITE will use dontcache (buffered IO w/ dropbehind)
+ *
+ * This setting takes immediate effect for all NFS versions,
+ * all exports, and in all NFSD net namespaces.
+ */
+
+static int nfsd_io_cache_write_get(void *data, u64 *val)
+{
+ *val = nfsd_io_cache_write;
+ return 0;
+}
+
+static int nfsd_io_cache_write_set(void *data, u64 val)
+{
+ int ret = 0;
+
+ switch (val) {
+ case NFSD_IO_BUFFERED:
+ case NFSD_IO_DONTCACHE:
+ case NFSD_IO_DIRECT:
+ nfsd_io_cache_write = val;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(nfsd_io_cache_write_fops, nfsd_io_cache_write_get,
+ nfsd_io_cache_write_set, "%llu\n");
+
+void nfsd_debugfs_exit(void)
+{
+ debugfs_remove_recursive(nfsd_top_dir);
+ nfsd_top_dir = NULL;
+}
+
+void nfsd_debugfs_init(void)
+{
+ nfsd_top_dir = debugfs_create_dir("nfsd", NULL);
+
+ debugfs_create_file("disable-splice-read", S_IWUSR | S_IRUGO,
+ nfsd_top_dir, NULL, &nfsd_dsr_fops);
+
+ debugfs_create_file("io_cache_read", 0644, nfsd_top_dir, NULL,
+ &nfsd_io_cache_read_fops);
+
+ debugfs_create_file("io_cache_write", 0644, nfsd_top_dir, NULL,
+ &nfsd_io_cache_write_fops);
+}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 802993d8912f..9d55512d0cc9 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -22,6 +22,8 @@
#include "nfsfh.h"
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_EXPORT
@@ -49,6 +51,11 @@ static void expkey_put(struct kref *ref)
kfree_rcu(key, ek_rcu);
}
+static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
+{
+ return sunrpc_cache_pipe_upcall(cd, h);
+}
+
static void expkey_request(struct cache_detail *cd,
struct cache_head *h,
char **bpp, int *blen)
@@ -75,8 +82,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
int len;
struct auth_domain *dom = NULL;
int err;
- int fsidtype;
- char *ep;
+ u8 fsidtype;
struct svc_expkey key;
struct svc_expkey *ek = NULL;
@@ -90,7 +96,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
goto out;
err = -EINVAL;
- if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
err = -ENOENT;
@@ -100,12 +106,11 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
dprintk("found domain %s\n", buf);
err = -EINVAL;
- if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
- fsidtype = simple_strtoul(buf, &ep, 10);
- if (*ep)
+ if (kstrtou8(buf, 10, &fsidtype))
goto out;
- dprintk("found fsidtype %d\n", fsidtype);
+ dprintk("found fsidtype %u\n", fsidtype);
if (key_len(fsidtype)==0) /* invalid type */
goto out;
if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
@@ -116,11 +121,11 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
/* OK, we seem to have a valid key */
key.h.flags = 0;
- key.h.expiry_time = get_expiry(&mesg);
- if (key.h.expiry_time == 0)
+ err = get_expiry(&mesg, &key.h.expiry_time);
+ if (err)
goto out;
- key.ek_client = dom;
+ key.ek_client = dom;
key.ek_fsidtype = fsidtype;
memcpy(key.ek_fsid, buf, len);
@@ -139,7 +144,9 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
if (len == 0) {
set_bit(CACHE_NEGATIVE, &key.h.flags);
ek = svc_expkey_update(cd, &key, ek);
- if (!ek)
+ if (ek)
+ trace_nfsd_expkey_update(ek, NULL);
+ else
err = -ENOMEM;
} else {
err = kern_path(buf, 0, &key.ek_path);
@@ -149,7 +156,9 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
dprintk("Found the path %s\n", buf);
ek = svc_expkey_update(cd, &key, ek);
- if (!ek)
+ if (ek)
+ trace_nfsd_expkey_update(ek, buf);
+ else
err = -ENOMEM;
path_put(&key.ek_path);
}
@@ -232,11 +241,23 @@ static struct cache_head *expkey_alloc(void)
return NULL;
}
+static void expkey_flush(void)
+{
+ /*
+ * Take the nfsd_mutex here to ensure that the file cache is not
+ * destroyed while we're in the middle of flushing.
+ */
+ mutex_lock(&nfsd_mutex);
+ nfsd_file_cache_purge(current->nsproxy->net_ns);
+ mutex_unlock(&nfsd_mutex);
+}
+
static const struct cache_detail svc_expkey_cache_template = {
.owner = THIS_MODULE,
.hash_size = EXPKEY_HASHMAX,
.name = "nfsd.fh",
.cache_put = expkey_put,
+ .cache_upcall = expkey_upcall,
.cache_request = expkey_request,
.cache_parse = expkey_parse,
.cache_show = expkey_show,
@@ -244,6 +265,7 @@ static const struct cache_detail svc_expkey_cache_template = {
.init = expkey_init,
.update = expkey_update,
.alloc = expkey_alloc,
+ .flush = expkey_flush,
};
static int
@@ -307,14 +329,54 @@ static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
fsloc->locations = NULL;
}
+static int export_stats_init(struct export_stats *stats)
+{
+ stats->start_time = ktime_get_seconds();
+ return percpu_counter_init_many(stats->counter, 0, GFP_KERNEL,
+ EXP_STATS_COUNTERS_NUM);
+}
+
+static void export_stats_reset(struct export_stats *stats)
+{
+ if (stats) {
+ int i;
+
+ for (i = 0; i < EXP_STATS_COUNTERS_NUM; i++)
+ percpu_counter_set(&stats->counter[i], 0);
+ }
+}
+
+static void export_stats_destroy(struct export_stats *stats)
+{
+ if (stats)
+ percpu_counter_destroy_many(stats->counter,
+ EXP_STATS_COUNTERS_NUM);
+}
+
+static void svc_export_release(struct rcu_head *rcu_head)
+{
+ struct svc_export *exp = container_of(rcu_head, struct svc_export,
+ ex_rcu);
+
+ nfsd4_fslocs_free(&exp->ex_fslocs);
+ export_stats_destroy(exp->ex_stats);
+ kfree(exp->ex_stats);
+ kfree(exp->ex_uuid);
+ kfree(exp);
+}
+
static void svc_export_put(struct kref *ref)
{
struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
+
path_put(&exp->ex_path);
auth_domain_put(exp->ex_client);
- nfsd4_fslocs_free(&exp->ex_fslocs);
- kfree(exp->ex_uuid);
- kfree_rcu(exp, ex_rcu);
+ call_rcu(&exp->ex_rcu, svc_export_release);
+}
+
+static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
+{
+ return sunrpc_cache_pipe_upcall(cd, h);
}
static void svc_export_request(struct cache_detail *cd,
@@ -340,8 +402,9 @@ static struct svc_export *svc_export_update(struct svc_export *new,
struct svc_export *old);
static struct svc_export *svc_export_lookup(struct svc_export *);
-static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
+static int check_export(const struct path *path, int *flags, unsigned char *uuid)
{
+ struct inode *inode = d_inode(path->dentry);
/*
* We currently export only dirs, regular files, and (for v4
@@ -365,6 +428,7 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
* or an FSID number (so NFSEXP_FSID or ->uuid is needed).
* 2: We must be able to find an inode from a filehandle.
* This means that s_export_op must be set.
+ * 3: We must not currently be on an idmapped mount.
*/
if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
!(*flags & NFSEXP_FSID) &&
@@ -373,14 +437,23 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
return -EINVAL;
}
- if (!inode->i_sb->s_export_op ||
- !inode->i_sb->s_export_op->fh_to_dentry) {
+ if (!exportfs_can_decode_fh(inode->i_sb->s_export_op)) {
dprintk("exp_export: export of invalid fs type.\n");
return -EINVAL;
}
- return 0;
+ if (is_idmapped_mnt(path->mnt)) {
+ dprintk("exp_export: export of idmapped mounts not yet supported.\n");
+ return -EINVAL;
+ }
+ if (inode->i_sb->s_export_op->flags & EXPORT_OP_NOSUBTREECHK &&
+ !(*flags & NFSEXP_NOSUBTREECHECK)) {
+ dprintk("%s: %s does not support subtree checking!\n",
+ __func__, inode->i_sb->s_type->name);
+ return -EINVAL;
+ }
+ return 0;
}
#ifdef CONFIG_NFSD_V4
@@ -487,6 +560,29 @@ static inline int
secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
#endif
+static int xprtsec_parse(char **mesg, char *buf, struct svc_export *exp)
+{
+ unsigned int i, mode, listsize;
+ int err;
+
+ err = get_uint(mesg, &listsize);
+ if (err)
+ return err;
+ if (listsize > NFSEXP_XPRTSEC_NUM)
+ return -EINVAL;
+
+ exp->ex_xprtsec_modes = 0;
+ for (i = 0; i < listsize; i++) {
+ err = get_uint(mesg, &mode);
+ if (err)
+ return err;
+ if (mode > NFSEXP_XPRTSEC_MTLS)
+ return -EINVAL;
+ exp->ex_xprtsec_modes |= mode;
+ }
+ return 0;
+}
+
static inline int
nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid)
{
@@ -512,7 +608,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
{
/* client path expiry [flags anonuid anongid fsid] */
char *buf;
- int len;
int err;
struct auth_domain *dom = NULL;
struct svc_export exp = {}, *expp;
@@ -528,8 +623,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
/* client */
err = -EINVAL;
- len = qword_get(&mesg, buf, PAGE_SIZE);
- if (len <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out;
err = -ENOENT;
@@ -539,7 +633,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
/* path */
err = -EINVAL;
- if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+ if (qword_get(&mesg, buf, PAGE_SIZE) <= 0)
goto out1;
err = kern_path(buf, 0, &exp.ex_path);
@@ -549,11 +643,11 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
exp.ex_client = dom;
exp.cd = cd;
exp.ex_devid_map = NULL;
+ exp.ex_xprtsec_modes = NFSEXP_XPRTSEC_ALL;
/* expiry */
- err = -EINVAL;
- exp.h.expiry_time = get_expiry(&mesg);
- if (exp.h.expiry_time == 0)
+ err = get_expiry(&mesg, &exp.h.expiry_time);
+ if (err)
goto out3;
/* flags */
@@ -565,18 +659,18 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (err || an_int < 0)
goto out3;
exp.ex_flags= an_int;
-
+
/* anon uid */
err = get_int(&mesg, &an_int);
if (err)
goto out3;
- exp.ex_anon_uid= make_kuid(&init_user_ns, an_int);
+ exp.ex_anon_uid= make_kuid(current_user_ns(), an_int);
/* anon gid */
err = get_int(&mesg, &an_int);
if (err)
goto out3;
- exp.ex_anon_gid= make_kgid(&init_user_ns, an_int);
+ exp.ex_anon_gid= make_kgid(current_user_ns(), an_int);
/* fsid */
err = get_int(&mesg, &an_int);
@@ -584,13 +678,15 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
goto out3;
exp.ex_fsid = an_int;
- while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
+ while (qword_get(&mesg, buf, PAGE_SIZE) > 0) {
if (strcmp(buf, "fsloc") == 0)
err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
else if (strcmp(buf, "uuid") == 0)
err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid);
else if (strcmp(buf, "secinfo") == 0)
err = secinfo_parse(&mesg, buf, &exp);
+ else if (strcmp(buf, "xprtsec") == 0)
+ err = xprtsec_parse(&mesg, buf, &exp);
else
/* quietly ignore unknown words and anything
* following. Newer user-space can try to set
@@ -601,10 +697,10 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
goto out4;
}
- err = check_export(d_inode(exp.ex_path.dentry), &exp.ex_flags,
- exp.ex_uuid);
+ err = check_export(&exp.ex_path, &exp.ex_flags, exp.ex_uuid);
if (err)
goto out4;
+
/*
* No point caching this if it would immediately expire.
* Also, this protects exportfs's dummy export from the
@@ -630,15 +726,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
}
expp = svc_export_lookup(&exp);
- if (expp)
- expp = svc_export_update(&exp, expp);
- else
- err = -ENOMEM;
- cache_flush();
- if (expp == NULL)
+ if (!expp) {
err = -ENOMEM;
- else
+ goto out4;
+ }
+ expp = svc_export_update(&exp, expp);
+ if (expp) {
+ trace_nfsd_export_update(expp);
+ cache_flush();
exp_put(expp);
+ } else
+ err = -ENOMEM;
out4:
nfsd4_fslocs_free(&exp.ex_fslocs);
kfree(exp.ex_uuid);
@@ -655,22 +753,49 @@ static void exp_flags(struct seq_file *m, int flag, int fsid,
kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fslocs);
static void show_secinfo(struct seq_file *m, struct svc_export *exp);
+static int is_export_stats_file(struct seq_file *m)
+{
+ /*
+ * The export_stats file uses the same ops as the exports file.
+ * We use the file's name to determine the reported info per export.
+ * There is no rename in nsfdfs, so d_name.name is stable.
+ */
+ return !strcmp(m->file->f_path.dentry->d_name.name, "export_stats");
+}
+
static int svc_export_show(struct seq_file *m,
struct cache_detail *cd,
struct cache_head *h)
{
- struct svc_export *exp ;
+ struct svc_export *exp;
+ bool export_stats = is_export_stats_file(m);
- if (h ==NULL) {
- seq_puts(m, "#path domain(flags)\n");
+ if (h == NULL) {
+ if (export_stats)
+ seq_puts(m, "#path domain start-time\n#\tstats\n");
+ else
+ seq_puts(m, "#path domain(flags)\n");
return 0;
}
exp = container_of(h, struct svc_export, h);
seq_path(m, &exp->ex_path, " \t\n\\");
seq_putc(m, '\t');
seq_escape(m, exp->ex_client->name, " \t\n\\");
+ if (export_stats) {
+ struct percpu_counter *counter = exp->ex_stats->counter;
+
+ seq_printf(m, "\t%lld\n", exp->ex_stats->start_time);
+ seq_printf(m, "\tfh_stale: %lld\n",
+ percpu_counter_sum_positive(&counter[EXP_STATS_FH_STALE]));
+ seq_printf(m, "\tio_read: %lld\n",
+ percpu_counter_sum_positive(&counter[EXP_STATS_IO_READ]));
+ seq_printf(m, "\tio_write: %lld\n",
+ percpu_counter_sum_positive(&counter[EXP_STATS_IO_WRITE]));
+ seq_putc(m, '\n');
+ return 0;
+ }
seq_putc(m, '(');
- if (test_bit(CACHE_VALID, &h->flags) &&
+ if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags)) {
exp_flags(m, exp->ex_flags, exp->ex_fsid,
exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs);
@@ -711,6 +836,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
new->ex_layout_types = 0;
new->ex_uuid = NULL;
new->cd = item->cd;
+ export_stats_reset(new->ex_stats);
}
static void export_update(struct cache_head *cnew, struct cache_head *citem)
@@ -738,15 +864,28 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
for (i = 0; i < MAX_SECINFO_LIST; i++) {
new->ex_flavors[i] = item->ex_flavors[i];
}
+ new->ex_xprtsec_modes = item->ex_xprtsec_modes;
}
static struct cache_head *svc_export_alloc(void)
{
struct svc_export *i = kmalloc(sizeof(*i), GFP_KERNEL);
- if (i)
- return &i->h;
- else
+ if (!i)
return NULL;
+
+ i->ex_stats = kmalloc(sizeof(*(i->ex_stats)), GFP_KERNEL);
+ if (!i->ex_stats) {
+ kfree(i);
+ return NULL;
+ }
+
+ if (export_stats_init(i->ex_stats)) {
+ kfree(i->ex_stats);
+ kfree(i);
+ return NULL;
+ }
+
+ return &i->h;
}
static const struct cache_detail svc_export_cache_template = {
@@ -754,6 +893,7 @@ static const struct cache_detail svc_export_cache_template = {
.hash_size = EXPORT_HASHMAX,
.name = "nfsd.export",
.cache_put = svc_export_put,
+ .cache_upcall = svc_export_upcall,
.cache_request = svc_export_request,
.cache_parse = svc_export_parse,
.cache_show = svc_export_show,
@@ -819,8 +959,10 @@ exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type,
if (ek == NULL)
return ERR_PTR(-ENOMEM);
err = cache_check(cd, &ek->h, reqp);
- if (err)
+ if (err) {
+ trace_nfsd_exp_find_key(&key, err);
return ERR_PTR(err);
+ }
return ek;
}
@@ -842,8 +984,10 @@ exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp,
if (exp == NULL)
return ERR_PTR(-ENOMEM);
err = cache_check(cd, &exp->h, reqp);
- if (err)
+ if (err) {
+ trace_nfsd_exp_get_by_name(&key, err);
return ERR_PTR(err);
+ }
return exp;
}
@@ -937,24 +1081,76 @@ static struct svc_export *exp_find(struct cache_detail *cd,
return exp;
}
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
+/**
+ * check_xprtsec_policy - check if access to export is allowed by the
+ * xprtsec policy
+ * @exp: svc_export that is being accessed.
+ * @rqstp: svc_rqst attempting to access @exp.
+ *
+ * Helper function for check_nfsd_access(). Note that callers should be
+ * using check_nfsd_access() instead of calling this function directly. The
+ * one exception is __fh_verify() since it has logic that may result in one
+ * or both of the helpers being skipped.
+ *
+ * Return values:
+ * %nfs_ok if access is granted, or
+ * %nfserr_wrongsec if access is denied
+ */
+__be32 check_xprtsec_policy(struct svc_export *exp, struct svc_rqst *rqstp)
{
- struct exp_flavor_info *f;
- struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+
+ if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) {
+ if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags))
+ return nfs_ok;
+ }
+ if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_TLS) {
+ if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
+ !test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
+ return nfs_ok;
+ }
+ if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_MTLS) {
+ if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
+ test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
+ return nfs_ok;
+ }
+ return nfserr_wrongsec;
+}
+
+/**
+ * check_security_flavor - check if access to export is allowed by the
+ * security flavor
+ * @exp: svc_export that is being accessed.
+ * @rqstp: svc_rqst attempting to access @exp.
+ * @may_bypass_gss: reduce strictness of authorization check
+ *
+ * Helper function for check_nfsd_access(). Note that callers should be
+ * using check_nfsd_access() instead of calling this function directly. The
+ * one exception is __fh_verify() since it has logic that may result in one
+ * or both of the helpers being skipped.
+ *
+ * Return values:
+ * %nfs_ok if access is granted, or
+ * %nfserr_wrongsec if access is denied
+ */
+__be32 check_security_flavor(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss)
+{
+ struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
/* legacy gss-only clients are always OK: */
if (exp->ex_client == rqstp->rq_gssclient)
- return 0;
+ return nfs_ok;
/* ip-address based client; check sec= export option: */
for (f = exp->ex_flavors; f < end; f++) {
if (f->pseudoflavor == rqstp->rq_cred.cr_flavor)
- return 0;
+ return nfs_ok;
}
/* defaults in absence of sec= options: */
if (exp->ex_nflavors == 0) {
if (rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL ||
rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)
- return 0;
+ return nfs_ok;
}
/* If the compound op contains a spo_must_allowed op,
@@ -964,11 +1160,49 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
*/
if (nfsd4_spo_must_allow(rqstp))
- return 0;
+ return nfs_ok;
+
+ /* Some calls may be processed without authentication
+ * on GSS exports. For example NFS2/3 calls on root
+ * directory, see section 2.3.2 of rfc 2623.
+ * For "may_bypass_gss" check that export has really
+ * enabled some flavor with authentication (GSS or any
+ * other) and also check that the used auth flavor is
+ * without authentication (none or sys).
+ */
+ if (may_bypass_gss && (
+ rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL ||
+ rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)) {
+ for (f = exp->ex_flavors; f < end; f++) {
+ if (f->pseudoflavor >= RPC_AUTH_DES)
+ return 0;
+ }
+ }
return nfserr_wrongsec;
}
+/**
+ * check_nfsd_access - check if access to export is allowed.
+ * @exp: svc_export that is being accessed.
+ * @rqstp: svc_rqst attempting to access @exp.
+ * @may_bypass_gss: reduce strictness of authorization check
+ *
+ * Return values:
+ * %nfs_ok if access is granted, or
+ * %nfserr_wrongsec if access is denied
+ */
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss)
+{
+ __be32 status;
+
+ status = check_xprtsec_policy(exp, rqstp);
+ if (status != nfs_ok)
+ return status;
+ return check_security_flavor(exp, rqstp, may_bypass_gss);
+}
+
/*
* Uses rq_client and rq_gssclient to find an export; uses rq_client (an
* auth_unix client) if it's available and has secinfo information;
@@ -979,7 +1213,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
* use exp_get_by_name() or exp_find().
*/
struct svc_export *
-rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
+rqst_exp_get_by_name(struct svc_rqst *rqstp, const struct path *path)
{
struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -1009,19 +1243,35 @@ gss:
return gssexp;
}
+/**
+ * rqst_exp_find - Find an svc_export in the context of a rqst or similar
+ * @reqp: The handle to be used to suspend the request if a cache-upcall is needed
+ * If NULL, missing in-cache information will result in failure.
+ * @net: The network namespace in which the request exists
+ * @cl: default auth_domain to use for looking up the export
+ * @gsscl: an alternate auth_domain defined using deprecated gss/krb5 format.
+ * @fsid_type: The type of fsid to look for
+ * @fsidv: The actual fsid to look up in the context of either client.
+ *
+ * Perform a lookup for @cl/@fsidv in the given @net for an export. If
+ * none found and @gsscl specified, repeat the lookup.
+ *
+ * Returns an export, or an error pointer.
+ */
struct svc_export *
-rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv)
+rqst_exp_find(struct cache_req *reqp, struct net *net,
+ struct auth_domain *cl, struct auth_domain *gsscl,
+ int fsid_type, u32 *fsidv)
{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct cache_detail *cd = nn->svc_export_cache;
- if (rqstp->rq_client == NULL)
+ if (!cl)
goto gss;
/* First try the auth_unix client: */
- exp = exp_find(cd, rqstp->rq_client, fsid_type,
- fsidv, &rqstp->rq_chandle);
+ exp = exp_find(cd, cl, fsid_type, fsidv, reqp);
if (PTR_ERR(exp) == -ENOENT)
goto gss;
if (IS_ERR(exp))
@@ -1031,10 +1281,9 @@ rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv)
return exp;
gss:
/* Otherwise, try falling back on gss client */
- if (rqstp->rq_gssclient == NULL)
+ if (!gsscl)
return exp;
- gssexp = exp_find(cd, rqstp->rq_gssclient, fsid_type, fsidv,
- &rqstp->rq_chandle);
+ gssexp = exp_find(cd, gsscl, fsid_type, fsidv, reqp);
if (PTR_ERR(gssexp) == -ENOENT)
return exp;
if (!IS_ERR(exp))
@@ -1065,7 +1314,9 @@ struct svc_export *rqst_find_fsidzero_export(struct svc_rqst *rqstp)
mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
- return rqst_exp_find(rqstp, FSID_NUM, fsidv);
+ return rqst_exp_find(&rqstp->rq_chandle, SVC_NET(rqstp),
+ rqstp->rq_client, rqstp->rq_gssclient,
+ FSID_NUM, fsidv);
}
/*
@@ -1170,15 +1421,17 @@ static void show_secinfo(struct seq_file *m, struct svc_export *exp)
static void exp_flags(struct seq_file *m, int flag, int fsid,
kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fsloc)
{
+ struct user_namespace *userns = m->file->f_cred->user_ns;
+
show_expflags(m, flag, NFSEXP_ALLFLAGS);
if (flag & NFSEXP_FSID)
seq_printf(m, ",fsid=%d", fsid);
- if (!uid_eq(anonu, make_kuid(&init_user_ns, (uid_t)-2)) &&
- !uid_eq(anonu, make_kuid(&init_user_ns, 0x10000-2)))
- seq_printf(m, ",anonuid=%u", from_kuid(&init_user_ns, anonu));
- if (!gid_eq(anong, make_kgid(&init_user_ns, (gid_t)-2)) &&
- !gid_eq(anong, make_kgid(&init_user_ns, 0x10000-2)))
- seq_printf(m, ",anongid=%u", from_kgid(&init_user_ns, anong));
+ if (!uid_eq(anonu, make_kuid(userns, (uid_t)-2)) &&
+ !uid_eq(anonu, make_kuid(userns, 0x10000-2)))
+ seq_printf(m, ",anonuid=%u", from_kuid_munged(userns, anonu));
+ if (!gid_eq(anong, make_kgid(userns, (gid_t)-2)) &&
+ !gid_eq(anong, make_kgid(userns, 0x10000-2)))
+ seq_printf(m, ",anongid=%u", from_kgid_munged(userns, anong));
if (fsloc && fsloc->locations_count > 0) {
char *loctype = (fsloc->migrated) ? "refer" : "replicas";
int i;
@@ -1201,17 +1454,20 @@ static int e_show(struct seq_file *m, void *p)
struct cache_head *cp = p;
struct svc_export *exp = container_of(cp, struct svc_export, h);
struct cache_detail *cd = m->private;
+ bool export_stats = is_export_stats_file(m);
if (p == SEQ_START_TOKEN) {
seq_puts(m, "# Version 1.1\n");
- seq_puts(m, "# Path Client(Flags) # IPs\n");
+ if (export_stats)
+ seq_puts(m, "# Path Client Start-time\n#\tStats\n");
+ else
+ seq_puts(m, "# Path Client(Flags) # IPs\n");
return 0;
}
- exp_get(exp);
- if (cache_check(cd, &exp->h, NULL))
+ if (cache_check_rcu(cd, &exp->h, NULL))
return 0;
- exp_put(exp);
+
return svc_export_show(m, cd, cp);
}
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index e7daa1f246f0..d2b09cd76145 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -6,6 +6,7 @@
#define NFSD_EXPORT_H
#include <linux/sunrpc/cache.h>
+#include <linux/percpu_counter.h>
#include <uapi/linux/nfsd/export.h>
#include <linux/nfs4.h>
@@ -46,14 +47,27 @@ struct exp_flavor_info {
u32 flags;
};
+/* Per-export stats */
+enum {
+ EXP_STATS_FH_STALE,
+ EXP_STATS_IO_READ,
+ EXP_STATS_IO_WRITE,
+ EXP_STATS_COUNTERS_NUM
+};
+
+struct export_stats {
+ time64_t start_time;
+ struct percpu_counter counter[EXP_STATS_COUNTERS_NUM];
+};
+
struct svc_export {
struct cache_head h;
struct auth_domain * ex_client;
int ex_flags;
+ int ex_fsid;
struct path ex_path;
kuid_t ex_anon_uid;
kgid_t ex_anon_gid;
- int ex_fsid;
unsigned char * ex_uuid; /* 16 byte fsid */
struct nfsd4_fs_locations ex_fslocs;
uint32_t ex_nflavors;
@@ -62,6 +76,8 @@ struct svc_export {
struct nfsd4_deviceid_map *ex_devid_map;
struct cache_detail *cd;
struct rcu_head ex_rcu;
+ unsigned long ex_xprtsec_modes;
+ struct export_stats *ex_stats;
};
/* an "export key" (expkey) maps a filehandlefragement to an
@@ -72,7 +88,7 @@ struct svc_expkey {
struct cache_head h;
struct auth_domain * ek_client;
- int ek_fsidtype;
+ u8 ek_fsidtype;
u32 ek_fsid[6];
struct path ek_path;
@@ -83,8 +99,13 @@ struct svc_expkey {
#define EX_NOHIDE(exp) ((exp)->ex_flags & NFSEXP_NOHIDE)
#define EX_WGATHER(exp) ((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
-int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp);
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
+struct svc_cred;
+int nfsexp_flags(struct svc_cred *cred, struct svc_export *exp);
+__be32 check_xprtsec_policy(struct svc_export *exp, struct svc_rqst *rqstp);
+__be32 check_security_flavor(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss);
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
+ bool may_bypass_gss);
/*
* Function declarations
@@ -93,14 +114,13 @@ int nfsd_export_init(struct net *);
void nfsd_export_shutdown(struct net *);
void nfsd_export_flush(struct net *);
struct svc_export * rqst_exp_get_by_name(struct svc_rqst *,
- struct path *);
+ const struct path *);
struct svc_export * rqst_exp_parent(struct svc_rqst *,
struct path *);
struct svc_export * rqst_find_fsidzero_export(struct svc_rqst *);
int exp_rootfh(struct net *, struct auth_domain *,
char *path, struct knfsd_fh *, int maxsize);
__be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
-__be32 nfserrno(int errno);
static inline void exp_put(struct svc_export *exp)
{
@@ -112,6 +132,8 @@ static inline struct svc_export *exp_get(struct svc_export *exp)
cache_get(&exp->h);
return exp;
}
-struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
+struct svc_export *rqst_exp_find(struct cache_req *reqp, struct net *net,
+ struct auth_domain *cl, struct auth_domain *gsscl,
+ int fsid_type, u32 *fsidv);
#endif /* NFSD_EXPORT_H */
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
deleted file mode 100644
index 84831253203d..000000000000
--- a/fs/nfsd/fault_inject.c
+++ /dev/null
@@ -1,150 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
- *
- * Uses debugfs to create fault injection points for client testing
- */
-
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/debugfs.h>
-#include <linux/module.h>
-#include <linux/nsproxy.h>
-#include <linux/sunrpc/addr.h>
-#include <linux/uaccess.h>
-#include <linux/kernel.h>
-
-#include "state.h"
-#include "netns.h"
-
-struct nfsd_fault_inject_op {
- char *file;
- u64 (*get)(void);
- u64 (*set_val)(u64);
- u64 (*set_clnt)(struct sockaddr_storage *, size_t);
-};
-
-static struct dentry *debug_dir;
-
-static ssize_t fault_inject_read(struct file *file, char __user *buf,
- size_t len, loff_t *ppos)
-{
- static u64 val;
- char read_buf[25];
- size_t size;
- loff_t pos = *ppos;
- struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
-
- if (!pos)
- val = op->get();
- size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
-
- return simple_read_from_buffer(buf, len, ppos, read_buf, size);
-}
-
-static ssize_t fault_inject_write(struct file *file, const char __user *buf,
- size_t len, loff_t *ppos)
-{
- char write_buf[INET6_ADDRSTRLEN];
- size_t size = min(sizeof(write_buf) - 1, len);
- struct net *net = current->nsproxy->net_ns;
- struct sockaddr_storage sa;
- struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
- u64 val;
- char *nl;
-
- if (copy_from_user(write_buf, buf, size))
- return -EFAULT;
- write_buf[size] = '\0';
-
- /* Deal with any embedded newlines in the string */
- nl = strchr(write_buf, '\n');
- if (nl) {
- size = nl - write_buf;
- *nl = '\0';
- }
-
- size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa));
- if (size > 0) {
- val = op->set_clnt(&sa, size);
- if (val)
- pr_info("NFSD [%s]: Client %s had %llu state object(s)\n",
- op->file, write_buf, val);
- } else {
- val = simple_strtoll(write_buf, NULL, 0);
- if (val == 0)
- pr_info("NFSD Fault Injection: %s (all)", op->file);
- else
- pr_info("NFSD Fault Injection: %s (n = %llu)",
- op->file, val);
- val = op->set_val(val);
- pr_info("NFSD: %s: found %llu", op->file, val);
- }
- return len; /* on success, claim we got the whole input */
-}
-
-static const struct file_operations fops_nfsd = {
- .owner = THIS_MODULE,
- .read = fault_inject_read,
- .write = fault_inject_write,
-};
-
-void nfsd_fault_inject_cleanup(void)
-{
- debugfs_remove_recursive(debug_dir);
-}
-
-static struct nfsd_fault_inject_op inject_ops[] = {
- {
- .file = "forget_clients",
- .get = nfsd_inject_print_clients,
- .set_val = nfsd_inject_forget_clients,
- .set_clnt = nfsd_inject_forget_client,
- },
- {
- .file = "forget_locks",
- .get = nfsd_inject_print_locks,
- .set_val = nfsd_inject_forget_locks,
- .set_clnt = nfsd_inject_forget_client_locks,
- },
- {
- .file = "forget_openowners",
- .get = nfsd_inject_print_openowners,
- .set_val = nfsd_inject_forget_openowners,
- .set_clnt = nfsd_inject_forget_client_openowners,
- },
- {
- .file = "forget_delegations",
- .get = nfsd_inject_print_delegations,
- .set_val = nfsd_inject_forget_delegations,
- .set_clnt = nfsd_inject_forget_client_delegations,
- },
- {
- .file = "recall_delegations",
- .get = nfsd_inject_print_delegations,
- .set_val = nfsd_inject_recall_delegations,
- .set_clnt = nfsd_inject_recall_client_delegations,
- },
-};
-
-int nfsd_fault_inject_init(void)
-{
- unsigned int i;
- struct nfsd_fault_inject_op *op;
- umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
-
- debug_dir = debugfs_create_dir("nfsd", NULL);
- if (!debug_dir)
- goto fail;
-
- for (i = 0; i < ARRAY_SIZE(inject_ops); i++) {
- op = &inject_ops[i];
- if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
- goto fail;
- }
- return 0;
-
-fail:
- nfsd_fault_inject_cleanup();
- return -ENOMEM;
-}
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
new file mode 100644
index 000000000000..93798575b807
--- /dev/null
+++ b/fs/nfsd/filecache.c
@@ -0,0 +1,1430 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The NFSD open file cache.
+ *
+ * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
+ *
+ * An nfsd_file object is a per-file collection of open state that binds
+ * together:
+ * - a struct file *
+ * - a user credential
+ * - a network namespace
+ * - a read-ahead context
+ * - monitoring for writeback errors
+ *
+ * nfsd_file objects are reference-counted. Consumers acquire a new
+ * object via the nfsd_file_acquire API. They manage their interest in
+ * the acquired object, and hence the object's reference count, via
+ * nfsd_file_get and nfsd_file_put. There are two varieties of nfsd_file
+ * object:
+ *
+ * * non-garbage-collected: When a consumer wants to precisely control
+ * the lifetime of a file's open state, it acquires a non-garbage-
+ * collected nfsd_file. The final nfsd_file_put releases the open
+ * state immediately.
+ *
+ * * garbage-collected: When a consumer does not control the lifetime
+ * of open state, it acquires a garbage-collected nfsd_file. The
+ * final nfsd_file_put allows the open state to linger for a period
+ * during which it may be re-used.
+ */
+
+#include <linux/hash.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+#include <linux/list_lru.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/fsnotify.h>
+#include <linux/seq_file.h>
+#include <linux/rhashtable.h>
+#include <linux/nfslocalio.h>
+
+#include "vfs.h"
+#include "nfsd.h"
+#include "nfsfh.h"
+#include "netns.h"
+#include "filecache.h"
+#include "trace.h"
+
+#define NFSD_LAUNDRETTE_DELAY (2 * HZ)
+
+#define NFSD_FILE_CACHE_UP (0)
+
+/* We only care about NFSD_MAY_READ/WRITE for this cache */
+#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE|NFSD_MAY_LOCALIO)
+
+static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_allocations);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_releases);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
+static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
+
+struct nfsd_fcache_disposal {
+ spinlock_t lock;
+ struct list_head freeme;
+};
+
+static struct kmem_cache *nfsd_file_slab;
+static struct kmem_cache *nfsd_file_mark_slab;
+static struct list_lru nfsd_file_lru;
+static unsigned long nfsd_file_flags;
+static struct fsnotify_group *nfsd_file_fsnotify_group;
+static struct delayed_work nfsd_filecache_laundrette;
+static struct rhltable nfsd_file_rhltable
+ ____cacheline_aligned_in_smp;
+
+static bool
+nfsd_match_cred(const struct cred *c1, const struct cred *c2)
+{
+ int i;
+
+ if (!uid_eq(c1->fsuid, c2->fsuid))
+ return false;
+ if (!gid_eq(c1->fsgid, c2->fsgid))
+ return false;
+ if (c1->group_info == NULL || c2->group_info == NULL)
+ return c1->group_info == c2->group_info;
+ if (c1->group_info->ngroups != c2->group_info->ngroups)
+ return false;
+ for (i = 0; i < c1->group_info->ngroups; i++) {
+ if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
+ return false;
+ }
+ return true;
+}
+
+static const struct rhashtable_params nfsd_file_rhash_params = {
+ .key_len = sizeof_field(struct nfsd_file, nf_inode),
+ .key_offset = offsetof(struct nfsd_file, nf_inode),
+ .head_offset = offsetof(struct nfsd_file, nf_rlist),
+
+ /*
+ * Start with a single page hash table to reduce resizing churn
+ * on light workloads.
+ */
+ .min_size = 256,
+ .automatic_shrinking = true,
+};
+
+static void
+nfsd_file_schedule_laundrette(void)
+{
+ if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags))
+ queue_delayed_work(system_dfl_wq, &nfsd_filecache_laundrette,
+ NFSD_LAUNDRETTE_DELAY);
+}
+
+static void
+nfsd_file_slab_free(struct rcu_head *rcu)
+{
+ struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
+
+ put_cred(nf->nf_cred);
+ kmem_cache_free(nfsd_file_slab, nf);
+}
+
+static void
+nfsd_file_mark_free(struct fsnotify_mark *mark)
+{
+ struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
+ nfm_mark);
+
+ kmem_cache_free(nfsd_file_mark_slab, nfm);
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_get(struct nfsd_file_mark *nfm)
+{
+ if (!refcount_inc_not_zero(&nfm->nfm_ref))
+ return NULL;
+ return nfm;
+}
+
+static void
+nfsd_file_mark_put(struct nfsd_file_mark *nfm)
+{
+ if (refcount_dec_and_test(&nfm->nfm_ref)) {
+ fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
+ fsnotify_put_mark(&nfm->nfm_mark);
+ }
+}
+
+static struct nfsd_file_mark *
+nfsd_file_mark_find_or_create(struct inode *inode)
+{
+ int err;
+ struct fsnotify_mark *mark;
+ struct nfsd_file_mark *nfm = NULL, *new;
+
+ do {
+ fsnotify_group_lock(nfsd_file_fsnotify_group);
+ mark = fsnotify_find_inode_mark(inode,
+ nfsd_file_fsnotify_group);
+ if (mark) {
+ nfm = nfsd_file_mark_get(container_of(mark,
+ struct nfsd_file_mark,
+ nfm_mark));
+ fsnotify_group_unlock(nfsd_file_fsnotify_group);
+ if (nfm) {
+ fsnotify_put_mark(mark);
+ break;
+ }
+ /* Avoid soft lockup race with nfsd_file_mark_put() */
+ fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
+ fsnotify_put_mark(mark);
+ } else {
+ fsnotify_group_unlock(nfsd_file_fsnotify_group);
+ }
+
+ /* allocate a new nfm */
+ new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
+ if (!new)
+ return NULL;
+ fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
+ new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
+ refcount_set(&new->nfm_ref, 1);
+
+ err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
+
+ /*
+ * If the add was successful, then return the object.
+ * Otherwise, we need to put the reference we hold on the
+ * nfm_mark. The fsnotify code will take a reference and put
+ * it on failure, so we can't just free it directly. It's also
+ * not safe to call fsnotify_destroy_mark on it as the
+ * mark->group will be NULL. Thus, we can't let the nfm_ref
+ * counter drive the destruction at this point.
+ */
+ if (likely(!err))
+ nfm = new;
+ else
+ fsnotify_put_mark(&new->nfm_mark);
+ } while (unlikely(err == -EEXIST));
+
+ return nfm;
+}
+
+static struct nfsd_file *
+nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
+ bool want_gc)
+{
+ struct nfsd_file *nf;
+
+ nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
+ if (unlikely(!nf))
+ return NULL;
+
+ this_cpu_inc(nfsd_file_allocations);
+ INIT_LIST_HEAD(&nf->nf_lru);
+ INIT_LIST_HEAD(&nf->nf_gc);
+ nf->nf_birthtime = ktime_get();
+ nf->nf_file = NULL;
+ nf->nf_cred = get_current_cred();
+ nf->nf_net = net;
+ nf->nf_flags = want_gc ?
+ BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) :
+ BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING);
+ nf->nf_inode = inode;
+ refcount_set(&nf->nf_ref, 1);
+ nf->nf_may = need;
+ nf->nf_mark = NULL;
+ nf->nf_dio_mem_align = 0;
+ nf->nf_dio_offset_align = 0;
+ nf->nf_dio_read_offset_align = 0;
+ return nf;
+}
+
+/**
+ * nfsd_file_check_write_error - check for writeback errors on a file
+ * @nf: nfsd_file to check for writeback errors
+ *
+ * Check whether a nfsd_file has an unseen error. Reset the write
+ * verifier if so.
+ */
+static void
+nfsd_file_check_write_error(struct nfsd_file *nf)
+{
+ struct file *file = nf->nf_file;
+
+ if ((file->f_mode & FMODE_WRITE) &&
+ filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)))
+ nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
+}
+
+static void
+nfsd_file_hash_remove(struct nfsd_file *nf)
+{
+ trace_nfsd_file_unhash(nf);
+ rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
+ nfsd_file_rhash_params);
+}
+
+static bool
+nfsd_file_unhash(struct nfsd_file *nf)
+{
+ if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ nfsd_file_hash_remove(nf);
+ return true;
+ }
+ return false;
+}
+
+static void
+nfsd_file_free(struct nfsd_file *nf)
+{
+ s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
+
+ trace_nfsd_file_free(nf);
+
+ this_cpu_inc(nfsd_file_releases);
+ this_cpu_add(nfsd_file_total_age, age);
+
+ nfsd_file_unhash(nf);
+ if (nf->nf_mark)
+ nfsd_file_mark_put(nf->nf_mark);
+ if (nf->nf_file) {
+ nfsd_file_check_write_error(nf);
+ nfsd_filp_close(nf->nf_file);
+ }
+
+ /*
+ * If this item is still linked via nf_lru, that's a bug.
+ * WARN and leak it to preserve system stability.
+ */
+ if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
+ return;
+
+ call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
+}
+
+static bool
+nfsd_file_check_writeback(struct nfsd_file *nf)
+{
+ struct file *file = nf->nf_file;
+ struct address_space *mapping;
+
+ /* File not open for write? */
+ if (!(file->f_mode & FMODE_WRITE))
+ return false;
+
+ /*
+ * Some filesystems (e.g. NFS) flush all dirty data on close.
+ * On others, there is no need to wait for writeback.
+ */
+ if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE))
+ return false;
+
+ mapping = file->f_mapping;
+ return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
+ mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
+}
+
+static void nfsd_file_lru_add(struct nfsd_file *nf)
+{
+ refcount_inc(&nf->nf_ref);
+ if (list_lru_add_obj(&nfsd_file_lru, &nf->nf_lru))
+ trace_nfsd_file_lru_add(nf);
+ else
+ WARN_ON(1);
+ nfsd_file_schedule_laundrette();
+}
+
+static bool nfsd_file_lru_remove(struct nfsd_file *nf)
+{
+ if (list_lru_del_obj(&nfsd_file_lru, &nf->nf_lru)) {
+ trace_nfsd_file_lru_del(nf);
+ return true;
+ }
+ return false;
+}
+
+struct nfsd_file *
+nfsd_file_get(struct nfsd_file *nf)
+{
+ if (nf && refcount_inc_not_zero(&nf->nf_ref))
+ return nf;
+ return NULL;
+}
+
+/**
+ * nfsd_file_put - put the reference to a nfsd_file
+ * @nf: nfsd_file of which to put the reference
+ *
+ * Put a reference to a nfsd_file. In the non-GC case, we just put the
+ * reference immediately. In the GC case, if the reference would be
+ * the last one, the put it on the LRU instead to be cleaned up later.
+ */
+void
+nfsd_file_put(struct nfsd_file *nf)
+{
+ might_sleep();
+ trace_nfsd_file_put(nf);
+
+ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) &&
+ test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ set_bit(NFSD_FILE_RECENT, &nf->nf_flags);
+ }
+
+ if (refcount_dec_and_test(&nf->nf_ref))
+ nfsd_file_free(nf);
+}
+
+/**
+ * nfsd_file_put_local - put nfsd_file reference and arm nfsd_net_put in caller
+ * @pnf: nfsd_file of which to put the reference
+ *
+ * First save the associated net to return to caller, then put
+ * the reference of the nfsd_file.
+ */
+struct net *
+nfsd_file_put_local(struct nfsd_file __rcu **pnf)
+{
+ struct nfsd_file *nf;
+ struct net *net = NULL;
+
+ nf = unrcu_pointer(xchg(pnf, NULL));
+ if (nf) {
+ net = nf->nf_net;
+ nfsd_file_put(nf);
+ }
+ return net;
+}
+
+/**
+ * nfsd_file_file - get the backing file of an nfsd_file
+ * @nf: nfsd_file of which to access the backing file.
+ *
+ * Return backing file for @nf.
+ */
+struct file *
+nfsd_file_file(struct nfsd_file *nf)
+{
+ return nf->nf_file;
+}
+
+static void
+nfsd_file_dispose_list(struct list_head *dispose)
+{
+ struct nfsd_file *nf;
+
+ while (!list_empty(dispose)) {
+ nf = list_first_entry(dispose, struct nfsd_file, nf_gc);
+ list_del_init(&nf->nf_gc);
+ nfsd_file_free(nf);
+ }
+}
+
+/**
+ * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list
+ * @dispose: list of nfsd_files to be disposed
+ *
+ * Transfers each file to the "freeme" list for its nfsd_net, to eventually
+ * be disposed of by the per-net garbage collector.
+ */
+static void
+nfsd_file_dispose_list_delayed(struct list_head *dispose)
+{
+ while(!list_empty(dispose)) {
+ struct nfsd_file *nf = list_first_entry(dispose,
+ struct nfsd_file, nf_gc);
+ struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+ struct svc_serv *serv;
+
+ spin_lock(&l->lock);
+ list_move_tail(&nf->nf_gc, &l->freeme);
+ spin_unlock(&l->lock);
+
+ /*
+ * The filecache laundrette is shut down after the
+ * nn->nfsd_serv pointer is cleared, but before the
+ * svc_serv is freed.
+ */
+ serv = nn->nfsd_serv;
+ if (serv)
+ svc_wake_up(serv);
+ }
+}
+
+/**
+ * nfsd_file_net_dispose - deal with nfsd_files waiting to be disposed.
+ * @nn: nfsd_net in which to find files to be disposed.
+ *
+ * When files held open for nfsv3 are removed from the filecache, whether
+ * due to memory pressure or garbage collection, they are queued to
+ * a per-net-ns queue. This function completes the disposal, either
+ * directly or by waking another nfsd thread to help with the work.
+ */
+void nfsd_file_net_dispose(struct nfsd_net *nn)
+{
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+
+ if (!list_empty(&l->freeme)) {
+ LIST_HEAD(dispose);
+ int i;
+
+ spin_lock(&l->lock);
+ for (i = 0; i < 8 && !list_empty(&l->freeme); i++)
+ list_move(l->freeme.next, &dispose);
+ spin_unlock(&l->lock);
+ if (!list_empty(&l->freeme))
+ /* Wake up another thread to share the work
+ * *before* doing any actual disposing.
+ */
+ svc_wake_up(nn->nfsd_serv);
+ nfsd_file_dispose_list(&dispose);
+ }
+}
+
+/**
+ * nfsd_file_lru_cb - Examine an entry on the LRU list
+ * @item: LRU entry to examine
+ * @lru: controlling LRU
+ * @arg: dispose list
+ *
+ * Return values:
+ * %LRU_REMOVED: @item was removed from the LRU
+ * %LRU_ROTATE: @item is to be moved to the LRU tail
+ * %LRU_SKIP: @item cannot be evicted
+ */
+static enum lru_status
+nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
+ void *arg)
+{
+ struct list_head *head = arg;
+ struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
+
+ /* We should only be dealing with GC entries here */
+ WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags));
+
+ /*
+ * Don't throw out files that are still undergoing I/O or
+ * that have uncleared errors pending.
+ */
+ if (nfsd_file_check_writeback(nf)) {
+ trace_nfsd_file_gc_writeback(nf);
+ return LRU_SKIP;
+ }
+
+ /* If it was recently added to the list, skip it */
+ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
+ trace_nfsd_file_gc_referenced(nf);
+ return LRU_ROTATE;
+ }
+
+ /*
+ * Put the reference held on behalf of the LRU if it is the last
+ * reference, else rotate.
+ */
+ if (!refcount_dec_if_one(&nf->nf_ref)) {
+ trace_nfsd_file_gc_in_use(nf);
+ return LRU_ROTATE;
+ }
+
+ /* Refcount went to zero. Unhash it and queue it to the dispose list */
+ nfsd_file_unhash(nf);
+ list_lru_isolate(lru, &nf->nf_lru);
+ list_add(&nf->nf_gc, head);
+ this_cpu_inc(nfsd_file_evictions);
+ trace_nfsd_file_gc_disposed(nf);
+ return LRU_REMOVED;
+}
+
+static enum lru_status
+nfsd_file_gc_cb(struct list_head *item, struct list_lru_one *lru,
+ void *arg)
+{
+ struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
+
+ if (test_and_clear_bit(NFSD_FILE_RECENT, &nf->nf_flags)) {
+ /*
+ * "REFERENCED" really means "should be at the end of the
+ * LRU. As we are putting it there we can clear the flag.
+ */
+ clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
+ trace_nfsd_file_gc_aged(nf);
+ return LRU_ROTATE;
+ }
+ return nfsd_file_lru_cb(item, lru, arg);
+}
+
+/* If the shrinker runs between calls to list_lru_walk_node() in
+ * nfsd_file_gc(), the "remaining" count will be wrong. This could
+ * result in premature freeing of some files. This may not matter much
+ * but is easy to fix with this spinlock which temporarily disables
+ * the shrinker.
+ */
+static DEFINE_SPINLOCK(nfsd_gc_lock);
+static void
+nfsd_file_gc(void)
+{
+ unsigned long ret = 0;
+ LIST_HEAD(dispose);
+ int nid;
+
+ spin_lock(&nfsd_gc_lock);
+ for_each_node_state(nid, N_NORMAL_MEMORY) {
+ unsigned long remaining = list_lru_count_node(&nfsd_file_lru, nid);
+
+ while (remaining > 0) {
+ unsigned long nr = min(remaining, NFSD_FILE_GC_BATCH);
+
+ remaining -= nr;
+ ret += list_lru_walk_node(&nfsd_file_lru, nid, nfsd_file_gc_cb,
+ &dispose, &nr);
+ if (nr)
+ /* walk aborted early */
+ remaining = 0;
+ }
+ }
+ spin_unlock(&nfsd_gc_lock);
+ trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
+ nfsd_file_dispose_list_delayed(&dispose);
+}
+
+static void
+nfsd_file_gc_worker(struct work_struct *work)
+{
+ if (list_lru_count(&nfsd_file_lru))
+ nfsd_file_gc();
+ nfsd_file_schedule_laundrette();
+}
+
+static unsigned long
+nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
+{
+ return list_lru_count(&nfsd_file_lru);
+}
+
+static unsigned long
+nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
+{
+ LIST_HEAD(dispose);
+ unsigned long ret;
+
+ if (!spin_trylock(&nfsd_gc_lock))
+ return SHRINK_STOP;
+
+ ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
+ nfsd_file_lru_cb, &dispose);
+ spin_unlock(&nfsd_gc_lock);
+ trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
+ nfsd_file_dispose_list_delayed(&dispose);
+ return ret;
+}
+
+static struct shrinker *nfsd_file_shrinker;
+
+/**
+ * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file
+ * @nf: nfsd_file to attempt to queue
+ * @dispose: private list to queue successfully-put objects
+ *
+ * Unhash an nfsd_file, try to get a reference to it, and then put that
+ * reference. If it's the last reference, queue it to the dispose list.
+ */
+static void
+nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
+ __must_hold(RCU)
+{
+ int decrement = 1;
+
+ /* If we raced with someone else unhashing, ignore it */
+ if (!nfsd_file_unhash(nf))
+ return;
+
+ /* If we can't get a reference, ignore it */
+ if (!nfsd_file_get(nf))
+ return;
+
+ /* Extra decrement if we remove from the LRU */
+ if (nfsd_file_lru_remove(nf))
+ ++decrement;
+
+ /* If refcount goes to 0, then put on the dispose list */
+ if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
+ list_add(&nf->nf_gc, dispose);
+ trace_nfsd_file_closing(nf);
+ }
+}
+
+/**
+ * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode
+ * @inode: inode on which to close out nfsd_files
+ * @dispose: list on which to gather nfsd_files to close out
+ *
+ * An nfsd_file represents a struct file being held open on behalf of nfsd.
+ * An open file however can block other activity (such as leases), or cause
+ * undesirable behavior (e.g. spurious silly-renames when reexporting NFS).
+ *
+ * This function is intended to find open nfsd_files when this sort of
+ * conflicting access occurs and then attempt to close those files out.
+ *
+ * Populates the dispose list with entries that have already had their
+ * refcounts go to zero. The actual free of an nfsd_file can be expensive,
+ * so we leave it up to the caller whether it wants to wait or not.
+ */
+static void
+nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
+{
+ struct rhlist_head *tmp, *list;
+ struct nfsd_file *nf;
+
+ rcu_read_lock();
+ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
+ nfsd_file_rhash_params);
+ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
+ if (!test_bit(NFSD_FILE_GC, &nf->nf_flags))
+ continue;
+ nfsd_file_cond_queue(nf, dispose);
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Close out any open nfsd_files that can be reaped for @inode. The
+ * actual freeing is deferred to the dispose_list_delayed infrastructure.
+ *
+ * This is used by the fsnotify callbacks and setlease notifier.
+ */
+static void
+nfsd_file_close_inode(struct inode *inode)
+{
+ LIST_HEAD(dispose);
+
+ nfsd_file_queue_for_close(inode, &dispose);
+ nfsd_file_dispose_list_delayed(&dispose);
+}
+
+/**
+ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * @inode: inode of the file to attempt to remove
+ *
+ * Close out any open nfsd_files that can be reaped for @inode. The
+ * nfsd_files are closed out synchronously.
+ *
+ * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames
+ * when reexporting NFS.
+ */
+void
+nfsd_file_close_inode_sync(struct inode *inode)
+{
+ LIST_HEAD(dispose);
+
+ trace_nfsd_file_close(inode);
+
+ nfsd_file_queue_for_close(inode, &dispose);
+ nfsd_file_dispose_list(&dispose);
+}
+
+static int
+nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
+ void *data)
+{
+ struct file_lease *fl = data;
+
+ /* Only close files for F_SETLEASE leases */
+ if (fl->c.flc_flags & FL_LEASE)
+ nfsd_file_close_inode(file_inode(fl->c.flc_file));
+ return 0;
+}
+
+static struct notifier_block nfsd_file_lease_notifier = {
+ .notifier_call = nfsd_file_lease_notifier_call,
+};
+
+static int
+nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
+ struct inode *inode, struct inode *dir,
+ const struct qstr *name, u32 cookie)
+{
+ if (WARN_ON_ONCE(!inode))
+ return 0;
+
+ trace_nfsd_file_fsnotify_handle_event(inode, mask);
+
+ /* Should be no marks on non-regular files */
+ if (!S_ISREG(inode->i_mode)) {
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
+ /* don't close files if this was not the last link */
+ if (mask & FS_ATTRIB) {
+ if (inode->i_nlink)
+ return 0;
+ }
+
+ nfsd_file_close_inode(inode);
+ return 0;
+}
+
+
+static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
+ .handle_inode_event = nfsd_file_fsnotify_handle_event,
+ .free_mark = nfsd_file_mark_free,
+};
+
+int
+nfsd_file_cache_init(void)
+{
+ int ret;
+
+ lockdep_assert_held(&nfsd_mutex);
+ if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
+ return 0;
+
+ ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params);
+ if (ret)
+ goto out;
+
+ ret = -ENOMEM;
+ nfsd_file_slab = KMEM_CACHE(nfsd_file, 0);
+ if (!nfsd_file_slab) {
+ pr_err("nfsd: unable to create nfsd_file_slab\n");
+ goto out_err;
+ }
+
+ nfsd_file_mark_slab = KMEM_CACHE(nfsd_file_mark, 0);
+ if (!nfsd_file_mark_slab) {
+ pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
+ goto out_err;
+ }
+
+ ret = list_lru_init(&nfsd_file_lru);
+ if (ret) {
+ pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
+ goto out_err;
+ }
+
+ nfsd_file_shrinker = shrinker_alloc(0, "nfsd-filecache");
+ if (!nfsd_file_shrinker) {
+ ret = -ENOMEM;
+ pr_err("nfsd: failed to allocate nfsd_file_shrinker\n");
+ goto out_lru;
+ }
+
+ nfsd_file_shrinker->count_objects = nfsd_file_lru_count;
+ nfsd_file_shrinker->scan_objects = nfsd_file_lru_scan;
+ nfsd_file_shrinker->seeks = 1;
+
+ shrinker_register(nfsd_file_shrinker);
+
+ ret = lease_register_notifier(&nfsd_file_lease_notifier);
+ if (ret) {
+ pr_err("nfsd: unable to register lease notifier: %d\n", ret);
+ goto out_shrinker;
+ }
+
+ nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
+ 0);
+ if (IS_ERR(nfsd_file_fsnotify_group)) {
+ pr_err("nfsd: unable to create fsnotify group: %ld\n",
+ PTR_ERR(nfsd_file_fsnotify_group));
+ ret = PTR_ERR(nfsd_file_fsnotify_group);
+ nfsd_file_fsnotify_group = NULL;
+ goto out_notifier;
+ }
+
+ INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
+out:
+ if (ret)
+ clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags);
+ return ret;
+out_notifier:
+ lease_unregister_notifier(&nfsd_file_lease_notifier);
+out_shrinker:
+ shrinker_free(nfsd_file_shrinker);
+out_lru:
+ list_lru_destroy(&nfsd_file_lru);
+out_err:
+ kmem_cache_destroy(nfsd_file_slab);
+ nfsd_file_slab = NULL;
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+ rhltable_destroy(&nfsd_file_rhltable);
+ goto out;
+}
+
+/**
+ * __nfsd_file_cache_purge: clean out the cache for shutdown
+ * @net: net-namespace to shut down the cache (may be NULL)
+ *
+ * Walk the nfsd_file cache and close out any that match @net. If @net is NULL,
+ * then close out everything. Called when an nfsd instance is being shut down,
+ * and when the exports table is flushed.
+ */
+static void
+__nfsd_file_cache_purge(struct net *net)
+{
+ struct rhashtable_iter iter;
+ struct nfsd_file *nf;
+ LIST_HEAD(dispose);
+
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ if (net) {
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ nfs_localio_invalidate_clients(&nn->local_clients,
+ &nn->local_clients_lock);
+ }
+#endif
+
+ rhltable_walk_enter(&nfsd_file_rhltable, &iter);
+ do {
+ rhashtable_walk_start(&iter);
+
+ nf = rhashtable_walk_next(&iter);
+ while (!IS_ERR_OR_NULL(nf)) {
+ if (!net || nf->nf_net == net)
+ nfsd_file_cond_queue(nf, &dispose);
+ nf = rhashtable_walk_next(&iter);
+ }
+
+ rhashtable_walk_stop(&iter);
+ } while (nf == ERR_PTR(-EAGAIN));
+ rhashtable_walk_exit(&iter);
+
+ nfsd_file_dispose_list(&dispose);
+}
+
+static struct nfsd_fcache_disposal *
+nfsd_alloc_fcache_disposal(void)
+{
+ struct nfsd_fcache_disposal *l;
+
+ l = kmalloc(sizeof(*l), GFP_KERNEL);
+ if (!l)
+ return NULL;
+ spin_lock_init(&l->lock);
+ INIT_LIST_HEAD(&l->freeme);
+ return l;
+}
+
+static void
+nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
+{
+ nfsd_file_dispose_list(&l->freeme);
+ kfree(l);
+}
+
+static void
+nfsd_free_fcache_disposal_net(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+
+ nfsd_free_fcache_disposal(l);
+}
+
+int
+nfsd_file_cache_start_net(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nn->fcache_disposal = nfsd_alloc_fcache_disposal();
+ return nn->fcache_disposal ? 0 : -ENOMEM;
+}
+
+/**
+ * nfsd_file_cache_purge - Remove all cache items associated with @net
+ * @net: target net namespace
+ *
+ */
+void
+nfsd_file_cache_purge(struct net *net)
+{
+ lockdep_assert_held(&nfsd_mutex);
+ if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
+ __nfsd_file_cache_purge(net);
+}
+
+void
+nfsd_file_cache_shutdown_net(struct net *net)
+{
+ nfsd_file_cache_purge(net);
+ nfsd_free_fcache_disposal_net(net);
+}
+
+void
+nfsd_file_cache_shutdown(void)
+{
+ int i;
+
+ lockdep_assert_held(&nfsd_mutex);
+ if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0)
+ return;
+
+ lease_unregister_notifier(&nfsd_file_lease_notifier);
+ shrinker_free(nfsd_file_shrinker);
+ /*
+ * make sure all callers of nfsd_file_lru_cb are done before
+ * calling nfsd_file_cache_purge
+ */
+ cancel_delayed_work_sync(&nfsd_filecache_laundrette);
+ __nfsd_file_cache_purge(NULL);
+ list_lru_destroy(&nfsd_file_lru);
+ rcu_barrier();
+ fsnotify_put_group(nfsd_file_fsnotify_group);
+ nfsd_file_fsnotify_group = NULL;
+ kmem_cache_destroy(nfsd_file_slab);
+ nfsd_file_slab = NULL;
+ fsnotify_wait_marks_destroyed();
+ kmem_cache_destroy(nfsd_file_mark_slab);
+ nfsd_file_mark_slab = NULL;
+ rhltable_destroy(&nfsd_file_rhltable);
+
+ for_each_possible_cpu(i) {
+ per_cpu(nfsd_file_cache_hits, i) = 0;
+ per_cpu(nfsd_file_acquisitions, i) = 0;
+ per_cpu(nfsd_file_allocations, i) = 0;
+ per_cpu(nfsd_file_releases, i) = 0;
+ per_cpu(nfsd_file_total_age, i) = 0;
+ per_cpu(nfsd_file_evictions, i) = 0;
+ }
+}
+
+static struct nfsd_file *
+nfsd_file_lookup_locked(const struct net *net, const struct cred *cred,
+ struct inode *inode, unsigned char need,
+ bool want_gc)
+{
+ struct rhlist_head *tmp, *list;
+ struct nfsd_file *nf;
+
+ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
+ nfsd_file_rhash_params);
+ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
+ if (nf->nf_may != need)
+ continue;
+ if (nf->nf_net != net)
+ continue;
+ if (!nfsd_match_cred(nf->nf_cred, cred))
+ continue;
+ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc)
+ continue;
+ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
+ continue;
+
+ if (!nfsd_file_get(nf))
+ continue;
+ return nf;
+ }
+ return NULL;
+}
+
+/**
+ * nfsd_file_is_cached - are there any cached open files for this inode?
+ * @inode: inode to check
+ *
+ * The lookup matches inodes in all net namespaces and is atomic wrt
+ * nfsd_file_acquire().
+ *
+ * Return values:
+ * %true: filecache contains at least one file matching this inode
+ * %false: filecache contains no files matching this inode
+ */
+bool
+nfsd_file_is_cached(struct inode *inode)
+{
+ struct rhlist_head *tmp, *list;
+ struct nfsd_file *nf;
+ bool ret = false;
+
+ rcu_read_lock();
+ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
+ nfsd_file_rhash_params);
+ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist)
+ if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
+ ret = true;
+ break;
+ }
+ rcu_read_unlock();
+
+ trace_nfsd_file_is_cached(inode, (int)ret);
+ return ret;
+}
+
+static __be32
+nfsd_file_get_dio_attrs(const struct svc_fh *fhp, struct nfsd_file *nf)
+{
+ struct inode *inode = file_inode(nf->nf_file);
+ struct kstat stat;
+ __be32 status;
+
+ /* Currently only need to get DIO alignment info for regular files */
+ if (!S_ISREG(inode->i_mode))
+ return nfs_ok;
+
+ status = fh_getattr(fhp, &stat);
+ if (status != nfs_ok)
+ return status;
+
+ trace_nfsd_file_get_dio_attrs(inode, &stat);
+
+ if (stat.result_mask & STATX_DIOALIGN) {
+ nf->nf_dio_mem_align = stat.dio_mem_align;
+ nf->nf_dio_offset_align = stat.dio_offset_align;
+ }
+ if (stat.result_mask & STATX_DIO_READ_ALIGN)
+ nf->nf_dio_read_offset_align = stat.dio_read_offset_align;
+ else
+ nf->nf_dio_read_offset_align = nf->nf_dio_offset_align;
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net,
+ struct svc_cred *cred,
+ struct auth_domain *client,
+ struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ umode_t type, bool want_gc, struct nfsd_file **pnf)
+{
+ unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
+ struct nfsd_file *new, *nf;
+ bool stale_retry = true;
+ bool open_retry = true;
+ struct inode *inode;
+ __be32 status;
+ int ret;
+
+retry:
+ if (rqstp)
+ status = fh_verify(rqstp, fhp, type,
+ may_flags|NFSD_MAY_OWNER_OVERRIDE);
+ else
+ status = fh_verify_local(net, cred, client, fhp, type,
+ may_flags|NFSD_MAY_OWNER_OVERRIDE);
+
+ if (status != nfs_ok)
+ return status;
+ inode = d_inode(fhp->fh_dentry);
+
+ rcu_read_lock();
+ nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc);
+ rcu_read_unlock();
+
+ if (nf)
+ goto wait_for_construction;
+
+ new = nfsd_file_alloc(net, inode, need, want_gc);
+ if (!new) {
+ status = nfserr_jukebox;
+ goto out;
+ }
+
+ rcu_read_lock();
+ spin_lock(&inode->i_lock);
+ nf = nfsd_file_lookup_locked(net, current_cred(), inode, need, want_gc);
+ if (unlikely(nf)) {
+ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
+ nfsd_file_free(new);
+ goto wait_for_construction;
+ }
+ nf = new;
+ ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist,
+ nfsd_file_rhash_params);
+ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
+ if (likely(ret == 0))
+ goto open_file;
+
+ trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret);
+ status = nfserr_jukebox;
+ goto construction_err;
+
+wait_for_construction:
+ wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
+
+ /* Did construction of this file fail? */
+ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+ trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf);
+ if (!open_retry) {
+ status = nfserr_jukebox;
+ goto construction_err;
+ }
+ nfsd_file_put(nf);
+ open_retry = false;
+ fh_put(fhp);
+ goto retry;
+ }
+ this_cpu_inc(nfsd_file_cache_hits);
+
+ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
+ if (status != nfs_ok) {
+ nfsd_file_put(nf);
+ nf = NULL;
+ }
+
+out:
+ if (status == nfs_ok) {
+ this_cpu_inc(nfsd_file_acquisitions);
+ nfsd_file_check_write_error(nf);
+ *pnf = nf;
+ }
+ trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
+ return status;
+
+open_file:
+ trace_nfsd_file_alloc(nf);
+
+ if (type == S_IFREG)
+ nf->nf_mark = nfsd_file_mark_find_or_create(inode);
+
+ if (type != S_IFREG || nf->nf_mark) {
+ if (file) {
+ get_file(file);
+ nf->nf_file = file;
+ status = nfs_ok;
+ trace_nfsd_file_opened(nf, status);
+ } else {
+ ret = nfsd_open_verified(fhp, type, may_flags, &nf->nf_file);
+ if (ret == -EOPENSTALE && stale_retry) {
+ stale_retry = false;
+ nfsd_file_unhash(nf);
+ clear_and_wake_up_bit(NFSD_FILE_PENDING,
+ &nf->nf_flags);
+ if (refcount_dec_and_test(&nf->nf_ref))
+ nfsd_file_free(nf);
+ nf = NULL;
+ fh_put(fhp);
+ goto retry;
+ }
+ status = nfserrno(ret);
+ trace_nfsd_file_open(nf, status);
+ if (status == nfs_ok)
+ status = nfsd_file_get_dio_attrs(fhp, nf);
+ }
+ } else
+ status = nfserr_jukebox;
+ /*
+ * If construction failed, or we raced with a call to unlink()
+ * then unhash.
+ */
+ if (status != nfs_ok || inode->i_nlink == 0)
+ nfsd_file_unhash(nf);
+ else if (want_gc)
+ nfsd_file_lru_add(nf);
+
+ clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+ if (status == nfs_ok)
+ goto out;
+
+construction_err:
+ if (refcount_dec_and_test(&nf->nf_ref))
+ nfsd_file_free(nf);
+ nf = NULL;
+ goto out;
+}
+
+/**
+ * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file
+ * @rqstp: the RPC transaction being executed
+ * @fhp: the NFS filehandle of the file to be opened
+ * @may_flags: NFSD_MAY_ settings for the file
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
+ * The nfsd_file object returned by this API is reference-counted
+ * and garbage-collected. The object is retained for a few
+ * seconds after the final nfsd_file_put() in case the caller
+ * wants to re-use it.
+ *
+ * Return values:
+ * %nfs_ok - @pnf points to an nfsd_file with its reference
+ * count boosted.
+ *
+ * On error, an nfsstat value in network byte order is returned.
+ */
+__be32
+nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+{
+ return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
+ fhp, may_flags, NULL, S_IFREG, true, pnf);
+}
+
+/**
+ * nfsd_file_acquire - Get a struct nfsd_file with an open file
+ * @rqstp: the RPC transaction being executed
+ * @fhp: the NFS filehandle of the file to be opened
+ * @may_flags: NFSD_MAY_ settings for the file
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
+ * The nfsd_file_object returned by this API is reference-counted
+ * but not garbage-collected. The object is unhashed after the
+ * final nfsd_file_put().
+ *
+ * Return values:
+ * %nfs_ok - @pnf points to an nfsd_file with its reference
+ * count boosted.
+ *
+ * On error, an nfsstat value in network byte order is returned.
+ */
+__be32
+nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+{
+ return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
+ fhp, may_flags, NULL, S_IFREG, false, pnf);
+}
+
+/**
+ * nfsd_file_acquire_local - Get a struct nfsd_file with an open file for localio
+ * @net: The network namespace in which to perform a lookup
+ * @cred: the user credential with which to validate access
+ * @client: the auth_domain for LOCALIO lookup
+ * @fhp: the NFS filehandle of the file to be opened
+ * @may_flags: NFSD_MAY_ settings for the file
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
+ * This file lookup interface provide access to a file given the
+ * filehandle and credential. No connection-based authorisation
+ * is performed and in that way it is quite different to other
+ * file access mediated by nfsd. It allows a kernel module such as the NFS
+ * client to reach across network and filesystem namespaces to access
+ * a file. The security implications of this should be carefully
+ * considered before use.
+ *
+ * The nfsd_file_object returned by this API is reference-counted
+ * but not garbage-collected. The object is unhashed after the
+ * final nfsd_file_put().
+ *
+ * Return values:
+ * %nfs_ok - @pnf points to an nfsd_file with its reference
+ * count boosted.
+ *
+ * On error, an nfsstat value in network byte order is returned.
+ */
+__be32
+nfsd_file_acquire_local(struct net *net, struct svc_cred *cred,
+ struct auth_domain *client, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf)
+{
+ /*
+ * Save creds before calling nfsd_file_do_acquire() (which calls
+ * nfsd_setuser). Important because caller (LOCALIO) is from
+ * client context.
+ */
+ const struct cred *save_cred = get_current_cred();
+ __be32 beres;
+
+ beres = nfsd_file_do_acquire(NULL, net, cred, client, fhp, may_flags,
+ NULL, S_IFREG, false, pnf);
+ put_cred(revert_creds(save_cred));
+ return beres;
+}
+
+/**
+ * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file
+ * @rqstp: the RPC transaction being executed
+ * @fhp: the NFS filehandle of the file just created
+ * @may_flags: NFSD_MAY_ settings for the file
+ * @file: cached, already-open file (may be NULL)
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
+ * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist,
+ * and @file is non-NULL, use it to instantiate a new nfsd_file instead of
+ * opening a new one.
+ *
+ * Return values:
+ * %nfs_ok - @pnf points to an nfsd_file with its reference
+ * count boosted.
+ *
+ * On error, an nfsstat value in network byte order is returned.
+ */
+__be32
+nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **pnf)
+{
+ return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL,
+ fhp, may_flags, file, S_IFREG, false, pnf);
+}
+
+/**
+ * nfsd_file_acquire_dir - Get a struct nfsd_file with an open directory
+ * @rqstp: the RPC transaction being executed
+ * @fhp: the NFS filehandle of the file to be opened
+ * @pnf: OUT: new or found "struct nfsd_file" object
+ *
+ * The nfsd_file_object returned by this API is reference-counted
+ * but not garbage-collected. The object is unhashed after the
+ * final nfsd_file_put(). This opens directories only, and only
+ * in O_RDONLY mode.
+ *
+ * Return values:
+ * %nfs_ok - @pnf points to an nfsd_file with its reference
+ * count boosted.
+ *
+ * On error, an nfsstat value in network byte order is returned.
+ */
+__be32
+nfsd_file_acquire_dir(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file **pnf)
+{
+ return nfsd_file_do_acquire(rqstp, SVC_NET(rqstp), NULL, NULL, fhp,
+ NFSD_MAY_READ|NFSD_MAY_64BIT_COOKIE,
+ NULL, S_IFDIR, false, pnf);
+}
+
+/*
+ * Note that fields may be added, removed or reordered in the future. Programs
+ * scraping this file for info should test the labels to ensure they're
+ * getting the correct field.
+ */
+int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
+{
+ unsigned long allocations = 0, releases = 0, evictions = 0;
+ unsigned long hits = 0, acquisitions = 0;
+ unsigned int i, count = 0, buckets = 0;
+ unsigned long lru = 0, total_age = 0;
+
+ /* Serialize with server shutdown */
+ mutex_lock(&nfsd_mutex);
+ if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) {
+ struct bucket_table *tbl;
+ struct rhashtable *ht;
+
+ lru = list_lru_count(&nfsd_file_lru);
+
+ rcu_read_lock();
+ ht = &nfsd_file_rhltable.ht;
+ count = atomic_read(&ht->nelems);
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+ buckets = tbl->size;
+ rcu_read_unlock();
+ }
+ mutex_unlock(&nfsd_mutex);
+
+ for_each_possible_cpu(i) {
+ hits += per_cpu(nfsd_file_cache_hits, i);
+ acquisitions += per_cpu(nfsd_file_acquisitions, i);
+ allocations += per_cpu(nfsd_file_allocations, i);
+ releases += per_cpu(nfsd_file_releases, i);
+ total_age += per_cpu(nfsd_file_total_age, i);
+ evictions += per_cpu(nfsd_file_evictions, i);
+ }
+
+ seq_printf(m, "total inodes: %u\n", count);
+ seq_printf(m, "hash buckets: %u\n", buckets);
+ seq_printf(m, "lru entries: %lu\n", lru);
+ seq_printf(m, "cache hits: %lu\n", hits);
+ seq_printf(m, "acquisitions: %lu\n", acquisitions);
+ seq_printf(m, "allocations: %lu\n", allocations);
+ seq_printf(m, "releases: %lu\n", releases);
+ seq_printf(m, "evictions: %lu\n", evictions);
+ if (releases)
+ seq_printf(m, "mean age (ms): %ld\n", total_age / releases);
+ else
+ seq_printf(m, "mean age (ms): -\n");
+ return 0;
+}
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
new file mode 100644
index 000000000000..b383dbc5b921
--- /dev/null
+++ b/fs/nfsd/filecache.h
@@ -0,0 +1,88 @@
+#ifndef _FS_NFSD_FILECACHE_H
+#define _FS_NFSD_FILECACHE_H
+
+#include <linux/fsnotify_backend.h>
+
+/*
+ * Limit the time that the list_lru_one lock is held during
+ * an LRU scan.
+ */
+#define NFSD_FILE_GC_BATCH (16UL)
+
+/*
+ * This is the fsnotify_mark container that nfsd attaches to the files that it
+ * is holding open. Note that we have a separate refcount here aside from the
+ * one in the fsnotify_mark. We only want a single fsnotify_mark attached to
+ * the inode, and for each nfsd_file to hold a reference to it.
+ *
+ * The fsnotify_mark is itself refcounted, but that's not sufficient to tell us
+ * how to put that reference. If there are still outstanding nfsd_files that
+ * reference the mark, then we would want to call fsnotify_put_mark on it.
+ * If there were not, then we'd need to call fsnotify_destroy_mark. Since we
+ * can't really tell the difference, we use the nfm_mark to keep track of how
+ * many nfsd_files hold references to the mark. When that counter goes to zero
+ * then we know to call fsnotify_destroy_mark on it.
+ */
+struct nfsd_file_mark {
+ struct fsnotify_mark nfm_mark;
+ refcount_t nfm_ref;
+};
+
+/*
+ * A representation of a file that has been opened by knfsd. These are hashed
+ * in the hashtable by inode pointer value. Note that this object doesn't
+ * hold a reference to the inode by itself, so the nf_inode pointer should
+ * never be dereferenced, only used for comparison.
+ */
+struct nfsd_file {
+ struct rhlist_head nf_rlist;
+ void *nf_inode;
+ struct file *nf_file;
+ const struct cred *nf_cred;
+ struct net *nf_net;
+#define NFSD_FILE_HASHED (0)
+#define NFSD_FILE_PENDING (1)
+#define NFSD_FILE_REFERENCED (2)
+#define NFSD_FILE_GC (3)
+#define NFSD_FILE_RECENT (4)
+ unsigned long nf_flags;
+ refcount_t nf_ref;
+ unsigned char nf_may;
+
+ struct nfsd_file_mark *nf_mark;
+ struct list_head nf_lru;
+ struct list_head nf_gc;
+ struct rcu_head nf_rcu;
+ ktime_t nf_birthtime;
+
+ u32 nf_dio_mem_align;
+ u32 nf_dio_offset_align;
+ u32 nf_dio_read_offset_align;
+};
+
+int nfsd_file_cache_init(void);
+void nfsd_file_cache_purge(struct net *);
+void nfsd_file_cache_shutdown(void);
+int nfsd_file_cache_start_net(struct net *net);
+void nfsd_file_cache_shutdown_net(struct net *net);
+void nfsd_file_put(struct nfsd_file *nf);
+struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf);
+struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
+struct file *nfsd_file_file(struct nfsd_file *nf);
+void nfsd_file_close_inode_sync(struct inode *inode);
+void nfsd_file_net_dispose(struct nfsd_net *nn);
+bool nfsd_file_is_cached(struct inode *inode);
+__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
+__be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **nfp);
+__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ unsigned int may_flags, struct file *file,
+ struct nfsd_file **nfp);
+__be32 nfsd_file_acquire_local(struct net *net, struct svc_cred *cred,
+ struct auth_domain *client, struct svc_fh *fhp,
+ unsigned int may_flags, struct nfsd_file **pnf);
+__be32 nfsd_file_acquire_dir(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file **pnf);
+int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
+#endif /* _FS_NFSD_FILECACHE_H */
diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
index db7ef07ae50c..0f1a35400cd5 100644
--- a/fs/nfsd/flexfilelayout.c
+++ b/fs/nfsd/flexfilelayout.c
@@ -15,12 +15,13 @@
#include "flexfilelayoutxdr.h"
#include "pnfs.h"
+#include "vfs.h"
#define NFSDDBG_FACILITY NFSDDBG_PNFS
static __be32
-nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
- struct nfsd4_layoutget *args)
+nfsd4_ff_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode,
+ const struct svc_fh *fhp, struct nfsd4_layoutget *args)
{
struct nfsd4_layout_seg *seg = &args->lg_seg;
u32 device_generation = 0;
@@ -61,7 +62,7 @@ nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
goto out_error;
fl->fh.size = fhp->fh_handle.fh_size;
- memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size);
+ memcpy(fl->fh.data, &fhp->fh_handle.fh_raw, fl->fh.size);
/* Give whole file layout segments */
seg->offset = 0;
@@ -117,13 +118,20 @@ nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp,
da->netaddr.addr_len =
snprintf(da->netaddr.addr, FF_ADDR_LEN + 1,
- "%s.%hhu.%hhu", addr, port >> 8, port & 0xff);
+ "%s.%d.%d", addr, port >> 8, port & 0xff);
da->tightly_coupled = false;
return 0;
}
+static __be32
+nfsd4_ff_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
+ struct nfsd4_layoutcommit *lcp)
+{
+ return nfs_ok;
+}
+
const struct nfsd4_layout_ops ff_layout_ops = {
.notify_types =
NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
@@ -132,4 +140,5 @@ const struct nfsd4_layout_ops ff_layout_ops = {
.encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo,
.proc_layoutget = nfsd4_ff_proc_layoutget,
.encode_layoutget = nfsd4_ff_encode_layoutget,
+ .proc_layoutcommit = nfsd4_ff_proc_layoutcommit,
};
diff --git a/fs/nfsd/flexfilelayoutxdr.c b/fs/nfsd/flexfilelayoutxdr.c
index e81d2a5cf381..f9f7e38cba13 100644
--- a/fs/nfsd/flexfilelayoutxdr.c
+++ b/fs/nfsd/flexfilelayoutxdr.c
@@ -17,9 +17,9 @@ struct ff_idmap {
__be32
nfsd4_ff_encode_layoutget(struct xdr_stream *xdr,
- struct nfsd4_layoutget *lgp)
+ const struct nfsd4_layoutget *lgp)
{
- struct pnfs_ff_layout *fl = lgp->lg_content;
+ const struct pnfs_ff_layout *fl = lgp->lg_content;
int len, mirror_len, ds_len, fh_len;
__be32 *p;
@@ -54,8 +54,7 @@ nfsd4_ff_encode_layoutget(struct xdr_stream *xdr,
*p++ = cpu_to_be32(1); /* single mirror */
*p++ = cpu_to_be32(1); /* single data server */
- p = xdr_encode_opaque_fixed(p, &fl->deviceid,
- sizeof(struct nfsd4_deviceid));
+ p = svcxdr_encode_deviceid4(p, &fl->deviceid);
*p++ = cpu_to_be32(1); /* efficiency */
@@ -77,7 +76,7 @@ nfsd4_ff_encode_layoutget(struct xdr_stream *xdr,
__be32
nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
- struct nfsd4_getdeviceinfo *gdp)
+ const struct nfsd4_getdeviceinfo *gdp)
{
struct pnfs_ff_device_addr *da = gdp->gd_device;
int len;
@@ -85,6 +84,15 @@ nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
int addr_len;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
/* len + padding for two strings */
addr_len = 16 + da->netaddr.netid_len + da->netaddr.addr_len;
ver_len = 20;
diff --git a/fs/nfsd/flexfilelayoutxdr.h b/fs/nfsd/flexfilelayoutxdr.h
index 8e195aeca023..6d5a1066a903 100644
--- a/fs/nfsd/flexfilelayoutxdr.h
+++ b/fs/nfsd/flexfilelayoutxdr.h
@@ -43,8 +43,8 @@ struct pnfs_ff_layout {
};
__be32 nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
- struct nfsd4_getdeviceinfo *gdp);
+ const struct nfsd4_getdeviceinfo *gdp);
__be32 nfsd4_ff_encode_layoutget(struct xdr_stream *xdr,
- struct nfsd4_layoutget *lgp);
+ const struct nfsd4_layoutget *lgp);
#endif /* _NFSD_FLEXFILELAYOUTXDR_H */
diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
new file mode 100644
index 000000000000..be710d809a3b
--- /dev/null
+++ b/fs/nfsd/localio.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * NFS server support for local clients to bypass network stack
+ *
+ * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
+ * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
+ * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
+ * Copyright (C) 2024 NeilBrown <neilb@suse.de>
+ */
+
+#include <linux/exportfs.h>
+#include <linux/sunrpc/svcauth.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs_common.h>
+#include <linux/nfslocalio.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_xdr.h>
+#include <linux/string.h>
+
+#include "nfsd.h"
+#include "vfs.h"
+#include "netns.h"
+#include "filecache.h"
+#include "cache.h"
+
+/**
+ * nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file
+ *
+ * @net: 'struct net' to get the proper nfsd_net required for LOCALIO access
+ * @dom: 'struct auth_domain' required for LOCALIO access
+ * @rpc_clnt: rpc_clnt that the client established
+ * @cred: cred that the client established
+ * @nfs_fh: filehandle to lookup
+ * @pnf: place to find the nfsd_file, or store it if it was non-NULL
+ * @fmode: fmode_t to use for open
+ *
+ * This function maps a local fh to a path on a local filesystem.
+ * This is useful when the nfs client has the local server mounted - it can
+ * avoid all the NFS overhead with reads, writes and commits.
+ *
+ * On successful return, returned nfsd_file will have its nf_net member
+ * set. Caller (NFS client) is responsible for calling nfsd_net_put and
+ * nfsd_file_put (via nfs_to_nfsd_file_put_local).
+ */
+static struct nfsd_file *
+nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
+ struct rpc_clnt *rpc_clnt, const struct cred *cred,
+ const struct nfs_fh *nfs_fh, struct nfsd_file __rcu **pnf,
+ const fmode_t fmode)
+{
+ int mayflags = NFSD_MAY_LOCALIO;
+ struct svc_cred rq_cred;
+ struct svc_fh fh;
+ struct nfsd_file *localio;
+ __be32 beres;
+
+ if (nfs_fh->size > NFS4_FHSIZE)
+ return ERR_PTR(-EINVAL);
+
+ if (!nfsd_net_try_get(net))
+ return ERR_PTR(-ENXIO);
+
+ rcu_read_lock();
+ localio = nfsd_file_get(rcu_dereference(*pnf));
+ rcu_read_unlock();
+ if (localio)
+ return localio;
+
+ /* nfs_fh -> svc_fh */
+ fh_init(&fh, NFS4_FHSIZE);
+ fh.fh_handle.fh_size = nfs_fh->size;
+ memcpy(fh.fh_handle.fh_raw, nfs_fh->data, nfs_fh->size);
+
+ if (fmode & FMODE_READ)
+ mayflags |= NFSD_MAY_READ;
+ if (fmode & FMODE_WRITE)
+ mayflags |= NFSD_MAY_WRITE;
+
+ svcauth_map_clnt_to_svc_cred_local(rpc_clnt, cred, &rq_cred);
+
+ beres = nfsd_file_acquire_local(net, &rq_cred, dom,
+ &fh, mayflags, &localio);
+ if (beres)
+ localio = ERR_PTR(nfs_stat_to_errno(be32_to_cpu(beres)));
+
+ fh_put(&fh);
+ if (rq_cred.cr_group_info)
+ put_group_info(rq_cred.cr_group_info);
+
+ if (!IS_ERR(localio)) {
+ struct nfsd_file *new;
+ if (!nfsd_net_try_get(net)) {
+ nfsd_file_put(localio);
+ nfsd_net_put(net);
+ return ERR_PTR(-ENXIO);
+ }
+ nfsd_file_get(localio);
+ again:
+ new = unrcu_pointer(cmpxchg(pnf, NULL, RCU_INITIALIZER(localio)));
+ if (new) {
+ /* Some other thread installed an nfsd_file */
+ if (nfsd_file_get(new) == NULL)
+ goto again;
+ /*
+ * Drop the ref we were going to install (both file and
+ * net) and the one we were going to return (only file).
+ */
+ nfsd_file_put(localio);
+ nfsd_net_put(net);
+ nfsd_file_put(localio);
+ localio = new;
+ }
+ } else
+ nfsd_net_put(net);
+
+ return localio;
+}
+
+static void nfsd_file_dio_alignment(struct nfsd_file *nf,
+ u32 *nf_dio_mem_align,
+ u32 *nf_dio_offset_align,
+ u32 *nf_dio_read_offset_align)
+{
+ *nf_dio_mem_align = nf->nf_dio_mem_align;
+ *nf_dio_offset_align = nf->nf_dio_offset_align;
+ *nf_dio_read_offset_align = nf->nf_dio_read_offset_align;
+}
+
+static const struct nfsd_localio_operations nfsd_localio_ops = {
+ .nfsd_net_try_get = nfsd_net_try_get,
+ .nfsd_net_put = nfsd_net_put,
+ .nfsd_open_local_fh = nfsd_open_local_fh,
+ .nfsd_file_put_local = nfsd_file_put_local,
+ .nfsd_file_file = nfsd_file_file,
+ .nfsd_file_dio_alignment = nfsd_file_dio_alignment,
+};
+
+void nfsd_localio_ops_init(void)
+{
+ nfs_to = &nfsd_localio_ops;
+}
+
+/*
+ * UUID_IS_LOCAL XDR functions
+ */
+
+static __be32 localio_proc_null(struct svc_rqst *rqstp)
+{
+ return rpc_success;
+}
+
+struct localio_uuidarg {
+ uuid_t uuid;
+};
+
+static __be32 localio_proc_uuid_is_local(struct svc_rqst *rqstp)
+{
+ struct localio_uuidarg *argp = rqstp->rq_argp;
+ struct net *net = SVC_NET(rqstp);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfs_uuid_is_local(&argp->uuid, &nn->local_clients,
+ &nn->local_clients_lock,
+ net, rqstp->rq_client, THIS_MODULE);
+
+ return rpc_success;
+}
+
+static bool localio_decode_uuidarg(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr)
+{
+ struct localio_uuidarg *argp = rqstp->rq_argp;
+ u8 uuid[UUID_SIZE];
+
+ if (decode_opaque_fixed(xdr, uuid, UUID_SIZE))
+ return false;
+ import_uuid(&argp->uuid, uuid);
+
+ return true;
+}
+
+static const struct svc_procedure localio_procedures1[] = {
+ [LOCALIOPROC_NULL] = {
+ .pc_func = localio_proc_null,
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = 0,
+ .pc_name = "NULL",
+ },
+ [LOCALIOPROC_UUID_IS_LOCAL] = {
+ .pc_func = localio_proc_uuid_is_local,
+ .pc_decode = localio_decode_uuidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct localio_uuidarg),
+ .pc_argzero = sizeof(struct localio_uuidarg),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_name = "UUID_IS_LOCAL",
+ },
+};
+
+#define LOCALIO_NR_PROCEDURES ARRAY_SIZE(localio_procedures1)
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ localio_count[LOCALIO_NR_PROCEDURES]);
+const struct svc_version localio_version1 = {
+ .vs_vers = 1,
+ .vs_nproc = LOCALIO_NR_PROCEDURES,
+ .vs_proc = localio_procedures1,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_count = localio_count,
+ .vs_xdrsize = XDR_QUADLEN(UUID_SIZE),
+ .vs_hidden = true,
+};
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 3f5b3d7b62b7..c774ce9aa296 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -25,26 +25,52 @@
* Note: we hold the dentry use count while the file is open.
*/
static __be32
-nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
+nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
+ int mode)
{
__be32 nfserr;
+ int access;
struct svc_fh fh;
/* must initialize before using! but maxsize doesn't matter */
fh_init(&fh,0);
fh.fh_handle.fh_size = f->size;
- memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size);
+ memcpy(&fh.fh_handle.fh_raw, f->data, f->size);
fh.fh_export = NULL;
- nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
+ /*
+ * Allow BYPASS_GSS as some client implementations use AUTH_SYS
+ * for NLM even when GSS is used for NFS.
+ * Allow OWNER_OVERRIDE as permission might have been changed
+ * after the file was opened.
+ * Pass MAY_NLM so that authentication can be completely bypassed
+ * if NFSEXP_NOAUTHNLM is set. Some older clients use AUTH_NULL
+ * for NLM requests.
+ */
+ access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
+ access |= NFSD_MAY_NLM | NFSD_MAY_OWNER_OVERRIDE | NFSD_MAY_BYPASS_GSS;
+ nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp);
fh_put(&fh);
- /* We return nlm error codes as nlm doesn't know
+ /* We return nlm error codes as nlm doesn't know
* about nfsd, but nfsd does know about nlm..
*/
switch (nfserr) {
case nfs_ok:
return 0;
- case nfserr_dropit:
+ case nfserr_jukebox:
+ /* this error can indicate a presence of a conflicting
+ * delegation to an NLM lock request. Options are:
+ * (1) For now, drop this request and make the client
+ * retry. When delegation is returned, client's lock retry
+ * will complete.
+ * (2) NLM4_DENIED as per "spec" signals to the client
+ * that the lock is unavailable now but client can retry.
+ * Linux client implementation does not. It treats
+ * NLM4_DENIED same as NLM4_FAILED and errors the request.
+ * (3) For the future, treat this as blocked lock and try
+ * to callback when the delegation is returned but might
+ * not have a proper lock request to block on.
+ */
return nlm_drop_reply;
case nfserr_stale:
return nlm_stale_fh;
diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c
new file mode 100644
index 000000000000..ac51a44e1065
--- /dev/null
+++ b/fs/nfsd/netlink.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/nfsd.yaml */
+/* YNL-GEN kernel source */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "netlink.h"
+
+#include <uapi/linux/nfsd_netlink.h>
+
+/* Common nested types */
+const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1] = {
+ [NFSD_A_SOCK_ADDR] = { .type = NLA_BINARY, },
+ [NFSD_A_SOCK_TRANSPORT_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1] = {
+ [NFSD_A_VERSION_MAJOR] = { .type = NLA_U32, },
+ [NFSD_A_VERSION_MINOR] = { .type = NLA_U32, },
+ [NFSD_A_VERSION_ENABLED] = { .type = NLA_FLAG, },
+};
+
+/* NFSD_CMD_THREADS_SET - do */
+static const struct nla_policy nfsd_threads_set_nl_policy[NFSD_A_SERVER_SCOPE + 1] = {
+ [NFSD_A_SERVER_THREADS] = { .type = NLA_U32, },
+ [NFSD_A_SERVER_GRACETIME] = { .type = NLA_U32, },
+ [NFSD_A_SERVER_LEASETIME] = { .type = NLA_U32, },
+ [NFSD_A_SERVER_SCOPE] = { .type = NLA_NUL_STRING, },
+};
+
+/* NFSD_CMD_VERSION_SET - do */
+static const struct nla_policy nfsd_version_set_nl_policy[NFSD_A_SERVER_PROTO_VERSION + 1] = {
+ [NFSD_A_SERVER_PROTO_VERSION] = NLA_POLICY_NESTED(nfsd_version_nl_policy),
+};
+
+/* NFSD_CMD_LISTENER_SET - do */
+static const struct nla_policy nfsd_listener_set_nl_policy[NFSD_A_SERVER_SOCK_ADDR + 1] = {
+ [NFSD_A_SERVER_SOCK_ADDR] = NLA_POLICY_NESTED(nfsd_sock_nl_policy),
+};
+
+/* NFSD_CMD_POOL_MODE_SET - do */
+static const struct nla_policy nfsd_pool_mode_set_nl_policy[NFSD_A_POOL_MODE_MODE + 1] = {
+ [NFSD_A_POOL_MODE_MODE] = { .type = NLA_NUL_STRING, },
+};
+
+/* Ops table for nfsd */
+static const struct genl_split_ops nfsd_nl_ops[] = {
+ {
+ .cmd = NFSD_CMD_RPC_STATUS_GET,
+ .dumpit = nfsd_nl_rpc_status_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = NFSD_CMD_THREADS_SET,
+ .doit = nfsd_nl_threads_set_doit,
+ .policy = nfsd_threads_set_nl_policy,
+ .maxattr = NFSD_A_SERVER_SCOPE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_THREADS_GET,
+ .doit = nfsd_nl_threads_get_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_VERSION_SET,
+ .doit = nfsd_nl_version_set_doit,
+ .policy = nfsd_version_set_nl_policy,
+ .maxattr = NFSD_A_SERVER_PROTO_VERSION,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_VERSION_GET,
+ .doit = nfsd_nl_version_get_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_LISTENER_SET,
+ .doit = nfsd_nl_listener_set_doit,
+ .policy = nfsd_listener_set_nl_policy,
+ .maxattr = NFSD_A_SERVER_SOCK_ADDR,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_LISTENER_GET,
+ .doit = nfsd_nl_listener_get_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_POOL_MODE_SET,
+ .doit = nfsd_nl_pool_mode_set_doit,
+ .policy = nfsd_pool_mode_set_nl_policy,
+ .maxattr = NFSD_A_POOL_MODE_MODE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_POOL_MODE_GET,
+ .doit = nfsd_nl_pool_mode_get_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
+};
+
+struct genl_family nfsd_nl_family __ro_after_init = {
+ .name = NFSD_FAMILY_NAME,
+ .version = NFSD_FAMILY_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .split_ops = nfsd_nl_ops,
+ .n_split_ops = ARRAY_SIZE(nfsd_nl_ops),
+};
diff --git a/fs/nfsd/netlink.h b/fs/nfsd/netlink.h
new file mode 100644
index 000000000000..478117ff6b8c
--- /dev/null
+++ b/fs/nfsd/netlink.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/nfsd.yaml */
+/* YNL-GEN kernel header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
+
+#ifndef _LINUX_NFSD_GEN_H
+#define _LINUX_NFSD_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/nfsd_netlink.h>
+
+/* Common nested types */
+extern const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1];
+extern const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1];
+
+int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_pool_mode_set_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_pool_mode_get_doit(struct sk_buff *skb, struct genl_info *info);
+
+extern struct genl_family nfsd_nl_family;
+
+#endif /* _LINUX_NFSD_GEN_H */
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 32cb8c027483..3e2d0fde80a7 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* per net namespace data structures for nfsd
*
* Copyright (C) 2012, Jeff Layton <jlayton@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 51
- * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __NFSD_NETNS_H__
@@ -23,6 +10,12 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/filelock.h>
+#include <linux/nfs4.h>
+#include <linux/percpu_counter.h>
+#include <linux/percpu-refcount.h>
+#include <linux/siphash.h>
+#include <linux/sunrpc/stats.h>
/* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS 4
@@ -34,6 +27,26 @@
struct cld_net;
struct nfsd4_client_tracking_ops;
+enum {
+ /* cache misses due only to checksum comparison failures */
+ NFSD_STATS_PAYLOAD_MISSES,
+ /* amount of memory (in bytes) currently consumed by the DRC */
+ NFSD_STATS_DRC_MEM_USAGE,
+ NFSD_STATS_RC_HITS, /* repcache hits */
+ NFSD_STATS_RC_MISSES, /* repcache misses */
+ NFSD_STATS_RC_NOCACHE, /* uncached reqs */
+ NFSD_STATS_FH_STALE, /* FH stale error */
+ NFSD_STATS_IO_READ, /* bytes returned to read requests */
+ NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
+#ifdef CONFIG_NFSD_V4
+ NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
+ NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
+#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
+ NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
+#endif
+ NFSD_STATS_COUNTERS_NUM
+};
+
/*
* Represents a nfsd "container". With respect to nfsv4 state tracking, the
* fields of interest are the *_id_hashtbls and the *_name_tree. These track
@@ -53,7 +66,9 @@ struct nfsd_net {
struct lock_manager nfsd4_manager;
bool grace_ended;
- time_t boot_time;
+ time64_t boot_time;
+
+ struct dentry *nfsd_client_dir;
/*
* reclaim_str_hashtbl[] holds known client info from previous reset/reboot
@@ -100,29 +115,29 @@ struct nfsd_net {
bool in_grace;
const struct nfsd4_client_tracking_ops *client_tracking_ops;
- time_t nfsd4_lease;
- time_t nfsd4_grace;
+ time64_t nfsd4_lease;
+ time64_t nfsd4_grace;
bool somebody_reclaimed;
+ bool track_reclaim_completes;
+ atomic_t nr_reclaim_complete;
+
bool nfsd_net_up;
bool lockd_up;
- /* Time of server startup */
- struct timespec64 nfssvc_boot;
-
- /*
- * Max number of connections this nfsd container will allow. Defaults
- * to '0' which is means that it bases this on the number of threads.
- */
- unsigned int max_connections;
+ seqlock_t writeverf_lock;
+ unsigned char writeverf[8];
+ u32 clientid_base;
u32 clientid_counter;
u32 clverifier_counter;
- struct svc_serv *nfsd_serv;
+ struct svc_info nfsd_info;
+#define nfsd_serv nfsd_info.serv
- wait_queue_head_t ntf_wq;
- atomic_t ntf_refcnt;
+ struct percpu_ref nfsd_net_ref;
+ struct completion nfsd_net_confirm_done;
+ struct completion nfsd_net_free_done;
/*
* clientid and stateid data for construction of net unique COPY
@@ -131,10 +146,88 @@ struct nfsd_net {
u32 s2s_cp_cl_id;
struct idr s2s_cp_stateids;
spinlock_t s2s_cp_lock;
+ atomic_t pending_async_copies;
+
+ /*
+ * Version information
+ */
+ bool nfsd_versions[NFSD_MAXVERS + 1];
+ bool nfsd4_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1];
+
+ /*
+ * Duplicate reply cache
+ */
+ struct nfsd_drc_bucket *drc_hashtbl;
+
+ /* max number of entries allowed in the cache */
+ unsigned int max_drc_entries;
+
+ /* number of significant bits in the hash value */
+ unsigned int maskbits;
+ unsigned int drc_hashsize;
+
+ /*
+ * Stats and other tracking of on the duplicate reply cache.
+ * The longest_chain* fields are modified with only the per-bucket
+ * cache lock, which isn't really safe and should be fixed if we want
+ * these statistics to be completely accurate.
+ */
+
+ /* total number of entries */
+ atomic_t num_drc_entries;
+
+ /* Per-netns stats counters */
+ struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
+
+ /* sunrpc svc stats */
+ struct svc_stat nfsd_svcstats;
+
+ /* longest hash chain seen */
+ unsigned int longest_chain;
+
+ /* size of cache when we saw the longest hash chain */
+ unsigned int longest_chain_cachesize;
+
+ struct shrinker *nfsd_reply_cache_shrinker;
+
+ /* tracking server-to-server copy mounts */
+ spinlock_t nfsd_ssc_lock;
+ struct list_head nfsd_ssc_mount_list;
+ wait_queue_head_t nfsd_ssc_waitq;
+
+ /* utsname taken from the process that starts the server */
+ char nfsd_name[UNX_MAXNODENAME+1];
+
+ struct nfsd_fcache_disposal *fcache_disposal;
+
+ siphash_key_t siphash_key;
+
+ atomic_t nfs4_client_count;
+ int nfs4_max_clients;
+
+ atomic_t nfsd_courtesy_clients;
+ struct shrinker *nfsd_client_shrinker;
+ struct work_struct nfsd_shrinker_work;
+
+ /* last time an admin-revoke happened for NFSv4.0 */
+ time64_t nfs40_last_revoke;
+
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ /* Local clients to be invalidated when net is shut down */
+ spinlock_t local_clients_lock;
+ struct list_head local_clients;
+#endif
};
/* Simple check to find out if a given net was properly initialized */
#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl)
+extern bool nfsd_support_version(int vers);
extern unsigned int nfsd_net_id;
+
+bool nfsd_net_try_get(struct net *net);
+void nfsd_net_put(struct net *net);
+
+void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn);
+void nfsd_reset_write_verifier(struct nfsd_net *nn);
#endif /* __NFSD_NETNS_H__ */
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index cbab1d2d8a75..5fb202acb0fd 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -14,7 +14,6 @@
#include "vfs.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
-#define RETURN_STATUS(st) { resp->status = (st); return (st); }
/*
* NULL call.
@@ -22,7 +21,7 @@
static __be32
nfsacld_proc_null(struct svc_rqst *rqstp)
{
- return nfs_ok;
+ return rpc_success;
}
/*
@@ -35,33 +34,34 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp)
struct posix_acl *acl;
struct inode *inode;
svc_fh *fh;
- __be32 nfserr = 0;
dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
fh = fh_copy(&resp->fh, &argp->fh);
- nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
- if (nfserr)
- RETURN_STATUS(nfserr);
+ resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+ if (resp->status != nfs_ok)
+ goto out;
inode = d_inode(fh->fh_dentry);
- if (argp->mask & ~NFS_ACL_MASK)
- RETURN_STATUS(nfserr_inval);
+ if (argp->mask & ~NFS_ACL_MASK) {
+ resp->status = nfserr_inval;
+ goto out;
+ }
resp->mask = argp->mask;
- nfserr = fh_getattr(fh, &resp->stat);
- if (nfserr)
- RETURN_STATUS(nfserr);
+ resp->status = fh_getattr(fh, &resp->stat);
+ if (resp->status != nfs_ok)
+ goto out;
if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
- acl = get_acl(inode, ACL_TYPE_ACCESS);
+ acl = get_inode_acl(inode, ACL_TYPE_ACCESS);
if (acl == NULL) {
/* Solaris returns the inode's minimum ACL. */
acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
}
if (IS_ERR(acl)) {
- nfserr = nfserrno(PTR_ERR(acl));
+ resp->status = nfserrno(PTR_ERR(acl));
goto fail;
}
resp->acl_access = acl;
@@ -69,21 +69,24 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp)
if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
/* Check how Solaris handles requests for the Default ACL
of a non-directory! */
- acl = get_acl(inode, ACL_TYPE_DEFAULT);
+ acl = get_inode_acl(inode, ACL_TYPE_DEFAULT);
if (IS_ERR(acl)) {
- nfserr = nfserrno(PTR_ERR(acl));
+ resp->status = nfserrno(PTR_ERR(acl));
goto fail;
}
resp->acl_default = acl;
}
/* resp->acl_{access,default} are released in nfssvc_release_getacl. */
- RETURN_STATUS(0);
+out:
+ return rpc_success;
fail:
posix_acl_release(resp->acl_access);
posix_acl_release(resp->acl_default);
- RETURN_STATUS(nfserr);
+ resp->acl_access = NULL;
+ resp->acl_default = NULL;
+ goto out;
}
/*
@@ -95,14 +98,13 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
struct nfsd_attrstat *resp = rqstp->rq_resp;
struct inode *inode;
svc_fh *fh;
- __be32 nfserr = 0;
int error;
dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
fh = fh_copy(&resp->fh, &argp->fh);
- nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
- if (nfserr)
+ resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
+ if (resp->status != nfs_ok)
goto out;
inode = d_inode(fh->fh_dentry);
@@ -111,32 +113,35 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
if (error)
goto out_errno;
- fh_lock(fh);
+ inode_lock(inode);
- error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_ACCESS,
+ argp->acl_access);
if (error)
goto out_drop_lock;
- error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_DEFAULT,
+ argp->acl_default);
if (error)
goto out_drop_lock;
- fh_unlock(fh);
+ inode_unlock(inode);
fh_drop_write(fh);
- nfserr = fh_getattr(fh, &resp->stat);
+ resp->status = fh_getattr(fh, &resp->stat);
out:
/* argp->acl_{access,default} may have been allocated in
nfssvc_decode_setaclargs. */
posix_acl_release(argp->acl_access);
posix_acl_release(argp->acl_default);
- return nfserr;
+ return rpc_success;
+
out_drop_lock:
- fh_unlock(fh);
+ inode_unlock(inode);
fh_drop_write(fh);
out_errno:
- nfserr = nfserrno(error);
+ resp->status = nfserrno(error);
goto out;
}
@@ -147,15 +152,16 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst *rqstp)
{
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd_attrstat *resp = rqstp->rq_resp;
- __be32 nfserr;
+
dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
- nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
- if (nfserr)
- return nfserr;
- nfserr = fh_getattr(&resp->fh, &resp->stat);
- return nfserr;
+ resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+ if (resp->status != nfs_ok)
+ goto out;
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+out:
+ return rpc_success;
}
/*
@@ -165,7 +171,6 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
{
struct nfsd3_accessargs *argp = rqstp->rq_argp;
struct nfsd3_accessres *resp = rqstp->rq_resp;
- __be32 nfserr;
dprintk("nfsd: ACCESS(2acl) %s 0x%x\n",
SVCFH_fmt(&argp->fh),
@@ -173,152 +178,117 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->access = argp->access;
- nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
- if (nfserr)
- return nfserr;
- nfserr = fh_getattr(&resp->fh, &resp->stat);
- return nfserr;
+ resp->status = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
+ if (resp->status != nfs_ok)
+ goto out;
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+out:
+ return rpc_success;
}
/*
* XDR decode functions
*/
-static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
+
+static bool
+nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_getaclargs *argp = rqstp->rq_argp;
- p = nfs2svc_decode_fh(p, &argp->fh);
- if (!p)
- return 0;
- argp->mask = ntohl(*p); p++;
+ if (!svcxdr_decode_fhandle(xdr, &argp->fh))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
+ return false;
- return xdr_argsize_check(rqstp, p);
+ return true;
}
-
-static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
+static bool
+nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_setaclargs *argp = rqstp->rq_argp;
- struct kvec *head = rqstp->rq_arg.head;
- unsigned int base;
- int n;
-
- p = nfs2svc_decode_fh(p, &argp->fh);
- if (!p)
- return 0;
- argp->mask = ntohl(*p++);
- if (argp->mask & ~NFS_ACL_MASK ||
- !xdr_argsize_check(rqstp, p))
- return 0;
-
- base = (char *)p - (char *)head->iov_base;
- n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
- (argp->mask & NFS_ACL) ?
- &argp->acl_access : NULL);
- if (n > 0)
- n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,
- (argp->mask & NFS_DFACL) ?
- &argp->acl_default : NULL);
- return (n > 0);
-}
-static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
-{
- struct nfsd_fhandle *argp = rqstp->rq_argp;
-
- p = nfs2svc_decode_fh(p, &argp->fh);
- if (!p)
- return 0;
- return xdr_argsize_check(rqstp, p);
+ if (!svcxdr_decode_fhandle(xdr, &argp->fh))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
+ return false;
+ if (argp->mask & ~NFS_ACL_MASK)
+ return false;
+ if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ?
+ &argp->acl_access : NULL))
+ return false;
+ if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ?
+ &argp->acl_default : NULL))
+ return false;
+
+ return true;
}
-static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
+static bool
+nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
- struct nfsd3_accessargs *argp = rqstp->rq_argp;
+ struct nfsd3_accessargs *args = rqstp->rq_argp;
- p = nfs2svc_decode_fh(p, &argp->fh);
- if (!p)
- return 0;
- argp->access = ntohl(*p++);
+ if (!svcxdr_decode_fhandle(xdr, &args->fh))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->access) < 0)
+ return false;
- return xdr_argsize_check(rqstp, p);
+ return true;
}
/*
* XDR encode functions
*/
-/*
- * There must be an encoding function for void results so svc_process
- * will work properly.
- */
-static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
-{
- return xdr_ressize_check(rqstp, p);
-}
-
/* GETACL */
-static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
+static bool
+nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_getaclres *resp = rqstp->rq_resp;
struct dentry *dentry = resp->fh.fh_dentry;
struct inode *inode;
- struct kvec *head = rqstp->rq_res.head;
- unsigned int base;
- int n;
- int w;
-
- /*
- * Since this is version 2, the check for nfserr in
- * nfsd_dispatch actually ensures the following cannot happen.
- * However, it seems fragile to depend on that.
- */
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+ return false;
+
if (dentry == NULL || d_really_is_negative(dentry))
- return 0;
+ return true;
inode = d_inode(dentry);
- p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
- *p++ = htonl(resp->mask);
- if (!xdr_ressize_check(rqstp, p))
- return 0;
- base = (char *)p - (char *)head->iov_base;
-
- rqstp->rq_res.page_len = w = nfsacl_size(
- (resp->mask & NFS_ACL) ? resp->acl_access : NULL,
- (resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
- while (w > 0) {
- if (!*(rqstp->rq_next_page++))
- return 0;
- w -= PAGE_SIZE;
- }
-
- n = nfsacl_encode(&rqstp->rq_res, base, inode,
- resp->acl_access,
- resp->mask & NFS_ACL, 0);
- if (n > 0)
- n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
- resp->acl_default,
- resp->mask & NFS_DFACL,
- NFS_ACL_DEFAULT);
- return (n > 0);
-}
+ if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
+ return false;
+ if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
+ return false;
-static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
-{
- struct nfsd_attrstat *resp = rqstp->rq_resp;
+ if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access,
+ resp->mask & NFS_ACL, 0))
+ return false;
+ if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default,
+ resp->mask & NFS_DFACL, NFS_ACL_DEFAULT))
+ return false;
- p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
- return xdr_ressize_check(rqstp, p);
+ return true;
}
/* ACCESS */
-static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
+static bool
+nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_accessres *resp = rqstp->rq_resp;
- p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
- *p++ = htonl(resp->access);
- return xdr_ressize_check(rqstp, p);
+ if (!svcxdr_encode_stat(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
+ return false;
+ if (xdr_stream_encode_u32(xdr, resp->access) < 0)
+ return false;
+ break;
+ }
+
+ return true;
}
/*
@@ -333,13 +303,6 @@ static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp)
posix_acl_release(resp->acl_default);
}
-static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp)
-{
- struct nfsd_attrstat *resp = rqstp->rq_resp;
-
- fh_put(&resp->fh);
-}
-
static void nfsaclsvc_release_access(struct svc_rqst *rqstp)
{
struct nfsd3_accessres *resp = rqstp->rq_resp;
@@ -347,42 +310,78 @@ static void nfsaclsvc_release_access(struct svc_rqst *rqstp)
fh_put(&resp->fh);
}
-#define nfsaclsvc_decode_voidargs NULL
-#define nfsaclsvc_release_void NULL
-#define nfsd3_fhandleargs nfsd_fhandle
-#define nfsd3_attrstatres nfsd_attrstat
-#define nfsd3_voidres nfsd3_voidargs
-struct nfsd3_voidargs { int dummy; };
-
-#define PROC(name, argt, rest, relt, cache, respsize) \
-{ \
- .pc_func = nfsacld_proc_##name, \
- .pc_decode = nfsaclsvc_decode_##argt##args, \
- .pc_encode = nfsaclsvc_encode_##rest##res, \
- .pc_release = nfsaclsvc_release_##relt, \
- .pc_argsize = sizeof(struct nfsd3_##argt##args), \
- .pc_ressize = sizeof(struct nfsd3_##rest##res), \
- .pc_cachetype = cache, \
- .pc_xdrressize = respsize, \
-}
-
#define ST 1 /* status*/
#define AT 21 /* attributes */
#define pAT (1+AT) /* post attributes - conditional */
#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */
-static const struct svc_procedure nfsd_acl_procedures2[] = {
- PROC(null, void, void, void, RC_NOCACHE, ST),
- PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)),
- PROC(setacl, setacl, attrstat, attrstat, RC_NOCACHE, ST+AT),
- PROC(getattr, fhandle, attrstat, attrstat, RC_NOCACHE, ST+AT),
- PROC(access, access, access, access, RC_NOCACHE, ST+AT+1),
+static const struct svc_procedure nfsd_acl_procedures2[5] = {
+ [ACLPROC2_NULL] = {
+ .pc_func = nfsacld_proc_null,
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST,
+ .pc_name = "NULL",
+ },
+ [ACLPROC2_GETACL] = {
+ .pc_func = nfsacld_proc_getacl,
+ .pc_decode = nfsaclsvc_decode_getaclargs,
+ .pc_encode = nfsaclsvc_encode_getaclres,
+ .pc_release = nfsaclsvc_release_getacl,
+ .pc_argsize = sizeof(struct nfsd3_getaclargs),
+ .pc_argzero = sizeof(struct nfsd3_getaclargs),
+ .pc_ressize = sizeof(struct nfsd3_getaclres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+1+2*(1+ACL),
+ .pc_name = "GETACL",
+ },
+ [ACLPROC2_SETACL] = {
+ .pc_func = nfsacld_proc_setacl,
+ .pc_decode = nfsaclsvc_decode_setaclargs,
+ .pc_encode = nfssvc_encode_attrstatres,
+ .pc_release = nfssvc_release_attrstat,
+ .pc_argsize = sizeof(struct nfsd3_setaclargs),
+ .pc_argzero = sizeof(struct nfsd3_setaclargs),
+ .pc_ressize = sizeof(struct nfsd_attrstat),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+AT,
+ .pc_name = "SETACL",
+ },
+ [ACLPROC2_GETATTR] = {
+ .pc_func = nfsacld_proc_getattr,
+ .pc_decode = nfssvc_decode_fhandleargs,
+ .pc_encode = nfssvc_encode_attrstatres,
+ .pc_release = nfssvc_release_attrstat,
+ .pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
+ .pc_ressize = sizeof(struct nfsd_attrstat),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+AT,
+ .pc_name = "GETATTR",
+ },
+ [ACLPROC2_ACCESS] = {
+ .pc_func = nfsacld_proc_access,
+ .pc_decode = nfsaclsvc_decode_accessargs,
+ .pc_encode = nfsaclsvc_encode_accessres,
+ .pc_release = nfsaclsvc_release_access,
+ .pc_argsize = sizeof(struct nfsd3_accessargs),
+ .pc_argzero = sizeof(struct nfsd3_accessargs),
+ .pc_ressize = sizeof(struct nfsd3_accessres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+AT+1,
+ .pc_name = "SETATTR",
+ },
};
-static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)]);
const struct svc_version nfsd_acl_version2 = {
.vs_vers = 2,
- .vs_nproc = 5,
+ .vs_nproc = ARRAY_SIZE(nfsd_acl_procedures2),
.vs_proc = nfsd_acl_procedures2,
.vs_count = nfsd_acl_count2,
.vs_dispatch = nfsd_dispatch,
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 13bca4a2f89d..7b5433bd3019 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -13,15 +13,13 @@
#include "xdr3.h"
#include "vfs.h"
-#define RETURN_STATUS(st) { resp->status = (st); return (st); }
-
/*
* NULL call.
*/
static __be32
nfsd3_proc_null(struct svc_rqst *rqstp)
{
- return nfs_ok;
+ return rpc_success;
}
/*
@@ -34,27 +32,28 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp)
struct posix_acl *acl;
struct inode *inode;
svc_fh *fh;
- __be32 nfserr = 0;
fh = fh_copy(&resp->fh, &argp->fh);
- nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
- if (nfserr)
- RETURN_STATUS(nfserr);
+ resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
+ if (resp->status != nfs_ok)
+ goto out;
inode = d_inode(fh->fh_dentry);
- if (argp->mask & ~NFS_ACL_MASK)
- RETURN_STATUS(nfserr_inval);
+ if (argp->mask & ~NFS_ACL_MASK) {
+ resp->status = nfserr_inval;
+ goto out;
+ }
resp->mask = argp->mask;
if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
- acl = get_acl(inode, ACL_TYPE_ACCESS);
+ acl = get_inode_acl(inode, ACL_TYPE_ACCESS);
if (acl == NULL) {
/* Solaris returns the inode's minimum ACL. */
acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
}
if (IS_ERR(acl)) {
- nfserr = nfserrno(PTR_ERR(acl));
+ resp->status = nfserrno(PTR_ERR(acl));
goto fail;
}
resp->acl_access = acl;
@@ -62,21 +61,24 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp)
if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
/* Check how Solaris handles requests for the Default ACL
of a non-directory! */
- acl = get_acl(inode, ACL_TYPE_DEFAULT);
+ acl = get_inode_acl(inode, ACL_TYPE_DEFAULT);
if (IS_ERR(acl)) {
- nfserr = nfserrno(PTR_ERR(acl));
+ resp->status = nfserrno(PTR_ERR(acl));
goto fail;
}
resp->acl_default = acl;
}
/* resp->acl_{access,default} are released in nfs3svc_release_getacl. */
- RETURN_STATUS(0);
+out:
+ return rpc_success;
fail:
posix_acl_release(resp->acl_access);
posix_acl_release(resp->acl_default);
- RETURN_STATUS(nfserr);
+ resp->acl_access = NULL;
+ resp->acl_default = NULL;
+ goto out;
}
/*
@@ -88,12 +90,11 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
struct nfsd3_attrstat *resp = rqstp->rq_resp;
struct inode *inode;
svc_fh *fh;
- __be32 nfserr = 0;
int error;
fh = fh_copy(&resp->fh, &argp->fh);
- nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
- if (nfserr)
+ resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
+ if (resp->status != nfs_ok)
goto out;
inode = d_inode(fh->fh_dentry);
@@ -102,66 +103,64 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
if (error)
goto out_errno;
- fh_lock(fh);
+ inode_lock(inode);
- error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_ACCESS,
+ argp->acl_access);
if (error)
goto out_drop_lock;
- error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
+ error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_DEFAULT,
+ argp->acl_default);
out_drop_lock:
- fh_unlock(fh);
+ inode_unlock(inode);
fh_drop_write(fh);
out_errno:
- nfserr = nfserrno(error);
+ resp->status = nfserrno(error);
out:
/* argp->acl_{access,default} may have been allocated in
nfs3svc_decode_setaclargs. */
posix_acl_release(argp->acl_access);
posix_acl_release(argp->acl_default);
- RETURN_STATUS(nfserr);
+ return rpc_success;
}
/*
* XDR decode functions
*/
-static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
+
+static bool
+nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_getaclargs *args = rqstp->rq_argp;
- p = nfs3svc_decode_fh(p, &args->fh);
- if (!p)
- return 0;
- args->mask = ntohl(*p); p++;
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->mask) < 0)
+ return false;
- return xdr_argsize_check(rqstp, p);
+ return true;
}
-
-static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
+static bool
+nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
- struct nfsd3_setaclargs *args = rqstp->rq_argp;
- struct kvec *head = rqstp->rq_arg.head;
- unsigned int base;
- int n;
-
- p = nfs3svc_decode_fh(p, &args->fh);
- if (!p)
- return 0;
- args->mask = ntohl(*p++);
- if (args->mask & ~NFS_ACL_MASK ||
- !xdr_argsize_check(rqstp, p))
- return 0;
-
- base = (char *)p - (char *)head->iov_base;
- n = nfsacl_decode(&rqstp->rq_arg, base, NULL,
- (args->mask & NFS_ACL) ?
- &args->acl_access : NULL);
- if (n > 0)
- n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL,
- (args->mask & NFS_DFACL) ?
- &args->acl_default : NULL);
- return (n > 0);
+ struct nfsd3_setaclargs *argp = rqstp->rq_argp;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
+ return false;
+ if (argp->mask & ~NFS_ACL_MASK)
+ return false;
+ if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ?
+ &argp->acl_access : NULL))
+ return false;
+ if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ?
+ &argp->acl_default : NULL))
+ return false;
+
+ return true;
}
/*
@@ -169,58 +168,47 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
*/
/* GETACL */
-static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
+static bool
+nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_getaclres *resp = rqstp->rq_resp;
struct dentry *dentry = resp->fh.fh_dentry;
+ struct inode *inode;
- p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
- if (resp->status == 0 && dentry && d_really_is_positive(dentry)) {
- struct inode *inode = d_inode(dentry);
- struct kvec *head = rqstp->rq_res.head;
- unsigned int base;
- int n;
- int w;
-
- *p++ = htonl(resp->mask);
- if (!xdr_ressize_check(rqstp, p))
- return 0;
- base = (char *)p - (char *)head->iov_base;
-
- rqstp->rq_res.page_len = w = nfsacl_size(
- (resp->mask & NFS_ACL) ? resp->acl_access : NULL,
- (resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
- while (w > 0) {
- if (!*(rqstp->rq_next_page++))
- return 0;
- w -= PAGE_SIZE;
- }
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ inode = d_inode(dentry);
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
+ return false;
+
+ if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access,
+ resp->mask & NFS_ACL, 0))
+ return false;
+ if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default,
+ resp->mask & NFS_DFACL,
+ NFS_ACL_DEFAULT))
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ }
- n = nfsacl_encode(&rqstp->rq_res, base, inode,
- resp->acl_access,
- resp->mask & NFS_ACL, 0);
- if (n > 0)
- n = nfsacl_encode(&rqstp->rq_res, base + n, inode,
- resp->acl_default,
- resp->mask & NFS_DFACL,
- NFS_ACL_DEFAULT);
- if (n <= 0)
- return 0;
- } else
- if (!xdr_ressize_check(rqstp, p))
- return 0;
-
- return 1;
+ return true;
}
/* SETACL */
-static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p)
+static bool
+nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
-
- return xdr_ressize_check(rqstp, p);
+ return svcxdr_encode_nfsstat3(xdr, resp->status) &&
+ svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh);
}
/*
@@ -235,39 +223,54 @@ static void nfs3svc_release_getacl(struct svc_rqst *rqstp)
posix_acl_release(resp->acl_default);
}
-#define nfs3svc_decode_voidargs NULL
-#define nfs3svc_release_void NULL
-#define nfsd3_setaclres nfsd3_attrstat
-#define nfsd3_voidres nfsd3_voidargs
-struct nfsd3_voidargs { int dummy; };
-
-#define PROC(name, argt, rest, relt, cache, respsize) \
-{ \
- .pc_func = nfsd3_proc_##name, \
- .pc_decode = nfs3svc_decode_##argt##args, \
- .pc_encode = nfs3svc_encode_##rest##res, \
- .pc_release = nfs3svc_release_##relt, \
- .pc_argsize = sizeof(struct nfsd3_##argt##args), \
- .pc_ressize = sizeof(struct nfsd3_##rest##res), \
- .pc_cachetype = cache, \
- .pc_xdrressize = respsize, \
-}
-
#define ST 1 /* status*/
#define AT 21 /* attributes */
#define pAT (1+AT) /* post attributes - conditional */
#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */
-static const struct svc_procedure nfsd_acl_procedures3[] = {
- PROC(null, void, void, void, RC_NOCACHE, ST),
- PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)),
- PROC(setacl, setacl, setacl, fhandle, RC_NOCACHE, ST+pAT),
+static const struct svc_procedure nfsd_acl_procedures3[3] = {
+ [ACLPROC3_NULL] = {
+ .pc_func = nfsd3_proc_null,
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST,
+ .pc_name = "NULL",
+ },
+ [ACLPROC3_GETACL] = {
+ .pc_func = nfsd3_proc_getacl,
+ .pc_decode = nfs3svc_decode_getaclargs,
+ .pc_encode = nfs3svc_encode_getaclres,
+ .pc_release = nfs3svc_release_getacl,
+ .pc_argsize = sizeof(struct nfsd3_getaclargs),
+ .pc_argzero = sizeof(struct nfsd3_getaclargs),
+ .pc_ressize = sizeof(struct nfsd3_getaclres),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+1+2*(1+ACL),
+ .pc_name = "GETACL",
+ },
+ [ACLPROC3_SETACL] = {
+ .pc_func = nfsd3_proc_setacl,
+ .pc_decode = nfs3svc_decode_setaclargs,
+ .pc_encode = nfs3svc_encode_setaclres,
+ .pc_release = nfs3svc_release_fhandle,
+ .pc_argsize = sizeof(struct nfsd3_setaclargs),
+ .pc_argzero = sizeof(struct nfsd3_setaclargs),
+ .pc_ressize = sizeof(struct nfsd3_attrstat),
+ .pc_cachetype = RC_NOCACHE,
+ .pc_xdrressize = ST+pAT,
+ .pc_name = "SETACL",
+ },
};
-static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)]);
const struct svc_version nfsd_acl_version3 = {
.vs_vers = 3,
- .vs_nproc = 3,
+ .vs_nproc = ARRAY_SIZE(nfsd_acl_procedures3),
.vs_proc = nfsd_acl_procedures3,
.vs_count = nfsd_acl_count3,
.vs_dispatch = nfsd_dispatch,
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 9eb8086ea841..42adc5461db0 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -8,15 +8,16 @@
#include <linux/fs.h>
#include <linux/ext2_fs.h>
#include <linux/magic.h>
+#include <linux/namei.h>
#include "cache.h"
#include "xdr3.h"
#include "vfs.h"
+#include "filecache.h"
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
-#define RETURN_STATUS(st) { resp->status = (st); return (st); }
-
static int nfs3_ftypes[] = {
0, /* NF3NON */
S_IFREG, /* NF3REG */
@@ -28,13 +29,36 @@ static int nfs3_ftypes[] = {
S_IFIFO, /* NF3FIFO */
};
+static __be32 nfsd3_map_status(__be32 status)
+{
+ switch (status) {
+ case nfs_ok:
+ break;
+ case nfserr_nofilehandle:
+ status = nfserr_badhandle;
+ break;
+ case nfserr_wrongsec:
+ case nfserr_file_open:
+ status = nfserr_acces;
+ break;
+ case nfserr_symlink_not_dir:
+ status = nfserr_notdir;
+ break;
+ case nfserr_symlink:
+ case nfserr_wrong_type:
+ status = nfserr_inval;
+ break;
+ }
+ return status;
+}
+
/*
* NULL call.
*/
static __be32
nfsd3_proc_null(struct svc_rqst *rqstp)
{
- return nfs_ok;
+ return rpc_success;
}
/*
@@ -45,20 +69,19 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp)
{
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- __be32 nfserr;
- dprintk("nfsd: GETATTR(3) %s\n",
- SVCFH_fmt(&argp->fh));
+ trace_nfsd_vfs_getattr(rqstp, &argp->fh);
fh_copy(&resp->fh, &argp->fh);
- nfserr = fh_verify(rqstp, &resp->fh, 0,
- NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT);
- if (nfserr)
- RETURN_STATUS(nfserr);
-
- nfserr = fh_getattr(&resp->fh, &resp->stat);
-
- RETURN_STATUS(nfserr);
+ resp->status = fh_verify(rqstp, &resp->fh, 0,
+ NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT);
+ if (resp->status != nfs_ok)
+ goto out;
+
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+out:
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -69,15 +92,20 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp)
{
struct nfsd3_sattrargs *argp = rqstp->rq_argp;
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- __be32 nfserr;
+ struct nfsd_attrs attrs = {
+ .na_iattr = &argp->attrs,
+ };
+ const struct timespec64 *guardtime = NULL;
dprintk("nfsd: SETATTR(3) %s\n",
SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
- nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,
- argp->check_guard, argp->guardtime);
- RETURN_STATUS(nfserr);
+ if (argp->check_guard)
+ guardtime = &argp->guardtime;
+ resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, guardtime);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -88,7 +116,6 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp)
{
struct nfsd3_diropargs *argp = rqstp->rq_argp;
struct nfsd3_diropres *resp = rqstp->rq_resp;
- __be32 nfserr;
dprintk("nfsd: LOOKUP(3) %s %.*s\n",
SVCFH_fmt(&argp->fh),
@@ -98,11 +125,11 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp)
fh_copy(&resp->dirfh, &argp->fh);
fh_init(&resp->fh, NFS3_FHSIZE);
- nfserr = nfsd_lookup(rqstp, &resp->dirfh,
- argp->name,
- argp->len,
- &resp->fh);
- RETURN_STATUS(nfserr);
+ resp->status = nfsd_lookup(rqstp, &resp->dirfh,
+ argp->name, argp->len,
+ &resp->fh);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -113,7 +140,6 @@ nfsd3_proc_access(struct svc_rqst *rqstp)
{
struct nfsd3_accessargs *argp = rqstp->rq_argp;
struct nfsd3_accessres *resp = rqstp->rq_resp;
- __be32 nfserr;
dprintk("nfsd: ACCESS(3) %s 0x%x\n",
SVCFH_fmt(&argp->fh),
@@ -121,8 +147,9 @@ nfsd3_proc_access(struct svc_rqst *rqstp)
fh_copy(&resp->fh, &argp->fh);
resp->access = argp->access;
- nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
- RETURN_STATUS(nfserr);
+ resp->status = nfsd_access(rqstp, &resp->fh, &resp->access, NULL);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -131,17 +158,19 @@ nfsd3_proc_access(struct svc_rqst *rqstp)
static __be32
nfsd3_proc_readlink(struct svc_rqst *rqstp)
{
- struct nfsd3_readlinkargs *argp = rqstp->rq_argp;
+ struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd3_readlinkres *resp = rqstp->rq_resp;
- __be32 nfserr;
dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
/* Read the symlink. */
fh_copy(&resp->fh, &argp->fh);
resp->len = NFS3_MAXPATHLEN;
- nfserr = nfsd_readlink(rqstp, &resp->fh, argp->buffer, &resp->len);
- RETURN_STATUS(nfserr);
+ resp->pages = rqstp->rq_next_page++;
+ resp->status = nfsd_readlink(rqstp, &resp->fh,
+ page_address(*resp->pages), &resp->len);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -152,34 +181,34 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
{
struct nfsd3_readargs *argp = rqstp->rq_argp;
struct nfsd3_readres *resp = rqstp->rq_resp;
- __be32 nfserr;
- u32 max_blocksize = svc_max_payload(rqstp);
- unsigned long cnt = min(argp->count, max_blocksize);
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
(unsigned long) argp->count,
(unsigned long long) argp->offset);
+ argp->count = min_t(u32, argp->count, svc_max_payload(rqstp));
+ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
+ if (argp->offset > (u64)OFFSET_MAX)
+ argp->offset = (u64)OFFSET_MAX;
+ if (argp->offset + argp->count > (u64)OFFSET_MAX)
+ argp->count = (u64)OFFSET_MAX - argp->offset;
+
+ resp->pages = rqstp->rq_next_page;
+
/* Obtain buffer pointer for payload.
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
* + 1 (xdr opaque byte count) = 26
*/
- resp->count = cnt;
- svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
+ resp->count = argp->count;
+ svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3) << 2) +
+ resp->count + 4);
fh_copy(&resp->fh, &argp->fh);
- nfserr = nfsd_read(rqstp, &resp->fh,
- argp->offset,
- rqstp->rq_vec, argp->vlen,
- &resp->count);
- if (nfserr == 0) {
- struct inode *inode = d_inode(resp->fh.fh_dentry);
- resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
- inode->i_size);
- }
-
- RETURN_STATUS(nfserr);
+ resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
+ &resp->count, &resp->eof);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -190,67 +219,170 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
{
struct nfsd3_writeargs *argp = rqstp->rq_argp;
struct nfsd3_writeres *resp = rqstp->rq_resp;
- __be32 nfserr;
unsigned long cnt = argp->len;
- unsigned int nvecs;
dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n",
SVCFH_fmt(&argp->fh),
argp->len,
(unsigned long long) argp->offset,
- argp->stable? " stable" : "");
+ argp->stable ? " stable" : "");
+
+ resp->status = nfserr_fbig;
+ if (argp->offset > (u64)OFFSET_MAX ||
+ argp->offset + argp->len > (u64)OFFSET_MAX)
+ return rpc_success;
fh_copy(&resp->fh, &argp->fh);
resp->committed = argp->stable;
- nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
- &argp->first, cnt);
- if (!nvecs)
- RETURN_STATUS(nfserr_io);
- nfserr = nfsd_write(rqstp, &resp->fh, argp->offset,
- rqstp->rq_vec, nvecs, &cnt,
- resp->committed);
+ resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
+ &argp->payload, &cnt,
+ resp->committed, resp->verf);
resp->count = cnt;
- RETURN_STATUS(nfserr);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
- * With NFSv3, CREATE processing is a lot easier than with NFSv2.
- * At least in theory; we'll see how it fares in practice when the
- * first reports about SunOS compatibility problems start to pour in...
+ * Implement NFSv3's unchecked, guarded, and exclusive CREATE
+ * semantics for regular files. Except for the created file,
+ * this operation is stateless on the server.
+ *
+ * Upon return, caller must release @fhp and @resfhp.
*/
static __be32
+nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct nfsd3_createargs *argp)
+{
+ struct iattr *iap = &argp->attrs;
+ struct dentry *parent, *child;
+ struct nfsd_attrs attrs = {
+ .na_iattr = iap,
+ };
+ __u32 v_mtime, v_atime;
+ struct inode *inode;
+ __be32 status;
+ int host_err;
+
+ trace_nfsd_vfs_create(rqstp, fhp, S_IFREG, argp->name, argp->len);
+
+ if (isdotent(argp->name, argp->len))
+ return nfserr_exist;
+ if (!(iap->ia_valid & ATTR_MODE))
+ iap->ia_mode = 0;
+
+ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
+ if (status != nfs_ok)
+ return status;
+
+ parent = fhp->fh_dentry;
+ inode = d_inode(parent);
+
+ host_err = fh_want_write(fhp);
+ if (host_err)
+ return nfserrno(host_err);
+
+ child = start_creating(&nop_mnt_idmap, parent,
+ &QSTR_LEN(argp->name, argp->len));
+ if (IS_ERR(child)) {
+ status = nfserrno(PTR_ERR(child));
+ goto out_write;
+ }
+
+ if (d_really_is_negative(child)) {
+ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+ if (status != nfs_ok)
+ goto out;
+ }
+
+ status = fh_compose(resfhp, fhp->fh_export, child, fhp);
+ if (status != nfs_ok)
+ goto out;
+
+ v_mtime = 0;
+ v_atime = 0;
+ if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
+ u32 *verifier = (u32 *)argp->verf;
+
+ /*
+ * Solaris 7 gets confused (bugid 4218508) if these have
+ * the high bit set, as do xfs filesystems without the
+ * "bigtime" feature. So just clear the high bits.
+ */
+ v_mtime = verifier[0] & 0x7fffffff;
+ v_atime = verifier[1] & 0x7fffffff;
+ }
+
+ if (d_really_is_positive(child)) {
+ status = nfs_ok;
+
+ switch (argp->createmode) {
+ case NFS3_CREATE_UNCHECKED:
+ if (!d_is_reg(child))
+ break;
+ iap->ia_valid &= ATTR_SIZE;
+ goto set_attr;
+ case NFS3_CREATE_GUARDED:
+ status = nfserr_exist;
+ break;
+ case NFS3_CREATE_EXCLUSIVE:
+ if (inode_get_mtime_sec(d_inode(child)) == v_mtime &&
+ inode_get_atime_sec(d_inode(child)) == v_atime &&
+ d_inode(child)->i_size == 0) {
+ break;
+ }
+ status = nfserr_exist;
+ }
+ goto out;
+ }
+
+ if (!IS_POSIXACL(inode))
+ iap->ia_mode &= ~current_umask();
+
+ status = fh_fill_pre_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
+ host_err = vfs_create(&nop_mnt_idmap, child, iap->ia_mode, NULL);
+ if (host_err < 0) {
+ status = nfserrno(host_err);
+ goto out;
+ }
+ fh_fill_post_attrs(fhp);
+
+ /* A newly created file already has a file size of zero. */
+ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
+ iap->ia_valid &= ~ATTR_SIZE;
+ if (argp->createmode == NFS3_CREATE_EXCLUSIVE) {
+ iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
+ ATTR_MTIME_SET | ATTR_ATIME_SET;
+ iap->ia_mtime.tv_sec = v_mtime;
+ iap->ia_atime.tv_sec = v_atime;
+ iap->ia_mtime.tv_nsec = 0;
+ iap->ia_atime.tv_nsec = 0;
+ }
+
+set_attr:
+ status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs);
+
+out:
+ end_creating(child);
+out_write:
+ fh_drop_write(fhp);
+ return status;
+}
+
+static __be32
nfsd3_proc_create(struct svc_rqst *rqstp)
{
struct nfsd3_createargs *argp = rqstp->rq_argp;
struct nfsd3_diropres *resp = rqstp->rq_resp;
- svc_fh *dirfhp, *newfhp = NULL;
- struct iattr *attr;
- __be32 nfserr;
-
- dprintk("nfsd: CREATE(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
+ svc_fh *dirfhp, *newfhp;
dirfhp = fh_copy(&resp->dirfh, &argp->fh);
newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
- attr = &argp->attrs;
-
- /* Unfudge the mode bits */
- attr->ia_mode &= ~S_IFMT;
- if (!(attr->ia_valid & ATTR_MODE)) {
- attr->ia_valid |= ATTR_MODE;
- attr->ia_mode = S_IFREG;
- } else {
- attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG;
- }
- /* Now create the file and set attributes */
- nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
- attr, newfhp,
- argp->createmode, (u32 *)argp->verf, NULL, NULL);
-
- RETURN_STATUS(nfserr);
+ resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -261,20 +393,17 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp)
{
struct nfsd3_createargs *argp = rqstp->rq_argp;
struct nfsd3_diropres *resp = rqstp->rq_resp;
- __be32 nfserr;
-
- dprintk("nfsd: MKDIR(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
+ struct nfsd_attrs attrs = {
+ .na_iattr = &argp->attrs,
+ };
argp->attrs.ia_valid &= ~ATTR_SIZE;
fh_copy(&resp->dirfh, &argp->fh);
fh_init(&resp->fh, NFS3_FHSIZE);
- nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
- &argp->attrs, S_IFDIR, 0, &resp->fh);
- fh_unlock(&resp->dirfh);
- RETURN_STATUS(nfserr);
+ resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
+ &attrs, S_IFDIR, 0, &resp->fh);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
static __be32
@@ -282,30 +411,35 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp)
{
struct nfsd3_symlinkargs *argp = rqstp->rq_argp;
struct nfsd3_diropres *resp = rqstp->rq_resp;
- __be32 nfserr;
+ struct nfsd_attrs attrs = {
+ .na_iattr = &argp->attrs,
+ };
- if (argp->tlen == 0)
- RETURN_STATUS(nfserr_inval);
- if (argp->tlen > NFS3_MAXPATHLEN)
- RETURN_STATUS(nfserr_nametoolong);
+ if (argp->tlen == 0) {
+ resp->status = nfserr_inval;
+ goto out;
+ }
+ if (argp->tlen > NFS3_MAXPATHLEN) {
+ resp->status = nfserr_nametoolong;
+ goto out;
+ }
argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
page_address(rqstp->rq_arg.pages[0]),
argp->tlen);
- if (IS_ERR(argp->tname))
- RETURN_STATUS(nfserrno(PTR_ERR(argp->tname)));
-
- dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n",
- SVCFH_fmt(&argp->ffh),
- argp->flen, argp->fname,
- argp->tlen, argp->tname);
+ if (IS_ERR(argp->tname)) {
+ resp->status = nfserrno(PTR_ERR(argp->tname));
+ goto out;
+ }
fh_copy(&resp->dirfh, &argp->ffh);
fh_init(&resp->fh, NFS3_FHSIZE);
- nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen,
- argp->tname, &resp->fh);
+ resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname,
+ argp->flen, argp->tname, &attrs, &resp->fh);
kfree(argp->tname);
- RETURN_STATUS(nfserr);
+out:
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -316,34 +450,33 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp)
{
struct nfsd3_mknodargs *argp = rqstp->rq_argp;
struct nfsd3_diropres *resp = rqstp->rq_resp;
- __be32 nfserr;
+ struct nfsd_attrs attrs = {
+ .na_iattr = &argp->attrs,
+ };
int type;
dev_t rdev = 0;
- dprintk("nfsd: MKNOD(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
-
fh_copy(&resp->dirfh, &argp->fh);
fh_init(&resp->fh, NFS3_FHSIZE);
- if (argp->ftype == 0 || argp->ftype >= NF3BAD)
- RETURN_STATUS(nfserr_inval);
if (argp->ftype == NF3CHR || argp->ftype == NF3BLK) {
rdev = MKDEV(argp->major, argp->minor);
if (MAJOR(rdev) != argp->major ||
- MINOR(rdev) != argp->minor)
- RETURN_STATUS(nfserr_inval);
- } else
- if (argp->ftype != NF3SOCK && argp->ftype != NF3FIFO)
- RETURN_STATUS(nfserr_inval);
+ MINOR(rdev) != argp->minor) {
+ resp->status = nfserr_inval;
+ goto out;
+ }
+ } else if (argp->ftype != NF3SOCK && argp->ftype != NF3FIFO) {
+ resp->status = nfserr_badtype;
+ goto out;
+ }
type = nfs3_ftypes[argp->ftype];
- nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
- &argp->attrs, type, rdev, &resp->fh);
- fh_unlock(&resp->dirfh);
- RETURN_STATUS(nfserr);
+ resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
+ &attrs, type, rdev, &resp->fh);
+out:
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -354,18 +487,13 @@ nfsd3_proc_remove(struct svc_rqst *rqstp)
{
struct nfsd3_diropargs *argp = rqstp->rq_argp;
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- __be32 nfserr;
-
- dprintk("nfsd: REMOVE(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
/* Unlink. -S_IFDIR means file must not be a directory */
fh_copy(&resp->fh, &argp->fh);
- nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len);
- fh_unlock(&resp->fh);
- RETURN_STATUS(nfserr);
+ resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR,
+ argp->name, argp->len);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -376,17 +504,12 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp)
{
struct nfsd3_diropargs *argp = rqstp->rq_argp;
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- __be32 nfserr;
-
- dprintk("nfsd: RMDIR(3) %s %.*s\n",
- SVCFH_fmt(&argp->fh),
- argp->len,
- argp->name);
fh_copy(&resp->fh, &argp->fh);
- nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len);
- fh_unlock(&resp->fh);
- RETURN_STATUS(nfserr);
+ resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR,
+ argp->name, argp->len);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
static __be32
@@ -394,22 +517,13 @@ nfsd3_proc_rename(struct svc_rqst *rqstp)
{
struct nfsd3_renameargs *argp = rqstp->rq_argp;
struct nfsd3_renameres *resp = rqstp->rq_resp;
- __be32 nfserr;
-
- dprintk("nfsd: RENAME(3) %s %.*s ->\n",
- SVCFH_fmt(&argp->ffh),
- argp->flen,
- argp->fname);
- dprintk("nfsd: -> %s %.*s\n",
- SVCFH_fmt(&argp->tfh),
- argp->tlen,
- argp->tname);
fh_copy(&resp->ffh, &argp->ffh);
fh_copy(&resp->tfh, &argp->tfh);
- nfserr = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen,
- &resp->tfh, argp->tname, argp->tlen);
- RETURN_STATUS(nfserr);
+ resp->status = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen,
+ &resp->tfh, argp->tname, argp->tlen);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
static __be32
@@ -417,20 +531,33 @@ nfsd3_proc_link(struct svc_rqst *rqstp)
{
struct nfsd3_linkargs *argp = rqstp->rq_argp;
struct nfsd3_linkres *resp = rqstp->rq_resp;
- __be32 nfserr;
-
- dprintk("nfsd: LINK(3) %s ->\n",
- SVCFH_fmt(&argp->ffh));
- dprintk("nfsd: -> %s %.*s\n",
- SVCFH_fmt(&argp->tfh),
- argp->tlen,
- argp->tname);
fh_copy(&resp->fh, &argp->ffh);
fh_copy(&resp->tfh, &argp->tfh);
- nfserr = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen,
- &resp->fh);
- RETURN_STATUS(nfserr);
+ resp->status = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen,
+ &resp->fh);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
+}
+
+static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp,
+ struct nfsd3_readdirres *resp,
+ u32 count)
+{
+ struct xdr_buf *buf = &resp->dirlist;
+ struct xdr_stream *xdr = &resp->xdr;
+ unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen,
+ svc_max_payload(rqstp));
+
+ memset(buf, 0, sizeof(*buf));
+
+ /* Reserve room for the NULL ptr & eof flag (-2 words) */
+ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf);
+ buf->buflen -= XDR_UNIT * 2;
+ buf->pages = rqstp->rq_next_page;
+ rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ xdr_init_encode_pages(xdr, buf);
}
/*
@@ -441,32 +568,27 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
{
struct nfsd3_readdirargs *argp = rqstp->rq_argp;
struct nfsd3_readdirres *resp = rqstp->rq_resp;
- __be32 nfserr;
- int count;
+ loff_t offset;
- dprintk("nfsd: READDIR(3) %s %d bytes at %d\n",
- SVCFH_fmt(&argp->fh),
- argp->count, (u32) argp->cookie);
+ trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie);
- /* Make sure we've room for the NULL ptr & eof flag, and shrink to
- * client read size */
- count = (argp->count >> 2) - 2;
+ nfsd3_init_dirlist_pages(rqstp, resp, argp->count);
- /* Read directory and encode entries on the fly */
fh_copy(&resp->fh, &argp->fh);
-
- resp->buflen = count;
resp->common.err = nfs_ok;
- resp->buffer = argp->buffer;
+ resp->cookie_offset = 0;
resp->rqstp = rqstp;
- nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie,
- &resp->common, nfs3svc_encode_entry);
+ offset = argp->cookie;
+ resp->status = nfsd_readdir(rqstp, &resp->fh, &offset,
+ &resp->common, nfs3svc_encode_entry3);
memcpy(resp->verf, argp->verf, 8);
- resp->count = resp->buffer - argp->buffer;
- if (resp->offset)
- xdr_encode_hyper(resp->offset, argp->cookie);
+ nfs3svc_encode_cookie3(resp, offset);
- RETURN_STATUS(nfserr);
+ /* Recycle only pages that were part of the reply */
+ rqstp->rq_next_page = resp->xdr.page_ptr + 1;
+
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -478,64 +600,38 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp)
{
struct nfsd3_readdirargs *argp = rqstp->rq_argp;
struct nfsd3_readdirres *resp = rqstp->rq_resp;
- __be32 nfserr;
- int count = 0;
loff_t offset;
- struct page **p;
- caddr_t page_addr = NULL;
- dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
- SVCFH_fmt(&argp->fh),
- argp->count, (u32) argp->cookie);
+ trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie);
- /* Convert byte count to number of words (i.e. >> 2),
- * and reserve room for the NULL ptr & eof flag (-2 words) */
- resp->count = (argp->count >> 2) - 2;
+ nfsd3_init_dirlist_pages(rqstp, resp, argp->count);
- /* Read directory and encode entries on the fly */
fh_copy(&resp->fh, &argp->fh);
-
resp->common.err = nfs_ok;
- resp->buffer = argp->buffer;
- resp->buflen = resp->count;
+ resp->cookie_offset = 0;
resp->rqstp = rqstp;
offset = argp->cookie;
- nfserr = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP);
- if (nfserr)
- RETURN_STATUS(nfserr);
+ resp->status = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP);
+ if (resp->status != nfs_ok)
+ goto out;
- if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS)
- RETURN_STATUS(nfserr_notsupp);
+ if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS) {
+ resp->status = nfserr_notsupp;
+ goto out;
+ }
- nfserr = nfsd_readdir(rqstp, &resp->fh,
- &offset,
- &resp->common,
- nfs3svc_encode_entry_plus);
+ resp->status = nfsd_readdir(rqstp, &resp->fh, &offset,
+ &resp->common, nfs3svc_encode_entryplus3);
memcpy(resp->verf, argp->verf, 8);
- for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
- page_addr = page_address(*p);
+ nfs3svc_encode_cookie3(resp, offset);
- if (((caddr_t)resp->buffer >= page_addr) &&
- ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
- count += (caddr_t)resp->buffer - page_addr;
- break;
- }
- count += PAGE_SIZE;
- }
- resp->count = count >> 2;
- if (resp->offset) {
- if (unlikely(resp->offset1)) {
- /* we ended up with offset on a page boundary */
- *resp->offset = htonl(offset >> 32);
- *resp->offset1 = htonl(offset & 0xffffffff);
- resp->offset1 = NULL;
- } else {
- xdr_encode_hyper(resp->offset, offset);
- }
- }
+ /* Recycle only pages that were part of the reply */
+ rqstp->rq_next_page = resp->xdr.page_ptr + 1;
- RETURN_STATUS(nfserr);
+out:
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -546,14 +642,11 @@ nfsd3_proc_fsstat(struct svc_rqst *rqstp)
{
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd3_fsstatres *resp = rqstp->rq_resp;
- __be32 nfserr;
-
- dprintk("nfsd: FSSTAT(3) %s\n",
- SVCFH_fmt(&argp->fh));
- nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0);
+ resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0);
fh_put(&argp->fh);
- RETURN_STATUS(nfserr);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -564,7 +657,6 @@ nfsd3_proc_fsinfo(struct svc_rqst *rqstp)
{
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd3_fsinfores *resp = rqstp->rq_resp;
- __be32 nfserr;
u32 max_blocksize = svc_max_payload(rqstp);
dprintk("nfsd: FSINFO(3) %s\n",
@@ -576,17 +668,17 @@ nfsd3_proc_fsinfo(struct svc_rqst *rqstp)
resp->f_wtmax = max_blocksize;
resp->f_wtpref = max_blocksize;
resp->f_wtmult = PAGE_SIZE;
- resp->f_dtpref = PAGE_SIZE;
+ resp->f_dtpref = max_blocksize;
resp->f_maxfilesize = ~(u32) 0;
resp->f_properties = NFS3_FSF_DEFAULT;
- nfserr = fh_verify(rqstp, &argp->fh, 0,
- NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT);
+ resp->status = fh_verify(rqstp, &argp->fh, 0,
+ NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT);
/* Check special features of the file system. May request
* different read/write sizes for file systems known to have
* problems with large blocks */
- if (nfserr == 0) {
+ if (resp->status == nfs_ok) {
struct super_block *sb = argp->fh.fh_dentry->d_sb;
/* Note that we don't care for remote fs's here */
@@ -597,7 +689,8 @@ nfsd3_proc_fsinfo(struct svc_rqst *rqstp)
}
fh_put(&argp->fh);
- RETURN_STATUS(nfserr);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -608,7 +701,6 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp)
{
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd3_pathconfres *resp = rqstp->rq_resp;
- __be32 nfserr;
dprintk("nfsd: PATHCONF(3) %s\n",
SVCFH_fmt(&argp->fh));
@@ -621,9 +713,9 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp)
resp->p_case_insensitive = 0;
resp->p_case_preserving = 1;
- nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
+ resp->status = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
- if (nfserr == 0) {
+ if (resp->status == nfs_ok) {
struct super_block *sb = argp->fh.fh_dentry->d_sb;
/* Note that we don't care for remote fs's here */
@@ -640,10 +732,10 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp)
}
fh_put(&argp->fh);
- RETURN_STATUS(nfserr);
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
-
/*
* Commit a file (range) to stable storage.
*/
@@ -652,20 +744,24 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
{
struct nfsd3_commitargs *argp = rqstp->rq_argp;
struct nfsd3_commitres *resp = rqstp->rq_resp;
- __be32 nfserr;
+ struct nfsd_file *nf;
dprintk("nfsd: COMMIT(3) %s %u@%Lu\n",
SVCFH_fmt(&argp->fh),
argp->count,
(unsigned long long) argp->offset);
- if (argp->offset > NFS_OFFSET_MAX)
- RETURN_STATUS(nfserr_inval);
-
fh_copy(&resp->fh, &argp->fh);
- nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count);
-
- RETURN_STATUS(nfserr);
+ resp->status = nfsd_file_acquire_gc(rqstp, &resp->fh, NFSD_MAY_WRITE |
+ NFSD_MAY_NOT_BREAK_LEASE, &nf);
+ if (resp->status)
+ goto out;
+ resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset,
+ argp->count, resp->verf);
+ nfsd_file_put(nf);
+out:
+ resp->status = nfsd3_map_status(resp->status);
+ return rpc_success;
}
@@ -673,18 +769,14 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
* NFSv3 Server procedures.
* Only the results of non-idempotent operations are cached.
*/
-#define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle
#define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat
#define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat
#define nfsd3_mkdirargs nfsd3_createargs
#define nfsd3_readdirplusargs nfsd3_readdirargs
#define nfsd3_fhandleargs nfsd_fhandle
-#define nfsd3_fhandleres nfsd3_attrstat
#define nfsd3_attrstatres nfsd3_attrstat
#define nfsd3_wccstatres nfsd3_attrstat
#define nfsd3_createres nfsd3_diropres
-#define nfsd3_voidres nfsd3_voidargs
-struct nfsd3_voidargs { int dummy; };
#define ST 1 /* status*/
#define FH 17 /* filehandle with length */
@@ -695,21 +787,26 @@ struct nfsd3_voidargs { int dummy; };
static const struct svc_procedure nfsd_procedures3[22] = {
[NFS3PROC_NULL] = {
.pc_func = nfsd3_proc_null,
- .pc_encode = nfs3svc_encode_voidres,
- .pc_argsize = sizeof(struct nfsd3_voidargs),
- .pc_ressize = sizeof(struct nfsd3_voidres),
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST,
+ .pc_name = "NULL",
},
[NFS3PROC_GETATTR] = {
.pc_func = nfsd3_proc_getattr,
.pc_decode = nfs3svc_decode_fhandleargs,
- .pc_encode = nfs3svc_encode_attrstatres,
+ .pc_encode = nfs3svc_encode_getattrres,
.pc_release = nfs3svc_release_fhandle,
- .pc_argsize = sizeof(struct nfsd3_fhandleargs),
+ .pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd3_attrstatres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+AT,
+ .pc_name = "GETATTR",
},
[NFS3PROC_SETATTR] = {
.pc_func = nfsd3_proc_setattr,
@@ -717,19 +814,23 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_wccstatres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_sattrargs),
+ .pc_argzero = sizeof(struct nfsd3_sattrargs),
.pc_ressize = sizeof(struct nfsd3_wccstatres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+WC,
+ .pc_name = "SETATTR",
},
[NFS3PROC_LOOKUP] = {
.pc_func = nfsd3_proc_lookup,
.pc_decode = nfs3svc_decode_diropargs,
- .pc_encode = nfs3svc_encode_diropres,
+ .pc_encode = nfs3svc_encode_lookupres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_diropargs),
+ .pc_argzero = sizeof(struct nfsd3_diropargs),
.pc_ressize = sizeof(struct nfsd3_diropres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+FH+pAT+pAT,
+ .pc_name = "LOOKUP",
},
[NFS3PROC_ACCESS] = {
.pc_func = nfsd3_proc_access,
@@ -737,19 +838,23 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_accessres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_accessargs),
+ .pc_argzero = sizeof(struct nfsd3_accessargs),
.pc_ressize = sizeof(struct nfsd3_accessres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+1,
+ .pc_name = "ACCESS",
},
[NFS3PROC_READLINK] = {
.pc_func = nfsd3_proc_readlink,
- .pc_decode = nfs3svc_decode_readlinkargs,
+ .pc_decode = nfs3svc_decode_fhandleargs,
.pc_encode = nfs3svc_encode_readlinkres,
.pc_release = nfs3svc_release_fhandle,
- .pc_argsize = sizeof(struct nfsd3_readlinkargs),
+ .pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd3_readlinkres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4,
+ .pc_name = "READLINK",
},
[NFS3PROC_READ] = {
.pc_func = nfsd3_proc_read,
@@ -757,9 +862,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_readres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_readargs),
+ .pc_argzero = sizeof(struct nfsd3_readargs),
.pc_ressize = sizeof(struct nfsd3_readres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4,
+ .pc_name = "READ",
},
[NFS3PROC_WRITE] = {
.pc_func = nfsd3_proc_write,
@@ -767,9 +874,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_writeres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_writeargs),
+ .pc_argzero = sizeof(struct nfsd3_writeargs),
.pc_ressize = sizeof(struct nfsd3_writeres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+WC+4,
+ .pc_name = "WRITE",
},
[NFS3PROC_CREATE] = {
.pc_func = nfsd3_proc_create,
@@ -777,9 +886,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_createres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_createargs),
+ .pc_argzero = sizeof(struct nfsd3_createargs),
.pc_ressize = sizeof(struct nfsd3_createres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+(1+FH+pAT)+WC,
+ .pc_name = "CREATE",
},
[NFS3PROC_MKDIR] = {
.pc_func = nfsd3_proc_mkdir,
@@ -787,9 +898,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_createres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_mkdirargs),
+ .pc_argzero = sizeof(struct nfsd3_mkdirargs),
.pc_ressize = sizeof(struct nfsd3_createres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+(1+FH+pAT)+WC,
+ .pc_name = "MKDIR",
},
[NFS3PROC_SYMLINK] = {
.pc_func = nfsd3_proc_symlink,
@@ -797,9 +910,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_createres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_symlinkargs),
+ .pc_argzero = sizeof(struct nfsd3_symlinkargs),
.pc_ressize = sizeof(struct nfsd3_createres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+(1+FH+pAT)+WC,
+ .pc_name = "SYMLINK",
},
[NFS3PROC_MKNOD] = {
.pc_func = nfsd3_proc_mknod,
@@ -807,9 +922,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_createres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_mknodargs),
+ .pc_argzero = sizeof(struct nfsd3_mknodargs),
.pc_ressize = sizeof(struct nfsd3_createres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+(1+FH+pAT)+WC,
+ .pc_name = "MKNOD",
},
[NFS3PROC_REMOVE] = {
.pc_func = nfsd3_proc_remove,
@@ -817,9 +934,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_wccstatres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_diropargs),
+ .pc_argzero = sizeof(struct nfsd3_diropargs),
.pc_ressize = sizeof(struct nfsd3_wccstatres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+WC,
+ .pc_name = "REMOVE",
},
[NFS3PROC_RMDIR] = {
.pc_func = nfsd3_proc_rmdir,
@@ -827,9 +946,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_wccstatres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_diropargs),
+ .pc_argzero = sizeof(struct nfsd3_diropargs),
.pc_ressize = sizeof(struct nfsd3_wccstatres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+WC,
+ .pc_name = "RMDIR",
},
[NFS3PROC_RENAME] = {
.pc_func = nfsd3_proc_rename,
@@ -837,9 +958,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_renameres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_renameargs),
+ .pc_argzero = sizeof(struct nfsd3_renameargs),
.pc_ressize = sizeof(struct nfsd3_renameres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+WC+WC,
+ .pc_name = "RENAME",
},
[NFS3PROC_LINK] = {
.pc_func = nfsd3_proc_link,
@@ -847,9 +970,11 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_linkres,
.pc_release = nfs3svc_release_fhandle2,
.pc_argsize = sizeof(struct nfsd3_linkargs),
+ .pc_argzero = sizeof(struct nfsd3_linkargs),
.pc_ressize = sizeof(struct nfsd3_linkres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+pAT+WC,
+ .pc_name = "LINK",
},
[NFS3PROC_READDIR] = {
.pc_func = nfsd3_proc_readdir,
@@ -857,8 +982,10 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_readdirres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_readdirargs),
+ .pc_argzero = sizeof(struct nfsd3_readdirargs),
.pc_ressize = sizeof(struct nfsd3_readdirres),
.pc_cachetype = RC_NOCACHE,
+ .pc_name = "READDIR",
},
[NFS3PROC_READDIRPLUS] = {
.pc_func = nfsd3_proc_readdirplus,
@@ -866,35 +993,43 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_readdirres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_readdirplusargs),
+ .pc_argzero = sizeof(struct nfsd3_readdirplusargs),
.pc_ressize = sizeof(struct nfsd3_readdirres),
.pc_cachetype = RC_NOCACHE,
+ .pc_name = "READDIRPLUS",
},
[NFS3PROC_FSSTAT] = {
.pc_func = nfsd3_proc_fsstat,
.pc_decode = nfs3svc_decode_fhandleargs,
.pc_encode = nfs3svc_encode_fsstatres,
.pc_argsize = sizeof(struct nfsd3_fhandleargs),
+ .pc_argzero = sizeof(struct nfsd3_fhandleargs),
.pc_ressize = sizeof(struct nfsd3_fsstatres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+2*6+1,
+ .pc_name = "FSSTAT",
},
[NFS3PROC_FSINFO] = {
.pc_func = nfsd3_proc_fsinfo,
.pc_decode = nfs3svc_decode_fhandleargs,
.pc_encode = nfs3svc_encode_fsinfores,
.pc_argsize = sizeof(struct nfsd3_fhandleargs),
+ .pc_argzero = sizeof(struct nfsd3_fhandleargs),
.pc_ressize = sizeof(struct nfsd3_fsinfores),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+12,
+ .pc_name = "FSINFO",
},
[NFS3PROC_PATHCONF] = {
.pc_func = nfsd3_proc_pathconf,
.pc_decode = nfs3svc_decode_fhandleargs,
.pc_encode = nfs3svc_encode_pathconfres,
.pc_argsize = sizeof(struct nfsd3_fhandleargs),
+ .pc_argzero = sizeof(struct nfsd3_fhandleargs),
.pc_ressize = sizeof(struct nfsd3_pathconfres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+pAT+6,
+ .pc_name = "PATHCONF",
},
[NFS3PROC_COMMIT] = {
.pc_func = nfsd3_proc_commit,
@@ -902,16 +1037,19 @@ static const struct svc_procedure nfsd_procedures3[22] = {
.pc_encode = nfs3svc_encode_commitres,
.pc_release = nfs3svc_release_fhandle,
.pc_argsize = sizeof(struct nfsd3_commitargs),
+ .pc_argzero = sizeof(struct nfsd3_commitargs),
.pc_ressize = sizeof(struct nfsd3_commitres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+WC+2,
+ .pc_name = "COMMIT",
},
};
-static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfsd_count3[ARRAY_SIZE(nfsd_procedures3)]);
const struct svc_version nfsd_version3 = {
.vs_vers = 3,
- .vs_nproc = 22,
+ .vs_nproc = ARRAY_SIZE(nfsd_procedures3),
.vs_proc = nfsd_procedures3,
.vs_dispatch = nfsd_dispatch,
.vs_count = nfsd_count3,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 9b973f4f7d01..ef4971d71ac4 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -14,820 +14,966 @@
#include "netns.h"
#include "vfs.h"
-#define NFSDDBG_FACILITY NFSDDBG_XDR
+/*
+ * Force construction of an empty post-op attr
+ */
+static const struct svc_fh nfs3svc_null_fh = {
+ .fh_no_wcc = true,
+};
+/*
+ * time_delta. {1, 0} means the server is accurate only
+ * to the nearest second.
+ */
+static const struct timespec64 nfs3svc_time_delta = {
+ .tv_sec = 1,
+ .tv_nsec = 0,
+};
/*
* Mapping of S_IF* types to NFS file types
*/
-static u32 nfs3_ftypes[] = {
+static const u32 nfs3_ftypes[] = {
NF3NON, NF3FIFO, NF3CHR, NF3BAD,
NF3DIR, NF3BAD, NF3BLK, NF3BAD,
NF3REG, NF3BAD, NF3LNK, NF3BAD,
NF3SOCK, NF3BAD, NF3LNK, NF3BAD,
};
+
/*
- * XDR functions for basic NFS types
+ * Basic NFSv3 data types (RFC 1813 Sections 2.5 and 2.6)
*/
+
static __be32 *
-encode_time3(__be32 *p, struct timespec *time)
+encode_nfstime3(__be32 *p, const struct timespec64 *time)
{
- *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec);
+ *p++ = cpu_to_be32((u32)time->tv_sec);
+ *p++ = cpu_to_be32(time->tv_nsec);
+
return p;
}
-static __be32 *
-decode_time3(__be32 *p, struct timespec *time)
+static bool
+svcxdr_decode_nfstime3(struct xdr_stream *xdr, struct timespec64 *timep)
{
- time->tv_sec = ntohl(*p++);
- time->tv_nsec = ntohl(*p++);
- return p;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, XDR_UNIT * 2);
+ if (!p)
+ return false;
+ timep->tv_sec = be32_to_cpup(p++);
+ timep->tv_nsec = be32_to_cpup(p);
+
+ return true;
}
-static __be32 *
-decode_fh(__be32 *p, struct svc_fh *fhp)
+/**
+ * svcxdr_decode_nfs_fh3 - Decode an NFSv3 file handle
+ * @xdr: XDR stream positioned at an undecoded NFSv3 FH
+ * @fhp: OUT: filled-in server file handle
+ *
+ * Return values:
+ * %false: The encoded file handle was not valid
+ * %true: @fhp has been initialized
+ */
+bool
+svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp)
{
- unsigned int size;
+ __be32 *p;
+ u32 size;
+
+ if (xdr_stream_decode_u32(xdr, &size) < 0)
+ return false;
+ if (size == 0 || size > NFS3_FHSIZE)
+ return false;
+ p = xdr_inline_decode(xdr, size);
+ if (!p)
+ return false;
fh_init(fhp, NFS3_FHSIZE);
- size = ntohl(*p++);
- if (size > NFS3_FHSIZE)
- return NULL;
-
- memcpy(&fhp->fh_handle.fh_base, p, size);
fhp->fh_handle.fh_size = size;
- return p + XDR_QUADLEN(size);
+ memcpy(&fhp->fh_handle.fh_raw, p, size);
+
+ return true;
}
-/* Helper function for NFSv3 ACL code */
-__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp)
+/**
+ * svcxdr_encode_nfsstat3 - Encode an NFSv3 status code
+ * @xdr: XDR stream
+ * @status: status value to encode
+ *
+ * Return values:
+ * %false: Send buffer space was exhausted
+ * %true: Success
+ */
+bool
+svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status)
{
- return decode_fh(p, fhp);
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, sizeof(status));
+ if (!p)
+ return false;
+ *p = status;
+
+ return true;
}
-static __be32 *
-encode_fh(__be32 *p, struct svc_fh *fhp)
+static bool
+svcxdr_encode_nfs_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp)
+{
+ u32 size = fhp->fh_handle.fh_size;
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, XDR_UNIT + size);
+ if (!p)
+ return false;
+ *p++ = cpu_to_be32(size);
+ if (size)
+ p[XDR_QUADLEN(size) - 1] = 0;
+ memcpy(p, &fhp->fh_handle.fh_raw, size);
+
+ return true;
+}
+
+static bool
+svcxdr_encode_post_op_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp)
{
- unsigned int size = fhp->fh_handle.fh_size;
- *p++ = htonl(size);
- if (size) p[XDR_QUADLEN(size)-1]=0;
- memcpy(p, &fhp->fh_handle.fh_base, size);
- return p + XDR_QUADLEN(size);
+ if (xdr_stream_encode_item_present(xdr) < 0)
+ return false;
+ if (!svcxdr_encode_nfs_fh3(xdr, fhp))
+ return false;
+
+ return true;
}
-/*
- * Decode a file name and make sure that the path contains
- * no slashes or null bytes.
- */
-static __be32 *
-decode_filename(__be32 *p, char **namp, unsigned int *lenp)
+static bool
+svcxdr_encode_cookieverf3(struct xdr_stream *xdr, const __be32 *verf)
{
- char *name;
- unsigned int i;
-
- if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) {
- for (i = 0, name = *namp; i < *lenp; i++, name++) {
- if (*name == '\0' || *name == '/')
- return NULL;
- }
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS3_COOKIEVERFSIZE);
+ if (!p)
+ return false;
+ memcpy(p, verf, NFS3_COOKIEVERFSIZE);
+
+ return true;
+}
+
+static bool
+svcxdr_encode_writeverf3(struct xdr_stream *xdr, const __be32 *verf)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS3_WRITEVERFSIZE);
+ if (!p)
+ return false;
+ memcpy(p, verf, NFS3_WRITEVERFSIZE);
+
+ return true;
+}
+
+static bool
+svcxdr_decode_filename3(struct xdr_stream *xdr, char **name, unsigned int *len)
+{
+ u32 size, i;
+ __be32 *p;
+ char *c;
+
+ if (xdr_stream_decode_u32(xdr, &size) < 0)
+ return false;
+ if (size == 0 || size > NFS3_MAXNAMLEN)
+ return false;
+ p = xdr_inline_decode(xdr, size);
+ if (!p)
+ return false;
+
+ *len = size;
+ *name = (char *)p;
+ for (i = 0, c = *name; i < size; i++, c++) {
+ if (*c == '\0' || *c == '/')
+ return false;
}
- return p;
+ return true;
}
-static __be32 *
-decode_sattr3(__be32 *p, struct iattr *iap)
+static bool
+svcxdr_decode_diropargs3(struct xdr_stream *xdr, struct svc_fh *fhp,
+ char **name, unsigned int *len)
+{
+ return svcxdr_decode_nfs_fh3(xdr, fhp) &&
+ svcxdr_decode_filename3(xdr, name, len);
+}
+
+static bool
+svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ struct iattr *iap)
{
- u32 tmp;
+ u32 set_it;
iap->ia_valid = 0;
- if (*p++) {
+ if (xdr_stream_decode_bool(xdr, &set_it) < 0)
+ return false;
+ if (set_it) {
+ u32 mode;
+
+ if (xdr_stream_decode_u32(xdr, &mode) < 0)
+ return false;
iap->ia_valid |= ATTR_MODE;
- iap->ia_mode = ntohl(*p++);
+ iap->ia_mode = mode;
}
- if (*p++) {
- iap->ia_uid = make_kuid(&init_user_ns, ntohl(*p++));
+ if (xdr_stream_decode_bool(xdr, &set_it) < 0)
+ return false;
+ if (set_it) {
+ u32 uid;
+
+ if (xdr_stream_decode_u32(xdr, &uid) < 0)
+ return false;
+ iap->ia_uid = make_kuid(nfsd_user_namespace(rqstp), uid);
if (uid_valid(iap->ia_uid))
iap->ia_valid |= ATTR_UID;
}
- if (*p++) {
- iap->ia_gid = make_kgid(&init_user_ns, ntohl(*p++));
+ if (xdr_stream_decode_bool(xdr, &set_it) < 0)
+ return false;
+ if (set_it) {
+ u32 gid;
+
+ if (xdr_stream_decode_u32(xdr, &gid) < 0)
+ return false;
+ iap->ia_gid = make_kgid(nfsd_user_namespace(rqstp), gid);
if (gid_valid(iap->ia_gid))
iap->ia_valid |= ATTR_GID;
}
- if (*p++) {
- u64 newsize;
+ if (xdr_stream_decode_bool(xdr, &set_it) < 0)
+ return false;
+ if (set_it) {
+ u64 newsize;
+ if (xdr_stream_decode_u64(xdr, &newsize) < 0)
+ return false;
iap->ia_valid |= ATTR_SIZE;
- p = xdr_decode_hyper(p, &newsize);
- iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX);
+ iap->ia_size = newsize;
}
- if ((tmp = ntohl(*p++)) == 1) { /* set to server time */
+ if (xdr_stream_decode_u32(xdr, &set_it) < 0)
+ return false;
+ switch (set_it) {
+ case DONT_CHANGE:
+ break;
+ case SET_TO_SERVER_TIME:
iap->ia_valid |= ATTR_ATIME;
- } else if (tmp == 2) { /* set to client time */
+ break;
+ case SET_TO_CLIENT_TIME:
+ if (!svcxdr_decode_nfstime3(xdr, &iap->ia_atime))
+ return false;
iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
- iap->ia_atime.tv_sec = ntohl(*p++);
- iap->ia_atime.tv_nsec = ntohl(*p++);
+ break;
+ default:
+ return false;
}
- if ((tmp = ntohl(*p++)) == 1) { /* set to server time */
+ if (xdr_stream_decode_u32(xdr, &set_it) < 0)
+ return false;
+ switch (set_it) {
+ case DONT_CHANGE:
+ break;
+ case SET_TO_SERVER_TIME:
iap->ia_valid |= ATTR_MTIME;
- } else if (tmp == 2) { /* set to client time */
+ break;
+ case SET_TO_CLIENT_TIME:
+ if (!svcxdr_decode_nfstime3(xdr, &iap->ia_mtime))
+ return false;
iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
- iap->ia_mtime.tv_sec = ntohl(*p++);
- iap->ia_mtime.tv_nsec = ntohl(*p++);
+ break;
+ default:
+ return false;
}
- return p;
+
+ return true;
}
-static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp)
+static bool
+svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args)
{
- u64 f;
- switch(fsid_source(fhp)) {
- default:
- case FSIDSOURCE_DEV:
- p = xdr_encode_hyper(p, (u64)huge_encode_dev
- (fhp->fh_dentry->d_sb->s_dev));
- break;
- case FSIDSOURCE_FSID:
- p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
- break;
- case FSIDSOURCE_UUID:
- f = ((u64*)fhp->fh_export->ex_uuid)[0];
- f ^= ((u64*)fhp->fh_export->ex_uuid)[1];
- p = xdr_encode_hyper(p, f);
- break;
- }
- return p;
+ u32 check;
+
+ if (xdr_stream_decode_bool(xdr, &check) < 0)
+ return false;
+ if (check) {
+ if (!svcxdr_decode_nfstime3(xdr, &args->guardtime))
+ return false;
+ args->check_guard = 1;
+ } else
+ args->check_guard = 0;
+
+ return true;
}
-static __be32 *
-encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
- struct kstat *stat)
+static bool
+svcxdr_decode_specdata3(struct xdr_stream *xdr, struct nfsd3_mknodargs *args)
{
- struct timespec ts;
- *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
- *p++ = htonl((u32) (stat->mode & S_IALLUGO));
- *p++ = htonl((u32) stat->nlink);
- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
- if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
- p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
- } else {
- p = xdr_encode_hyper(p, (u64) stat->size);
- }
- p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9);
- *p++ = htonl((u32) MAJOR(stat->rdev));
- *p++ = htonl((u32) MINOR(stat->rdev));
- p = encode_fsid(p, fhp);
- p = xdr_encode_hyper(p, stat->ino);
- ts = timespec64_to_timespec(stat->atime);
- p = encode_time3(p, &ts);
- ts = timespec64_to_timespec(stat->mtime);
- p = encode_time3(p, &ts);
- ts = timespec64_to_timespec(stat->ctime);
- p = encode_time3(p, &ts);
+ __be32 *p;
- return p;
+ p = xdr_inline_decode(xdr, XDR_UNIT * 2);
+ if (!p)
+ return false;
+ args->major = be32_to_cpup(p++);
+ args->minor = be32_to_cpup(p);
+
+ return true;
}
-static __be32 *
-encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
+static bool
+svcxdr_decode_devicedata3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ struct nfsd3_mknodargs *args)
{
- /* Attributes to follow */
- *p++ = xdr_one;
- return encode_fattr3(rqstp, p, fhp, &fhp->fh_post_attr);
+ return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs) &&
+ svcxdr_decode_specdata3(xdr, args);
}
-/*
- * Encode post-operation attributes.
- * The inode may be NULL if the call failed because of a stale file
- * handle. In this case, no attributes are returned.
- */
-static __be32 *
-encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
+static bool
+svcxdr_encode_fattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ const struct svc_fh *fhp, const struct kstat *stat)
{
- struct dentry *dentry = fhp->fh_dentry;
- if (dentry && d_really_is_positive(dentry)) {
- __be32 err;
- struct kstat stat;
-
- err = fh_getattr(fhp, &stat);
- if (!err) {
- *p++ = xdr_one; /* attributes follow */
- lease_get_mtime(d_inode(dentry), &stat.mtime);
- return encode_fattr3(rqstp, p, fhp, &stat);
- }
+ struct user_namespace *userns = nfsd_user_namespace(rqstp);
+ __be32 *p;
+ u64 fsid;
+
+ p = xdr_reserve_space(xdr, XDR_UNIT * 21);
+ if (!p)
+ return false;
+
+ *p++ = cpu_to_be32(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
+ *p++ = cpu_to_be32((u32)(stat->mode & S_IALLUGO));
+ *p++ = cpu_to_be32((u32)stat->nlink);
+ *p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid));
+ *p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid));
+ if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN)
+ p = xdr_encode_hyper(p, (u64)NFS3_MAXPATHLEN);
+ else
+ p = xdr_encode_hyper(p, (u64)stat->size);
+
+ /* used */
+ p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9);
+
+ /* rdev */
+ *p++ = cpu_to_be32((u32)MAJOR(stat->rdev));
+ *p++ = cpu_to_be32((u32)MINOR(stat->rdev));
+
+ switch(fsid_source(fhp)) {
+ case FSIDSOURCE_FSID:
+ fsid = (u64)fhp->fh_export->ex_fsid;
+ break;
+ case FSIDSOURCE_UUID:
+ fsid = ((u64 *)fhp->fh_export->ex_uuid)[0];
+ fsid ^= ((u64 *)fhp->fh_export->ex_uuid)[1];
+ break;
+ default:
+ fsid = (u64)huge_encode_dev(fhp->fh_dentry->d_sb->s_dev);
}
- *p++ = xdr_zero;
- return p;
+ p = xdr_encode_hyper(p, fsid);
+
+ /* fileid */
+ p = xdr_encode_hyper(p, stat->ino);
+
+ p = encode_nfstime3(p, &stat->atime);
+ p = encode_nfstime3(p, &stat->mtime);
+ encode_nfstime3(p, &stat->ctime);
+
+ return true;
}
-/* Helper for NFSv3 ACLs */
-__be32 *
-nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
+static bool
+svcxdr_encode_wcc_attr(struct xdr_stream *xdr, const struct svc_fh *fhp)
{
- return encode_post_op_attr(rqstp, p, fhp);
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, XDR_UNIT * 6);
+ if (!p)
+ return false;
+ p = xdr_encode_hyper(p, (u64)fhp->fh_pre_size);
+ p = encode_nfstime3(p, &fhp->fh_pre_mtime);
+ encode_nfstime3(p, &fhp->fh_pre_ctime);
+
+ return true;
}
-/*
- * Enocde weak cache consistency data
- */
-static __be32 *
-encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
+static bool
+svcxdr_encode_pre_op_attr(struct xdr_stream *xdr, const struct svc_fh *fhp)
{
- struct dentry *dentry = fhp->fh_dentry;
-
- if (dentry && d_really_is_positive(dentry) && fhp->fh_post_saved) {
- if (fhp->fh_pre_saved) {
- *p++ = xdr_one;
- p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size);
- p = encode_time3(p, &fhp->fh_pre_mtime);
- p = encode_time3(p, &fhp->fh_pre_ctime);
- } else {
- *p++ = xdr_zero;
- }
- return encode_saved_post_attr(rqstp, p, fhp);
+ if (!fhp->fh_pre_saved) {
+ if (xdr_stream_encode_item_absent(xdr) < 0)
+ return false;
+ return true;
}
- /* no pre- or post-attrs */
- *p++ = xdr_zero;
- return encode_post_op_attr(rqstp, p, fhp);
+
+ if (xdr_stream_encode_item_present(xdr) < 0)
+ return false;
+ return svcxdr_encode_wcc_attr(xdr, fhp);
}
-/*
- * Fill in the pre_op attr for the wcc data
+/**
+ * svcxdr_encode_post_op_attr - Encode NFSv3 post-op attributes
+ * @rqstp: Context of a completed RPC transaction
+ * @xdr: XDR stream
+ * @fhp: File handle to encode
+ *
+ * Return values:
+ * %false: Send buffer space was exhausted
+ * %true: Success
*/
-void fill_pre_wcc(struct svc_fh *fhp)
+bool
+svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ const struct svc_fh *fhp)
{
- struct inode *inode;
- struct kstat stat;
- __be32 err;
+ struct dentry *dentry = fhp->fh_dentry;
+ struct kstat stat;
- if (fhp->fh_pre_saved)
- return;
+ /*
+ * The inode may be NULL if the call failed because of a
+ * stale file handle. In this case, no attributes are
+ * returned.
+ */
+ if (fhp->fh_no_wcc || !dentry || !d_really_is_positive(dentry))
+ goto no_post_op_attrs;
+ if (fh_getattr(fhp, &stat) != nfs_ok)
+ goto no_post_op_attrs;
- inode = d_inode(fhp->fh_dentry);
- err = fh_getattr(fhp, &stat);
- if (err) {
- /* Grab the times from inode anyway */
- stat.mtime = inode->i_mtime;
- stat.ctime = inode->i_ctime;
- stat.size = inode->i_size;
- }
+ if (xdr_stream_encode_item_present(xdr) < 0)
+ return false;
+ lease_get_mtime(d_inode(dentry), &stat.mtime);
+ if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &stat))
+ return false;
+
+ return true;
- fhp->fh_pre_mtime = timespec64_to_timespec(stat.mtime);
- fhp->fh_pre_ctime = timespec64_to_timespec(stat.ctime);
- fhp->fh_pre_size = stat.size;
- fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
- fhp->fh_pre_saved = true;
+no_post_op_attrs:
+ return xdr_stream_encode_item_absent(xdr) > 0;
}
/*
- * Fill in the post_op attr for the wcc data
+ * Encode weak cache consistency data
*/
-void fill_post_wcc(struct svc_fh *fhp)
+static bool
+svcxdr_encode_wcc_data(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ const struct svc_fh *fhp)
{
- __be32 err;
-
- if (fhp->fh_post_saved)
- printk("nfsd: inode locked twice during operation.\n");
-
- err = fh_getattr(fhp, &fhp->fh_post_attr);
- fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr,
- d_inode(fhp->fh_dentry));
- if (err) {
- fhp->fh_post_saved = false;
- /* Grab the ctime anyway - set_change_info might use it */
- fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime;
- } else
- fhp->fh_post_saved = true;
+ struct dentry *dentry = fhp->fh_dentry;
+
+ if (!dentry || !d_really_is_positive(dentry) || !fhp->fh_post_saved)
+ goto neither;
+
+ /* before */
+ if (!svcxdr_encode_pre_op_attr(xdr, fhp))
+ return false;
+
+ /* after */
+ if (xdr_stream_encode_item_present(xdr) < 0)
+ return false;
+ if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &fhp->fh_post_attr))
+ return false;
+
+ return true;
+
+neither:
+ if (xdr_stream_encode_item_absent(xdr) < 0)
+ return false;
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, fhp))
+ return false;
+
+ return true;
}
/*
* XDR decode functions
*/
-int
-nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
+
+bool
+nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_fhandle *args = rqstp->rq_argp;
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_nfs_fh3(xdr, &args->fh);
}
-int
-nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_sattrargs *args = rqstp->rq_argp;
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- p = decode_sattr3(p, &args->attrs);
-
- if ((args->check_guard = ntohl(*p++)) != 0) {
- struct timespec time;
- p = decode_time3(p, &time);
- args->guardtime = time.tv_sec;
- }
-
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_nfs_fh3(xdr, &args->fh) &&
+ svcxdr_decode_sattr3(rqstp, xdr, &args->attrs) &&
+ svcxdr_decode_sattrguard3(xdr, args);
}
-int
-nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_diropargs *args = rqstp->rq_argp;
- if (!(p = decode_fh(p, &args->fh))
- || !(p = decode_filename(p, &args->name, &args->len)))
- return 0;
-
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len);
}
-int
-nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_accessargs *args = rqstp->rq_argp;
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- args->access = ntohl(*p++);
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->access) < 0)
+ return false;
- return xdr_argsize_check(rqstp, p);
+ return true;
}
-int
-nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_readargs *args = rqstp->rq_argp;
- unsigned int len;
- int v;
- u32 max_blocksize = svc_max_payload(rqstp);
-
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- p = xdr_decode_hyper(p, &args->offset);
- args->count = ntohl(*p++);
- len = min(args->count, max_blocksize);
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+ return false;
+ if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+ return false;
- /* set up the kvec */
- v=0;
- while (len > 0) {
- struct page *p = *(rqstp->rq_next_page++);
-
- rqstp->rq_vec[v].iov_base = page_address(p);
- rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
- len -= rqstp->rq_vec[v].iov_len;
- v++;
- }
- args->vlen = v;
- return xdr_argsize_check(rqstp, p);
+ return true;
}
-int
-nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_writeargs *args = rqstp->rq_argp;
- unsigned int len, hdr, dlen;
u32 max_blocksize = svc_max_payload(rqstp);
- struct kvec *head = rqstp->rq_arg.head;
- struct kvec *tail = rqstp->rq_arg.tail;
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- p = xdr_decode_hyper(p, &args->offset);
-
- args->count = ntohl(*p++);
- args->stable = ntohl(*p++);
- len = args->len = ntohl(*p++);
- if ((void *)p > head->iov_base + head->iov_len)
- return 0;
- /*
- * The count must equal the amount of data passed.
- */
- if (args->count != args->len)
- return 0;
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+ return false;
+ if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->stable) < 0)
+ return false;
- /*
- * Check to make sure that we got the right number of
- * bytes.
- */
- hdr = (void*)p - head->iov_base;
- dlen = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len - hdr;
- /*
- * Round the length of the data which was specified up to
- * the next multiple of XDR units and then compare that
- * against the length which was actually received.
- * Note that when RPCSEC/GSS (for example) is used, the
- * data buffer can be padded so dlen might be larger
- * than required. It must never be smaller.
- */
- if (dlen < XDR_QUADLEN(len)*4)
- return 0;
+ /* opaque data */
+ if (xdr_stream_decode_u32(xdr, &args->len) < 0)
+ return false;
+ /* request sanity */
+ if (args->count != args->len)
+ return false;
if (args->count > max_blocksize) {
args->count = max_blocksize;
- len = args->len = max_blocksize;
+ args->len = max_blocksize;
}
- args->first.iov_base = (void *)p;
- args->first.iov_len = head->iov_len - hdr;
- return 1;
+ return xdr_stream_subsegment(xdr, &args->payload, args->count);
}
-int
-nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_createargs *args = rqstp->rq_argp;
- if (!(p = decode_fh(p, &args->fh))
- || !(p = decode_filename(p, &args->name, &args->len)))
- return 0;
-
- switch (args->createmode = ntohl(*p++)) {
+ if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->createmode) < 0)
+ return false;
+ switch (args->createmode) {
case NFS3_CREATE_UNCHECKED:
case NFS3_CREATE_GUARDED:
- p = decode_sattr3(p, &args->attrs);
- break;
+ return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs);
case NFS3_CREATE_EXCLUSIVE:
- args->verf = p;
- p += 2;
+ args->verf = xdr_inline_decode(xdr, NFS3_CREATEVERFSIZE);
+ if (!args->verf)
+ return false;
break;
default:
- return 0;
+ return false;
}
-
- return xdr_argsize_check(rqstp, p);
+ return true;
}
-int
-nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_createargs *args = rqstp->rq_argp;
- if (!(p = decode_fh(p, &args->fh)) ||
- !(p = decode_filename(p, &args->name, &args->len)))
- return 0;
- p = decode_sattr3(p, &args->attrs);
-
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_diropargs3(xdr, &args->fh,
+ &args->name, &args->len) &&
+ svcxdr_decode_sattr3(rqstp, xdr, &args->attrs);
}
-int
-nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_symlinkargs *args = rqstp->rq_argp;
- char *base = (char *)p;
- size_t dlen;
-
- if (!(p = decode_fh(p, &args->ffh)) ||
- !(p = decode_filename(p, &args->fname, &args->flen)))
- return 0;
- p = decode_sattr3(p, &args->attrs);
-
- args->tlen = ntohl(*p++);
-
- args->first.iov_base = p;
- args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
- args->first.iov_len -= (char *)p - base;
+ struct kvec *head = rqstp->rq_arg.head;
- dlen = args->first.iov_len + rqstp->rq_arg.page_len +
- rqstp->rq_arg.tail[0].iov_len;
- if (dlen < XDR_QUADLEN(args->tlen) << 2)
- return 0;
- return 1;
+ if (!svcxdr_decode_diropargs3(xdr, &args->ffh, &args->fname, &args->flen))
+ return false;
+ if (!svcxdr_decode_sattr3(rqstp, xdr, &args->attrs))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->tlen) < 0)
+ return false;
+
+ /* symlink_data */
+ args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
+ args->first.iov_base = xdr_inline_decode(xdr, args->tlen);
+ return args->first.iov_base != NULL;
}
-int
-nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_mknodargs *args = rqstp->rq_argp;
- if (!(p = decode_fh(p, &args->fh))
- || !(p = decode_filename(p, &args->name, &args->len)))
- return 0;
-
- args->ftype = ntohl(*p++);
-
- if (args->ftype == NF3BLK || args->ftype == NF3CHR
- || args->ftype == NF3SOCK || args->ftype == NF3FIFO)
- p = decode_sattr3(p, &args->attrs);
-
- if (args->ftype == NF3BLK || args->ftype == NF3CHR) {
- args->major = ntohl(*p++);
- args->minor = ntohl(*p++);
+ if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->ftype) < 0)
+ return false;
+ switch (args->ftype) {
+ case NF3CHR:
+ case NF3BLK:
+ return svcxdr_decode_devicedata3(rqstp, xdr, args);
+ case NF3SOCK:
+ case NF3FIFO:
+ return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs);
+ case NF3REG:
+ case NF3DIR:
+ case NF3LNK:
+ /* Valid XDR but illegal file types */
+ break;
+ default:
+ return false;
}
- return xdr_argsize_check(rqstp, p);
+ return true;
}
-int
-nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_renameargs *args = rqstp->rq_argp;
- if (!(p = decode_fh(p, &args->ffh))
- || !(p = decode_filename(p, &args->fname, &args->flen))
- || !(p = decode_fh(p, &args->tfh))
- || !(p = decode_filename(p, &args->tname, &args->tlen)))
- return 0;
-
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_diropargs3(xdr, &args->ffh,
+ &args->fname, &args->flen) &&
+ svcxdr_decode_diropargs3(xdr, &args->tfh,
+ &args->tname, &args->tlen);
}
-int
-nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
-{
- struct nfsd3_readlinkargs *args = rqstp->rq_argp;
-
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- args->buffer = page_address(*(rqstp->rq_next_page++));
-
- return xdr_argsize_check(rqstp, p);
-}
-
-int
-nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_linkargs *args = rqstp->rq_argp;
- if (!(p = decode_fh(p, &args->ffh))
- || !(p = decode_fh(p, &args->tfh))
- || !(p = decode_filename(p, &args->tname, &args->tlen)))
- return 0;
-
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_nfs_fh3(xdr, &args->ffh) &&
+ svcxdr_decode_diropargs3(xdr, &args->tfh,
+ &args->tname, &args->tlen);
}
-int
-nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_readdirargs *args = rqstp->rq_argp;
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- p = xdr_decode_hyper(p, &args->cookie);
- args->verf = p; p += 2;
- args->dircount = ~0;
- args->count = ntohl(*p++);
- args->count = min_t(u32, args->count, PAGE_SIZE);
- args->buffer = page_address(*(rqstp->rq_next_page++));
-
- return xdr_argsize_check(rqstp, p);
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+ return false;
+ if (xdr_stream_decode_u64(xdr, &args->cookie) < 0)
+ return false;
+ args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
+ if (!args->verf)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+ return false;
+
+ return true;
}
-int
-nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_readdirargs *args = rqstp->rq_argp;
- int len;
- u32 max_blocksize = svc_max_payload(rqstp);
-
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- p = xdr_decode_hyper(p, &args->cookie);
- args->verf = p; p += 2;
- args->dircount = ntohl(*p++);
- args->count = ntohl(*p++);
-
- len = args->count = min(args->count, max_blocksize);
- while (len > 0) {
- struct page *p = *(rqstp->rq_next_page++);
- if (!args->buffer)
- args->buffer = page_address(p);
- len -= PAGE_SIZE;
- }
-
- return xdr_argsize_check(rqstp, p);
+ u32 dircount;
+
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+ return false;
+ if (xdr_stream_decode_u64(xdr, &args->cookie) < 0)
+ return false;
+ args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
+ if (!args->verf)
+ return false;
+ /* dircount is ignored */
+ if (xdr_stream_decode_u32(xdr, &dircount) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+ return false;
+
+ return true;
}
-int
-nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_commitargs *args = rqstp->rq_argp;
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- p = xdr_decode_hyper(p, &args->offset);
- args->count = ntohl(*p++);
- return xdr_argsize_check(rqstp, p);
+ if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
+ return false;
+ if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+ return false;
+
+ return true;
}
/*
* XDR encode functions
*/
-/*
- * There must be an encoding function for void results so svc_process
- * will work properly.
- */
-int
-nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
-{
- return xdr_ressize_check(rqstp, p);
-}
/* GETATTR */
-int
-nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- if (resp->status == 0) {
- lease_get_mtime(d_inode(resp->fh.fh_dentry),
- &resp->stat.mtime);
- p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat);
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ lease_get_mtime(d_inode(resp->fh.fh_dentry), &resp->stat.mtime);
+ if (!svcxdr_encode_fattr3(rqstp, xdr, &resp->fh, &resp->stat))
+ return false;
+ break;
}
- return xdr_ressize_check(rqstp, p);
+
+ return true;
}
/* SETATTR, REMOVE, RMDIR */
-int
-nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_attrstat *resp = rqstp->rq_resp;
- p = encode_wcc_data(rqstp, p, &resp->fh);
- return xdr_ressize_check(rqstp, p);
+ return svcxdr_encode_nfsstat3(xdr, resp->status) &&
+ svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh);
}
/* LOOKUP */
-int
-nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_diropres *resp = rqstp->rq_resp;
- if (resp->status == 0) {
- p = encode_fh(p, &resp->fh);
- p = encode_post_op_attr(rqstp, p, &resp->fh);
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_nfs_fh3(xdr, &resp->fh))
+ return false;
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
+ return false;
}
- p = encode_post_op_attr(rqstp, p, &resp->dirfh);
- return xdr_ressize_check(rqstp, p);
+
+ return true;
}
/* ACCESS */
-int
-nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_accessres *resp = rqstp->rq_resp;
- p = encode_post_op_attr(rqstp, p, &resp->fh);
- if (resp->status == 0)
- *p++ = htonl(resp->access);
- return xdr_ressize_check(rqstp, p);
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ if (xdr_stream_encode_u32(xdr, resp->access) < 0)
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ }
+
+ return true;
}
/* READLINK */
-int
-nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_readlinkres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ if (xdr_stream_encode_u32(xdr, resp->len) < 0)
+ return false;
+ svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages, 0,
+ resp->len);
+ if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ }
- p = encode_post_op_attr(rqstp, p, &resp->fh);
- if (resp->status == 0) {
- *p++ = htonl(resp->len);
- xdr_ressize_check(rqstp, p);
- rqstp->rq_res.page_len = resp->len;
- if (resp->len & 3) {
- /* need to pad the tail */
- rqstp->rq_res.tail[0].iov_base = p;
- *p = 0;
- rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
- }
- return 1;
- } else
- return xdr_ressize_check(rqstp, p);
+ return true;
}
/* READ */
-int
-nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_readres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ if (xdr_stream_encode_u32(xdr, resp->count) < 0)
+ return false;
+ if (xdr_stream_encode_bool(xdr, resp->eof) < 0)
+ return false;
+ if (xdr_stream_encode_u32(xdr, resp->count) < 0)
+ return false;
+ svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
+ rqstp->rq_res.page_base,
+ resp->count);
+ if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ }
- p = encode_post_op_attr(rqstp, p, &resp->fh);
- if (resp->status == 0) {
- *p++ = htonl(resp->count);
- *p++ = htonl(resp->eof);
- *p++ = htonl(resp->count); /* xdr opaque count */
- xdr_ressize_check(rqstp, p);
- /* now update rqstp->rq_res to reflect data as well */
- rqstp->rq_res.page_len = resp->count;
- if (resp->count & 3) {
- /* need to pad the tail */
- rqstp->rq_res.tail[0].iov_base = p;
- *p = 0;
- rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
- }
- return 1;
- } else
- return xdr_ressize_check(rqstp, p);
+ return true;
}
/* WRITE */
-int
-nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_writeres *resp = rqstp->rq_resp;
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
-
- p = encode_wcc_data(rqstp, p, &resp->fh);
- if (resp->status == 0) {
- *p++ = htonl(resp->count);
- *p++ = htonl(resp->committed);
- /* unique identifier, y2038 overflow can be ignored */
- *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
- *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
+ return false;
+ if (xdr_stream_encode_u32(xdr, resp->count) < 0)
+ return false;
+ if (xdr_stream_encode_u32(xdr, resp->committed) < 0)
+ return false;
+ if (!svcxdr_encode_writeverf3(xdr, resp->verf))
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
+ return false;
}
- return xdr_ressize_check(rqstp, p);
+
+ return true;
}
/* CREATE, MKDIR, SYMLINK, MKNOD */
-int
-nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_diropres *resp = rqstp->rq_resp;
- if (resp->status == 0) {
- *p++ = xdr_one;
- p = encode_fh(p, &resp->fh);
- p = encode_post_op_attr(rqstp, p, &resp->fh);
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_fh3(xdr, &resp->fh))
+ return false;
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh))
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh))
+ return false;
}
- p = encode_wcc_data(rqstp, p, &resp->dirfh);
- return xdr_ressize_check(rqstp, p);
+
+ return true;
}
/* RENAME */
-int
-nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_renameres *resp = rqstp->rq_resp;
- p = encode_wcc_data(rqstp, p, &resp->ffh);
- p = encode_wcc_data(rqstp, p, &resp->tfh);
- return xdr_ressize_check(rqstp, p);
+ return svcxdr_encode_nfsstat3(xdr, resp->status) &&
+ svcxdr_encode_wcc_data(rqstp, xdr, &resp->ffh) &&
+ svcxdr_encode_wcc_data(rqstp, xdr, &resp->tfh);
}
/* LINK */
-int
-nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_linkres *resp = rqstp->rq_resp;
- p = encode_post_op_attr(rqstp, p, &resp->fh);
- p = encode_wcc_data(rqstp, p, &resp->tfh);
- return xdr_ressize_check(rqstp, p);
+ return svcxdr_encode_nfsstat3(xdr, resp->status) &&
+ svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh) &&
+ svcxdr_encode_wcc_data(rqstp, xdr, &resp->tfh);
}
/* READDIR */
-int
-nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_readdirres *resp = rqstp->rq_resp;
+ struct xdr_buf *dirlist = &resp->dirlist;
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ if (!svcxdr_encode_cookieverf3(xdr, resp->verf))
+ return false;
+ svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
+ dirlist->len);
+ /* no more entries */
+ if (xdr_stream_encode_item_absent(xdr) < 0)
+ return false;
+ if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0)
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ return false;
+ }
- p = encode_post_op_attr(rqstp, p, &resp->fh);
-
- if (resp->status == 0) {
- /* stupid readdir cookie */
- memcpy(p, resp->verf, 8); p += 2;
- xdr_ressize_check(rqstp, p);
- if (rqstp->rq_res.head[0].iov_len + (2<<2) > PAGE_SIZE)
- return 1; /*No room for trailer */
- rqstp->rq_res.page_len = (resp->count) << 2;
-
- /* add the 'tail' to the end of the 'head' page - page 0. */
- rqstp->rq_res.tail[0].iov_base = p;
- *p++ = 0; /* no more entries */
- *p++ = htonl(resp->common.err == nfserr_eof);
- rqstp->rq_res.tail[0].iov_len = 2<<2;
- return 1;
- } else
- return xdr_ressize_check(rqstp, p);
-}
-
-static __be32 *
-encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
- int namlen, u64 ino)
-{
- *p++ = xdr_one; /* mark entry present */
- p = xdr_encode_hyper(p, ino); /* file id */
- p = xdr_encode_array(p, name, namlen);/* name length & name */
-
- cd->offset = p; /* remember pointer */
- p = xdr_encode_hyper(p, NFS_OFFSET_MAX);/* offset of next entry */
-
- return p;
+ return true;
}
static __be32
@@ -844,19 +990,24 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
if (isdotent(name, namlen)) {
if (namlen == 2) {
dchild = dget_parent(dparent);
- /* filesystem root - cannot return filehandle for ".." */
+ /*
+ * Don't return filehandle for ".." if we're at
+ * the filesystem or export root:
+ */
if (dchild == dparent)
goto out;
+ if (dparent == exp->ex_path.dentry)
+ goto out;
} else
dchild = dget(dparent);
} else
- dchild = lookup_one_len_unlocked(name, dparent, namlen);
+ dchild = lookup_one_positive_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, namlen),
+ dparent);
if (IS_ERR(dchild))
return rv;
if (d_mountpoint(dchild))
goto out;
- if (d_really_is_negative(dchild))
- goto out;
if (dchild->d_inode->i_ino != ino)
goto out;
rv = fh_compose(fhp, exp, dchild, &cd->fh);
@@ -865,264 +1016,323 @@ out:
return rv;
}
-static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino)
-{
- struct svc_fh *fh = &cd->scratch;
- __be32 err;
-
- fh_init(fh, NFS3_FHSIZE);
- err = compose_entry_fh(cd, fh, name, namlen, ino);
- if (err) {
- *p++ = 0;
- *p++ = 0;
- goto out;
- }
- p = encode_post_op_attr(cd->rqstp, p, fh);
- *p++ = xdr_one; /* yes, a file handle follows */
- p = encode_fh(p, fh);
-out:
- fh_put(fh);
- return p;
-}
-
-/*
- * Encode a directory entry. This one works for both normal readdir
- * and readdirplus.
- * The normal readdir reply requires 2 (fileid) + 1 (stringlen)
- * + string + 2 (cookie) + 1 (next) words, i.e. 6 + strlen.
- *
- * The readdirplus baggage is 1+21 words for post_op_attr, plus the
- * file handle.
+/**
+ * nfs3svc_encode_cookie3 - Encode a directory offset cookie
+ * @resp: readdir result context
+ * @offset: offset cookie to encode
+ *
+ * The buffer space for the offset cookie has already been reserved
+ * by svcxdr_encode_entry3_common().
*/
-
-#define NFS3_ENTRY_BAGGAGE (2 + 1 + 2 + 1)
-#define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2))
-static int
-encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
- loff_t offset, u64 ino, unsigned int d_type, int plus)
+void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset)
{
- struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres,
- common);
- __be32 *p = cd->buffer;
- caddr_t curr_page_addr = NULL;
- struct page ** page;
- int slen; /* string (name) length */
- int elen; /* estimated entry length in words */
- int num_entry_words = 0; /* actual number of words */
-
- if (cd->offset) {
- u64 offset64 = offset;
-
- if (unlikely(cd->offset1)) {
- /* we ended up with offset on a page boundary */
- *cd->offset = htonl(offset64 >> 32);
- *cd->offset1 = htonl(offset64 & 0xffffffff);
- cd->offset1 = NULL;
- } else {
- xdr_encode_hyper(cd->offset, offset64);
- }
- }
-
- /*
- dprintk("encode_entry(%.*s @%ld%s)\n",
- namlen, name, (long) offset, plus? " plus" : "");
- */
-
- /* truncate filename if too long */
- namlen = min(namlen, NFS3_MAXNAMLEN);
+ __be64 cookie = cpu_to_be64(offset);
- slen = XDR_QUADLEN(namlen);
- elen = slen + NFS3_ENTRY_BAGGAGE
- + (plus? NFS3_ENTRYPLUS_BAGGAGE : 0);
+ if (!resp->cookie_offset)
+ return;
+ write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie,
+ sizeof(cookie));
+ resp->cookie_offset = 0;
+}
- if (cd->buflen < elen) {
- cd->common.err = nfserr_toosmall;
- return -EINVAL;
- }
+static bool
+svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name,
+ int namlen, loff_t offset, u64 ino)
+{
+ struct xdr_buf *dirlist = &resp->dirlist;
+ struct xdr_stream *xdr = &resp->xdr;
+
+ if (xdr_stream_encode_item_present(xdr) < 0)
+ return false;
+ /* fileid */
+ if (xdr_stream_encode_u64(xdr, ino) < 0)
+ return false;
+ /* name */
+ if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS3_MAXNAMLEN)) < 0)
+ return false;
+ /* cookie */
+ resp->cookie_offset = dirlist->len;
+ if (xdr_stream_encode_u64(xdr, OFFSET_MAX) < 0)
+ return false;
+
+ return true;
+}
- /* determine which page in rq_respages[] we are currently filling */
- for (page = cd->rqstp->rq_respages + 1;
- page < cd->rqstp->rq_next_page; page++) {
- curr_page_addr = page_address(*page);
+/**
+ * nfs3svc_encode_entry3 - encode one NFSv3 READDIR entry
+ * @data: directory context
+ * @name: name of the object to be encoded
+ * @namlen: length of that name, in bytes
+ * @offset: the offset of the previous entry
+ * @ino: the fileid of this entry
+ * @d_type: unused
+ *
+ * Return values:
+ * %0: Entry was successfully encoded.
+ * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err
+ *
+ * On exit, the following fields are updated:
+ * - resp->xdr
+ * - resp->common.err
+ * - resp->cookie_offset
+ */
+int nfs3svc_encode_entry3(void *data, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type)
+{
+ struct readdir_cd *ccd = data;
+ struct nfsd3_readdirres *resp = container_of(ccd,
+ struct nfsd3_readdirres,
+ common);
+ unsigned int starting_length = resp->dirlist.len;
- if (((caddr_t)cd->buffer >= curr_page_addr) &&
- ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE))
- break;
- }
+ /* The offset cookie for the previous entry */
+ nfs3svc_encode_cookie3(resp, offset);
- if ((caddr_t)(cd->buffer + elen) < (curr_page_addr + PAGE_SIZE)) {
- /* encode entry in current page */
+ if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino))
+ goto out_toosmall;
- p = encode_entry_baggage(cd, p, name, namlen, ino);
+ xdr_commit_encode(&resp->xdr);
+ resp->common.err = nfs_ok;
+ return 0;
- if (plus)
- p = encode_entryplus_baggage(cd, p, name, namlen, ino);
- num_entry_words = p - cd->buffer;
- } else if (*(page+1) != NULL) {
- /* temporarily encode entry into next page, then move back to
- * current and next page in rq_respages[] */
- __be32 *p1, *tmp;
- int len1, len2;
+out_toosmall:
+ resp->cookie_offset = 0;
+ resp->common.err = nfserr_toosmall;
+ resp->dirlist.len = starting_length;
+ return -EINVAL;
+}
- /* grab next page for temporary storage of entry */
- p1 = tmp = page_address(*(page+1));
+static bool
+svcxdr_encode_entry3_plus(struct nfsd3_readdirres *resp, const char *name,
+ int namlen, u64 ino)
+{
+ struct xdr_stream *xdr = &resp->xdr;
+ struct svc_fh *fhp = &resp->scratch;
+ bool result;
- p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
+ result = false;
+ fh_init(fhp, NFS3_FHSIZE);
+ if (compose_entry_fh(resp, fhp, name, namlen, ino) != nfs_ok)
+ goto out_noattrs;
- if (plus)
- p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino);
+ if (!svcxdr_encode_post_op_attr(resp->rqstp, xdr, fhp))
+ goto out;
+ if (!svcxdr_encode_post_op_fh3(xdr, fhp))
+ goto out;
+ result = true;
- /* determine entry word length and lengths to go in pages */
- num_entry_words = p1 - tmp;
- len1 = curr_page_addr + PAGE_SIZE - (caddr_t)cd->buffer;
- if ((num_entry_words << 2) < len1) {
- /* the actual number of words in the entry is less
- * than elen and can still fit in the current page
- */
- memmove(p, tmp, num_entry_words << 2);
- p += num_entry_words;
-
- /* update offset */
- cd->offset = cd->buffer + (cd->offset - tmp);
- } else {
- unsigned int offset_r = (cd->offset - tmp) << 2;
-
- /* update pointer to offset location.
- * This is a 64bit quantity, so we need to
- * deal with 3 cases:
- * - entirely in first page
- * - entirely in second page
- * - 4 bytes in each page
- */
- if (offset_r + 8 <= len1) {
- cd->offset = p + (cd->offset - tmp);
- } else if (offset_r >= len1) {
- cd->offset -= len1 >> 2;
- } else {
- /* sitting on the fence */
- BUG_ON(offset_r != len1 - 4);
- cd->offset = p + (cd->offset - tmp);
- cd->offset1 = tmp;
- }
-
- len2 = (num_entry_words << 2) - len1;
-
- /* move from temp page to current and next pages */
- memmove(p, tmp, len1);
- memmove(tmp, (caddr_t)tmp+len1, len2);
-
- p = tmp + (len2 >> 2);
- }
- }
- else {
- cd->common.err = nfserr_toosmall;
- return -EINVAL;
- }
+out:
+ fh_put(fhp);
+ return result;
+
+out_noattrs:
+ if (xdr_stream_encode_item_absent(xdr) < 0)
+ return false;
+ if (xdr_stream_encode_item_absent(xdr) < 0)
+ return false;
+ return true;
+}
- cd->buflen -= num_entry_words;
- cd->buffer = p;
- cd->common.err = nfs_ok;
+/**
+ * nfs3svc_encode_entryplus3 - encode one NFSv3 READDIRPLUS entry
+ * @data: directory context
+ * @name: name of the object to be encoded
+ * @namlen: length of that name, in bytes
+ * @offset: the offset of the previous entry
+ * @ino: the fileid of this entry
+ * @d_type: unused
+ *
+ * Return values:
+ * %0: Entry was successfully encoded.
+ * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err
+ *
+ * On exit, the following fields are updated:
+ * - resp->xdr
+ * - resp->common.err
+ * - resp->cookie_offset
+ */
+int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type)
+{
+ struct readdir_cd *ccd = data;
+ struct nfsd3_readdirres *resp = container_of(ccd,
+ struct nfsd3_readdirres,
+ common);
+ unsigned int starting_length = resp->dirlist.len;
+
+ /* The offset cookie for the previous entry */
+ nfs3svc_encode_cookie3(resp, offset);
+
+ if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino))
+ goto out_toosmall;
+ if (!svcxdr_encode_entry3_plus(resp, name, namlen, ino))
+ goto out_toosmall;
+
+ xdr_commit_encode(&resp->xdr);
+ resp->common.err = nfs_ok;
return 0;
+out_toosmall:
+ resp->cookie_offset = 0;
+ resp->common.err = nfserr_toosmall;
+ resp->dirlist.len = starting_length;
+ return -EINVAL;
}
-int
-nfs3svc_encode_entry(void *cd, const char *name,
- int namlen, loff_t offset, u64 ino, unsigned int d_type)
+static bool
+svcxdr_encode_fsstat3resok(struct xdr_stream *xdr,
+ const struct nfsd3_fsstatres *resp)
{
- return encode_entry(cd, name, namlen, offset, ino, d_type, 0);
-}
+ const struct kstatfs *s = &resp->stats;
+ u64 bs = s->f_bsize;
+ __be32 *p;
-int
-nfs3svc_encode_entry_plus(void *cd, const char *name,
- int namlen, loff_t offset, u64 ino,
- unsigned int d_type)
-{
- return encode_entry(cd, name, namlen, offset, ino, d_type, 1);
+ p = xdr_reserve_space(xdr, XDR_UNIT * 13);
+ if (!p)
+ return false;
+ p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */
+ p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */
+ p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */
+ p = xdr_encode_hyper(p, s->f_files); /* total inodes */
+ p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */
+ p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */
+ *p = cpu_to_be32(resp->invarsec); /* mean unchanged time */
+
+ return true;
}
/* FSSTAT */
-int
-nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_fsstatres *resp = rqstp->rq_resp;
- struct kstatfs *s = &resp->stats;
- u64 bs = s->f_bsize;
-
- *p++ = xdr_zero; /* no post_op_attr */
-
- if (resp->status == 0) {
- p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */
- p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */
- p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */
- p = xdr_encode_hyper(p, s->f_files); /* total inodes */
- p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */
- p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */
- *p++ = htonl(resp->invarsec); /* mean unchanged time */
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+ return false;
+ if (!svcxdr_encode_fsstat3resok(xdr, resp))
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+ return false;
}
- return xdr_ressize_check(rqstp, p);
+
+ return true;
+}
+
+static bool
+svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr,
+ const struct nfsd3_fsinfores *resp)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, XDR_UNIT * 12);
+ if (!p)
+ return false;
+ *p++ = cpu_to_be32(resp->f_rtmax);
+ *p++ = cpu_to_be32(resp->f_rtpref);
+ *p++ = cpu_to_be32(resp->f_rtmult);
+ *p++ = cpu_to_be32(resp->f_wtmax);
+ *p++ = cpu_to_be32(resp->f_wtpref);
+ *p++ = cpu_to_be32(resp->f_wtmult);
+ *p++ = cpu_to_be32(resp->f_dtpref);
+ p = xdr_encode_hyper(p, resp->f_maxfilesize);
+ p = encode_nfstime3(p, &nfs3svc_time_delta);
+ *p = cpu_to_be32(resp->f_properties);
+
+ return true;
}
/* FSINFO */
-int
-nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_fsinfores *resp = rqstp->rq_resp;
- *p++ = xdr_zero; /* no post_op_attr */
-
- if (resp->status == 0) {
- *p++ = htonl(resp->f_rtmax);
- *p++ = htonl(resp->f_rtpref);
- *p++ = htonl(resp->f_rtmult);
- *p++ = htonl(resp->f_wtmax);
- *p++ = htonl(resp->f_wtpref);
- *p++ = htonl(resp->f_wtmult);
- *p++ = htonl(resp->f_dtpref);
- p = xdr_encode_hyper(p, resp->f_maxfilesize);
- *p++ = xdr_one;
- *p++ = xdr_zero;
- *p++ = htonl(resp->f_properties);
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+ return false;
+ if (!svcxdr_encode_fsinfo3resok(xdr, resp))
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+ return false;
}
- return xdr_ressize_check(rqstp, p);
+ return true;
+}
+
+static bool
+svcxdr_encode_pathconf3resok(struct xdr_stream *xdr,
+ const struct nfsd3_pathconfres *resp)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, XDR_UNIT * 6);
+ if (!p)
+ return false;
+ *p++ = cpu_to_be32(resp->p_link_max);
+ *p++ = cpu_to_be32(resp->p_name_max);
+ p = xdr_encode_bool(p, resp->p_no_trunc);
+ p = xdr_encode_bool(p, resp->p_chown_restricted);
+ p = xdr_encode_bool(p, resp->p_case_insensitive);
+ xdr_encode_bool(p, resp->p_case_preserving);
+
+ return true;
}
/* PATHCONF */
-int
-nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_pathconfres *resp = rqstp->rq_resp;
- *p++ = xdr_zero; /* no post_op_attr */
-
- if (resp->status == 0) {
- *p++ = htonl(resp->p_link_max);
- *p++ = htonl(resp->p_name_max);
- *p++ = htonl(resp->p_no_trunc);
- *p++ = htonl(resp->p_chown_restricted);
- *p++ = htonl(resp->p_case_insensitive);
- *p++ = htonl(resp->p_case_preserving);
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+ return false;
+ if (!svcxdr_encode_pathconf3resok(xdr, resp))
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
+ return false;
}
- return xdr_ressize_check(rqstp, p);
+ return true;
}
/* COMMIT */
-int
-nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd3_commitres *resp = rqstp->rq_resp;
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
-
- p = encode_wcc_data(rqstp, p, &resp->fh);
- /* Write verifier */
- if (resp->status == 0) {
- /* unique identifier, y2038 overflow can be ignored */
- *p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
- *p++ = htonl(nn->nfssvc_boot.tv_nsec);
+
+ if (!svcxdr_encode_nfsstat3(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
+ return false;
+ if (!svcxdr_encode_writeverf3(xdr, resp->verf))
+ return false;
+ break;
+ default:
+ if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
+ return false;
}
- return xdr_ressize_check(rqstp, p);
+
+ return true;
}
/*
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 71292a0d6f09..936ea1ad9586 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -135,7 +135,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
unsigned int flags = 0;
int size = 0;
- pacl = get_acl(inode, ACL_TYPE_ACCESS);
+ pacl = get_inode_acl(inode, ACL_TYPE_ACCESS);
if (!pacl)
pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
@@ -147,7 +147,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
if (S_ISDIR(inode->i_mode)) {
flags = NFS4_ACL_DIR;
- dpacl = get_acl(inode, ACL_TYPE_DEFAULT);
+ dpacl = get_inode_acl(inode, ACL_TYPE_DEFAULT);
if (IS_ERR(dpacl)) {
error = PTR_ERR(dpacl);
goto rel_pacl;
@@ -198,8 +198,6 @@ summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas)
memset(pas, 0, sizeof(*pas));
pas->mask = 07;
- pe = acl->a_entries + acl->a_count;
-
FOREACH_ACL_ENTRY(pa, acl, pe) {
switch (pa->e_tag) {
case ACL_USER_OBJ:
@@ -441,7 +439,7 @@ struct posix_ace_state_array {
* calculated so far: */
struct posix_acl_state {
- int empty;
+ unsigned char valid;
struct posix_ace_state owner;
struct posix_ace_state group;
struct posix_ace_state other;
@@ -457,7 +455,6 @@ init_state(struct posix_acl_state *state, int cnt)
int alloc;
memset(state, 0, sizeof(struct posix_acl_state));
- state->empty = 1;
/*
* In the worst case, each individual acl could be for a distinct
* named user or group, but we don't know which, so we allocate
@@ -500,7 +497,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
* and effective cases: when there are no inheritable ACEs,
* calls ->set_acl with a NULL ACL structure.
*/
- if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT))
+ if (!state->valid && (flags & NFS4_ACL_TYPE_DEFAULT))
return NULL;
/*
@@ -622,11 +619,12 @@ static void process_one_v4_ace(struct posix_acl_state *state,
struct nfs4_ace *ace)
{
u32 mask = ace->access_mask;
+ short type = ace2type(ace);
int i;
- state->empty = 0;
+ state->valid |= type;
- switch (ace2type(ace)) {
+ switch (type) {
case ACL_USER_OBJ:
if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
allow_bits(&state->owner, mask);
@@ -726,6 +724,30 @@ static int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl,
if (!(ace->flag & NFS4_ACE_INHERIT_ONLY_ACE))
process_one_v4_ace(&effective_acl_state, ace);
}
+
+ /*
+ * At this point, the default ACL may have zeroed-out entries for owner,
+ * group and other. That usually results in a non-sensical resulting ACL
+ * that denies all access except to any ACE that was explicitly added.
+ *
+ * The setfacl command solves a similar problem with this logic:
+ *
+ * "If a Default ACL entry is created, and the Default ACL contains
+ * no owner, owning group, or others entry, a copy of the ACL
+ * owner, owning group, or others entry is added to the Default ACL."
+ *
+ * Copy any missing ACEs from the effective set, if any ACEs were
+ * explicitly set.
+ */
+ if (default_acl_state.valid) {
+ if (!(default_acl_state.valid & ACL_USER_OBJ))
+ default_acl_state.owner = effective_acl_state.owner;
+ if (!(default_acl_state.valid & ACL_GROUP_OBJ))
+ default_acl_state.group = effective_acl_state.group;
+ if (!(default_acl_state.valid & ACL_OTHER))
+ default_acl_state.other = effective_acl_state.other;
+ }
+
*pacl = posix_state_to_acl(&effective_acl_state, flags);
if (IS_ERR(*pacl)) {
ret = PTR_ERR(*pacl);
@@ -751,57 +773,26 @@ out_estate:
return ret;
}
-__be32
-nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct nfs4_acl *acl)
+__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl,
+ struct nfsd_attrs *attr)
{
- __be32 error;
int host_error;
- struct dentry *dentry;
- struct inode *inode;
- struct posix_acl *pacl = NULL, *dpacl = NULL;
unsigned int flags = 0;
- /* Get inode */
- error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR);
- if (error)
- return error;
-
- dentry = fhp->fh_dentry;
- inode = d_inode(dentry);
+ if (!acl)
+ return nfs_ok;
- if (S_ISDIR(inode->i_mode))
+ if (type == NF4DIR)
flags = NFS4_ACL_DIR;
- host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
+ host_error = nfs4_acl_nfsv4_to_posix(acl, &attr->na_pacl,
+ &attr->na_dpacl, flags);
if (host_error == -EINVAL)
return nfserr_attrnotsupp;
- if (host_error < 0)
- goto out_nfserr;
-
- fh_lock(fhp);
-
- host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl);
- if (host_error < 0)
- goto out_drop_lock;
-
- if (S_ISDIR(inode->i_mode)) {
- host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl);
- }
-
-out_drop_lock:
- fh_unlock(fhp);
-
- posix_acl_release(pacl);
- posix_acl_release(dpacl);
-out_nfserr:
- if (host_error == -EOPNOTSUPP)
- return nfserr_attrnotsupp;
else
return nfserrno(host_error);
}
-
static short
ace2type(struct nfs4_ace *ace)
{
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c74e4538d0eb..e00b2aea8da2 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -31,6 +31,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <linux/nfs4.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/svc_xprt.h>
@@ -38,13 +39,13 @@
#include "nfsd.h"
#include "state.h"
#include "netns.h"
+#include "trace.h"
#include "xdr4cb.h"
#include "xdr4.h"
+#include "nfs4xdr_gen.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
-static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
-
#define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1
@@ -60,16 +61,6 @@ struct nfs4_cb_compound_hdr {
int status;
};
-/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
- dprintk("NFS: %s prematurely hit the end of our receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
-}
-
static __be32 *xdr_encode_empty_array(__be32 *p)
{
*p++ = xdr_zero;
@@ -85,30 +76,53 @@ static __be32 *xdr_encode_empty_array(__be32 *p)
* 1 Protocol"
*/
-/*
- * nfs_cb_opnum4
- *
- * enum nfs_cb_opnum4 {
- * OP_CB_GETATTR = 3,
- * ...
- * };
- */
-enum nfs_cb_opnum4 {
- OP_CB_GETATTR = 3,
- OP_CB_RECALL = 4,
- OP_CB_LAYOUTRECALL = 5,
- OP_CB_NOTIFY = 6,
- OP_CB_PUSH_DELEG = 7,
- OP_CB_RECALL_ANY = 8,
- OP_CB_RECALLABLE_OBJ_AVAIL = 9,
- OP_CB_RECALL_SLOT = 10,
- OP_CB_SEQUENCE = 11,
- OP_CB_WANTS_CANCELLED = 12,
- OP_CB_NOTIFY_LOCK = 13,
- OP_CB_NOTIFY_DEVICEID = 14,
- OP_CB_OFFLOAD = 15,
- OP_CB_ILLEGAL = 10044
-};
+static void encode_uint32(struct xdr_stream *xdr, u32 n)
+{
+ WARN_ON_ONCE(xdr_stream_encode_u32(xdr, n) < 0);
+}
+
+static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
+ size_t len)
+{
+ xdr_stream_encode_uint32_array(xdr, bitmap, len);
+}
+
+static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap,
+ struct nfs4_cb_fattr *fattr)
+{
+ fattr->ncf_cb_change = 0;
+ fattr->ncf_cb_fsize = 0;
+ fattr->ncf_cb_atime.tv_sec = 0;
+ fattr->ncf_cb_atime.tv_nsec = 0;
+ fattr->ncf_cb_mtime.tv_sec = 0;
+ fattr->ncf_cb_mtime.tv_nsec = 0;
+
+ if (bitmap[0] & FATTR4_WORD0_CHANGE)
+ if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0)
+ return -EIO;
+ if (bitmap[0] & FATTR4_WORD0_SIZE)
+ if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0)
+ return -EIO;
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) {
+ fattr4_time_deleg_access access;
+
+ if (!xdrgen_decode_fattr4_time_deleg_access(xdr, &access))
+ return -EIO;
+ fattr->ncf_cb_atime.tv_sec = access.seconds;
+ fattr->ncf_cb_atime.tv_nsec = access.nseconds;
+
+ }
+ if (bitmap[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) {
+ fattr4_time_deleg_modify modify;
+
+ if (!xdrgen_decode_fattr4_time_deleg_modify(xdr, &modify))
+ return -EIO;
+ fattr->ncf_cb_mtime.tv_sec = modify.seconds;
+ fattr->ncf_cb_mtime.tv_nsec = modify.nseconds;
+
+ }
+ return 0;
+}
static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
{
@@ -130,7 +144,7 @@ static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh)
BUG_ON(length > NFS4_FHSIZE);
p = xdr_reserve_space(xdr, 4 + length);
- xdr_encode_opaque(p, &fh->fh_base, length);
+ xdr_encode_opaque(p, &fh->fh_raw, length);
}
/*
@@ -240,7 +254,6 @@ static int decode_cb_op_status(struct xdr_stream *xdr,
*status = nfs_cb_stat_to_errno(be32_to_cpup(p));
return 0;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
out_unexpected:
dprintk("NFSD: Callback server returned operation %d but "
@@ -296,20 +309,19 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
u32 length;
__be32 *p;
- p = xdr_inline_decode(xdr, 4 + 4);
+ p = xdr_inline_decode(xdr, XDR_UNIT);
if (unlikely(p == NULL))
goto out_overflow;
- hdr->status = be32_to_cpup(p++);
+ hdr->status = be32_to_cpup(p);
/* Ignore the tag */
- length = be32_to_cpup(p++);
- p = xdr_inline_decode(xdr, length + 4);
- if (unlikely(p == NULL))
+ if (xdr_stream_decode_u32(xdr, &length) < 0)
+ goto out_overflow;
+ if (xdr_inline_decode(xdr, length) == NULL)
+ goto out_overflow;
+ if (xdr_stream_decode_u32(xdr, &hdr->nops) < 0)
goto out_overflow;
- p += XDR_QUADLEN(length);
- hdr->nops = be32_to_cpup(p);
return 0;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}
@@ -340,6 +352,95 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
}
/*
+ * CB_RECALLANY4args
+ *
+ * struct CB_RECALLANY4args {
+ * uint32_t craa_objects_to_keep;
+ * bitmap4 craa_type_mask;
+ * };
+ */
+static void
+encode_cb_recallany4args(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
+{
+ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
+ encode_uint32(xdr, ra->ra_keep);
+ encode_bitmap4(xdr, ra->ra_bmval, ARRAY_SIZE(ra->ra_bmval));
+ hdr->nops++;
+}
+
+/*
+ * CB_GETATTR4args
+ * struct CB_GETATTR4args {
+ * nfs_fh4 fh;
+ * bitmap4 attr_request;
+ * };
+ *
+ * The size and change attributes are the only one
+ * guaranteed to be serviced by the client.
+ */
+static void
+encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
+ struct nfs4_cb_fattr *fattr)
+{
+ struct nfs4_delegation *dp = container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
+ struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle;
+ struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
+ u32 bmap_size = 1;
+ u32 bmap[3];
+
+ bmap[0] = FATTR4_WORD0_SIZE;
+ if (!ncf->ncf_file_modified)
+ bmap[0] |= FATTR4_WORD0_CHANGE;
+
+ if (deleg_attrs_deleg(dp->dl_type)) {
+ bmap[1] = 0;
+ bmap[2] = FATTR4_WORD2_TIME_DELEG_ACCESS | FATTR4_WORD2_TIME_DELEG_MODIFY;
+ bmap_size = 3;
+ }
+ encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR);
+ encode_nfs_fh4(xdr, fh);
+ encode_bitmap4(xdr, bmap, bmap_size);
+ hdr->nops++;
+}
+
+static u32 highest_slotid(struct nfsd4_session *ses)
+{
+ u32 idx;
+
+ spin_lock(&ses->se_lock);
+ idx = fls(~ses->se_cb_slot_avail);
+ if (idx > 0)
+ --idx;
+ idx = max(idx, ses->se_cb_highest_slot);
+ spin_unlock(&ses->se_lock);
+ return idx;
+}
+
+static void
+encode_referring_call4(struct xdr_stream *xdr,
+ const struct nfsd4_referring_call *rc)
+{
+ encode_uint32(xdr, rc->rc_sequenceid);
+ encode_uint32(xdr, rc->rc_slotid);
+}
+
+static void
+encode_referring_call_list4(struct xdr_stream *xdr,
+ const struct nfsd4_referring_call_list *rcl)
+{
+ struct nfsd4_referring_call *rc;
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
+ xdr_encode_opaque_fixed(p, rcl->rcl_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+ encode_uint32(xdr, rcl->__nr_referring_calls);
+ list_for_each_entry(rc, &rcl->rcl_referring_calls, __list)
+ encode_referring_call4(xdr, rc);
+}
+
+/*
* CB_SEQUENCE4args
*
* struct CB_SEQUENCE4args {
@@ -356,6 +457,7 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr,
struct nfs4_cb_compound_hdr *hdr)
{
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
+ struct nfsd4_referring_call_list *rcl;
__be32 *p;
if (hdr->minorversion == 0)
@@ -364,16 +466,45 @@ static void encode_cb_sequence4args(struct xdr_stream *xdr,
encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE);
encode_sessionid4(xdr, session);
- p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
- *p++ = cpu_to_be32(session->se_cb_seq_nr); /* csa_sequenceid */
- *p++ = xdr_zero; /* csa_slotid */
- *p++ = xdr_zero; /* csa_highest_slotid */
+ p = xdr_reserve_space(xdr, XDR_UNIT * 4);
+ *p++ = cpu_to_be32(session->se_cb_seq_nr[cb->cb_held_slot]); /* csa_sequenceid */
+ *p++ = cpu_to_be32(cb->cb_held_slot); /* csa_slotid */
+ *p++ = cpu_to_be32(highest_slotid(session)); /* csa_highest_slotid */
*p++ = xdr_zero; /* csa_cachethis */
- xdr_encode_empty_array(p); /* csa_referring_call_lists */
+
+ /* csa_referring_call_lists */
+ encode_uint32(xdr, cb->cb_nr_referring_call_list);
+ list_for_each_entry(rcl, &cb->cb_referring_call_list, __list)
+ encode_referring_call_list4(xdr, rcl);
hdr->nops++;
}
+static void update_cb_slot_table(struct nfsd4_session *ses, u32 target)
+{
+ /* No need to do anything if nothing changed */
+ if (likely(target == READ_ONCE(ses->se_cb_highest_slot)))
+ return;
+
+ spin_lock(&ses->se_lock);
+ if (target > ses->se_cb_highest_slot) {
+ int i;
+
+ target = min(target, NFSD_BC_SLOT_TABLE_SIZE - 1);
+
+ /*
+ * Growing the slot table. Reset any new sequences to 1.
+ *
+ * NB: There is some debate about whether the RFC requires this,
+ * but the Linux client expects it.
+ */
+ for (i = ses->se_cb_highest_slot + 1; i <= target; ++i)
+ ses->se_cb_seq_nr[i] = 1;
+ }
+ ses->se_cb_highest_slot = target;
+ spin_unlock(&ses->se_lock);
+}
+
/*
* CB_SEQUENCE4resok
*
@@ -401,7 +532,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
int status = -ESERVERFAULT;
__be32 *p;
- u32 dummy;
+ u32 seqid, slotid, target;
/*
* If the server returns different values for sessionID, slotID or
@@ -417,27 +548,27 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
}
p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
- dummy = be32_to_cpup(p++);
- if (dummy != session->se_cb_seq_nr) {
+ seqid = be32_to_cpup(p++);
+ if (seqid != session->se_cb_seq_nr[cb->cb_held_slot]) {
dprintk("NFS: %s Invalid sequence number\n", __func__);
goto out;
}
- dummy = be32_to_cpup(p++);
- if (dummy != 0) {
+ slotid = be32_to_cpup(p++);
+ if (slotid != cb->cb_held_slot) {
dprintk("NFS: %s Invalid slotid\n", __func__);
goto out;
}
- /*
- * FIXME: process highest slotid and target highest slotid
- */
+ p++; // ignore current highest slot value
+
+ target = be32_to_cpup(p++);
+ update_cb_slot_table(session, target);
status = 0;
out:
cb->cb_seq_status = status;
return status;
out_overflow:
- print_overflow_msg(__func__, xdr);
status = -EIO;
goto out;
}
@@ -476,6 +607,26 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
}
/*
+ * 20.1. Operation 3: CB_GETATTR - Get Attributes
+ */
+static void nfs4_xdr_enc_cb_getattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_getattr4args(xdr, &hdr, ncf);
+ encode_cb_nops(&hdr);
+}
+
+/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -494,6 +645,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_cb_nops(&hdr);
}
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static void
+nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ struct nfsd4_cb_recall_any *ra;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_recallany4args(xdr, &hdr, ra);
+ encode_cb_nops(&hdr);
+}
/*
* NFSv4.0 and NFSv4.1 XDR decode functions
@@ -511,6 +682,46 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
}
/*
+ * 20.1. Operation 3: CB_GETATTR - Get Attributes
+ */
+static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+ u32 bitmap[3] = {0};
+ u32 attrlen, maxlen;
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
+ status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
+ if (unlikely(status || cb->cb_status))
+ return status;
+ if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
+ return -EIO;
+ if (xdr_stream_decode_u32(xdr, &attrlen) < 0)
+ return -EIO;
+ maxlen = sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize);
+ if (bitmap[2] != 0)
+ maxlen += (sizeof(ncf->ncf_cb_mtime.tv_sec) +
+ sizeof(ncf->ncf_cb_mtime.tv_nsec)) * 2;
+ if (attrlen > maxlen)
+ return -EIO;
+ status = decode_cb_fattr4(xdr, bitmap, ncf);
+ return status;
+}
+
+/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
@@ -525,15 +736,35 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
- if (cb != NULL) {
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
- }
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static int
+nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+ status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
+ return status;
+}
+
#ifdef CONFIG_NFSD_PNFS
/*
* CB_LAYOUTRECALL4args
@@ -617,11 +848,10 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
- if (cb) {
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
- }
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
}
#endif /* CONFIG_NFSD_PNFS */
@@ -642,7 +872,7 @@ static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
const struct nfsd4_callback *cb = data;
const struct nfsd4_blocked_lock *nbl =
container_of(cb, struct nfsd4_blocked_lock, nbl_cb);
- struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner;
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.c.flc_owner;
struct nfs4_cb_compound_hdr hdr = {
.ident = 0,
.minorversion = cb->cb_clp->cl_minorversion,
@@ -676,11 +906,10 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
- if (cb) {
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
- }
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
}
@@ -695,7 +924,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
* case NFS4_OK:
* write_response4 coa_resok4;
* default:
- * length4 coa_bytes_copied;
+ * length4 coa_bytes_copied;
* };
* struct CB_OFFLOAD4args {
* nfs_fh4 coa_fh;
@@ -704,21 +933,22 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
* };
*/
static void encode_offload_info4(struct xdr_stream *xdr,
- __be32 nfserr,
- const struct nfsd4_copy *cp)
+ const struct nfsd4_cb_offload *cbo)
{
__be32 *p;
p = xdr_reserve_space(xdr, 4);
- *p++ = nfserr;
- if (!nfserr) {
+ *p = cbo->co_nfserr;
+ switch (cbo->co_nfserr) {
+ case nfs_ok:
p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
p = xdr_encode_empty_array(p);
- p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written);
- *p++ = cpu_to_be32(cp->cp_res.wr_stable_how);
- p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data,
+ p = xdr_encode_hyper(p, cbo->co_res.wr_bytes_written);
+ *p++ = cpu_to_be32(cbo->co_res.wr_stable_how);
+ p = xdr_encode_opaque_fixed(p, cbo->co_res.wr_verifier.data,
NFS4_VERIFIER_SIZE);
- } else {
+ break;
+ default:
p = xdr_reserve_space(xdr, 8);
/* We always return success if bytes were written */
p = xdr_encode_hyper(p, 0);
@@ -726,18 +956,16 @@ static void encode_offload_info4(struct xdr_stream *xdr,
}
static void encode_cb_offload4args(struct xdr_stream *xdr,
- __be32 nfserr,
- const struct knfsd_fh *fh,
- const struct nfsd4_copy *cp,
+ const struct nfsd4_cb_offload *cbo,
struct nfs4_cb_compound_hdr *hdr)
{
__be32 *p;
p = xdr_reserve_space(xdr, 4);
- *p++ = cpu_to_be32(OP_CB_OFFLOAD);
- encode_nfs_fh4(xdr, fh);
- encode_stateid4(xdr, &cp->cp_res.cb_stateid);
- encode_offload_info4(xdr, nfserr, cp);
+ *p = cpu_to_be32(OP_CB_OFFLOAD);
+ encode_nfs_fh4(xdr, &cbo->co_fh);
+ encode_stateid4(xdr, &cbo->co_res.cb_stateid);
+ encode_offload_info4(xdr, cbo);
hdr->nops++;
}
@@ -747,8 +975,8 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
const void *data)
{
const struct nfsd4_callback *cb = data;
- const struct nfsd4_copy *cp =
- container_of(cb, struct nfsd4_copy, cp_cb);
+ const struct nfsd4_cb_offload *cbo =
+ container_of(cb, struct nfsd4_cb_offload, co_cb);
struct nfs4_cb_compound_hdr hdr = {
.ident = 0,
.minorversion = cb->cb_clp->cl_minorversion,
@@ -756,7 +984,7 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
encode_cb_compound4args(xdr, &hdr);
encode_cb_sequence4args(xdr, cb, &hdr);
- encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr);
+ encode_cb_offload4args(xdr, cbo, &hdr);
encode_cb_nops(&hdr);
}
@@ -772,11 +1000,10 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
if (unlikely(status))
return status;
- if (cb) {
- status = decode_cb_sequence4res(xdr, cb);
- if (unlikely(status || cb->cb_seq_status))
- return status;
- }
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
}
/*
@@ -801,6 +1028,8 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
#endif
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
+ PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
+ PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr),
};
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
@@ -841,7 +1070,52 @@ static const struct rpc_program cb_program = {
static int max_cb_time(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- return max(nn->nfsd4_lease/10, (time_t)1) * HZ;
+
+ /*
+ * nfsd4_lease is set to at most one hour in __nfsd4_write_time,
+ * so we can use 32-bit math on it. Warn if that assumption
+ * ever stops being true.
+ */
+ if (WARN_ON_ONCE(nn->nfsd4_lease > 3600))
+ return 360 * HZ;
+
+ return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ;
+}
+
+static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+
+ trace_nfsd_cb_queue(clp, cb);
+ return queue_work(clp->cl_callback_wq, &cb->cb_work);
+}
+
+static void nfsd4_requeue_cb(struct rpc_task *task, struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+
+ if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
+ trace_nfsd_cb_restart(clp, cb);
+ task->tk_status = 0;
+ set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags);
+ }
+}
+
+static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
+{
+ atomic_inc(&clp->cl_cb_inflight);
+}
+
+static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
+{
+
+ atomic_dec_and_wake_up(&clp->cl_cb_inflight);
+}
+
+static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
+{
+ wait_var_event(&clp->cl_cb_inflight,
+ !atomic_read(&clp->cl_cb_inflight));
}
static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
@@ -854,12 +1128,12 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r
} else {
struct cred *kcred;
- kcred = prepare_kernel_cred(NULL);
+ kcred = prepare_kernel_cred(&init_task);
if (!kcred)
return NULL;
- kcred->uid = ses->se_cb_sec.uid;
- kcred->gid = ses->se_cb_sec.gid;
+ kcred->fsuid = ses->se_cb_sec.uid;
+ kcred->fsgid = ses->se_cb_sec.gid;
return kcred;
}
}
@@ -881,23 +1155,25 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
.program = &cb_program,
.version = 1,
.flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
+ .cred = current_cred(),
};
struct rpc_clnt *client;
const struct cred *cred;
if (clp->cl_minorversion == 0) {
if (!clp->cl_cred.cr_principal &&
- (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5))
+ (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) {
+ trace_nfsd_cb_setup_err(clp, -EINVAL);
return -EINVAL;
+ }
args.client_name = clp->cl_cred.cr_principal;
args.prognumber = conn->cb_prog;
args.protocol = XPRT_TRANSPORT_TCP;
args.authflavor = clp->cl_cred.cr_flavor;
clp->cl_cb_ident = conn->cb_ident;
} else {
- if (!conn->cb_xprt)
+ if (!conn->cb_xprt || !ses)
return -EINVAL;
- clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
args.prognumber = clp->cl_cb_session->se_cb_prog;
@@ -908,40 +1184,47 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
/* Create RPC client */
client = rpc_create(&args);
if (IS_ERR(client)) {
- dprintk("NFSD: couldn't create callback client: %ld\n",
- PTR_ERR(client));
+ trace_nfsd_cb_setup_err(clp, PTR_ERR(client));
return PTR_ERR(client);
}
cred = get_backchannel_cred(clp, client, ses);
- if (IS_ERR(cred)) {
+ if (!cred) {
+ trace_nfsd_cb_setup_err(clp, -ENOMEM);
rpc_shutdown_client(client);
- return PTR_ERR(cred);
+ return -ENOMEM;
}
+
+ if (clp->cl_minorversion != 0)
+ clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
clp->cl_cb_client = client;
clp->cl_cb_cred = cred;
+ rcu_read_lock();
+ trace_nfsd_cb_setup(clp, rpc_peeraddr2str(client, RPC_DISPLAY_NETID),
+ args.authflavor);
+ rcu_read_unlock();
return 0;
}
-static void warn_no_callback_path(struct nfs4_client *clp, int reason)
+static void nfsd4_mark_cb_state(struct nfs4_client *clp, int newstate)
{
- dprintk("NFSD: warning: no callback path to client %.*s: error %d\n",
- (int)clp->cl_name.len, clp->cl_name.data, reason);
+ if (clp->cl_cb_state != newstate) {
+ clp->cl_cb_state = newstate;
+ trace_nfsd_cb_new_state(clp);
+ }
}
-static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
+static void nfsd4_mark_cb_down(struct nfs4_client *clp)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
- clp->cl_cb_state = NFSD4_CB_DOWN;
- warn_no_callback_path(clp, reason);
+ nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN);
}
-static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
+static void nfsd4_mark_cb_fault(struct nfs4_client *clp)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
- clp->cl_cb_state = NFSD4_CB_FAULT;
- warn_no_callback_path(clp, reason);
+ nfsd4_mark_cb_state(clp, NFSD4_CB_FAULT);
}
static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
@@ -949,26 +1232,34 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
if (task->tk_status)
- nfsd4_mark_cb_down(clp, task->tk_status);
+ nfsd4_mark_cb_down(clp);
else
- clp->cl_cb_state = NFSD4_CB_UP;
+ nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
+}
+
+static void nfsd4_cb_probe_release(void *calldata)
+{
+ struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
+
+ nfsd41_cb_inflight_end(clp);
+
}
static const struct rpc_call_ops nfsd4_cb_probe_ops = {
/* XXX: release method to ensure we set the cb channel down if
* necessary on early failure? */
.rpc_call_done = nfsd4_cb_probe_done,
+ .rpc_release = nfsd4_cb_probe_release,
};
-static struct workqueue_struct *callback_wq;
-
/*
* Poke the callback thread to process any updates to the callback
* parameters, and send a null probe.
*/
void nfsd4_probe_callback(struct nfs4_client *clp)
{
- clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+ trace_nfsd_cb_probe(clp);
+ nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN);
set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
nfsd4_run_cb(&clp->cl_cb_null);
}
@@ -976,40 +1267,184 @@ void nfsd4_probe_callback(struct nfs4_client *clp)
void nfsd4_probe_callback_sync(struct nfs4_client *clp)
{
nfsd4_probe_callback(clp);
- flush_workqueue(callback_wq);
+ flush_workqueue(clp->cl_callback_wq);
}
void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
{
- clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+ nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN);
spin_lock(&clp->cl_lock);
memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
spin_unlock(&clp->cl_lock);
}
+static int grab_slot(struct nfsd4_session *ses)
+{
+ int idx;
+
+ spin_lock(&ses->se_lock);
+ idx = ffs(ses->se_cb_slot_avail) - 1;
+ if (idx < 0 || idx > ses->se_cb_highest_slot) {
+ spin_unlock(&ses->se_lock);
+ return -1;
+ }
+ /* clear the bit for the slot */
+ ses->se_cb_slot_avail &= ~BIT(idx);
+ spin_unlock(&ses->se_lock);
+ return idx;
+}
+
/*
* There's currently a single callback channel slot.
* If the slot is available, then mark it busy. Otherwise, set the
* thread for sleeping on the callback RPC wait queue.
*/
-static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
+static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
{
- if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+ struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd4_session *ses = clp->cl_cb_session;
+
+ if (cb->cb_held_slot >= 0)
+ return true;
+ cb->cb_held_slot = grab_slot(ses);
+ if (cb->cb_held_slot < 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
/* Race breaker */
- if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
- dprintk("%s slot is busy\n", __func__);
+ cb->cb_held_slot = grab_slot(ses);
+ if (cb->cb_held_slot < 0)
return false;
- }
rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
}
return true;
}
-/*
- * TODO: cb_sequence should support referring call lists, cachethis, multiple
- * slots, and mark callback channel down on communication errors.
+static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd4_session *ses = clp->cl_cb_session;
+
+ if (cb->cb_held_slot >= 0) {
+ spin_lock(&ses->se_lock);
+ ses->se_cb_slot_avail |= BIT(cb->cb_held_slot);
+ spin_unlock(&ses->se_lock);
+ cb->cb_held_slot = -1;
+ rpc_wake_up_next(&clp->cl_cb_waitq);
+ }
+}
+
+static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+
+ trace_nfsd_cb_destroy(clp, cb);
+ nfsd41_cb_release_slot(cb);
+ if (test_bit(NFSD4_CALLBACK_WAKE, &cb->cb_flags))
+ clear_and_wake_up_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags);
+ else
+ clear_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags);
+
+ if (cb->cb_ops && cb->cb_ops->release)
+ cb->cb_ops->release(cb);
+ nfsd41_cb_inflight_end(clp);
+}
+
+/**
+ * nfsd41_cb_referring_call - add a referring call to a callback operation
+ * @cb: context of callback to add the rc to
+ * @sessionid: referring call's session ID
+ * @slotid: referring call's session slot index
+ * @seqno: referring call's slot sequence number
+ *
+ * Caller serializes access to @cb.
+ *
+ * NB: If memory allocation fails, the referring call is not added.
+ */
+void nfsd41_cb_referring_call(struct nfsd4_callback *cb,
+ struct nfs4_sessionid *sessionid,
+ u32 slotid, u32 seqno)
+{
+ struct nfsd4_referring_call_list *rcl;
+ struct nfsd4_referring_call *rc;
+ bool found;
+
+ might_sleep();
+
+ found = false;
+ list_for_each_entry(rcl, &cb->cb_referring_call_list, __list) {
+ if (!memcmp(rcl->rcl_sessionid.data, sessionid->data,
+ NFS4_MAX_SESSIONID_LEN)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ rcl = kmalloc(sizeof(*rcl), GFP_KERNEL);
+ if (!rcl)
+ return;
+ memcpy(rcl->rcl_sessionid.data, sessionid->data,
+ NFS4_MAX_SESSIONID_LEN);
+ rcl->__nr_referring_calls = 0;
+ INIT_LIST_HEAD(&rcl->rcl_referring_calls);
+ list_add(&rcl->__list, &cb->cb_referring_call_list);
+ cb->cb_nr_referring_call_list++;
+ }
+
+ found = false;
+ list_for_each_entry(rc, &rcl->rcl_referring_calls, __list) {
+ if (rc->rc_sequenceid == seqno && rc->rc_slotid == slotid) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ rc = kmalloc(sizeof(*rc), GFP_KERNEL);
+ if (!rc)
+ goto out;
+ rc->rc_sequenceid = seqno;
+ rc->rc_slotid = slotid;
+ rcl->__nr_referring_calls++;
+ list_add(&rc->__list, &rcl->rcl_referring_calls);
+ }
+
+out:
+ if (!rcl->__nr_referring_calls) {
+ cb->cb_nr_referring_call_list--;
+ list_del(&rcl->__list);
+ kfree(rcl);
+ }
+}
+
+/**
+ * nfsd41_cb_destroy_referring_call_list - release referring call info
+ * @cb: context of a callback that has completed
+ *
+ * Callers who allocate referring calls using nfsd41_cb_referring_call() must
+ * release those resources by calling nfsd41_cb_destroy_referring_call_list.
+ *
+ * Caller serializes access to @cb.
*/
+void nfsd41_cb_destroy_referring_call_list(struct nfsd4_callback *cb)
+{
+ struct nfsd4_referring_call_list *rcl;
+ struct nfsd4_referring_call *rc;
+
+ while (!list_empty(&cb->cb_referring_call_list)) {
+ rcl = list_first_entry(&cb->cb_referring_call_list,
+ struct nfsd4_referring_call_list,
+ __list);
+
+ while (!list_empty(&rcl->rcl_referring_calls)) {
+ rc = list_first_entry(&rcl->rcl_referring_calls,
+ struct nfsd4_referring_call,
+ __list);
+ list_del(&rc->__list);
+ kfree(rc);
+ }
+ list_del(&rcl->__list);
+ kfree(rcl);
+ }
+}
+
static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
{
struct nfsd4_callback *cb = calldata;
@@ -1020,37 +1455,25 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
* cb_seq_status is only set in decode_cb_sequence4res,
* and so will remain 1 if an rpc level failure occurs.
*/
+ trace_nfsd_cb_rpc_prepare(clp);
cb->cb_seq_status = 1;
cb->cb_status = 0;
- if (minorversion) {
- if (!nfsd41_cb_get_slot(clp, task))
- return;
- }
+ if (minorversion && !nfsd41_cb_get_slot(cb, task))
+ return;
rpc_call_start(task);
}
+/* Returns true if CB_COMPOUND processing should continue */
static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb)
{
- struct nfs4_client *clp = cb->cb_clp;
- struct nfsd4_session *session = clp->cl_cb_session;
- bool ret = true;
-
- if (!clp->cl_minorversion) {
- /*
- * If the backchannel connection was shut down while this
- * task was queued, we need to resubmit it after setting up
- * a new backchannel connection.
- *
- * Note that if we lost our callback connection permanently
- * the submission code will error out, so we don't need to
- * handle that case here.
- */
- if (task->tk_flags & RPC_TASK_KILLED)
- goto need_restart;
+ struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
+ bool ret = false;
- return true;
- }
+ if (cb->cb_held_slot < 0)
+ goto requeue;
+ /* This is the operation status code for CB_SEQUENCE */
+ trace_nfsd_cb_seq_status(task, cb);
switch (cb->cb_seq_status) {
case 0:
/*
@@ -1060,51 +1483,64 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
* If CB_SEQUENCE returns an error, then the state of the slot
* (sequence ID, cached reply) MUST NOT change.
*/
- ++session->se_cb_seq_nr;
+ ++session->se_cb_seq_nr[cb->cb_held_slot];
+ ret = true;
break;
case -ESERVERFAULT:
- ++session->se_cb_seq_nr;
- /* Fall through */
+ /*
+ * Call succeeded, but the session, slot index, or slot
+ * sequence number in the response do not match the same
+ * in the server's call. The sequence information is thus
+ * untrustworthy.
+ */
+ nfsd4_mark_cb_fault(cb->cb_clp);
+ break;
case 1:
+ /*
+ * cb_seq_status remains 1 if an RPC Reply was never
+ * received. NFSD can't know if the client processed
+ * the CB_SEQUENCE operation. Ask the client to send a
+ * DESTROY_SESSION to recover.
+ */
+ fallthrough;
case -NFS4ERR_BADSESSION:
- nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
- ret = false;
- break;
+ nfsd4_mark_cb_fault(cb->cb_clp);
+ goto requeue;
case -NFS4ERR_DELAY:
- if (!rpc_restart_call(task))
- goto out;
-
+ cb->cb_seq_status = 1;
+ if (RPC_SIGNALLED(task) || !rpc_restart_call(task))
+ goto requeue;
rpc_delay(task, 2 * HZ);
return false;
+ case -NFS4ERR_SEQ_MISORDERED:
case -NFS4ERR_BADSLOT:
+ /*
+ * A SEQ_MISORDERED or BADSLOT error means that the client and
+ * server are out of sync as to the backchannel parameters. Mark
+ * the backchannel faulty and restart the RPC, but leak the slot
+ * so that it's no longer used.
+ */
+ nfsd4_mark_cb_fault(cb->cb_clp);
+ cb->cb_held_slot = -1;
goto retry_nowait;
- case -NFS4ERR_SEQ_MISORDERED:
- if (session->se_cb_seq_nr != 1) {
- session->se_cb_seq_nr = 1;
- goto retry_nowait;
- }
- break;
default:
- dprintk("%s: unprocessed error %d\n", __func__,
- cb->cb_seq_status);
+ nfsd4_mark_cb_fault(cb->cb_clp);
}
-
- clear_bit(0, &clp->cl_cb_slot_busy);
- rpc_wake_up_next(&clp->cl_cb_waitq);
- dprintk("%s: freed slot, new seqid=%d\n", __func__,
- clp->cl_cb_session->se_cb_seq_nr);
-
- if (task->tk_flags & RPC_TASK_KILLED)
- goto need_restart;
-out:
+ trace_nfsd_cb_free_slot(task, cb);
+ nfsd41_cb_release_slot(cb);
return ret;
retry_nowait:
- if (rpc_restart_call_prepare(task))
- ret = false;
- goto out;
-need_restart:
- task->tk_status = 0;
- cb->cb_need_restart = true;
+ /*
+ * RPC_SIGNALLED() means that the rpc_client is being torn down and
+ * (possibly) recreated. Requeue the call in that case.
+ */
+ if (!RPC_SIGNALLED(task)) {
+ if (rpc_restart_call_prepare(task))
+ return false;
+ }
+requeue:
+ nfsd41_cb_release_slot(cb);
+ nfsd4_requeue_cb(task, cb);
return false;
}
@@ -1113,14 +1549,28 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
struct nfsd4_callback *cb = calldata;
struct nfs4_client *clp = cb->cb_clp;
- dprintk("%s: minorversion=%d\n", __func__,
- clp->cl_minorversion);
+ trace_nfsd_cb_rpc_done(clp);
- if (!nfsd4_cb_sequence_done(task, cb))
+ if (!clp->cl_minorversion) {
+ /*
+ * If the backchannel connection was shut down while this
+ * task was queued, we need to resubmit it after setting up
+ * a new backchannel connection.
+ *
+ * Note that if we lost our callback connection permanently
+ * the submission code will error out, so we don't need to
+ * handle that case here.
+ */
+ if (RPC_SIGNALLED(task))
+ nfsd4_requeue_cb(task, cb);
+ } else if (!nfsd4_cb_sequence_done(task, cb)) {
return;
+ }
if (cb->cb_status) {
- WARN_ON_ONCE(task->tk_status);
+ WARN_ONCE(task->tk_status,
+ "cb_status=%d tk_status=%d cb_opcode=%d",
+ cb->cb_status, task->tk_status, cb->cb_ops->opcode);
task->tk_status = cb->cb_status;
}
@@ -1130,10 +1580,12 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
rpc_restart_call_prepare(task);
return;
case 1:
- break;
- case -1:
- /* Network partition? */
- nfsd4_mark_cb_down(clp, task->tk_status);
+ switch (task->tk_status) {
+ case -EIO:
+ case -ETIMEDOUT:
+ case -EACCES:
+ nfsd4_mark_cb_down(clp);
+ }
break;
default:
BUG();
@@ -1144,10 +1596,12 @@ static void nfsd4_cb_release(void *calldata)
{
struct nfsd4_callback *cb = calldata;
- if (cb->cb_need_restart)
- nfsd4_run_cb(cb);
+ trace_nfsd_cb_rpc_release(cb->cb_clp);
+
+ if (test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags))
+ nfsd4_queue_cb(cb);
else
- cb->cb_ops->release(cb);
+ nfsd41_destroy_cb(cb);
}
@@ -1157,22 +1611,12 @@ static const struct rpc_call_ops nfsd4_cb_ops = {
.rpc_release = nfsd4_cb_release,
};
-int nfsd4_create_callback_queue(void)
-{
- callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0);
- if (!callback_wq)
- return -ENOMEM;
- return 0;
-}
-
-void nfsd4_destroy_callback_queue(void)
-{
- destroy_workqueue(callback_wq);
-}
-
/* must be called under the state lock */
void nfsd4_shutdown_callback(struct nfs4_client *clp)
{
+ if (clp->cl_cb_state != NFSD4_CB_UNKNOWN)
+ trace_nfsd_cb_shutdown(clp);
+
set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags);
/*
* Note this won't actually result in a null callback;
@@ -1180,15 +1624,17 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
* client, destroy the rpc client, and stop:
*/
nfsd4_run_cb(&clp->cl_cb_null);
- flush_workqueue(callback_wq);
+ flush_workqueue(clp->cl_callback_wq);
+ nfsd41_cb_inflight_wait_complete(clp);
}
-/* requires cl_lock: */
static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
{
struct nfsd4_session *s;
struct nfsd4_conn *c;
+ lockdep_assert_held(&clp->cl_lock);
+
list_for_each_entry(s, &clp->cl_sessions, se_perclnt) {
list_for_each_entry(c, &s->se_conns, cn_persession) {
if (c->cn_flags & NFS4_CDFC4_BACK)
@@ -1198,6 +1644,12 @@ static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
return NULL;
}
+/*
+ * Note there isn't a lot of locking in this code; instead we depend on
+ * the fact that it is run from clp->cl_callback_wq, which won't run two
+ * work items at once. So, for example, clp->cl_callback_wq handles all
+ * access of cl_cb_client and all calls to rpc_create or rpc_shutdown_client.
+ */
static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
{
struct nfs4_cb_conn conn;
@@ -1206,11 +1658,14 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
struct nfsd4_conn *c;
int err;
+ trace_nfsd_cb_bc_update(clp, cb);
+
/*
* This is either an update, or the client dying; in either case,
* kill the old client:
*/
if (clp->cl_cb_client) {
+ trace_nfsd_cb_bc_shutdown(clp, cb);
rpc_shutdown_client(clp->cl_cb_client);
clp->cl_cb_client = NULL;
put_cred(clp->cl_cb_cred);
@@ -1222,13 +1677,15 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
}
if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
return;
+
spin_lock(&clp->cl_lock);
/*
* Only serialized callback code is allowed to clear these
* flags; main nfsd code can only set them:
*/
- BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
+ WARN_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
+
memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
c = __nfsd4_find_backchannel(clp);
if (c) {
@@ -1240,7 +1697,9 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
err = setup_callback_client(clp, &conn, ses);
if (err) {
- nfsd4_mark_cb_down(clp, err);
+ nfsd4_mark_cb_down(clp);
+ if (c)
+ svc_xprt_put(c->cn_xprt);
return;
}
}
@@ -1252,22 +1711,20 @@ nfsd4_run_cb_work(struct work_struct *work)
container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
+ int flags, ret;
- if (cb->cb_need_restart) {
- cb->cb_need_restart = false;
- } else {
- if (cb->cb_ops && cb->cb_ops->prepare)
- cb->cb_ops->prepare(cb);
- }
+ trace_nfsd_cb_start(clp);
if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
- if (!clnt) {
- /* Callback channel broken, or client killed; give up: */
- if (cb->cb_ops && cb->cb_ops->release)
- cb->cb_ops->release(cb);
+ if (!clnt || clp->cl_state == NFSD4_COURTESY) {
+ /*
+ * Callback channel broken, client killed or
+ * nfs4_client in courtesy state; give up.
+ */
+ nfsd41_destroy_cb(cb);
return;
}
@@ -1275,13 +1732,24 @@ nfsd4_run_cb_work(struct work_struct *work)
* Don't send probe messages for 4.1 or later.
*/
if (!cb->cb_ops && clp->cl_minorversion) {
- clp->cl_cb_state = NFSD4_CB_UP;
+ nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
+ nfsd41_destroy_cb(cb);
return;
}
+ if (!test_and_clear_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags)) {
+ if (cb->cb_ops && cb->cb_ops->prepare)
+ cb->cb_ops->prepare(cb);
+ }
+
cb->cb_msg.rpc_cred = clp->cl_cb_cred;
- rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
- cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
+ flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
+ ret = rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
+ cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
+ if (ret != 0) {
+ set_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags);
+ nfsd4_queue_cb(cb);
+ }
}
void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
@@ -1291,14 +1759,30 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
+ cb->cb_flags = 0;
cb->cb_ops = ops;
INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
- cb->cb_seq_status = 1;
cb->cb_status = 0;
- cb->cb_need_restart = false;
+ cb->cb_held_slot = -1;
+ cb->cb_nr_referring_call_list = 0;
+ INIT_LIST_HEAD(&cb->cb_referring_call_list);
}
-void nfsd4_run_cb(struct nfsd4_callback *cb)
+/**
+ * nfsd4_run_cb - queue up a callback job to run
+ * @cb: callback to queue
+ *
+ * Kick off a callback to do its thing. Returns false if it was already
+ * on a queue, true otherwise.
+ */
+bool nfsd4_run_cb(struct nfsd4_callback *cb)
{
- queue_work(callback_wq, &cb->cb_work);
+ struct nfs4_client *clp = cb->cb_clp;
+ bool queued;
+
+ nfsd41_cb_inflight_begin(clp);
+ queued = nfsd4_queue_cb(cb);
+ if (!queued)
+ nfsd41_cb_inflight_end(clp);
+ return queued;
}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index bf137fec33ff..8cca1329f348 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -41,6 +41,7 @@
#include "idmap.h"
#include "nfsd.h"
#include "netns.h"
+#include "vfs.h"
/*
* Turn off idmapping when using AUTH_SYS.
@@ -82,8 +83,8 @@ ent_init(struct cache_head *cnew, struct cache_head *citm)
new->id = itm->id;
new->type = itm->type;
- strlcpy(new->name, itm->name, sizeof(new->name));
- strlcpy(new->authname, itm->authname, sizeof(new->name));
+ strscpy(new->name, itm->name, sizeof(new->name));
+ strscpy(new->authname, itm->authname, sizeof(new->authname));
}
static void
@@ -122,6 +123,12 @@ idtoname_hash(struct ent *ent)
return hash;
}
+static int
+idtoname_upcall(struct cache_detail *cd, struct cache_head *h)
+{
+ return sunrpc_cache_pipe_upcall_timeout(cd, h);
+}
+
static void
idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
int *blen)
@@ -162,7 +169,7 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
ent->id);
if (test_bit(CACHE_VALID, &h->flags))
seq_printf(m, " %s", ent->name);
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
return 0;
}
@@ -184,6 +191,7 @@ static const struct cache_detail idtoname_cache_template = {
.hash_size = ENT_HASHMAX,
.name = "nfs4.idtoname",
.cache_put = ent_put,
+ .cache_upcall = idtoname_upcall,
.cache_request = idtoname_request,
.cache_parse = idtoname_parse,
.cache_show = idtoname_show,
@@ -232,8 +240,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
goto out;
/* expiry */
- ent.h.expiry_time = get_expiry(&buf);
- if (ent.h.expiry_time == 0)
+ error = get_expiry(&buf, &ent.h.expiry_time);
+ if (error)
goto out;
error = -ENOMEM;
@@ -295,6 +303,12 @@ nametoid_hash(struct ent *ent)
return hash_str(ent->name, ENT_HASHBITS);
}
+static int
+nametoid_upcall(struct cache_detail *cd, struct cache_head *h)
+{
+ return sunrpc_cache_pipe_upcall_timeout(cd, h);
+}
+
static void
nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
int *blen)
@@ -333,7 +347,7 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
ent->name);
if (test_bit(CACHE_VALID, &h->flags))
seq_printf(m, " %u", ent->id);
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
return 0;
}
@@ -347,6 +361,7 @@ static const struct cache_detail nametoid_cache_template = {
.hash_size = ENT_HASHMAX,
.name = "nfs4.nametoid",
.cache_put = ent_put,
+ .cache_upcall = nametoid_upcall,
.cache_request = nametoid_request,
.cache_parse = nametoid_parse,
.cache_show = nametoid_show,
@@ -393,8 +408,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
memcpy(ent.name, buf1, sizeof(ent.name));
/* expiry */
- ent.h.expiry_time = get_expiry(&buf);
- if (ent.h.expiry_time == 0)
+ error = get_expiry(&buf, &ent.h.expiry_time);
+ if (error)
goto out;
/* ID */
@@ -534,7 +549,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
return nfserr_badowner;
memcpy(key.name, name, namelen);
key.name[namelen] = '\0';
- strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+ strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
ret = idmap_lookup(rqstp, nametoid_lookup, &key, nn->nametoid_cache, &item);
if (ret == -ENOENT)
return nfserr_badowner;
@@ -566,11 +581,12 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr,
.id = id,
.type = type,
};
+ __be32 status = nfs_ok;
__be32 *p;
int ret;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
+ strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);
if (ret == -ENOENT)
return encode_ascii_id(xdr, id);
@@ -578,12 +594,16 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr,
return nfserrno(ret);
ret = strlen(item->name);
WARN_ON_ONCE(ret > IDMAP_NAMESZ);
+
p = xdr_reserve_space(xdr, ret + 4);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque(p, item->name, ret);
+ if (unlikely(!p)) {
+ status = nfserr_resource;
+ goto out_put;
+ }
+ xdr_encode_opaque(p, item->name, ret);
+out_put:
cache_put(&item->h, nn->idtoname_cache);
- return 0;
+ return status;
}
static bool
@@ -634,7 +654,7 @@ nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
return nfserr_inval;
status = do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, &id);
- *uid = make_kuid(&init_user_ns, id);
+ *uid = make_kuid(nfsd_user_namespace(rqstp), id);
if (!uid_valid(*uid))
status = nfserr_badowner;
return status;
@@ -651,7 +671,7 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
return nfserr_inval;
status = do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, &id);
- *gid = make_kgid(&init_user_ns, id);
+ *gid = make_kgid(nfsd_user_namespace(rqstp), id);
if (!gid_valid(*gid))
status = nfserr_badowner;
return status;
@@ -660,13 +680,13 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp,
kuid_t uid)
{
- u32 id = from_kuid(&init_user_ns, uid);
+ u32 id = from_kuid_munged(nfsd_user_namespace(rqstp), uid);
return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id);
}
__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp,
kgid_t gid)
{
- u32 id = from_kgid(&init_user_ns, gid);
+ u32 id = from_kgid_munged(nfsd_user_namespace(rqstp), gid);
return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id);
}
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 44517fb5c0de..683bd1130afe 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -25,7 +25,7 @@ static struct kmem_cache *nfs4_layout_cache;
static struct kmem_cache *nfs4_layout_stateid_cache;
static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
-static const struct lock_manager_operations nfsd4_layouts_lm_ops;
+static const struct lease_manager_operations nfsd4_layouts_lm_ops;
const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
#ifdef CONFIG_NFSD_FLEXFILELAYOUT
@@ -65,7 +65,7 @@ nfsd4_alloc_devid_map(const struct svc_fh *fhp)
return;
map->fsid_type = fh->fh_fsid_type;
- memcpy(&map->fsid, fh->fh_fsid, fsid_len);
+ memcpy(&map->fsid, fh_fsid(fh), fsid_len);
spin_lock(&nfsd_devid_lock);
if (fhp->fh_export->ex_devid_map)
@@ -75,7 +75,7 @@ nfsd4_alloc_devid_map(const struct svc_fh *fhp)
list_for_each_entry(old, &nfsd_devid_hash[i], hash) {
if (old->fsid_type != fh->fh_fsid_type)
continue;
- if (memcmp(old->fsid, fh->fh_fsid,
+ if (memcmp(old->fsid, fh_fsid(fh),
key_len(old->fsid_type)))
continue;
@@ -120,7 +120,6 @@ nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
id->fsid_idx = fhp->fh_export->ex_devid_map->idx;
id->generation = device_generation;
- id->pad = 0;
return 0;
}
@@ -145,12 +144,30 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
#ifdef CONFIG_NFSD_SCSILAYOUT
if (sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks &&
- sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops &&
- blk_queue_scsi_passthrough(sb->s_bdev->bd_disk->queue))
+ sb->s_bdev &&
+ sb->s_bdev->bd_disk->fops->pr_ops &&
+ sb->s_bdev->bd_disk->fops->get_unique_id)
exp->ex_layout_types |= 1 << LAYOUT_SCSI;
#endif
}
+void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
+{
+ struct nfsd_file *fl;
+
+ spin_lock(&ls->ls_stid.sc_file->fi_lock);
+ fl = ls->ls_file;
+ ls->ls_file = NULL;
+ spin_unlock(&ls->ls_stid.sc_file->fi_lock);
+
+ if (fl) {
+ if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
+ kernel_setlease(fl->nf_file, F_UNLCK, NULL,
+ (void **)&ls);
+ nfsd_file_put(fl);
+ }
+}
+
static void
nfsd4_free_layout_stateid(struct nfs4_stid *stid)
{
@@ -168,9 +185,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
list_del_init(&ls->ls_perfile);
spin_unlock(&fp->fi_lock);
- if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
- vfs_setlease(ls->ls_file, F_UNLCK, NULL, (void **)&ls);
- fput(ls->ls_file);
+ nfsd4_close_layout(ls);
if (ls->ls_recalled)
atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@@ -181,27 +196,26 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
static int
nfsd4_layout_setlease(struct nfs4_layout_stateid *ls)
{
- struct file_lock *fl;
+ struct file_lease *fl;
int status;
if (nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
return 0;
- fl = locks_alloc_lock();
+ fl = locks_alloc_lease();
if (!fl)
return -ENOMEM;
- locks_init_lock(fl);
+ locks_init_lease(fl);
fl->fl_lmops = &nfsd4_layouts_lm_ops;
- fl->fl_flags = FL_LAYOUT;
- fl->fl_type = F_RDLCK;
- fl->fl_end = OFFSET_MAX;
- fl->fl_owner = ls;
- fl->fl_pid = current->tgid;
- fl->fl_file = ls->ls_file;
-
- status = vfs_setlease(fl->fl_file, fl->fl_type, &fl, NULL);
+ fl->c.flc_flags = FL_LAYOUT;
+ fl->c.flc_type = F_RDLCK;
+ fl->c.flc_owner = ls;
+ fl->c.flc_pid = current->tgid;
+ fl->c.flc_file = ls->ls_file->nf_file;
+
+ status = kernel_setlease(fl->c.flc_file, fl->c.flc_type, &fl, NULL);
if (status) {
- locks_free_lock(fl);
+ locks_free_lease(fl);
return status;
}
BUG_ON(fl != NULL);
@@ -235,21 +249,21 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops,
NFSPROC4_CLNT_CB_LAYOUT);
- if (parent->sc_type == NFS4_DELEG_STID)
- ls->ls_file = get_file(fp->fi_deleg_file);
+ if (parent->sc_type == SC_TYPE_DELEG)
+ ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
else
ls->ls_file = find_any_file(fp);
BUG_ON(!ls->ls_file);
if (nfsd4_layout_setlease(ls)) {
- fput(ls->ls_file);
+ nfsd_file_put(ls->ls_file);
put_nfs4_file(fp);
kmem_cache_free(nfs4_layout_stateid_cache, ls);
return NULL;
}
spin_lock(&clp->cl_lock);
- stp->sc_type = NFS4_LAYOUT_STID;
+ stp->sc_type = SC_TYPE_LAYOUT;
list_add(&ls->ls_perclnt, &clp->cl_lo_states);
spin_unlock(&clp->cl_lock);
@@ -268,13 +282,13 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
{
struct nfs4_layout_stateid *ls;
struct nfs4_stid *stid;
- unsigned char typemask = NFS4_LAYOUT_STID;
+ unsigned short typemask = SC_TYPE_LAYOUT;
__be32 status;
if (create)
- typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
+ typemask |= (SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG);
- status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
+ status = nfsd4_lookup_stateid(cstate, stateid, typemask, 0, &stid,
net_generic(SVC_NET(rqstp), nfsd_net_id));
if (status)
goto out;
@@ -285,7 +299,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
goto out_put_stid;
}
- if (stid->sc_type != NFS4_LAYOUT_STID) {
+ if (stid->sc_type != SC_TYPE_LAYOUT) {
ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type);
nfs4_put_stid(stid);
@@ -322,16 +336,17 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
if (ls->ls_recalled)
goto out_unlock;
- ls->ls_recalled = true;
- atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
if (list_empty(&ls->ls_layouts))
goto out_unlock;
+ ls->ls_recalled = true;
+ atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid);
- refcount_inc(&ls->ls_stid.sc_count);
- nfsd4_run_cb(&ls->ls_recall);
-
+ if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ls->ls_recall.cb_flags)) {
+ refcount_inc(&ls->ls_stid.sc_count);
+ nfsd4_run_cb(&ls->ls_recall);
+ }
out_unlock:
spin_unlock(&ls->ls_lock);
}
@@ -421,7 +436,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL);
if (!new)
return nfserr_jukebox;
- memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg));
+ memcpy(&new->lo_seg, seg, sizeof(new->lo_seg));
new->lo_state = ls;
spin_lock(&fp->fi_lock);
@@ -514,11 +529,11 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
if (!list_empty(&ls->ls_layouts)) {
if (found)
nfs4_inc_and_copy_stateid(&lrp->lr_sid, &ls->ls_stid);
- lrp->lrs_present = 1;
+ lrp->lrs_present = true;
} else {
trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid);
- nfs4_unhash_stid(&ls->ls_stid);
- lrp->lrs_present = 0;
+ ls->ls_stid.sc_status |= SC_STATUS_CLOSED;
+ lrp->lrs_present = false;
}
spin_unlock(&ls->ls_lock);
@@ -538,7 +553,7 @@ nfsd4_return_client_layouts(struct svc_rqst *rqstp,
struct nfs4_layout *lp, *t;
LIST_HEAD(reaplist);
- lrp->lrs_present = 0;
+ lrp->lrs_present = false;
spin_lock(&clp->cl_lock);
list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
@@ -604,7 +619,7 @@ nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
}
static void
-nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
+nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
char addr_str[INET6_ADDRSTRLEN];
@@ -626,7 +641,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
argv[0] = (char *)nfsd_recall_failed;
argv[1] = addr_str;
- argv[2] = ls->ls_file->f_path.mnt->mnt_sb->s_id;
+ argv[2] = file->nf_file->f_path.mnt->mnt_sb->s_id;
argv[3] = NULL;
error = call_usermodehelper(nfsd_recall_failed, argv, envp,
@@ -656,8 +671,9 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
struct nfsd_net *nn;
ktime_t now, cutoff;
const struct nfsd4_layout_ops *ops;
+ struct nfsd_file *fl;
-
+ trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
switch (task->tk_status) {
case 0:
case -NFS4ERR_DELAY:
@@ -675,25 +691,30 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
/* Client gets 2 lease periods to return it */
cutoff = ktime_add_ns(task->tk_start,
- nn->nfsd4_lease * NSEC_PER_SEC * 2);
+ (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2);
if (ktime_before(now, cutoff)) {
rpc_delay(task, HZ/100); /* 10 mili-seconds */
return 0;
}
- /* Fallthrough */
+ fallthrough;
default:
/*
* Unknown error or non-responding client, we'll need to fence.
*/
trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
-
- ops = nfsd4_layout_ops[ls->ls_layout_type];
- if (ops->fence_client)
- ops->fence_client(ls);
- else
- nfsd4_cb_layout_fail(ls);
- return -1;
+ rcu_read_lock();
+ fl = nfsd_file_get(ls->ls_file);
+ rcu_read_unlock();
+ if (fl) {
+ ops = nfsd4_layout_ops[ls->ls_layout_type];
+ if (ops->fence_client)
+ ops->fence_client(ls, fl);
+ else
+ nfsd4_cb_layout_fail(ls, fl);
+ nfsd_file_put(fl);
+ }
+ return 1;
case -NFS4ERR_NOMATCHING_LAYOUT:
trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
task->tk_status = 0;
@@ -719,10 +740,11 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops = {
.prepare = nfsd4_cb_layout_prepare,
.done = nfsd4_cb_layout_done,
.release = nfsd4_cb_layout_release,
+ .opcode = OP_CB_LAYOUTRECALL,
};
static bool
-nfsd4_layout_lm_break(struct file_lock *fl)
+nfsd4_layout_lm_break(struct file_lease *fl)
{
/*
* We don't want the locks code to timeout the lease for us;
@@ -730,19 +752,19 @@ nfsd4_layout_lm_break(struct file_lock *fl)
* in time:
*/
fl->fl_break_time = 0;
- nfsd4_recall_file_layout(fl->fl_owner);
+ nfsd4_recall_file_layout(fl->c.flc_owner);
return false;
}
static int
-nfsd4_layout_lm_change(struct file_lock *onlist, int arg,
+nfsd4_layout_lm_change(struct file_lease *onlist, int arg,
struct list_head *dispose)
{
BUG_ON(!(arg & F_UNLCK));
return lease_modify(onlist, arg, dispose);
}
-static const struct lock_manager_operations nfsd4_layouts_lm_ops = {
+static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
.lm_break = nfsd4_layout_lm_break,
.lm_change = nfsd4_layout_lm_change,
};
@@ -755,13 +777,11 @@ nfsd4_init_pnfs(void)
for (i = 0; i < DEVID_HASH_SIZE; i++)
INIT_LIST_HEAD(&nfsd_devid_hash[i]);
- nfs4_layout_cache = kmem_cache_create("nfs4_layout",
- sizeof(struct nfs4_layout), 0, 0, NULL);
+ nfs4_layout_cache = KMEM_CACHE(nfs4_layout, 0);
if (!nfs4_layout_cache)
return -ENOMEM;
- nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid",
- sizeof(struct nfs4_layout_stateid), 0, 0, NULL);
+ nfs4_layout_stateid_cache = KMEM_CACHE(nfs4_layout_stateid, 0);
if (!nfs4_layout_stateid_cache) {
kmem_cache_destroy(nfs4_layout_cache);
return -ENOMEM;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0cfd257ffdaf..b74800917583 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -37,6 +37,10 @@
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/kthread.h>
+#include <linux/namei.h>
+
+#include <linux/sunrpc/addr.h>
+#include <linux/nfs_ssc.h>
#include "idmap.h"
#include "cache.h"
@@ -48,34 +52,18 @@
#include "pnfs.h"
#include "trace.h"
-#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
-#include <linux/security.h>
-
-static inline void
-nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval)
-{
- struct inode *inode = d_inode(resfh->fh_dentry);
- int status;
-
- inode_lock(inode);
- status = security_inode_setsecctx(resfh->fh_dentry,
- label->data, label->len);
- inode_unlock(inode);
+static bool inter_copy_offload_enable;
+module_param(inter_copy_offload_enable, bool, 0644);
+MODULE_PARM_DESC(inter_copy_offload_enable,
+ "Enable inter server to server copy offload. Default: false");
- if (status)
- /*
- * XXX: We should really fail the whole open, but we may
- * already have created a new file, so it may be too
- * late. For now this seems the least of evils:
- */
- bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+static void cleanup_async_copy(struct nfsd4_copy *copy);
- return;
-}
-#else
-static inline void
-nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval)
-{ }
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+static int nfsd4_ssc_umount_timeout = 900000; /* default to 15 mins */
+module_param(nfsd4_ssc_umount_timeout, int, 0644);
+MODULE_PARM_DESC(nfsd4_ssc_umount_timeout,
+ "idle msecs before unmount export from source server");
#endif
#define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -142,26 +130,6 @@ is_create_with_attrs(struct nfsd4_open *open)
|| open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1);
}
-/*
- * if error occurs when setting the acl, just clear the acl bit
- * in the returned attr bitmap.
- */
-static void
-do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct nfs4_acl *acl, u32 *bmval)
-{
- __be32 status;
-
- status = nfsd4_set_nfs4_acl(rqstp, fhp, acl);
- if (status)
- /*
- * We should probably fail the whole open at this point,
- * but we've already created the file, so it's too late;
- * So this seems the least of evils:
- */
- bmval[0] &= ~FATTR4_WORD0_ACL;
-}
-
static inline void
fh_dup2(struct svc_fh *dst, struct svc_fh *src)
{
@@ -175,7 +143,6 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
static __be32
do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode)
{
- __be32 status;
if (open->op_truncate &&
!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
@@ -190,12 +157,10 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
if (open->op_share_deny & NFS4_SHARE_DENY_READ)
accmode |= NFSD_MAY_WRITE;
- status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
-
- return status;
+ return fh_verify(rqstp, current_fh, S_IFREG, accmode);
}
-static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
+static __be32 nfsd_check_obj_isreg(struct svc_fh *fh, u32 minor_version)
{
umode_t mode = d_inode(fh->fh_dentry)->i_mode;
@@ -203,14 +168,15 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
return nfs_ok;
if (S_ISDIR(mode))
return nfserr_isdir;
- /*
- * Using err_symlink as our catch-all case may look odd; but
- * there's no other obvious error for this case in 4.0, and we
- * happen to know that it will cause the linux v4 client to do
- * the right thing on attempts to open something other than a
- * regular file.
- */
- return nfserr_symlink;
+ if (S_ISLNK(mode))
+ return nfserr_symlink;
+
+ /* RFC 7530 - 16.16.6 */
+ if (minor_version == 0)
+ return nfserr_symlink;
+ else
+ return nfserr_wrong_type;
+
}
static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh *resfh)
@@ -221,6 +187,234 @@ static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate
&resfh->fh_handle);
}
+static inline bool nfsd4_create_is_exclusive(int createmode)
+{
+ return createmode == NFS4_CREATE_EXCLUSIVE ||
+ createmode == NFS4_CREATE_EXCLUSIVE4_1;
+}
+
+static __be32
+nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
+ struct nfsd4_open *open)
+{
+ struct file *filp;
+ struct path path;
+ int oflags;
+
+ oflags = O_CREAT | O_LARGEFILE;
+ switch (open->op_share_access & NFS4_SHARE_ACCESS_BOTH) {
+ case NFS4_SHARE_ACCESS_WRITE:
+ oflags |= O_WRONLY;
+ break;
+ case NFS4_SHARE_ACCESS_BOTH:
+ oflags |= O_RDWR;
+ break;
+ default:
+ oflags |= O_RDONLY;
+ }
+
+ path.mnt = fhp->fh_export->ex_path.mnt;
+ path.dentry = child;
+ filp = dentry_create(&path, oflags, open->op_iattr.ia_mode,
+ current_cred());
+ if (IS_ERR(filp))
+ return nfserrno(PTR_ERR(filp));
+
+ open->op_filp = filp;
+ return nfs_ok;
+}
+
+/*
+ * Implement NFSv4's unchecked, guarded, and exclusive create
+ * semantics for regular files. Open state for this new file is
+ * subsequently fabricated in nfsd4_process_open2().
+ *
+ * Upon return, caller must release @fhp and @resfhp.
+ */
+static __be32
+nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct nfsd4_open *open)
+{
+ struct iattr *iap = &open->op_iattr;
+ struct nfsd_attrs attrs = {
+ .na_iattr = iap,
+ .na_seclabel = &open->op_label,
+ };
+ struct dentry *parent, *child;
+ __u32 v_mtime, v_atime;
+ struct inode *inode;
+ __be32 status;
+ int host_err;
+
+ if (isdotent(open->op_fname, open->op_fnamelen))
+ return nfserr_exist;
+ if (!(iap->ia_valid & ATTR_MODE))
+ iap->ia_mode = 0;
+
+ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
+ if (status != nfs_ok)
+ return status;
+ parent = fhp->fh_dentry;
+ inode = d_inode(parent);
+
+ host_err = fh_want_write(fhp);
+ if (host_err)
+ return nfserrno(host_err);
+
+ if (is_create_with_attrs(open))
+ nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs);
+
+ child = start_creating(&nop_mnt_idmap, parent,
+ &QSTR_LEN(open->op_fname, open->op_fnamelen));
+ if (IS_ERR(child)) {
+ status = nfserrno(PTR_ERR(child));
+ goto out_write;
+ }
+
+ if (d_really_is_negative(child)) {
+ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+ if (status != nfs_ok)
+ goto out;
+ }
+
+ status = fh_compose(resfhp, fhp->fh_export, child, fhp);
+ if (status != nfs_ok)
+ goto out;
+
+ v_mtime = 0;
+ v_atime = 0;
+ if (nfsd4_create_is_exclusive(open->op_createmode)) {
+ u32 *verifier = (u32 *)open->op_verf.data;
+
+ /*
+ * Solaris 7 gets confused (bugid 4218508) if these have
+ * the high bit set, as do xfs filesystems without the
+ * "bigtime" feature. So just clear the high bits. If this
+ * is ever changed to use different attrs for storing the
+ * verifier, then do_open_lookup() will also need to be
+ * fixed accordingly.
+ */
+ v_mtime = verifier[0] & 0x7fffffff;
+ v_atime = verifier[1] & 0x7fffffff;
+ }
+
+ if (d_really_is_positive(child)) {
+ /* NFSv4 protocol requires change attributes even though
+ * no change happened.
+ */
+ status = fh_fill_both_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
+
+ switch (open->op_createmode) {
+ case NFS4_CREATE_UNCHECKED:
+ if (!d_is_reg(child))
+ break;
+
+ /*
+ * In NFSv4, we don't want to truncate the file
+ * now. This would be wrong if the OPEN fails for
+ * some other reason. Furthermore, if the size is
+ * nonzero, we should ignore it according to spec!
+ */
+ open->op_truncate = (iap->ia_valid & ATTR_SIZE) &&
+ !iap->ia_size;
+ break;
+ case NFS4_CREATE_GUARDED:
+ status = nfserr_exist;
+ break;
+ case NFS4_CREATE_EXCLUSIVE:
+ if (inode_get_mtime_sec(d_inode(child)) == v_mtime &&
+ inode_get_atime_sec(d_inode(child)) == v_atime &&
+ d_inode(child)->i_size == 0) {
+ open->op_created = true;
+ break; /* subtle */
+ }
+ status = nfserr_exist;
+ break;
+ case NFS4_CREATE_EXCLUSIVE4_1:
+ if (inode_get_mtime_sec(d_inode(child)) == v_mtime &&
+ inode_get_atime_sec(d_inode(child)) == v_atime &&
+ d_inode(child)->i_size == 0) {
+ open->op_created = true;
+ goto set_attr; /* subtle */
+ }
+ status = nfserr_exist;
+ }
+ goto out;
+ }
+
+ if (!IS_POSIXACL(inode))
+ iap->ia_mode &= ~current_umask();
+
+ status = fh_fill_pre_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
+ status = nfsd4_vfs_create(fhp, child, open);
+ if (status != nfs_ok)
+ goto out;
+ open->op_created = true;
+ fh_fill_post_attrs(fhp);
+
+ /* A newly created file already has a file size of zero. */
+ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
+ iap->ia_valid &= ~ATTR_SIZE;
+ if (nfsd4_create_is_exclusive(open->op_createmode)) {
+ iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
+ ATTR_MTIME_SET|ATTR_ATIME_SET;
+ iap->ia_mtime.tv_sec = v_mtime;
+ iap->ia_atime.tv_sec = v_atime;
+ iap->ia_mtime.tv_nsec = 0;
+ iap->ia_atime.tv_nsec = 0;
+ }
+
+set_attr:
+ status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs);
+
+ if (attrs.na_labelerr)
+ open->op_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ if (attrs.na_aclerr)
+ open->op_bmval[0] &= ~FATTR4_WORD0_ACL;
+out:
+ end_creating(child);
+ nfsd_attrs_free(&attrs);
+out_write:
+ fh_drop_write(fhp);
+ return status;
+}
+
+/**
+ * set_change_info - set up the change_info4 for a reply
+ * @cinfo: pointer to nfsd4_change_info to be populated
+ * @fhp: pointer to svc_fh to use as source
+ *
+ * Many operations in NFSv4 require change_info4 in the reply. This function
+ * populates that from the info that we (should!) have already collected. In
+ * the event that we didn't get any pre-attrs, just zero out both.
+ */
+static void
+set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+{
+ cinfo->atomic = (u32)(fhp->fh_pre_saved && fhp->fh_post_saved && !fhp->fh_no_atomic_attr);
+ cinfo->before_change = fhp->fh_pre_change;
+ cinfo->after_change = fhp->fh_post_change;
+
+ /*
+ * If fetching the pre-change attributes failed, then we should
+ * have already failed the whole operation. We could have still
+ * failed to fetch post-change attributes however.
+ *
+ * If we didn't get post-op attrs, just zero-out the after
+ * field since we don't know what it should be. If the pre_saved
+ * field isn't set for some reason, throw warning and just copy
+ * whatever is in the after field.
+ */
+ if (WARN_ON_ONCE(!fhp->fh_pre_saved))
+ cinfo->before_change = 0;
+ if (!fhp->fh_post_saved)
+ cinfo->after_change = cinfo->before_change + 1;
+}
+
static __be32
do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)
{
@@ -232,7 +426,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
if (!*resfh)
return nfserr_jukebox;
fh_init(*resfh, NFS4_FHSIZE);
- open->op_truncate = 0;
+ open->op_truncate = false;
if (open->op_create) {
/* FIXME: check session persistence and pnfs flags.
@@ -250,47 +444,33 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
* yes | yes | GUARDED4 | GUARDED4
*/
- /*
- * Note: create modes (UNCHECKED,GUARDED...) are the same
- * in NFSv4 as in v3 except EXCLUSIVE4_1.
- */
current->fs->umask = open->op_umask;
- status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
- open->op_fname.len, &open->op_iattr,
- *resfh, open->op_createmode,
- (u32 *)open->op_verf.data,
- &open->op_truncate, &open->op_created);
+ status = nfsd4_create_file(rqstp, current_fh, *resfh, open);
current->fs->umask = 0;
- if (!status && open->op_label.len)
- nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval);
-
/*
* Following rfc 3530 14.2.16, and rfc 5661 18.16.4
* use the returned bitmask to indicate which attributes
* we used to store the verifier:
*/
- if (nfsd_create_is_exclusive(open->op_createmode) && status == 0)
+ if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0)
open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS |
FATTR4_WORD1_TIME_MODIFY);
- } else
- /*
- * Note this may exit with the parent still locked.
- * We will hold the lock until nfsd4_open's final
- * lookup, to prevent renames or unlinks until we've had
- * a chance to an acquire a delegation if appropriate.
- */
+ } else {
status = nfsd_lookup(rqstp, current_fh,
- open->op_fname.data, open->op_fname.len, *resfh);
+ open->op_fname, open->op_fnamelen, *resfh);
+ if (status == nfs_ok)
+ /* NFSv4 protocol requires change attributes even though
+ * no change happened.
+ */
+ status = fh_fill_both_attrs(current_fh);
+ }
if (status)
goto out;
- status = nfsd_check_obj_isreg(*resfh);
+ status = nfsd_check_obj_isreg(*resfh, cstate->minorversion);
if (status)
goto out;
- if (is_create_with_attrs(open) && open->op_acl != NULL)
- do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval);
-
nfsd4_set_open_owner_reply_cache(cstate, open, *resfh);
accmode = NFSD_MAY_NOP;
if (open->op_created ||
@@ -306,7 +486,6 @@ static __be32
do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
{
struct svc_fh *current_fh = &cstate->current_fh;
- __be32 status;
int accmode = 0;
/* We don't know the target directory, and therefore can not
@@ -331,9 +510,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, str
if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH)
accmode = NFSD_MAY_OWNER_OVERRIDE;
- status = do_open_permission(rqstp, current_fh, open, accmode);
-
- return status;
+ return do_open_permission(rqstp, current_fh, open, accmode);
}
static void
@@ -358,21 +535,23 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
bool reclaim = false;
dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n",
- (int)open->op_fname.len, open->op_fname.data,
+ (int)open->op_fnamelen, open->op_fname,
open->op_openowner);
+ open->op_filp = NULL;
+ open->op_rqstp = rqstp;
+
/* This check required by spec. */
if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
return nfserr_inval;
- open->op_created = 0;
+ open->op_created = false;
/*
* RFC5661 18.51.3
* Before RECLAIM_COMPLETE done, server should deny new lock
*/
if (nfsd4_has_session(cstate) &&
- !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
- &cstate->session->se_client->cl_flags) &&
+ !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags) &&
open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
return nfserr_grace;
@@ -414,50 +593,46 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
switch (open->op_claim_type) {
- case NFS4_OPEN_CLAIM_DELEGATE_CUR:
- case NFS4_OPEN_CLAIM_NULL:
- status = do_open_lookup(rqstp, cstate, open, &resfh);
- if (status)
- goto out;
- break;
- case NFS4_OPEN_CLAIM_PREVIOUS:
- status = nfs4_check_open_reclaim(&open->op_clientid,
- cstate, nn);
- if (status)
- goto out;
- open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
- reclaim = true;
- case NFS4_OPEN_CLAIM_FH:
- case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
- status = do_open_fhandle(rqstp, cstate, open);
- if (status)
- goto out;
- resfh = &cstate->current_fh;
- break;
- case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
- case NFS4_OPEN_CLAIM_DELEGATE_PREV:
- dprintk("NFSD: unsupported OPEN claim type %d\n",
- open->op_claim_type);
- status = nfserr_notsupp;
+ case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+ case NFS4_OPEN_CLAIM_NULL:
+ status = do_open_lookup(rqstp, cstate, open, &resfh);
+ if (status)
goto out;
- default:
- dprintk("NFSD: Invalid OPEN claim type %d\n",
- open->op_claim_type);
- status = nfserr_inval;
+ break;
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ status = nfs4_check_open_reclaim(cstate->clp);
+ if (status)
+ goto out;
+ open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+ reclaim = true;
+ fallthrough;
+ case NFS4_OPEN_CLAIM_FH:
+ case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+ status = do_open_fhandle(rqstp, cstate, open);
+ if (status)
goto out;
+ resfh = &cstate->current_fh;
+ break;
+ case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+ case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+ status = nfserr_notsupp;
+ goto out;
+ default:
+ status = nfserr_inval;
+ goto out;
}
- /*
- * nfsd4_process_open2() does the actual opening of the file. If
- * successful, it (1) truncates the file if open->op_truncate was
- * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
- */
+
status = nfsd4_process_open2(rqstp, resfh, open);
- WARN(status && open->op_created,
- "nfsd4_process_open2 failed to open newly-created file! status=%u\n",
- be32_to_cpu(status));
+ if (status && open->op_created)
+ pr_warn("nfsd4_process_open2 failed to open newly-created file: status=%u\n",
+ be32_to_cpu(status));
if (reclaim && !status)
nn->somebody_reclaimed = true;
out:
+ if (open->op_filp) {
+ fput(open->op_filp);
+ open->op_filp = NULL;
+ }
if (resfh && resfh != &cstate->current_fh) {
fh_dup2(&cstate->current_fh, resfh);
fh_put(resfh);
@@ -502,23 +677,29 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_putfh *putfh = &u->putfh;
+ __be32 ret;
fh_put(&cstate->current_fh);
cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
- memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
+ memcpy(&cstate->current_fh.fh_handle.fh_raw, putfh->pf_fhval,
putfh->pf_fhlen);
- return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
+ ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ if (ret == nfserr_stale && putfh->no_verify) {
+ SET_FH_FLAG(&cstate->current_fh, NFSD4_FH_FOREIGN);
+ ret = 0;
+ }
+#endif
+ return ret;
}
static __be32
nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
- __be32 status;
-
fh_put(&cstate->current_fh);
- status = exp_pseudoroot(rqstp, &cstate->current_fh);
- return status;
+
+ return exp_pseudoroot(rqstp, &cstate->current_fh);
}
static __be32
@@ -529,9 +710,9 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_restorefh;
fh_dup2(&cstate->current_fh, &cstate->save_fh);
- if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) {
+ if (HAS_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG)) {
memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t));
- SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+ SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
}
return nfs_ok;
}
@@ -541,9 +722,9 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
fh_dup2(&cstate->save_fh, &cstate->current_fh);
- if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
+ if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG)) {
memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
- SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG);
+ SET_CSTATE_FLAG(cstate, SAVED_STATE_ID_FLAG);
}
return nfs_ok;
}
@@ -556,8 +737,14 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_access *access = &u->access;
+ u32 access_full;
+
+ access_full = NFS3_ACCESS_FULL;
+ if (cstate->minorversion >= 2)
+ access_full |= NFS4_ACCESS_XALIST | NFS4_ACCESS_XAREAD |
+ NFS4_ACCESS_XAWRITE;
- if (access->ac_req_access & ~NFS3_ACCESS_FULL)
+ if (access->ac_req_access & ~access_full)
return nfserr_inval;
access->ac_resp_access = access->ac_req_access;
@@ -565,30 +752,24 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&access->ac_supported);
}
-static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
-{
- __be32 verf[2];
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
- /*
- * This is opaque to client, so no need to byte-swap. Use
- * __force to keep sparse happy. y2038 time_t overflow is
- * irrelevant in this usage.
- */
- verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
- verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
- memcpy(verifier->data, verf, sizeof(verifier->data));
-}
-
static __be32
nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_commit *commit = &u->commit;
+ struct nfsd_file *nf;
+ __be32 status;
+
+ status = nfsd_file_acquire(rqstp, &cstate->current_fh, NFSD_MAY_WRITE |
+ NFSD_MAY_NOT_BREAK_LEASE, &nf);
+ if (status != nfs_ok)
+ return status;
- gen_boot_verifier(&commit->co_verf, SVC_NET(rqstp));
- return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset,
- commit->co_count);
+ status = nfsd_commit(rqstp, &cstate->current_fh, nf, commit->co_offset,
+ commit->co_count,
+ (__be32 *)commit->co_verf.data);
+ nfsd_file_put(nf);
+ return status;
}
static __be32
@@ -596,6 +777,10 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_create *create = &u->create;
+ struct nfsd_attrs attrs = {
+ .na_iattr = &create->cr_iattr,
+ .na_seclabel = &create->cr_label,
+ };
struct svc_fh resfh;
__be32 status;
dev_t rdev;
@@ -611,12 +796,13 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
return status;
+ status = nfsd4_acl_to_attr(create->cr_type, create->cr_acl, &attrs);
current->fs->umask = create->cr_umask;
switch (create->cr_type) {
case NF4LNK:
status = nfsd_symlink(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
- create->cr_data, &resfh);
+ create->cr_data, &attrs, &resfh);
break;
case NF4BLK:
@@ -627,7 +813,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out_umask;
status = nfsd_create(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
- &create->cr_iattr, S_IFBLK, rdev, &resfh);
+ &attrs, S_IFBLK, rdev, &resfh);
break;
case NF4CHR:
@@ -638,26 +824,26 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out_umask;
status = nfsd_create(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
- &create->cr_iattr,S_IFCHR, rdev, &resfh);
+ &attrs, S_IFCHR, rdev, &resfh);
break;
case NF4SOCK:
status = nfsd_create(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
- &create->cr_iattr, S_IFSOCK, 0, &resfh);
+ &attrs, S_IFSOCK, 0, &resfh);
break;
case NF4FIFO:
status = nfsd_create(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
- &create->cr_iattr, S_IFIFO, 0, &resfh);
+ &attrs, S_IFIFO, 0, &resfh);
break;
case NF4DIR:
create->cr_iattr.ia_valid &= ~ATTR_SIZE;
status = nfsd_create(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
- &create->cr_iattr, S_IFDIR, 0, &resfh);
+ &attrs, S_IFDIR, 0, &resfh);
break;
default:
@@ -667,20 +853,17 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- if (create->cr_label.len)
- nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval);
-
- if (create->cr_acl != NULL)
- do_set_nfs4_acl(rqstp, &resfh, create->cr_acl,
- create->cr_bmval);
-
- fh_unlock(&cstate->current_fh);
+ if (attrs.na_labelerr)
+ create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+ if (attrs.na_aclerr)
+ create->cr_bmval[0] &= ~FATTR4_WORD0_ACL;
set_change_info(&create->cr_cinfo, &cstate->current_fh);
fh_dup2(&cstate->current_fh, &resfh);
out:
fh_put(&resfh);
out_umask:
current->fs->umask = 0;
+ nfsd_attrs_free(&attrs);
return status;
}
@@ -691,6 +874,8 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_getattr *getattr = &u->getattr;
__be32 status;
+ trace_nfsd_vfs_getattr(rqstp, &cstate->current_fh);
+
status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
if (status)
return status;
@@ -760,13 +945,17 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_read *read = &u->read;
__be32 status;
- read->rd_filp = NULL;
- if (read->rd_offset >= OFFSET_MAX)
- return nfserr_inval;
+ read->rd_nf = NULL;
trace_nfsd_read_start(rqstp, &cstate->current_fh,
read->rd_offset, read->rd_length);
+ read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp));
+ if (read->rd_offset > (u64)OFFSET_MAX)
+ read->rd_offset = (u64)OFFSET_MAX;
+ if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX)
+ read->rd_length = (u64)OFFSET_MAX - read->rd_offset;
+
/*
* If we do a zero copy read, then a client will see read data
* that reflects the state of the file *after* performing the
@@ -775,19 +964,17 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* To ensure proper ordering, we therefore turn off zero copy if
* the client wants us to do more in this compound:
*/
- if (!nfsd4_last_compound_op(rqstp))
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ if (!nfsd4_last_compound_op(rqstp)) {
+ struct nfsd4_compoundargs *argp = rqstp->rq_argp;
+
+ argp->splice_ok = false;
+ }
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&read->rd_stateid, RD_STATE,
- &read->rd_filp, &read->rd_tmp_file);
- if (status) {
- dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
- goto out;
- }
- status = nfs_ok;
-out:
+ &read->rd_nf, NULL);
+
read->rd_rqstp = rqstp;
read->rd_fhp = &cstate->current_fh;
return status;
@@ -797,10 +984,11 @@ out:
static void
nfsd4_read_release(union nfsd4_op_u *u)
{
- if (u->read.rd_filp)
- fput(u->read.rd_filp);
- trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
- u->read.rd_offset, u->read.rd_length);
+ if (u->read.rd_nf) {
+ trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
+ u->read.rd_offset, u->read.rd_length);
+ nfsd_file_put(u->read.rd_nf);
+ }
}
static __be32
@@ -811,6 +999,9 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
u64 cookie = readdir->rd_cookie;
static const nfs4_verifier zeroverf;
+ trace_nfsd_vfs_readdir(rqstp, &cstate->current_fh,
+ readdir->rd_maxcount, readdir->rd_cookie);
+
/* no need to check permission - this will be done in nfsd_readdir() */
if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
@@ -849,10 +1040,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_grace;
status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
remove->rm_name, remove->rm_namelen);
- if (!status) {
- fh_unlock(&cstate->current_fh);
+ if (!status)
set_change_info(&remove->rm_cinfo, &cstate->current_fh);
- }
return status;
}
@@ -870,8 +1059,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
rename->rn_tname, rename->rn_tnamelen);
if (status)
return status;
- set_change_info(&rename->rn_sinfo, &cstate->current_fh);
- set_change_info(&rename->rn_tinfo, &cstate->save_fh);
+ set_change_info(&rename->rn_sinfo, &cstate->save_fh);
+ set_change_info(&rename->rn_tinfo, &cstate->current_fh);
return nfs_ok;
}
@@ -892,7 +1081,6 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&exp, &dentry);
if (err)
return err;
- fh_unlock(&cstate->current_fh);
if (d_really_is_negative(dentry)) {
exp_put(exp);
err = nfserr_noent;
@@ -942,23 +1130,83 @@ nfsd4_secinfo_no_name_release(union nfsd4_op_u *u)
exp_put(u->secinfo_no_name.sin_exp);
}
+/*
+ * Validate that the requested timestamps are within the acceptable range. If
+ * timestamp appears to be in the future, then it will be clamped to
+ * current_time().
+ */
+static void
+vet_deleg_attrs(struct nfsd4_setattr *setattr, struct nfs4_delegation *dp)
+{
+ struct timespec64 now = current_time(dp->dl_stid.sc_file->fi_inode);
+ struct iattr *iattr = &setattr->sa_iattr;
+
+ if ((setattr->sa_bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) &&
+ !nfsd4_vet_deleg_time(&iattr->ia_atime, &dp->dl_atime, &now))
+ iattr->ia_valid &= ~(ATTR_ATIME | ATTR_ATIME_SET);
+
+ if (setattr->sa_bmval[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) {
+ if (nfsd4_vet_deleg_time(&iattr->ia_mtime, &dp->dl_mtime, &now)) {
+ iattr->ia_ctime = iattr->ia_mtime;
+ if (nfsd4_vet_deleg_time(&iattr->ia_ctime, &dp->dl_ctime, &now))
+ dp->dl_setattr = true;
+ else
+ iattr->ia_valid &= ~(ATTR_CTIME | ATTR_CTIME_SET);
+ } else {
+ iattr->ia_valid &= ~(ATTR_CTIME | ATTR_CTIME_SET |
+ ATTR_MTIME | ATTR_MTIME_SET);
+ }
+ }
+}
+
static __be32
nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_setattr *setattr = &u->setattr;
+ struct nfsd_attrs attrs = {
+ .na_iattr = &setattr->sa_iattr,
+ .na_seclabel = &setattr->sa_label,
+ };
+ bool save_no_wcc, deleg_attrs;
+ struct nfs4_stid *st = NULL;
+ struct inode *inode;
__be32 status = nfs_ok;
int err;
- if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
+ deleg_attrs = setattr->sa_bmval[2] & (FATTR4_WORD2_TIME_DELEG_ACCESS |
+ FATTR4_WORD2_TIME_DELEG_MODIFY);
+
+ if (deleg_attrs || (setattr->sa_iattr.ia_valid & ATTR_SIZE)) {
+ int flags = WR_STATE;
+
+ if (setattr->sa_bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS)
+ flags |= RD_STATE;
+
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&cstate->current_fh, &setattr->sa_stateid,
- WR_STATE, NULL, NULL);
- if (status) {
- dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
+ flags, NULL, &st);
+ if (status)
return status;
+ }
+
+ if (deleg_attrs) {
+ status = nfserr_bad_stateid;
+ if (st->sc_type & SC_TYPE_DELEG) {
+ struct nfs4_delegation *dp = delegstateid(st);
+
+ /* Only for *_ATTRS_DELEG flavors */
+ if (deleg_attrs_deleg(dp->dl_type)) {
+ vet_deleg_attrs(setattr, dp);
+ status = nfs_ok;
+ }
}
}
+ if (st)
+ nfs4_put_stid(st);
+ if (status)
+ return status;
+
err = fh_want_write(&cstate->current_fh);
if (err)
return nfserrno(err);
@@ -969,58 +1217,73 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- if (setattr->sa_acl != NULL)
- status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
- setattr->sa_acl);
- if (status)
- goto out;
- if (setattr->sa_label.len)
- status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh,
- &setattr->sa_label);
+ inode = cstate->current_fh.fh_dentry->d_inode;
+ status = nfsd4_acl_to_attr(S_ISDIR(inode->i_mode) ? NF4DIR : NF4REG,
+ setattr->sa_acl, &attrs);
+
if (status)
goto out;
- status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
- 0, (time_t)0);
+ save_no_wcc = cstate->current_fh.fh_no_wcc;
+ cstate->current_fh.fh_no_wcc = true;
+ status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, NULL);
+ cstate->current_fh.fh_no_wcc = save_no_wcc;
+ if (!status)
+ status = nfserrno(attrs.na_labelerr);
+ if (!status)
+ status = nfserrno(attrs.na_aclerr);
out:
+ nfsd_attrs_free(&attrs);
fh_drop_write(&cstate->current_fh);
return status;
}
+static void nfsd4_file_mark_deleg_written(struct nfs4_file *fi)
+{
+ spin_lock(&fi->fi_lock);
+ if (!list_empty(&fi->fi_delegations)) {
+ struct nfs4_delegation *dp = list_first_entry(&fi->fi_delegations,
+ struct nfs4_delegation, dl_perfile);
+
+ if (dp->dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG)
+ dp->dl_written = true;
+ }
+ spin_unlock(&fi->fi_lock);
+}
+
static __be32
nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_write *write = &u->write;
stateid_t *stateid = &write->wr_stateid;
- struct file *filp = NULL;
+ struct nfs4_stid *stid = NULL;
+ struct nfsd_file *nf = NULL;
__be32 status = nfs_ok;
unsigned long cnt;
- int nvecs;
- if (write->wr_offset >= OFFSET_MAX)
- return nfserr_inval;
+ if (write->wr_offset > (u64)OFFSET_MAX ||
+ write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX)
+ return nfserr_fbig;
cnt = write->wr_buflen;
trace_nfsd_write_start(rqstp, &cstate->current_fh,
write->wr_offset, cnt);
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
- stateid, WR_STATE, &filp, NULL);
- if (status) {
- dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
+ stateid, WR_STATE, &nf, &stid);
+ if (status)
return status;
+
+ if (stid) {
+ nfsd4_file_mark_deleg_written(stid->sc_file);
+ nfs4_put_stid(stid);
}
write->wr_how_written = write->wr_stable_how;
- gen_boot_verifier(&write->wr_verifier, SVC_NET(rqstp));
-
- nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
- &write->wr_head, write->wr_buflen);
- WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
-
- status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
- write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
- write->wr_how_written);
- fput(filp);
+ status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
+ write->wr_offset, &write->wr_payload,
+ &cnt, write->wr_how_written,
+ (__be32 *)write->wr_verifier.data);
+ nfsd_file_put(nf);
write->wr_bytes_written = cnt;
trace_nfsd_write_done(rqstp, &cstate->current_fh,
@@ -1030,8 +1293,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
static __be32
nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
- stateid_t *src_stateid, struct file **src,
- stateid_t *dst_stateid, struct file **dst)
+ stateid_t *src_stateid, struct nfsd_file **src,
+ stateid_t *dst_stateid, struct nfsd_file **dst)
{
__be32 status;
@@ -1040,21 +1303,17 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
src_stateid, RD_STATE, src, NULL);
- if (status) {
- dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
+ if (status)
goto out;
- }
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
dst_stateid, WR_STATE, dst, NULL);
- if (status) {
- dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
+ if (status)
goto out_put_src;
- }
/* fix up for NFS-specific error code */
- if (!S_ISREG(file_inode(*src)->i_mode) ||
- !S_ISREG(file_inode(*dst)->i_mode)) {
+ if (!S_ISREG(file_inode((*src)->nf_file)->i_mode) ||
+ !S_ISREG(file_inode((*dst)->nf_file)->i_mode)) {
status = nfserr_wrong_type;
goto out_put_dst;
}
@@ -1062,9 +1321,11 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
out:
return status;
out_put_dst:
- fput(*dst);
+ nfsd_file_put(*dst);
+ *dst = NULL;
out_put_src:
- fput(*src);
+ nfsd_file_put(*src);
+ *src = NULL;
goto out;
}
@@ -1073,7 +1334,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_clone *clone = &u->clone;
- struct file *src, *dst;
+ struct nfsd_file *src, *dst;
__be32 status;
status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
@@ -1081,44 +1342,101 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- status = nfsd4_clone_file_range(src, clone->cl_src_pos,
- dst, clone->cl_dst_pos, clone->cl_count);
+ status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos,
+ dst, clone->cl_dst_pos, clone->cl_count,
+ EX_ISSYNC(cstate->current_fh.fh_export));
- fput(dst);
- fput(src);
+ nfsd_file_put(dst);
+ nfsd_file_put(src);
out:
return status;
}
-void nfs4_put_copy(struct nfsd4_copy *copy)
+/**
+ * nfsd4_has_active_async_copies - Check for ongoing copy operations
+ * @clp: Client to be checked
+ *
+ * NFSD maintains state for async COPY operations after they complete,
+ * and this state remains in the nfs4_client's async_copies list.
+ * Ongoing copies should block the destruction of the nfs4_client, but
+ * completed copies should not.
+ *
+ * Return values:
+ * %true: At least one active async COPY is ongoing
+ * %false: No active async COPY operations were found
+ */
+bool nfsd4_has_active_async_copies(struct nfs4_client *clp)
{
- if (!refcount_dec_and_test(&copy->refcount))
- return;
- kfree(copy);
+ struct nfsd4_copy *copy;
+ bool result = false;
+
+ spin_lock(&clp->async_lock);
+ list_for_each_entry(copy, &clp->async_copies, copies) {
+ if (!test_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags) &&
+ !test_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags)) {
+ result = true;
+ break;
+ }
+ }
+ spin_unlock(&clp->async_lock);
+ return result;
}
-static bool
-check_and_set_stop_copy(struct nfsd4_copy *copy)
+/**
+ * nfsd4_async_copy_reaper - Purge completed copies
+ * @nn: Network namespace with possible active copy information
+ */
+void nfsd4_async_copy_reaper(struct nfsd_net *nn)
{
- bool value;
+ struct nfs4_client *clp;
+ struct nfsd4_copy *copy;
+ LIST_HEAD(reaplist);
+
+ spin_lock(&nn->client_lock);
+ list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+ struct list_head *pos, *next;
+
+ spin_lock(&clp->async_lock);
+ list_for_each_safe(pos, next, &clp->async_copies) {
+ copy = list_entry(pos, struct nfsd4_copy, copies);
+ if (test_bit(NFSD4_COPY_F_OFFLOAD_DONE, &copy->cp_flags)) {
+ if (--copy->cp_ttl) {
+ list_del_init(&copy->copies);
+ list_add(&copy->copies, &reaplist);
+ }
+ }
+ }
+ spin_unlock(&clp->async_lock);
+ }
+ spin_unlock(&nn->client_lock);
+
+ while (!list_empty(&reaplist)) {
+ copy = list_first_entry(&reaplist, struct nfsd4_copy, copies);
+ list_del_init(&copy->copies);
+ cleanup_async_copy(copy);
+ }
+}
- spin_lock(&copy->cp_clp->async_lock);
- value = copy->stopped;
- if (!copy->stopped)
- copy->stopped = true;
- spin_unlock(&copy->cp_clp->async_lock);
- return value;
+static void nfs4_put_copy(struct nfsd4_copy *copy)
+{
+ if (!refcount_dec_and_test(&copy->refcount))
+ return;
+ kfree(copy->cp_src);
+ kfree(copy);
}
static void nfsd4_stop_copy(struct nfsd4_copy *copy)
{
- /* only 1 thread should stop the copy */
- if (!check_and_set_stop_copy(copy))
+ trace_nfsd_copy_async_cancel(copy);
+ if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags)) {
kthread_stop(copy->copy_task);
+ copy->nfserr = nfs_ok;
+ set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
+ }
nfs4_put_copy(copy);
}
-static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp)
+static struct nfsd4_copy *nfsd4_unhash_copy(struct nfs4_client *clp)
{
struct nfsd4_copy *copy = NULL;
@@ -1127,6 +1445,9 @@ static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp)
copy = list_first_entry(&clp->async_copies, struct nfsd4_copy,
copies);
refcount_inc(&copy->refcount);
+ copy->cp_clp = NULL;
+ if (!list_empty(&copy->copies))
+ list_del_init(&copy->copies);
}
spin_unlock(&clp->async_lock);
return copy;
@@ -1136,63 +1457,382 @@ void nfsd4_shutdown_copy(struct nfs4_client *clp)
{
struct nfsd4_copy *copy;
- while ((copy = nfsd4_get_copy(clp)) != NULL)
+ while ((copy = nfsd4_unhash_copy(clp)) != NULL)
nfsd4_stop_copy(copy);
}
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+
+extern struct file *nfs42_ssc_open(struct vfsmount *ss_mnt,
+ struct nfs_fh *src_fh,
+ nfs4_stateid *stateid);
+extern void nfs42_ssc_close(struct file *filep);
+
+extern void nfs_sb_deactive(struct super_block *sb);
+
+#define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys"
+
+/*
+ * setup a work entry in the ssc delayed unmount list.
+ */
+static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
+ struct nfsd4_ssc_umount_item **nsui,
+ struct svc_rqst *rqstp)
+{
+ struct nfsd4_ssc_umount_item *ni = NULL;
+ struct nfsd4_ssc_umount_item *work = NULL;
+ struct nfsd4_ssc_umount_item *tmp;
+ DEFINE_WAIT(wait);
+ __be32 status = 0;
+
+ *nsui = NULL;
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+try_again:
+ spin_lock(&nn->nfsd_ssc_lock);
+ list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
+ if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr)))
+ continue;
+ /* found a match */
+ if (ni->nsui_busy) {
+ /* wait - and try again */
+ prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE);
+ spin_unlock(&nn->nfsd_ssc_lock);
+
+ /* allow 20secs for mount/unmount for now - revisit */
+ if (svc_thread_should_stop(rqstp) ||
+ (schedule_timeout(20*HZ) == 0)) {
+ finish_wait(&nn->nfsd_ssc_waitq, &wait);
+ kfree(work);
+ return nfserr_eagain;
+ }
+ finish_wait(&nn->nfsd_ssc_waitq, &wait);
+ goto try_again;
+ }
+ *nsui = ni;
+ refcount_inc(&ni->nsui_refcnt);
+ spin_unlock(&nn->nfsd_ssc_lock);
+ kfree(work);
+
+ /* return vfsmount in (*nsui)->nsui_vfsmount */
+ return 0;
+ }
+ if (work) {
+ strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr));
+ refcount_set(&work->nsui_refcnt, 2);
+ work->nsui_busy = true;
+ list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list);
+ *nsui = work;
+ } else
+ status = nfserr_resource;
+ spin_unlock(&nn->nfsd_ssc_lock);
+ return status;
+}
+
+static void nfsd4_ssc_update_dul(struct nfsd_net *nn,
+ struct nfsd4_ssc_umount_item *nsui,
+ struct vfsmount *ss_mnt)
+{
+ spin_lock(&nn->nfsd_ssc_lock);
+ nsui->nsui_vfsmount = ss_mnt;
+ nsui->nsui_busy = false;
+ wake_up_all(&nn->nfsd_ssc_waitq);
+ spin_unlock(&nn->nfsd_ssc_lock);
+}
+
+static void nfsd4_ssc_cancel_dul(struct nfsd_net *nn,
+ struct nfsd4_ssc_umount_item *nsui)
+{
+ spin_lock(&nn->nfsd_ssc_lock);
+ list_del(&nsui->nsui_list);
+ wake_up_all(&nn->nfsd_ssc_waitq);
+ spin_unlock(&nn->nfsd_ssc_lock);
+ kfree(nsui);
+}
+
+/*
+ * Support one copy source server for now.
+ */
+static __be32
+nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
+ struct nfsd4_ssc_umount_item **nsui)
+{
+ struct file_system_type *type;
+ struct vfsmount *ss_mnt;
+ struct nfs42_netaddr *naddr;
+ struct sockaddr_storage tmp_addr;
+ size_t tmp_addrlen, match_netid_len = 3;
+ char *startsep = "", *endsep = "", *match_netid = "tcp";
+ char *ipaddr, *dev_name, *raw_data;
+ int len, raw_len;
+ __be32 status = nfserr_inval;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+ naddr = &nss->u.nl4_addr;
+ tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr,
+ naddr->addr_len,
+ (struct sockaddr *)&tmp_addr,
+ sizeof(tmp_addr));
+ *nsui = NULL;
+ if (tmp_addrlen == 0)
+ goto out_err;
+
+ if (tmp_addr.ss_family == AF_INET6) {
+ startsep = "[";
+ endsep = "]";
+ match_netid = "tcp6";
+ match_netid_len = 4;
+ }
+
+ if (naddr->netid_len != match_netid_len ||
+ strncmp(naddr->netid, match_netid, naddr->netid_len))
+ goto out_err;
+
+ /* Construct the raw data for the vfs_kern_mount call */
+ len = RPC_MAX_ADDRBUFLEN + 1;
+ ipaddr = kzalloc(len, GFP_KERNEL);
+ if (!ipaddr)
+ goto out_err;
+
+ rpc_ntop((struct sockaddr *)&tmp_addr, ipaddr, len);
+
+ /* 2 for ipv6 endsep and startsep. 3 for ":/" and trailing '/0'*/
+
+ raw_len = strlen(NFSD42_INTERSSC_MOUNTOPS) + strlen(ipaddr);
+ raw_data = kzalloc(raw_len, GFP_KERNEL);
+ if (!raw_data)
+ goto out_free_ipaddr;
+
+ snprintf(raw_data, raw_len, NFSD42_INTERSSC_MOUNTOPS, ipaddr);
+
+ status = nfserr_nodev;
+ type = get_fs_type("nfs");
+ if (!type)
+ goto out_free_rawdata;
+
+ /* Set the server:<export> for the vfs_kern_mount call */
+ dev_name = kzalloc(len + 5, GFP_KERNEL);
+ if (!dev_name)
+ goto out_free_rawdata;
+ snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
+
+ status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui, rqstp);
+ if (status)
+ goto out_free_devname;
+ if ((*nsui)->nsui_vfsmount)
+ goto out_done;
+
+ /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */
+ ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data);
+ module_put(type->owner);
+ if (IS_ERR(ss_mnt)) {
+ status = nfserr_nodev;
+ nfsd4_ssc_cancel_dul(nn, *nsui);
+ goto out_free_devname;
+ }
+ nfsd4_ssc_update_dul(nn, *nsui, ss_mnt);
+out_done:
+ status = 0;
+
+out_free_devname:
+ kfree(dev_name);
+out_free_rawdata:
+ kfree(raw_data);
+out_free_ipaddr:
+ kfree(ipaddr);
+out_err:
+ return status;
+}
+
+/*
+ * Verify COPY destination stateid.
+ *
+ * Connect to the source server with NFSv4.1.
+ * Create the source struct file for nfsd_copy_range.
+ * Called with COPY cstate:
+ * SAVED_FH: source filehandle
+ * CURRENT_FH: destination filehandle
+ */
+static __be32
+nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy)
+{
+ struct svc_fh *s_fh = NULL;
+ stateid_t *s_stid = &copy->cp_src_stateid;
+ __be32 status = nfserr_inval;
+
+ /* Verify the destination stateid and set dst struct file*/
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &copy->cp_dst_stateid,
+ WR_STATE, &copy->nf_dst, NULL);
+ if (status)
+ goto out;
+
+ status = nfsd4_interssc_connect(copy->cp_src, rqstp, &copy->ss_nsui);
+ if (status)
+ goto out;
+
+ s_fh = &cstate->save_fh;
+
+ copy->c_fh.size = s_fh->fh_handle.fh_size;
+ memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_raw, copy->c_fh.size);
+ copy->stateid.seqid = cpu_to_be32(s_stid->si_generation);
+ memcpy(copy->stateid.other, (void *)&s_stid->si_opaque,
+ sizeof(stateid_opaque_t));
+
+ status = 0;
+out:
+ return status;
+}
+
+static void
+nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp,
+ struct nfsd_file *dst)
+{
+ struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id);
+ long timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout);
+
+ nfs42_ssc_close(filp);
+ fput(filp);
+
+ spin_lock(&nn->nfsd_ssc_lock);
+ list_del(&nsui->nsui_list);
+ /*
+ * vfsmount can be shared by multiple exports,
+ * decrement refcnt. If the count drops to 1 it
+ * will be unmounted when nsui_expire expires.
+ */
+ refcount_dec(&nsui->nsui_refcnt);
+ nsui->nsui_expire = jiffies + timeout;
+ list_add_tail(&nsui->nsui_list, &nn->nfsd_ssc_mount_list);
+ spin_unlock(&nn->nfsd_ssc_lock);
+}
+
+#else /* CONFIG_NFSD_V4_2_INTER_SSC */
+
+static __be32
+nfsd4_setup_inter_ssc(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy)
+{
+ return nfserr_inval;
+}
+
+static void
+nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp,
+ struct nfsd_file *dst)
+{
+}
+
+static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt,
+ struct nfs_fh *src_fh,
+ nfs4_stateid *stateid)
+{
+ return NULL;
+}
+#endif /* CONFIG_NFSD_V4_2_INTER_SSC */
+
+static __be32
+nfsd4_setup_intra_ssc(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy)
+{
+ return nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
+ &copy->nf_src, &copy->cp_dst_stateid,
+ &copy->nf_dst);
+}
static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
{
- struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
+ struct nfsd4_cb_offload *cbo =
+ container_of(cb, struct nfsd4_cb_offload, co_cb);
+ struct nfsd4_copy *copy =
+ container_of(cbo, struct nfsd4_copy, cp_cb_offload);
- nfs4_put_copy(copy);
+ set_bit(NFSD4_COPY_F_OFFLOAD_DONE, &copy->cp_flags);
}
static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
struct rpc_task *task)
{
+ struct nfsd4_cb_offload *cbo =
+ container_of(cb, struct nfsd4_cb_offload, co_cb);
+
+ trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task);
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ if (cbo->co_retries--) {
+ rpc_delay(task, HZ / 5);
+ return 0;
+ }
+ }
+ nfsd41_cb_destroy_referring_call_list(cb);
return 1;
}
static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = {
.release = nfsd4_cb_offload_release,
- .done = nfsd4_cb_offload_done
+ .done = nfsd4_cb_offload_done,
+ .opcode = OP_CB_OFFLOAD,
};
static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
{
- copy->cp_res.wr_stable_how = NFS_UNSTABLE;
- copy->cp_synchronous = sync;
- gen_boot_verifier(&copy->cp_res.wr_verifier, copy->cp_clp->net);
+ copy->cp_res.wr_stable_how =
+ test_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags) ?
+ NFS_FILE_SYNC : NFS_UNSTABLE;
+ nfsd4_copy_set_sync(copy, sync);
}
-static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy)
+static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
+ struct file *dst,
+ struct file *src)
{
+ errseq_t since;
ssize_t bytes_copied = 0;
- size_t bytes_total = copy->cp_count;
+ u64 bytes_total = copy->cp_count;
u64 src_pos = copy->cp_src_pos;
u64 dst_pos = copy->cp_dst_pos;
+ int status;
+ loff_t end;
+ /* See RFC 7862 p.67: */
+ if (bytes_total == 0)
+ bytes_total = ULLONG_MAX;
do {
+ /* Only async copies can be stopped here */
if (kthread_should_stop())
break;
- bytes_copied = nfsd_copy_file_range(copy->file_src, src_pos,
- copy->file_dst, dst_pos, bytes_total);
+ bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
+ bytes_total);
if (bytes_copied <= 0)
break;
bytes_total -= bytes_copied;
copy->cp_res.wr_bytes_written += bytes_copied;
src_pos += bytes_copied;
dst_pos += bytes_copied;
- } while (bytes_total > 0 && !copy->cp_synchronous);
+ } while (bytes_total > 0 && nfsd4_copy_is_async(copy));
+ /* for a non-zero asynchronous copy do a commit of data */
+ if (nfsd4_copy_is_async(copy) && copy->cp_res.wr_bytes_written > 0) {
+ since = READ_ONCE(dst->f_wb_err);
+ end = copy->cp_dst_pos + copy->cp_res.wr_bytes_written - 1;
+ status = vfs_fsync_range(dst, copy->cp_dst_pos, end, 0);
+ if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
+ if (!status)
+ set_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags);
+ }
return bytes_copied;
}
-static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
+static __be32 nfsd4_do_copy(struct nfsd4_copy *copy,
+ struct file *src, struct file *dst,
+ bool sync)
{
__be32 status;
ssize_t bytes;
- bytes = _nfsd_copy_file_range(copy);
+ bytes = _nfsd_copy_file_range(copy, dst, src);
+
/* for async copy, we ignore the error, client can always retry
* to get the error
*/
@@ -1202,9 +1842,6 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
nfsd4_init_copy_res(copy, sync);
status = nfs_ok;
}
-
- fput(copy->file_src);
- fput(copy->file_dst);
return status;
}
@@ -1213,44 +1850,107 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
dst->cp_src_pos = src->cp_src_pos;
dst->cp_dst_pos = src->cp_dst_pos;
dst->cp_count = src->cp_count;
- dst->cp_synchronous = src->cp_synchronous;
+ dst->cp_flags = src->cp_flags;
memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res));
memcpy(&dst->fh, &src->fh, sizeof(src->fh));
dst->cp_clp = src->cp_clp;
- dst->file_dst = get_file(src->file_dst);
- dst->file_src = get_file(src->file_src);
+ dst->nf_dst = nfsd_file_get(src->nf_dst);
+ /* for inter, nf_src doesn't exist yet */
+ if (!nfsd4_ssc_is_inter(src))
+ dst->nf_src = nfsd_file_get(src->nf_src);
+
memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid));
+ memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server));
+ memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid));
+ memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh));
+ dst->ss_nsui = src->ss_nsui;
+}
+
+static void release_copy_files(struct nfsd4_copy *copy)
+{
+ if (copy->nf_src)
+ nfsd_file_put(copy->nf_src);
+ if (copy->nf_dst)
+ nfsd_file_put(copy->nf_dst);
}
static void cleanup_async_copy(struct nfsd4_copy *copy)
{
- nfs4_free_cp_state(copy);
- fput(copy->file_dst);
- fput(copy->file_src);
- spin_lock(&copy->cp_clp->async_lock);
- list_del(&copy->copies);
- spin_unlock(&copy->cp_clp->async_lock);
+ nfs4_free_copy_state(copy);
+ release_copy_files(copy);
+ if (copy->cp_clp) {
+ spin_lock(&copy->cp_clp->async_lock);
+ if (!list_empty(&copy->copies))
+ list_del_init(&copy->copies);
+ spin_unlock(&copy->cp_clp->async_lock);
+ }
nfs4_put_copy(copy);
}
+static void nfsd4_send_cb_offload(struct nfsd4_copy *copy)
+{
+ struct nfsd4_cb_offload *cbo = &copy->cp_cb_offload;
+
+ memcpy(&cbo->co_res, &copy->cp_res, sizeof(copy->cp_res));
+ memcpy(&cbo->co_fh, &copy->fh, sizeof(copy->fh));
+ cbo->co_nfserr = copy->nfserr;
+ cbo->co_retries = 5;
+
+ nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops,
+ NFSPROC4_CLNT_CB_OFFLOAD);
+ nfsd41_cb_referring_call(&cbo->co_cb, &cbo->co_referring_sessionid,
+ cbo->co_referring_slotid,
+ cbo->co_referring_seqno);
+ trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid,
+ &cbo->co_fh, copy->cp_count, copy->nfserr);
+ nfsd4_try_run_cb(&cbo->co_cb);
+}
+
+/**
+ * nfsd4_do_async_copy - kthread function for background server-side COPY
+ * @data: arguments for COPY operation
+ *
+ * Return values:
+ * %0: Copy operation is done.
+ */
static int nfsd4_do_async_copy(void *data)
{
struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
- struct nfsd4_copy *cb_copy;
- copy->nfserr = nfsd4_do_copy(copy, 0);
- cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
- if (!cb_copy)
- goto out;
- memcpy(&cb_copy->cp_res, &copy->cp_res, sizeof(copy->cp_res));
- cb_copy->cp_clp = copy->cp_clp;
- cb_copy->nfserr = copy->nfserr;
- memcpy(&cb_copy->fh, &copy->fh, sizeof(copy->fh));
- nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp,
- &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD);
- nfsd4_run_cb(&cb_copy->cp_cb);
-out:
- cleanup_async_copy(copy);
+ trace_nfsd_copy_async(copy);
+ if (nfsd4_ssc_is_inter(copy)) {
+ struct file *filp;
+
+ filp = nfs42_ssc_open(copy->ss_nsui->nsui_vfsmount,
+ &copy->c_fh, &copy->stateid);
+ if (IS_ERR(filp)) {
+ switch (PTR_ERR(filp)) {
+ case -EBADF:
+ copy->nfserr = nfserr_wrong_type;
+ break;
+ default:
+ copy->nfserr = nfserr_offload_denied;
+ }
+ /* ss_mnt will be unmounted by the laundromat */
+ goto do_callback;
+ }
+ copy->nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file,
+ false);
+ nfsd4_cleanup_inter_ssc(copy->ss_nsui, filp, copy->nf_dst);
+ } else {
+ copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
+ copy->nf_dst->nf_file, false);
+ }
+
+do_callback:
+ /* The kthread exits forthwith. Ensure that a subsequent
+ * OFFLOAD_CANCEL won't try to kill it again. */
+ set_bit(NFSD4_COPY_F_STOPPED, &copy->cp_flags);
+
+ set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
+ trace_nfsd_copy_async_done(copy);
+ nfsd4_send_cb_offload(copy);
+ atomic_dec(&copy->cp_nn->pending_async_copies);
return 0;
}
@@ -1258,86 +1958,190 @@ static __be32
nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfsd4_copy *async_copy = NULL;
struct nfsd4_copy *copy = &u->copy;
+ struct nfsd42_write_res *result;
__be32 status;
- struct nfsd4_copy *async_copy = NULL;
- status = nfsd4_verify_copy(rqstp, cstate, &copy->cp_src_stateid,
- &copy->file_src, &copy->cp_dst_stateid,
- &copy->file_dst);
- if (status)
- goto out;
+ result = &copy->cp_res;
+ nfsd_copy_write_verifier((__be32 *)&result->wr_verifier.data, nn);
copy->cp_clp = cstate->clp;
+ if (nfsd4_ssc_is_inter(copy)) {
+ trace_nfsd_copy_inter(copy);
+ if (!inter_copy_offload_enable || nfsd4_copy_is_sync(copy)) {
+ status = nfserr_notsupp;
+ goto out;
+ }
+ status = nfsd4_setup_inter_ssc(rqstp, cstate, copy);
+ if (status) {
+ trace_nfsd_copy_done(copy, status);
+ return nfserr_offload_denied;
+ }
+ } else {
+ trace_nfsd_copy_intra(copy);
+ status = nfsd4_setup_intra_ssc(rqstp, cstate, copy);
+ if (status) {
+ trace_nfsd_copy_done(copy, status);
+ return status;
+ }
+ }
+
memcpy(&copy->fh, &cstate->current_fh.fh_handle,
sizeof(struct knfsd_fh));
- if (!copy->cp_synchronous) {
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
-
- status = nfserrno(-ENOMEM);
+ if (nfsd4_copy_is_async(copy)) {
async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
if (!async_copy)
- goto out;
- if (!nfs4_init_cp_state(nn, copy)) {
- kfree(async_copy);
- goto out;
- }
+ goto out_err;
+ async_copy->cp_nn = nn;
+ INIT_LIST_HEAD(&async_copy->copies);
refcount_set(&async_copy->refcount, 1);
- memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid,
- sizeof(copy->cp_stateid));
+ async_copy->cp_ttl = NFSD_COPY_INITIAL_TTL;
+ /* Arbitrary cap on number of pending async copy operations */
+ if (atomic_inc_return(&nn->pending_async_copies) >
+ (int)rqstp->rq_pool->sp_nrthreads)
+ goto out_dec_async_copy_err;
+ async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
+ if (!async_copy->cp_src)
+ goto out_dec_async_copy_err;
+ if (!nfs4_init_copy_state(nn, copy))
+ goto out_dec_async_copy_err;
+ memcpy(&result->cb_stateid, &copy->cp_stateid.cs_stid,
+ sizeof(result->cb_stateid));
dup_copy_fields(copy, async_copy);
+ memcpy(async_copy->cp_cb_offload.co_referring_sessionid.data,
+ cstate->session->se_sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+ async_copy->cp_cb_offload.co_referring_slotid = cstate->slot->sl_index;
+ async_copy->cp_cb_offload.co_referring_seqno = cstate->slot->sl_seqid;
async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
async_copy, "%s", "copy thread");
if (IS_ERR(async_copy->copy_task))
- goto out_err;
+ goto out_dec_async_copy_err;
spin_lock(&async_copy->cp_clp->async_lock);
list_add(&async_copy->copies,
&async_copy->cp_clp->async_copies);
spin_unlock(&async_copy->cp_clp->async_lock);
wake_up_process(async_copy->copy_task);
status = nfs_ok;
- } else
- status = nfsd4_do_copy(copy, 1);
+ } else {
+ status = nfsd4_do_copy(copy, copy->nf_src->nf_file,
+ copy->nf_dst->nf_file, true);
+ }
out:
+ trace_nfsd_copy_done(copy, status);
+ release_copy_files(copy);
return status;
+out_dec_async_copy_err:
+ if (async_copy)
+ atomic_dec(&nn->pending_async_copies);
out_err:
- cleanup_async_copy(async_copy);
+ if (nfsd4_ssc_is_inter(copy)) {
+ /*
+ * Source's vfsmount of inter-copy will be unmounted
+ * by the laundromat. Use copy instead of async_copy
+ * since async_copy->ss_nsui might not be set yet.
+ */
+ refcount_dec(&copy->ss_nsui->nsui_refcnt);
+ }
+ if (async_copy)
+ cleanup_async_copy(async_copy);
+ status = nfserr_jukebox;
goto out;
}
-struct nfsd4_copy *
-find_async_copy(struct nfs4_client *clp, stateid_t *stateid)
+static struct nfsd4_copy *
+find_async_copy_locked(struct nfs4_client *clp, stateid_t *stateid)
{
struct nfsd4_copy *copy;
- spin_lock(&clp->async_lock);
+ lockdep_assert_held(&clp->async_lock);
+
list_for_each_entry(copy, &clp->async_copies, copies) {
- if (memcmp(&copy->cp_stateid, stateid, NFS4_STATEID_SIZE))
+ if (memcmp(&copy->cp_stateid.cs_stid, stateid, NFS4_STATEID_SIZE))
continue;
- refcount_inc(&copy->refcount);
- spin_unlock(&clp->async_lock);
return copy;
}
- spin_unlock(&clp->async_lock);
return NULL;
}
+static struct nfsd4_copy *
+find_async_copy(struct nfs4_client *clp, stateid_t *stateid)
+{
+ struct nfsd4_copy *copy;
+
+ spin_lock(&clp->async_lock);
+ copy = find_async_copy_locked(clp, stateid);
+ if (copy)
+ refcount_inc(&copy->refcount);
+ spin_unlock(&clp->async_lock);
+ return copy;
+}
+
static __be32
nfsd4_offload_cancel(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_offload_status *os = &u->offload_status;
- __be32 status = 0;
struct nfsd4_copy *copy;
struct nfs4_client *clp = cstate->clp;
copy = find_async_copy(clp, &os->stateid);
- if (copy)
+ if (!copy) {
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+ return manage_cpntf_state(nn, &os->stateid, clp, NULL);
+ } else
nfsd4_stop_copy(copy);
- else
- status = nfserr_bad_stateid;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_copy_notify *cn = &u->copy_notify;
+ __be32 status;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfs4_stid *stid = NULL;
+ struct nfs4_cpntf_state *cps;
+ struct nfs4_client *clp = cstate->clp;
+
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &cn->cpn_src_stateid, RD_STATE, NULL,
+ &stid);
+ if (status)
+ return status;
+ if (!stid)
+ return nfserr_bad_stateid;
+
+ cn->cpn_lease_time.tv_sec = nn->nfsd4_lease;
+ cn->cpn_lease_time.tv_nsec = 0;
+
+ status = nfserrno(-ENOMEM);
+ cps = nfs4_alloc_init_cpntf_state(nn, stid);
+ if (!cps)
+ goto out;
+ memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.cs_stid, sizeof(stateid_t));
+ memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t));
+ memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t));
+
+ /* For now, only return one server address in cpn_src, the
+ * address used by the client to connect to this server.
+ */
+ cn->cpn_src->nl4_type = NL4_NETADDR;
+ status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
+ &cn->cpn_src->u.nl4_addr);
+ WARN_ON_ONCE(status);
+ if (status) {
+ nfs4_put_cpntf_state(nn, cps);
+ goto out;
+ }
+out:
+ nfs4_put_stid(stid);
return status;
}
@@ -1346,39 +2150,44 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
__be32 status;
- struct file *file;
+ struct nfsd_file *nf;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&fallocate->falloc_stateid,
- WR_STATE, &file, NULL);
- if (status != nfs_ok) {
- dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
+ WR_STATE, &nf, NULL);
+ if (status != nfs_ok)
return status;
- }
- status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
+ status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, nf->nf_file,
fallocate->falloc_offset,
fallocate->falloc_length,
flags);
- fput(file);
+ nfsd_file_put(nf);
return status;
}
+
static __be32
nfsd4_offload_status(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_offload_status *os = &u->offload_status;
- __be32 status = 0;
+ __be32 status = nfs_ok;
struct nfsd4_copy *copy;
struct nfs4_client *clp = cstate->clp;
- copy = find_async_copy(clp, &os->stateid);
+ os->completed = false;
+ spin_lock(&clp->async_lock);
+ copy = find_async_copy_locked(clp, &os->stateid);
if (copy) {
os->count = copy->cp_res.wr_bytes_written;
- nfs4_put_copy(copy);
+ if (test_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags)) {
+ os->completed = true;
+ os->status = copy->nfserr;
+ }
} else
status = nfserr_bad_stateid;
+ spin_unlock(&clp->async_lock);
return status;
}
@@ -1405,15 +2214,13 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_seek *seek = &u->seek;
int whence;
__be32 status;
- struct file *file;
+ struct nfsd_file *nf;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&seek->seek_stateid,
- RD_STATE, &file, NULL);
- if (status) {
- dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
+ RD_STATE, &nf, NULL);
+ if (status)
return status;
- }
switch (seek->seek_whence) {
case NFS4_CONTENT_DATA:
@@ -1431,14 +2238,14 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* Note: This call does change file->f_pos, but nothing in NFSD
* should ever file->f_pos.
*/
- seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
+ seek->seek_pos = vfs_llseek(nf->nf_file, seek->seek_offset, whence);
if (seek->seek_pos < 0)
status = nfserrno(seek->seek_pos);
- else if (seek->seek_pos >= i_size_read(file_inode(file)))
+ else if (seek->seek_pos >= i_size_read(file_inode(nf->nf_file)))
seek->seek_eof = true;
out:
- fput(file);
+ nfsd_file_put(nf);
return status;
}
@@ -1525,6 +2332,49 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status == nfserr_same ? nfs_ok : status;
}
+static __be32
+nfsd4_get_dir_delegation(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation;
+ struct nfs4_delegation *dd;
+ struct nfsd_file *nf;
+ __be32 status;
+
+ status = nfsd_file_acquire_dir(rqstp, &cstate->current_fh, &nf);
+ if (status != nfs_ok)
+ return status;
+
+ /*
+ * RFC 8881, section 18.39.3 says:
+ *
+ * "The server may refuse to grant the delegation. In that case, the
+ * server will return NFS4ERR_DIRDELEG_UNAVAIL."
+ *
+ * This is sub-optimal, since it means that the server would need to
+ * abort compound processing just because the delegation wasn't
+ * available. RFC8881bis should change this to allow the server to
+ * return NFS4_OK with a non-fatal status of GDD4_UNAVAIL in this
+ * situation.
+ */
+ dd = nfsd_get_dir_deleg(cstate, gdd, nf);
+ nfsd_file_put(nf);
+ if (IS_ERR(dd)) {
+ int err = PTR_ERR(dd);
+
+ if (err != -EAGAIN)
+ return nfserrno(err);
+ gdd->gddrnf_status = GDD4_UNAVAIL;
+ return nfs_ok;
+ }
+
+ gdd->gddrnf_status = GDD4_OK;
+ memcpy(&gdd->gddr_stateid, &dd->dl_stid.sc_stateid, sizeof(gdd->gddr_stateid));
+ nfs4_put_stid(&dd->dl_stid);
+ return nfs_ok;
+}
+
#ifdef CONFIG_NFSD_PNFS
static const struct nfsd4_layout_ops *
nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
@@ -1567,7 +2417,9 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
return nfserr_noent;
}
- exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid);
+ exp = rqst_exp_find(&rqstp->rq_chandle, SVC_NET(rqstp),
+ rqstp->rq_client, rqstp->rq_gssclient,
+ map->fsid_type, map->fsid);
if (IS_ERR(exp)) {
dprintk("%s: could not find device id\n", __func__);
return nfserr_noent;
@@ -1581,7 +2433,7 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
nfserr = nfs_ok;
if (gdp->gd_maxcount != 0) {
nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb,
- rqstp, cstate->session->se_client, gdp);
+ rqstp, cstate->clp, gdp);
}
gdp->gd_notify_types &= ops->notify_types;
@@ -1605,7 +2457,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
const struct nfsd4_layout_ops *ops;
struct nfs4_layout_stateid *ls;
__be32 nfserr;
- int accmode = NFSD_MAY_READ_IF_EXEC;
+ int accmode = NFSD_MAY_READ_IF_EXEC | NFSD_MAY_OWNER_OVERRIDE;
switch (lgp->lg_seg.iomode) {
case IOMODE_READ:
@@ -1662,7 +2514,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
if (atomic_read(&ls->ls_stid.sc_file->fi_lo_recalls))
goto out_put_stid;
- nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
+ nfserr = ops->proc_layoutget(rqstp, d_inode(current_fh->fh_dentry),
current_fh, lgp);
if (nfserr)
goto out_put_stid;
@@ -1686,16 +2538,17 @@ static __be32
nfsd4_layoutcommit(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
{
+ struct net *net = SVC_NET(rqstp);
struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
struct svc_fh *current_fh = &cstate->current_fh;
const struct nfsd4_layout_ops *ops;
- loff_t new_size = lcp->lc_last_wr + 1;
struct inode *inode;
struct nfs4_layout_stateid *ls;
__be32 nfserr;
- nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE);
+ nfserr = fh_verify(rqstp, current_fh, 0,
+ NFSD_MAY_WRITE | NFSD_MAY_OWNER_OVERRIDE);
if (nfserr)
goto out;
@@ -1705,43 +2558,50 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
goto out;
inode = d_inode(current_fh->fh_dentry);
- nfserr = nfserr_inval;
- if (new_size <= seg->offset) {
- dprintk("pnfsd: last write before layout segment\n");
- goto out;
+ lcp->lc_size_chg = false;
+ if (lcp->lc_newoffset) {
+ loff_t new_size = lcp->lc_last_wr + 1;
+
+ nfserr = nfserr_inval;
+ if (new_size <= seg->offset)
+ goto out;
+ if (new_size > seg->offset + seg->length)
+ goto out;
+
+ if (new_size > i_size_read(inode)) {
+ lcp->lc_size_chg = true;
+ lcp->lc_newsize = new_size;
+ }
}
- if (new_size > seg->offset + seg->length) {
- dprintk("pnfsd: last write beyond layout segment\n");
+
+ nfserr = nfserr_grace;
+ if (locks_in_grace(net) && !lcp->lc_reclaim)
goto out;
- }
- if (!lcp->lc_newoffset && new_size > i_size_read(inode)) {
- dprintk("pnfsd: layoutcommit beyond EOF\n");
+ nfserr = nfserr_no_grace;
+ if (!locks_in_grace(net) && lcp->lc_reclaim)
goto out;
- }
- nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid,
- false, lcp->lc_layout_type,
- &ls);
- if (nfserr) {
- trace_nfsd_layout_commit_lookup_fail(&lcp->lc_sid);
- /* fixup error code as per RFC5661 */
- if (nfserr == nfserr_bad_stateid)
- nfserr = nfserr_badlayout;
- goto out;
+ if (!lcp->lc_reclaim) {
+ nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate,
+ &lcp->lc_sid, false, lcp->lc_layout_type, &ls);
+ if (nfserr) {
+ trace_nfsd_layout_commit_lookup_fail(&lcp->lc_sid);
+ /* fixup error code as per RFC5661 */
+ if (nfserr == nfserr_bad_stateid)
+ nfserr = nfserr_badlayout;
+ goto out;
+ }
+
+ /* LAYOUTCOMMIT does not require any serialization */
+ mutex_unlock(&ls->ls_mutex);
}
- /* LAYOUTCOMMIT does not require any serialization */
- mutex_unlock(&ls->ls_mutex);
+ nfserr = ops->proc_layoutcommit(inode, rqstp, lcp);
- if (new_size > i_size_read(inode)) {
- lcp->lc_size_chg = 1;
- lcp->lc_newsize = new_size;
- } else {
- lcp->lc_size_chg = 0;
+ if (!lcp->lc_reclaim) {
+ nfsd4_file_mark_deleg_written(ls->ls_stid.sc_file);
+ nfs4_put_stid(&ls->ls_stid);
}
-
- nfserr = ops->proc_layoutcommit(inode, lcp);
- nfs4_put_stid(&ls->ls_stid);
out:
return nfserr;
}
@@ -1793,19 +2653,81 @@ out:
}
#endif /* CONFIG_NFSD_PNFS */
+static __be32
+nfsd4_getxattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_getxattr *getxattr = &u->getxattr;
+
+ return nfsd_getxattr(rqstp, &cstate->current_fh,
+ getxattr->getxa_name, &getxattr->getxa_buf,
+ &getxattr->getxa_len);
+}
+
+static __be32
+nfsd4_setxattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_setxattr *setxattr = &u->setxattr;
+ __be32 ret;
+
+ if (opens_in_grace(SVC_NET(rqstp)))
+ return nfserr_grace;
+
+ ret = nfsd_setxattr(rqstp, &cstate->current_fh, setxattr->setxa_name,
+ setxattr->setxa_buf, setxattr->setxa_len,
+ setxattr->setxa_flags);
+
+ if (!ret)
+ set_change_info(&setxattr->setxa_cinfo, &cstate->current_fh);
+
+ return ret;
+}
+
+static __be32
+nfsd4_listxattrs(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ /*
+ * Get the entire list, then copy out only the user attributes
+ * in the encode function.
+ */
+ return nfsd_listxattr(rqstp, &cstate->current_fh,
+ &u->listxattrs.lsxa_buf, &u->listxattrs.lsxa_len);
+}
+
+static __be32
+nfsd4_removexattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_removexattr *removexattr = &u->removexattr;
+ __be32 ret;
+
+ if (opens_in_grace(SVC_NET(rqstp)))
+ return nfserr_grace;
+
+ ret = nfsd_removexattr(rqstp, &cstate->current_fh,
+ removexattr->rmxa_name);
+
+ if (!ret)
+ set_change_info(&removexattr->rmxa_cinfo, &cstate->current_fh);
+
+ return ret;
+}
+
/*
* NULL call.
*/
static __be32
nfsd4_proc_null(struct svc_rqst *rqstp)
{
- return nfs_ok;
+ return rpc_success;
}
-static inline void nfsd4_increment_op_stats(u32 opnum)
+static inline void nfsd4_increment_op_stats(struct nfsd_net *nn, u32 opnum)
{
if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
- nfsdstats.nfs4_opcount[opnum]++;
+ percpu_counter_inc(&nn->counter[NFSD_STATS_NFS4_OP(opnum)]);
}
static const struct nfsd4_operation nfsd4_ops[];
@@ -1895,24 +2817,44 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
return !(nextd->op_flags & OP_HANDLES_WRONGSEC);
}
-static void svcxdr_init_encode(struct svc_rqst *rqstp,
- struct nfsd4_compoundres *resp)
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+static void
+check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
{
- struct xdr_stream *xdr = &resp->xdr;
- struct xdr_buf *buf = &rqstp->rq_res;
- struct kvec *head = buf->head;
+ struct nfsd4_op *op, *current_op = NULL, *saved_op = NULL;
+ struct nfsd4_copy *copy;
+ struct nfsd4_putfh *putfh;
+ int i;
- xdr->buf = buf;
- xdr->iov = head;
- xdr->p = head->iov_base + head->iov_len;
- xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
- /* Tail and page_len should be zero at this point: */
- buf->len = buf->head[0].iov_len;
- xdr->scratch.iov_len = 0;
- xdr->page_ptr = buf->pages - 1;
- buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
- - rqstp->rq_auth_slack;
+ /* traverse all operation and if it's a COPY compound, mark the
+ * source filehandle to skip verification
+ */
+ for (i = 0; i < args->opcnt; i++) {
+ op = &args->ops[i];
+ if (op->opnum == OP_PUTFH)
+ current_op = op;
+ else if (op->opnum == OP_SAVEFH)
+ saved_op = current_op;
+ else if (op->opnum == OP_RESTOREFH)
+ current_op = saved_op;
+ else if (op->opnum == OP_COPY) {
+ copy = (struct nfsd4_copy *)&op->u;
+ if (!saved_op) {
+ op->status = nfserr_nofilehandle;
+ return;
+ }
+ putfh = (struct nfsd4_putfh *)&saved_op->u;
+ if (nfsd4_ssc_is_inter(copy))
+ putfh->no_verify = true;
+ }
+ }
+}
+#else
+static void
+check_if_stalefh_allowed(struct nfsd4_compoundargs *args)
+{
}
+#endif
/*
* COMPOUND call.
@@ -1926,12 +2868,17 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
struct nfsd4_compound_state *cstate = &resp->cstate;
struct svc_fh *current_fh = &cstate->current_fh;
struct svc_fh *save_fh = &cstate->save_fh;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
__be32 status;
- svcxdr_init_encode(rqstp, resp);
- resp->tagp = resp->xdr.p;
+ resp->xdr = &rqstp->rq_res_stream;
+ resp->statusp = resp->xdr->p;
+
+ /* reserve space for: NFS status code */
+ xdr_reserve_space(resp->xdr, XDR_UNIT);
+
/* reserve space for: taglen, tag, and opcnt */
- xdr_reserve_space(&resp->xdr, 8 + args->taglen);
+ xdr_reserve_space(resp->xdr, XDR_UNIT * 2 + args->taglen);
resp->taglen = args->taglen;
resp->tag = args->tag;
resp->rqstp = rqstp;
@@ -1948,10 +2895,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
* According to RFC3010, this takes precedence over all other errors.
*/
status = nfserr_minor_vers_mismatch;
- if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0)
- goto out;
- status = nfserr_resource;
- if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
+ if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0)
goto out;
status = nfs41_check_op_ordering(args);
@@ -1961,11 +2905,24 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
resp->opcnt = 1;
goto encode_op;
}
+ check_if_stalefh_allowed(args);
+
+ rqstp->rq_lease_breaker = (void **)&cstate->clp;
- trace_nfsd_compound(rqstp, args->opcnt);
+ trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt);
while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++];
+ if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) {
+ /* If there are still more operations to process,
+ * stop here and report NFS4ERR_RESOURCE. */
+ if (cstate->minorversion == 0 &&
+ args->client_opcnt > resp->opcnt) {
+ op->status = nfserr_resource;
+ goto encode_op;
+ }
+ }
+
/*
* The XDR decode routines may have pre-set op->status;
* for example, if there is a miscellaneous XDR error
@@ -1976,25 +2933,26 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
op->status = nfsd4_open_omfg(rqstp, cstate, op);
goto encode_op;
}
-
- if (!current_fh->fh_dentry) {
+ if (!current_fh->fh_dentry &&
+ !HAS_FH_FLAG(current_fh, NFSD4_FH_FOREIGN)) {
if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
op->status = nfserr_nofilehandle;
goto encode_op;
}
- } else if (current_fh->fh_export->ex_fslocs.migrated &&
+ } else if (current_fh->fh_export &&
+ current_fh->fh_export->ex_fslocs.migrated &&
!(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
op->status = nfserr_moved;
goto encode_op;
}
- fh_clear_wcc(current_fh);
+ fh_clear_pre_post_attrs(current_fh);
/* If op is non-idempotent */
if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) {
/*
* Don't execute this op if we couldn't encode a
- * succesful reply:
+ * successful reply:
*/
u32 plen = op->opdesc->op_rsize_bop(rqstp, op);
/*
@@ -2012,6 +2970,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (op->opdesc->op_get_currentstateid)
op->opdesc->op_get_currentstateid(cstate, &op->u);
op->status = op->opdesc->op_func(rqstp, cstate, &op->u);
+ trace_nfsd_compound_op_err(rqstp, op->opnum, op->status);
/* Only from SEQUENCE */
if (cstate->status == nfserr_replay_cache) {
@@ -2026,35 +2985,35 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (op->opdesc->op_flags & OP_CLEAR_STATEID)
clear_current_stateid(cstate);
- if (need_wrongsec_check(rqstp))
- op->status = check_nfsd_access(current_fh->fh_export, rqstp);
+ if (current_fh->fh_export &&
+ need_wrongsec_check(rqstp))
+ op->status = check_nfsd_access(current_fh->fh_export, rqstp, false);
}
encode_op:
if (op->status == nfserr_replay_me) {
op->replay = &cstate->replay_owner->so_replay;
- nfsd4_encode_replay(&resp->xdr, op);
+ nfsd4_encode_replay(resp->xdr, op);
status = op->status = op->replay->rp_status;
} else {
nfsd4_encode_operation(resp, op);
status = op->status;
}
- trace_nfsd_compound_status(args->opcnt, resp->opcnt, status,
- nfsd4_op_name(op->opnum));
+ trace_nfsd_compound_status(args->client_opcnt, resp->opcnt,
+ status, nfsd4_op_name(op->opnum));
nfsd4_cstate_clear_replay(cstate);
- nfsd4_increment_op_stats(op->opnum);
+ nfsd4_increment_op_stats(nn, op->opnum);
}
- cstate->status = status;
fh_put(current_fh);
fh_put(save_fh);
BUG_ON(cstate->replay_owner);
out:
+ cstate->status = status;
/* Reset deferral mechanism for RPC deferrals */
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
- dprintk("nfsv4 compound returned %d\n", ntohl(status));
- return status;
+ return rpc_success;
}
#define op_encode_hdr_size (2)
@@ -2075,28 +3034,49 @@ out:
#define op_encode_channel_attrs_maxsz (6 + 1 + 1)
-static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+/*
+ * The _rsize() helpers are invoked by the NFSv4 COMPOUND decoder, which
+ * is called before sunrpc sets rq_res.buflen. Thus we have to compute
+ * the maximum payload size here, based on transport limits and the size
+ * of the remaining space in the rq_pages array.
+ */
+static u32 nfsd4_max_payload(const struct svc_rqst *rqstp)
+{
+ u32 buflen;
+
+ buflen = (rqstp->rq_page_end - rqstp->rq_next_page) * PAGE_SIZE;
+ buflen -= rqstp->rq_auth_slack;
+ buflen -= rqstp->rq_res.head[0].iov_len;
+ return min_t(u32, buflen, svc_max_payload(rqstp));
+}
+
+static u32 nfsd4_only_status_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size) * sizeof(__be32);
}
-static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_status_stateid_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
}
-static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_access_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
/* ac_supported, ac_resp_access */
return (op_encode_hdr_size + 2)* sizeof(__be32);
}
-static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_commit_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_create_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz
+ nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
@@ -2107,17 +3087,17 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op
* the op prematurely if the estimate is too large. We may turn off splice
* reads unnecessarily.
*/
-static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
- struct nfsd4_op *op)
+static u32 nfsd4_getattr_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
- u32 *bmap = op->u.getattr.ga_bmval;
+ const u32 *bmap = op->u.getattr.ga_bmval;
u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2];
u32 ret = 0;
if (bmap0 & FATTR4_WORD0_ACL)
- return svc_max_payload(rqstp);
+ return nfsd4_max_payload(rqstp);
if (bmap0 & FATTR4_WORD0_FS_LOCATIONS)
- return svc_max_payload(rqstp);
+ return nfsd4_max_payload(rqstp);
if (bmap1 & FATTR4_WORD1_OWNER) {
ret += IDMAP_NAMESZ + 4;
@@ -2145,24 +3125,28 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
return ret;
}
-static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_getfh_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE;
}
-static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_link_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
* sizeof(__be32);
}
-static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_lock_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_lock_denied_maxsz)
* sizeof(__be32);
}
-static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_open_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_stateid_maxsz
+ op_encode_change_info_maxsz + 1
@@ -2170,80 +3154,99 @@ static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+ op_encode_delegation_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_read_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
- u32 maxcount = 0, rlen = 0;
-
- maxcount = svc_max_payload(rqstp);
- rlen = min(op->u.read.rd_length, maxcount);
+ u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp));
return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
}
-static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
- u32 maxcount = 0, rlen = 0;
+ u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp));
+ /*
+ * If we detect that the file changed during hole encoding, then we
+ * recover by encoding the remaining reply as data. This means we need
+ * to set aside enough room to encode two data segments.
+ */
+ u32 seg_len = 2 * (1 + 2 + 1);
- maxcount = svc_max_payload(rqstp);
- rlen = min(op->u.readdir.rd_maxcount, maxcount);
+ return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32);
+}
+
+static u32 nfsd4_readdir_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+{
+ u32 rlen = min(op->u.readdir.rd_maxcount, nfsd4_max_payload(rqstp));
return (op_encode_hdr_size + op_encode_verifier_maxsz +
XDR_QUADLEN(rlen)) * sizeof(__be32);
}
-static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_readlink_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE;
}
-static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_remove_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
* sizeof(__be32);
}
-static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_rename_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz
+ op_encode_change_info_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
- struct nfsd4_op *op)
+static u32 nfsd4_sequence_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
}
-static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_test_stateid_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids)
* sizeof(__be32);
}
-static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_setattr_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_secinfo_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR *
(4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32);
}
-static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_setclientid_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
sizeof(__be32);
}
-static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_write_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_exchange_id_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
1 + 1 + /* eir_flags, spr_how */\
@@ -2257,14 +3260,16 @@ static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_o
0 /* ignored eir_server_impl_id contents */) * sizeof(__be32);
}
-static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_bind_conn_to_session_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + \
XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\
2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32);
}
-static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_create_session_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + \
XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\
@@ -2273,7 +3278,8 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
op_encode_channel_attrs_maxsz) * sizeof(__be32);
}
-static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_copy_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size +
1 /* wr_callback */ +
@@ -2285,21 +3291,46 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1 /* cr_synchronous */) * sizeof(__be32);
}
-static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp,
- struct nfsd4_op *op)
+static u32 nfsd4_offload_status_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size +
2 /* osr_count */ +
1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32);
}
-#ifdef CONFIG_NFSD_PNFS
-static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
- u32 maxcount = 0, rlen = 0;
+ return (op_encode_hdr_size +
+ 3 /* cnr_lease_time */ +
+ 1 /* We support one cnr_source_server */ +
+ 1 /* cnr_stateid seq */ +
+ op_encode_stateid_maxsz /* cnr_stateid */ +
+ 1 /* num cnr_source_server*/ +
+ 1 /* nl4_type */ +
+ 1 /* nl4 size */ +
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) /*nl4_loc + nl4_loc_sz */)
+ * sizeof(__be32);
+}
- maxcount = svc_max_payload(rqstp);
- rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount);
+static u32 nfsd4_get_dir_delegation_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size +
+ 1 /* gddr_status */ +
+ op_encode_verifier_maxsz +
+ op_encode_stateid_maxsz +
+ 2 /* gddr_notification */ +
+ 2 /* gddr_child_attributes */ +
+ 2 /* gddr_dir_attributes */);
+}
+
+#ifdef CONFIG_NFSD_PNFS
+static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+{
+ u32 rlen = min(op->u.getdeviceinfo.gd_maxcount, nfsd4_max_payload(rqstp));
return (op_encode_hdr_size +
1 /* gd_layout_type*/ +
@@ -2312,7 +3343,8 @@ static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4
* so we need to define an arbitrary upper bound here.
*/
#define MAX_LAYOUT_SIZE 128
-static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_layoutget_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size +
1 /* logr_return_on_close */ +
@@ -2321,14 +3353,16 @@ static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op
MAX_LAYOUT_SIZE) * sizeof(__be32);
}
-static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_layoutcommit_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size +
1 /* locr_newsize */ +
2 /* ns_size */) * sizeof(__be32);
}
-static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size +
1 /* lrs_stateid */ +
@@ -2337,11 +3371,42 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_
#endif /* CONFIG_NFSD_PNFS */
-static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
{
return (op_encode_hdr_size + 3) * sizeof(__be32);
}
+static u32 nfsd4_getxattr_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+{
+ u32 rlen = min_t(u32, XATTR_SIZE_MAX, nfsd4_max_payload(rqstp));
+
+ return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32);
+}
+
+static u32 nfsd4_setxattr_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_change_info_maxsz)
+ * sizeof(__be32);
+}
+static u32 nfsd4_listxattrs_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+{
+ u32 rlen = min(op->u.listxattrs.lsxa_maxcount, nfsd4_max_payload(rqstp));
+
+ return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32);
+}
+
+static u32 nfsd4_removexattr_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_change_info_maxsz)
+ * sizeof(__be32);
+}
+
+
static const struct nfsd4_operation nfsd4_ops[] = {
[OP_ACCESS] = {
.op_func = nfsd4_access,
@@ -2395,6 +3460,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_LOCK] = {
.op_func = nfsd4_lock,
+ .op_release = nfsd4_lock_release,
.op_flags = OP_MODIFIES_SOMETHING |
OP_NONTRIVIAL_ERROR_ENCODE,
.op_name = "OP_LOCK",
@@ -2403,6 +3469,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_LOCKT] = {
.op_func = nfsd4_lockt,
+ .op_release = nfsd4_lockt_release,
.op_flags = OP_NONTRIVIAL_ERROR_ENCODE,
.op_name = "OP_LOCKT",
.op_rsize_bop = nfsd4_lock_rsize,
@@ -2576,6 +3643,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
/* NFSv4.1 operations */
[OP_EXCHANGE_ID] = {
.op_func = nfsd4_exchange_id,
+ .op_release = nfsd4_exchange_id_release,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
| OP_MODIFIES_SOMETHING,
.op_name = "OP_EXCHANGE_ID",
@@ -2647,6 +3715,12 @@ static const struct nfsd4_operation nfsd4_ops[] = {
.op_get_currentstateid = nfsd4_get_freestateid,
.op_rsize_bop = nfsd4_only_status_rsize,
},
+ [OP_GET_DIR_DELEGATION] = {
+ .op_func = nfsd4_get_dir_delegation,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_GET_DIR_DELEGATION",
+ .op_rsize_bop = nfsd4_get_dir_delegation_rsize,
+ },
#ifdef CONFIG_NFSD_PNFS
[OP_GETDEVICEINFO] = {
.op_func = nfsd4_getdeviceinfo,
@@ -2701,6 +3775,13 @@ static const struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_COPY",
.op_rsize_bop = nfsd4_copy_rsize,
},
+ [OP_READ_PLUS] = {
+ .op_func = nfsd4_read,
+ .op_release = nfsd4_read_release,
+ .op_name = "OP_READ_PLUS",
+ .op_rsize_bop = nfsd4_read_plus_rsize,
+ .op_get_currentstateid = nfsd4_get_readstateid,
+ },
[OP_SEEK] = {
.op_func = nfsd4_seek,
.op_name = "OP_SEEK",
@@ -2717,6 +3798,34 @@ static const struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_OFFLOAD_CANCEL",
.op_rsize_bop = nfsd4_only_status_rsize,
},
+ [OP_COPY_NOTIFY] = {
+ .op_func = nfsd4_copy_notify,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_COPY_NOTIFY",
+ .op_rsize_bop = nfsd4_copy_notify_rsize,
+ },
+ [OP_GETXATTR] = {
+ .op_func = nfsd4_getxattr,
+ .op_name = "OP_GETXATTR",
+ .op_rsize_bop = nfsd4_getxattr_rsize,
+ },
+ [OP_SETXATTR] = {
+ .op_func = nfsd4_setxattr,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_SETXATTR",
+ .op_rsize_bop = nfsd4_setxattr_rsize,
+ },
+ [OP_LISTXATTRS] = {
+ .op_func = nfsd4_listxattrs,
+ .op_name = "OP_LISTXATTRS",
+ .op_rsize_bop = nfsd4_listxattrs_rsize,
+ },
+ [OP_REMOVEXATTR] = {
+ .op_func = nfsd4_removexattr,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_REMOVEXATTR",
+ .op_rsize_bop = nfsd4_removexattr_rsize,
+ },
};
/**
@@ -2733,15 +3842,16 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
{
struct nfsd4_compoundres *resp = rqstp->rq_resp;
struct nfsd4_compoundargs *argp = rqstp->rq_argp;
- struct nfsd4_op *this = &argp->ops[resp->opcnt - 1];
+ struct nfsd4_op *this;
struct nfsd4_compound_state *cstate = &resp->cstate;
struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow;
u32 opiter;
- if (!cstate->minorversion)
+ if (rqstp->rq_procinfo != &nfsd_version4.vs_proc[NFSPROC4_COMPOUND] ||
+ cstate->minorversion == 0)
return false;
- if (cstate->spo_must_allowed == true)
+ if (cstate->spo_must_allowed)
return true;
opiter = resp->opcnt;
@@ -2770,7 +3880,7 @@ int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
void warn_on_nonidempotent_op(struct nfsd4_op *op)
{
if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) {
- pr_err("unable to encode reply to nonidempotent op %d (%s)\n",
+ pr_err("unable to encode reply to nonidempotent op %u (%s)\n",
op->opnum, nfsd4_op_name(op->opnum));
WARN_ON_ONCE(1);
}
@@ -2783,44 +3893,41 @@ static const char *nfsd4_op_name(unsigned opnum)
return "unknown_operation";
}
-#define nfsd4_voidres nfsd4_voidargs
-struct nfsd4_voidargs { int dummy; };
-
static const struct svc_procedure nfsd_procedures4[2] = {
[NFSPROC4_NULL] = {
.pc_func = nfsd4_proc_null,
- .pc_encode = nfs4svc_encode_voidres,
- .pc_argsize = sizeof(struct nfsd4_voidargs),
- .pc_ressize = sizeof(struct nfsd4_voidres),
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = 1,
+ .pc_name = "NULL",
},
[NFSPROC4_COMPOUND] = {
.pc_func = nfsd4_proc_compound,
.pc_decode = nfs4svc_decode_compoundargs,
.pc_encode = nfs4svc_encode_compoundres,
.pc_argsize = sizeof(struct nfsd4_compoundargs),
+ .pc_argzero = offsetof(struct nfsd4_compoundargs, iops),
.pc_ressize = sizeof(struct nfsd4_compoundres),
.pc_release = nfsd4_release_compoundargs,
.pc_cachetype = RC_NOCACHE,
- .pc_xdrressize = NFSD_BUFSIZE/4,
+ .pc_xdrressize = 3+NFSSVC_MAXBLKSIZE/4,
+ .pc_name = "COMPOUND",
},
};
-static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfsd_count4[ARRAY_SIZE(nfsd_procedures4)]);
const struct svc_version nfsd_version4 = {
.vs_vers = 4,
- .vs_nproc = 2,
+ .vs_nproc = ARRAY_SIZE(nfsd_procedures4),
.vs_proc = nfsd_procedures4,
- .vs_count = nfsd_count3,
+ .vs_count = nfsd_count4,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS4_SVC_XDRSIZE,
.vs_rpcb_optnl = true,
.vs_need_cong_ctrl = true,
};
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5188f9f70c78..441dfbfe2d2b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -32,7 +32,8 @@
*
*/
-#include <crypto/hash.h>
+#include <crypto/md5.h>
+#include <crypto/sha2.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
@@ -59,8 +60,14 @@ struct nfsd4_client_tracking_ops {
void (*remove)(struct nfs4_client *);
int (*check)(struct nfs4_client *);
void (*grace_done)(struct nfsd_net *);
+ uint8_t version;
+ size_t msglen;
};
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2;
+
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
/* Globals */
static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
@@ -76,97 +83,39 @@ nfs4_save_creds(const struct cred **original_creds)
new->fsuid = GLOBAL_ROOT_UID;
new->fsgid = GLOBAL_ROOT_GID;
*original_creds = override_creds(new);
- put_cred(new);
return 0;
}
static void
nfs4_reset_creds(const struct cred *original)
{
- revert_creds(original);
+ put_cred(revert_creds(original));
}
static void
-md5_to_hex(char *out, char *md5)
-{
- int i;
-
- for (i=0; i<16; i++) {
- unsigned char c = md5[i];
-
- *out++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
- *out++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
- }
- *out = '\0';
-}
-
-static int
-nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
+nfs4_make_rec_clidname(char dname[HEXDIR_LEN], const struct xdr_netobj *clname)
{
- struct xdr_netobj cksum;
- struct crypto_shash *tfm;
- int status;
+ u8 digest[MD5_DIGEST_SIZE];
dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
clname->len, clname->data);
- tfm = crypto_alloc_shash("md5", 0, 0);
- if (IS_ERR(tfm)) {
- status = PTR_ERR(tfm);
- goto out_no_tfm;
- }
- cksum.len = crypto_shash_digestsize(tfm);
- cksum.data = kmalloc(cksum.len, GFP_KERNEL);
- if (cksum.data == NULL) {
- status = -ENOMEM;
- goto out;
- }
+ md5(clname->data, clname->len, digest);
- {
- SHASH_DESC_ON_STACK(desc, tfm);
-
- desc->tfm = tfm;
- desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
-
- status = crypto_shash_digest(desc, clname->data, clname->len,
- cksum.data);
- shash_desc_zero(desc);
- }
-
- if (status)
- goto out;
-
- md5_to_hex(dname, cksum.data);
-
- status = 0;
-out:
- kfree(cksum.data);
- crypto_free_shash(tfm);
-out_no_tfm:
- return status;
+ static_assert(HEXDIR_LEN == 2 * MD5_DIGEST_SIZE + 1);
+ sprintf(dname, "%*phN", MD5_DIGEST_SIZE, digest);
}
-/*
- * If we had an error generating the recdir name for the legacy tracker
- * then warn the admin. If the error doesn't appear to be transient,
- * then disable recovery tracking.
- */
static void
-legacy_recdir_name_error(struct nfs4_client *clp, int error)
+__nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
+ char *dname, struct nfsd_net *nn)
{
- printk(KERN_ERR "NFSD: unable to generate recoverydir "
- "name (%d).\n", error);
+ struct xdr_netobj name = { .len = strlen(dname), .data = dname };
+ struct xdr_netobj princhash = { .len = 0, .data = NULL };
+ struct nfs4_client_reclaim *crp;
- /*
- * if the algorithm just doesn't exist, then disable the recovery
- * tracker altogether. The crypto libs will generally return this if
- * FIPS is enabled as well.
- */
- if (error == -ENOENT) {
- printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
- "Reboot recovery will not function correctly!\n");
- nfsd4_client_tracking_exit(clp->net);
- }
+ crp = nfs4_client_to_reclaim(name, princhash, nn);
+ crp->cr_clp = clp;
}
static void
@@ -175,7 +124,6 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
const struct cred *original_cred;
char dname[HEXDIR_LEN];
struct dentry *dir, *dentry;
- struct nfs4_client_reclaim *crp;
int status;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -184,9 +132,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
if (!nn->rec_file)
return;
- status = nfs4_make_rec_clidname(dname, &clp->cl_name);
- if (status)
- return legacy_recdir_name_error(clp, status);
+ nfs4_make_rec_clidname(dname, &clp->cl_name);
status = nfs4_save_creds(&original_cred);
if (status < 0)
@@ -197,13 +143,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
goto out_creds;
dir = nn->rec_file->f_path.dentry;
- /* lock the parent */
- inode_lock(d_inode(dir));
- dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
+ dentry = start_creating(&nop_mnt_idmap, dir, &QSTR(dname));
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
- goto out_unlock;
+ goto out;
}
if (d_really_is_positive(dentry))
/*
@@ -214,18 +158,16 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
* In the 4.0 case, we should never get here; but we may
* as well be forgiving and just succeed silently.
*/
- goto out_put;
- status = vfs_mkdir(d_inode(dir), dentry, S_IRWXU);
-out_put:
- dput(dentry);
-out_unlock:
- inode_unlock(d_inode(dir));
+ goto out_end;
+ dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, 0700, NULL);
+ if (IS_ERR(dentry))
+ status = PTR_ERR(dentry);
+out_end:
+ end_creating(dentry);
+out:
if (status == 0) {
- if (nn->in_grace) {
- crp = nfs4_client_to_reclaim(dname, nn);
- if (crp)
- crp->cr_clp = clp;
- }
+ if (nn->in_grace)
+ __nfsd4_create_reclaim_record_grace(clp, dname, nn);
vfs_fsync(nn->rec_file, 0);
} else {
printk(KERN_ERR "NFSD: failed to write recovery record"
@@ -238,7 +180,7 @@ out_creds:
nfs4_reset_creds(original_cred);
}
-typedef int (recdir_func)(struct dentry *, struct dentry *, struct nfsd_net *);
+typedef int (recdir_func)(struct dentry *, char *, struct nfsd_net *);
struct name_list {
char name[HEXDIR_LEN];
@@ -250,7 +192,7 @@ struct nfs4_dir_ctx {
struct list_head names;
};
-static int
+static bool
nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
@@ -259,14 +201,14 @@ nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen,
struct name_list *entry;
if (namlen != HEXDIR_LEN - 1)
- return 0;
+ return true;
entry = kmalloc(sizeof(struct name_list), GFP_KERNEL);
if (entry == NULL)
- return -ENOMEM;
+ return false;
memcpy(entry->name, name, HEXDIR_LEN - 1);
entry->name[HEXDIR_LEN - 1] = '\0';
list_add(&entry->list, &ctx->names);
- return 0;
+ return true;
}
static int
@@ -292,23 +234,14 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
}
status = iterate_dir(nn->rec_file, &ctx.ctx);
- inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
- if (!status) {
- struct dentry *dentry;
- dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
- if (IS_ERR(dentry)) {
- status = PTR_ERR(dentry);
- break;
- }
- status = f(dir, dentry, nn);
- dput(dentry);
- }
+ if (!status)
+ status = f(dir, entry->name, nn);
+
list_del(&entry->list);
kfree(entry);
}
- inode_unlock(d_inode(dir));
nfs4_reset_creds(original_cred);
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
@@ -320,36 +253,47 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
}
static int
-nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
+nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn)
{
struct dentry *dir, *dentry;
int status;
- dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
+ dprintk("NFSD: nfsd4_unlink_clid_dir. name %s\n", name);
dir = nn->rec_file->f_path.dentry;
- inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
- dentry = lookup_one_len(name, dir, namlen);
- if (IS_ERR(dentry)) {
- status = PTR_ERR(dentry);
- goto out_unlock;
- }
- status = -ENOENT;
- if (d_really_is_negative(dentry))
- goto out;
- status = vfs_rmdir(d_inode(dir), dentry);
-out:
- dput(dentry);
-out_unlock:
- inode_unlock(d_inode(dir));
+ dentry = start_removing(&nop_mnt_idmap, dir, &QSTR(name));
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry, NULL);
+ end_removing(dentry);
return status;
}
static void
+__nfsd4_remove_reclaim_record_grace(const char *dname, int len,
+ struct nfsd_net *nn)
+{
+ struct xdr_netobj name;
+ struct nfs4_client_reclaim *crp;
+
+ name.data = kmemdup(dname, len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return;
+ }
+ name.len = len;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp)
+ nfs4_remove_reclaim_record(crp, nn);
+}
+
+static void
nfsd4_remove_clid_dir(struct nfs4_client *clp)
{
const struct cred *original_cred;
- struct nfs4_client_reclaim *crp;
char dname[HEXDIR_LEN];
int status;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -357,9 +301,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
- status = nfs4_make_rec_clidname(dname, &clp->cl_name);
- if (status)
- return legacy_recdir_name_error(clp, status);
+ nfs4_make_rec_clidname(dname, &clp->cl_name);
status = mnt_want_write_file(nn->rec_file);
if (status)
@@ -370,16 +312,13 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
if (status < 0)
goto out_drop_write;
- status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn);
+ status = nfsd4_unlink_clid_dir(dname, nn);
nfs4_reset_creds(original_cred);
if (status == 0) {
vfs_fsync(nn->rec_file, 0);
- if (nn->in_grace) {
- /* remove reclaim record */
- crp = nfsd4_find_reclaim_client(dname, nn);
- if (crp)
- nfs4_remove_reclaim_record(crp, nn);
- }
+ if (nn->in_grace)
+ __nfsd4_remove_reclaim_record_grace(dname,
+ HEXDIR_LEN, nn);
}
out_drop_write:
mnt_drop_write_file(nn->rec_file);
@@ -390,17 +329,42 @@ out:
}
static int
-purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
+purge_old(struct dentry *parent, char *cname, struct nfsd_net *nn)
{
int status;
+ struct dentry *child;
+ struct xdr_netobj name;
- if (nfs4_has_reclaimed_state(child->d_name.name, nn))
+ if (strlen(cname) != HEXDIR_LEN - 1) {
+ printk("%s: illegal name %s in recovery directory\n",
+ __func__, cname);
+ /* Keep trying; maybe the others are OK: */
return 0;
+ }
+ name.data = kstrdup(cname, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ goto out;
+ }
+ name.len = HEXDIR_LEN;
+ if (nfs4_has_reclaimed_state(name, nn))
+ goto out_free;
+
+ inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
+ child = lookup_one(&nop_mnt_idmap, &QSTR(cname), parent);
+ if (!IS_ERR(child)) {
+ status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child, NULL);
+ if (status)
+ printk("failed to remove client recovery directory %pd\n",
+ child);
+ dput(child);
+ }
+ inode_unlock(d_inode(parent));
- status = vfs_rmdir(d_inode(parent), child);
- if (status)
- printk("failed to remove client recovery directory %pd\n",
- child);
+out_free:
+ kfree(name.data);
+out:
/* Keep trying, success or failure: */
return 0;
}
@@ -428,15 +392,18 @@ out:
}
static int
-load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
+load_recdir(struct dentry *parent, char *cname, struct nfsd_net *nn)
{
- if (child->d_name.len != HEXDIR_LEN - 1) {
- printk("nfsd4: illegal name %pd in recovery directory\n",
- child);
+ struct xdr_netobj name = { .len = HEXDIR_LEN, .data = cname };
+ struct xdr_netobj princhash = { .len = 0, .data = NULL };
+
+ if (strlen(cname) != HEXDIR_LEN - 1) {
+ printk("%s: illegal name %s in recovery directory\n",
+ __func__, cname);
/* Keep trying; maybe the others are OK: */
return 0;
}
- nfs4_client_to_reclaim(child->d_name.name, nn);
+ nfs4_client_to_reclaim(name, princhash, nn);
return 0;
}
@@ -565,6 +532,7 @@ nfsd4_legacy_tracking_init(struct net *net)
status = nfsd4_load_reboot_recovery_data(net);
if (status)
goto err;
+ pr_info("NFSD: Using legacy client tracking operations.\n");
return 0;
err:
@@ -596,7 +564,8 @@ nfs4_reset_recoverydir(char *recdir)
return status;
status = -ENOTDIR;
if (d_is_dir(path.dentry)) {
- strcpy(user_recovery_dirname, recdir);
+ strscpy(user_recovery_dirname, recdir,
+ sizeof(user_recovery_dirname));
status = 0;
}
path_put(&path);
@@ -612,29 +581,34 @@ nfs4_recoverydir(void)
static int
nfsd4_check_legacy_client(struct nfs4_client *clp)
{
- int status;
char dname[HEXDIR_LEN];
struct nfs4_client_reclaim *crp;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct xdr_netobj name;
/* did we already find that this client is stable? */
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return 0;
- status = nfs4_make_rec_clidname(dname, &clp->cl_name);
- if (status) {
- legacy_recdir_name_error(clp, status);
- return status;
- }
+ nfs4_make_rec_clidname(dname, &clp->cl_name);
/* look for it in the reclaim hashtable otherwise */
- crp = nfsd4_find_reclaim_client(dname, nn);
+ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ goto out_enoent;
+ }
+ name.len = HEXDIR_LEN;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
if (crp) {
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
crp->cr_clp = clp;
return 0;
}
+out_enoent:
return -ENOENT;
}
@@ -645,7 +619,10 @@ static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
.remove = nfsd4_remove_clid_dir,
.check = nfsd4_check_legacy_client,
.grace_done = nfsd4_recdir_purge_old,
+ .version = 1,
+ .msglen = 0,
};
+#endif /* CONFIG_NFSD_LEGACY_CLIENT_TRACKING */
/* Globals */
#define NFSD_PIPE_DIR "nfsd"
@@ -657,25 +634,32 @@ struct cld_net {
spinlock_t cn_lock;
struct list_head cn_list;
unsigned int cn_xid;
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
+ bool cn_has_legacy;
+#endif
};
struct cld_upcall {
struct list_head cu_list;
struct cld_net *cu_net;
struct completion cu_done;
- struct cld_msg cu_msg;
+ union {
+ struct cld_msg_hdr cu_hdr;
+ struct cld_msg cu_msg;
+ struct cld_msg_v2 cu_msg_v2;
+ } cu_u;
};
static int
-__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn)
{
int ret;
struct rpc_pipe_msg msg;
- struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
+ struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
memset(&msg, 0, sizeof(msg));
msg.data = cmsg;
- msg.len = sizeof(*cmsg);
+ msg.len = nn->client_tracking_ops->msglen;
ret = rpc_queue_upcall(pipe, &msg);
if (ret < 0) {
@@ -691,7 +675,7 @@ out:
}
static int
-cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn)
{
int ret;
@@ -700,41 +684,127 @@ cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
* upcalls queued.
*/
do {
- ret = __cld_pipe_upcall(pipe, cmsg);
+ ret = __cld_pipe_upcall(pipe, cmsg, nn);
} while (ret == -EAGAIN);
return ret;
}
static ssize_t
+__cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
+ struct nfsd_net *nn)
+{
+ uint8_t cmd, princhashlen;
+ struct xdr_netobj name, princhash = { .len = 0, .data = NULL };
+ char *namecopy __free(kfree) = NULL;
+ char *princhashcopy __free(kfree) = NULL;
+ uint16_t namelen;
+
+ if (get_user(cmd, &cmsg->cm_cmd)) {
+ dprintk("%s: error when copying cmd from userspace", __func__);
+ return -EFAULT;
+ }
+ if (cmd == Cld_GraceStart) {
+ if (nn->client_tracking_ops->version >= 2) {
+ const struct cld_clntinfo __user *ci;
+
+ ci = &cmsg->cm_u.cm_clntinfo;
+ if (get_user(namelen, &ci->cc_name.cn_len))
+ return -EFAULT;
+ if (namelen == 0 || namelen > NFS4_OPAQUE_LIMIT) {
+ dprintk("%s: invalid namelen (%u)", __func__, namelen);
+ return -EINVAL;
+ }
+ namecopy = memdup_user(&ci->cc_name.cn_id, namelen);
+ if (IS_ERR(namecopy))
+ return PTR_ERR(namecopy);
+ name.data = namecopy;
+ name.len = namelen;
+ get_user(princhashlen, &ci->cc_princhash.cp_len);
+ if (princhashlen > 0) {
+ princhashcopy = memdup_user(
+ &ci->cc_princhash.cp_data,
+ princhashlen);
+ if (IS_ERR(princhashcopy))
+ return PTR_ERR(princhashcopy);
+ princhash.data = princhashcopy;
+ princhash.len = princhashlen;
+ } else
+ princhash.len = 0;
+ } else {
+ const struct cld_name __user *cnm;
+
+ cnm = &cmsg->cm_u.cm_name;
+ if (get_user(namelen, &cnm->cn_len))
+ return -EFAULT;
+ if (namelen == 0 || namelen > NFS4_OPAQUE_LIMIT) {
+ dprintk("%s: invalid namelen (%u)", __func__, namelen);
+ return -EINVAL;
+ }
+ namecopy = memdup_user(&cnm->cn_id, namelen);
+ if (IS_ERR(namecopy))
+ return PTR_ERR(namecopy);
+ name.data = namecopy;
+ name.len = namelen;
+ }
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
+ if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
+ struct cld_net *cn = nn->cld_net;
+
+ name.len = name.len - 5;
+ name.data = name.data + 5;
+ cn->cn_has_legacy = true;
+ }
+#endif
+ if (!nfs4_client_to_reclaim(name, princhash, nn))
+ return -EFAULT;
+ return nn->client_tracking_ops->msglen;
+ }
+ return -EFAULT;
+}
+
+static ssize_t
cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
struct cld_upcall *tmp, *cup;
- struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
+ struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src;
+ struct cld_msg_v2 __user *cmsg = (struct cld_msg_v2 __user *)src;
uint32_t xid;
struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
nfsd_net_id);
struct cld_net *cn = nn->cld_net;
+ int16_t status;
- if (mlen != sizeof(*cmsg)) {
+ if (mlen != nn->client_tracking_ops->msglen) {
dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
- sizeof(*cmsg));
+ nn->client_tracking_ops->msglen);
return -EINVAL;
}
/* copy just the xid so we can try to find that */
- if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
+ if (copy_from_user(&xid, &hdr->cm_xid, sizeof(xid)) != 0) {
dprintk("%s: error when copying xid from userspace", __func__);
return -EFAULT;
}
+ /*
+ * copy the status so we know whether to remove the upcall from the
+ * list (for -EINPROGRESS, we just want to make sure the xid is
+ * valid, not remove the upcall from the list)
+ */
+ if (get_user(status, &hdr->cm_status)) {
+ dprintk("%s: error when copying status from userspace", __func__);
+ return -EFAULT;
+ }
+
/* walk the list and find corresponding xid */
cup = NULL;
spin_lock(&cn->cn_lock);
list_for_each_entry(tmp, &cn->cn_list, cu_list) {
- if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
+ if (get_unaligned(&tmp->cu_u.cu_hdr.cm_xid) == xid) {
cup = tmp;
- list_del_init(&cup->cu_list);
+ if (status != -EINPROGRESS)
+ list_del_init(&cup->cu_list);
break;
}
}
@@ -746,7 +816,10 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
return -EINVAL;
}
- if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
+ if (status == -EINPROGRESS)
+ return __cld_pipe_inprogress_downcall(cmsg, nn);
+
+ if (copy_from_user(&cup->cu_u.cu_msg_v2, src, mlen) != 0)
return -EFAULT;
complete(&cup->cu_done);
@@ -758,7 +831,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
struct cld_msg *cmsg = msg->data;
struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
- cu_msg);
+ cu_u.cu_msg);
/* errno >= 0 means we got a downcall */
if (msg->errno >= 0)
@@ -773,38 +846,32 @@ static const struct rpc_pipe_ops cld_upcall_ops = {
.destroy_msg = cld_pipe_destroy_msg,
};
-static struct dentry *
+static int
nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe)
{
- struct dentry *dir, *dentry;
+ struct dentry *dir;
+ int err;
dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR);
if (dir == NULL)
- return ERR_PTR(-ENOENT);
- dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe);
+ return -ENOENT;
+ err = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe);
dput(dir);
- return dentry;
+ return err;
}
-static void
-nfsd4_cld_unregister_sb(struct rpc_pipe *pipe)
-{
- if (pipe->dentry)
- rpc_unlink(pipe->dentry);
-}
-
-static struct dentry *
+static int
nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe)
{
struct super_block *sb;
- struct dentry *dentry;
+ int err;
sb = rpc_get_sb_net(net);
if (!sb)
- return NULL;
- dentry = nfsd4_cld_register_sb(sb, pipe);
+ return 0;
+ err = nfsd4_cld_register_sb(sb, pipe);
rpc_put_sb_net(net);
- return dentry;
+ return err;
}
static void
@@ -814,17 +881,16 @@ nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe)
sb = rpc_get_sb_net(net);
if (sb) {
- nfsd4_cld_unregister_sb(pipe);
+ rpc_unlink(pipe);
rpc_put_sb_net(net);
}
}
/* Initialize rpc_pipefs pipe for communication with client tracking daemon */
static int
-nfsd4_init_cld_pipe(struct net *net)
+__nfsd4_init_cld_pipe(struct net *net)
{
int ret;
- struct dentry *dentry;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct cld_net *cn;
@@ -845,13 +911,13 @@ nfsd4_init_cld_pipe(struct net *net)
spin_lock_init(&cn->cn_lock);
INIT_LIST_HEAD(&cn->cn_list);
- dentry = nfsd4_cld_register_net(net, cn->cn_pipe);
- if (IS_ERR(dentry)) {
- ret = PTR_ERR(dentry);
+ ret = nfsd4_cld_register_net(net, cn->cn_pipe);
+ if (unlikely(ret))
goto err_destroy_data;
- }
- cn->cn_pipe->dentry = dentry;
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
+ cn->cn_has_legacy = false;
+#endif
nn->cld_net = cn;
return 0;
@@ -864,6 +930,17 @@ err:
return ret;
}
+static int
+nfsd4_init_cld_pipe(struct net *net)
+{
+ int status;
+
+ status = __nfsd4_init_cld_pipe(net);
+ if (!status)
+ pr_info("NFSD: Using old nfsdcld client tracking operations.\n");
+ return status;
+}
+
static void
nfsd4_remove_cld_pipe(struct net *net)
{
@@ -877,9 +954,10 @@ nfsd4_remove_cld_pipe(struct net *net)
}
static struct cld_upcall *
-alloc_cld_upcall(struct cld_net *cn)
+alloc_cld_upcall(struct nfsd_net *nn)
{
struct cld_upcall *new, *tmp;
+ struct cld_net *cn = nn->cld_net;
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
@@ -889,20 +967,20 @@ alloc_cld_upcall(struct cld_net *cn)
restart_search:
spin_lock(&cn->cn_lock);
list_for_each_entry(tmp, &cn->cn_list, cu_list) {
- if (tmp->cu_msg.cm_xid == cn->cn_xid) {
+ if (tmp->cu_u.cu_msg.cm_xid == cn->cn_xid) {
cn->cn_xid++;
spin_unlock(&cn->cn_lock);
goto restart_search;
}
}
init_completion(&new->cu_done);
- new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
- put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
+ new->cu_u.cu_msg.cm_vers = nn->client_tracking_ops->version;
+ put_unaligned(cn->cn_xid++, &new->cu_u.cu_msg.cm_xid);
new->cu_net = cn;
list_add(&new->cu_list, &cn->cn_list);
spin_unlock(&cn->cn_lock);
- dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
+ dprintk("%s: allocated xid %u\n", __func__, new->cu_u.cu_msg.cm_xid);
return new;
}
@@ -931,20 +1009,20 @@ nfsd4_cld_create(struct nfs4_client *clp)
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
- cup->cu_msg.cm_cmd = Cld_Create;
- cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
- memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ cup->cu_u.cu_msg.cm_cmd = Cld_Create;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
clp->cl_name.len);
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
if (!ret) {
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
@@ -957,6 +1035,56 @@ out_err:
/* Ask daemon to create a new record */
static void
+nfsd4_cld_create_v2(struct nfs4_client *clp)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct cld_net *cn = nn->cld_net;
+ struct cld_msg_v2 *cmsg;
+ char *principal = NULL;
+
+ /* Don't upcall if it's already stored */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cmsg = &cup->cu_u.cu_msg_v2;
+ cmsg->cm_cmd = Cld_Create;
+ cmsg->cm_u.cm_clntinfo.cc_name.cn_len = clp->cl_name.len;
+ memcpy(cmsg->cm_u.cm_clntinfo.cc_name.cn_id, clp->cl_name.data,
+ clp->cl_name.len);
+ if (clp->cl_cred.cr_raw_principal)
+ principal = clp->cl_cred.cr_raw_principal;
+ else if (clp->cl_cred.cr_principal)
+ principal = clp->cl_cred.cr_principal;
+ if (principal) {
+ sha256(principal, strlen(principal),
+ cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data);
+ cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = SHA256_DIGEST_SIZE;
+ } else
+ cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
+
+ ret = cld_pipe_upcall(cn->cn_pipe, cmsg, nn);
+ if (!ret) {
+ ret = cmsg->cm_status;
+ set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+ }
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ pr_err("NFSD: Unable to create client record on stable storage: %d\n",
+ ret);
+}
+
+/* Ask daemon to create a new record */
+static void
nfsd4_cld_remove(struct nfs4_client *clp)
{
int ret;
@@ -968,20 +1096,20 @@ nfsd4_cld_remove(struct nfs4_client *clp)
if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
- cup->cu_msg.cm_cmd = Cld_Remove;
- cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
- memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ cup->cu_u.cu_msg.cm_cmd = Cld_Remove;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
clp->cl_name.len);
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
if (!ret) {
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
@@ -992,9 +1120,14 @@ out_err:
"record from stable storage: %d\n", ret);
}
-/* Check for presence of a record, and update its timestamp */
+/*
+ * For older nfsdcld's that do not allow us to "slurp" the clients
+ * from the tracking database during startup.
+ *
+ * Check for presence of a record, and update its timestamp
+ */
static int
-nfsd4_cld_check(struct nfs4_client *clp)
+nfsd4_cld_check_v0(struct nfs4_client *clp)
{
int ret;
struct cld_upcall *cup;
@@ -1005,21 +1138,21 @@ nfsd4_cld_check(struct nfs4_client *clp)
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return 0;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
printk(KERN_ERR "NFSD: Unable to check client record on "
"stable storage: %d\n", -ENOMEM);
return -ENOMEM;
}
- cup->cu_msg.cm_cmd = Cld_Check;
- cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
- memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+ cup->cu_u.cu_msg.cm_cmd = Cld_Check;
+ cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+ memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
clp->cl_name.len);
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
if (!ret) {
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
@@ -1027,6 +1160,170 @@ nfsd4_cld_check(struct nfs4_client *clp)
return ret;
}
+/*
+ * For newer nfsdcld's that allow us to "slurp" the clients
+ * from the tracking database during startup.
+ *
+ * Check for presence of a record in the reclaim_str_hashtbl
+ */
+static int
+nfsd4_cld_check(struct nfs4_client *clp)
+{
+ struct nfs4_client_reclaim *crp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ /* did we already find that this client is stable? */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ /* look for it in the reclaim hashtable otherwise */
+ crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+ if (crp)
+ goto found;
+
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
+ if (nn->cld_net->cn_has_legacy) {
+ char dname[HEXDIR_LEN];
+ struct xdr_netobj name;
+
+ nfs4_make_rec_clidname(dname, &clp->cl_name);
+
+ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return -ENOENT;
+ }
+ name.len = HEXDIR_LEN;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp)
+ goto found;
+
+ }
+#endif
+ return -ENOENT;
+found:
+ crp->cr_clp = clp;
+ return 0;
+}
+
+static int
+nfsd4_cld_check_v2(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
+ struct cld_net *cn = nn->cld_net;
+#endif
+ struct nfs4_client_reclaim *crp;
+ char *principal = NULL;
+
+ /* did we already find that this client is stable? */
+ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+ return 0;
+
+ /* look for it in the reclaim hashtable otherwise */
+ crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+ if (crp)
+ goto found;
+
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
+ if (cn->cn_has_legacy) {
+ struct xdr_netobj name;
+ char dname[HEXDIR_LEN];
+
+ nfs4_make_rec_clidname(dname, &clp->cl_name);
+
+ name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data\n",
+ __func__);
+ return -ENOENT;
+ }
+ name.len = HEXDIR_LEN;
+ crp = nfsd4_find_reclaim_client(name, nn);
+ kfree(name.data);
+ if (crp)
+ goto found;
+
+ }
+#endif
+ return -ENOENT;
+found:
+ if (crp->cr_princhash.len) {
+ u8 digest[SHA256_DIGEST_SIZE];
+
+ if (clp->cl_cred.cr_raw_principal)
+ principal = clp->cl_cred.cr_raw_principal;
+ else if (clp->cl_cred.cr_principal)
+ principal = clp->cl_cred.cr_principal;
+ if (principal == NULL)
+ return -ENOENT;
+ sha256(principal, strlen(principal), digest);
+ if (memcmp(crp->cr_princhash.data, digest,
+ crp->cr_princhash.len))
+ return -ENOENT;
+ }
+ crp->cr_clp = clp;
+ return 0;
+}
+
+static int
+nfsd4_cld_grace_start(struct nfsd_net *nn)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
+ if (!ret)
+ ret = cup->cu_u.cu_msg.cm_status;
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ dprintk("%s: Unable to get clients from userspace: %d\n",
+ __func__, ret);
+ return ret;
+}
+
+/* For older nfsdcld's that need cm_gracetime */
+static void
+nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
+{
+ int ret;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+ cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
+ if (!ret)
+ ret = cup->cu_u.cu_msg.cm_status;
+
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
+}
+
+/*
+ * For newer nfsdcld's that do not need cm_gracetime. We also need to call
+ * nfs4_release_reclaim() to clear out the reclaim_str_hashtbl.
+ */
static void
nfsd4_cld_grace_done(struct nfsd_net *nn)
{
@@ -1034,33 +1331,210 @@ nfsd4_cld_grace_done(struct nfsd_net *nn)
struct cld_upcall *cup;
struct cld_net *cn = nn->cld_net;
- cup = alloc_cld_upcall(cn);
+ cup = alloc_cld_upcall(nn);
if (!cup) {
ret = -ENOMEM;
goto out_err;
}
- cup->cu_msg.cm_cmd = Cld_GraceDone;
- cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
- ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+ cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
if (!ret)
- ret = cup->cu_msg.cm_status;
+ ret = cup->cu_u.cu_msg.cm_status;
free_cld_upcall(cup);
out_err:
+ nfs4_release_reclaim(nn);
if (ret)
printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
}
-static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+static int
+nfs4_cld_state_init(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ int i;
+
+ nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
+ sizeof(struct list_head),
+ GFP_KERNEL);
+ if (!nn->reclaim_str_hashtbl)
+ return -ENOMEM;
+
+ for (i = 0; i < CLIENT_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
+ nn->reclaim_str_hashtbl_size = 0;
+ nn->track_reclaim_completes = true;
+ atomic_set(&nn->nr_reclaim_complete, 0);
+
+ return 0;
+}
+
+static void
+nfs4_cld_state_shutdown(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nn->track_reclaim_completes = false;
+ kfree(nn->reclaim_str_hashtbl);
+}
+
+static bool
+cld_running(struct nfsd_net *nn)
+{
+ struct cld_net *cn = nn->cld_net;
+ struct rpc_pipe *pipe = cn->cn_pipe;
+
+ return pipe->nreaders || pipe->nwriters;
+}
+
+static int
+nfsd4_cld_get_version(struct nfsd_net *nn)
+{
+ int ret = 0;
+ struct cld_upcall *cup;
+ struct cld_net *cn = nn->cld_net;
+ uint8_t version;
+
+ cup = alloc_cld_upcall(nn);
+ if (!cup) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion;
+ ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn);
+ if (!ret) {
+ ret = cup->cu_u.cu_msg.cm_status;
+ if (ret)
+ goto out_free;
+ version = cup->cu_u.cu_msg.cm_u.cm_version;
+ dprintk("%s: userspace returned version %u\n",
+ __func__, version);
+ if (version < 1)
+ version = 1;
+ else if (version > CLD_UPCALL_VERSION)
+ version = CLD_UPCALL_VERSION;
+
+ switch (version) {
+ case 1:
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+ break;
+ case 2:
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v2;
+ break;
+ default:
+ break;
+ }
+ }
+out_free:
+ free_cld_upcall(cup);
+out_err:
+ if (ret)
+ dprintk("%s: Unable to get version from userspace: %d\n",
+ __func__, ret);
+ return ret;
+}
+
+static int
+nfsd4_cld_tracking_init(struct net *net)
+{
+ int status;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ bool running;
+ int retries = 10;
+
+ status = nfs4_cld_state_init(net);
+ if (status)
+ return status;
+
+ status = __nfsd4_init_cld_pipe(net);
+ if (status)
+ goto err_shutdown;
+
+ /*
+ * rpc pipe upcalls take 30 seconds to time out, so we don't want to
+ * queue an upcall unless we know that nfsdcld is running (because we
+ * want this to fail fast so that nfsd4_client_tracking_init() can try
+ * the next client tracking method). nfsdcld should already be running
+ * before nfsd is started, so the wait here is for nfsdcld to open the
+ * pipefs file we just created.
+ */
+ while (!(running = cld_running(nn)) && retries--)
+ msleep(100);
+
+ if (!running) {
+ status = -ETIMEDOUT;
+ goto err_remove;
+ }
+
+ status = nfsd4_cld_get_version(nn);
+ if (status == -EOPNOTSUPP)
+ pr_warn("NFSD: nfsdcld GetVersion upcall failed. Please upgrade nfsdcld.\n");
+
+ status = nfsd4_cld_grace_start(nn);
+ if (status) {
+ if (status == -EOPNOTSUPP)
+ pr_warn("NFSD: nfsdcld GraceStart upcall failed. Please upgrade nfsdcld.\n");
+ nfs4_release_reclaim(nn);
+ goto err_remove;
+ } else
+ pr_info("NFSD: Using nfsdcld client tracking operations.\n");
+ return 0;
+
+err_remove:
+ nfsd4_remove_cld_pipe(net);
+err_shutdown:
+ nfs4_cld_state_shutdown(net);
+ return status;
+}
+
+static void
+nfsd4_cld_tracking_exit(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfs4_release_reclaim(nn);
+ nfsd4_remove_cld_pipe(net);
+ nfs4_cld_state_shutdown(net);
+}
+
+/* For older nfsdcld's */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
.init = nfsd4_init_cld_pipe,
.exit = nfsd4_remove_cld_pipe,
.create = nfsd4_cld_create,
.remove = nfsd4_cld_remove,
+ .check = nfsd4_cld_check_v0,
+ .grace_done = nfsd4_cld_grace_done_v0,
+ .version = 1,
+ .msglen = sizeof(struct cld_msg),
+};
+
+/* For newer nfsdcld's */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
+ .init = nfsd4_cld_tracking_init,
+ .exit = nfsd4_cld_tracking_exit,
+ .create = nfsd4_cld_create,
+ .remove = nfsd4_cld_remove,
.check = nfsd4_cld_check,
.grace_done = nfsd4_cld_grace_done,
+ .version = 1,
+ .msglen = sizeof(struct cld_msg),
};
+/* v2 create/check ops include the principal, if available */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = {
+ .init = nfsd4_cld_tracking_init,
+ .exit = nfsd4_cld_tracking_exit,
+ .create = nfsd4_cld_create_v2,
+ .remove = nfsd4_cld_remove,
+ .check = nfsd4_cld_check_v2,
+ .grace_done = nfsd4_cld_grace_done,
+ .version = 2,
+ .msglen = sizeof(struct cld_msg_v2),
+};
+
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
/* upcall via usermodehelper */
static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack";
module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog),
@@ -1131,11 +1605,7 @@ nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name)
return NULL;
}
- copied = nfs4_make_rec_clidname(result + copied, name);
- if (copied) {
- kfree(result);
- return NULL;
- }
+ nfs4_make_rec_clidname(result + copied, name);
return result;
}
@@ -1166,7 +1636,7 @@ nfsd4_cltrack_client_has_session(struct nfs4_client *clp)
}
static char *
-nfsd4_cltrack_grace_start(time_t grace_start)
+nfsd4_cltrack_grace_start(time64_t grace_start)
{
int copied;
size_t len;
@@ -1179,7 +1649,7 @@ nfsd4_cltrack_grace_start(time_t grace_start)
if (!result)
return result;
- copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld",
+ copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%lld",
grace_start);
if (copied >= len) {
/* just return nothing if output was truncated */
@@ -1236,19 +1706,14 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1)
static char *
bin_to_hex_dup(const unsigned char *src, int srclen)
{
- int i;
- char *buf, *hex;
+ char *buf;
/* +1 for terminating NULL */
- buf = kmalloc((srclen * 2) + 1, GFP_KERNEL);
+ buf = kzalloc((srclen * 2) + 1, GFP_KERNEL);
if (!buf)
return buf;
- hex = buf;
- for (i = 0; i < srclen; i++) {
- sprintf(hex, "%2.2x", *src++);
- hex += 2;
- }
+ bin2hex(buf, src, srclen);
return buf;
}
@@ -1268,6 +1733,8 @@ nfsd4_umh_cltrack_init(struct net *net)
ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
kfree(grace_start);
+ if (!ret)
+ pr_info("NFSD: Using UMH upcall client tracking operations.\n");
return ret;
}
@@ -1281,10 +1748,7 @@ nfsd4_cltrack_upcall_lock(struct nfs4_client *clp)
static void
nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp)
{
- smp_mb__before_atomic();
- clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags);
- smp_mb__after_atomic();
- wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK);
+ clear_and_wake_up_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags);
}
static void
@@ -1391,7 +1855,7 @@ nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
char *legacy;
char timestr[22]; /* FIXME: better way to determine max size? */
- sprintf(timestr, "%ld", nn->boot_time);
+ sprintf(timestr, "%lld", nn->boot_time);
legacy = nfsd4_cltrack_legacy_topdir();
nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
kfree(legacy);
@@ -1404,22 +1868,17 @@ static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
.remove = nfsd4_umh_cltrack_remove,
.check = nfsd4_umh_cltrack_check,
.grace_done = nfsd4_umh_cltrack_grace_done,
+ .version = 1,
+ .msglen = 0,
};
-int
-nfsd4_client_tracking_init(struct net *net)
+static inline int check_for_legacy_methods(int status, struct net *net)
{
- int status;
- struct path path;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
- /* just run the init if it the method is already decided */
- if (nn->client_tracking_ops)
- goto do_init;
+ struct path path;
/*
- * First, try a UMH upcall. It should succeed or fail quickly, so
- * there's little harm in trying that first.
+ * Next, try the UMH upcall.
*/
nn->client_tracking_ops = &nfsd4_umh_tracking_ops;
status = nn->client_tracking_ops->init(net);
@@ -1427,28 +1886,57 @@ nfsd4_client_tracking_init(struct net *net)
return status;
/*
- * See if the recoverydir exists and is a directory. If it is,
- * then use the legacy ops.
+ * Finally, See if the recoverydir exists and is a directory.
+ * If it is, then use the legacy ops.
*/
nn->client_tracking_ops = &nfsd4_legacy_tracking_ops;
status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
if (!status) {
- status = d_is_dir(path.dentry);
+ status = !d_is_dir(path.dentry);
path_put(&path);
if (status)
- goto do_init;
+ return -ENOTDIR;
}
+ return status;
+}
+#else
+static inline int check_for_legacy_methods(int status, struct net *net)
+{
+ return status;
+}
+#endif /* CONFIG_LEGACY_NFSD_CLIENT_TRACKING */
+
+int
+nfsd4_client_tracking_init(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ int status;
+
+ /* just run the init if it the method is already decided */
+ if (nn->client_tracking_ops)
+ goto do_init;
- /* Finally, try to use nfsdcld */
+ /* First, try to use nfsdcld */
nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
- printk(KERN_WARNING "NFSD: the nfsdcld client tracking upcall will be "
- "removed in 3.10. Please transition to using "
- "nfsdcltrack.\n");
+ status = nn->client_tracking_ops->init(net);
+ if (!status)
+ return status;
+ if (status != -ETIMEDOUT) {
+ nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0;
+ status = nn->client_tracking_ops->init(net);
+ if (!status)
+ return status;
+ }
+
+ status = check_for_legacy_methods(status, net);
+ if (status)
+ goto out;
do_init:
status = nn->client_tracking_ops->init(net);
+out:
if (status) {
- printk(KERN_WARNING "NFSD: Unable to initialize client "
- "recovery tracking! (%d)\n", status);
+ pr_warn("NFSD: Unable to initialize client recovery tracking! (%d)\n", status);
+ pr_warn("NFSD: Is nfsdcld running? If not, enable CONFIG_NFSD_LEGACY_CLIENT_TRACKING.\n");
nn->client_tracking_ops = NULL;
}
return status;
@@ -1509,7 +1997,6 @@ rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr)
struct net *net = sb->s_fs_info;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct cld_net *cn = nn->cld_net;
- struct dentry *dentry;
int ret = 0;
if (!try_module_get(THIS_MODULE))
@@ -1522,16 +2009,10 @@ rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr)
switch (event) {
case RPC_PIPEFS_MOUNT:
- dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe);
- if (IS_ERR(dentry)) {
- ret = PTR_ERR(dentry);
- break;
- }
- cn->cn_pipe->dentry = dentry;
+ ret = nfsd4_cld_register_sb(sb, cn->cn_pipe);
break;
case RPC_PIPEFS_UMOUNT:
- if (cn->cn_pipe->dentry)
- nfsd4_cld_unregister_sb(cn->cn_pipe);
+ rpc_unlink(cn->cn_pipe);
break;
default:
ret = -ENOTSUPP;
@@ -1548,6 +2029,7 @@ static struct notifier_block nfsd4_cld_block = {
int
register_cld_notifier(void)
{
+ WARN_ON(!nfsd_net_id);
return rpc_pipefs_notifier_register(&nfsd4_cld_block);
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fb3c9844c82a..808c24fb5c9a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -42,6 +42,11 @@
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/addr.h>
#include <linux/jhash.h>
+#include <linux/string_helpers.h>
+#include <linux/fsnotify.h>
+#include <linux/rhashtable.h>
+#include <linux/nfs_ssc.h>
+
#include "xdr4.h"
#include "xdr4cb.h"
#include "vfs.h"
@@ -49,10 +54,12 @@
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
-#define all_ones {{~0,~0},~0}
+#define all_ones {{ ~0, ~0}, ~0}
static const stateid_t one_stateid = {
.si_generation = ~0,
.si_opaque = all_ones,
@@ -77,6 +84,10 @@ static u64 current_sessionid = 1;
/* forward declarations */
static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
+void nfsd4_end_grace(struct nfsd_net *nn);
+static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
+static void nfsd4_file_hash_remove(struct nfs4_file *fi);
+static void deleg_reaper(struct nfsd_net *nn);
/* Locking: */
@@ -98,6 +109,13 @@ enum nfsd4_st_mutex_lock_subclass {
*/
static DECLARE_WAIT_QUEUE_HEAD(close_wq);
+/*
+ * A waitqueue where a writer to clients/#/ctl destroying a client can
+ * wait for cl_rpc_users to drop to 0 and then for the client to be
+ * unhashed.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(expiry_wq);
+
static struct kmem_cache *client_slab;
static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab;
@@ -110,17 +128,35 @@ static void free_session(struct nfsd4_session *);
static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
+
+static struct workqueue_struct *laundry_wq;
+
+int nfsd4_create_laundry_wq(void)
+{
+ int rc = 0;
+
+ laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
+ if (laundry_wq == NULL)
+ rc = -ENOMEM;
+ return rc;
+}
+
+void nfsd4_destroy_laundry_wq(void)
+{
+ destroy_workqueue(laundry_wq);
+}
static bool is_session_dead(struct nfsd4_session *ses)
{
- return ses->se_flags & NFS4_SESSION_DEAD;
+ return ses->se_dead;
}
static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me)
{
if (atomic_read(&ses->se_ref) > ref_held_by_me)
return nfserr_jukebox;
- ses->se_flags |= NFS4_SESSION_DEAD;
+ ses->se_dead = true;
return nfs_ok;
}
@@ -129,6 +165,13 @@ static bool is_client_expired(struct nfs4_client *clp)
return clp->cl_time == 0;
}
+static void nfsd4_dec_courtesy_client_count(struct nfsd_net *nn,
+ struct nfs4_client *clp)
+{
+ if (clp->cl_state != NFSD4_ACTIVE)
+ atomic_add_unless(&nn->nfsd_courtesy_clients, -1, 0);
+}
+
static __be32 get_client_locked(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -137,7 +180,9 @@ static __be32 get_client_locked(struct nfs4_client *clp)
if (is_client_expired(clp))
return nfserr_expired;
- atomic_inc(&clp->cl_refcount);
+ atomic_inc(&clp->cl_rpc_users);
+ nfsd4_dec_courtesy_client_count(nn, clp);
+ clp->cl_state = NFSD4_ACTIVE;
return nfs_ok;
}
@@ -156,11 +201,10 @@ renew_client_locked(struct nfs4_client *clp)
return;
}
- dprintk("renewing client (clientid %08x/%08x)\n",
- clp->cl_clientid.cl_boot,
- clp->cl_clientid.cl_id);
list_move_tail(&clp->cl_lru, &nn->client_lru);
- clp->cl_time = get_seconds();
+ clp->cl_time = ktime_get_boottime_seconds();
+ nfsd4_dec_courtesy_client_count(nn, clp);
+ clp->cl_state = NFSD4_ACTIVE;
}
static void put_client_renew_locked(struct nfs4_client *clp)
@@ -169,20 +213,24 @@ static void put_client_renew_locked(struct nfs4_client *clp)
lockdep_assert_held(&nn->client_lock);
- if (!atomic_dec_and_test(&clp->cl_refcount))
+ if (!atomic_dec_and_test(&clp->cl_rpc_users))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
+ else
+ wake_up_all(&expiry_wq);
}
static void put_client_renew(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
+ if (!atomic_dec_and_lock(&clp->cl_rpc_users, &nn->client_lock))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
+ else
+ wake_up_all(&expiry_wq);
spin_unlock(&nn->client_lock);
}
@@ -231,6 +279,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
if (fh_match(fh, &cur->nbl_fh)) {
list_del_init(&cur->nbl_list);
+ WARN_ON(list_empty(&cur->nbl_lru));
list_del_init(&cur->nbl_lru);
found = cur;
break;
@@ -250,10 +299,13 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
nbl = find_blocked_lock(lo, fh, nn);
if (!nbl) {
- nbl= kmalloc(sizeof(*nbl), GFP_KERNEL);
+ nbl = kmalloc(sizeof(*nbl), GFP_KERNEL);
if (nbl) {
+ INIT_LIST_HEAD(&nbl->nbl_list);
+ INIT_LIST_HEAD(&nbl->nbl_lru);
fh_copy_shallow(&nbl->nbl_fh, fh);
locks_init_lock(&nbl->nbl_lock);
+ kref_init(&nbl->nbl_kref);
nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
&nfsd4_cb_notify_lock_ops,
NFSPROC4_CLNT_CB_NOTIFY_LOCK);
@@ -263,13 +315,23 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
}
static void
-free_blocked_lock(struct nfsd4_blocked_lock *nbl)
+free_nbl(struct kref *kref)
{
+ struct nfsd4_blocked_lock *nbl;
+
+ nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref);
locks_release_private(&nbl->nbl_lock);
kfree(nbl);
}
static void
+free_blocked_lock(struct nfsd4_blocked_lock *nbl)
+{
+ locks_delete_block(&nbl->nbl_lock);
+ kref_put(&nbl->nbl_kref, free_nbl);
+}
+
+static void
remove_blocked_locks(struct nfs4_lockowner *lo)
{
struct nfs4_client *clp = lo->lo_owner.so_client;
@@ -284,6 +346,7 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
struct nfsd4_blocked_lock,
nbl_list);
list_del_init(&nbl->nbl_list);
+ WARN_ON(list_empty(&nbl->nbl_lru));
list_move(&nbl->nbl_lru, &reaplist);
}
spin_unlock(&nn->blocked_locks_lock);
@@ -293,14 +356,23 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
nbl_lru);
list_del_init(&nbl->nbl_lru);
- locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
}
+static void
+nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
+{
+ struct nfsd4_blocked_lock *nbl = container_of(cb,
+ struct nfsd4_blocked_lock, nbl_cb);
+ locks_delete_block(&nbl->nbl_lock);
+}
+
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
+ trace_nfsd_cb_notify_lock_done(&zero_stateid, task);
+
/*
* Since this is just an optimization, we don't try very hard if it
* turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
@@ -325,10 +397,136 @@ nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
}
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+ .prepare = nfsd4_cb_notify_lock_prepare,
.done = nfsd4_cb_notify_lock_done,
.release = nfsd4_cb_notify_lock_release,
+ .opcode = OP_CB_NOTIFY_LOCK,
};
+/*
+ * We store the NONE, READ, WRITE, and BOTH bits separately in the
+ * st_{access,deny}_bmap field of the stateid, in order to track not
+ * only what share bits are currently in force, but also what
+ * combinations of share bits previous opens have used. This allows us
+ * to enforce the recommendation in
+ * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that
+ * the server return an error if the client attempt to downgrade to a
+ * combination of share bits not explicable by closing some of its
+ * previous opens.
+ *
+ * This enforcement is arguably incomplete, since we don't keep
+ * track of access/deny bit combinations; so, e.g., we allow:
+ *
+ * OPEN allow read, deny write
+ * OPEN allow both, deny none
+ * DOWNGRADE allow read, deny none
+ *
+ * which we should reject.
+ *
+ * But you could also argue that our current code is already overkill,
+ * since it only exists to return NFS4ERR_INVAL on incorrect client
+ * behavior.
+ */
+static unsigned int
+bmap_to_share_mode(unsigned long bmap)
+{
+ int i;
+ unsigned int access = 0;
+
+ for (i = 1; i < 4; i++) {
+ if (test_bit(i, &bmap))
+ access |= i;
+ }
+ return access;
+}
+
+/* set share access for a given stateid */
+static inline void
+set_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << access;
+
+ WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+ stp->st_access_bmap |= mask;
+}
+
+/* clear share access for a given stateid */
+static inline void
+clear_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << access;
+
+ WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+ stp->st_access_bmap &= ~mask;
+}
+
+/* test whether a given stateid has access */
+static inline bool
+test_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << access;
+
+ return (bool)(stp->st_access_bmap & mask);
+}
+
+/* set share deny for a given stateid */
+static inline void
+set_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << deny;
+
+ WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+ stp->st_deny_bmap |= mask;
+}
+
+/* clear share deny for a given stateid */
+static inline void
+clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << deny;
+
+ WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+ stp->st_deny_bmap &= ~mask;
+}
+
+/* test whether a given stateid is denying specific access */
+static inline bool
+test_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << deny;
+
+ return (bool)(stp->st_deny_bmap & mask);
+}
+
+static int nfs4_access_to_omode(u32 access)
+{
+ switch (access & NFS4_SHARE_ACCESS_BOTH) {
+ case NFS4_SHARE_ACCESS_READ:
+ return O_RDONLY;
+ case NFS4_SHARE_ACCESS_WRITE:
+ return O_WRONLY;
+ case NFS4_SHARE_ACCESS_BOTH:
+ return O_RDWR;
+ }
+ WARN_ON_ONCE(1);
+ return O_RDONLY;
+}
+
+static inline int
+access_permit_read(struct nfs4_ol_stateid *stp)
+{
+ return test_access(NFS4_SHARE_ACCESS_READ, stp) ||
+ test_access(NFS4_SHARE_ACCESS_BOTH, stp) ||
+ test_access(NFS4_SHARE_ACCESS_WRITE, stp);
+}
+
+static inline int
+access_permit_write(struct nfs4_ol_stateid *stp)
+{
+ return test_access(NFS4_SHARE_ACCESS_WRITE, stp) ||
+ test_access(NFS4_SHARE_ACCESS_BOTH, stp);
+}
+
static inline struct nfs4_stateowner *
nfs4_get_stateowner(struct nfs4_stateowner *sop)
{
@@ -344,7 +542,7 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner)
}
static struct nfs4_openowner *
-find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
+find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
struct nfs4_client *clp)
{
struct nfs4_stateowner *so;
@@ -361,18 +559,6 @@ find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
return NULL;
}
-static struct nfs4_openowner *
-find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
- struct nfs4_client *clp)
-{
- struct nfs4_openowner *oo;
-
- spin_lock(&clp->cl_lock);
- oo = find_openstateowner_str_locked(hashval, open, clp);
- spin_unlock(&clp->cl_lock);
- return oo;
-}
-
static inline u32
opaque_hashval(const void *ptr, int nbytes)
{
@@ -386,52 +572,34 @@ opaque_hashval(const void *ptr, int nbytes)
return x;
}
-static void nfsd4_free_file_rcu(struct rcu_head *rcu)
-{
- struct nfs4_file *fp = container_of(rcu, struct nfs4_file, fi_rcu);
-
- kmem_cache_free(file_slab, fp);
-}
-
void
put_nfs4_file(struct nfs4_file *fi)
{
- might_lock(&state_lock);
-
- if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
- hlist_del_rcu(&fi->fi_hash);
- spin_unlock(&state_lock);
+ if (refcount_dec_and_test(&fi->fi_ref)) {
+ nfsd4_file_hash_remove(fi);
WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
- call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
+ kfree_rcu(fi, fi_rcu);
}
}
-static struct file *
-__nfs4_get_fd(struct nfs4_file *f, int oflag)
-{
- if (f->fi_fds[oflag])
- return get_file(f->fi_fds[oflag]);
- return NULL;
-}
-
-static struct file *
+static struct nfsd_file *
find_writeable_file_locked(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
lockdep_assert_held(&f->fi_lock);
- ret = __nfs4_get_fd(f, O_WRONLY);
+ ret = nfsd_file_get(f->fi_fds[O_WRONLY]);
if (!ret)
- ret = __nfs4_get_fd(f, O_RDWR);
+ ret = nfsd_file_get(f->fi_fds[O_RDWR]);
return ret;
}
-static struct file *
+static struct nfsd_file *
find_writeable_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
spin_lock(&f->fi_lock);
ret = find_writeable_file_locked(f);
@@ -440,22 +608,23 @@ find_writeable_file(struct nfs4_file *f)
return ret;
}
-static struct file *find_readable_file_locked(struct nfs4_file *f)
+static struct nfsd_file *
+find_readable_file_locked(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
lockdep_assert_held(&f->fi_lock);
- ret = __nfs4_get_fd(f, O_RDONLY);
+ ret = nfsd_file_get(f->fi_fds[O_RDONLY]);
if (!ret)
- ret = __nfs4_get_fd(f, O_RDWR);
+ ret = nfsd_file_get(f->fi_fds[O_RDWR]);
return ret;
}
-static struct file *
+static struct nfsd_file *
find_readable_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
spin_lock(&f->fi_lock);
ret = find_readable_file_locked(f);
@@ -464,22 +633,37 @@ find_readable_file(struct nfs4_file *f)
return ret;
}
-struct file *
+struct nfsd_file *
find_any_file(struct nfs4_file *f)
{
- struct file *ret;
+ struct nfsd_file *ret;
+ if (!f)
+ return NULL;
spin_lock(&f->fi_lock);
- ret = __nfs4_get_fd(f, O_RDWR);
+ ret = nfsd_file_get(f->fi_fds[O_RDWR]);
if (!ret) {
- ret = __nfs4_get_fd(f, O_WRONLY);
+ ret = nfsd_file_get(f->fi_fds[O_WRONLY]);
if (!ret)
- ret = __nfs4_get_fd(f, O_RDONLY);
+ ret = nfsd_file_get(f->fi_fds[O_RDONLY]);
}
spin_unlock(&f->fi_lock);
return ret;
}
+static struct nfsd_file *find_any_file_locked(struct nfs4_file *f)
+{
+ lockdep_assert_held(&f->fi_lock);
+
+ if (f->fi_fds[O_RDWR])
+ return f->fi_fds[O_RDWR];
+ if (f->fi_fds[O_WRONLY])
+ return f->fi_fds[O_WRONLY];
+ if (f->fi_fds[O_RDONLY])
+ return f->fi_fds[O_RDONLY];
+ return NULL;
+}
+
static atomic_long_t num_delegations;
unsigned long max_delegations;
@@ -500,21 +684,71 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
return ret & OWNER_HASH_MASK;
}
-/* hash table for nfs4_file */
-#define FILE_HASH_BITS 8
-#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
+static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp;
-static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh)
-{
- return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0);
-}
+static const struct rhashtable_params nfs4_file_rhash_params = {
+ .key_len = sizeof_field(struct nfs4_file, fi_inode),
+ .key_offset = offsetof(struct nfs4_file, fi_inode),
+ .head_offset = offsetof(struct nfs4_file, fi_rlist),
-static unsigned int file_hashval(struct knfsd_fh *fh)
+ /*
+ * Start with a single page hash table to reduce resizing churn
+ * on light workloads.
+ */
+ .min_size = 256,
+ .automatic_shrinking = true,
+};
+
+/*
+ * Check if courtesy clients have conflicting access and resolve it if possible
+ *
+ * access: is op_share_access if share_access is true.
+ * Check if access mode, op_share_access, would conflict with
+ * the current deny mode of the file 'fp'.
+ * access: is op_share_deny if share_access is false.
+ * Check if the deny mode, op_share_deny, would conflict with
+ * current access of the file 'fp'.
+ * stp: skip checking this entry.
+ * new_stp: normal open, not open upgrade.
+ *
+ * Function returns:
+ * false - access/deny mode conflict with normal client.
+ * true - no conflict or conflict with courtesy client(s) is resolved.
+ */
+static bool
+nfs4_resolve_deny_conflicts_locked(struct nfs4_file *fp, bool new_stp,
+ struct nfs4_ol_stateid *stp, u32 access, bool share_access)
{
- return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1);
-}
+ struct nfs4_ol_stateid *st;
+ bool resolvable = true;
+ unsigned char bmap;
+ struct nfsd_net *nn;
+ struct nfs4_client *clp;
-static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
+ lockdep_assert_held(&fp->fi_lock);
+ list_for_each_entry(st, &fp->fi_stateids, st_perfile) {
+ /* ignore lock stateid */
+ if (st->st_openstp)
+ continue;
+ if (st == stp && new_stp)
+ continue;
+ /* check file access against deny mode or vice versa */
+ bmap = share_access ? st->st_deny_bmap : st->st_access_bmap;
+ if (!(access & bmap_to_share_mode(bmap)))
+ continue;
+ clp = st->st_stid.sc_client;
+ if (try_to_expire_client(clp))
+ continue;
+ resolvable = false;
+ break;
+ }
+ if (resolvable) {
+ clp = stp->st_stid.sc_client;
+ nn = net_generic(clp->net, nfsd_net_id);
+ mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
+ }
+ return resolvable;
+}
static void
__nfs4_file_get_access(struct nfs4_file *fp, u32 access)
@@ -568,17 +802,17 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
might_lock(&fp->fi_lock);
if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
- struct file *f1 = NULL;
- struct file *f2 = NULL;
+ struct nfsd_file *f1 = NULL;
+ struct nfsd_file *f2 = NULL;
swap(f1, fp->fi_fds[oflag]);
if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
swap(f2, fp->fi_fds[O_RDWR]);
spin_unlock(&fp->fi_lock);
if (f1)
- fput(f1);
+ nfsd_file_put(f1);
if (f2)
- fput(f2);
+ nfsd_file_put(f2);
}
}
@@ -684,7 +918,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
idr_preload(GFP_KERNEL);
spin_lock(&cl->cl_lock);
- new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT);
+ /* Reserving 0 for start of file in nfsdfs "states" file: */
+ new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 1, 0, GFP_NOWAIT);
spin_unlock(&cl->cl_lock);
idr_preload_end();
if (new_id < 0)
@@ -697,16 +932,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
/* Will be incremented before return to client: */
refcount_set(&stid->sc_count, 1);
spin_lock_init(&stid->sc_lock);
+ INIT_LIST_HEAD(&stid->sc_cp_list);
- /*
- * It shouldn't be a problem to reuse an opaque stateid value.
- * I don't think it is for 4.1. But with 4.0 I worry that, for
- * example, a stray write retransmission could be accepted by
- * the server when it should have been rejected. Therefore,
- * adopt a trick from the sctp code to attempt to maximize the
- * amount of time until an id is reused, by ensuring they always
- * "increase" (mod INT_MAX):
- */
return stid;
out_free:
kmem_cache_free(slab, stid);
@@ -716,30 +943,78 @@ out_free:
/*
* Create a unique stateid_t to represent each COPY.
*/
-int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
+ unsigned char cs_type)
{
int new_id;
+ stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time;
+ stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+
idr_preload(GFP_KERNEL);
spin_lock(&nn->s2s_cp_lock);
- new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
+ new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
+ stid->cs_stid.si_opaque.so_id = new_id;
+ stid->cs_stid.si_generation = 1;
spin_unlock(&nn->s2s_cp_lock);
idr_preload_end();
if (new_id < 0)
return 0;
- copy->cp_stateid.si_opaque.so_id = new_id;
- copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
- copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
+ stid->cs_type = cs_type;
return 1;
}
-void nfs4_free_cp_state(struct nfsd4_copy *copy)
+int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
+{
+ return nfs4_init_cp_state(nn, &copy->cp_stateid, NFS4_COPY_STID);
+}
+
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+ struct nfs4_stid *p_stid)
+{
+ struct nfs4_cpntf_state *cps;
+
+ cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
+ if (!cps)
+ return NULL;
+ cps->cpntf_time = ktime_get_boottime_seconds();
+ refcount_set(&cps->cp_stateid.cs_count, 1);
+ if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID))
+ goto out_free;
+ spin_lock(&nn->s2s_cp_lock);
+ list_add(&cps->cp_list, &p_stid->sc_cp_list);
+ spin_unlock(&nn->s2s_cp_lock);
+ return cps;
+out_free:
+ kfree(cps);
+ return NULL;
+}
+
+void nfs4_free_copy_state(struct nfsd4_copy *copy)
{
struct nfsd_net *nn;
+ if (copy->cp_stateid.cs_type != NFS4_COPY_STID)
+ return;
nn = net_generic(copy->cp_clp->net, nfsd_net_id);
spin_lock(&nn->s2s_cp_lock);
- idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id);
+ idr_remove(&nn->s2s_cp_stateids,
+ copy->cp_stateid.cs_stid.si_opaque.so_id);
+ spin_unlock(&nn->s2s_cp_lock);
+}
+
+static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid)
+{
+ struct nfs4_cpntf_state *cps;
+ struct nfsd_net *nn;
+
+ nn = net_generic(net, nfsd_net_id);
+ spin_lock(&nn->s2s_cp_lock);
+ while (!list_empty(&stid->sc_cp_list)) {
+ cps = list_first_entry(&stid->sc_cp_list,
+ struct nfs4_cpntf_state, cp_list);
+ _free_cpntf_state_locked(nn, cps);
+ }
spin_unlock(&nn->s2s_cp_lock);
}
@@ -754,8 +1029,20 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
return openlockstateid(stid);
}
+/*
+ * As the sc_free callback of deleg, this may be called by nfs4_put_stid
+ * in nfsd_break_one_deleg.
+ * Considering nfsd_break_one_deleg is called with the flc->flc_lock held,
+ * this function mustn't ever sleep.
+ */
static void nfs4_free_deleg(struct nfs4_stid *stid)
{
+ struct nfs4_delegation *dp = delegstateid(stid);
+
+ WARN_ON_ONCE(!list_empty(&stid->sc_cp_list));
+ WARN_ON_ONCE(!list_empty(&dp->dl_perfile));
+ WARN_ON_ONCE(!list_empty(&dp->dl_perclnt));
+ WARN_ON_ONCE(!list_empty(&dp->dl_recall_lru));
kmem_cache_free(deleg_slab, stid);
atomic_long_dec(&num_delegations);
}
@@ -769,7 +1056,8 @@ static void nfs4_free_deleg(struct nfs4_stid *stid)
* When a delegation is recalled, the filehandle is stored in the "new"
* filter.
* Every 30 seconds we swap the filters and clear the "new" one,
- * unless both are empty of course.
+ * unless both are empty of course. This results in delegations for a
+ * given filehandle being blocked for between 30 and 60 seconds.
*
* Each filter is 256 bits. We hash the filehandle to 32bit and use the
* low 3 bytes as hash-table indices.
@@ -781,7 +1069,7 @@ static void nfs4_free_deleg(struct nfs4_stid *stid)
static DEFINE_SPINLOCK(blocked_delegations_lock);
static struct bloom_pair {
int entries, old_entries;
- time_t swap_time;
+ time64_t swap_time;
int new; /* index into 'set' */
DECLARE_BITMAP(set[2], 256);
} blocked_delegations;
@@ -793,19 +1081,19 @@ static int delegation_blocked(struct knfsd_fh *fh)
if (bd->entries == 0)
return 0;
- if (seconds_since_boot() - bd->swap_time > 30) {
+ if (ktime_get_seconds() - bd->swap_time > 30) {
spin_lock(&blocked_delegations_lock);
- if (seconds_since_boot() - bd->swap_time > 30) {
+ if (ktime_get_seconds() - bd->swap_time > 30) {
bd->entries -= bd->old_entries;
bd->old_entries = bd->entries;
+ bd->new = 1-bd->new;
memset(bd->set[bd->new], 0,
sizeof(bd->set[0]));
- bd->new = 1-bd->new;
- bd->swap_time = seconds_since_boot();
+ bd->swap_time = ktime_get_seconds();
}
spin_unlock(&blocked_delegations_lock);
}
- hash = jhash(&fh->fh_base, fh->fh_size, 0);
+ hash = jhash(&fh->fh_raw, fh->fh_size, 0);
if (test_bit(hash&255, bd->set[0]) &&
test_bit((hash>>8)&255, bd->set[0]) &&
test_bit((hash>>16)&255, bd->set[0]))
@@ -824,35 +1112,36 @@ static void block_delegations(struct knfsd_fh *fh)
u32 hash;
struct bloom_pair *bd = &blocked_delegations;
- hash = jhash(&fh->fh_base, fh->fh_size, 0);
+ hash = jhash(&fh->fh_raw, fh->fh_size, 0);
spin_lock(&blocked_delegations_lock);
__set_bit(hash&255, bd->set[bd->new]);
__set_bit((hash>>8)&255, bd->set[bd->new]);
__set_bit((hash>>16)&255, bd->set[bd->new]);
if (bd->entries == 0)
- bd->swap_time = seconds_since_boot();
+ bd->swap_time = ktime_get_seconds();
bd->entries += 1;
spin_unlock(&blocked_delegations_lock);
}
static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
- struct svc_fh *current_fh,
- struct nfs4_clnt_odstate *odstate)
+ struct nfs4_clnt_odstate *odstate, u32 dl_type)
{
struct nfs4_delegation *dp;
+ struct nfs4_stid *stid;
long n;
dprintk("NFSD alloc_init_deleg\n");
n = atomic_long_inc_return(&num_delegations);
if (n < 0 || n > max_delegations)
goto out_dec;
- if (delegation_blocked(&current_fh->fh_handle))
+ if (delegation_blocked(&fp->fi_fhandle))
goto out_dec;
- dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg));
- if (dp == NULL)
+ stid = nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg);
+ if (stid == NULL)
goto out_dec;
+ dp = delegstateid(stid);
/*
* delegation seqid's are never incremented. The 4.1 special
@@ -865,10 +1154,14 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
INIT_LIST_HEAD(&dp->dl_recall_lru);
dp->dl_clnt_odstate = odstate;
get_clnt_odstate(odstate);
- dp->dl_type = NFS4_OPEN_DELEGATE_READ;
+ dp->dl_type = dl_type;
dp->dl_retries = 1;
+ dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
+ nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
+ &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
+ dp->dl_cb_fattr.ncf_file_modified = false;
get_nfs4_file(fp);
dp->dl_stid.sc_file = fp;
return dp;
@@ -890,6 +1183,9 @@ nfs4_put_stid(struct nfs4_stid *s)
return;
}
idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
+ atomic_dec(&s->sc_client->cl_admin_revoked);
+ nfs4_free_cpntf_statelist(clp->net, s);
spin_unlock(&clp->cl_lock);
s->sc_free(s);
if (fp)
@@ -910,25 +1206,67 @@ nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid)
static void put_deleg_file(struct nfs4_file *fp)
{
- struct file *filp = NULL;
+ struct nfsd_file *rnf = NULL;
+ struct nfsd_file *nf = NULL;
spin_lock(&fp->fi_lock);
- if (--fp->fi_delegees == 0)
- swap(filp, fp->fi_deleg_file);
+ if (--fp->fi_delegees == 0) {
+ swap(nf, fp->fi_deleg_file);
+ swap(rnf, fp->fi_rdeleg_file);
+ }
spin_unlock(&fp->fi_lock);
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
+ if (rnf)
+ nfs4_file_put_access(fp, NFS4_SHARE_ACCESS_READ);
+}
+
+static void nfsd4_finalize_deleg_timestamps(struct nfs4_delegation *dp, struct file *f)
+{
+ struct iattr ia = { .ia_valid = ATTR_ATIME | ATTR_CTIME | ATTR_MTIME };
+ struct inode *inode = file_inode(f);
+ int ret;
+
+ /* don't do anything if FMODE_NOCMTIME isn't set */
+ if ((READ_ONCE(f->f_mode) & FMODE_NOCMTIME) == 0)
+ return;
+
+ spin_lock(&f->f_lock);
+ f->f_mode &= ~FMODE_NOCMTIME;
+ spin_unlock(&f->f_lock);
+
+ /* was it never written? */
+ if (!dp->dl_written)
+ return;
+
+ /* did it get a setattr for the timestamps at some point? */
+ if (dp->dl_setattr)
+ return;
+
+ /* Stamp everything to "now" */
+ inode_lock(inode);
+ ret = notify_change(&nop_mnt_idmap, f->f_path.dentry, &ia, NULL);
+ inode_unlock(inode);
+ if (ret) {
+ struct inode *inode = file_inode(f);
+
+ pr_notice_ratelimited("Unable to update timestamps on inode %02x:%02x:%lu: %d\n",
+ MAJOR(inode->i_sb->s_dev),
+ MINOR(inode->i_sb->s_dev),
+ inode->i_ino, ret);
+ }
}
static void nfs4_unlock_deleg_lease(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
- struct file *filp = fp->fi_deleg_file;
+ struct nfsd_file *nf = fp->fi_deleg_file;
WARN_ON_ONCE(!fp->fi_delegees);
- vfs_setlease(filp, F_UNLCK, NULL, (void **)&dp);
+ nfsd4_finalize_deleg_timestamps(dp, nf->nf_file);
+ kernel_setlease(nf->nf_file, F_UNLCK, NULL, (void **)&dp);
put_deleg_file(fp);
}
@@ -939,11 +1277,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
nfs4_put_stid(&dp->dl_stid);
}
-void nfs4_unhash_stid(struct nfs4_stid *s)
-{
- s->sc_type = 0;
-}
-
/**
* nfs4_delegation_exists - Discover if this delegation already exists
* @clp: a pointer to the nfs4_client we're granting a delegation to
@@ -991,27 +1324,39 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
lockdep_assert_held(&state_lock);
lockdep_assert_held(&fp->fi_lock);
+ lockdep_assert_held(&clp->cl_lock);
if (nfs4_delegation_exists(clp, fp))
return -EAGAIN;
refcount_inc(&dp->dl_stid.sc_count);
- dp->dl_stid.sc_type = NFS4_DELEG_STID;
+ dp->dl_stid.sc_type = SC_TYPE_DELEG;
list_add(&dp->dl_perfile, &fp->fi_delegations);
list_add(&dp->dl_perclnt, &clp->cl_delegations);
return 0;
}
+static bool delegation_hashed(struct nfs4_delegation *dp)
+{
+ return !(list_empty(&dp->dl_perfile));
+}
+
static bool
-unhash_delegation_locked(struct nfs4_delegation *dp)
+unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short statusmask)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
lockdep_assert_held(&state_lock);
- if (list_empty(&dp->dl_perfile))
+ if (!delegation_hashed(dp))
return false;
- dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
+ if (statusmask == SC_STATUS_REVOKED &&
+ dp->dl_stid.sc_client->cl_minorversion == 0)
+ statusmask = SC_STATUS_CLOSED;
+ dp->dl_stid.sc_status |= statusmask;
+ if (statusmask & SC_STATUS_ADMIN_REVOKED)
+ atomic_inc(&dp->dl_stid.sc_client->cl_admin_revoked);
+
/* Ensure that deleg break won't try to requeue it */
++dp->dl_time;
spin_lock(&fp->fi_lock);
@@ -1027,30 +1372,59 @@ static void destroy_delegation(struct nfs4_delegation *dp)
bool unhashed;
spin_lock(&state_lock);
- unhashed = unhash_delegation_locked(dp);
+ unhashed = unhash_delegation_locked(dp, SC_STATUS_CLOSED);
spin_unlock(&state_lock);
if (unhashed)
destroy_unhashed_deleg(dp);
}
+/**
+ * revoke_delegation - perform nfs4 delegation structure cleanup
+ * @dp: pointer to the delegation
+ *
+ * This function assumes that it's called either from the administrative
+ * interface (nfsd4_revoke_states()) that's revoking a specific delegation
+ * stateid or it's called from a laundromat thread (nfsd4_landromat()) that
+ * determined that this specific state has expired and needs to be revoked
+ * (both mark state with the appropriate stid sc_status mode). It is also
+ * assumed that a reference was taken on the @dp state.
+ *
+ * If this function finds that the @dp state is SC_STATUS_FREED it means
+ * that a FREE_STATEID operation for this stateid has been processed and
+ * we can proceed to removing it from recalled list. However, if @dp state
+ * isn't marked SC_STATUS_FREED, it means we need place it on the cl_revoked
+ * list and wait for the FREE_STATEID to arrive from the client. At the same
+ * time, we need to mark it as SC_STATUS_FREEABLE to indicate to the
+ * nfsd4_free_stateid() function that this stateid has already been added
+ * to the cl_revoked list and that nfsd4_free_stateid() is now responsible
+ * for removing it from the list. Inspection of where the delegation state
+ * in the revocation process is protected by the clp->cl_lock.
+ */
static void revoke_delegation(struct nfs4_delegation *dp)
{
struct nfs4_client *clp = dp->dl_stid.sc_client;
WARN_ON(!list_empty(&dp->dl_recall_lru));
+ WARN_ON_ONCE(dp->dl_stid.sc_client->cl_minorversion > 0 &&
+ !(dp->dl_stid.sc_status &
+ (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED)));
- if (clp->cl_minorversion) {
- dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
- refcount_inc(&dp->dl_stid.sc_count);
- spin_lock(&clp->cl_lock);
- list_add(&dp->dl_recall_lru, &clp->cl_revoked);
- spin_unlock(&clp->cl_lock);
+ trace_nfsd_stid_revoke(&dp->dl_stid);
+
+ spin_lock(&clp->cl_lock);
+ if (dp->dl_stid.sc_status & SC_STATUS_FREED) {
+ list_del_init(&dp->dl_recall_lru);
+ goto out;
}
+ list_add(&dp->dl_recall_lru, &clp->cl_revoked);
+ dp->dl_stid.sc_status |= SC_STATUS_FREEABLE;
+out:
+ spin_unlock(&clp->cl_lock);
destroy_unhashed_deleg(dp);
}
-/*
- * SETCLIENTID state
+/*
+ * SETCLIENTID state
*/
static unsigned int clientid_hashval(u32 id)
@@ -1058,111 +1432,9 @@ static unsigned int clientid_hashval(u32 id)
return id & CLIENT_HASH_MASK;
}
-static unsigned int clientstr_hashval(const char *name)
+static unsigned int clientstr_hashval(struct xdr_netobj name)
{
- return opaque_hashval(name, 8) & CLIENT_HASH_MASK;
-}
-
-/*
- * We store the NONE, READ, WRITE, and BOTH bits separately in the
- * st_{access,deny}_bmap field of the stateid, in order to track not
- * only what share bits are currently in force, but also what
- * combinations of share bits previous opens have used. This allows us
- * to enforce the recommendation of rfc 3530 14.2.19 that the server
- * return an error if the client attempt to downgrade to a combination
- * of share bits not explicable by closing some of its previous opens.
- *
- * XXX: This enforcement is actually incomplete, since we don't keep
- * track of access/deny bit combinations; so, e.g., we allow:
- *
- * OPEN allow read, deny write
- * OPEN allow both, deny none
- * DOWNGRADE allow read, deny none
- *
- * which we should reject.
- */
-static unsigned int
-bmap_to_share_mode(unsigned long bmap) {
- int i;
- unsigned int access = 0;
-
- for (i = 1; i < 4; i++) {
- if (test_bit(i, &bmap))
- access |= i;
- }
- return access;
-}
-
-/* set share access for a given stateid */
-static inline void
-set_access(u32 access, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << access;
-
- WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
- stp->st_access_bmap |= mask;
-}
-
-/* clear share access for a given stateid */
-static inline void
-clear_access(u32 access, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << access;
-
- WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
- stp->st_access_bmap &= ~mask;
-}
-
-/* test whether a given stateid has access */
-static inline bool
-test_access(u32 access, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << access;
-
- return (bool)(stp->st_access_bmap & mask);
-}
-
-/* set share deny for a given stateid */
-static inline void
-set_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << deny;
-
- WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
- stp->st_deny_bmap |= mask;
-}
-
-/* clear share deny for a given stateid */
-static inline void
-clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << deny;
-
- WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
- stp->st_deny_bmap &= ~mask;
-}
-
-/* test whether a given stateid is denying specific access */
-static inline bool
-test_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << deny;
-
- return (bool)(stp->st_deny_bmap & mask);
-}
-
-static int nfs4_access_to_omode(u32 access)
-{
- switch (access & NFS4_SHARE_ACCESS_BOTH) {
- case NFS4_SHARE_ACCESS_READ:
- return O_RDONLY;
- case NFS4_SHARE_ACCESS_WRITE:
- return O_WRONLY;
- case NFS4_SHARE_ACCESS_BOTH:
- return O_RDWR;
- }
- WARN_ON_ONCE(1);
- return O_RDONLY;
+ return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK;
}
/*
@@ -1173,11 +1445,16 @@ static void
recalculate_deny_mode(struct nfs4_file *fp)
{
struct nfs4_ol_stateid *stp;
+ u32 old_deny;
spin_lock(&fp->fi_lock);
+ old_deny = fp->fi_share_deny;
fp->fi_share_deny = 0;
- list_for_each_entry(stp, &fp->fi_stateids, st_perfile)
+ list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap);
+ if (fp->fi_share_deny == old_deny)
+ break;
+ }
spin_unlock(&fp->fi_lock);
}
@@ -1235,6 +1512,12 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
nfs4_free_stateowner(sop);
}
+static bool
+nfs4_ol_stateid_unhashed(const struct nfs4_ol_stateid *stp)
+{
+ return list_empty(&stp->st_perfile);
+}
+
static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp)
{
struct nfs4_file *fp = stp->st_stid.sc_file;
@@ -1259,6 +1542,8 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
release_all_access(stp);
if (stp->st_stateowner)
nfs4_put_stateowner(stp->st_stateowner);
+ if (!list_empty(&stid->sc_cp_list))
+ nfs4_free_cpntf_statelist(stid->sc_client->net, stid);
kmem_cache_free(stateid_slab, stid);
}
@@ -1266,11 +1551,14 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
{
struct nfs4_ol_stateid *stp = openlockstateid(stid);
struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
- struct file *file;
+ struct nfsd_file *nf;
- file = find_any_file(stp->st_stid.sc_file);
- if (file)
- filp_close(file, (fl_owner_t)lo);
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (nf) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file, (fl_owner_t)lo);
+ nfsd_file_put(nf);
+ }
nfs4_free_ol_stateid(stid);
}
@@ -1295,6 +1583,8 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
}
idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
+ atomic_dec(&s->sc_client->cl_admin_revoked);
list_add(&stp->st_locks, reaplist);
}
@@ -1302,9 +1592,11 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
{
lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
+ if (!unhash_ol_stateid(stp))
+ return false;
list_del_init(&stp->st_locks);
- nfs4_unhash_stid(&stp->st_stid);
- return unhash_ol_stateid(stp);
+ stp->st_stid.sc_status |= SC_STATUS_CLOSED;
+ return true;
}
static void release_lock_stateid(struct nfs4_ol_stateid *stp)
@@ -1361,7 +1653,7 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
while (!list_empty(&open_stp->st_locks)) {
stp = list_entry(open_stp->st_locks.next,
struct nfs4_ol_stateid, st_locks);
- WARN_ON(!unhash_lock_stateid(stp));
+ unhash_lock_stateid(stp);
put_ol_stateid_locked(stp, reaplist);
}
}
@@ -1369,13 +1661,12 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
static bool unhash_open_stateid(struct nfs4_ol_stateid *stp,
struct list_head *reaplist)
{
- bool unhashed;
-
lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
- unhashed = unhash_ol_stateid(stp);
+ if (!unhash_ol_stateid(stp))
+ return false;
release_open_stateid_locks(stp, reaplist);
- return unhashed;
+ return true;
}
static void release_open_stateid(struct nfs4_ol_stateid *stp)
@@ -1383,12 +1674,21 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
LIST_HEAD(reaplist);
spin_lock(&stp->st_stid.sc_client->cl_lock);
+ stp->st_stid.sc_status |= SC_STATUS_CLOSED;
if (unhash_open_stateid(stp, &reaplist))
put_ol_stateid_locked(stp, &reaplist);
spin_unlock(&stp->st_stid.sc_client->cl_lock);
free_ol_stateid_reaplist(&reaplist);
}
+static bool nfs4_openowner_unhashed(struct nfs4_openowner *oo)
+{
+ lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
+
+ return list_empty(&oo->oo_owner.so_strhash) &&
+ list_empty(&oo->oo_perclient);
+}
+
static void unhash_openowner_locked(struct nfs4_openowner *oo)
{
struct nfs4_client *clp = oo->oo_owner.so_client;
@@ -1420,9 +1720,7 @@ static void release_openowner(struct nfs4_openowner *oo)
{
struct nfs4_ol_stateid *stp;
struct nfs4_client *clp = oo->oo_owner.so_client;
- struct list_head reaplist;
-
- INIT_LIST_HEAD(&reaplist);
+ LIST_HEAD(reaplist);
spin_lock(&clp->cl_lock);
unhash_openowner_locked(oo);
@@ -1438,6 +1736,137 @@ static void release_openowner(struct nfs4_openowner *oo)
nfs4_put_stateowner(&oo->oo_owner);
}
+static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
+ struct super_block *sb,
+ unsigned int sc_types)
+{
+ unsigned long id, tmp;
+ struct nfs4_stid *stid;
+
+ spin_lock(&clp->cl_lock);
+ idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
+ if ((stid->sc_type & sc_types) &&
+ stid->sc_status == 0 &&
+ stid->sc_file->fi_inode->i_sb == sb) {
+ refcount_inc(&stid->sc_count);
+ break;
+ }
+ spin_unlock(&clp->cl_lock);
+ return stid;
+}
+
+/**
+ * nfsd4_revoke_states - revoke all nfsv4 states associated with given filesystem
+ * @net: used to identify instance of nfsd (there is one per net namespace)
+ * @sb: super_block used to identify target filesystem
+ *
+ * All nfs4 states (open, lock, delegation, layout) held by the server instance
+ * and associated with a file on the given filesystem will be revoked resulting
+ * in any files being closed and so all references from nfsd to the filesystem
+ * being released. Thus nfsd will no longer prevent the filesystem from being
+ * unmounted.
+ *
+ * The clients which own the states will subsequently being notified that the
+ * states have been "admin-revoked".
+ */
+void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ unsigned int idhashval;
+ unsigned int sc_types;
+
+ sc_types = SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG | SC_TYPE_LAYOUT;
+
+ spin_lock(&nn->client_lock);
+ for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
+ struct list_head *head = &nn->conf_id_hashtbl[idhashval];
+ struct nfs4_client *clp;
+ retry:
+ list_for_each_entry(clp, head, cl_idhash) {
+ struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
+ sc_types);
+ if (stid) {
+ struct nfs4_ol_stateid *stp;
+ struct nfs4_delegation *dp;
+ struct nfs4_layout_stateid *ls;
+
+ spin_unlock(&nn->client_lock);
+ switch (stid->sc_type) {
+ case SC_TYPE_OPEN:
+ stp = openlockstateid(stid);
+ mutex_lock_nested(&stp->st_mutex,
+ OPEN_STATEID_MUTEX);
+
+ spin_lock(&clp->cl_lock);
+ if (stid->sc_status == 0) {
+ stid->sc_status |=
+ SC_STATUS_ADMIN_REVOKED;
+ atomic_inc(&clp->cl_admin_revoked);
+ spin_unlock(&clp->cl_lock);
+ release_all_access(stp);
+ } else
+ spin_unlock(&clp->cl_lock);
+ mutex_unlock(&stp->st_mutex);
+ break;
+ case SC_TYPE_LOCK:
+ stp = openlockstateid(stid);
+ mutex_lock_nested(&stp->st_mutex,
+ LOCK_STATEID_MUTEX);
+ spin_lock(&clp->cl_lock);
+ if (stid->sc_status == 0) {
+ struct nfs4_lockowner *lo =
+ lockowner(stp->st_stateowner);
+ struct nfsd_file *nf;
+
+ stid->sc_status |=
+ SC_STATUS_ADMIN_REVOKED;
+ atomic_inc(&clp->cl_admin_revoked);
+ spin_unlock(&clp->cl_lock);
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (nf) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file,
+ (fl_owner_t)lo);
+ nfsd_file_put(nf);
+ }
+ release_all_access(stp);
+ } else
+ spin_unlock(&clp->cl_lock);
+ mutex_unlock(&stp->st_mutex);
+ break;
+ case SC_TYPE_DELEG:
+ refcount_inc(&stid->sc_count);
+ dp = delegstateid(stid);
+ spin_lock(&state_lock);
+ if (!unhash_delegation_locked(
+ dp, SC_STATUS_ADMIN_REVOKED))
+ dp = NULL;
+ spin_unlock(&state_lock);
+ if (dp)
+ revoke_delegation(dp);
+ break;
+ case SC_TYPE_LAYOUT:
+ ls = layoutstateid(stid);
+ nfsd4_close_layout(ls);
+ break;
+ }
+ nfs4_put_stid(stid);
+ spin_lock(&nn->client_lock);
+ if (clp->cl_minorversion == 0)
+ /* Allow cleanup after a lease period.
+ * store_release ensures cleanup will
+ * see any newly revoked states if it
+ * sees the time updated.
+ */
+ nn->nfs40_last_revoke =
+ ktime_get_boottime_seconds();
+ goto retry;
+ }
+ }
+ }
+ spin_unlock(&nn->client_lock);
+}
+
static inline int
hash_sessionid(struct nfs4_sessionid *sessionid)
{
@@ -1509,96 +1938,145 @@ gen_sessionid(struct nfsd4_session *ses)
*/
#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
+static struct shrinker *nfsd_slot_shrinker;
+static DEFINE_SPINLOCK(nfsd_session_list_lock);
+static LIST_HEAD(nfsd_session_list);
+/* The sum of "target_slots-1" on every session. The shrinker can push this
+ * down, though it can take a little while for the memory to actually
+ * be freed. The "-1" is because we can never free slot 0 while the
+ * session is active.
+ */
+static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
+
static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
{
int i;
- for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
- free_svc_cred(&ses->se_slots[i]->sl_cred);
- kfree(ses->se_slots[i]);
+ if (from >= ses->se_fchannel.maxreqs)
+ return;
+
+ for (i = from; i < ses->se_fchannel.maxreqs; i++) {
+ struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+
+ /*
+ * Save the seqid in case we reactivate this slot.
+ * This will never require a memory allocation so GFP
+ * flag is irrelevant
+ */
+ xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
+ free_svc_cred(&slot->sl_cred);
+ kfree(slot);
+ }
+ ses->se_fchannel.maxreqs = from;
+ if (ses->se_target_maxslots > from) {
+ int new_target = from ?: 1;
+ atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots);
+ ses->se_target_maxslots = new_target;
}
}
-/*
- * We don't actually need to cache the rpc and session headers, so we
- * can allocate a little less for each slot:
+/**
+ * reduce_session_slots - reduce the target max-slots of a session if possible
+ * @ses: The session to affect
+ * @dec: how much to decrease the target by
+ *
+ * This interface can be used by a shrinker to reduce the target max-slots
+ * for a session so that some slots can eventually be freed.
+ * It uses spin_trylock() as it may be called in a context where another
+ * spinlock is held that has a dependency on client_lock. As shrinkers are
+ * best-effort, skiping a session is client_lock is already held has no
+ * great coast
+ *
+ * Return value:
+ * The number of slots that the target was reduced by.
*/
-static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
+static int
+reduce_session_slots(struct nfsd4_session *ses, int dec)
{
- u32 size;
+ struct nfsd_net *nn = net_generic(ses->se_client->net,
+ nfsd_net_id);
+ int ret = 0;
- if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ)
- size = 0;
- else
- size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
- return size + sizeof(struct nfsd4_slot);
+ if (ses->se_target_maxslots <= 1)
+ return ret;
+ if (!spin_trylock(&nn->client_lock))
+ return ret;
+ ret = min(dec, ses->se_target_maxslots-1);
+ ses->se_target_maxslots -= ret;
+ atomic_sub(ret, &nfsd_total_target_slots);
+ ses->se_slot_gen += 1;
+ if (ses->se_slot_gen == 0) {
+ int i;
+ ses->se_slot_gen = 1;
+ for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+ struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+ slot->sl_generation = 0;
+ }
+ }
+ spin_unlock(&nn->client_lock);
+ return ret;
}
-/*
- * XXX: If we run out of reserved DRC memory we could (up to a point)
- * re-negotiate active sessions and reduce their slot usage to make
- * room for new connections. For now we just fail the create session.
- */
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
+static struct nfsd4_slot *nfsd4_alloc_slot(struct nfsd4_channel_attrs *fattrs,
+ int index, gfp_t gfp)
{
- u32 slotsize = slot_bytes(ca);
- u32 num = ca->maxreqs;
- int avail;
+ struct nfsd4_slot *slot;
+ size_t size;
- spin_lock(&nfsd_drc_lock);
- avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
- nfsd_drc_max_mem - nfsd_drc_mem_used);
/*
- * Never use more than a third of the remaining memory,
- * unless it's the only way to give this client a slot:
+ * The RPC and NFS session headers are never saved in
+ * the slot reply cache buffer.
*/
- avail = clamp_t(int, avail, slotsize, avail/3);
- num = min_t(int, num, avail / slotsize);
- nfsd_drc_mem_used += num * slotsize;
- spin_unlock(&nfsd_drc_lock);
-
- return num;
-}
+ size = fattrs->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ ?
+ 0 : fattrs->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
-static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
-{
- int slotsize = slot_bytes(ca);
-
- spin_lock(&nfsd_drc_lock);
- nfsd_drc_mem_used -= slotsize * ca->maxreqs;
- spin_unlock(&nfsd_drc_lock);
+ slot = kzalloc(struct_size(slot, sl_data, size), gfp);
+ if (!slot)
+ return NULL;
+ slot->sl_index = index;
+ return slot;
}
static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
struct nfsd4_channel_attrs *battrs)
{
int numslots = fattrs->maxreqs;
- int slotsize = slot_bytes(fattrs);
struct nfsd4_session *new;
- int mem, i;
-
- BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
- + sizeof(struct nfsd4_session) > PAGE_SIZE);
- mem = numslots * sizeof(struct nfsd4_slot *);
+ struct nfsd4_slot *slot;
+ int i;
- new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
return NULL;
- /* allocate each struct nfsd4_slot and data cache in one piece */
- for (i = 0; i < numslots; i++) {
- new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
- if (!new->se_slots[i])
- goto out_free;
- }
+ xa_init(&new->se_slots);
- memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
- memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs));
+ slot = nfsd4_alloc_slot(fattrs, 0, GFP_KERNEL);
+ if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL)))
+ goto out_free;
+ for (i = 1; i < numslots; i++) {
+ const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
+ slot = nfsd4_alloc_slot(fattrs, i, gfp);
+ if (!slot)
+ break;
+ if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) {
+ kfree(slot);
+ break;
+ }
+ }
+ fattrs->maxreqs = i;
+ memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+ new->se_target_maxslots = i;
+ atomic_add(i - 1, &nfsd_total_target_slots);
+ new->se_cb_slot_avail = ~0U;
+ new->se_cb_highest_slot = min(battrs->maxreqs - 1,
+ NFSD_BC_SLOT_TABLE_SIZE - 1);
+ spin_lock_init(&new->se_lock);
return new;
out_free:
- while (i--)
- kfree(new->se_slots[i]);
+ kfree(slot);
+ xa_destroy(&new->se_slots);
kfree(new);
return NULL;
}
@@ -1614,6 +2092,8 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u)
struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
struct nfs4_client *clp = c->cn_session->se_client;
+ trace_nfsd_cb_lost(clp);
+
spin_lock(&clp->cl_lock);
if (!list_empty(&c->cn_persession)) {
list_del(&c->cn_persession);
@@ -1702,17 +2182,47 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
static void __free_session(struct nfsd4_session *ses)
{
- free_session_slots(ses);
+ free_session_slots(ses, 0);
+ xa_destroy(&ses->se_slots);
kfree(ses);
}
static void free_session(struct nfsd4_session *ses)
{
nfsd4_del_conns(ses);
- nfsd4_put_drc_mem(&ses->se_fchannel);
__free_session(ses);
}
+static unsigned long
+nfsd_slot_count(struct shrinker *s, struct shrink_control *sc)
+{
+ unsigned long cnt = atomic_read(&nfsd_total_target_slots);
+
+ return cnt ? cnt : SHRINK_EMPTY;
+}
+
+static unsigned long
+nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc)
+{
+ struct nfsd4_session *ses;
+ unsigned long scanned = 0;
+ unsigned long freed = 0;
+
+ spin_lock(&nfsd_session_list_lock);
+ list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) {
+ freed += reduce_session_slots(ses, 1);
+ scanned += 1;
+ if (scanned >= sc->nr_to_scan) {
+ /* Move starting point for next scan */
+ list_move(&nfsd_session_list, &ses->se_all_sessions);
+ break;
+ }
+ }
+ spin_unlock(&nfsd_session_list_lock);
+ sc->nr_scanned = scanned;
+ return freed;
+}
+
static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
{
int idx;
@@ -1723,17 +2233,24 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
INIT_LIST_HEAD(&new->se_conns);
- new->se_cb_seq_nr = 1;
- new->se_flags = cses->flags;
+ atomic_set(&new->se_ref, 0);
+ new->se_dead = false;
new->se_cb_prog = cses->callback_prog;
new->se_cb_sec = cses->cb_sec;
- atomic_set(&new->se_ref, 0);
+
+ for (idx = 0; idx < NFSD_BC_SLOT_TABLE_SIZE; ++idx)
+ new->se_cb_seq_nr[idx] = 1;
+
idx = hash_sessionid(&new->se_sessionid);
list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
spin_lock(&clp->cl_lock);
list_add(&new->se_perclnt, &clp->cl_sessions);
spin_unlock(&clp->cl_lock);
+ spin_lock(&nfsd_session_list_lock);
+ list_add_tail(&new->se_all_sessions, &nfsd_session_list);
+ spin_unlock(&nfsd_session_list_lock);
+
{
struct sockaddr *sa = svc_addr(rqstp);
/*
@@ -1803,6 +2320,9 @@ unhash_session(struct nfsd4_session *ses)
spin_lock(&ses->se_client->cl_lock);
list_del(&ses->se_perclnt);
spin_unlock(&ses->se_client->cl_lock);
+ spin_lock(&nfsd_session_list_lock);
+ list_del(&ses->se_all_sessions);
+ spin_unlock(&nfsd_session_list_lock);
}
/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
@@ -1816,25 +2336,24 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
*/
if (clid->cl_boot == (u32)nn->boot_time)
return 0;
- dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
- clid->cl_boot, clid->cl_id, nn->boot_time);
+ trace_nfsd_clid_stale(clid);
return 1;
}
-/*
- * XXX Should we use a slab cache ?
- * This type of memory management is somewhat inefficient, but we use it
- * anyway since SETCLIENTID is not a common operation.
- */
-static struct nfs4_client *alloc_client(struct xdr_netobj name)
+static struct nfs4_client *alloc_client(struct xdr_netobj name,
+ struct nfsd_net *nn)
{
struct nfs4_client *clp;
int i;
+ if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients &&
+ atomic_read(&nn->nfsd_courtesy_clients) > 0)
+ mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
+
clp = kmem_cache_zalloc(client_slab, GFP_KERNEL);
if (clp == NULL)
return NULL;
- clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
+ xdr_netobj_dup(&clp->cl_name, &name, GFP_KERNEL);
if (clp->cl_name.data == NULL)
goto err_no_name;
clp->cl_ownerstr_hashtbl = kmalloc_array(OWNER_HASH_SIZE,
@@ -1842,13 +2361,19 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
GFP_KERNEL);
if (!clp->cl_ownerstr_hashtbl)
goto err_no_hashtbl;
+ clp->cl_callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0);
+ if (!clp->cl_callback_wq)
+ goto err_no_callback_wq;
+
for (i = 0; i < OWNER_HASH_SIZE; i++)
INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
- clp->cl_name.len = name.len;
INIT_LIST_HEAD(&clp->cl_sessions);
idr_init(&clp->cl_stateids);
- atomic_set(&clp->cl_refcount, 0);
+ atomic_set(&clp->cl_rpc_users, 0);
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+ clp->cl_state = NFSD4_ACTIVE;
+ atomic_inc(&nn->nfs4_client_count);
+ atomic_set(&clp->cl_delegs_in_recall, 0);
INIT_LIST_HEAD(&clp->cl_idhash);
INIT_LIST_HEAD(&clp->cl_openowners);
INIT_LIST_HEAD(&clp->cl_delegations);
@@ -1862,6 +2387,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
spin_lock_init(&clp->cl_lock);
rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
return clp;
+err_no_callback_wq:
+ kfree(clp->cl_ownerstr_hashtbl);
err_no_hashtbl:
kfree(clp->cl_name.data);
err_no_name:
@@ -1869,6 +2396,27 @@ err_no_name:
return NULL;
}
+static void __free_client(struct kref *k)
+{
+ struct nfsdfs_client *c = container_of(k, struct nfsdfs_client, cl_ref);
+ struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs);
+
+ free_svc_cred(&clp->cl_cred);
+ destroy_workqueue(clp->cl_callback_wq);
+ kfree(clp->cl_ownerstr_hashtbl);
+ kfree(clp->cl_name.data);
+ kfree(clp->cl_nii_domain.data);
+ kfree(clp->cl_nii_name.data);
+ idr_destroy(&clp->cl_stateids);
+ kfree(clp->cl_ra);
+ kmem_cache_free(client_slab, clp);
+}
+
+static void drop_client(struct nfs4_client *clp)
+{
+ kref_put(&clp->cl_nfsdfs.cl_ref, __free_client);
+}
+
static void
free_client(struct nfs4_client *clp)
{
@@ -1881,11 +2429,12 @@ free_client(struct nfs4_client *clp)
free_session(ses);
}
rpc_destroy_wait_queue(&clp->cl_cb_waitq);
- free_svc_cred(&clp->cl_cred);
- kfree(clp->cl_ownerstr_hashtbl);
- kfree(clp->cl_name.data);
- idr_destroy(&clp->cl_stateids);
- kmem_cache_free(client_slab, clp);
+ if (clp->cl_nfsd_dentry) {
+ nfsd_client_rmdir(clp->cl_nfsd_dentry);
+ clp->cl_nfsd_dentry = NULL;
+ wake_up_all(&expiry_wq);
+ }
+ drop_client(clp);
}
/* must be called under the client_lock */
@@ -1909,8 +2458,12 @@ unhash_client_locked(struct nfs4_client *clp)
}
list_del_init(&clp->cl_lru);
spin_lock(&clp->cl_lock);
- list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ spin_lock(&nfsd_session_list_lock);
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) {
list_del_init(&ses->se_hash);
+ list_del_init(&ses->se_all_sessions);
+ }
+ spin_unlock(&nfsd_session_list_lock);
spin_unlock(&clp->cl_lock);
}
@@ -1926,7 +2479,11 @@ unhash_client(struct nfs4_client *clp)
static __be32 mark_client_expired_locked(struct nfs4_client *clp)
{
- if (atomic_read(&clp->cl_refcount))
+ int users = atomic_read(&clp->cl_rpc_users);
+
+ trace_nfsd_mark_client_expired(clp, users);
+
+ if (users)
return nfserr_jukebox;
unhash_client_locked(clp);
return nfs_ok;
@@ -1935,16 +2492,16 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp)
static void
__destroy_client(struct nfs4_client *clp)
{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
int i;
struct nfs4_openowner *oo;
struct nfs4_delegation *dp;
- struct list_head reaplist;
+ LIST_HEAD(reaplist);
- INIT_LIST_HEAD(&reaplist);
spin_lock(&state_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
- WARN_ON(!unhash_delegation_locked(dp));
+ unhash_delegation_locked(dp, SC_STATUS_CLOSED);
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -1978,7 +2535,10 @@ __destroy_client(struct nfs4_client *clp)
nfsd4_shutdown_callback(clp);
if (clp->cl_cb_conn.cb_xprt)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
+ atomic_add_unless(&nn->nfs4_client_count, -1, 0);
+ nfsd4_dec_courtesy_client_count(nn, clp);
free_client(clp);
+ wake_up_all(&expiry_wq);
}
static void
@@ -1988,6 +2548,22 @@ destroy_client(struct nfs4_client *clp)
__destroy_client(clp);
}
+static void inc_reclaim_complete(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ if (!nn->track_reclaim_completes)
+ return;
+ if (!nfsd4_find_reclaim_client(clp->cl_name, nn))
+ return;
+ if (atomic_inc_return(&nn->nr_reclaim_complete) ==
+ nn->reclaim_str_hashtbl_size) {
+ printk(KERN_INFO "NFSD: all clients done reclaiming, ending NFSv4 grace period (net %x)\n",
+ clp->net->ns.inum);
+ nfsd4_end_grace(nn);
+ }
+}
+
static void expire_client(struct nfs4_client *clp)
{
unhash_client(clp);
@@ -2039,11 +2615,6 @@ compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
return memcmp(o1->data, o2->data, o1->len);
}
-static int same_name(const char *n1, const char *n2)
-{
- return 0 == memcmp(n1, n2, HEXDIR_LEN);
-}
-
static int
same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
{
@@ -2138,14 +2709,14 @@ static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
* This is opaque to client, so no need to byte-swap. Use
* __force to keep sparse happy
*/
- verf[0] = (__force __be32)get_seconds();
+ verf[0] = (__force __be32)(u32)ktime_get_real_seconds();
verf[1] = (__force __be32)nn->clverifier_counter++;
memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
}
static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
{
- clp->cl_clientid.cl_boot = nn->boot_time;
+ clp->cl_clientid.cl_boot = (u32)nn->boot_time;
clp->cl_clientid.cl_id = nn->clientid_counter++;
gen_confirm(clp, nn);
}
@@ -2162,14 +2733,16 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
}
static struct nfs4_stid *
-find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
+find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
+ unsigned short typemask, unsigned short ok_states)
{
struct nfs4_stid *s;
spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, t);
if (s != NULL) {
- if (typemask & s->sc_type)
+ if ((s->sc_status & ~ok_states) == 0 &&
+ (typemask & s->sc_type))
refcount_inc(&s->sc_count);
else
s = NULL;
@@ -2178,6 +2751,519 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
return s;
}
+static struct nfs4_client *get_nfsdfs_clp(struct inode *inode)
+{
+ struct nfsdfs_client *nc;
+ nc = get_nfsdfs_client(inode);
+ if (!nc)
+ return NULL;
+ return container_of(nc, struct nfs4_client, cl_nfsdfs);
+}
+
+static void seq_quote_mem(struct seq_file *m, char *data, int len)
+{
+ seq_puts(m, "\"");
+ seq_escape_mem(m, data, len, ESCAPE_HEX | ESCAPE_NAP | ESCAPE_APPEND, "\"\\");
+ seq_puts(m, "\"");
+}
+
+static const char *cb_state2str(int state)
+{
+ switch (state) {
+ case NFSD4_CB_UP:
+ return "UP";
+ case NFSD4_CB_UNKNOWN:
+ return "UNKNOWN";
+ case NFSD4_CB_DOWN:
+ return "DOWN";
+ case NFSD4_CB_FAULT:
+ return "FAULT";
+ }
+ return "UNDEFINED";
+}
+
+static int client_info_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = file_inode(m->file);
+ struct nfsd4_session *ses;
+ struct nfs4_client *clp;
+ u64 clid;
+
+ clp = get_nfsdfs_clp(inode);
+ if (!clp)
+ return -ENXIO;
+ memcpy(&clid, &clp->cl_clientid, sizeof(clid));
+ seq_printf(m, "clientid: 0x%llx\n", clid);
+ seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr);
+
+ if (clp->cl_state == NFSD4_COURTESY)
+ seq_puts(m, "status: courtesy\n");
+ else if (clp->cl_state == NFSD4_EXPIRABLE)
+ seq_puts(m, "status: expirable\n");
+ else if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
+ seq_puts(m, "status: confirmed\n");
+ else
+ seq_puts(m, "status: unconfirmed\n");
+ seq_printf(m, "seconds from last renew: %lld\n",
+ ktime_get_boottime_seconds() - clp->cl_time);
+ seq_puts(m, "name: ");
+ seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
+ seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
+ if (clp->cl_nii_domain.data) {
+ seq_puts(m, "Implementation domain: ");
+ seq_quote_mem(m, clp->cl_nii_domain.data,
+ clp->cl_nii_domain.len);
+ seq_puts(m, "\nImplementation name: ");
+ seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
+ seq_printf(m, "\nImplementation time: [%lld, %ld]\n",
+ clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
+ }
+ seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
+ seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
+ seq_printf(m, "admin-revoked states: %d\n",
+ atomic_read(&clp->cl_admin_revoked));
+ spin_lock(&clp->cl_lock);
+ seq_printf(m, "session slots:");
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+ seq_printf(m, "\nsession target slots:");
+ list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+ seq_printf(m, " %u", ses->se_target_maxslots);
+ spin_unlock(&clp->cl_lock);
+ seq_puts(m, "\n");
+
+ drop_client(clp);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(client_info);
+
+static void *states_start(struct seq_file *s, loff_t *pos)
+ __acquires(&clp->cl_lock)
+{
+ struct nfs4_client *clp = s->private;
+ unsigned long id = *pos;
+ void *ret;
+
+ spin_lock(&clp->cl_lock);
+ ret = idr_get_next_ul(&clp->cl_stateids, &id);
+ *pos = id;
+ return ret;
+}
+
+static void *states_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct nfs4_client *clp = s->private;
+ unsigned long id = *pos;
+ void *ret;
+
+ id = *pos;
+ id++;
+ ret = idr_get_next_ul(&clp->cl_stateids, &id);
+ *pos = id;
+ return ret;
+}
+
+static void states_stop(struct seq_file *s, void *v)
+ __releases(&clp->cl_lock)
+{
+ struct nfs4_client *clp = s->private;
+
+ spin_unlock(&clp->cl_lock);
+}
+
+static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f)
+{
+ seq_printf(s, "filename: \"%pD2\"", f->nf_file);
+}
+
+static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
+{
+ struct inode *inode = file_inode(f->nf_file);
+
+ seq_printf(s, "superblock: \"%02x:%02x:%ld\"",
+ MAJOR(inode->i_sb->s_dev),
+ MINOR(inode->i_sb->s_dev),
+ inode->i_ino);
+}
+
+static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo)
+{
+ seq_puts(s, "owner: ");
+ seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len);
+}
+
+static void nfs4_show_stateid(struct seq_file *s, stateid_t *stid)
+{
+ seq_printf(s, "0x%.8x", stid->si_generation);
+ seq_printf(s, "%12phN", &stid->si_opaque);
+}
+
+static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_ol_stateid *ols;
+ struct nfs4_file *nf;
+ struct nfsd_file *file;
+ struct nfs4_stateowner *oo;
+ unsigned int access, deny;
+
+ ols = openlockstateid(st);
+ oo = ols->st_stateowner;
+ nf = st->sc_file;
+
+ seq_puts(s, "- ");
+ nfs4_show_stateid(s, &st->sc_stateid);
+ seq_puts(s, ": { type: open, ");
+
+ access = bmap_to_share_mode(ols->st_access_bmap);
+ deny = bmap_to_share_mode(ols->st_deny_bmap);
+
+ seq_printf(s, "access: %s%s, ",
+ access & NFS4_SHARE_ACCESS_READ ? "r" : "-",
+ access & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
+ seq_printf(s, "deny: %s%s, ",
+ deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
+ deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
+
+ if (nf) {
+ spin_lock(&nf->fi_lock);
+ file = find_any_file_locked(nf);
+ if (file) {
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ seq_puts(s, ", ");
+ }
+ spin_unlock(&nf->fi_lock);
+ } else
+ seq_puts(s, "closed, ");
+ nfs4_show_owner(s, oo);
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
+ return 0;
+}
+
+static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_ol_stateid *ols;
+ struct nfs4_file *nf;
+ struct nfsd_file *file;
+ struct nfs4_stateowner *oo;
+
+ ols = openlockstateid(st);
+ oo = ols->st_stateowner;
+ nf = st->sc_file;
+
+ seq_puts(s, "- ");
+ nfs4_show_stateid(s, &st->sc_stateid);
+ seq_puts(s, ": { type: lock, ");
+
+ spin_lock(&nf->fi_lock);
+ file = find_any_file_locked(nf);
+ if (file) {
+ /*
+ * Note: a lock stateid isn't really the same thing as a lock,
+ * it's the locking state held by one owner on a file, and there
+ * may be multiple (or no) lock ranges associated with it.
+ * (Same for the matter is true of open stateids.)
+ */
+
+ nfs4_show_superblock(s, file);
+ /* XXX: open stateid? */
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ seq_puts(s, ", ");
+ }
+ nfs4_show_owner(s, oo);
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
+ spin_unlock(&nf->fi_lock);
+ return 0;
+}
+
+static char *nfs4_show_deleg_type(u32 dl_type)
+{
+ switch (dl_type) {
+ case OPEN_DELEGATE_READ:
+ return "r";
+ case OPEN_DELEGATE_WRITE:
+ return "w";
+ case OPEN_DELEGATE_READ_ATTRS_DELEG:
+ return "ra";
+ case OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ return "wa";
+ }
+ return "?";
+}
+
+static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_delegation *ds;
+ struct nfs4_file *nf;
+ struct nfsd_file *file;
+
+ ds = delegstateid(st);
+ nf = st->sc_file;
+
+ seq_puts(s, "- ");
+ nfs4_show_stateid(s, &st->sc_stateid);
+ seq_puts(s, ": { type: deleg, ");
+
+ seq_printf(s, "access: %s", nfs4_show_deleg_type(ds->dl_type));
+
+ /* XXX: lease time, whether it's being recalled. */
+
+ spin_lock(&nf->fi_lock);
+ file = nf->fi_deleg_file;
+ if (file) {
+ seq_puts(s, ", ");
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ }
+ spin_unlock(&nf->fi_lock);
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
+ return 0;
+}
+
+static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
+{
+ struct nfs4_layout_stateid *ls;
+ struct nfsd_file *file;
+
+ ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
+
+ seq_puts(s, "- ");
+ nfs4_show_stateid(s, &st->sc_stateid);
+ seq_puts(s, ": { type: layout");
+
+ /* XXX: What else would be useful? */
+
+ spin_lock(&ls->ls_stid.sc_file->fi_lock);
+ file = ls->ls_file;
+ if (file) {
+ seq_puts(s, ", ");
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ }
+ spin_unlock(&ls->ls_stid.sc_file->fi_lock);
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
+
+ return 0;
+}
+
+static int states_show(struct seq_file *s, void *v)
+{
+ struct nfs4_stid *st = v;
+
+ switch (st->sc_type) {
+ case SC_TYPE_OPEN:
+ return nfs4_show_open(s, st);
+ case SC_TYPE_LOCK:
+ return nfs4_show_lock(s, st);
+ case SC_TYPE_DELEG:
+ return nfs4_show_deleg(s, st);
+ case SC_TYPE_LAYOUT:
+ return nfs4_show_layout(s, st);
+ default:
+ return 0; /* XXX: or SEQ_SKIP? */
+ }
+ /* XXX: copy stateids? */
+}
+
+static struct seq_operations states_seq_ops = {
+ .start = states_start,
+ .next = states_next,
+ .stop = states_stop,
+ .show = states_show
+};
+
+static int client_states_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *s;
+ struct nfs4_client *clp;
+ int ret;
+
+ clp = get_nfsdfs_clp(inode);
+ if (!clp)
+ return -ENXIO;
+
+ ret = seq_open(file, &states_seq_ops);
+ if (ret)
+ return ret;
+ s = file->private_data;
+ s->private = clp;
+ return 0;
+}
+
+static int client_opens_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = file->private_data;
+ struct nfs4_client *clp = m->private;
+
+ /* XXX: alternatively, we could get/drop in seq start/stop */
+ drop_client(clp);
+ return seq_release(inode, file);
+}
+
+static const struct file_operations client_states_fops = {
+ .open = client_states_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = client_opens_release,
+};
+
+/*
+ * Normally we refuse to destroy clients that are in use, but here the
+ * administrator is telling us to just do it. We also want to wait
+ * so the caller has a guarantee that the client's locks are gone by
+ * the time the write returns:
+ */
+static void force_expire_client(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ bool already_expired;
+
+ trace_nfsd_clid_admin_expired(&clp->cl_clientid);
+
+ spin_lock(&nn->client_lock);
+ clp->cl_time = 0;
+ spin_unlock(&nn->client_lock);
+
+ wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
+ spin_lock(&nn->client_lock);
+ already_expired = list_empty(&clp->cl_lru);
+ if (!already_expired)
+ unhash_client_locked(clp);
+ spin_unlock(&nn->client_lock);
+
+ if (!already_expired)
+ expire_client(clp);
+ else
+ wait_event(expiry_wq, clp->cl_nfsd_dentry == NULL);
+}
+
+static ssize_t client_ctl_write(struct file *file, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ char *data;
+ struct nfs4_client *clp;
+
+ data = simple_transaction_get(file, buf, size);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+ if (size != 7 || 0 != memcmp(data, "expire\n", 7))
+ return -EINVAL;
+ clp = get_nfsdfs_clp(file_inode(file));
+ if (!clp)
+ return -ENXIO;
+ force_expire_client(clp);
+ drop_client(clp);
+ return 7;
+}
+
+static const struct file_operations client_ctl_fops = {
+ .write = client_ctl_write,
+ .release = simple_transaction_release,
+};
+
+static const struct tree_descr client_files[] = {
+ [0] = {"info", &client_info_fops, S_IRUSR},
+ [1] = {"states", &client_states_fops, S_IRUSR},
+ [2] = {"ctl", &client_ctl_fops, S_IWUSR},
+ [3] = {""},
+};
+
+static int
+nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
+ struct rpc_task *task)
+{
+ trace_nfsd_cb_recall_any_done(cb, task);
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+
+ drop_client(clp);
+}
+
+static int
+nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
+{
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+ struct nfs4_delegation *dp =
+ container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
+
+ trace_nfsd_cb_getattr_done(&dp->dl_stid.sc_stateid, task);
+ ncf->ncf_cb_status = task->tk_status;
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
+{
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+ struct nfs4_delegation *dp =
+ container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
+
+ nfs4_put_stid(&dp->dl_stid);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
+ .done = nfsd4_cb_recall_any_done,
+ .release = nfsd4_cb_recall_any_release,
+ .opcode = OP_CB_RECALL_ANY,
+};
+
+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
+ .done = nfsd4_cb_getattr_done,
+ .release = nfsd4_cb_getattr_release,
+ .opcode = OP_CB_GETATTR,
+};
+
+static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
+{
+ struct nfs4_delegation *dp =
+ container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
+
+ if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ncf->ncf_getattr.cb_flags))
+ return;
+
+ /* set to proper status when nfsd4_cb_getattr_done runs */
+ ncf->ncf_cb_status = NFS4ERR_IO;
+
+ /* ensure that wake_bit is done when RUNNING is cleared */
+ set_bit(NFSD4_CALLBACK_WAKE, &ncf->ncf_getattr.cb_flags);
+
+ refcount_inc(&dp->dl_stid.sc_count);
+ nfsd4_run_cb(&ncf->ncf_getattr);
+}
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -2185,8 +3271,10 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
struct sockaddr *sa = svc_addr(rqstp);
int ret;
struct net *net = SVC_NET(rqstp);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct dentry *dentries[ARRAY_SIZE(client_files)];
- clp = alloc_client(name);
+ clp = alloc_client(name, nn);
if (clp == NULL)
return NULL;
@@ -2195,13 +3283,31 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
+ gen_clid(clp, nn);
+ kref_init(&clp->cl_nfsdfs.cl_ref);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
- clp->cl_time = get_seconds();
- clear_bit(0, &clp->cl_cb_slot_busy);
+ clp->cl_time = ktime_get_boottime_seconds();
copy_verf(clp, verf);
- rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
+ memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage));
clp->cl_cb_session = NULL;
clp->net = net;
+ clp->cl_nfsd_dentry = nfsd_client_mkdir(
+ nn, &clp->cl_nfsdfs,
+ clp->cl_clientid.cl_id - nn->clientid_base,
+ client_files, dentries);
+ clp->cl_nfsd_info_dentry = dentries[0];
+ if (!clp->cl_nfsd_dentry) {
+ free_client(clp);
+ return NULL;
+ }
+ clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
+ if (!clp->cl_ra) {
+ free_client(clp);
+ return NULL;
+ }
+ clp->cl_ra_time = 0;
+ nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
+ NFSPROC4_CLNT_CB_RECALL_ANY);
return clp;
}
@@ -2268,11 +3374,11 @@ move_to_confirmed(struct nfs4_client *clp)
lockdep_assert_held(&nn->client_lock);
- dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
add_clp_to_name_tree(clp, &nn->conf_name_tree);
set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
+ trace_nfsd_clid_confirmed(&clp->cl_clientid);
renew_client_locked(clp);
}
@@ -2362,14 +3468,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
conn->cb_prog = se->se_callback_prog;
conn->cb_ident = se->se_callback_ident;
memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen);
+ trace_nfsd_cb_args(clp, conn);
return;
out_err:
conn->cb_addr.ss_family = AF_UNSPEC;
conn->cb_addrlen = 0;
- dprintk("NFSD: this client (clientid %08x/%08x) "
- "will not receive delegations\n",
- clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-
+ trace_nfsd_cb_nodelegs(clp);
return;
}
@@ -2379,11 +3483,24 @@ out_err:
static void
nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
{
- struct xdr_buf *buf = resp->xdr.buf;
+ struct xdr_buf *buf = resp->xdr->buf;
struct nfsd4_slot *slot = resp->cstate.slot;
unsigned int base;
- dprintk("--> %s slot %p\n", __func__, slot);
+ /*
+ * RFC 5661 Section 2.10.6.1.2:
+ *
+ * Any time SEQUENCE ... returns an error ... [t]he replier MUST NOT
+ * modify the reply cache entry for the slot whenever an error is
+ * returned from SEQUENCE ...
+ *
+ * Because nfsd4_store_cache_entry is called only by
+ * nfsd4_sequence_done(), nfsd4_store_cache_entry() is called only
+ * when a SEQUENCE operation was part of the COMPOUND.
+ * nfs41_check_op_ordering() ensures SEQUENCE is the first op.
+ */
+ if (resp->opcnt == 1 && resp->cstate.status != nfs_ok)
+ return;
slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
slot->sl_opcnt = resp->opcnt;
@@ -2391,7 +3508,7 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
free_svc_cred(&slot->sl_cred);
copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred);
- if (!nfsd4_cache_this(resp)) {
+ if (!(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
slot->sl_flags &= ~NFSD4_SLOT_CACHED;
return;
}
@@ -2406,41 +3523,6 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
}
/*
- * Encode the replay sequence operation from the slot values.
- * If cachethis is FALSE encode the uncached rep error on the next
- * operation which sets resp->p and increments resp->opcnt for
- * nfs4svc_encode_compoundres.
- *
- */
-static __be32
-nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
- struct nfsd4_compoundres *resp)
-{
- struct nfsd4_op *op;
- struct nfsd4_slot *slot = resp->cstate.slot;
-
- /* Encode the replayed sequence operation */
- op = &args->ops[resp->opcnt - 1];
- nfsd4_encode_operation(resp, op);
-
- if (slot->sl_flags & NFSD4_SLOT_CACHED)
- return op->status;
- if (args->opcnt == 1) {
- /*
- * The original operation wasn't a solo sequence--we
- * always cache those--so this retry must not match the
- * original:
- */
- op->status = nfserr_seq_false_retry;
- } else {
- op = &args->ops[resp->opcnt++];
- op->status = nfserr_retry_uncached_rep;
- nfsd4_encode_operation(resp, op);
- }
- return op->status;
-}
-
-/*
* The sequence operation is not cached because we can use the slot and
* session values.
*/
@@ -2448,17 +3530,30 @@ static __be32
nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
struct nfsd4_sequence *seq)
{
+ struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
struct nfsd4_slot *slot = resp->cstate.slot;
- struct xdr_stream *xdr = &resp->xdr;
+ struct xdr_stream *xdr = resp->xdr;
__be32 *p;
- __be32 status;
dprintk("--> %s slot %p\n", __func__, slot);
- status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
- if (status)
- return status;
+ /* Always encode the SEQUENCE response. */
+ nfsd4_encode_operation(resp, &args->ops[0]);
+ if (args->opcnt == 1)
+ /* A solo SEQUENCE - nothing was cached */
+ return args->ops[0].status;
+
+ if (!(slot->sl_flags & NFSD4_SLOT_CACHED)) {
+ /* We weren't asked to cache this. */
+ struct nfsd4_op *op;
+
+ op = &args->ops[resp->opcnt++];
+ op->status = nfserr_retry_uncached_rep;
+ nfsd4_encode_operation(resp, op);
+ return op->status;
+ }
+ /* return reply from cache */
p = xdr_reserve_space(xdr, slot->sl_datalen);
if (!p) {
WARN_ON_ONCE(1);
@@ -2509,7 +3604,22 @@ static bool client_has_state(struct nfs4_client *clp)
#endif
|| !list_empty(&clp->cl_delegations)
|| !list_empty(&clp->cl_sessions)
- || !list_empty(&clp->async_copies);
+ || nfsd4_has_active_async_copies(clp);
+}
+
+static __be32 copy_impl_id(struct nfs4_client *clp,
+ struct nfsd4_exchange_id *exid)
+{
+ if (!exid->nii_domain.data)
+ return 0;
+ xdr_netobj_dup(&clp->cl_nii_domain, &exid->nii_domain, GFP_KERNEL);
+ if (!clp->cl_nii_domain.data)
+ return nfserr_jukebox;
+ xdr_netobj_dup(&clp->cl_nii_name, &exid->nii_name, GFP_KERNEL);
+ if (!clp->cl_nii_name.data)
+ return nfserr_jukebox;
+ clp->cl_nii_time = exid->nii_time;
+ return 0;
}
__be32
@@ -2528,16 +3638,25 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
rpc_ntop(sa, addr_str, sizeof(addr_str));
dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
- "ip_addr=%s flags %x, spa_how %d\n",
+ "ip_addr=%s flags %x, spa_how %u\n",
__func__, rqstp, exid, exid->clname.len, exid->clname.data,
addr_str, exid->flags, exid->spa_how);
+ exid->server_impl_name = kasprintf(GFP_KERNEL, "%s %s %s %s",
+ utsname()->sysname, utsname()->release,
+ utsname()->version, utsname()->machine);
+ if (!exid->server_impl_name)
+ return nfserr_jukebox;
+
if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
return nfserr_inval;
new = create_client(exid->clname, rqstp, &verf);
if (new == NULL)
return nfserr_jukebox;
+ status = copy_impl_id(new, exid);
+ if (status)
+ goto out_nolock;
switch (exid->spa_how) {
case SP4_MACH_CRED:
@@ -2572,10 +3691,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out_nolock;
}
new->cl_mach_cred = true;
+ break;
case SP4_NONE:
break;
default: /* checked by xdr code */
WARN_ON_ONCE(1);
+ fallthrough;
case SP4_SSV:
status = nfserr_encr_alg_unsupp;
goto out_nolock;
@@ -2607,20 +3728,24 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
/* case 6 */
exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
+ trace_nfsd_clid_confirmed_r(conf);
goto out_copy;
}
if (!creds_match) { /* case 3 */
if (client_has_state(conf)) {
status = nfserr_clid_inuse;
+ trace_nfsd_clid_cred_mismatch(conf, rqstp);
goto out;
}
goto out_new;
}
if (verfs_match) { /* case 2 */
conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
+ trace_nfsd_clid_confirmed_r(conf);
goto out_copy;
}
/* case 5, client reboot */
+ trace_nfsd_clid_verf_mismatch(conf, rqstp, &verf);
conf = NULL;
goto out_new;
}
@@ -2630,22 +3755,27 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
- unconf = find_unconfirmed_client_by_name(&exid->clname, nn);
+ unconf = find_unconfirmed_client_by_name(&exid->clname, nn);
if (unconf) /* case 4, possible retry or client restart */
unhash_client_locked(unconf);
- /* case 1 (normal case) */
+ /* case 1, new owner ID */
+ trace_nfsd_clid_fresh(new);
+
out_new:
if (conf) {
status = mark_client_expired_locked(conf);
if (status)
goto out;
+ trace_nfsd_clid_replaced(&conf->cl_clientid);
}
new->cl_minorversion = cstate->minorversion;
new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];
- gen_clid(new, nn);
+ /* Contrived initial CREATE_SESSION response */
+ new->cl_cs_slot.sl_status = nfserr_seq_misordered;
+
add_to_unconfirmed(new);
swap(new, conf);
out_copy:
@@ -2655,6 +3785,23 @@ out_copy:
exid->seqid = conf->cl_cs_slot.sl_seqid + 1;
nfsd4_set_ex_flags(conf, exid);
+ exid->nii_domain.len = sizeof("kernel.org") - 1;
+ exid->nii_domain.data = "kernel.org";
+
+ /*
+ * Note that RFC 8881 places no length limit on
+ * nii_name, but this implementation permits no
+ * more than NFS4_OPAQUE_LIMIT bytes.
+ */
+ exid->nii_name.len = strlen(exid->server_impl_name);
+ if (exid->nii_name.len > NFS4_OPAQUE_LIMIT)
+ exid->nii_name.len = NFS4_OPAQUE_LIMIT;
+ exid->nii_name.data = exid->server_impl_name;
+
+ /* just send zeros - the date is in nii_name */
+ exid->nii_time.tv_sec = 0;
+ exid->nii_time.tv_nsec = 0;
+
dprintk("nfsd4_exchange_id seqid %d flags %x\n",
conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags);
status = nfs_ok;
@@ -2664,19 +3811,25 @@ out:
out_nolock:
if (new)
expire_client(new);
- if (unconf)
+ if (unconf) {
+ trace_nfsd_clid_expire_unconf(&unconf->cl_clientid);
expire_client(unconf);
+ }
return status;
}
-static __be32
-check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
+void
+nfsd4_exchange_id_release(union nfsd4_op_u *u)
{
- dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
- slot_seqid);
+ struct nfsd4_exchange_id *exid = &u->exchange_id;
+
+ kfree(exid->server_impl_name);
+}
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
+{
/* The slot is in use, and no response has been sent. */
- if (slot_inuse) {
+ if (flags & NFSD4_SLOT_INUSE) {
if (seqid == slot_seqid)
return nfserr_jukebox;
else
@@ -2685,6 +3838,8 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
/* Note unsigned 32-bit arithmetic handles wraparound: */
if (likely(seqid == slot_seqid + 1))
return nfs_ok;
+ if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+ return nfs_ok;
if (seqid == slot_seqid)
return nfserr_replay_cache;
return nfserr_seq_misordered;
@@ -2743,17 +3898,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
- /*
- * Note decreasing slot size below client's request may make it
- * difficult for client to function correctly, whereas
- * decreasing the number of slots will (just?) affect
- * performance. When short on memory we therefore prefer to
- * decrease number of slots instead of their size. Clients that
- * request larger slots than they need will get poor results:
- */
- ca->maxreqs = nfsd4_get_drc_mem(ca);
- if (!ca->maxreqs)
- return nfserr_jukebox;
return nfs_ok;
}
@@ -2814,10 +3958,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
struct nfsd4_create_session *cr_ses = &u->create_session;
struct sockaddr *sa = svc_addr(rqstp);
struct nfs4_client *conf, *unconf;
+ struct nfsd4_clid_slot *cs_slot;
struct nfs4_client *old = NULL;
struct nfsd4_session *new;
struct nfsd4_conn *conn;
- struct nfsd4_clid_slot *cs_slot = NULL;
__be32 status = 0;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -2831,93 +3975,120 @@ nfsd4_create_session(struct svc_rqst *rqstp,
return status;
status = check_backchannel_attrs(&cr_ses->back_channel);
if (status)
- goto out_release_drc_mem;
+ goto out_err;
status = nfserr_jukebox;
new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
if (!new)
- goto out_release_drc_mem;
+ goto out_err;
conn = alloc_conn_from_crses(rqstp, cr_ses);
if (!conn)
goto out_free_session;
spin_lock(&nn->client_lock);
+
+ /* RFC 8881 Section 18.36.4 Phase 1: Client record look-up. */
unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
conf = find_confirmed_client(&cr_ses->clientid, true, nn);
- WARN_ON_ONCE(conf && unconf);
+ if (!conf && !unconf) {
+ status = nfserr_stale_clientid;
+ goto out_free_conn;
+ }
+
+ /* RFC 8881 Section 18.36.4 Phase 2: Sequence ID processing. */
+ if (conf) {
+ cs_slot = &conf->cl_cs_slot;
+ trace_nfsd_slot_seqid_conf(conf, cr_ses);
+ } else {
+ cs_slot = &unconf->cl_cs_slot;
+ trace_nfsd_slot_seqid_unconf(unconf, cr_ses);
+ }
+ status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
+ switch (status) {
+ case nfs_ok:
+ cs_slot->sl_seqid++;
+ cr_ses->seqid = cs_slot->sl_seqid;
+ break;
+ case nfserr_replay_cache:
+ status = nfsd4_replay_create_session(cr_ses, cs_slot);
+ fallthrough;
+ case nfserr_jukebox:
+ /* The server MUST NOT cache NFS4ERR_DELAY */
+ goto out_free_conn;
+ default:
+ goto out_cache_error;
+ }
+ /* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */
if (conf) {
status = nfserr_wrong_cred;
if (!nfsd4_mach_creds_match(conf, rqstp))
- goto out_free_conn;
- cs_slot = &conf->cl_cs_slot;
- status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
- if (status) {
- if (status == nfserr_replay_cache)
- status = nfsd4_replay_create_session(cr_ses, cs_slot);
- goto out_free_conn;
- }
- } else if (unconf) {
+ goto out_cache_error;
+ } else {
+ status = nfserr_clid_inuse;
if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
!rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
- status = nfserr_clid_inuse;
- goto out_free_conn;
+ trace_nfsd_clid_cred_mismatch(unconf, rqstp);
+ goto out_cache_error;
}
status = nfserr_wrong_cred;
if (!nfsd4_mach_creds_match(unconf, rqstp))
- goto out_free_conn;
- cs_slot = &unconf->cl_cs_slot;
- status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
- if (status) {
- /* an unconfirmed replay returns misordered */
- status = nfserr_seq_misordered;
- goto out_free_conn;
- }
+ goto out_cache_error;
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
status = mark_client_expired_locked(old);
- if (status) {
- old = NULL;
- goto out_free_conn;
- }
+ if (status)
+ goto out_expired_error;
+ trace_nfsd_clid_replaced(&old->cl_clientid);
}
move_to_confirmed(unconf);
conf = unconf;
- } else {
- status = nfserr_stale_clientid;
- goto out_free_conn;
}
+
+ /* RFC 8881 Section 18.36.4 Phase 4: Session creation. */
status = nfs_ok;
/* Persistent sessions are not supported */
cr_ses->flags &= ~SESSION4_PERSIST;
/* Upshifting from TCP to RDMA is not supported */
cr_ses->flags &= ~SESSION4_RDMA;
+ /* Report the correct number of backchannel slots */
+ cr_ses->back_channel.maxreqs = new->se_cb_highest_slot + 1;
init_session(rqstp, new, conf, cr_ses);
nfsd4_get_session_locked(new);
memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
NFS4_MAX_SESSIONID_LEN);
- cs_slot->sl_seqid++;
- cr_ses->seqid = cs_slot->sl_seqid;
/* cache solo and embedded create sessions under the client_lock */
nfsd4_cache_create_session(cr_ses, cs_slot, status);
spin_unlock(&nn->client_lock);
+ if (conf == unconf)
+ fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
/* init connection and backchannel */
nfsd4_init_conn(rqstp, conn, new);
nfsd4_put_session(new);
if (old)
expire_client(old);
return status;
+
+out_expired_error:
+ /*
+ * Revert the slot seq_nr change so the server will process
+ * the client's resend instead of returning a cached response.
+ */
+ if (status == nfserr_jukebox) {
+ cs_slot->sl_seqid--;
+ cr_ses->seqid = cs_slot->sl_seqid;
+ goto out_free_conn;
+ }
+out_cache_error:
+ nfsd4_cache_create_session(cr_ses, cs_slot, status);
out_free_conn:
spin_unlock(&nn->client_lock);
free_conn(conn);
- if (old)
- expire_client(old);
out_free_session:
__free_session(new);
-out_release_drc_mem:
- nfsd4_put_drc_mem(&cr_ses->fore_channel);
+out_err:
return status;
}
@@ -2931,7 +4102,7 @@ static __be32 nfsd4_map_bcts_dir(u32 *dir)
case NFS4_CDFC4_BACK_OR_BOTH:
*dir = NFS4_CDFC4_BOTH;
return nfs_ok;
- };
+ }
return nfserr_inval;
}
@@ -2957,6 +4128,47 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp,
return nfs_ok;
}
+static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
+{
+ struct nfsd4_conn *c;
+
+ list_for_each_entry(c, &s->se_conns, cn_persession) {
+ if (c->cn_xprt == xpt) {
+ return c;
+ }
+ }
+ return NULL;
+}
+
+static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst,
+ struct nfsd4_session *session, u32 req, struct nfsd4_conn **conn)
+{
+ struct nfs4_client *clp = session->se_client;
+ struct svc_xprt *xpt = rqst->rq_xprt;
+ struct nfsd4_conn *c;
+ __be32 status;
+
+ /* Following the last paragraph of RFC 5661 Section 18.34.3: */
+ spin_lock(&clp->cl_lock);
+ c = __nfsd4_find_conn(xpt, session);
+ if (!c)
+ status = nfserr_noent;
+ else if (req == c->cn_flags)
+ status = nfs_ok;
+ else if (req == NFS4_CDFC4_FORE_OR_BOTH &&
+ c->cn_flags != NFS4_CDFC4_BACK)
+ status = nfs_ok;
+ else if (req == NFS4_CDFC4_BACK_OR_BOTH &&
+ c->cn_flags != NFS4_CDFC4_FORE)
+ status = nfs_ok;
+ else
+ status = nfserr_inval;
+ spin_unlock(&clp->cl_lock);
+ if (status == nfs_ok && conn)
+ *conn = c;
+ return status;
+}
+
__be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -2978,6 +4190,17 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
status = nfserr_wrong_cred;
if (!nfsd4_mach_creds_match(session->se_client, rqstp))
goto out;
+ status = nfsd4_match_existing_connection(rqstp, session,
+ bcts->dir, &conn);
+ if (status == nfs_ok) {
+ if (bcts->dir == NFS4_CDFC4_FORE_OR_BOTH ||
+ bcts->dir == NFS4_CDFC4_BACK)
+ conn->cn_flags |= NFS4_CDFC4_BACK;
+ nfsd4_probe_callback(session->se_client);
+ goto out;
+ }
+ if (status == nfserr_inval)
+ goto out;
status = nfsd4_map_bcts_dir(&bcts->dir);
if (status)
goto out;
@@ -3043,18 +4266,6 @@ out:
return status;
}
-static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
-{
- struct nfsd4_conn *c;
-
- list_for_each_entry(c, &s->se_conns, cn_persession) {
- if (c->cn_xprt == xpt) {
- return c;
- }
- }
- return NULL;
-}
-
static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
{
struct nfs4_client *clp = ses->se_client;
@@ -3106,12 +4317,17 @@ static bool replay_matches_cache(struct svc_rqst *rqstp,
(bool)seq->cachethis)
return false;
/*
- * If there's an error than the reply can have fewer ops than
- * the call. But if we cached a reply with *more* ops than the
- * call you're sending us now, then this new call is clearly not
- * really a replay of the old one:
+ * If there's an error then the reply can have fewer ops than
+ * the call.
*/
- if (slot->sl_opcnt < argp->opcnt)
+ if (slot->sl_opcnt < argp->opcnt && !slot->sl_status)
+ return false;
+ /*
+ * But if we cached a reply with *more* ops than the call you're
+ * sending us now, then this new call is clearly not really a
+ * replay of the old one:
+ */
+ if (slot->sl_opcnt > argp->opcnt)
return false;
/* This is the only check explicitly called by spec: */
if (!same_creds(&rqstp->rq_cred, &slot->sl_cred))
@@ -3125,13 +4341,43 @@ static bool replay_matches_cache(struct svc_rqst *rqstp,
return true;
}
+/*
+ * Note that the response is constructed here both for the case
+ * of a new SEQUENCE request and for a replayed SEQUENCE request.
+ * We do not cache SEQUENCE responses as SEQUENCE is idempotent.
+ */
+static void nfsd4_construct_sequence_response(struct nfsd4_session *session,
+ struct nfsd4_sequence *seq)
+{
+ struct nfs4_client *clp = session->se_client;
+
+ seq->maxslots_response = max(session->se_target_maxslots,
+ seq->maxslots);
+ seq->target_maxslots = session->se_target_maxslots;
+
+ switch (clp->cl_cb_state) {
+ case NFSD4_CB_DOWN:
+ seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
+ break;
+ case NFSD4_CB_FAULT:
+ seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT;
+ break;
+ default:
+ seq->status_flags = 0;
+ }
+ if (!list_empty(&clp->cl_revoked))
+ seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
+ if (atomic_read(&clp->cl_admin_revoked))
+ seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
+}
+
__be32
nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_sequence *seq = &u->sequence;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
- struct xdr_stream *xdr = &resp->xdr;
+ struct xdr_stream *xdr = resp->xdr;
struct nfsd4_session *session;
struct nfs4_client *clp;
struct nfsd4_slot *slot;
@@ -3170,16 +4416,14 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (seq->slotid >= session->se_fchannel.maxreqs)
goto out_put_session;
- slot = session->se_slots[seq->slotid];
+ slot = xa_load(&session->se_slots, seq->slotid);
dprintk("%s: slotid %d\n", __func__, seq->slotid);
- /* We do not negotiate the number of slots yet, so set the
- * maxslots to the session maxreqs which is used to encode
- * sr_highest_slotid and the sr_target_slot id to maxslots */
- seq->maxslots = session->se_fchannel.maxreqs;
+ trace_nfsd_slot_seqid_sequence(clp, seq, slot);
+
+ nfsd4_construct_sequence_response(session, seq);
- status = check_slot_seqid(seq->seqid, slot->sl_seqid,
- slot->sl_flags & NFSD4_SLOT_INUSE);
+ status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
if (status == nfserr_replay_cache) {
status = nfserr_seq_misordered;
if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -3204,6 +4448,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out_put_session;
+ if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+ slot->sl_generation == session->se_slot_gen &&
+ seq->maxslots <= session->se_target_maxslots)
+ /* Client acknowledged our reduce maxreqs */
+ free_session_slots(session, session->se_target_maxslots);
+
buflen = (seq->cachethis) ?
session->se_fchannel.maxresp_cached :
session->se_fchannel.maxresp_sz;
@@ -3211,12 +4461,14 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfserr_rep_too_big;
if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
goto out_put_session;
- svc_reserve(rqstp, buflen);
+ svc_reserve_auth(rqstp, buflen);
status = nfs_ok;
- /* Success! bump slot seqid */
+ /* Success! accept new slot seqid */
slot->sl_seqid = seq->seqid;
+ slot->sl_flags &= ~NFSD4_SLOT_REUSED;
slot->sl_flags |= NFSD4_SLOT_INUSE;
+ slot->sl_generation = session->se_slot_gen;
if (seq->cachethis)
slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
else
@@ -3226,19 +4478,49 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->session = session;
cstate->clp = clp;
-out:
- switch (clp->cl_cb_state) {
- case NFSD4_CB_DOWN:
- seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
- break;
- case NFSD4_CB_FAULT:
- seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT;
- break;
- default:
- seq->status_flags = 0;
+ /*
+ * If the client ever uses the highest available slot,
+ * gently try to allocate another 20%. This allows
+ * fairly quick growth without grossly over-shooting what
+ * the client might use.
+ */
+ if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+ session->se_target_maxslots >= session->se_fchannel.maxreqs &&
+ session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
+ int s = session->se_fchannel.maxreqs;
+ int cnt = DIV_ROUND_UP(s, 5);
+ void *prev_slot;
+
+ do {
+ /*
+ * GFP_NOWAIT both allows allocation under a
+ * spinlock, and only succeeds if there is
+ * plenty of memory.
+ */
+ slot = nfsd4_alloc_slot(&session->se_fchannel, s,
+ GFP_NOWAIT);
+ prev_slot = xa_load(&session->se_slots, s);
+ if (xa_is_value(prev_slot) && slot) {
+ slot->sl_seqid = xa_to_value(prev_slot);
+ slot->sl_flags |= NFSD4_SLOT_REUSED;
+ }
+ if (slot &&
+ !xa_is_err(xa_store(&session->se_slots, s, slot,
+ GFP_NOWAIT))) {
+ s += 1;
+ session->se_fchannel.maxreqs = s;
+ atomic_add(s - session->se_target_maxslots,
+ &nfsd_total_target_slots);
+ session->se_target_maxslots = s;
+ } else {
+ kfree(slot);
+ slot = NULL;
+ }
+ } while (slot && --cnt > 0);
}
- if (!list_empty(&clp->cl_revoked))
- seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
+
+out:
+ trace_nfsd_seq4_status(rqstp, seq);
out_no_session:
if (conn)
free_conn(conn);
@@ -3301,6 +4583,7 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp,
status = nfserr_wrong_cred;
goto out;
}
+ trace_nfsd_clid_destroyed(&clp->cl_clientid);
unhash_client_locked(clp);
out:
spin_unlock(&nn->client_lock);
@@ -3314,6 +4597,7 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
{
struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
+ struct nfs4_client *clp = cstate->clp;
__be32 status = 0;
if (rc->rca_one_fs) {
@@ -3327,12 +4611,11 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp,
}
status = nfserr_complete_already;
- if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
- &cstate->session->se_client->cl_flags))
+ if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags))
goto out;
status = nfserr_stale_clientid;
- if (is_client_expired(cstate->session->se_client))
+ if (is_client_expired(clp))
/*
* The following error isn't really legal.
* But we only get here if the client just explicitly
@@ -3343,7 +4626,9 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp,
goto out;
status = nfs_ok;
- nfsd4_client_record_create(cstate->session->se_client);
+ trace_nfsd_clid_reclaim_complete(&clp->cl_clientid);
+ nfsd4_client_record_create(clp);
+ inc_reclaim_complete(clp);
out:
return status;
}
@@ -3363,32 +4648,29 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
new = create_client(clname, rqstp, &clverifier);
if (new == NULL)
return nfserr_jukebox;
- /* Cases below refer to rfc 3530 section 14.2.33: */
spin_lock(&nn->client_lock);
conf = find_confirmed_client_by_name(&clname, nn);
if (conf && client_has_state(conf)) {
- /* case 0: */
status = nfserr_clid_inuse;
if (clp_used_exchangeid(conf))
goto out;
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
- char addr_str[INET6_ADDRSTRLEN];
- rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
- sizeof(addr_str));
- dprintk("NFSD: setclientid: string in use by client "
- "at %s\n", addr_str);
+ trace_nfsd_clid_cred_mismatch(conf, rqstp);
goto out;
}
}
unconf = find_unconfirmed_client_by_name(&clname, nn);
if (unconf)
unhash_client_locked(unconf);
- if (conf && same_verf(&conf->cl_verifier, &clverifier)) {
- /* case 1: probable callback update */
- copy_clid(new, conf);
- gen_confirm(new, nn);
- } else /* case 4 (new client) or cases 2, 3 (client reboot): */
- gen_clid(new, nn);
+ if (conf) {
+ if (same_verf(&conf->cl_verifier, &clverifier)) {
+ copy_clid(new, conf);
+ gen_confirm(new, nn);
+ } else
+ trace_nfsd_clid_verf_mismatch(conf, rqstp,
+ &clverifier);
+ } else
+ trace_nfsd_clid_fresh(new);
new->cl_minorversion = 0;
gen_callback(new, setclid, rqstp);
add_to_unconfirmed(new);
@@ -3401,12 +4683,13 @@ out:
spin_unlock(&nn->client_lock);
if (new)
free_client(new);
- if (unconf)
+ if (unconf) {
+ trace_nfsd_clid_expire_unconf(&unconf->cl_clientid);
expire_client(unconf);
+ }
return status;
}
-
__be32
nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
@@ -3435,43 +4718,60 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
* Nevertheless, RFC 7530 recommends INUSE for this case:
*/
status = nfserr_clid_inuse;
- if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred))
+ if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
+ trace_nfsd_clid_cred_mismatch(unconf, rqstp);
goto out;
- if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred))
+ }
+ if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
+ trace_nfsd_clid_cred_mismatch(conf, rqstp);
goto out;
- /* cases below refer to rfc 3530 section 14.2.34: */
+ }
if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) {
if (conf && same_verf(&confirm, &conf->cl_confirm)) {
- /* case 2: probable retransmit */
status = nfs_ok;
- } else /* case 4: client hasn't noticed we rebooted yet? */
+ } else
status = nfserr_stale_clientid;
goto out;
}
status = nfs_ok;
- if (conf) { /* case 1: callback update */
- old = unconf;
- unhash_client_locked(old);
- nfsd4_change_callback(conf, &unconf->cl_cb_conn);
- } else { /* case 3: normal case; new or rebooted client */
+ if (conf) {
+ if (get_client_locked(conf) == nfs_ok) {
+ old = unconf;
+ unhash_client_locked(old);
+ nfsd4_change_callback(conf, &unconf->cl_cb_conn);
+ } else {
+ conf = NULL;
+ }
+ }
+
+ if (!conf) {
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
status = nfserr_clid_inuse;
if (client_has_state(old)
&& !same_creds(&unconf->cl_cred,
- &old->cl_cred))
+ &old->cl_cred)) {
+ old = NULL;
goto out;
+ }
status = mark_client_expired_locked(old);
if (status) {
old = NULL;
goto out;
}
+ trace_nfsd_clid_replaced(&old->cl_clientid);
+ }
+ status = get_client_locked(unconf);
+ if (status != nfs_ok) {
+ old = NULL;
+ goto out;
}
move_to_confirmed(unconf);
conf = unconf;
}
- get_client_locked(conf);
spin_unlock(&nn->client_lock);
+ if (conf == unconf)
+ fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY);
nfsd4_probe_callback(conf);
spin_lock(&nn->client_lock);
put_client_renew_locked(conf);
@@ -3488,27 +4788,27 @@ static struct nfs4_file *nfsd4_alloc_file(void)
}
/* OPEN Share state helper functions */
-static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
- struct nfs4_file *fp)
-{
- lockdep_assert_held(&state_lock);
+static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp)
+{
refcount_set(&fp->fi_ref, 1);
spin_lock_init(&fp->fi_lock);
INIT_LIST_HEAD(&fp->fi_stateids);
INIT_LIST_HEAD(&fp->fi_delegations);
INIT_LIST_HEAD(&fp->fi_clnt_odstate);
- fh_copy_shallow(&fp->fi_fhandle, fh);
+ fh_copy_shallow(&fp->fi_fhandle, &fh->fh_handle);
fp->fi_deleg_file = NULL;
+ fp->fi_rdeleg_file = NULL;
fp->fi_had_conflict = false;
fp->fi_share_deny = 0;
memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
memset(fp->fi_access, 0, sizeof(fp->fi_access));
+ fp->fi_aliased = false;
+ fp->fi_inode = d_inode(fh->fh_dentry);
#ifdef CONFIG_NFSD_PNFS
INIT_LIST_HEAD(&fp->fi_lo_states);
atomic_set(&fp->fi_lo_recalls, 0);
#endif
- hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
}
void
@@ -3526,32 +4826,25 @@ nfsd4_free_slabs(void)
int
nfsd4_init_slabs(void)
{
- client_slab = kmem_cache_create("nfsd4_clients",
- sizeof(struct nfs4_client), 0, 0, NULL);
+ client_slab = KMEM_CACHE(nfs4_client, 0);
if (client_slab == NULL)
goto out;
- openowner_slab = kmem_cache_create("nfsd4_openowners",
- sizeof(struct nfs4_openowner), 0, 0, NULL);
+ openowner_slab = KMEM_CACHE(nfs4_openowner, 0);
if (openowner_slab == NULL)
goto out_free_client_slab;
- lockowner_slab = kmem_cache_create("nfsd4_lockowners",
- sizeof(struct nfs4_lockowner), 0, 0, NULL);
+ lockowner_slab = KMEM_CACHE(nfs4_lockowner, 0);
if (lockowner_slab == NULL)
goto out_free_openowner_slab;
- file_slab = kmem_cache_create("nfsd4_files",
- sizeof(struct nfs4_file), 0, 0, NULL);
+ file_slab = KMEM_CACHE(nfs4_file, 0);
if (file_slab == NULL)
goto out_free_lockowner_slab;
- stateid_slab = kmem_cache_create("nfsd4_stateids",
- sizeof(struct nfs4_ol_stateid), 0, 0, NULL);
+ stateid_slab = KMEM_CACHE(nfs4_ol_stateid, 0);
if (stateid_slab == NULL)
goto out_free_file_slab;
- deleg_slab = kmem_cache_create("nfsd4_delegations",
- sizeof(struct nfs4_delegation), 0, 0, NULL);
+ deleg_slab = KMEM_CACHE(nfs4_delegation, 0);
if (deleg_slab == NULL)
goto out_free_stateid_slab;
- odstate_slab = kmem_cache_create("nfsd4_odstate",
- sizeof(struct nfs4_clnt_odstate), 0, 0, NULL);
+ odstate_slab = KMEM_CACHE(nfs4_clnt_odstate, 0);
if (odstate_slab == NULL)
goto out_free_deleg_slab;
return 0;
@@ -3569,25 +4862,79 @@ out_free_openowner_slab:
out_free_client_slab:
kmem_cache_destroy(client_slab);
out:
- dprintk("nfsd4: out of memory while initializing nfsv4\n");
return -ENOMEM;
}
+static unsigned long
+nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct nfsd_net *nn = shrink->private_data;
+ long count;
+
+ count = atomic_read(&nn->nfsd_courtesy_clients);
+ if (!count)
+ count = atomic_long_read(&num_delegations);
+ if (count)
+ queue_work(laundry_wq, &nn->nfsd_shrinker_work);
+ return (unsigned long)count;
+}
+
+static unsigned long
+nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return SHRINK_STOP;
+}
+
+void
+nfsd4_init_leases_net(struct nfsd_net *nn)
+{
+ struct sysinfo si;
+ u64 max_clients;
+
+ nn->nfsd4_lease = 90; /* default lease time */
+ nn->nfsd4_grace = 90;
+ nn->somebody_reclaimed = false;
+ nn->track_reclaim_completes = false;
+ nn->clverifier_counter = get_random_u32();
+ nn->clientid_base = get_random_u32();
+ nn->clientid_counter = nn->clientid_base + 1;
+ nn->s2s_cp_cl_id = nn->clientid_counter++;
+
+ atomic_set(&nn->nfs4_client_count, 0);
+ si_meminfo(&si);
+ max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024);
+ max_clients *= NFS4_CLIENTS_PER_GB;
+ nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
+
+ atomic_set(&nn->nfsd_courtesy_clients, 0);
+}
+
+enum rp_lock {
+ RP_UNLOCKED,
+ RP_LOCKED,
+ RP_UNHASHED,
+};
+
static void init_nfs4_replay(struct nfs4_replay *rp)
{
rp->rp_status = nfserr_serverfault;
rp->rp_buflen = 0;
rp->rp_buf = rp->rp_ibuf;
- mutex_init(&rp->rp_mutex);
+ rp->rp_locked = RP_UNLOCKED;
}
-static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
- struct nfs4_stateowner *so)
+static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
+ struct nfs4_stateowner *so)
{
if (!nfsd4_has_session(cstate)) {
- mutex_lock(&so->so_replay.rp_mutex);
+ wait_var_event(&so->so_replay.rp_locked,
+ cmpxchg(&so->so_replay.rp_locked,
+ RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
+ if (so->so_replay.rp_locked == RP_UNHASHED)
+ return -EAGAIN;
cstate->replay_owner = nfs4_get_stateowner(so);
}
+ return 0;
}
void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
@@ -3596,7 +4943,7 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
if (so != NULL) {
cstate->replay_owner = NULL;
- mutex_unlock(&so->so_replay.rp_mutex);
+ store_release_wake_up(&so->so_replay.rp_locked, RP_UNLOCKED);
nfs4_put_stateowner(so);
}
}
@@ -3609,12 +4956,11 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
if (!sop)
return NULL;
- sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL);
+ xdr_netobj_dup(&sop->so_owner, owner, GFP_KERNEL);
if (!sop->so_owner.data) {
kmem_cache_free(slab, sop);
return NULL;
}
- sop->so_owner.len = owner->len;
INIT_LIST_HEAD(&sop->so_stateids);
sop->so_client = clp;
@@ -3663,7 +5009,8 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
continue;
if (local->st_stateowner != &oo->oo_owner)
continue;
- if (local->st_stid.sc_type == NFS4_OPEN_STID) {
+ if (local->st_stid.sc_type == SC_TYPE_OPEN &&
+ !local->st_stid.sc_status) {
ret = local;
refcount_inc(&ret->st_stid.sc_count);
break;
@@ -3672,22 +5019,75 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
return ret;
}
-static __be32
-nfsd4_verify_open_stid(struct nfs4_stid *s)
+static void nfsd4_drop_revoked_stid(struct nfs4_stid *s)
+ __releases(&s->sc_client->cl_lock)
{
- __be32 ret = nfs_ok;
+ struct nfs4_client *cl = s->sc_client;
+ LIST_HEAD(reaplist);
+ struct nfs4_ol_stateid *stp;
+ struct nfs4_delegation *dp;
+ bool unhashed;
switch (s->sc_type) {
- default:
+ case SC_TYPE_OPEN:
+ stp = openlockstateid(s);
+ if (unhash_open_stateid(stp, &reaplist))
+ put_ol_stateid_locked(stp, &reaplist);
+ spin_unlock(&cl->cl_lock);
+ free_ol_stateid_reaplist(&reaplist);
break;
- case 0:
- case NFS4_CLOSED_STID:
- case NFS4_CLOSED_DELEG_STID:
- ret = nfserr_bad_stateid;
+ case SC_TYPE_LOCK:
+ stp = openlockstateid(s);
+ unhashed = unhash_lock_stateid(stp);
+ spin_unlock(&cl->cl_lock);
+ if (unhashed)
+ nfs4_put_stid(s);
break;
- case NFS4_REVOKED_DELEG_STID:
- ret = nfserr_deleg_revoked;
+ case SC_TYPE_DELEG:
+ dp = delegstateid(s);
+ list_del_init(&dp->dl_recall_lru);
+ spin_unlock(&cl->cl_lock);
+ nfs4_put_stid(s);
+ break;
+ default:
+ spin_unlock(&cl->cl_lock);
}
+}
+
+static void nfsd40_drop_revoked_stid(struct nfs4_client *cl,
+ stateid_t *stid)
+{
+ /* NFSv4.0 has no way for the client to tell the server
+ * that it can forget an admin-revoked stateid.
+ * So we keep it around until the first time that the
+ * client uses it, and drop it the first time
+ * nfserr_admin_revoked is returned.
+ * For v4.1 and later we wait until explicitly told
+ * to free the stateid.
+ */
+ if (cl->cl_minorversion == 0) {
+ struct nfs4_stid *st;
+
+ spin_lock(&cl->cl_lock);
+ st = find_stateid_locked(cl, stid);
+ if (st)
+ nfsd4_drop_revoked_stid(st);
+ else
+ spin_unlock(&cl->cl_lock);
+ }
+}
+
+static __be32
+nfsd4_verify_open_stid(struct nfs4_stid *s)
+{
+ __be32 ret = nfs_ok;
+
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
+ ret = nfserr_admin_revoked;
+ else if (s->sc_status & SC_STATUS_REVOKED)
+ ret = nfserr_deleg_revoked;
+ else if (s->sc_status & SC_STATUS_CLOSED)
+ ret = nfserr_bad_stateid;
return ret;
}
@@ -3699,6 +5099,10 @@ nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX);
ret = nfsd4_verify_open_stid(&stp->st_stid);
+ if (ret == nfserr_admin_revoked)
+ nfsd40_drop_revoked_stid(stp->st_stid.sc_client,
+ &stp->st_stid.sc_stateid);
+
if (ret != nfs_ok)
mutex_unlock(&stp->st_mutex);
return ret;
@@ -3720,34 +5124,46 @@ nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
}
static struct nfs4_openowner *
-alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
- struct nfsd4_compound_state *cstate)
+find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
+ struct nfsd4_compound_state *cstate)
{
struct nfs4_client *clp = cstate->clp;
- struct nfs4_openowner *oo, *ret;
+ struct nfs4_openowner *oo, *new = NULL;
- oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
- if (!oo)
- return NULL;
- oo->oo_owner.so_ops = &openowner_ops;
- oo->oo_owner.so_is_open_owner = 1;
- oo->oo_owner.so_seqid = open->op_seqid;
- oo->oo_flags = 0;
- if (nfsd4_has_session(cstate))
- oo->oo_flags |= NFS4_OO_CONFIRMED;
- oo->oo_time = 0;
- oo->oo_last_closed_stid = NULL;
- INIT_LIST_HEAD(&oo->oo_close_lru);
+retry:
spin_lock(&clp->cl_lock);
- ret = find_openstateowner_str_locked(strhashval, open, clp);
- if (ret == NULL) {
- hash_openowner(oo, clp, strhashval);
- ret = oo;
- } else
- nfs4_free_stateowner(&oo->oo_owner);
-
+ oo = find_openstateowner_str(strhashval, open, clp);
+ if (!oo && new) {
+ hash_openowner(new, clp, strhashval);
+ spin_unlock(&clp->cl_lock);
+ return new;
+ }
spin_unlock(&clp->cl_lock);
- return ret;
+
+ if (oo && !(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+ /* Replace unconfirmed owners without checking for replay. */
+ release_openowner(oo);
+ oo = NULL;
+ }
+ if (oo) {
+ if (new)
+ nfs4_free_stateowner(&new->oo_owner);
+ return oo;
+ }
+
+ new = alloc_stateowner(openowner_slab, &open->op_owner, clp);
+ if (!new)
+ return NULL;
+ new->oo_owner.so_ops = &openowner_ops;
+ new->oo_owner.so_is_open_owner = 1;
+ new->oo_owner.so_seqid = open->op_seqid;
+ new->oo_flags = 0;
+ if (nfsd4_has_session(cstate))
+ new->oo_flags |= NFS4_OO_CONFIRMED;
+ new->oo_time = 0;
+ new->oo_last_closed_stid = NULL;
+ INIT_LIST_HEAD(&new->oo_close_lru);
+ goto retry;
}
static struct nfs4_ol_stateid *
@@ -3767,13 +5183,19 @@ retry:
spin_lock(&oo->oo_owner.so_client->cl_lock);
spin_lock(&fp->fi_lock);
+ if (nfs4_openowner_unhashed(oo)) {
+ mutex_unlock(&stp->st_mutex);
+ stp = NULL;
+ goto out_unlock;
+ }
+
retstp = nfsd4_find_existing_open(fp, open);
if (retstp)
goto out_unlock;
open->op_stp = NULL;
refcount_inc(&stp->st_stid.sc_count);
- stp->st_stid.sc_type = NFS4_OPEN_STID;
+ stp->st_stid.sc_type = SC_TYPE_OPEN;
INIT_LIST_HEAD(&stp->st_locks);
stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner);
get_nfs4_file(fp);
@@ -3823,7 +5245,10 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
* Wait for the refcount to drop to 2. Since it has been unhashed,
* there should be no danger of the refcount going back up again at
* this point.
+ * Some threads with a reference might be waiting for rp_locked,
+ * so tell them to stop waiting.
*/
+ store_release_wake_up(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
release_all_access(s);
@@ -3836,60 +5261,86 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
last = oo->oo_last_closed_stid;
oo->oo_last_closed_stid = s;
list_move_tail(&oo->oo_close_lru, &nn->close_lru);
- oo->oo_time = get_seconds();
+ oo->oo_time = ktime_get_boottime_seconds();
spin_unlock(&nn->client_lock);
if (last)
nfs4_put_stid(&last->st_stid);
}
-/* search file_hashtbl[] for file */
-static struct nfs4_file *
-find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
+static noinline_for_stack struct nfs4_file *
+nfsd4_file_hash_lookup(const struct svc_fh *fhp)
{
- struct nfs4_file *fp;
+ struct inode *inode = d_inode(fhp->fh_dentry);
+ struct rhlist_head *tmp, *list;
+ struct nfs4_file *fi;
- hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
- if (fh_match(&fp->fi_fhandle, fh)) {
- if (refcount_inc_not_zero(&fp->fi_ref))
- return fp;
+ rcu_read_lock();
+ list = rhltable_lookup(&nfs4_file_rhltable, &inode,
+ nfs4_file_rhash_params);
+ rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
+ if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
+ if (refcount_inc_not_zero(&fi->fi_ref)) {
+ rcu_read_unlock();
+ return fi;
+ }
}
}
+ rcu_read_unlock();
return NULL;
}
-struct nfs4_file *
-find_file(struct knfsd_fh *fh)
-{
- struct nfs4_file *fp;
- unsigned int hashval = file_hashval(fh);
+/*
+ * On hash insertion, identify entries with the same inode but
+ * distinct filehandles. They will all be on the list returned
+ * by rhltable_lookup().
+ *
+ * inode->i_lock prevents racing insertions from adding an entry
+ * for the same inode/fhp pair twice.
+ */
+static noinline_for_stack struct nfs4_file *
+nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp)
+{
+ struct inode *inode = d_inode(fhp->fh_dentry);
+ struct rhlist_head *tmp, *list;
+ struct nfs4_file *ret = NULL;
+ bool alias_found = false;
+ struct nfs4_file *fi;
+ int err;
rcu_read_lock();
- fp = find_file_locked(fh, hashval);
- rcu_read_unlock();
- return fp;
-}
+ spin_lock(&inode->i_lock);
-static struct nfs4_file *
-find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
-{
- struct nfs4_file *fp;
- unsigned int hashval = file_hashval(fh);
+ list = rhltable_lookup(&nfs4_file_rhltable, &inode,
+ nfs4_file_rhash_params);
+ rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) {
+ if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) {
+ if (refcount_inc_not_zero(&fi->fi_ref))
+ ret = fi;
+ } else
+ fi->fi_aliased = alias_found = true;
+ }
+ if (ret)
+ goto out_unlock;
- rcu_read_lock();
- fp = find_file_locked(fh, hashval);
- rcu_read_unlock();
- if (fp)
- return fp;
+ nfsd4_file_init(fhp, new);
+ err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist,
+ nfs4_file_rhash_params);
+ if (err)
+ goto out_unlock;
- spin_lock(&state_lock);
- fp = find_file_locked(fh, hashval);
- if (likely(fp == NULL)) {
- nfsd4_init_file(fh, hashval, new);
- fp = new;
- }
- spin_unlock(&state_lock);
+ new->fi_aliased = alias_found;
+ ret = new;
- return fp;
+out_unlock:
+ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
+ return ret;
+}
+
+static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi)
+{
+ rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist,
+ nfs4_file_rhash_params);
}
/*
@@ -3902,9 +5353,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
struct nfs4_file *fp;
__be32 ret = nfs_ok;
- fp = find_file(&current_fh->fh_handle);
+ fp = nfsd4_file_hash_lookup(current_fh);
if (!fp)
return ret;
+
/* Check for conflicting share reservations */
spin_lock(&fp->fi_lock);
if (fp->fi_share_deny & deny_type)
@@ -3914,6 +5366,35 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
return ret;
}
+static bool nfsd4_deleg_present(const struct inode *inode)
+{
+ struct file_lock_context *ctx = locks_inode_context(inode);
+
+ return ctx && !list_empty_careful(&ctx->flc_lease);
+}
+
+/**
+ * nfsd_wait_for_delegreturn - wait for delegations to be returned
+ * @rqstp: the RPC transaction being executed
+ * @inode: in-core inode of the file being waited for
+ *
+ * The timeout prevents deadlock if all nfsd threads happen to be
+ * tied up waiting for returning delegations.
+ *
+ * Return values:
+ * %true: delegation was returned
+ * %false: timed out waiting for delegreturn
+ */
+bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode)
+{
+ long __maybe_unused timeo;
+
+ timeo = wait_var_event_timeout(inode, !nfsd4_deleg_present(inode),
+ NFSD_DELEGRETURN_TIMEOUT);
+ trace_nfsd_delegret_wakeup(rqstp, inode, timeo);
+ return timeo > 0;
+}
+
static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
{
struct nfs4_delegation *dp = cb_to_delegation(cb);
@@ -3930,8 +5411,8 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
* queued for a lease break. Don't queue it again.
*/
spin_lock(&state_lock);
- if (dp->dl_time == 0) {
- dp->dl_time = get_seconds();
+ if (delegation_hashed(dp) && dp->dl_time == 0) {
+ dp->dl_time = ktime_get_boottime_seconds();
list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
}
spin_unlock(&state_lock);
@@ -3942,12 +5423,18 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
{
struct nfs4_delegation *dp = cb_to_delegation(cb);
- if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID)
- return 1;
+ trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
+
+ if (dp->dl_stid.sc_status)
+ /* CLOSED or REVOKED */
+ return 1;
switch (task->tk_status) {
case 0:
return 1;
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
case -EBADHANDLE:
case -NFS4ERR_BAD_STATEID:
/*
@@ -3958,9 +5445,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
rpc_delay(task, 2 * HZ);
return 0;
}
- /*FALLTHRU*/
+ fallthrough;
default:
- return -1;
+ return 1;
}
}
@@ -3975,28 +5462,47 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
.prepare = nfsd4_cb_recall_prepare,
.done = nfsd4_cb_recall_done,
.release = nfsd4_cb_recall_release,
+ .opcode = OP_CB_RECALL,
};
static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{
+ bool queued;
+
+ if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags))
+ return;
+
/*
* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
* callback (and since the lease code is serialized by the
- * i_lock) we know the server hasn't removed the lease yet, and
+ * flc_lock) we know the server hasn't removed the lease yet, and
* we know it's safe to take a reference.
*/
refcount_inc(&dp->dl_stid.sc_count);
- nfsd4_run_cb(&dp->dl_recall);
+ queued = nfsd4_run_cb(&dp->dl_recall);
+ WARN_ON_ONCE(!queued);
+ if (!queued)
+ refcount_dec(&dp->dl_stid.sc_count);
}
-/* Called from break_lease() with i_lock held. */
+/* Called from break_lease() with flc_lock held. */
static bool
-nfsd_break_deleg_cb(struct file_lock *fl)
+nfsd_break_deleg_cb(struct file_lease *fl)
{
- bool ret = false;
- struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
+ struct nfs4_delegation *dp = (struct nfs4_delegation *) fl->c.flc_owner;
struct nfs4_file *fp = dp->dl_stid.sc_file;
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+ struct nfsd_net *nn;
+
+ trace_nfsd_cb_recall(&dp->dl_stid);
+
+ dp->dl_recalled = true;
+ atomic_inc(&clp->cl_delegs_in_recall);
+ if (try_to_expire_client(clp)) {
+ nn = net_generic(clp->net, nfsd_net_id);
+ mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
+ }
/*
* We don't want the locks code to timeout the lease for us;
@@ -4005,24 +5511,49 @@ nfsd_break_deleg_cb(struct file_lock *fl)
*/
fl->fl_break_time = 0;
- spin_lock(&fp->fi_lock);
fp->fi_had_conflict = true;
nfsd_break_one_deleg(dp);
- spin_unlock(&fp->fi_lock);
- return ret;
+ return false;
+}
+
+/**
+ * nfsd_breaker_owns_lease - Check if lease conflict was resolved
+ * @fl: Lock state to check
+ *
+ * Return values:
+ * %true: Lease conflict was resolved
+ * %false: Lease conflict was not resolved.
+ */
+static bool nfsd_breaker_owns_lease(struct file_lease *fl)
+{
+ struct nfs4_delegation *dl = fl->c.flc_owner;
+ struct svc_rqst *rqst;
+ struct nfs4_client *clp;
+
+ rqst = nfsd_current_rqst();
+ if (!nfsd_v4client(rqst))
+ return false;
+ clp = *(rqst->rq_lease_breaker);
+ return dl->dl_stid.sc_client == clp;
}
static int
-nfsd_change_deleg_cb(struct file_lock *onlist, int arg,
+nfsd_change_deleg_cb(struct file_lease *onlist, int arg,
struct list_head *dispose)
{
- if (arg & F_UNLCK)
+ struct nfs4_delegation *dp = (struct nfs4_delegation *) onlist->c.flc_owner;
+ struct nfs4_client *clp = dp->dl_stid.sc_client;
+
+ if (arg & F_UNLCK) {
+ if (dp->dl_recalled)
+ atomic_dec(&clp->cl_delegs_in_recall);
return lease_modify(onlist, arg, dispose);
- else
+ } else
return -EAGAIN;
}
-static const struct lock_manager_operations nfsd_lease_mng_ops = {
+static const struct lease_manager_operations nfsd_lease_mng_ops = {
+ .lm_breaker_owns_lease = nfsd_breaker_owns_lease,
.lm_break = nfsd_break_deleg_cb,
.lm_change = nfsd_change_deleg_cb,
};
@@ -4038,39 +5569,37 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4
return nfserr_bad_seqid;
}
-static __be32 lookup_clientid(clientid_t *clid,
- struct nfsd4_compound_state *cstate,
- struct nfsd_net *nn)
+static struct nfs4_client *lookup_clientid(clientid_t *clid, bool sessions,
+ struct nfsd_net *nn)
{
struct nfs4_client *found;
+ spin_lock(&nn->client_lock);
+ found = find_confirmed_client(clid, sessions, nn);
+ if (found)
+ atomic_inc(&found->cl_rpc_users);
+ spin_unlock(&nn->client_lock);
+ return found;
+}
+
+static __be32 set_client(clientid_t *clid,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd_net *nn)
+{
if (cstate->clp) {
- found = cstate->clp;
- if (!same_clid(&found->cl_clientid, clid))
+ if (!same_clid(&cstate->clp->cl_clientid, clid))
return nfserr_stale_clientid;
return nfs_ok;
}
-
if (STALE_CLIENTID(clid, nn))
return nfserr_stale_clientid;
-
/*
- * For v4.1+ we get the client in the SEQUENCE op. If we don't have one
- * cached already then we know this is for is for v4.0 and "sessions"
- * will be false.
+ * We're in the 4.0 case (otherwise the SEQUENCE op would have
+ * set cstate->clp), so session = false:
*/
- WARN_ON_ONCE(cstate->session);
- spin_lock(&nn->client_lock);
- found = find_confirmed_client(clid, false, nn);
- if (!found) {
- spin_unlock(&nn->client_lock);
+ cstate->clp = lookup_clientid(clid, false, nn);
+ if (!cstate->clp)
return nfserr_expired;
- }
- atomic_inc(&found->cl_refcount);
- spin_unlock(&nn->client_lock);
-
- /* Cache the nfs4_client in cstate! */
- cstate->clp = found;
return nfs_ok;
}
@@ -4084,8 +5613,6 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
struct nfs4_openowner *oo = NULL;
__be32 status;
- if (STALE_CLIENTID(&open->op_clientid, nn))
- return nfserr_stale_clientid;
/*
* In case we need it later, after we've already created the
* file and don't want to risk a further failure:
@@ -4094,33 +5621,25 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
if (open->op_file == NULL)
return nfserr_jukebox;
- status = lookup_clientid(clientid, cstate, nn);
+ status = set_client(clientid, cstate, nn);
if (status)
return status;
clp = cstate->clp;
strhashval = ownerstr_hashval(&open->op_owner);
- oo = find_openstateowner_str(strhashval, open, clp);
+retry:
+ oo = find_or_alloc_open_stateowner(strhashval, open, cstate);
open->op_openowner = oo;
- if (!oo) {
- goto new_owner;
- }
- if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
- /* Replace unconfirmed owners without checking for replay. */
- release_openowner(oo);
- open->op_openowner = NULL;
- goto new_owner;
+ if (!oo)
+ return nfserr_jukebox;
+ if (nfsd4_cstate_assign_replay(cstate, &oo->oo_owner) == -EAGAIN) {
+ nfs4_put_stateowner(&oo->oo_owner);
+ goto retry;
}
status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
if (status)
return status;
- goto alloc_stateid;
-new_owner:
- oo = alloc_init_open_stateowner(strhashval, open, cstate);
- if (oo == NULL)
- return nfserr_jukebox;
- open->op_openowner = oo;
-alloc_stateid:
+
open->op_stp = nfs4_alloc_open_stateid(clp);
if (!open->op_stp)
return nfserr_jukebox;
@@ -4138,7 +5657,7 @@ alloc_stateid:
static inline __be32
nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
{
- if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+ if (!(flags & RD_STATE) && deleg_is_read(dp->dl_type))
return nfserr_openmode;
else
return nfs_ok;
@@ -4149,12 +5668,12 @@ static int share_access_to_flags(u32 share_access)
return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
}
-static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s)
+static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl,
+ stateid_t *s)
{
struct nfs4_stid *ret;
- ret = find_stateid_by_type(cl, s,
- NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
+ ret = find_stateid_by_type(cl, s, SC_TYPE_DELEG, SC_STATUS_REVOKED);
if (!ret)
return NULL;
return delegstateid(ret);
@@ -4177,10 +5696,15 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
if (deleg == NULL)
goto out;
- if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
+ if (deleg->dl_stid.sc_status & SC_STATUS_ADMIN_REVOKED) {
nfs4_put_stid(&deleg->dl_stid);
- if (cl->cl_minorversion)
- status = nfserr_deleg_revoked;
+ status = nfserr_admin_revoked;
+ goto out;
+ }
+ if (deleg->dl_stid.sc_status & SC_STATUS_REVOKED) {
+ nfs4_put_stid(&deleg->dl_stid);
+ nfsd40_drop_revoked_stid(cl, &open->op_delegate_stateid);
+ status = nfserr_deleg_revoked;
goto out;
}
flags = share_access_to_flags(open->op_share_access);
@@ -4218,18 +5742,21 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
.ia_valid = ATTR_SIZE,
.ia_size = 0,
};
+ struct nfsd_attrs attrs = {
+ .na_iattr = &iattr,
+ };
if (!open->op_truncate)
return 0;
if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
return nfserr_inval;
- return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
+ return nfsd_setattr(rqstp, fh, &attrs, NULL);
}
static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
- struct nfsd4_open *open)
+ struct nfsd4_open *open, bool new_stp)
{
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
__be32 status;
int oflag = nfs4_access_to_omode(open->op_share_access);
int access = nfs4_access_to_access(open->op_share_access);
@@ -4243,6 +5770,13 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
*/
status = nfs4_file_check_deny(fp, open->op_share_deny);
if (status != nfs_ok) {
+ if (status != nfserr_share_denied) {
+ spin_unlock(&fp->fi_lock);
+ goto out;
+ }
+ if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
+ stp, open->op_share_deny, false))
+ status = nfserr_jukebox;
spin_unlock(&fp->fi_lock);
goto out;
}
@@ -4250,6 +5784,13 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
/* set access to the file */
status = nfs4_file_get_access(fp, open->op_share_access);
if (status != nfs_ok) {
+ if (status != nfserr_share_denied) {
+ spin_unlock(&fp->fi_lock);
+ goto out;
+ }
+ if (nfs4_resolve_deny_conflicts_locked(fp, new_stp,
+ stp, open->op_share_access, true))
+ status = nfserr_jukebox;
spin_unlock(&fp->fi_lock);
goto out;
}
@@ -4265,18 +5806,26 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
if (!fp->fi_fds[oflag]) {
spin_unlock(&fp->fi_lock);
- status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
- if (status)
+
+ status = nfsd_file_acquire_opened(rqstp, cur_fh, access,
+ open->op_filp, &nf);
+ if (status != nfs_ok)
goto out_put_access;
+
spin_lock(&fp->fi_lock);
if (!fp->fi_fds[oflag]) {
- fp->fi_fds[oflag] = filp;
- filp = NULL;
+ fp->fi_fds[oflag] = nf;
+ nf = NULL;
}
}
spin_unlock(&fp->fi_lock);
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
+
+ status = nfserrno(nfsd_open_break_lease(cur_fh->fh_dentry->d_inode,
+ access));
+ if (status)
+ goto out_put_access;
status = nfsd4_truncate(rqstp, cur_fh, open);
if (status)
@@ -4291,21 +5840,30 @@ out_put_access:
}
static __be32
-nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
+nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp,
+ struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
+ struct nfsd4_open *open)
{
__be32 status;
unsigned char old_deny_bmap = stp->st_deny_bmap;
if (!test_access(open->op_share_access, stp))
- return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
+ return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open, false);
/* test and set deny mode */
spin_lock(&fp->fi_lock);
status = nfs4_file_check_deny(fp, open->op_share_deny);
- if (status == nfs_ok) {
+ switch (status) {
+ case nfs_ok:
set_deny(open->op_share_deny, stp);
fp->fi_share_deny |=
- (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
+ (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
+ break;
+ case nfserr_share_denied:
+ if (nfs4_resolve_deny_conflicts_locked(fp, false,
+ stp, open->op_share_deny, false))
+ status = nfserr_jukebox;
+ break;
}
spin_unlock(&fp->fi_lock);
@@ -4331,32 +5889,151 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}
-static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
- int flag)
+static struct file_lease *nfs4_alloc_init_lease(struct nfs4_delegation *dp)
{
- struct file_lock *fl;
+ struct file_lease *fl;
- fl = locks_alloc_lock();
+ fl = locks_alloc_lease();
if (!fl)
return NULL;
fl->fl_lmops = &nfsd_lease_mng_ops;
- fl->fl_flags = FL_DELEG;
- fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
- fl->fl_end = OFFSET_MAX;
- fl->fl_owner = (fl_owner_t)dp;
- fl->fl_pid = current->tgid;
- fl->fl_file = dp->dl_stid.sc_file->fi_deleg_file;
+ fl->c.flc_flags = FL_DELEG;
+ fl->c.flc_type = deleg_is_read(dp->dl_type) ? F_RDLCK : F_WRLCK;
+ fl->c.flc_owner = (fl_owner_t)dp;
+ fl->c.flc_pid = current->tgid;
+ fl->c.flc_file = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
return fl;
}
+static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
+ struct nfs4_file *fp)
+{
+ struct nfs4_ol_stateid *st;
+ struct file *f = fp->fi_deleg_file->nf_file;
+ struct inode *ino = file_inode(f);
+ int writes;
+
+ writes = atomic_read(&ino->i_writecount);
+ if (!writes)
+ return 0;
+ /*
+ * There could be multiple filehandles (hence multiple
+ * nfs4_files) referencing this file, but that's not too
+ * common; let's just give up in that case rather than
+ * trying to go look up all the clients using that other
+ * nfs4_file as well:
+ */
+ if (fp->fi_aliased)
+ return -EAGAIN;
+ /*
+ * If there's a close in progress, make sure that we see it
+ * clear any fi_fds[] entries before we see it decrement
+ * i_writecount:
+ */
+ smp_mb__after_atomic();
+
+ if (fp->fi_fds[O_WRONLY])
+ writes--;
+ if (fp->fi_fds[O_RDWR])
+ writes--;
+ if (writes > 0)
+ return -EAGAIN; /* There may be non-NFSv4 writers */
+ /*
+ * It's possible there are non-NFSv4 write opens in progress,
+ * but if they haven't incremented i_writecount yet then they
+ * also haven't called break lease yet; so, they'll break this
+ * lease soon enough. So, all that's left to check for is NFSv4
+ * opens:
+ */
+ spin_lock(&fp->fi_lock);
+ list_for_each_entry(st, &fp->fi_stateids, st_perfile) {
+ if (st->st_openstp == NULL /* it's an open */ &&
+ access_permit_write(st) &&
+ st->st_stid.sc_client != clp) {
+ spin_unlock(&fp->fi_lock);
+ return -EAGAIN;
+ }
+ }
+ spin_unlock(&fp->fi_lock);
+ /*
+ * There's a small chance that we could be racing with another
+ * NFSv4 open. However, any open that hasn't added itself to
+ * the fi_stateids list also hasn't called break_lease yet; so,
+ * they'll break this lease soon enough.
+ */
+ return 0;
+}
+
+/*
+ * It's possible that between opening the dentry and setting the delegation,
+ * that it has been renamed or unlinked. Redo the lookup to verify that this
+ * hasn't happened.
+ */
+static int
+nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp,
+ struct svc_fh *parent)
+{
+ struct svc_export *exp;
+ struct dentry *child;
+ __be32 err;
+
+ err = nfsd_lookup_dentry(open->op_rqstp, parent,
+ open->op_fname, open->op_fnamelen,
+ &exp, &child);
+
+ if (err)
+ return -EAGAIN;
+
+ exp_put(exp);
+ dput(child);
+ if (child != file_dentry(fp->fi_deleg_file->nf_file))
+ return -EAGAIN;
+
+ return 0;
+}
+
+/*
+ * We avoid breaking delegations held by a client due to its own activity, but
+ * clearing setuid/setgid bits on a write is an implicit activity and the client
+ * may not notice and continue using the old mode. Avoid giving out a delegation
+ * on setuid/setgid files when the client is requesting an open for write.
+ */
+static int
+nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf)
+{
+ struct inode *inode = file_inode(nf->nf_file);
+
+ if ((open->op_share_access & NFS4_SHARE_ACCESS_WRITE) &&
+ (inode->i_mode & (S_ISUID|S_ISGID)))
+ return -EAGAIN;
+ return 0;
+}
+
+#ifdef CONFIG_NFSD_V4_DELEG_TIMESTAMPS
+static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open)
+{
+ return open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS;
+}
+#else /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */
+static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open)
+{
+ return false;
+}
+#endif /* CONFIG NFSD_V4_DELEG_TIMESTAMPS */
+
static struct nfs4_delegation *
-nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
- struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
+nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
+ struct svc_fh *parent)
{
- int status = 0;
+ bool deleg_ts = nfsd4_want_deleg_timestamps(open);
+ struct nfs4_client *clp = stp->st_stid.sc_client;
+ struct nfs4_file *fp = stp->st_stid.sc_file;
+ struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
struct nfs4_delegation *dp;
- struct file *filp;
- struct file_lock *fl;
+ struct nfsd_file *nf = NULL;
+ struct file_lease *fl;
+ int status = 0;
+ u32 dl_type;
/*
* The fi_had_conflict and nfs_get_existing_delegation checks
@@ -4366,53 +6043,114 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
- filp = find_readable_file(fp);
- if (!filp) {
- /* We should always have a readable file here */
- WARN_ON_ONCE(1);
- return ERR_PTR(-EBADF);
+ /*
+ * Try for a write delegation first. RFC8881 section 10.4 says:
+ *
+ * "An OPEN_DELEGATE_WRITE delegation allows the client to handle,
+ * on its own, all opens."
+ *
+ * Furthermore, section 9.1.2 says:
+ *
+ * "In the case of READ, the server may perform the corresponding
+ * check on the access mode, or it may choose to allow READ for
+ * OPEN4_SHARE_ACCESS_WRITE, to accommodate clients whose WRITE
+ * implementation may unavoidably do reads (e.g., due to buffer
+ * cache constraints)."
+ *
+ * We choose to offer a write delegation for OPEN with the
+ * OPEN4_SHARE_ACCESS_WRITE access mode to accommodate such clients.
+ */
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
+ nf = find_writeable_file(fp);
+ dl_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG : OPEN_DELEGATE_WRITE;
+ }
+
+ /*
+ * If the file is being opened O_RDONLY or we couldn't get a O_RDWR
+ * file for some reason, then try for a read delegation instead.
+ */
+ if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
+ nf = find_readable_file(fp);
+ dl_type = deleg_ts ? OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ;
+ }
+
+ if (!nf)
+ return ERR_PTR(-EAGAIN);
+
+ /*
+ * File delegations and associated locks cannot be recovered if the
+ * export is from an NFS proxy server.
+ */
+ if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) {
+ nfsd_file_put(nf);
+ return ERR_PTR(-EOPNOTSUPP);
}
+
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
if (nfs4_delegation_exists(clp, fp))
status = -EAGAIN;
+ else if (nfsd4_verify_setuid_write(open, nf))
+ status = -EAGAIN;
else if (!fp->fi_deleg_file) {
- fp->fi_deleg_file = filp;
+ fp->fi_deleg_file = nf;
/* increment early to prevent fi_deleg_file from being
* cleared */
fp->fi_delegees = 1;
- filp = NULL;
+ nf = NULL;
} else
fp->fi_delegees++;
spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
if (status)
return ERR_PTR(status);
status = -ENOMEM;
- dp = alloc_init_deleg(clp, fp, fh, odstate);
+ dp = alloc_init_deleg(clp, fp, odstate, dl_type);
if (!dp)
goto out_delegees;
- fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ);
+ fl = nfs4_alloc_init_lease(dp);
if (!fl)
goto out_clnt_odstate;
- status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL);
+ status = kernel_setlease(fp->fi_deleg_file->nf_file,
+ fl->c.flc_type, &fl, NULL);
if (fl)
- locks_free_lock(fl);
+ locks_free_lease(fl);
if (status)
goto out_clnt_odstate;
+ if (parent) {
+ status = nfsd4_verify_deleg_dentry(open, fp, parent);
+ if (status)
+ goto out_unlock;
+ }
+
+ status = nfsd4_check_conflicting_opens(clp, fp);
+ if (status)
+ goto out_unlock;
+
+ /*
+ * Now that the deleg is set, check again to ensure that nothing
+ * raced in and changed the mode while we weren't looking.
+ */
+ status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file);
+ if (status)
+ goto out_unlock;
+
+ status = -EAGAIN;
+ if (fp->fi_had_conflict)
+ goto out_unlock;
+
spin_lock(&state_lock);
+ spin_lock(&clp->cl_lock);
spin_lock(&fp->fi_lock);
- if (fp->fi_had_conflict)
- status = -EAGAIN;
- else
- status = hash_delegation_locked(dp, fp);
+ status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
+ spin_unlock(&clp->cl_lock);
spin_unlock(&state_lock);
if (status)
@@ -4420,7 +6158,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
return dp;
out_unlock:
- vfs_setlease(fp->fi_deleg_file, F_UNLCK, NULL, (void **)&dp);
+ kernel_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
out_clnt_odstate:
put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_stid(&dp->dl_stid);
@@ -4431,51 +6169,124 @@ out_delegees:
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
{
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
if (status == -EAGAIN)
open->op_why_no_deleg = WND4_CONTENTION;
else {
open->op_why_no_deleg = WND4_RESOURCE;
switch (open->op_deleg_want) {
- case NFS4_SHARE_WANT_READ_DELEG:
- case NFS4_SHARE_WANT_WRITE_DELEG:
- case NFS4_SHARE_WANT_ANY_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG:
break;
- case NFS4_SHARE_WANT_CANCEL:
+ case OPEN4_SHARE_ACCESS_WANT_CANCEL:
open->op_why_no_deleg = WND4_CANCELLED;
break;
- case NFS4_SHARE_WANT_NO_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_NO_DELEG:
WARN_ON_ONCE(1);
}
}
}
+static bool
+nfs4_delegation_stat(struct nfs4_delegation *dp, struct svc_fh *currentfh,
+ struct kstat *stat)
+{
+ struct nfsd_file *nf = find_writeable_file(dp->dl_stid.sc_file);
+ struct path path;
+ int rc;
+
+ if (!nf)
+ return false;
+
+ path.mnt = currentfh->fh_export->ex_path.mnt;
+ path.dentry = file_dentry(nf->nf_file);
+
+ rc = vfs_getattr(&path, stat,
+ STATX_MODE | STATX_SIZE | STATX_ATIME |
+ STATX_MTIME | STATX_CTIME | STATX_CHANGE_COOKIE,
+ AT_STATX_SYNC_AS_STAT);
+
+ nfsd_file_put(nf);
+ return rc == 0;
+}
+
+/*
+ * Add NFS4_SHARE_ACCESS_READ to the write delegation granted on OPEN
+ * with NFS4_SHARE_ACCESS_WRITE by allocating separate nfsd_file and
+ * struct file to be used for read with delegation stateid.
+ *
+ */
+static bool
+nfsd4_add_rdaccess_to_wrdeleg(struct svc_rqst *rqstp, struct nfsd4_open *open,
+ struct svc_fh *fh, struct nfs4_ol_stateid *stp)
+{
+ struct nfs4_file *fp;
+ struct nfsd_file *nf = NULL;
+
+ if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) ==
+ NFS4_SHARE_ACCESS_WRITE) {
+ if (nfsd_file_acquire_opened(rqstp, fh, NFSD_MAY_READ, NULL, &nf))
+ return (false);
+ fp = stp->st_stid.sc_file;
+ spin_lock(&fp->fi_lock);
+ __nfs4_file_get_access(fp, NFS4_SHARE_ACCESS_READ);
+ fp = stp->st_stid.sc_file;
+ fp->fi_fds[O_RDONLY] = nf;
+ fp->fi_rdeleg_file = nf;
+ spin_unlock(&fp->fi_lock);
+ }
+ return true;
+}
+
/*
- * Attempt to hand out a delegation.
+ * The Linux NFS server does not offer write delegations to NFSv4.0
+ * clients in order to avoid conflicts between write delegations and
+ * GETATTRs requesting CHANGE or SIZE attributes.
+ *
+ * With NFSv4.1 and later minorversions, the SEQUENCE operation that
+ * begins each COMPOUND contains a client ID. Delegation recall can
+ * be avoided when the server recognizes the client sending a
+ * GETATTR also holds write delegation it conflicts with.
*
- * Note we don't support write delegations, and won't until the vfs has
- * proper support for them.
+ * However, the NFSv4.0 protocol does not enable a server to
+ * determine that a GETATTR originated from the client holding the
+ * conflicting delegation versus coming from some other client. Per
+ * RFC 7530 Section 16.7.5, the server must recall or send a
+ * CB_GETATTR even when the GETATTR originates from the client that
+ * holds the conflicting delegation.
+ *
+ * An NFSv4.0 client can trigger a pathological situation if it
+ * always sends a DELEGRETURN preceded by a conflicting GETATTR in
+ * the same COMPOUND. COMPOUND execution will always stop at the
+ * GETATTR and the DELEGRETURN will never get executed. The server
+ * eventually revokes the delegation, which can result in loss of
+ * open or lock state.
*/
static void
-nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
- struct nfs4_ol_stateid *stp)
+nfs4_open_delegation(struct svc_rqst *rqstp, struct nfsd4_open *open,
+ struct nfs4_ol_stateid *stp, struct svc_fh *currentfh,
+ struct svc_fh *fh)
{
- struct nfs4_delegation *dp;
struct nfs4_openowner *oo = openowner(stp->st_stateowner);
+ bool deleg_ts = nfsd4_want_deleg_timestamps(open);
struct nfs4_client *clp = stp->st_stid.sc_client;
- int cb_up;
+ struct svc_fh *parent = NULL;
+ struct nfs4_delegation *dp;
+ struct kstat stat;
int status = 0;
+ int cb_up;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
- open->op_recall = 0;
+ open->op_recall = false;
switch (open->op_claim_type) {
case NFS4_OPEN_CLAIM_PREVIOUS:
if (!cb_up)
- open->op_recall = 1;
- if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ)
- goto out_no_deleg;
+ open->op_recall = true;
break;
case NFS4_OPEN_CLAIM_NULL:
+ parent = currentfh;
+ fallthrough;
case NFS4_OPEN_CLAIM_FH:
/*
* Let's not give out any delegations till everyone's
@@ -4486,39 +6297,49 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
goto out_no_deleg;
if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
goto out_no_deleg;
- /*
- * Also, if the file was opened for write or
- * create, there's a good chance the client's
- * about to write to it, resulting in an
- * immediate recall (since we don't support
- * write delegations):
- */
- if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
- goto out_no_deleg;
- if (open->op_create == NFS4_OPEN_CREATE)
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE &&
+ !clp->cl_minorversion)
goto out_no_deleg;
break;
default:
goto out_no_deleg;
}
- dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate);
+ dp = nfs4_set_delegation(open, stp, parent);
if (IS_ERR(dp))
goto out_no_deleg;
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
- dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
- STATEID_VAL(&dp->dl_stid.sc_stateid));
- open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
+ struct file *f = dp->dl_stid.sc_file->fi_deleg_file->nf_file;
+
+ if (!nfsd4_add_rdaccess_to_wrdeleg(rqstp, open, fh, stp) ||
+ !nfs4_delegation_stat(dp, currentfh, &stat)) {
+ nfs4_put_stid(&dp->dl_stid);
+ destroy_delegation(dp);
+ goto out_no_deleg;
+ }
+ open->op_delegate_type = deleg_ts ? OPEN_DELEGATE_WRITE_ATTRS_DELEG :
+ OPEN_DELEGATE_WRITE;
+ dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
+ dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat);
+ dp->dl_atime = stat.atime;
+ dp->dl_ctime = stat.ctime;
+ dp->dl_mtime = stat.mtime;
+ spin_lock(&f->f_lock);
+ f->f_mode |= FMODE_NOCMTIME;
+ spin_unlock(&f->f_lock);
+ trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
+ } else {
+ open->op_delegate_type = deleg_ts && nfs4_delegation_stat(dp, currentfh, &stat) ?
+ OPEN_DELEGATE_READ_ATTRS_DELEG : OPEN_DELEGATE_READ;
+ dp->dl_atime = stat.atime;
+ trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
+ }
nfs4_put_stid(&dp->dl_stid);
return;
out_no_deleg:
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
- if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
- open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) {
- dprintk("NFSD: WARNING: refusing delegation reclaim\n");
- open->op_recall = 1;
- }
+ open->op_delegate_type = OPEN_DELEGATE_NONE;
/* 4.1 client asking for a delegation? */
if (open->op_deleg_want)
@@ -4529,21 +6350,44 @@ out_no_deleg:
static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
struct nfs4_delegation *dp)
{
- if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
- dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
- open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
- } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
- dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
- open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+ if (deleg_is_write(dp->dl_type)) {
+ if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_READ_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
+ open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
+ } else if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
+ open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
+ }
}
/* Otherwise the client must be confused wanting a delegation
* it already has, therefore we don't return
- * NFS4_OPEN_DELEGATE_NONE_EXT and reason.
+ * OPEN_DELEGATE_NONE_EXT and reason.
*/
}
+/* Are we returning only a delegation stateid? */
+static bool open_xor_delegation(struct nfsd4_open *open)
+{
+ if (!(open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION))
+ return false;
+ /* Did we actually get a delegation? */
+ if (!deleg_is_read(open->op_delegate_type) && !deleg_is_write(open->op_delegate_type))
+ return false;
+ return true;
+}
+
+/**
+ * nfsd4_process_open2 - finish open processing
+ * @rqstp: the RPC transaction being executed
+ * @current_fh: NFSv4 COMPOUND's current filehandle
+ * @open: OPEN arguments
+ *
+ * If successful, (1) truncate the file if open->op_truncate was
+ * set, (2) set open->op_stateid, (3) set open->op_delegation.
+ *
+ * Returns %nfs_ok on success; otherwise an nfs4stat value in
+ * network byte order is returned.
+ */
__be32
nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
{
@@ -4560,11 +6404,27 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
* and check for delegations in the process of being recalled.
* If not found, create the nfs4_file struct
*/
- fp = find_or_add_file(open->op_file, &current_fh->fh_handle);
+ fp = nfsd4_file_hash_insert(open->op_file, current_fh);
+ if (unlikely(!fp))
+ return nfserr_jukebox;
if (fp != open->op_file) {
status = nfs4_check_deleg(cl, open, &dp);
if (status)
goto out;
+ if (dp && nfsd4_is_deleg_cur(open) &&
+ (dp->dl_stid.sc_file != fp)) {
+ /*
+ * RFC8881 section 8.2.4 mandates the server to return
+ * NFS4ERR_BAD_STATEID if the selected table entry does
+ * not match the current filehandle. However returning
+ * NFS4ERR_BAD_STATEID in the OPEN can cause the client
+ * to repeatedly retry the operation with the same
+ * stateid, since the stateid itself is valid. To avoid
+ * this situation NFSD returns NFS4ERR_INVAL instead.
+ */
+ status = nfserr_inval;
+ goto out;
+ }
stp = nfsd4_find_and_lock_existing_open(fp, open);
} else {
open->op_file = NULL;
@@ -4575,6 +6435,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (!stp) {
stp = init_open_stateid(fp, open);
+ if (!stp) {
+ status = nfserr_jukebox;
+ goto out;
+ }
+
if (!open->op_stp)
new_stp = true;
}
@@ -4593,9 +6458,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
goto out;
}
} else {
- status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
+ status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true);
if (status) {
- stp->st_stid.sc_type = NFS4_CLOSED_STID;
release_open_stateid(stp);
mutex_unlock(&stp->st_mutex);
goto out;
@@ -4611,8 +6475,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
mutex_unlock(&stp->st_mutex);
if (nfsd4_has_session(&resp->cstate)) {
- if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
- open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
+ if (open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_NO_DELEG) {
+ open->op_delegate_type = OPEN_DELEGATE_NONE_EXT;
open->op_why_no_deleg = WND4_NOT_WANTED;
goto nodeleg;
}
@@ -4622,15 +6486,25 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
* Attempt to hand out a delegation. No error return, because the
* OPEN succeeds even if we fail.
*/
- nfs4_open_delegation(current_fh, open, stp);
+ nfs4_open_delegation(rqstp, open, stp,
+ &resp->cstate.current_fh, current_fh);
+
+ /*
+ * If there is an existing open stateid, it must be updated and
+ * returned. Only respect WANT_OPEN_XOR_DELEGATION when a new
+ * open stateid would have to be created.
+ */
+ if (new_stp && open_xor_delegation(open)) {
+ memcpy(&open->op_stateid, &zero_stateid, sizeof(open->op_stateid));
+ open->op_rflags |= OPEN4_RESULT_NO_OPEN_STATEID;
+ release_open_stateid(stp);
+ }
nodeleg:
status = nfs_ok;
-
- dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
- STATEID_VAL(&stp->st_stid.sc_stateid));
+ trace_nfsd_open(&stp->st_stid.sc_stateid);
out:
/* 4.1 client trying to upgrade/downgrade delegation? */
- if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
+ if (open->op_delegate_type == OPEN_DELEGATE_NONE && dp &&
open->op_deleg_want)
nfsd4_deleg_xgrade_none_ext(open, dp);
@@ -4641,7 +6515,7 @@ out:
/*
* To finish the open response, we just need to set the rflags.
*/
- open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
+ open->op_rflags |= NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
if (nfsd4_has_session(&resp->cstate))
open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK;
else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED))
@@ -4658,12 +6532,8 @@ out:
void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
struct nfsd4_open *open)
{
- if (open->op_openowner) {
- struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
-
- nfsd4_cstate_assign_replay(cstate, so);
- nfs4_put_stateowner(so);
- }
+ if (open->op_openowner)
+ nfs4_put_stateowner(&open->op_openowner->oo_owner);
if (open->op_file)
kmem_cache_free(file_slab, open->op_file);
if (open->op_stp)
@@ -4681,19 +6551,15 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- dprintk("process_renew(%08x/%08x): starting\n",
- clid->cl_boot, clid->cl_id);
- status = lookup_clientid(clid, cstate, nn);
+ trace_nfsd_clid_renew(clid);
+ status = set_client(clid, cstate, nn);
if (status)
- goto out;
+ return status;
clp = cstate->clp;
- status = nfserr_cb_path_down;
if (!list_empty(&clp->cl_delegations)
&& clp->cl_cb_state != NFSD4_CB_UP)
- goto out;
- status = nfs_ok;
-out:
- return status;
+ return nfserr_cb_path_down;
+ return nfs_ok;
}
void
@@ -4703,7 +6569,7 @@ nfsd4_end_grace(struct nfsd_net *nn)
if (nn->grace_ended)
return;
- dprintk("NFSD: end of grace period\n");
+ trace_nfsd_grace_complete(nn);
nn->grace_ended = true;
/*
* If the server goes down again right now, an NFSv4
@@ -4735,10 +6601,13 @@ nfsd4_end_grace(struct nfsd_net *nn)
*/
static bool clients_still_reclaiming(struct nfsd_net *nn)
{
- unsigned long now = get_seconds();
- unsigned long double_grace_period_end = nn->boot_time +
- 2 * nn->nfsd4_lease;
+ time64_t double_grace_period_end = nn->boot_time +
+ 2 * nn->nfsd4_lease;
+ if (nn->track_reclaim_completes &&
+ atomic_read(&nn->nr_reclaim_complete) ==
+ nn->reclaim_str_hashtbl_size)
+ return false;
if (!nn->somebody_reclaimed)
return false;
nn->somebody_reclaimed = false;
@@ -4746,63 +6615,291 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
* If we've given them *two* lease times to reclaim, and they're
* still not done, give up:
*/
- if (time_after(now, double_grace_period_end))
+ if (ktime_get_boottime_seconds() > double_grace_period_end)
return false;
return true;
}
-static time_t
-nfs4_laundromat(struct nfsd_net *nn)
+struct laundry_time {
+ time64_t cutoff;
+ time64_t new_timeo;
+};
+
+static bool state_expired(struct laundry_time *lt, time64_t last_refresh)
{
- struct nfs4_client *clp;
- struct nfs4_openowner *oo;
- struct nfs4_delegation *dp;
+ time64_t time_remaining;
+
+ if (last_refresh < lt->cutoff)
+ return true;
+ time_remaining = last_refresh - lt->cutoff;
+ lt->new_timeo = min(lt->new_timeo, time_remaining);
+ return false;
+}
+
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+void nfsd4_ssc_init_umount_work(struct nfsd_net *nn)
+{
+ spin_lock_init(&nn->nfsd_ssc_lock);
+ INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list);
+ init_waitqueue_head(&nn->nfsd_ssc_waitq);
+}
+
+/*
+ * This is called when nfsd is being shutdown, after all inter_ssc
+ * cleanup were done, to destroy the ssc delayed unmount list.
+ */
+static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn)
+{
+ struct nfsd4_ssc_umount_item *ni = NULL;
+ struct nfsd4_ssc_umount_item *tmp;
+
+ spin_lock(&nn->nfsd_ssc_lock);
+ list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
+ list_del(&ni->nsui_list);
+ spin_unlock(&nn->nfsd_ssc_lock);
+ mntput(ni->nsui_vfsmount);
+ kfree(ni);
+ spin_lock(&nn->nfsd_ssc_lock);
+ }
+ spin_unlock(&nn->nfsd_ssc_lock);
+}
+
+static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
+{
+ bool do_wakeup = false;
+ struct nfsd4_ssc_umount_item *ni = NULL;
+ struct nfsd4_ssc_umount_item *tmp;
+
+ spin_lock(&nn->nfsd_ssc_lock);
+ list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) {
+ if (time_after(jiffies, ni->nsui_expire)) {
+ if (refcount_read(&ni->nsui_refcnt) > 1)
+ continue;
+
+ /* mark being unmount */
+ ni->nsui_busy = true;
+ spin_unlock(&nn->nfsd_ssc_lock);
+ mntput(ni->nsui_vfsmount);
+ spin_lock(&nn->nfsd_ssc_lock);
+
+ /* waiters need to start from begin of list */
+ list_del(&ni->nsui_list);
+ kfree(ni);
+
+ /* wakeup ssc_connect waiters */
+ do_wakeup = true;
+ continue;
+ }
+ break;
+ }
+ if (do_wakeup)
+ wake_up_all(&nn->nfsd_ssc_waitq);
+ spin_unlock(&nn->nfsd_ssc_lock);
+}
+#endif
+
+/* Check if any lock belonging to this lockowner has any blockers */
+static bool
+nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
+{
+ struct file_lock_context *ctx;
struct nfs4_ol_stateid *stp;
- struct nfsd4_blocked_lock *nbl;
- struct list_head *pos, *next, reaplist;
- time_t cutoff = get_seconds() - nn->nfsd4_lease;
- time_t t, new_timeo = nn->nfsd4_lease;
+ struct nfs4_file *nf;
- dprintk("NFSD: laundromat service - starting\n");
+ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+ nf = stp->st_stid.sc_file;
+ ctx = locks_inode_context(nf->fi_inode);
+ if (!ctx)
+ continue;
+ if (locks_owner_has_blockers(ctx, lo))
+ return true;
+ }
+ return false;
+}
- if (clients_still_reclaiming(nn)) {
- new_timeo = 0;
- goto out;
+static bool
+nfs4_anylock_blockers(struct nfs4_client *clp)
+{
+ int i;
+ struct nfs4_stateowner *so;
+ struct nfs4_lockowner *lo;
+
+ if (atomic_read(&clp->cl_delegs_in_recall))
+ return true;
+ spin_lock(&clp->cl_lock);
+ for (i = 0; i < OWNER_HASH_SIZE; i++) {
+ list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i],
+ so_strhash) {
+ if (so->so_is_open_owner)
+ continue;
+ lo = lockowner(so);
+ if (nfs4_lockowner_has_blockers(lo)) {
+ spin_unlock(&clp->cl_lock);
+ return true;
+ }
+ }
}
- nfsd4_end_grace(nn);
- INIT_LIST_HEAD(&reaplist);
+ spin_unlock(&clp->cl_lock);
+ return false;
+}
+
+static void
+nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
+ struct laundry_time *lt)
+{
+ unsigned int maxreap, reapcnt = 0;
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+
+ maxreap = (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) ?
+ NFSD_CLIENT_MAX_TRIM_PER_RUN : 0;
+ INIT_LIST_HEAD(reaplist);
spin_lock(&nn->client_lock);
list_for_each_safe(pos, next, &nn->client_lru) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
- if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
- t = clp->cl_time - cutoff;
- new_timeo = min(new_timeo, t);
+ if (clp->cl_state == NFSD4_EXPIRABLE)
+ goto exp_client;
+ if (!state_expired(lt, clp->cl_time))
break;
+ if (!atomic_read(&clp->cl_rpc_users)) {
+ if (clp->cl_state == NFSD4_ACTIVE)
+ atomic_inc(&nn->nfsd_courtesy_clients);
+ clp->cl_state = NFSD4_COURTESY;
}
- if (mark_client_expired_locked(clp)) {
- dprintk("NFSD: client in use (clientid %08x)\n",
- clp->cl_clientid.cl_id);
- continue;
+ if (!client_has_state(clp))
+ goto exp_client;
+ if (!nfs4_anylock_blockers(clp))
+ if (reapcnt >= maxreap)
+ continue;
+exp_client:
+ if (!mark_client_expired_locked(clp)) {
+ list_add(&clp->cl_lru, reaplist);
+ reapcnt++;
}
- list_add(&clp->cl_lru, &reaplist);
}
spin_unlock(&nn->client_lock);
- list_for_each_safe(pos, next, &reaplist) {
+}
+
+static void
+nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn,
+ struct list_head *reaplist)
+{
+ unsigned int maxreap = 0, reapcnt = 0;
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+
+ maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN;
+ INIT_LIST_HEAD(reaplist);
+
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
- dprintk("NFSD: purging unused client (clientid %08x)\n",
- clp->cl_clientid.cl_id);
+ if (clp->cl_state == NFSD4_ACTIVE)
+ break;
+ if (reapcnt >= maxreap)
+ break;
+ if (!mark_client_expired_locked(clp)) {
+ list_add(&clp->cl_lru, reaplist);
+ reapcnt++;
+ }
+ }
+ spin_unlock(&nn->client_lock);
+}
+
+static void
+nfs4_process_client_reaplist(struct list_head *reaplist)
+{
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+
+ list_for_each_safe(pos, next, reaplist) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ trace_nfsd_clid_purged(&clp->cl_clientid);
list_del_init(&clp->cl_lru);
expire_client(clp);
}
+}
+
+static void nfs40_clean_admin_revoked(struct nfsd_net *nn,
+ struct laundry_time *lt)
+{
+ struct nfs4_client *clp;
+
+ spin_lock(&nn->client_lock);
+ if (nn->nfs40_last_revoke == 0 ||
+ nn->nfs40_last_revoke > lt->cutoff) {
+ spin_unlock(&nn->client_lock);
+ return;
+ }
+ nn->nfs40_last_revoke = 0;
+
+retry:
+ list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+ unsigned long id, tmp;
+ struct nfs4_stid *stid;
+
+ if (atomic_read(&clp->cl_admin_revoked) == 0)
+ continue;
+
+ spin_lock(&clp->cl_lock);
+ idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
+ if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) {
+ refcount_inc(&stid->sc_count);
+ spin_unlock(&nn->client_lock);
+ /* this function drops ->cl_lock */
+ nfsd4_drop_revoked_stid(stid);
+ nfs4_put_stid(stid);
+ spin_lock(&nn->client_lock);
+ goto retry;
+ }
+ spin_unlock(&clp->cl_lock);
+ }
+ spin_unlock(&nn->client_lock);
+}
+
+static time64_t
+nfs4_laundromat(struct nfsd_net *nn)
+{
+ struct nfs4_openowner *oo;
+ struct nfs4_delegation *dp;
+ struct nfs4_ol_stateid *stp;
+ struct nfsd4_blocked_lock *nbl;
+ struct list_head *pos, *next, reaplist;
+ struct laundry_time lt = {
+ .cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease,
+ .new_timeo = nn->nfsd4_lease
+ };
+ struct nfs4_cpntf_state *cps;
+ copy_stateid_t *cps_t;
+ int i;
+
+ if (clients_still_reclaiming(nn)) {
+ lt.new_timeo = 0;
+ goto out;
+ }
+ nfsd4_end_grace(nn);
+
+ spin_lock(&nn->s2s_cp_lock);
+ idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
+ cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
+ if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID &&
+ state_expired(&lt, cps->cpntf_time))
+ _free_cpntf_state_locked(nn, cps);
+ }
+ spin_unlock(&nn->s2s_cp_lock);
+ nfsd4_async_copy_reaper(nn);
+ nfs4_get_client_reaplist(nn, &reaplist, &lt);
+ nfs4_process_client_reaplist(&reaplist);
+
+ nfs40_clean_admin_revoked(nn, &lt);
+
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
- t = dp->dl_time - cutoff;
- new_timeo = min(new_timeo, t);
+ if (!state_expired(&lt, dp->dl_time))
break;
- }
- WARN_ON(!unhash_delegation_locked(dp));
+ refcount_inc(&dp->dl_stid.sc_count);
+ unhash_delegation_locked(dp, SC_STATUS_REVOKED);
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -4817,12 +6914,8 @@ nfs4_laundromat(struct nfsd_net *nn)
while (!list_empty(&nn->close_lru)) {
oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
oo_close_lru);
- if (time_after((unsigned long)oo->oo_time,
- (unsigned long)cutoff)) {
- t = oo->oo_time - cutoff;
- new_timeo = min(new_timeo, t);
+ if (!state_expired(&lt, oo->oo_time))
break;
- }
list_del_init(&oo->oo_close_lru);
stp = oo->oo_last_closed_stid;
oo->oo_last_closed_stid = NULL;
@@ -4848,12 +6941,8 @@ nfs4_laundromat(struct nfsd_net *nn)
while (!list_empty(&nn->blocked_locks_lru)) {
nbl = list_first_entry(&nn->blocked_locks_lru,
struct nfsd4_blocked_lock, nbl_lru);
- if (time_after((unsigned long)nbl->nbl_time,
- (unsigned long)cutoff)) {
- t = nbl->nbl_time - cutoff;
- new_timeo = min(new_timeo, t);
+ if (!state_expired(&lt, nbl->nbl_time))
break;
- }
list_move(&nbl->nbl_lru, &reaplist);
list_del_init(&nbl->nbl_list);
}
@@ -4863,50 +6952,91 @@ nfs4_laundromat(struct nfsd_net *nn)
nbl = list_first_entry(&reaplist,
struct nfsd4_blocked_lock, nbl_lru);
list_del_init(&nbl->nbl_lru);
- locks_delete_block(&nbl->nbl_lock);
free_blocked_lock(nbl);
}
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ /* service the server-to-server copy delayed unmount list */
+ nfsd4_ssc_expire_umount(nn);
+#endif
+ if (atomic_long_read(&num_delegations) >= max_delegations)
+ deleg_reaper(nn);
out:
- new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
- return new_timeo;
+ return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
}
-static struct workqueue_struct *laundry_wq;
static void laundromat_main(struct work_struct *);
static void
laundromat_main(struct work_struct *laundry)
{
- time_t t;
+ time64_t t;
struct delayed_work *dwork = to_delayed_work(laundry);
struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
laundromat_work);
t = nfs4_laundromat(nn);
- dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
}
-static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
+static void
+courtesy_client_reaper(struct nfsd_net *nn)
{
- if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
- return nfserr_bad_stateid;
- return nfs_ok;
+ struct list_head reaplist;
+
+ nfs4_get_courtesy_client_reaplist(nn, &reaplist);
+ nfs4_process_client_reaplist(&reaplist);
}
-static inline int
-access_permit_read(struct nfs4_ol_stateid *stp)
+static void
+deleg_reaper(struct nfsd_net *nn)
{
- return test_access(NFS4_SHARE_ACCESS_READ, stp) ||
- test_access(NFS4_SHARE_ACCESS_BOTH, stp) ||
- test_access(NFS4_SHARE_ACCESS_WRITE, stp);
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+
+ if (clp->cl_state != NFSD4_ACTIVE)
+ continue;
+ if (list_empty(&clp->cl_delegations))
+ continue;
+ if (atomic_read(&clp->cl_delegs_in_recall))
+ continue;
+ if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &clp->cl_ra->ra_cb.cb_flags))
+ continue;
+ if (ktime_get_boottime_seconds() - clp->cl_ra_time < 5)
+ continue;
+ if (clp->cl_cb_state != NFSD4_CB_UP)
+ continue;
+
+ /* release in nfsd4_cb_recall_any_release */
+ kref_get(&clp->cl_nfsdfs.cl_ref);
+ clp->cl_ra_time = ktime_get_boottime_seconds();
+ clp->cl_ra->ra_keep = 0;
+ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+ BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ trace_nfsd_cb_recall_any(clp->cl_ra);
+ nfsd4_run_cb(&clp->cl_ra->ra_cb);
+ }
+ spin_unlock(&nn->client_lock);
}
-static inline int
-access_permit_write(struct nfs4_ol_stateid *stp)
+static void
+nfsd4_state_shrinker_worker(struct work_struct *work)
{
- return test_access(NFS4_SHARE_ACCESS_WRITE, stp) ||
- test_access(NFS4_SHARE_ACCESS_BOTH, stp);
+ struct nfsd_net *nn = container_of(work, struct nfsd_net,
+ nfsd_shrinker_work);
+
+ courtesy_client_reaper(nn);
+ deleg_reaper(nn);
+}
+
+static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
+{
+ if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
+ return nfserr_bad_stateid;
+ return nfs_ok;
}
static
@@ -4943,16 +7073,6 @@ check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid,
NFS4_SHARE_DENY_READ);
}
-/*
- * Allow READ/WRITE during grace period on recovered state only for files
- * that are not able to provide mandatory locking.
- */
-static inline int
-grace_disallows_io(struct net *net, struct inode *inode)
-{
- return opens_in_grace(net) && mandatory_lock(inode);
-}
-
static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session)
{
/*
@@ -4990,6 +7110,9 @@ static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_sti
if (ret == nfs_ok)
ret = check_stateid_generation(in, &s->sc_stateid, has_session);
spin_unlock(&s->sc_lock);
+ if (ret == nfserr_admin_revoked)
+ nfsd40_drop_revoked_stid(s->sc_client,
+ &s->sc_stateid);
return ret;
}
@@ -5009,15 +7132,6 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
return status;
- /* Client debugging aid. */
- if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) {
- char addr_str[INET6_ADDRSTRLEN];
- rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str,
- sizeof(addr_str));
- pr_warn_ratelimited("NFSD: client %s testing state ID "
- "with incorrect client ID\n", addr_str);
- return status;
- }
spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, stateid);
if (!s)
@@ -5025,50 +7139,57 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
status = nfsd4_stid_check_stateid_generation(stateid, s, 1);
if (status)
goto out_unlock;
+ status = nfsd4_verify_open_stid(s);
+ if (status)
+ goto out_unlock;
+
switch (s->sc_type) {
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
status = nfs_ok;
break;
- case NFS4_REVOKED_DELEG_STID:
- status = nfserr_deleg_revoked;
- break;
- case NFS4_OPEN_STID:
- case NFS4_LOCK_STID:
+ case SC_TYPE_OPEN:
+ case SC_TYPE_LOCK:
status = nfsd4_check_openowner_confirmed(openlockstateid(s));
break;
default:
printk("unknown stateid type %x\n", s->sc_type);
- /* Fallthrough */
- case NFS4_CLOSED_STID:
- case NFS4_CLOSED_DELEG_STID:
status = nfserr_bad_stateid;
}
out_unlock:
spin_unlock(&cl->cl_lock);
+ if (status == nfserr_admin_revoked)
+ nfsd40_drop_revoked_stid(cl, stateid);
return status;
}
__be32
nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
- stateid_t *stateid, unsigned char typemask,
+ stateid_t *stateid,
+ unsigned short typemask, unsigned short statusmask,
struct nfs4_stid **s, struct nfsd_net *nn)
{
__be32 status;
+ struct nfs4_stid *stid;
bool return_revoked = false;
/*
* only return revoked delegations if explicitly asked.
* otherwise we report revoked or bad_stateid status.
*/
- if (typemask & NFS4_REVOKED_DELEG_STID)
+ if (statusmask & SC_STATUS_REVOKED)
return_revoked = true;
- else if (typemask & NFS4_DELEG_STID)
- typemask |= NFS4_REVOKED_DELEG_STID;
+ if (typemask & SC_TYPE_DELEG)
+ /* Always allow REVOKED for DELEG so we can
+ * return the appropriate error.
+ */
+ statusmask |= SC_STATUS_REVOKED;
+
+ statusmask |= SC_STATUS_ADMIN_REVOKED | SC_STATUS_FREEABLE;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
return nfserr_bad_stateid;
- status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);
+ status = set_client(&stateid->si_opaque.so_clid, cstate, nn);
if (status == nfserr_stale_clientid) {
if (cstate->session)
return nfserr_bad_stateid;
@@ -5076,39 +7197,41 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
}
if (status)
return status;
- *s = find_stateid_by_type(cstate->clp, stateid, typemask);
- if (!*s)
- return nfserr_bad_stateid;
- if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
- nfs4_put_stid(*s);
- if (cstate->minorversion)
- return nfserr_deleg_revoked;
+ stid = find_stateid_by_type(cstate->clp, stateid, typemask, statusmask);
+ if (!stid)
return nfserr_bad_stateid;
+ if ((stid->sc_status & SC_STATUS_REVOKED) && !return_revoked) {
+ nfs4_put_stid(stid);
+ return nfserr_deleg_revoked;
+ }
+ if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) {
+ nfsd40_drop_revoked_stid(cstate->clp, stateid);
+ nfs4_put_stid(stid);
+ return nfserr_admin_revoked;
}
+ *s = stid;
return nfs_ok;
}
-static struct file *
+static struct nfsd_file *
nfs4_find_file(struct nfs4_stid *s, int flags)
{
- if (!s)
+ struct nfsd_file *ret = NULL;
+
+ if (!s || s->sc_status)
return NULL;
switch (s->sc_type) {
- case NFS4_DELEG_STID:
- if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
- return NULL;
- return get_file(s->sc_file->fi_deleg_file);
- case NFS4_OPEN_STID:
- case NFS4_LOCK_STID:
+ case SC_TYPE_DELEG:
+ case SC_TYPE_OPEN:
+ case SC_TYPE_LOCK:
if (flags & RD_STATE)
- return find_readable_file(s->sc_file);
+ ret = find_readable_file(s->sc_file);
else
- return find_writeable_file(s->sc_file);
- break;
+ ret = find_writeable_file(s->sc_file);
}
- return NULL;
+ return ret;
}
static __be32
@@ -5124,55 +7247,144 @@ nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
static __be32
nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
- struct file **filpp, bool *tmp_file, int flags)
+ struct nfsd_file **nfp, int flags)
{
int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
- struct file *file;
+ struct nfsd_file *nf;
__be32 status;
- file = nfs4_find_file(s, flags);
- if (file) {
- status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+ nf = nfs4_find_file(s, flags);
+ if (nf) {
+ status = nfsd_permission(&rqstp->rq_cred,
+ fhp->fh_export, fhp->fh_dentry,
acc | NFSD_MAY_OWNER_OVERRIDE);
if (status) {
- fput(file);
- return status;
+ nfsd_file_put(nf);
+ goto out;
}
-
- *filpp = file;
} else {
- status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
+ status = nfsd_file_acquire(rqstp, fhp, acc, &nf);
if (status)
return status;
-
- if (tmp_file)
- *tmp_file = true;
}
+ *nfp = nf;
+out:
+ return status;
+}
+static void
+_free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
+{
+ WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID);
+ if (!refcount_dec_and_test(&cps->cp_stateid.cs_count))
+ return;
+ list_del(&cps->cp_list);
+ idr_remove(&nn->s2s_cp_stateids,
+ cps->cp_stateid.cs_stid.si_opaque.so_id);
+ kfree(cps);
+}
+/*
+ * A READ from an inter server to server COPY will have a
+ * copy stateid. Look up the copy notify stateid from the
+ * idr structure and take a reference on it.
+ */
+__be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+ struct nfs4_client *clp,
+ struct nfs4_cpntf_state **cps)
+{
+ copy_stateid_t *cps_t;
+ struct nfs4_cpntf_state *state = NULL;
+ if (st->si_opaque.so_clid.cl_id != nn->s2s_cp_cl_id)
+ return nfserr_bad_stateid;
+ spin_lock(&nn->s2s_cp_lock);
+ cps_t = idr_find(&nn->s2s_cp_stateids, st->si_opaque.so_id);
+ if (cps_t) {
+ state = container_of(cps_t, struct nfs4_cpntf_state,
+ cp_stateid);
+ if (state->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID) {
+ state = NULL;
+ goto unlock;
+ }
+ if (!clp)
+ refcount_inc(&state->cp_stateid.cs_count);
+ else
+ _free_cpntf_state_locked(nn, state);
+ }
+unlock:
+ spin_unlock(&nn->s2s_cp_lock);
+ if (!state)
+ return nfserr_bad_stateid;
+ if (!clp)
+ *cps = state;
return 0;
}
-/*
- * Checks for stateid operations
+static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+ struct nfs4_stid **stid)
+{
+ __be32 status;
+ struct nfs4_cpntf_state *cps = NULL;
+ struct nfs4_client *found;
+
+ status = manage_cpntf_state(nn, st, NULL, &cps);
+ if (status)
+ return status;
+
+ cps->cpntf_time = ktime_get_boottime_seconds();
+
+ status = nfserr_expired;
+ found = lookup_clientid(&cps->cp_p_clid, true, nn);
+ if (!found)
+ goto out;
+
+ *stid = find_stateid_by_type(found, &cps->cp_p_stateid,
+ SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK,
+ 0);
+ if (*stid)
+ status = nfs_ok;
+ else
+ status = nfserr_bad_stateid;
+
+ put_client_renew(found);
+out:
+ nfs4_put_cpntf_state(nn, cps);
+ return status;
+}
+
+void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
+{
+ spin_lock(&nn->s2s_cp_lock);
+ _free_cpntf_state_locked(nn, cps);
+ spin_unlock(&nn->s2s_cp_lock);
+}
+
+/**
+ * nfs4_preprocess_stateid_op - find and prep stateid for an operation
+ * @rqstp: incoming request from client
+ * @cstate: current compound state
+ * @fhp: filehandle associated with requested stateid
+ * @stateid: stateid (provided by client)
+ * @flags: flags describing type of operation to be done
+ * @nfp: optional nfsd_file return pointer (may be NULL)
+ * @cstid: optional returned nfs4_stid pointer (may be NULL)
+ *
+ * Given info from the client, look up a nfs4_stid for the operation. On
+ * success, it returns a reference to the nfs4_stid and/or the nfsd_file
+ * associated with it.
*/
__be32
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
- stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
+ stateid_t *stateid, int flags, struct nfsd_file **nfp,
+ struct nfs4_stid **cstid)
{
- struct inode *ino = d_inode(fhp->fh_dentry);
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct nfs4_stid *s = NULL;
__be32 status;
- if (filpp)
- *filpp = NULL;
- if (tmp_file)
- *tmp_file = false;
-
- if (grace_disallows_io(net, ino))
- return nfserr_grace;
+ if (nfp)
+ *nfp = NULL;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
status = check_special_stateids(net, fhp, stateid, flags);
@@ -5180,8 +7392,10 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
}
status = nfsd4_lookup_stateid(cstate, stateid,
- NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
- &s, nn);
+ SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK,
+ 0, &s, nn);
+ if (status == nfserr_bad_stateid)
+ status = find_cpntf_state(nn, stateid, &s);
if (status)
return status;
status = nfsd4_stid_check_stateid_generation(stateid, s,
@@ -5190,27 +7404,28 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
goto out;
switch (s->sc_type) {
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
status = nfs4_check_delegmode(delegstateid(s), flags);
break;
- case NFS4_OPEN_STID:
- case NFS4_LOCK_STID:
+ case SC_TYPE_OPEN:
+ case SC_TYPE_LOCK:
status = nfs4_check_olstateid(openlockstateid(s), flags);
break;
- default:
- status = nfserr_bad_stateid;
- break;
}
if (status)
goto out;
status = nfs4_check_fh(fhp, s);
done:
- if (!status && filpp)
- status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
+ if (status == nfs_ok && nfp)
+ status = nfs4_check_file(rqstp, fhp, s, nfp, flags);
out:
- if (s)
- nfs4_put_stid(s);
+ if (s) {
+ if (!status && cstid)
+ *cstid = s;
+ else
+ nfs4_put_stid(s);
+ }
return status;
}
@@ -5223,7 +7438,7 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
struct nfsd4_test_stateid_id *stateid;
- struct nfs4_client *cl = cstate->session->se_client;
+ struct nfs4_client *cl = cstate->clp;
list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
stateid->ts_id_status =
@@ -5269,39 +7484,47 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
stateid_t *stateid = &free_stateid->fr_stateid;
struct nfs4_stid *s;
struct nfs4_delegation *dp;
- struct nfs4_client *cl = cstate->session->se_client;
+ struct nfs4_client *cl = cstate->clp;
__be32 ret = nfserr_bad_stateid;
spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, stateid);
- if (!s)
+ if (!s || s->sc_status & SC_STATUS_CLOSED)
goto out_unlock;
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED) {
+ nfsd4_drop_revoked_stid(s);
+ ret = nfs_ok;
+ goto out;
+ }
spin_lock(&s->sc_lock);
switch (s->sc_type) {
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
+ if (s->sc_status & SC_STATUS_REVOKED) {
+ s->sc_status |= SC_STATUS_CLOSED;
+ spin_unlock(&s->sc_lock);
+ dp = delegstateid(s);
+ if (s->sc_status & SC_STATUS_FREEABLE)
+ list_del_init(&dp->dl_recall_lru);
+ s->sc_status |= SC_STATUS_FREED;
+ spin_unlock(&cl->cl_lock);
+ nfs4_put_stid(s);
+ ret = nfs_ok;
+ goto out;
+ }
ret = nfserr_locks_held;
break;
- case NFS4_OPEN_STID:
+ case SC_TYPE_OPEN:
ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
if (ret)
break;
ret = nfserr_locks_held;
break;
- case NFS4_LOCK_STID:
+ case SC_TYPE_LOCK:
spin_unlock(&s->sc_lock);
refcount_inc(&s->sc_count);
spin_unlock(&cl->cl_lock);
ret = nfsd4_free_lock_stateid(stateid, s);
goto out;
- case NFS4_REVOKED_DELEG_STID:
- spin_unlock(&s->sc_lock);
- dp = delegstateid(s);
- list_del_init(&dp->dl_recall_lru);
- spin_unlock(&cl->cl_lock);
- nfs4_put_stid(s);
- ret = nfs_ok;
- goto out;
- /* Default falls through and returns nfserr_bad_stateid */
}
spin_unlock(&s->sc_lock);
out_unlock:
@@ -5337,12 +7560,24 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
return status;
}
-/*
- * Checks for sequence id mutating operations.
+/**
+ * nfs4_preprocess_seqid_op - find and prep an ol_stateid for a seqid-morphing op
+ * @cstate: compund state
+ * @seqid: seqid (provided by client)
+ * @stateid: stateid (provided by client)
+ * @typemask: mask of allowable types for this operation
+ * @statusmask: mask of allowed states: 0 or STID_CLOSED
+ * @stpp: return pointer for the stateid found
+ * @nn: net namespace for request
+ *
+ * Given a stateid+seqid from a client, look up an nfs4_ol_stateid and
+ * return it in @stpp. On a nfs_ok return, the returned stateid will
+ * have its st_mutex locked.
*/
static __be32
nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
- stateid_t *stateid, char typemask,
+ stateid_t *stateid,
+ unsigned short typemask, unsigned short statusmask,
struct nfs4_ol_stateid **stpp,
struct nfsd_net *nn)
{
@@ -5350,15 +7585,19 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
struct nfs4_stid *s;
struct nfs4_ol_stateid *stp = NULL;
- dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
- seqid, STATEID_VAL(stateid));
+ trace_nfsd_preprocess(seqid, stateid);
*stpp = NULL;
- status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
+retry:
+ status = nfsd4_lookup_stateid(cstate, stateid,
+ typemask, statusmask, &s, nn);
if (status)
return status;
stp = openlockstateid(s);
- nfsd4_cstate_assign_replay(cstate, stp->st_stateowner);
+ if (nfsd4_cstate_assign_replay(cstate, stp->st_stateowner) == -EAGAIN) {
+ nfs4_put_stateowner(stp->st_stateowner);
+ goto retry;
+ }
status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
if (!status)
@@ -5376,7 +7615,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
struct nfs4_ol_stateid *stp;
status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
- NFS4_OPEN_STID, &stp, nn);
+ SC_TYPE_OPEN, 0, &stp, nn);
if (status)
return status;
oo = openowner(stp->st_stateowner);
@@ -5407,8 +7646,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
status = nfs4_preprocess_seqid_op(cstate,
- oc->oc_seqid, &oc->oc_req_stateid,
- NFS4_OPEN_STID, &stp, nn);
+ oc->oc_seqid, &oc->oc_req_stateid,
+ SC_TYPE_OPEN, 0, &stp, nn);
if (status)
goto out;
oo = openowner(stp->st_stateowner);
@@ -5420,9 +7659,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
oo->oo_flags |= NFS4_OO_CONFIRMED;
nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
mutex_unlock(&stp->st_mutex);
- dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
- __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
-
+ trace_nfsd_open_confirm(oc->oc_seqid, &stp->st_stid.sc_stateid);
nfsd4_client_record_create(oo->oo_owner.so_client);
status = nfs_ok;
put_stateid:
@@ -5502,11 +7739,12 @@ out:
return status;
}
-static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
+static bool nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
{
struct nfs4_client *clp = s->st_stid.sc_client;
bool unhashed;
LIST_HEAD(reaplist);
+ struct nfs4_ol_stateid *stp;
spin_lock(&clp->cl_lock);
unhashed = unhash_open_stateid(s, &reaplist);
@@ -5515,12 +7753,14 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
if (unhashed)
put_ol_stateid_locked(s, &reaplist);
spin_unlock(&clp->cl_lock);
+ list_for_each_entry(stp, &reaplist, st_locks)
+ nfs4_free_cpntf_statelist(clp->net, &stp->st_stid);
free_ol_stateid_reaplist(&reaplist);
+ return false;
} else {
spin_unlock(&clp->cl_lock);
free_ol_stateid_reaplist(&reaplist);
- if (unhashed)
- move_to_close_lru(s, clp->net);
+ return unhashed;
}
}
@@ -5536,19 +7776,22 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_ol_stateid *stp;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ bool need_move_to_close_list;
- dprintk("NFSD: nfsd4_close on file %pd\n",
+ dprintk("NFSD: nfsd4_close on file %pd\n",
cstate->current_fh.fh_dentry);
status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
- &close->cl_stateid,
- NFS4_OPEN_STID|NFS4_CLOSED_STID,
- &stp, nn);
+ &close->cl_stateid,
+ SC_TYPE_OPEN, SC_STATUS_CLOSED,
+ &stp, nn);
nfsd4_bump_seqid(cstate, status);
if (status)
- goto out;
+ goto out;
- stp->st_stid.sc_type = NFS4_CLOSED_STID;
+ spin_lock(&stp->st_stid.sc_client->cl_lock);
+ stp->st_stid.sc_status |= SC_STATUS_CLOSED;
+ spin_unlock(&stp->st_stid.sc_client->cl_lock);
/*
* Technically we don't _really_ have to increment or copy it, since
@@ -5558,8 +7801,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
*/
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
- nfsd4_close_open_stateid(stp);
+ need_move_to_close_list = nfsd4_close_open_stateid(stp);
mutex_unlock(&stp->st_mutex);
+ if (need_move_to_close_list)
+ move_to_close_lru(stp, net);
/* v4.1+ suggests that we send a special stateid in here, since the
* clients should just ignore this anyway. Since this is not useful
@@ -5587,10 +7832,11 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
+ status = fh_verify(rqstp, &cstate->current_fh, 0, 0);
+ if (status)
return status;
- status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, SC_STATUS_REVOKED, &s, nn);
if (status)
goto out;
dp = delegstateid(s);
@@ -5598,22 +7844,16 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto put_stateid;
+ trace_nfsd_deleg_return(stateid);
destroy_delegation(dp);
+ smp_mb__after_atomic();
+ wake_up_var(d_inode(cstate->current_fh.fh_dentry));
put_stateid:
nfs4_put_stid(&dp->dl_stid);
out:
return status;
}
-static inline u64
-end_offset(u64 start, u64 len)
-{
- u64 end;
-
- end = start + len;
- return end >= start ? end: NFS4_MAX_UINT64;
-}
-
/* last octet in a range */
static inline u64
last_byte_offset(u64 start, u64 len)
@@ -5643,7 +7883,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
}
static fl_owner_t
-nfsd4_fl_get_owner(fl_owner_t owner)
+nfsd4_lm_get_owner(fl_owner_t owner)
{
struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
@@ -5652,7 +7892,7 @@ nfsd4_fl_get_owner(fl_owner_t owner)
}
static void
-nfsd4_fl_put_owner(fl_owner_t owner)
+nfsd4_lm_put_owner(fl_owner_t owner)
{
struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner;
@@ -5660,10 +7900,33 @@ nfsd4_fl_put_owner(fl_owner_t owner)
nfs4_put_stateowner(&lo->lo_owner);
}
+/* return pointer to struct nfs4_client if client is expirable */
+static bool
+nfsd4_lm_lock_expirable(struct file_lock *cfl)
+{
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *) cfl->c.flc_owner;
+ struct nfs4_client *clp = lo->lo_owner.so_client;
+ struct nfsd_net *nn;
+
+ if (try_to_expire_client(clp)) {
+ nn = net_generic(clp->net, nfsd_net_id);
+ mod_delayed_work(laundry_wq, &nn->laundromat_work, 0);
+ return true;
+ }
+ return false;
+}
+
+/* schedule laundromat to run immediately and wait for it to complete */
+static void
+nfsd4_lm_expire_lock(void)
+{
+ flush_workqueue(laundry_wq);
+}
+
static void
nfsd4_lm_notify(struct file_lock *fl)
{
- struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner;
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *) fl->c.flc_owner;
struct net *net = lo->lo_owner.so_client->net;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct nfsd4_blocked_lock *nbl = container_of(fl,
@@ -5679,14 +7942,19 @@ nfsd4_lm_notify(struct file_lock *fl)
}
spin_unlock(&nn->blocked_locks_lock);
- if (queue)
- nfsd4_run_cb(&nbl->nbl_cb);
+ if (queue) {
+ trace_nfsd_cb_notify_lock(lo, nbl);
+ nfsd4_try_run_cb(&nbl->nbl_cb);
+ }
}
static const struct lock_manager_operations nfsd_posix_mng_ops = {
+ .lm_mod_owner = THIS_MODULE,
.lm_notify = nfsd4_lm_notify,
- .lm_get_owner = nfsd4_fl_get_owner,
- .lm_put_owner = nfsd4_fl_put_owner,
+ .lm_get_owner = nfsd4_lm_get_owner,
+ .lm_put_owner = nfsd4_lm_put_owner,
+ .lm_lock_expirable = nfsd4_lm_lock_expirable,
+ .lm_expire_lock = nfsd4_lm_expire_lock,
};
static inline void
@@ -5695,13 +7963,12 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
struct nfs4_lockowner *lo;
if (fl->fl_lmops == &nfsd_posix_mng_ops) {
- lo = (struct nfs4_lockowner *) fl->fl_owner;
- deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data,
- lo->lo_owner.so_owner.len, GFP_KERNEL);
+ lo = (struct nfs4_lockowner *) fl->c.flc_owner;
+ xdr_netobj_dup(&deny->ld_owner, &lo->lo_owner.so_owner,
+ GFP_KERNEL);
if (!deny->ld_owner.data)
/* We just don't care that much */
goto nevermind;
- deny->ld_owner.len = lo->lo_owner.so_owner.len;
deny->ld_clientid = lo->lo_owner.so_client->cl_clientid;
} else {
nevermind:
@@ -5715,7 +7982,7 @@ nevermind:
if (fl->fl_end != NFS4_MAX_UINT64)
deny->ld_length = fl->fl_end - fl->fl_start + 1;
deny->ld_type = NFS4_READ_LT;
- if (fl->fl_type != F_RDLCK)
+ if (fl->c.flc_type != F_RDLCK)
deny->ld_type = NFS4_WRITE_LT;
}
@@ -5801,21 +8068,21 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
}
static struct nfs4_ol_stateid *
-find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
+find_lock_stateid(const struct nfs4_lockowner *lo,
+ const struct nfs4_ol_stateid *ost)
{
struct nfs4_ol_stateid *lst;
- struct nfs4_client *clp = lo->lo_owner.so_client;
- lockdep_assert_held(&clp->cl_lock);
+ lockdep_assert_held(&ost->st_stid.sc_client->cl_lock);
- list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
- if (lst->st_stid.sc_type != NFS4_LOCK_STID)
- continue;
- if (lst->st_stid.sc_file == fp) {
- refcount_inc(&lst->st_stid.sc_count);
- return lst;
+ /* If ost is not hashed, ost->st_locks will not be valid */
+ if (!nfs4_ol_stateid_unhashed(ost))
+ list_for_each_entry(lst, &ost->st_locks, st_locks) {
+ if (lst->st_stateowner == &lo->lo_owner) {
+ refcount_inc(&lst->st_stid.sc_count);
+ return lst;
+ }
}
- }
return NULL;
}
@@ -5831,35 +8098,39 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX);
retry:
spin_lock(&clp->cl_lock);
- spin_lock(&fp->fi_lock);
- retstp = find_lock_stateid(lo, fp);
+ if (nfs4_ol_stateid_unhashed(open_stp))
+ goto out_close;
+ retstp = find_lock_stateid(lo, open_stp);
if (retstp)
- goto out_unlock;
-
+ goto out_found;
refcount_inc(&stp->st_stid.sc_count);
- stp->st_stid.sc_type = NFS4_LOCK_STID;
+ stp->st_stid.sc_type = SC_TYPE_LOCK;
stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
get_nfs4_file(fp);
stp->st_stid.sc_file = fp;
stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
+ spin_lock(&fp->fi_lock);
list_add(&stp->st_locks, &open_stp->st_locks);
list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
list_add(&stp->st_perfile, &fp->fi_stateids);
-out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&clp->cl_lock);
- if (retstp) {
- if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
- nfs4_put_stid(&retstp->st_stid);
- goto retry;
- }
- /* To keep mutex tracking happy */
- mutex_unlock(&stp->st_mutex);
- stp = retstp;
- }
return stp;
+out_found:
+ spin_unlock(&clp->cl_lock);
+ if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) {
+ nfs4_put_stid(&retstp->st_stid);
+ goto retry;
+ }
+ /* To keep mutex tracking happy */
+ mutex_unlock(&stp->st_mutex);
+ return retstp;
+out_close:
+ spin_unlock(&clp->cl_lock);
+ mutex_unlock(&stp->st_mutex);
+ return NULL;
}
static struct nfs4_ol_stateid *
@@ -5874,7 +8145,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
*new = false;
spin_lock(&clp->cl_lock);
- lst = find_lock_stateid(lo, fi);
+ lst = find_lock_stateid(lo, ost);
spin_unlock(&clp->cl_lock);
if (lst != NULL) {
if (nfsd4_lock_ol_stateid(lst) == nfs_ok)
@@ -5968,7 +8239,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_ol_stateid *lock_stp = NULL;
struct nfs4_ol_stateid *open_stp = NULL;
struct nfs4_file *fp;
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
@@ -5976,8 +8247,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
int lkflg;
int err;
bool new = false;
- unsigned char fl_type;
- unsigned int fl_flags = FL_POSIX;
+ unsigned char type;
+ unsigned int flags = FL_POSIX;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -5988,23 +8259,21 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (check_lock_length(lock->lk_offset, lock->lk_length))
return nfserr_inval;
- if ((status = fh_verify(rqstp, &cstate->current_fh,
- S_IFREG, NFSD_MAY_LOCK))) {
- dprintk("NFSD: nfsd4_lock: permission denied!\n");
+ status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
+ if (status != nfs_ok)
return status;
+ if (exportfs_cannot_lock(cstate->current_fh.fh_dentry->d_sb->s_export_op)) {
+ status = nfserr_notsupp;
+ goto out;
}
if (lock->lk_is_new) {
if (nfsd4_has_session(cstate))
/* See rfc 5661 18.10.3: given clientid is ignored: */
memcpy(&lock->lk_new_clientid,
- &cstate->session->se_client->cl_clientid,
+ &cstate->clp->cl_clientid,
sizeof(clientid_t));
- status = nfserr_stale_clientid;
- if (STALE_CLIENTID(&lock->lk_new_clientid, nn))
- goto out;
-
/* validate and update open stateid and open seqid */
status = nfs4_preprocess_confirmed_seqid_op(cstate,
lock->lk_new_open_seqid,
@@ -6022,9 +8291,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&lock_stp, &new);
} else {
status = nfs4_preprocess_seqid_op(cstate,
- lock->lk_old_lock_seqid,
- &lock->lk_old_lock_stateid,
- NFS4_LOCK_STID, &lock_stp, nn);
+ lock->lk_old_lock_seqid,
+ &lock->lk_old_lock_stateid,
+ SC_TYPE_LOCK, 0, &lock_stp,
+ nn);
}
if (status)
goto out;
@@ -6042,42 +8312,46 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!locks_in_grace(net) && lock->lk_reclaim)
goto out;
+ if (lock->lk_reclaim)
+ flags |= FL_RECLAIM;
+
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
- if (nfsd4_has_session(cstate))
- fl_flags |= FL_SLEEP;
- /* Fallthrough */
+ fallthrough;
case NFS4_READ_LT:
spin_lock(&fp->fi_lock);
- filp = find_readable_file_locked(fp);
- if (filp)
+ nf = find_readable_file_locked(fp);
+ if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
spin_unlock(&fp->fi_lock);
- fl_type = F_RDLCK;
+ type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
- if (nfsd4_has_session(cstate))
- fl_flags |= FL_SLEEP;
- /* Fallthrough */
+ fallthrough;
case NFS4_WRITE_LT:
spin_lock(&fp->fi_lock);
- filp = find_writeable_file_locked(fp);
- if (filp)
+ nf = find_writeable_file_locked(fp);
+ if (nf)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
spin_unlock(&fp->fi_lock);
- fl_type = F_WRLCK;
+ type = F_WRLCK;
break;
default:
status = nfserr_inval;
goto out;
}
- if (!filp) {
+ if (!nf) {
status = nfserr_openmode;
goto out;
}
+ if (lock->lk_type & (NFS4_READW_LT | NFS4_WRITEW_LT) &&
+ nfsd4_has_session(cstate) &&
+ locks_can_async_lock(nf->nf_file->f_op))
+ flags |= FL_SLEEP;
+
nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
if (!nbl) {
dprintk("NFSD: %s: unable to allocate block!\n", __func__);
@@ -6086,11 +8360,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
file_lock = &nbl->nbl_lock;
- file_lock->fl_type = fl_type;
- file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
- file_lock->fl_pid = current->tgid;
- file_lock->fl_file = filp;
- file_lock->fl_flags = fl_flags;
+ file_lock->c.flc_type = type;
+ file_lock->c.flc_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
+ file_lock->c.flc_pid = current->tgid;
+ file_lock->c.flc_file = nf->nf_file;
+ file_lock->c.flc_flags = flags;
file_lock->fl_lmops = &nfsd_posix_mng_ops;
file_lock->fl_start = lock->lk_offset;
file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
@@ -6103,15 +8377,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
- if (fl_flags & FL_SLEEP) {
- nbl->nbl_time = jiffies;
+ if (flags & FL_SLEEP) {
+ nbl->nbl_time = ktime_get_boottime_seconds();
spin_lock(&nn->blocked_locks_lock);
list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
+ kref_get(&nbl->nbl_kref);
spin_unlock(&nn->blocked_locks_lock);
}
- err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
+ err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, conflock);
switch (err) {
case 0: /* success! */
nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
@@ -6120,8 +8395,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nn->somebody_reclaimed = true;
break;
case FILE_LOCK_DEFERRED:
+ kref_put(&nbl->nbl_kref, free_nbl);
nbl = NULL;
- /* Fallthrough */
+ fallthrough;
case -EAGAIN: /* conflock holds conflicting lock */
status = nfserr_denied;
dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
@@ -6138,16 +8414,21 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
out:
if (nbl) {
/* dequeue it if we queued it before */
- if (fl_flags & FL_SLEEP) {
+ if (flags & FL_SLEEP) {
spin_lock(&nn->blocked_locks_lock);
- list_del_init(&nbl->nbl_list);
- list_del_init(&nbl->nbl_lru);
+ if (!list_empty(&nbl->nbl_list) &&
+ !list_empty(&nbl->nbl_lru)) {
+ list_del_init(&nbl->nbl_list);
+ list_del_init(&nbl->nbl_lru);
+ kref_put(&nbl->nbl_kref, free_nbl);
+ }
+ /* nbl can use one of lists to be linked to reaplist */
spin_unlock(&nn->blocked_locks_lock);
}
free_blocked_lock(nbl);
}
- if (filp)
- fput(filp);
+ if (nf)
+ nfsd_file_put(nf);
if (lock_stp) {
/* Bump seqid manually if the 4.0 replay owner is openowner */
if (cstate->replay_owner &&
@@ -6174,20 +8455,39 @@ out:
return status;
}
+void nfsd4_lock_release(union nfsd4_op_u *u)
+{
+ struct nfsd4_lock *lock = &u->lock;
+ struct nfsd4_lock_denied *deny = &lock->lk_denied;
+
+ kfree(deny->ld_owner.data);
+}
+
/*
* The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
* so we do a temporary open here just to get an open file to pass to
- * vfs_test_lock. (Arguably perhaps test_lock should be done with an
- * inode operation.)
+ * vfs_test_lock.
*/
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
- struct file *file;
- __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
- if (!err) {
- err = nfserrno(vfs_test_lock(file, lock));
- fput(file);
- }
+ struct nfsd_file *nf;
+ struct inode *inode;
+ __be32 err;
+
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
+ if (err)
+ return err;
+ inode = fhp->fh_dentry->d_inode;
+ inode_lock(inode); /* to block new leases till after test_lock: */
+ err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (err)
+ goto out;
+ lock->c.flc_file = nf->nf_file;
+ err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+ lock->c.flc_file = NULL;
+out:
+ inode_unlock(inode);
+ nfsd_file_put(nf);
return err;
}
@@ -6211,7 +8511,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;
if (!nfsd4_has_session(cstate)) {
- status = lookup_clientid(&lockt->lt_clientid, cstate, nn);
+ status = set_client(&lockt->lt_clientid, cstate, nn);
if (status)
goto out;
}
@@ -6229,11 +8529,11 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
switch (lockt->lt_type) {
case NFS4_READ_LT:
case NFS4_READW_LT:
- file_lock->fl_type = F_RDLCK;
+ file_lock->c.flc_type = F_RDLCK;
break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
- file_lock->fl_type = F_WRLCK;
+ file_lock->c.flc_type = F_WRLCK;
break;
default:
dprintk("NFSD: nfs4_lockt: bad lock type!\n");
@@ -6243,9 +8543,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
lo = find_lockowner_str(cstate->clp, &lockt->lt_owner);
if (lo)
- file_lock->fl_owner = (fl_owner_t)lo;
- file_lock->fl_pid = current->tgid;
- file_lock->fl_flags = FL_POSIX;
+ file_lock->c.flc_owner = (fl_owner_t)lo;
+ file_lock->c.flc_pid = current->tgid;
+ file_lock->c.flc_flags = FL_POSIX;
file_lock->fl_start = lockt->lt_offset;
file_lock->fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length);
@@ -6256,7 +8556,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- if (file_lock->fl_type != F_UNLCK) {
+ if (file_lock->c.flc_type != F_UNLCK) {
status = nfserr_denied;
nfs4_set_lock_denied(file_lock, &lockt->lt_denied);
}
@@ -6268,13 +8568,21 @@ out:
return status;
}
+void nfsd4_lockt_release(union nfsd4_op_u *u)
+{
+ struct nfsd4_lockt *lockt = &u->lockt;
+ struct nfsd4_lock_denied *deny = &lockt->lt_denied;
+
+ kfree(deny->ld_owner.data);
+}
+
__be32
nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_locku *locku = &u->locku;
struct nfs4_ol_stateid *stp;
- struct file *filp = NULL;
+ struct nfsd_file *nf = NULL;
struct file_lock *file_lock = NULL;
__be32 status;
int err;
@@ -6288,27 +8596,32 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;
status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
- &locku->lu_stateid, NFS4_LOCK_STID,
- &stp, nn);
+ &locku->lu_stateid, SC_TYPE_LOCK, 0,
+ &stp, nn);
if (status)
goto out;
- filp = find_any_file(stp->st_stid.sc_file);
- if (!filp) {
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (!nf) {
status = nfserr_lock_range;
goto put_stateid;
}
+ if (exportfs_cannot_lock(nf->nf_file->f_path.mnt->mnt_sb->s_export_op)) {
+ status = nfserr_notsupp;
+ goto put_file;
+ }
+
file_lock = locks_alloc_lock();
if (!file_lock) {
dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
status = nfserr_jukebox;
- goto fput;
+ goto put_file;
}
- file_lock->fl_type = F_UNLCK;
- file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
- file_lock->fl_pid = current->tgid;
- file_lock->fl_file = filp;
- file_lock->fl_flags = FL_POSIX;
+ file_lock->c.flc_type = F_UNLCK;
+ file_lock->c.flc_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(stp->st_stateowner));
+ file_lock->c.flc_pid = current->tgid;
+ file_lock->c.flc_file = nf->nf_file;
+ file_lock->c.flc_flags = FL_POSIX;
file_lock->fl_lmops = &nfsd_posix_mng_ops;
file_lock->fl_start = locku->lu_offset;
@@ -6316,14 +8629,14 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
locku->lu_length);
nfs4_transform_lock_offset(file_lock);
- err = vfs_lock_file(filp, F_SETLK, file_lock, NULL);
+ err = vfs_lock_file(nf->nf_file, F_SETLK, file_lock, NULL);
if (err) {
dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
goto out_nfserr;
}
nfs4_inc_and_copy_stateid(&locku->lu_stateid, &stp->st_stid);
-fput:
- fput(filp);
+put_file:
+ nfsd_file_put(nf);
put_stateid:
mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
@@ -6335,7 +8648,7 @@ out:
out_nfserr:
status = nfserrno(err);
- goto fput;
+ goto put_file;
}
/*
@@ -6348,98 +8661,101 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{
struct file_lock *fl;
int status = false;
- struct file *filp = find_any_file(fp);
+ struct nfsd_file *nf;
struct inode *inode;
struct file_lock_context *flctx;
- if (!filp) {
+ spin_lock(&fp->fi_lock);
+ nf = find_any_file_locked(fp);
+ if (!nf) {
/* Any valid lock stateid should have some sort of access */
WARN_ON_ONCE(1);
- return status;
+ goto out;
}
- inode = locks_inode(filp);
- flctx = inode->i_flctx;
+ inode = file_inode(nf->nf_file);
+ flctx = locks_inode_context(inode);
if (flctx && !list_empty_careful(&flctx->flc_posix)) {
spin_lock(&flctx->flc_lock);
- list_for_each_entry(fl, &flctx->flc_posix, fl_list) {
- if (fl->fl_owner == (fl_owner_t)lowner) {
+ for_each_file_lock(fl, &flctx->flc_posix) {
+ if (fl->c.flc_owner == (fl_owner_t)lowner) {
status = true;
break;
}
}
spin_unlock(&flctx->flc_lock);
}
- fput(filp);
+out:
+ spin_unlock(&fp->fi_lock);
return status;
}
+/**
+ * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations
+ * @rqstp: RPC transaction
+ * @cstate: NFSv4 COMPOUND state
+ * @u: RELEASE_LOCKOWNER arguments
+ *
+ * Check if there are any locks still held and if not, free the lockowner
+ * and any lock state that is owned.
+ *
+ * Return values:
+ * %nfs_ok: lockowner released or not found
+ * %nfserr_locks_held: lockowner still in use
+ * %nfserr_stale_clientid: clientid no longer active
+ * %nfserr_expired: clientid not recognized
+ */
__be32
nfsd4_release_lockowner(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
clientid_t *clid = &rlockowner->rl_clientid;
- struct nfs4_stateowner *sop;
- struct nfs4_lockowner *lo = NULL;
struct nfs4_ol_stateid *stp;
- struct xdr_netobj *owner = &rlockowner->rl_owner;
- unsigned int hashval = ownerstr_hashval(owner);
- __be32 status;
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfs4_lockowner *lo;
struct nfs4_client *clp;
- LIST_HEAD (reaplist);
+ LIST_HEAD(reaplist);
+ __be32 status;
dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
clid->cl_boot, clid->cl_id);
- status = lookup_clientid(clid, cstate, nn);
+ status = set_client(clid, cstate, nn);
if (status)
return status;
-
clp = cstate->clp;
- /* Find the matching lock stateowner */
- spin_lock(&clp->cl_lock);
- list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
- so_strhash) {
-
- if (sop->so_is_open_owner || !same_owner_str(sop, owner))
- continue;
-
- /* see if there are still any locks associated with it */
- lo = lockowner(sop);
- list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
- if (check_for_locks(stp->st_stid.sc_file, lo)) {
- status = nfserr_locks_held;
- spin_unlock(&clp->cl_lock);
- return status;
- }
- }
- nfs4_get_stateowner(sop);
- break;
- }
+ spin_lock(&clp->cl_lock);
+ lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner);
if (!lo) {
spin_unlock(&clp->cl_lock);
- return status;
+ return nfs_ok;
}
+ list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) {
+ if (check_for_locks(stp->st_stid.sc_file, lo)) {
+ spin_unlock(&clp->cl_lock);
+ nfs4_put_stateowner(&lo->lo_owner);
+ return nfserr_locks_held;
+ }
+ }
unhash_lockowner_locked(lo);
while (!list_empty(&lo->lo_owner.so_stateids)) {
stp = list_first_entry(&lo->lo_owner.so_stateids,
struct nfs4_ol_stateid,
st_perstateowner);
- WARN_ON(!unhash_lock_stateid(stp));
+ unhash_lock_stateid(stp);
put_ol_stateid_locked(stp, &reaplist);
}
spin_unlock(&clp->cl_lock);
+
free_ol_stateid_reaplist(&reaplist);
remove_blocked_locks(lo);
nfs4_put_stateowner(&lo->lo_owner);
-
- return status;
+ return nfs_ok;
}
static inline struct nfs4_client_reclaim *
@@ -6449,7 +8765,7 @@ alloc_reclaim(void)
}
bool
-nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn)
+nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
{
struct nfs4_client_reclaim *crp;
@@ -6461,20 +8777,42 @@ nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn)
* failure => all reset bets are off, nfserr_no_grace...
*/
struct nfs4_client_reclaim *
-nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn)
+nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
+ struct nfsd_net *nn)
{
unsigned int strhashval;
struct nfs4_client_reclaim *crp;
- dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
+ name.data = kmemdup(name.data, name.len, GFP_KERNEL);
+ if (!name.data) {
+ dprintk("%s: failed to allocate memory for name.data!\n",
+ __func__);
+ return NULL;
+ }
+ if (princhash.len) {
+ princhash.data = kmemdup(princhash.data, princhash.len, GFP_KERNEL);
+ if (!princhash.data) {
+ dprintk("%s: failed to allocate memory for princhash.data!\n",
+ __func__);
+ kfree(name.data);
+ return NULL;
+ }
+ } else
+ princhash.data = NULL;
crp = alloc_reclaim();
if (crp) {
strhashval = clientstr_hashval(name);
INIT_LIST_HEAD(&crp->cr_strhash);
list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
- memcpy(crp->cr_recdir, name, HEXDIR_LEN);
+ crp->cr_name.data = name.data;
+ crp->cr_name.len = name.len;
+ crp->cr_princhash.data = princhash.data;
+ crp->cr_princhash.len = princhash.len;
crp->cr_clp = NULL;
nn->reclaim_str_hashtbl_size++;
+ } else {
+ kfree(name.data);
+ kfree(princhash.data);
}
return crp;
}
@@ -6483,6 +8821,8 @@ void
nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
{
list_del(&crp->cr_strhash);
+ kfree(crp->cr_name.data);
+ kfree(crp->cr_princhash.data);
kfree(crp);
nn->reclaim_str_hashtbl_size--;
}
@@ -6506,636 +8846,32 @@ nfs4_release_reclaim(struct nfsd_net *nn)
/*
* called from OPEN, CLAIM_PREVIOUS with a new clientid. */
struct nfs4_client_reclaim *
-nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn)
+nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn)
{
unsigned int strhashval;
struct nfs4_client_reclaim *crp = NULL;
- dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir);
-
- strhashval = clientstr_hashval(recdir);
+ strhashval = clientstr_hashval(name);
list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) {
- if (same_name(crp->cr_recdir, recdir)) {
+ if (compare_blob(&crp->cr_name, &name) == 0) {
return crp;
}
}
return NULL;
}
-/*
-* Called from OPEN. Look for clientid in reclaim list.
-*/
__be32
-nfs4_check_open_reclaim(clientid_t *clid,
- struct nfsd4_compound_state *cstate,
- struct nfsd_net *nn)
+nfs4_check_open_reclaim(struct nfs4_client *clp)
{
- __be32 status;
-
- /* find clientid in conf_id_hashtbl */
- status = lookup_clientid(clid, cstate, nn);
- if (status)
- return nfserr_reclaim_bad;
-
- if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags))
+ if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags))
return nfserr_no_grace;
- if (nfsd4_client_record_check(cstate->clp))
+ if (nfsd4_client_record_check(clp))
return nfserr_reclaim_bad;
return nfs_ok;
}
-#ifdef CONFIG_NFSD_FAULT_INJECTION
-static inline void
-put_client(struct nfs4_client *clp)
-{
- atomic_dec(&clp->cl_refcount);
-}
-
-static struct nfs4_client *
-nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size)
-{
- struct nfs4_client *clp;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
-
- if (!nfsd_netns_ready(nn))
- return NULL;
-
- list_for_each_entry(clp, &nn->client_lru, cl_lru) {
- if (memcmp(&clp->cl_addr, addr, addr_size) == 0)
- return clp;
- }
- return NULL;
-}
-
-u64
-nfsd_inject_print_clients(void)
-{
- struct nfs4_client *clp;
- u64 count = 0;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- char buf[INET6_ADDRSTRLEN];
-
- if (!nfsd_netns_ready(nn))
- return 0;
-
- spin_lock(&nn->client_lock);
- list_for_each_entry(clp, &nn->client_lru, cl_lru) {
- rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
- pr_info("NFS Client: %s\n", buf);
- ++count;
- }
- spin_unlock(&nn->client_lock);
-
- return count;
-}
-
-u64
-nfsd_inject_forget_client(struct sockaddr_storage *addr, size_t addr_size)
-{
- u64 count = 0;
- struct nfs4_client *clp;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
-
- if (!nfsd_netns_ready(nn))
- return count;
-
- spin_lock(&nn->client_lock);
- clp = nfsd_find_client(addr, addr_size);
- if (clp) {
- if (mark_client_expired_locked(clp) == nfs_ok)
- ++count;
- else
- clp = NULL;
- }
- spin_unlock(&nn->client_lock);
-
- if (clp)
- expire_client(clp);
-
- return count;
-}
-
-u64
-nfsd_inject_forget_clients(u64 max)
-{
- u64 count = 0;
- struct nfs4_client *clp, *next;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- LIST_HEAD(reaplist);
-
- if (!nfsd_netns_ready(nn))
- return count;
-
- spin_lock(&nn->client_lock);
- list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
- if (mark_client_expired_locked(clp) == nfs_ok) {
- list_add(&clp->cl_lru, &reaplist);
- if (max != 0 && ++count >= max)
- break;
- }
- }
- spin_unlock(&nn->client_lock);
-
- list_for_each_entry_safe(clp, next, &reaplist, cl_lru)
- expire_client(clp);
-
- return count;
-}
-
-static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
- const char *type)
-{
- char buf[INET6_ADDRSTRLEN];
- rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
- printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type);
-}
-
-static void
-nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
- struct list_head *collect)
-{
- struct nfs4_client *clp = lst->st_stid.sc_client;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
-
- if (!collect)
- return;
-
- lockdep_assert_held(&nn->client_lock);
- atomic_inc(&clp->cl_refcount);
- list_add(&lst->st_locks, collect);
-}
-
-static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
- struct list_head *collect,
- bool (*func)(struct nfs4_ol_stateid *))
-{
- struct nfs4_openowner *oop;
- struct nfs4_ol_stateid *stp, *st_next;
- struct nfs4_ol_stateid *lst, *lst_next;
- u64 count = 0;
-
- spin_lock(&clp->cl_lock);
- list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) {
- list_for_each_entry_safe(stp, st_next,
- &oop->oo_owner.so_stateids, st_perstateowner) {
- list_for_each_entry_safe(lst, lst_next,
- &stp->st_locks, st_locks) {
- if (func) {
- if (func(lst))
- nfsd_inject_add_lock_to_list(lst,
- collect);
- }
- ++count;
- /*
- * Despite the fact that these functions deal
- * with 64-bit integers for "count", we must
- * ensure that it doesn't blow up the
- * clp->cl_refcount. Throw a warning if we
- * start to approach INT_MAX here.
- */
- WARN_ON_ONCE(count == (INT_MAX / 2));
- if (count == max)
- goto out;
- }
- }
- }
-out:
- spin_unlock(&clp->cl_lock);
-
- return count;
-}
-
-static u64
-nfsd_collect_client_locks(struct nfs4_client *clp, struct list_head *collect,
- u64 max)
-{
- return nfsd_foreach_client_lock(clp, max, collect, unhash_lock_stateid);
-}
-
-static u64
-nfsd_print_client_locks(struct nfs4_client *clp)
-{
- u64 count = nfsd_foreach_client_lock(clp, 0, NULL, NULL);
- nfsd_print_count(clp, count, "locked files");
- return count;
-}
-
-u64
-nfsd_inject_print_locks(void)
-{
- struct nfs4_client *clp;
- u64 count = 0;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
-
- if (!nfsd_netns_ready(nn))
- return 0;
-
- spin_lock(&nn->client_lock);
- list_for_each_entry(clp, &nn->client_lru, cl_lru)
- count += nfsd_print_client_locks(clp);
- spin_unlock(&nn->client_lock);
-
- return count;
-}
-
-static void
-nfsd_reap_locks(struct list_head *reaplist)
-{
- struct nfs4_client *clp;
- struct nfs4_ol_stateid *stp, *next;
-
- list_for_each_entry_safe(stp, next, reaplist, st_locks) {
- list_del_init(&stp->st_locks);
- clp = stp->st_stid.sc_client;
- nfs4_put_stid(&stp->st_stid);
- put_client(clp);
- }
-}
-
-u64
-nfsd_inject_forget_client_locks(struct sockaddr_storage *addr, size_t addr_size)
-{
- unsigned int count = 0;
- struct nfs4_client *clp;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- LIST_HEAD(reaplist);
-
- if (!nfsd_netns_ready(nn))
- return count;
-
- spin_lock(&nn->client_lock);
- clp = nfsd_find_client(addr, addr_size);
- if (clp)
- count = nfsd_collect_client_locks(clp, &reaplist, 0);
- spin_unlock(&nn->client_lock);
- nfsd_reap_locks(&reaplist);
- return count;
-}
-
-u64
-nfsd_inject_forget_locks(u64 max)
-{
- u64 count = 0;
- struct nfs4_client *clp;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- LIST_HEAD(reaplist);
-
- if (!nfsd_netns_ready(nn))
- return count;
-
- spin_lock(&nn->client_lock);
- list_for_each_entry(clp, &nn->client_lru, cl_lru) {
- count += nfsd_collect_client_locks(clp, &reaplist, max - count);
- if (max != 0 && count >= max)
- break;
- }
- spin_unlock(&nn->client_lock);
- nfsd_reap_locks(&reaplist);
- return count;
-}
-
-static u64
-nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
- struct list_head *collect,
- void (*func)(struct nfs4_openowner *))
-{
- struct nfs4_openowner *oop, *next;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- u64 count = 0;
-
- lockdep_assert_held(&nn->client_lock);
-
- spin_lock(&clp->cl_lock);
- list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) {
- if (func) {
- func(oop);
- if (collect) {
- atomic_inc(&clp->cl_refcount);
- list_add(&oop->oo_perclient, collect);
- }
- }
- ++count;
- /*
- * Despite the fact that these functions deal with
- * 64-bit integers for "count", we must ensure that
- * it doesn't blow up the clp->cl_refcount. Throw a
- * warning if we start to approach INT_MAX here.
- */
- WARN_ON_ONCE(count == (INT_MAX / 2));
- if (count == max)
- break;
- }
- spin_unlock(&clp->cl_lock);
-
- return count;
-}
-
-static u64
-nfsd_print_client_openowners(struct nfs4_client *clp)
-{
- u64 count = nfsd_foreach_client_openowner(clp, 0, NULL, NULL);
-
- nfsd_print_count(clp, count, "openowners");
- return count;
-}
-
-static u64
-nfsd_collect_client_openowners(struct nfs4_client *clp,
- struct list_head *collect, u64 max)
-{
- return nfsd_foreach_client_openowner(clp, max, collect,
- unhash_openowner_locked);
-}
-
-u64
-nfsd_inject_print_openowners(void)
-{
- struct nfs4_client *clp;
- u64 count = 0;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
-
- if (!nfsd_netns_ready(nn))
- return 0;
-
- spin_lock(&nn->client_lock);
- list_for_each_entry(clp, &nn->client_lru, cl_lru)
- count += nfsd_print_client_openowners(clp);
- spin_unlock(&nn->client_lock);
-
- return count;
-}
-
-static void
-nfsd_reap_openowners(struct list_head *reaplist)
-{
- struct nfs4_client *clp;
- struct nfs4_openowner *oop, *next;
-
- list_for_each_entry_safe(oop, next, reaplist, oo_perclient) {
- list_del_init(&oop->oo_perclient);
- clp = oop->oo_owner.so_client;
- release_openowner(oop);
- put_client(clp);
- }
-}
-
-u64
-nfsd_inject_forget_client_openowners(struct sockaddr_storage *addr,
- size_t addr_size)
-{
- unsigned int count = 0;
- struct nfs4_client *clp;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- LIST_HEAD(reaplist);
-
- if (!nfsd_netns_ready(nn))
- return count;
-
- spin_lock(&nn->client_lock);
- clp = nfsd_find_client(addr, addr_size);
- if (clp)
- count = nfsd_collect_client_openowners(clp, &reaplist, 0);
- spin_unlock(&nn->client_lock);
- nfsd_reap_openowners(&reaplist);
- return count;
-}
-
-u64
-nfsd_inject_forget_openowners(u64 max)
-{
- u64 count = 0;
- struct nfs4_client *clp;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- LIST_HEAD(reaplist);
-
- if (!nfsd_netns_ready(nn))
- return count;
-
- spin_lock(&nn->client_lock);
- list_for_each_entry(clp, &nn->client_lru, cl_lru) {
- count += nfsd_collect_client_openowners(clp, &reaplist,
- max - count);
- if (max != 0 && count >= max)
- break;
- }
- spin_unlock(&nn->client_lock);
- nfsd_reap_openowners(&reaplist);
- return count;
-}
-
-static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
- struct list_head *victims)
-{
- struct nfs4_delegation *dp, *next;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- u64 count = 0;
-
- lockdep_assert_held(&nn->client_lock);
-
- spin_lock(&state_lock);
- list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
- if (victims) {
- /*
- * It's not safe to mess with delegations that have a
- * non-zero dl_time. They might have already been broken
- * and could be processed by the laundromat outside of
- * the state_lock. Just leave them be.
- */
- if (dp->dl_time != 0)
- continue;
-
- atomic_inc(&clp->cl_refcount);
- WARN_ON(!unhash_delegation_locked(dp));
- list_add(&dp->dl_recall_lru, victims);
- }
- ++count;
- /*
- * Despite the fact that these functions deal with
- * 64-bit integers for "count", we must ensure that
- * it doesn't blow up the clp->cl_refcount. Throw a
- * warning if we start to approach INT_MAX here.
- */
- WARN_ON_ONCE(count == (INT_MAX / 2));
- if (count == max)
- break;
- }
- spin_unlock(&state_lock);
- return count;
-}
-
-static u64
-nfsd_print_client_delegations(struct nfs4_client *clp)
-{
- u64 count = nfsd_find_all_delegations(clp, 0, NULL);
-
- nfsd_print_count(clp, count, "delegations");
- return count;
-}
-
-u64
-nfsd_inject_print_delegations(void)
-{
- struct nfs4_client *clp;
- u64 count = 0;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
-
- if (!nfsd_netns_ready(nn))
- return 0;
-
- spin_lock(&nn->client_lock);
- list_for_each_entry(clp, &nn->client_lru, cl_lru)
- count += nfsd_print_client_delegations(clp);
- spin_unlock(&nn->client_lock);
-
- return count;
-}
-
-static void
-nfsd_forget_delegations(struct list_head *reaplist)
-{
- struct nfs4_client *clp;
- struct nfs4_delegation *dp, *next;
-
- list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
- list_del_init(&dp->dl_recall_lru);
- clp = dp->dl_stid.sc_client;
- revoke_delegation(dp);
- put_client(clp);
- }
-}
-
-u64
-nfsd_inject_forget_client_delegations(struct sockaddr_storage *addr,
- size_t addr_size)
-{
- u64 count = 0;
- struct nfs4_client *clp;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- LIST_HEAD(reaplist);
-
- if (!nfsd_netns_ready(nn))
- return count;
-
- spin_lock(&nn->client_lock);
- clp = nfsd_find_client(addr, addr_size);
- if (clp)
- count = nfsd_find_all_delegations(clp, 0, &reaplist);
- spin_unlock(&nn->client_lock);
-
- nfsd_forget_delegations(&reaplist);
- return count;
-}
-
-u64
-nfsd_inject_forget_delegations(u64 max)
-{
- u64 count = 0;
- struct nfs4_client *clp;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- LIST_HEAD(reaplist);
-
- if (!nfsd_netns_ready(nn))
- return count;
-
- spin_lock(&nn->client_lock);
- list_for_each_entry(clp, &nn->client_lru, cl_lru) {
- count += nfsd_find_all_delegations(clp, max - count, &reaplist);
- if (max != 0 && count >= max)
- break;
- }
- spin_unlock(&nn->client_lock);
- nfsd_forget_delegations(&reaplist);
- return count;
-}
-
-static void
-nfsd_recall_delegations(struct list_head *reaplist)
-{
- struct nfs4_client *clp;
- struct nfs4_delegation *dp, *next;
-
- list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
- list_del_init(&dp->dl_recall_lru);
- clp = dp->dl_stid.sc_client;
- /*
- * We skipped all entries that had a zero dl_time before,
- * so we can now reset the dl_time back to 0. If a delegation
- * break comes in now, then it won't make any difference since
- * we're recalling it either way.
- */
- spin_lock(&state_lock);
- dp->dl_time = 0;
- spin_unlock(&state_lock);
- nfsd_break_one_deleg(dp);
- put_client(clp);
- }
-}
-
-u64
-nfsd_inject_recall_client_delegations(struct sockaddr_storage *addr,
- size_t addr_size)
-{
- u64 count = 0;
- struct nfs4_client *clp;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- LIST_HEAD(reaplist);
-
- if (!nfsd_netns_ready(nn))
- return count;
-
- spin_lock(&nn->client_lock);
- clp = nfsd_find_client(addr, addr_size);
- if (clp)
- count = nfsd_find_all_delegations(clp, 0, &reaplist);
- spin_unlock(&nn->client_lock);
-
- nfsd_recall_delegations(&reaplist);
- return count;
-}
-
-u64
-nfsd_inject_recall_delegations(u64 max)
-{
- u64 count = 0;
- struct nfs4_client *clp, *next;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
- nfsd_net_id);
- LIST_HEAD(reaplist);
-
- if (!nfsd_netns_ready(nn))
- return count;
-
- spin_lock(&nn->client_lock);
- list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
- count += nfsd_find_all_delegations(clp, max - count, &reaplist);
- if (max != 0 && ++count >= max)
- break;
- }
- spin_unlock(&nn->client_lock);
- nfsd_recall_delegations(&reaplist);
- return count;
-}
-#endif /* CONFIG_NFSD_FAULT_INJECTION */
-
/*
* Since the lifetime of a delegation isn't limited to that of an open, a
* client may quite reasonably hang on to a delegation as long as it has
@@ -7186,7 +8922,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
nn->conf_name_tree = RB_ROOT;
nn->unconf_name_tree = RB_ROOT;
- nn->boot_time = get_seconds();
+ nn->boot_time = ktime_get_real_seconds();
nn->grace_ended = false;
nn->nfsd4_manager.block_opens = true;
INIT_LIST_HEAD(&nn->nfsd4_manager.list);
@@ -7196,15 +8932,30 @@ static int nfs4_state_create_net(struct net *net)
spin_lock_init(&nn->client_lock);
spin_lock_init(&nn->s2s_cp_lock);
idr_init(&nn->s2s_cp_stateids);
+ atomic_set(&nn->pending_async_copies, 0);
spin_lock_init(&nn->blocked_locks_lock);
INIT_LIST_HEAD(&nn->blocked_locks_lru);
INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
+ INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
get_net(net);
+ nn->nfsd_client_shrinker = shrinker_alloc(0, "nfsd-client");
+ if (!nn->nfsd_client_shrinker)
+ goto err_shrinker;
+
+ nn->nfsd_client_shrinker->scan_objects = nfsd4_state_shrinker_scan;
+ nn->nfsd_client_shrinker->count_objects = nfsd4_state_shrinker_count;
+ nn->nfsd_client_shrinker->private_data = nn;
+
+ shrinker_register(nn->nfsd_client_shrinker);
+
return 0;
+err_shrinker:
+ put_net(net);
+ kfree(nn->sessionid_hashtbl);
err_sessionid:
kfree(nn->unconf_id_hashtbl);
err_unconf_id:
@@ -7253,35 +9004,43 @@ nfs4_state_start_net(struct net *net)
return ret;
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
- printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n",
+ if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
+ goto skip_grace;
+ printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n",
nn->nfsd4_grace, net->ns.inum);
+ trace_nfsd_grace_start(nn);
queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
return 0;
+
+skip_grace:
+ printk(KERN_INFO "NFSD: no clients to reclaim, skipping NFSv4 grace period (net %x)\n",
+ net->ns.inum);
+ queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_lease * HZ);
+ nfsd4_end_grace(nn);
+ return 0;
}
/* initialization to perform when the nfsd service is started: */
-
int
nfs4_state_start(void)
{
int ret;
- laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4");
- if (laundry_wq == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- ret = nfsd4_create_callback_queue();
+ ret = rhltable_init(&nfs4_file_rhltable, &nfs4_file_rhash_params);
if (ret)
- goto out_free_laundry;
+ return ret;
+
+ nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot");
+ if (!nfsd_slot_shrinker) {
+ rhltable_destroy(&nfs4_file_rhltable);
+ return -ENOMEM;
+ }
+ nfsd_slot_shrinker->count_objects = nfsd_slot_count;
+ nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
+ shrinker_register(nfsd_slot_shrinker);
set_max_delegations();
return 0;
-
-out_free_laundry:
- destroy_workqueue(laundry_wq);
-out:
- return ret;
}
void
@@ -7291,6 +9050,8 @@ nfs4_state_shutdown_net(struct net *net)
struct list_head *pos, *next, reaplist;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ shrinker_free(nn->nfsd_client_shrinker);
+ cancel_work_sync(&nn->nfsd_shrinker_work);
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
@@ -7298,7 +9059,7 @@ nfs4_state_shutdown_net(struct net *net)
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- WARN_ON(!unhash_delegation_locked(dp));
+ unhash_delegation_locked(dp, SC_STATUS_CLOSED);
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -7310,19 +9071,23 @@ nfs4_state_shutdown_net(struct net *net)
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ nfsd4_ssc_shutdown_umount(nn);
+#endif
}
void
nfs4_state_shutdown(void)
{
- destroy_workqueue(laundry_wq);
- nfsd4_destroy_callback_queue();
+ rhltable_destroy(&nfs4_file_rhltable);
+ shrinker_free(nfsd_slot_shrinker);
}
static void
get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
{
- if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid))
+ if (HAS_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG) &&
+ CURRENT_STATEID(stateid))
memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t));
}
@@ -7331,14 +9096,14 @@ put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
{
if (cstate->minorversion) {
memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t));
- SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+ SET_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
}
}
void
clear_current_stateid(struct nfsd4_compound_state *cstate)
{
- CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
+ CLEAR_CSTATE_FLAG(cstate, CURRENT_STATE_ID_FLAG);
}
/*
@@ -7431,3 +9196,274 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
{
get_stateid(cstate, &u->write.wr_stateid);
}
+
+/**
+ * nfsd4_vet_deleg_time - vet and set the timespec for a delegated timestamp update
+ * @req: timestamp from the client
+ * @orig: original timestamp in the inode
+ * @now: current time
+ *
+ * Given a timestamp from the client response, check it against the
+ * current timestamp in the inode and the current time. Returns true
+ * if the inode's timestamp needs to be updated, and false otherwise.
+ * @req may also be changed if the timestamp needs to be clamped.
+ */
+bool nfsd4_vet_deleg_time(struct timespec64 *req, const struct timespec64 *orig,
+ const struct timespec64 *now)
+{
+
+ /*
+ * "When the time presented is before the original time, then the
+ * update is ignored." Also no need to update if there is no change.
+ */
+ if (timespec64_compare(req, orig) <= 0)
+ return false;
+
+ /*
+ * "When the time presented is in the future, the server can either
+ * clamp the new time to the current time, or it may
+ * return NFS4ERR_DELAY to the client, allowing it to retry."
+ */
+ if (timespec64_compare(req, now) > 0)
+ *req = *now;
+
+ return true;
+}
+
+static int cb_getattr_update_times(struct dentry *dentry, struct nfs4_delegation *dp)
+{
+ struct inode *inode = d_inode(dentry);
+ struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
+ struct iattr attrs = { };
+ int ret;
+
+ if (deleg_attrs_deleg(dp->dl_type)) {
+ struct timespec64 now = current_time(inode);
+
+ attrs.ia_atime = ncf->ncf_cb_atime;
+ attrs.ia_mtime = ncf->ncf_cb_mtime;
+
+ if (nfsd4_vet_deleg_time(&attrs.ia_atime, &dp->dl_atime, &now))
+ attrs.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
+
+ if (nfsd4_vet_deleg_time(&attrs.ia_mtime, &dp->dl_mtime, &now)) {
+ attrs.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
+ attrs.ia_ctime = attrs.ia_mtime;
+ if (nfsd4_vet_deleg_time(&attrs.ia_ctime, &dp->dl_ctime, &now))
+ attrs.ia_valid |= ATTR_CTIME | ATTR_CTIME_SET;
+ }
+ } else {
+ attrs.ia_valid |= ATTR_MTIME | ATTR_CTIME;
+ }
+
+ if (!attrs.ia_valid)
+ return 0;
+
+ attrs.ia_valid |= ATTR_DELEG;
+ inode_lock(inode);
+ ret = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL);
+ inode_unlock(inode);
+ return ret;
+}
+
+/**
+ * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
+ * @rqstp: RPC transaction context
+ * @dentry: dentry of inode to be checked for a conflict
+ * @pdp: returned WRITE delegation, if one was found
+ *
+ * This function is called when there is a conflict between a write
+ * delegation and a change/size GETATTR from another client. The server
+ * must either use the CB_GETATTR to get the current values of the
+ * attributes from the client that holds the delegation or recall the
+ * delegation before replying to the GETATTR. See RFC 8881 section
+ * 18.7.4.
+ *
+ * Returns 0 if there is no conflict; otherwise an nfs_stat
+ * code is returned. If @pdp is set to a non-NULL value, then the
+ * caller must put the reference.
+ */
+__be32
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
+ struct nfs4_delegation **pdp)
+{
+ __be32 status;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct file_lock_context *ctx;
+ struct nfs4_delegation *dp = NULL;
+ struct file_lease *fl;
+ struct nfs4_cb_fattr *ncf;
+ struct inode *inode = d_inode(dentry);
+
+ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return nfs_ok;
+
+#define NON_NFSD_LEASE ((void *)1)
+
+ spin_lock(&ctx->flc_lock);
+ for_each_file_lock(fl, &ctx->flc_lease) {
+ if (fl->c.flc_flags == FL_LAYOUT)
+ continue;
+ if (fl->c.flc_type == F_WRLCK) {
+ if (fl->fl_lmops == &nfsd_lease_mng_ops)
+ dp = fl->c.flc_owner;
+ else
+ dp = NON_NFSD_LEASE;
+ }
+ break;
+ }
+ if (dp == NULL || dp == NON_NFSD_LEASE ||
+ dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
+ spin_unlock(&ctx->flc_lock);
+ if (dp == NON_NFSD_LEASE) {
+ status = nfserrno(nfsd_open_break_lease(inode,
+ NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ return status;
+ }
+ return 0;
+ }
+
+ nfsd_stats_wdeleg_getattr_inc(nn);
+ refcount_inc(&dp->dl_stid.sc_count);
+ ncf = &dp->dl_cb_fattr;
+ nfs4_cb_getattr(&dp->dl_cb_fattr);
+ spin_unlock(&ctx->flc_lock);
+
+ wait_on_bit_timeout(&ncf->ncf_getattr.cb_flags, NFSD4_CALLBACK_RUNNING,
+ TASK_UNINTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
+ if (ncf->ncf_cb_status) {
+ /* Recall delegation only if client didn't respond */
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ goto out_status;
+ }
+ if (!ncf->ncf_file_modified &&
+ (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
+ ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
+ ncf->ncf_file_modified = true;
+ if (ncf->ncf_file_modified) {
+ int err;
+
+ /*
+ * Per section 10.4.3 of RFC 8881, the server would
+ * not update the file's metadata with the client's
+ * modified size
+ */
+ err = cb_getattr_update_times(dentry, dp);
+ if (err) {
+ status = nfserrno(err);
+ goto out_status;
+ }
+ ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
+ *pdp = dp;
+ return nfs_ok;
+ }
+ status = nfs_ok;
+out_status:
+ nfs4_put_stid(&dp->dl_stid);
+ return status;
+}
+
+/**
+ * nfsd_get_dir_deleg - attempt to get a directory delegation
+ * @cstate: compound state
+ * @gdd: GET_DIR_DELEGATION arg/resp structure
+ * @nf: nfsd_file opened on the directory
+ *
+ * Given a GET_DIR_DELEGATION request @gdd, attempt to acquire a delegation
+ * on the directory to which @nf refers. Note that this does not set up any
+ * sort of async notifications for the delegation.
+ */
+struct nfs4_delegation *
+nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
+ struct nfsd4_get_dir_delegation *gdd,
+ struct nfsd_file *nf)
+{
+ struct nfs4_client *clp = cstate->clp;
+ struct nfs4_delegation *dp;
+ struct file_lease *fl;
+ struct nfs4_file *fp, *rfp;
+ int status = 0;
+
+ fp = nfsd4_alloc_file();
+ if (!fp)
+ return ERR_PTR(-ENOMEM);
+
+ nfsd4_file_init(&cstate->current_fh, fp);
+
+ rfp = nfsd4_file_hash_insert(fp, &cstate->current_fh);
+ if (unlikely(!rfp)) {
+ put_nfs4_file(fp);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ if (rfp != fp) {
+ put_nfs4_file(fp);
+ fp = rfp;
+ }
+
+ /* if this client already has one, return that it's unavailable */
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+ /* existing delegation? */
+ if (nfs4_delegation_exists(clp, fp)) {
+ status = -EAGAIN;
+ } else if (!fp->fi_deleg_file) {
+ fp->fi_deleg_file = nfsd_file_get(nf);
+ fp->fi_delegees = 1;
+ } else {
+ ++fp->fi_delegees;
+ }
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+
+ if (status) {
+ put_nfs4_file(fp);
+ return ERR_PTR(status);
+ }
+
+ /* Try to set up the lease */
+ status = -ENOMEM;
+ dp = alloc_init_deleg(clp, fp, NULL, NFS4_OPEN_DELEGATE_READ);
+ if (!dp)
+ goto out_delegees;
+
+ fl = nfs4_alloc_init_lease(dp);
+ if (!fl)
+ goto out_put_stid;
+
+ status = kernel_setlease(nf->nf_file,
+ fl->c.flc_type, &fl, NULL);
+ if (fl)
+ locks_free_lease(fl);
+ if (status)
+ goto out_put_stid;
+
+ /*
+ * Now, try to hash it. This can fail if we race another nfsd task
+ * trying to set a delegation on the same file. If that happens,
+ * then just say UNAVAIL.
+ */
+ spin_lock(&state_lock);
+ spin_lock(&clp->cl_lock);
+ spin_lock(&fp->fi_lock);
+ status = hash_delegation_locked(dp, fp);
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&clp->cl_lock);
+ spin_unlock(&state_lock);
+
+ if (!status)
+ return dp;
+
+ /* Something failed. Drop the lease and clean up the stid */
+ kernel_setlease(fp->fi_deleg_file->nf_file, F_UNLCK, NULL, (void **)&dp);
+out_put_stid:
+ nfs4_put_stid(&dp->dl_stid);
+out_delegees:
+ put_deleg_file(fp);
+ return ERR_PTR(status);
+}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3de42a729093..30ce5851fe4c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -40,6 +40,11 @@
#include <linux/utsname.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/svcauth_gss.h>
+#include <linux/sunrpc/addr.h>
+#include <linux/xattr.h>
+#include <linux/vmalloc.h>
+
+#include <uapi/linux/xattr.h>
#include "idmap.h"
#include "acl.h"
@@ -49,6 +54,10 @@
#include "cache.h"
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
+#include "nfs4xdr_gen.h"
+
+#include "trace.h"
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#include <linux/security.h>
@@ -86,6 +95,8 @@ check_filename(char *str, int len)
if (len == 0)
return nfserr_inval;
+ if (len > NFS4_MAXNAMLEN)
+ return nfserr_nametoolong;
if (isdotent(str, len))
return nfserr_badname;
for (i = 0; i < len; i++)
@@ -94,115 +105,6 @@ check_filename(char *str, int len)
return 0;
}
-#define DECODE_HEAD \
- __be32 *p; \
- __be32 status
-#define DECODE_TAIL \
- status = 0; \
-out: \
- return status; \
-xdr_error: \
- dprintk("NFSD: xdr error (%s:%d)\n", \
- __FILE__, __LINE__); \
- status = nfserr_bad_xdr; \
- goto out
-
-#define READMEM(x,nbytes) do { \
- x = (char *)p; \
- p += XDR_QUADLEN(nbytes); \
-} while (0)
-#define SAVEMEM(x,nbytes) do { \
- if (!(x = (p==argp->tmp || p == argp->tmpp) ? \
- savemem(argp, p, nbytes) : \
- (char *)p)) { \
- dprintk("NFSD: xdr error (%s:%d)\n", \
- __FILE__, __LINE__); \
- goto xdr_error; \
- } \
- p += XDR_QUADLEN(nbytes); \
-} while (0)
-#define COPYMEM(x,nbytes) do { \
- memcpy((x), p, nbytes); \
- p += XDR_QUADLEN(nbytes); \
-} while (0)
-
-/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */
-#define READ_BUF(nbytes) do { \
- if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) { \
- p = argp->p; \
- argp->p += XDR_QUADLEN(nbytes); \
- } else if (!(p = read_buf(argp, nbytes))) { \
- dprintk("NFSD: xdr error (%s:%d)\n", \
- __FILE__, __LINE__); \
- goto xdr_error; \
- } \
-} while (0)
-
-static void next_decode_page(struct nfsd4_compoundargs *argp)
-{
- argp->p = page_address(argp->pagelist[0]);
- argp->pagelist++;
- if (argp->pagelen < PAGE_SIZE) {
- argp->end = argp->p + XDR_QUADLEN(argp->pagelen);
- argp->pagelen = 0;
- } else {
- argp->end = argp->p + (PAGE_SIZE>>2);
- argp->pagelen -= PAGE_SIZE;
- }
-}
-
-static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
-{
- /* We want more bytes than seem to be available.
- * Maybe we need a new page, maybe we have just run out
- */
- unsigned int avail = (char *)argp->end - (char *)argp->p;
- __be32 *p;
-
- if (argp->pagelen == 0) {
- struct kvec *vec = &argp->rqstp->rq_arg.tail[0];
-
- if (!argp->tail) {
- argp->tail = true;
- avail = vec->iov_len;
- argp->p = vec->iov_base;
- argp->end = vec->iov_base + avail;
- }
-
- if (avail < nbytes)
- return NULL;
-
- p = argp->p;
- argp->p += XDR_QUADLEN(nbytes);
- return p;
- }
-
- if (avail + argp->pagelen < nbytes)
- return NULL;
- if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */
- return NULL;
- /* ok, we can do it with the current plus the next page */
- if (nbytes <= sizeof(argp->tmp))
- p = argp->tmp;
- else {
- kfree(argp->tmpp);
- p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL);
- if (!p)
- return NULL;
-
- }
- /*
- * The following memcpy is safe because read_buf is always
- * called with nbytes > avail, and the two cases above both
- * guarantee p points to at least nbytes bytes.
- */
- memcpy(p, argp->p, avail);
- next_decode_page(argp);
- memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
- argp->p += XDR_QUADLEN(nbytes - avail);
- return p;
-}
-
static int zero_clientid(clientid_t *clid)
{
return (clid->cl_boot == 0) && (clid->cl_id == 0);
@@ -211,17 +113,17 @@ static int zero_clientid(clientid_t *clid)
/**
* svcxdr_tmpalloc - allocate memory to be freed after compound processing
* @argp: NFSv4 compound argument structure
- * @p: pointer to be freed (with kfree())
+ * @len: length of buffer to allocate
*
- * Marks @p to be freed when processing the compound operation
- * described in @argp finishes.
+ * Allocates a buffer of size @len to be freed when processing the compound
+ * operation described in @argp finishes.
*/
static void *
-svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
+svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, size_t len)
{
struct svcxdr_tmpbuf *tb;
- tb = kmalloc(sizeof(*tb) + len, GFP_KERNEL);
+ tb = kmalloc(struct_size(tb, buf, len), GFP_KERNEL);
if (!tb)
return NULL;
tb->next = argp->to_free;
@@ -237,9 +139,9 @@ svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
* buffer might end on a page boundary.
*/
static char *
-svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
+svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, size_t len)
{
- char *p = svcxdr_tmpalloc(argp, len + 1);
+ char *p = svcxdr_tmpalloc(argp, size_add(len, 1));
if (!p)
return NULL;
@@ -248,87 +150,246 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
return p;
}
-/**
- * savemem - duplicate a chunk of memory for later processing
- * @argp: NFSv4 compound argument structure to be freed with
- * @p: pointer to be duplicated
- * @nbytes: length to be duplicated
- *
- * Returns a pointer to a copy of @nbytes bytes of memory at @p
- * that are preserved until processing of the NFSv4 compound
- * operation described by @argp finishes.
- */
-static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
+static void *
+svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, size_t len)
{
- void *ret;
+ __be32 *tmp;
- ret = svcxdr_tmpalloc(argp, nbytes);
- if (!ret)
+ /*
+ * The location of the decoded data item is stable,
+ * so @p is OK to use. This is the common case.
+ */
+ if (p != argp->xdr->scratch.iov_base)
+ return p;
+
+ tmp = svcxdr_tmpalloc(argp, len);
+ if (!tmp)
return NULL;
- memcpy(ret, p, nbytes);
- return ret;
+ memcpy(tmp, p, len);
+ return tmp;
}
/*
- * We require the high 32 bits of 'seconds' to be 0, and
- * we ignore all 32 bits of 'nseconds'.
+ * NFSv4 basic data type decoders
+ */
+
+/*
+ * This helper handles variable-length opaques which belong to protocol
+ * elements that this implementation does not support.
*/
static __be32
-nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec *tv)
+nfsd4_decode_ignored_string(struct nfsd4_compoundargs *argp, u32 maxlen)
{
- DECODE_HEAD;
- u64 sec;
+ u32 len;
+
+ if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
+ return nfserr_bad_xdr;
+ if (maxlen && len > maxlen)
+ return nfserr_bad_xdr;
+ if (!xdr_inline_decode(argp->xdr, len))
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o)
+{
+ __be32 *p;
+ u32 len;
+
+ if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
+ return nfserr_bad_xdr;
+ if (len == 0 || len > NFS4_OPAQUE_LIMIT)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, len);
+ if (!p)
+ return nfserr_bad_xdr;
+ o->data = svcxdr_savemem(argp, p, len);
+ if (!o->data)
+ return nfserr_jukebox;
+ o->len = len;
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp)
+{
+ __be32 *p, status;
+
+ if (xdr_stream_decode_u32(argp->xdr, lenp) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, *lenp);
+ if (!p)
+ return nfserr_bad_xdr;
+ status = check_filename((char *)p, *lenp);
+ if (status)
+ return status;
+ *namp = svcxdr_savemem(argp, p, *lenp);
+ if (!*namp)
+ return nfserr_jukebox;
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_nfstime4(struct nfsd4_compoundargs *argp, struct timespec64 *tv)
+{
+ __be32 *p;
- READ_BUF(12);
- p = xdr_decode_hyper(p, &sec);
- tv->tv_sec = sec;
+ p = xdr_inline_decode(argp->xdr, XDR_UNIT * 3);
+ if (!p)
+ return nfserr_bad_xdr;
+ p = xdr_decode_hyper(p, &tv->tv_sec);
tv->tv_nsec = be32_to_cpup(p++);
if (tv->tv_nsec >= (u32)1000000000)
return nfserr_inval;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf)
+{
+ __be32 *p;
- DECODE_TAIL;
+ p = xdr_inline_decode(argp->xdr, NFS4_VERIFIER_SIZE);
+ if (!p)
+ return nfserr_bad_xdr;
+ memcpy(verf->data, p, sizeof(verf->data));
+ return nfs_ok;
}
+/**
+ * nfsd4_decode_bitmap4 - Decode an NFSv4 bitmap4
+ * @argp: NFSv4 compound argument structure
+ * @bmval: pointer to an array of u32's to decode into
+ * @bmlen: size of the @bmval array
+ *
+ * The server needs to return nfs_ok rather than nfserr_bad_xdr when
+ * encountering bitmaps containing bits it does not recognize. This
+ * includes bits in bitmap words past WORDn, where WORDn is the last
+ * bitmap WORD the implementation currently supports. Thus we are
+ * careful here to simply ignore bits in bitmap words that this
+ * implementation has yet to support explicitly.
+ *
+ * Return values:
+ * %nfs_ok: @bmval populated successfully
+ * %nfserr_bad_xdr: the encoded bitmap was invalid
+ */
static __be32
-nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
+nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen)
{
- u32 bmlen;
- DECODE_HEAD;
+ ssize_t status;
- bmval[0] = 0;
- bmval[1] = 0;
- bmval[2] = 0;
+ status = xdr_stream_decode_uint32_array(argp->xdr, bmval, bmlen);
+ return status == -EBADMSG ? nfserr_bad_xdr : nfs_ok;
+}
- READ_BUF(4);
- bmlen = be32_to_cpup(p++);
- if (bmlen > 1000)
- goto xdr_error;
+static __be32
+nfsd4_decode_nfsace4(struct nfsd4_compoundargs *argp, struct nfs4_ace *ace)
+{
+ __be32 *p, status;
+ u32 length;
+
+ if (xdr_stream_decode_u32(argp->xdr, &ace->type) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &ace->flag) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &ace->access_mask) < 0)
+ return nfserr_bad_xdr;
- READ_BUF(bmlen << 2);
- if (bmlen > 0)
- bmval[0] = be32_to_cpup(p++);
- if (bmlen > 1)
- bmval[1] = be32_to_cpup(p++);
- if (bmlen > 2)
- bmval[2] = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, length);
+ if (!p)
+ return nfserr_bad_xdr;
+ ace->whotype = nfs4_acl_get_whotype((char *)p, length);
+ if (ace->whotype != NFS4_ACL_WHO_NAMED)
+ status = nfs_ok;
+ else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
+ status = nfsd_map_name_to_gid(argp->rqstp,
+ (char *)p, length, &ace->who_gid);
+ else
+ status = nfsd_map_name_to_uid(argp->rqstp,
+ (char *)p, length, &ace->who_uid);
- DECODE_TAIL;
+ return status;
+}
+
+/* A counted array of nfsace4's */
+static noinline __be32
+nfsd4_decode_acl(struct nfsd4_compoundargs *argp, struct nfs4_acl **acl)
+{
+ struct nfs4_ace *ace;
+ __be32 status;
+ u32 count;
+
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
+
+ if (count > xdr_stream_remaining(argp->xdr) / 20)
+ /*
+ * Even with 4-byte names there wouldn't be
+ * space for that many aces; something fishy is
+ * going on:
+ */
+ return nfserr_fbig;
+
+ *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(count));
+ if (*acl == NULL)
+ return nfserr_jukebox;
+
+ (*acl)->naces = count;
+ for (ace = (*acl)->aces; ace < (*acl)->aces + count; ace++) {
+ status = nfsd4_decode_nfsace4(argp, ace);
+ if (status)
+ return status;
+ }
+
+ return nfs_ok;
+}
+
+static noinline __be32
+nfsd4_decode_security_label(struct nfsd4_compoundargs *argp,
+ struct xdr_netobj *label)
+{
+ u32 lfs, pi, length;
+ __be32 *p;
+
+ if (xdr_stream_decode_u32(argp->xdr, &lfs) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &pi) < 0)
+ return nfserr_bad_xdr;
+
+ if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
+ return nfserr_bad_xdr;
+ if (length > NFS4_MAXLABELLEN)
+ return nfserr_badlabel;
+ p = xdr_inline_decode(argp->xdr, length);
+ if (!p)
+ return nfserr_bad_xdr;
+ label->len = length;
+ label->data = svcxdr_dupstr(argp, p, length);
+ if (!label->data)
+ return nfserr_jukebox;
+
+ return nfs_ok;
}
static __be32
-nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
- struct iattr *iattr, struct nfs4_acl **acl,
- struct xdr_netobj *label, int *umask)
+nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
+ struct iattr *iattr, struct nfs4_acl **acl,
+ struct xdr_netobj *label, int *umask)
{
- struct timespec ts;
- int expected_len, len = 0;
- u32 dummy32;
- char *buf;
+ unsigned int starting_pos;
+ u32 attrlist4_count;
+ __be32 *p, status;
- DECODE_HEAD;
iattr->ia_valid = 0;
- if ((status = nfsd4_decode_bitmap(argp, bmval)))
- return status;
+ status = nfsd4_decode_bitmap4(argp, bmval, bmlen);
+ if (status)
+ return nfserr_bad_xdr;
if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0
|| bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1
@@ -338,92 +399,69 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
return nfserr_attrnotsupp;
}
- READ_BUF(4);
- expected_len = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &attrlist4_count) < 0)
+ return nfserr_bad_xdr;
+ starting_pos = xdr_stream_pos(argp->xdr);
if (bmval[0] & FATTR4_WORD0_SIZE) {
- READ_BUF(8);
- len += 8;
- p = xdr_decode_hyper(p, &iattr->ia_size);
+ u64 size;
+
+ if (xdr_stream_decode_u64(argp->xdr, &size) < 0)
+ return nfserr_bad_xdr;
+ iattr->ia_size = size;
iattr->ia_valid |= ATTR_SIZE;
}
if (bmval[0] & FATTR4_WORD0_ACL) {
- u32 nace;
- struct nfs4_ace *ace;
-
- READ_BUF(4); len += 4;
- nace = be32_to_cpup(p++);
-
- if (nace > NFS4_ACL_MAX)
- return nfserr_fbig;
-
- *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
- if (*acl == NULL)
- return nfserr_jukebox;
-
- (*acl)->naces = nace;
- for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {
- READ_BUF(16); len += 16;
- ace->type = be32_to_cpup(p++);
- ace->flag = be32_to_cpup(p++);
- ace->access_mask = be32_to_cpup(p++);
- dummy32 = be32_to_cpup(p++);
- READ_BUF(dummy32);
- len += XDR_QUADLEN(dummy32) << 2;
- READMEM(buf, dummy32);
- ace->whotype = nfs4_acl_get_whotype(buf, dummy32);
- status = nfs_ok;
- if (ace->whotype != NFS4_ACL_WHO_NAMED)
- ;
- else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
- status = nfsd_map_name_to_gid(argp->rqstp,
- buf, dummy32, &ace->who_gid);
- else
- status = nfsd_map_name_to_uid(argp->rqstp,
- buf, dummy32, &ace->who_uid);
- if (status)
- return status;
- }
+ status = nfsd4_decode_acl(argp, acl);
+ if (status)
+ return status;
} else
*acl = NULL;
if (bmval[1] & FATTR4_WORD1_MODE) {
- READ_BUF(4);
- len += 4;
- iattr->ia_mode = be32_to_cpup(p++);
+ u32 mode;
+
+ if (xdr_stream_decode_u32(argp->xdr, &mode) < 0)
+ return nfserr_bad_xdr;
+ iattr->ia_mode = mode;
iattr->ia_mode &= (S_IFMT | S_IALLUGO);
iattr->ia_valid |= ATTR_MODE;
}
if (bmval[1] & FATTR4_WORD1_OWNER) {
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++);
- READ_BUF(dummy32);
- len += (XDR_QUADLEN(dummy32) << 2);
- READMEM(buf, dummy32);
- if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
+ u32 length;
+
+ if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, length);
+ if (!p)
+ return nfserr_bad_xdr;
+ status = nfsd_map_name_to_uid(argp->rqstp, (char *)p, length,
+ &iattr->ia_uid);
+ if (status)
return status;
iattr->ia_valid |= ATTR_UID;
}
if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++);
- READ_BUF(dummy32);
- len += (XDR_QUADLEN(dummy32) << 2);
- READMEM(buf, dummy32);
- if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
+ u32 length;
+
+ if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, length);
+ if (!p)
+ return nfserr_bad_xdr;
+ status = nfsd_map_name_to_gid(argp->rqstp, (char *)p, length,
+ &iattr->ia_gid);
+ if (status)
return status;
iattr->ia_valid |= ATTR_GID;
}
if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++);
- switch (dummy32) {
+ u32 set_it;
+
+ if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0)
+ return nfserr_bad_xdr;
+ switch (set_it) {
case NFS4_SET_TO_CLIENT_TIME:
- len += 12;
- status = nfsd4_decode_time(argp, &ts);
- iattr->ia_atime = timespec_to_timespec64(ts);
+ status = nfsd4_decode_nfstime4(argp, &iattr->ia_atime);
if (status)
return status;
iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
@@ -432,18 +470,26 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
iattr->ia_valid |= ATTR_ATIME;
break;
default:
- goto xdr_error;
+ return nfserr_bad_xdr;
}
}
+ if (bmval[1] & FATTR4_WORD1_TIME_CREATE) {
+ struct timespec64 ts;
+
+ /* No Linux filesystem supports setting this attribute. */
+ bmval[1] &= ~FATTR4_WORD1_TIME_CREATE;
+ status = nfsd4_decode_nfstime4(argp, &ts);
+ if (status)
+ return status;
+ }
if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++);
- switch (dummy32) {
+ u32 set_it;
+
+ if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0)
+ return nfserr_bad_xdr;
+ switch (set_it) {
case NFS4_SET_TO_CLIENT_TIME:
- len += 12;
- status = nfsd4_decode_time(argp, &ts);
- iattr->ia_mtime = timespec_to_timespec64(ts);
+ status = nfsd4_decode_nfstime4(argp, &iattr->ia_mtime);
if (status)
return status;
iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
@@ -452,221 +498,343 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
iattr->ia_valid |= ATTR_MTIME;
break;
default:
- goto xdr_error;
+ return nfserr_bad_xdr;
}
}
-
label->len = 0;
if (IS_ENABLED(CONFIG_NFSD_V4_SECURITY_LABEL) &&
bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++);
- READ_BUF(dummy32);
- if (dummy32 > NFS4_MAXLABELLEN)
- return nfserr_badlabel;
- len += (XDR_QUADLEN(dummy32) << 2);
- READMEM(buf, dummy32);
- label->len = dummy32;
- label->data = svcxdr_dupstr(argp, buf, dummy32);
- if (!label->data)
- return nfserr_jukebox;
+ status = nfsd4_decode_security_label(argp, label);
+ if (status)
+ return status;
}
if (bmval[2] & FATTR4_WORD2_MODE_UMASK) {
+ u32 mode, mask;
+
if (!umask)
- goto xdr_error;
- READ_BUF(8);
- len += 8;
- dummy32 = be32_to_cpup(p++);
- iattr->ia_mode = dummy32 & (S_IFMT | S_IALLUGO);
- dummy32 = be32_to_cpup(p++);
- *umask = dummy32 & S_IRWXUGO;
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &mode) < 0)
+ return nfserr_bad_xdr;
+ iattr->ia_mode = mode & (S_IFMT | S_IALLUGO);
+ if (xdr_stream_decode_u32(argp->xdr, &mask) < 0)
+ return nfserr_bad_xdr;
+ *umask = mask & S_IRWXUGO;
iattr->ia_valid |= ATTR_MODE;
}
- if (len != expected_len)
- goto xdr_error;
+ if (bmval[2] & FATTR4_WORD2_TIME_DELEG_ACCESS) {
+ fattr4_time_deleg_access access;
- DECODE_TAIL;
+ if (!xdrgen_decode_fattr4_time_deleg_access(argp->xdr, &access))
+ return nfserr_bad_xdr;
+ iattr->ia_atime.tv_sec = access.seconds;
+ iattr->ia_atime.tv_nsec = access.nseconds;
+ iattr->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET | ATTR_DELEG;
+ }
+ if (bmval[2] & FATTR4_WORD2_TIME_DELEG_MODIFY) {
+ fattr4_time_deleg_modify modify;
+
+ if (!xdrgen_decode_fattr4_time_deleg_modify(argp->xdr, &modify))
+ return nfserr_bad_xdr;
+ iattr->ia_mtime.tv_sec = modify.seconds;
+ iattr->ia_mtime.tv_nsec = modify.nseconds;
+ iattr->ia_ctime.tv_sec = modify.seconds;
+ iattr->ia_ctime.tv_nsec = modify.nseconds;
+ iattr->ia_valid |= ATTR_CTIME | ATTR_CTIME_SET |
+ ATTR_MTIME | ATTR_MTIME_SET | ATTR_DELEG;
+ }
+
+ /* request sanity: did attrlist4 contain the expected number of words? */
+ if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos)
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
}
static __be32
-nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid)
+nfsd4_decode_stateid4(struct nfsd4_compoundargs *argp, stateid_t *sid)
{
- DECODE_HEAD;
+ __be32 *p;
- READ_BUF(sizeof(stateid_t));
+ p = xdr_inline_decode(argp->xdr, NFS4_STATEID_SIZE);
+ if (!p)
+ return nfserr_bad_xdr;
sid->si_generation = be32_to_cpup(p++);
- COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
+ memcpy(&sid->si_opaque, p, sizeof(sid->si_opaque));
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_clientid4(struct nfsd4_compoundargs *argp, clientid_t *clientid)
+{
+ __be32 *p;
- DECODE_TAIL;
+ p = xdr_inline_decode(argp->xdr, sizeof(__be64));
+ if (!p)
+ return nfserr_bad_xdr;
+ memcpy(clientid, p, sizeof(*clientid));
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_state_owner4(struct nfsd4_compoundargs *argp,
+ clientid_t *clientid, struct xdr_netobj *owner)
+{
+ __be32 status;
+
+ status = nfsd4_decode_clientid4(argp, clientid);
+ if (status)
+ return status;
+ return nfsd4_decode_opaque(argp, owner);
}
+#ifdef CONFIG_NFSD_PNFS
+
static __be32
-nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access)
+nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_layoutcommit *lcp)
{
- DECODE_HEAD;
+ u32 len;
- READ_BUF(4);
- access->ac_req_access = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0)
+ return nfserr_bad_xdr;
+ if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES)
+ return nfserr_bad_xdr;
+ if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX)
+ return nfserr_bad_xdr;
+
+ if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
+ return nfserr_bad_xdr;
+ if (!xdr_stream_subsegment(argp->xdr, &lcp->lc_up_layout, len))
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
-static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
+static __be32
+nfsd4_decode_layoutreturn4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_layoutreturn *lrp)
{
- DECODE_HEAD;
- u32 dummy, uid, gid;
- char *machine_name;
- int i;
- int nr_secflavs;
+ __be32 status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_return_type) < 0)
+ return nfserr_bad_xdr;
+ switch (lrp->lr_return_type) {
+ case RETURN_FILE:
+ if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.length) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &lrp->lr_sid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &lrp->lrf_body_len) < 0)
+ return nfserr_bad_xdr;
+ if (lrp->lrf_body_len > 0) {
+ lrp->lrf_body = xdr_inline_decode(argp->xdr, lrp->lrf_body_len);
+ if (!lrp->lrf_body)
+ return nfserr_bad_xdr;
+ }
+ break;
+ case RETURN_FSID:
+ case RETURN_ALL:
+ lrp->lr_seg.offset = 0;
+ lrp->lr_seg.length = NFS4_MAX_UINT64;
+ break;
+ default:
+ return nfserr_bad_xdr;
+ }
+
+ return nfs_ok;
+}
+
+#endif /* CONFIG_NFSD_PNFS */
+
+static __be32
+nfsd4_decode_sessionid4(struct nfsd4_compoundargs *argp,
+ struct nfs4_sessionid *sessionid)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(argp->xdr, NFS4_MAX_SESSIONID_LEN);
+ if (!p)
+ return nfserr_bad_xdr;
+ memcpy(sessionid->data, p, sizeof(sessionid->data));
+ return nfs_ok;
+}
+
+/* Defined in Appendix A of RFC 5531 */
+static __be32
+nfsd4_decode_authsys_parms(struct nfsd4_compoundargs *argp,
+ struct nfsd4_cb_sec *cbs)
+{
+ u32 stamp, gidcount, uid, gid;
+ __be32 *p, status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &stamp) < 0)
+ return nfserr_bad_xdr;
+ /* machine name */
+ status = nfsd4_decode_ignored_string(argp, 255);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &uid) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &gid) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &gidcount) < 0)
+ return nfserr_bad_xdr;
+ if (gidcount > 16)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, gidcount << 2);
+ if (!p)
+ return nfserr_bad_xdr;
+ if (cbs->flavor == (u32)(-1)) {
+ struct user_namespace *userns = nfsd_user_namespace(argp->rqstp);
+
+ kuid_t kuid = make_kuid(userns, uid);
+ kgid_t kgid = make_kgid(userns, gid);
+ if (uid_valid(kuid) && gid_valid(kgid)) {
+ cbs->uid = kuid;
+ cbs->gid = kgid;
+ cbs->flavor = RPC_AUTH_UNIX;
+ } else {
+ dprintk("RPC_AUTH_UNIX with invalid uid or gid, ignoring!\n");
+ }
+ }
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_gss_cb_handles4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_cb_sec *cbs)
+{
+ __be32 status;
+ u32 service;
+
+ dprintk("RPC_AUTH_GSS callback secflavor not supported!\n");
+
+ if (xdr_stream_decode_u32(argp->xdr, &service) < 0)
+ return nfserr_bad_xdr;
+ if (service < RPC_GSS_SVC_NONE || service > RPC_GSS_SVC_PRIVACY)
+ return nfserr_bad_xdr;
+ /* gcbp_handle_from_server */
+ status = nfsd4_decode_ignored_string(argp, 0);
+ if (status)
+ return status;
+ /* gcbp_handle_from_client */
+ status = nfsd4_decode_ignored_string(argp, 0);
+ if (status)
+ return status;
+
+ return nfs_ok;
+}
+
+/* a counted array of callback_sec_parms4 items */
+static __be32
+nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
+{
+ u32 i, secflavor, nr_secflavs;
+ __be32 status;
/* callback_sec_params4 */
- READ_BUF(4);
- nr_secflavs = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &nr_secflavs) < 0)
+ return nfserr_bad_xdr;
if (nr_secflavs)
cbs->flavor = (u32)(-1);
else
/* Is this legal? Be generous, take it to mean AUTH_NONE: */
cbs->flavor = 0;
+
for (i = 0; i < nr_secflavs; ++i) {
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- switch (dummy) {
+ if (xdr_stream_decode_u32(argp->xdr, &secflavor) < 0)
+ return nfserr_bad_xdr;
+ switch (secflavor) {
case RPC_AUTH_NULL:
- /* Nothing to read */
+ /* void */
if (cbs->flavor == (u32)(-1))
cbs->flavor = RPC_AUTH_NULL;
break;
case RPC_AUTH_UNIX:
- READ_BUF(8);
- /* stamp */
- dummy = be32_to_cpup(p++);
-
- /* machine name */
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- SAVEMEM(machine_name, dummy);
-
- /* uid, gid */
- READ_BUF(8);
- uid = be32_to_cpup(p++);
- gid = be32_to_cpup(p++);
-
- /* more gids */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy * 4);
- if (cbs->flavor == (u32)(-1)) {
- kuid_t kuid = make_kuid(&init_user_ns, uid);
- kgid_t kgid = make_kgid(&init_user_ns, gid);
- if (uid_valid(kuid) && gid_valid(kgid)) {
- cbs->uid = kuid;
- cbs->gid = kgid;
- cbs->flavor = RPC_AUTH_UNIX;
- } else {
- dprintk("RPC_AUTH_UNIX with invalid"
- "uid or gid ignoring!\n");
- }
- }
+ status = nfsd4_decode_authsys_parms(argp, cbs);
+ if (status)
+ return status;
break;
case RPC_AUTH_GSS:
- dprintk("RPC_AUTH_GSS callback secflavor "
- "not supported!\n");
- READ_BUF(8);
- /* gcbp_service */
- dummy = be32_to_cpup(p++);
- /* gcbp_handle_from_server */
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
- /* gcbp_handle_from_client */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
+ status = nfsd4_decode_gss_cb_handles4(argp, cbs);
+ if (status)
+ return status;
break;
default:
- dprintk("Illegal callback secflavor\n");
return nfserr_inval;
}
}
- DECODE_TAIL;
-}
-static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc)
-{
- DECODE_HEAD;
+ return nfs_ok;
+}
- READ_BUF(4);
- bc->bc_cb_program = be32_to_cpup(p++);
- nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
- DECODE_TAIL;
-}
+/*
+ * NFSv4 operation argument decoders
+ */
-static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
+static __be32
+nfsd4_decode_access(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
-
- READ_BUF(NFS4_MAX_SESSIONID_LEN + 8);
- COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
- bcts->dir = be32_to_cpup(p++);
- /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker
- * could help us figure out we should be using it. */
- DECODE_TAIL;
+ struct nfsd4_access *access = &u->access;
+ if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0)
+ return nfserr_bad_xdr;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
+nfsd4_decode_close(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
-
- READ_BUF(4);
- close->cl_seqid = be32_to_cpup(p++);
- return nfsd4_decode_stateid(argp, &close->cl_stateid);
-
- DECODE_TAIL;
+ struct nfsd4_close *close = &u->close;
+ if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_stateid4(argp, &close->cl_stateid);
}
static __be32
-nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit)
+nfsd4_decode_commit(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
-
- READ_BUF(12);
- p = xdr_decode_hyper(p, &commit->co_offset);
- commit->co_count = be32_to_cpup(p++);
-
- DECODE_TAIL;
+ struct nfsd4_commit *commit = &u->commit;
+ if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0)
+ return nfserr_bad_xdr;
+ memset(&commit->co_verf, 0, sizeof(commit->co_verf));
+ return nfs_ok;
}
static __be32
-nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
+nfsd4_decode_create(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_create *create = &u->create;
+ __be32 *p, status;
- READ_BUF(4);
- create->cr_type = be32_to_cpup(p++);
+ memset(create, 0, sizeof(*create));
+ if (xdr_stream_decode_u32(argp->xdr, &create->cr_type) < 0)
+ return nfserr_bad_xdr;
switch (create->cr_type) {
case NF4LNK:
- READ_BUF(4);
- create->cr_datalen = be32_to_cpup(p++);
- READ_BUF(create->cr_datalen);
+ if (xdr_stream_decode_u32(argp->xdr, &create->cr_datalen) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, create->cr_datalen);
+ if (!p)
+ return nfserr_bad_xdr;
create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen);
if (!create->cr_data)
return nfserr_jukebox;
break;
case NF4BLK:
case NF4CHR:
- READ_BUF(8);
- create->cr_specdata1 = be32_to_cpup(p++);
- create->cr_specdata2 = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata1) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata2) < 0)
+ return nfserr_bad_xdr;
break;
case NF4SOCK:
case NF4FIFO:
@@ -674,151 +842,221 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
default:
break;
}
-
- READ_BUF(4);
- create->cr_namelen = be32_to_cpup(p++);
- READ_BUF(create->cr_namelen);
- SAVEMEM(create->cr_name, create->cr_namelen);
- if ((status = check_filename(create->cr_name, create->cr_namelen)))
+ status = nfsd4_decode_component4(argp, &create->cr_name,
+ &create->cr_namelen);
+ if (status)
return status;
-
- status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
- &create->cr_acl, &create->cr_label,
- &create->cr_umask);
+ status = nfsd4_decode_fattr4(argp, create->cr_bmval,
+ ARRAY_SIZE(create->cr_bmval),
+ &create->cr_iattr, &create->cr_acl,
+ &create->cr_label, &create->cr_umask);
if (status)
- goto out;
+ return status;
- DECODE_TAIL;
+ return nfs_ok;
}
static inline __be32
-nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
+nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- return nfsd4_decode_stateid(argp, &dr->dr_stateid);
+ struct nfsd4_delegreturn *dr = &u->delegreturn;
+ return nfsd4_decode_stateid4(argp, &dr->dr_stateid);
}
static inline __be32
-nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
+nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- return nfsd4_decode_bitmap(argp, getattr->ga_bmval);
+ struct nfsd4_getattr *getattr = &u->getattr;
+ memset(getattr, 0, sizeof(*getattr));
+ return nfsd4_decode_bitmap4(argp, getattr->ga_bmval,
+ ARRAY_SIZE(getattr->ga_bmval));
}
static __be32
-nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
+nfsd4_decode_link(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_link *link = &u->link;
+ memset(link, 0, sizeof(*link));
+ return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen);
+}
- READ_BUF(4);
- link->li_namelen = be32_to_cpup(p++);
- READ_BUF(link->li_namelen);
- SAVEMEM(link->li_name, link->li_namelen);
- if ((status = check_filename(link->li_name, link->li_namelen)))
- return status;
+static __be32
+nfsd4_decode_open_to_lock_owner4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_lock *lock)
+{
+ __be32 status;
- DECODE_TAIL;
+ if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_open_seqid) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &lock->lk_new_open_stateid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_lock_seqid) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_state_owner4(argp, &lock->lk_new_clientid,
+ &lock->lk_new_owner);
}
static __be32
-nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+nfsd4_decode_exist_lock_owner4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_lock *lock)
{
- DECODE_HEAD;
+ __be32 status;
- /*
- * type, reclaim(boolean), offset, length, new_lock_owner(boolean)
- */
- READ_BUF(28);
- lock->lk_type = be32_to_cpup(p++);
- if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))
- goto xdr_error;
- lock->lk_reclaim = be32_to_cpup(p++);
- p = xdr_decode_hyper(p, &lock->lk_offset);
- p = xdr_decode_hyper(p, &lock->lk_length);
- lock->lk_is_new = be32_to_cpup(p++);
-
- if (lock->lk_is_new) {
- READ_BUF(4);
- lock->lk_new_open_seqid = be32_to_cpup(p++);
- status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid);
- if (status)
- return status;
- READ_BUF(8 + sizeof(clientid_t));
- lock->lk_new_lock_seqid = be32_to_cpup(p++);
- COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t));
- lock->lk_new_owner.len = be32_to_cpup(p++);
- READ_BUF(lock->lk_new_owner.len);
- READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len);
- } else {
- status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid);
- if (status)
- return status;
- READ_BUF(4);
- lock->lk_old_lock_seqid = be32_to_cpup(p++);
- }
+ status = nfsd4_decode_stateid4(argp, &lock->lk_old_lock_stateid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &lock->lk_old_lock_seqid) < 0)
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
+nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
{
- DECODE_HEAD;
-
- READ_BUF(32);
- lockt->lt_type = be32_to_cpup(p++);
- if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
- goto xdr_error;
- p = xdr_decode_hyper(p, &lockt->lt_offset);
- p = xdr_decode_hyper(p, &lockt->lt_length);
- COPYMEM(&lockt->lt_clientid, 8);
- lockt->lt_owner.len = be32_to_cpup(p++);
- READ_BUF(lockt->lt_owner.len);
- READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
+ if (xdr_stream_decode_bool(argp->xdr, &lock->lk_is_new) < 0)
+ return nfserr_bad_xdr;
+ if (lock->lk_is_new)
+ return nfsd4_decode_open_to_lock_owner4(argp, lock);
+ return nfsd4_decode_exist_lock_owner4(argp, lock);
+}
+
+static __be32
+nfsd4_decode_lock(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+{
+ struct nfsd4_lock *lock = &u->lock;
+ memset(lock, 0, sizeof(*lock));
+ if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0)
+ return nfserr_bad_xdr;
+ if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_bool(argp->xdr, &lock->lk_reclaim) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lock->lk_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lock->lk_length) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_locker4(argp, lock);
+}
- DECODE_TAIL;
+static __be32
+nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+{
+ struct nfsd4_lockt *lockt = &u->lockt;
+ memset(lockt, 0, sizeof(*lockt));
+ if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0)
+ return nfserr_bad_xdr;
+ if ((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_length) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_state_owner4(argp, &lockt->lt_clientid,
+ &lockt->lt_owner);
}
static __be32
-nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
+nfsd4_decode_locku(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_locku *locku = &u->locku;
+ __be32 status;
- READ_BUF(8);
- locku->lu_type = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0)
+ return nfserr_bad_xdr;
if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))
- goto xdr_error;
- locku->lu_seqid = be32_to_cpup(p++);
- status = nfsd4_decode_stateid(argp, &locku->lu_stateid);
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &locku->lu_seqid) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &locku->lu_stateid);
if (status)
return status;
- READ_BUF(16);
- p = xdr_decode_hyper(p, &locku->lu_offset);
- p = xdr_decode_hyper(p, &locku->lu_length);
+ if (xdr_stream_decode_u64(argp->xdr, &locku->lu_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &locku->lu_length) < 0)
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup)
+nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_lookup *lookup = &u->lookup;
+ return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len);
+}
- READ_BUF(4);
- lookup->lo_len = be32_to_cpup(p++);
- READ_BUF(lookup->lo_len);
- SAVEMEM(lookup->lo_name, lookup->lo_len);
- if ((status = check_filename(lookup->lo_name, lookup->lo_len)))
- return status;
+static __be32
+nfsd4_decode_createhow4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+{
+ __be32 status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &open->op_createmode) < 0)
+ return nfserr_bad_xdr;
+ switch (open->op_createmode) {
+ case NFS4_CREATE_UNCHECKED:
+ case NFS4_CREATE_GUARDED:
+ status = nfsd4_decode_fattr4(argp, open->op_bmval,
+ ARRAY_SIZE(open->op_bmval),
+ &open->op_iattr, &open->op_acl,
+ &open->op_label, &open->op_umask);
+ if (status)
+ return status;
+ break;
+ case NFS4_CREATE_EXCLUSIVE:
+ status = nfsd4_decode_verifier4(argp, &open->op_verf);
+ if (status)
+ return status;
+ break;
+ case NFS4_CREATE_EXCLUSIVE4_1:
+ if (argp->minorversion < 1)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_verifier4(argp, &open->op_verf);
+ if (status)
+ return status;
+ status = nfsd4_decode_fattr4(argp, open->op_bmval,
+ ARRAY_SIZE(open->op_bmval),
+ &open->op_iattr, &open->op_acl,
+ &open->op_label, &open->op_umask);
+ if (status)
+ return status;
+ break;
+ default:
+ return nfserr_bad_xdr;
+ }
- DECODE_TAIL;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_openflag4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+{
+ __be32 status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &open->op_create) < 0)
+ return nfserr_bad_xdr;
+ switch (open->op_create) {
+ case NFS4_OPEN_NOCREATE:
+ break;
+ case NFS4_OPEN_CREATE:
+ status = nfsd4_decode_createhow4(argp, open);
+ if (status)
+ return status;
+ break;
+ default:
+ return nfserr_bad_xdr;
+ }
+
+ return nfs_ok;
}
static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when)
{
- __be32 *p;
u32 w;
- READ_BUF(4);
- w = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &w) < 0)
+ return nfserr_bad_xdr;
*share_access = w & NFS4_SHARE_ACCESS_MASK;
*deleg_want = w & NFS4_SHARE_WANT_MASK;
if (deleg_when)
@@ -837,13 +1075,13 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh
return nfs_ok;
if (!argp->minorversion)
return nfserr_bad_xdr;
- switch (w & NFS4_SHARE_WANT_MASK) {
- case NFS4_SHARE_WANT_NO_PREFERENCE:
- case NFS4_SHARE_WANT_READ_DELEG:
- case NFS4_SHARE_WANT_WRITE_DELEG:
- case NFS4_SHARE_WANT_ANY_DELEG:
- case NFS4_SHARE_WANT_NO_DELEG:
- case NFS4_SHARE_WANT_CANCEL:
+ switch (w & NFS4_SHARE_WANT_TYPE_MASK) {
+ case OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE:
+ case OPEN4_SHARE_ACCESS_WANT_READ_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_ANY_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_NO_DELEG:
+ case OPEN4_SHARE_ACCESS_WANT_CANCEL:
break;
default:
return nfserr_bad_xdr;
@@ -861,1031 +1099,1349 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh
NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED):
return nfs_ok;
}
-xdr_error:
return nfserr_bad_xdr;
}
static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x)
{
- __be32 *p;
-
- READ_BUF(4);
- *x = be32_to_cpup(p++);
- /* Note: unlinke access bits, deny bits may be zero. */
- if (*x & ~NFS4_SHARE_DENY_BOTH)
+ if (xdr_stream_decode_u32(argp->xdr, x) < 0)
return nfserr_bad_xdr;
- return nfs_ok;
-xdr_error:
- return nfserr_bad_xdr;
-}
-
-static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o)
-{
- __be32 *p;
-
- READ_BUF(4);
- o->len = be32_to_cpup(p++);
-
- if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT)
+ /* Note: unlike access bits, deny bits may be zero. */
+ if (*x & ~NFS4_SHARE_DENY_BOTH)
return nfserr_bad_xdr;
- READ_BUF(o->len);
- SAVEMEM(o->data, o->len);
return nfs_ok;
-xdr_error:
- return nfserr_bad_xdr;
}
static __be32
-nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_open *open)
{
- DECODE_HEAD;
- u32 dummy;
-
- memset(open->op_bmval, 0, sizeof(open->op_bmval));
- open->op_iattr.ia_valid = 0;
- open->op_openowner = NULL;
-
- open->op_xdr_error = 0;
- /* seqid, share_access, share_deny, clientid, ownerlen */
- READ_BUF(4);
- open->op_seqid = be32_to_cpup(p++);
- /* decode, yet ignore deleg_when until supported */
- status = nfsd4_decode_share_access(argp, &open->op_share_access,
- &open->op_deleg_want, &dummy);
- if (status)
- goto xdr_error;
- status = nfsd4_decode_share_deny(argp, &open->op_share_deny);
- if (status)
- goto xdr_error;
- READ_BUF(sizeof(clientid_t));
- COPYMEM(&open->op_clientid, sizeof(clientid_t));
- status = nfsd4_decode_opaque(argp, &open->op_owner);
- if (status)
- goto xdr_error;
- READ_BUF(4);
- open->op_create = be32_to_cpup(p++);
- switch (open->op_create) {
- case NFS4_OPEN_NOCREATE:
- break;
- case NFS4_OPEN_CREATE:
- READ_BUF(4);
- open->op_createmode = be32_to_cpup(p++);
- switch (open->op_createmode) {
- case NFS4_CREATE_UNCHECKED:
- case NFS4_CREATE_GUARDED:
- status = nfsd4_decode_fattr(argp, open->op_bmval,
- &open->op_iattr, &open->op_acl, &open->op_label,
- &open->op_umask);
- if (status)
- goto out;
- break;
- case NFS4_CREATE_EXCLUSIVE:
- READ_BUF(NFS4_VERIFIER_SIZE);
- COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
- break;
- case NFS4_CREATE_EXCLUSIVE4_1:
- if (argp->minorversion < 1)
- goto xdr_error;
- READ_BUF(NFS4_VERIFIER_SIZE);
- COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
- status = nfsd4_decode_fattr(argp, open->op_bmval,
- &open->op_iattr, &open->op_acl, &open->op_label,
- &open->op_umask);
- if (status)
- goto out;
- break;
- default:
- goto xdr_error;
- }
- break;
- default:
- goto xdr_error;
- }
+ __be32 status;
- /* open_claim */
- READ_BUF(4);
- open->op_claim_type = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &open->op_claim_type) < 0)
+ return nfserr_bad_xdr;
switch (open->op_claim_type) {
case NFS4_OPEN_CLAIM_NULL:
case NFS4_OPEN_CLAIM_DELEGATE_PREV:
- READ_BUF(4);
- open->op_fname.len = be32_to_cpup(p++);
- READ_BUF(open->op_fname.len);
- SAVEMEM(open->op_fname.data, open->op_fname.len);
- if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
+ status = nfsd4_decode_component4(argp, &open->op_fname,
+ &open->op_fnamelen);
+ if (status)
return status;
break;
case NFS4_OPEN_CLAIM_PREVIOUS:
- READ_BUF(4);
- open->op_delegate_type = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &open->op_delegate_type) < 0)
+ return nfserr_bad_xdr;
break;
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
- status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
+ status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid);
if (status)
return status;
- READ_BUF(4);
- open->op_fname.len = be32_to_cpup(p++);
- READ_BUF(open->op_fname.len);
- SAVEMEM(open->op_fname.data, open->op_fname.len);
- if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
+ status = nfsd4_decode_component4(argp, &open->op_fname,
+ &open->op_fnamelen);
+ if (status)
return status;
break;
case NFS4_OPEN_CLAIM_FH:
case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
if (argp->minorversion < 1)
- goto xdr_error;
+ return nfserr_bad_xdr;
/* void */
break;
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
if (argp->minorversion < 1)
- goto xdr_error;
- status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid);
if (status)
return status;
break;
default:
- goto xdr_error;
+ return nfserr_bad_xdr;
}
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)
+nfsd4_decode_open(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_open *open = &u->open;
+ __be32 status;
+ u32 dummy;
- if (argp->minorversion >= 1)
- return nfserr_notsupp;
+ memset(open, 0, sizeof(*open));
- status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid);
+ if (xdr_stream_decode_u32(argp->xdr, &open->op_seqid) < 0)
+ return nfserr_bad_xdr;
+ /* deleg_want is ignored */
+ status = nfsd4_decode_share_access(argp, &open->op_share_access,
+ &open->op_deleg_want, &dummy);
if (status)
return status;
- READ_BUF(4);
- open_conf->oc_seqid = be32_to_cpup(p++);
-
- DECODE_TAIL;
-}
-
-static __be32
-nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down)
-{
- DECODE_HEAD;
-
- status = nfsd4_decode_stateid(argp, &open_down->od_stateid);
+ status = nfsd4_decode_share_deny(argp, &open->op_share_deny);
if (status)
return status;
- READ_BUF(4);
- open_down->od_seqid = be32_to_cpup(p++);
- status = nfsd4_decode_share_access(argp, &open_down->od_share_access,
- &open_down->od_deleg_want, NULL);
+ status = nfsd4_decode_state_owner4(argp, &open->op_clientid,
+ &open->op_owner);
if (status)
return status;
- status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny);
+ status = nfsd4_decode_openflag4(argp, open);
if (status)
return status;
- DECODE_TAIL;
+ return nfsd4_decode_open_claim4(argp, open);
}
static __be32
-nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
+nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_open_confirm *open_conf = &u->open_confirm;
+ __be32 status;
- READ_BUF(4);
- putfh->pf_fhlen = be32_to_cpup(p++);
- if (putfh->pf_fhlen > NFS4_FHSIZE)
- goto xdr_error;
- READ_BUF(putfh->pf_fhlen);
- SAVEMEM(putfh->pf_fhval, putfh->pf_fhlen);
+ if (argp->minorversion >= 1)
+ return nfserr_notsupp;
- DECODE_TAIL;
-}
+ status = nfsd4_decode_stateid4(argp, &open_conf->oc_req_stateid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &open_conf->oc_seqid) < 0)
+ return nfserr_bad_xdr;
-static __be32
-nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p)
-{
- if (argp->minorversion == 0)
- return nfs_ok;
- return nfserr_notsupp;
+ memset(&open_conf->oc_resp_stateid, 0,
+ sizeof(open_conf->oc_resp_stateid));
+ return nfs_ok;
}
static __be32
-nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
+nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_open_downgrade *open_down = &u->open_downgrade;
+ __be32 status;
- status = nfsd4_decode_stateid(argp, &read->rd_stateid);
+ memset(open_down, 0, sizeof(*open_down));
+ status = nfsd4_decode_stateid4(argp, &open_down->od_stateid);
if (status)
return status;
- READ_BUF(12);
- p = xdr_decode_hyper(p, &read->rd_offset);
- read->rd_length = be32_to_cpup(p++);
-
- DECODE_TAIL;
+ if (xdr_stream_decode_u32(argp->xdr, &open_down->od_seqid) < 0)
+ return nfserr_bad_xdr;
+ /* deleg_want is ignored */
+ status = nfsd4_decode_share_access(argp, &open_down->od_share_access,
+ &open_down->od_deleg_want, NULL);
+ if (status)
+ return status;
+ return nfsd4_decode_share_deny(argp, &open_down->od_share_deny);
}
static __be32
-nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir)
+nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_putfh *putfh = &u->putfh;
+ __be32 *p;
- READ_BUF(24);
- p = xdr_decode_hyper(p, &readdir->rd_cookie);
- COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data));
- readdir->rd_dircount = be32_to_cpup(p++);
- readdir->rd_maxcount = be32_to_cpup(p++);
- if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval)))
- goto out;
+ if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0)
+ return nfserr_bad_xdr;
+ if (putfh->pf_fhlen > NFS4_FHSIZE)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, putfh->pf_fhlen);
+ if (!p)
+ return nfserr_bad_xdr;
+ putfh->pf_fhval = svcxdr_savemem(argp, p, putfh->pf_fhlen);
+ if (!putfh->pf_fhval)
+ return nfserr_jukebox;
- DECODE_TAIL;
+ putfh->no_verify = false;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
+nfsd4_decode_read(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_read *read = &u->read;
+ __be32 status;
- READ_BUF(4);
- remove->rm_namelen = be32_to_cpup(p++);
- READ_BUF(remove->rm_namelen);
- SAVEMEM(remove->rm_name, remove->rm_namelen);
- if ((status = check_filename(remove->rm_name, remove->rm_namelen)))
+ memset(read, 0, sizeof(*read));
+ status = nfsd4_decode_stateid4(argp, &read->rd_stateid);
+ if (status)
return status;
+ if (xdr_stream_decode_u64(argp->xdr, &read->rd_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &read->rd_length) < 0)
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename)
+nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_readdir *readdir = &u->readdir;
+ __be32 status;
- READ_BUF(4);
- rename->rn_snamelen = be32_to_cpup(p++);
- READ_BUF(rename->rn_snamelen);
- SAVEMEM(rename->rn_sname, rename->rn_snamelen);
- READ_BUF(4);
- rename->rn_tnamelen = be32_to_cpup(p++);
- READ_BUF(rename->rn_tnamelen);
- SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
- if ((status = check_filename(rename->rn_sname, rename->rn_snamelen)))
- return status;
- if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen)))
+ memset(readdir, 0, sizeof(*readdir));
+ if (xdr_stream_decode_u64(argp->xdr, &readdir->rd_cookie) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_verifier4(argp, &readdir->rd_verf);
+ if (status)
return status;
+ if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_dircount) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_maxcount) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_uint32_array(argp->xdr, readdir->rd_bmval,
+ ARRAY_SIZE(readdir->rd_bmval)) < 0)
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
+nfsd4_decode_remove(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
-
- if (argp->minorversion >= 1)
- return nfserr_notsupp;
-
- READ_BUF(sizeof(clientid_t));
- COPYMEM(clientid, sizeof(clientid_t));
-
- DECODE_TAIL;
+ struct nfsd4_remove *remove = &u->remove;
+ memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo));
+ return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen);
}
static __be32
-nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
- struct nfsd4_secinfo *secinfo)
+nfsd4_decode_rename(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_rename *rename = &u->rename;
+ __be32 status;
- READ_BUF(4);
- secinfo->si_namelen = be32_to_cpup(p++);
- READ_BUF(secinfo->si_namelen);
- SAVEMEM(secinfo->si_name, secinfo->si_namelen);
- status = check_filename(secinfo->si_name, secinfo->si_namelen);
+ memset(rename, 0, sizeof(*rename));
+ status = nfsd4_decode_component4(argp, &rename->rn_sname, &rename->rn_snamelen);
if (status)
return status;
- DECODE_TAIL;
+ return nfsd4_decode_component4(argp, &rename->rn_tname, &rename->rn_tnamelen);
}
static __be32
-nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
- struct nfsd4_secinfo_no_name *sin)
+nfsd4_decode_renew(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ clientid_t *clientid = &u->renew;
+ return nfsd4_decode_clientid4(argp, clientid);
+}
- READ_BUF(4);
- sin->sin_style = be32_to_cpup(p++);
- DECODE_TAIL;
+static __be32
+nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_secinfo *secinfo = &u->secinfo;
+ secinfo->si_exp = NULL;
+ return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen);
}
static __be32
-nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
+nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
+ struct nfsd4_setattr *setattr = &u->setattr;
__be32 status;
- status = nfsd4_decode_stateid(argp, &setattr->sa_stateid);
+ memset(setattr, 0, sizeof(*setattr));
+ status = nfsd4_decode_stateid4(argp, &setattr->sa_stateid);
if (status)
return status;
- return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr,
- &setattr->sa_acl, &setattr->sa_label, NULL);
+ return nfsd4_decode_fattr4(argp, setattr->sa_bmval,
+ ARRAY_SIZE(setattr->sa_bmval),
+ &setattr->sa_iattr, &setattr->sa_acl,
+ &setattr->sa_label, NULL);
}
static __be32
-nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid)
+nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_setclientid *setclientid = &u->setclientid;
+ __be32 *p, status;
+
+ memset(setclientid, 0, sizeof(*setclientid));
if (argp->minorversion >= 1)
return nfserr_notsupp;
- READ_BUF(NFS4_VERIFIER_SIZE);
- COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE);
-
+ status = nfsd4_decode_verifier4(argp, &setclientid->se_verf);
+ if (status)
+ return status;
status = nfsd4_decode_opaque(argp, &setclientid->se_name);
if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_prog) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_netid_len) < 0)
return nfserr_bad_xdr;
- READ_BUF(8);
- setclientid->se_callback_prog = be32_to_cpup(p++);
- setclientid->se_callback_netid_len = be32_to_cpup(p++);
- READ_BUF(setclientid->se_callback_netid_len);
- SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
- READ_BUF(4);
- setclientid->se_callback_addr_len = be32_to_cpup(p++);
+ p = xdr_inline_decode(argp->xdr, setclientid->se_callback_netid_len);
+ if (!p)
+ return nfserr_bad_xdr;
+ setclientid->se_callback_netid_val = svcxdr_savemem(argp, p,
+ setclientid->se_callback_netid_len);
+ if (!setclientid->se_callback_netid_val)
+ return nfserr_jukebox;
- READ_BUF(setclientid->se_callback_addr_len);
- SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
- READ_BUF(4);
- setclientid->se_callback_ident = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_addr_len) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, setclientid->se_callback_addr_len);
+ if (!p)
+ return nfserr_bad_xdr;
+ setclientid->se_callback_addr_val = svcxdr_savemem(argp, p,
+ setclientid->se_callback_addr_len);
+ if (!setclientid->se_callback_addr_val)
+ return nfserr_jukebox;
+ if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_ident) < 0)
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c)
+nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_setclientid_confirm *scd_c = &u->setclientid_confirm;
+ __be32 status;
if (argp->minorversion >= 1)
return nfserr_notsupp;
- READ_BUF(8 + NFS4_VERIFIER_SIZE);
- COPYMEM(&scd_c->sc_clientid, 8);
- COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE);
-
- DECODE_TAIL;
+ status = nfsd4_decode_clientid4(argp, &scd_c->sc_clientid);
+ if (status)
+ return status;
+ return nfsd4_decode_verifier4(argp, &scd_c->sc_confirm);
}
/* Also used for NVERIFY */
static __be32
-nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
+nfsd4_decode_verify(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_verify *verify = &u->verify;
+ __be32 *p, status;
- if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval)))
- goto out;
+ memset(verify, 0, sizeof(*verify));
+
+ status = nfsd4_decode_bitmap4(argp, verify->ve_bmval,
+ ARRAY_SIZE(verify->ve_bmval));
+ if (status)
+ return status;
/* For convenience's sake, we compare raw xdr'd attributes in
* nfsd4_proc_verify */
- READ_BUF(4);
- verify->ve_attrlen = be32_to_cpup(p++);
- READ_BUF(verify->ve_attrlen);
- SAVEMEM(verify->ve_attrval, verify->ve_attrlen);
+ if (xdr_stream_decode_u32(argp->xdr, &verify->ve_attrlen) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, verify->ve_attrlen);
+ if (!p)
+ return nfserr_bad_xdr;
+ verify->ve_attrval = svcxdr_savemem(argp, p, verify->ve_attrlen);
+ if (!verify->ve_attrval)
+ return nfserr_jukebox;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
+nfsd4_decode_write(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
{
- int avail;
- int len;
- DECODE_HEAD;
+ struct nfsd4_write *write = &u->write;
+ __be32 status;
- status = nfsd4_decode_stateid(argp, &write->wr_stateid);
+ status = nfsd4_decode_stateid4(argp, &write->wr_stateid);
if (status)
return status;
- READ_BUF(16);
- p = xdr_decode_hyper(p, &write->wr_offset);
- write->wr_stable_how = be32_to_cpup(p++);
+ if (xdr_stream_decode_u64(argp->xdr, &write->wr_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &write->wr_stable_how) < 0)
+ return nfserr_bad_xdr;
if (write->wr_stable_how > NFS_FILE_SYNC)
- goto xdr_error;
- write->wr_buflen = be32_to_cpup(p++);
-
- /* Sorry .. no magic macros for this.. *
- * READ_BUF(write->wr_buflen);
- * SAVEMEM(write->wr_buf, write->wr_buflen);
- */
- avail = (char*)argp->end - (char*)argp->p;
- if (avail + argp->pagelen < write->wr_buflen) {
- dprintk("NFSD: xdr error (%s:%d)\n",
- __FILE__, __LINE__);
- goto xdr_error;
- }
- write->wr_head.iov_base = p;
- write->wr_head.iov_len = avail;
- write->wr_pagelist = argp->pagelist;
-
- len = XDR_QUADLEN(write->wr_buflen) << 2;
- if (len >= avail) {
- int pages;
-
- len -= avail;
-
- pages = len >> PAGE_SHIFT;
- argp->pagelist += pages;
- argp->pagelen -= pages * PAGE_SIZE;
- len -= pages * PAGE_SIZE;
-
- next_decode_page(argp);
- }
- argp->p += XDR_QUADLEN(len);
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &write->wr_buflen) < 0)
+ return nfserr_bad_xdr;
+ if (!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen))
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ write->wr_bytes_written = 0;
+ write->wr_how_written = 0;
+ memset(&write->wr_verifier, 0, sizeof(write->wr_verifier));
+ return nfs_ok;
}
static __be32
-nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner)
+nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner;
+ __be32 status;
if (argp->minorversion >= 1)
return nfserr_notsupp;
- READ_BUF(12);
- COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t));
- rlockowner->rl_owner.len = be32_to_cpup(p++);
- READ_BUF(rlockowner->rl_owner.len);
- READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len);
+ status = nfsd4_decode_state_owner4(argp, &rlockowner->rl_clientid,
+ &rlockowner->rl_owner);
+ if (status)
+ return status;
if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid))
return nfserr_inval;
- DECODE_TAIL;
+
+ return nfs_ok;
+}
+
+static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_backchannel_ctl *bc = &u->backchannel_ctl;
+ memset(bc, 0, sizeof(*bc));
+ if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
+}
+
+static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
+ u32 use_conn_in_rdma_mode;
+ __be32 status;
+
+ memset(bcts, 0, sizeof(*bcts));
+ status = nfsd4_decode_sessionid4(argp, &bcts->sessionid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &bcts->dir) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &use_conn_in_rdma_mode) < 0)
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
}
static __be32
-nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
- struct nfsd4_exchange_id *exid)
+nfsd4_decode_state_protect_ops(struct nfsd4_compoundargs *argp,
+ struct nfsd4_exchange_id *exid)
{
- int dummy, tmp;
- DECODE_HEAD;
+ __be32 status;
+
+ status = nfsd4_decode_bitmap4(argp, exid->spo_must_enforce,
+ ARRAY_SIZE(exid->spo_must_enforce));
+ if (status)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_bitmap4(argp, exid->spo_must_allow,
+ ARRAY_SIZE(exid->spo_must_allow));
+ if (status)
+ return nfserr_bad_xdr;
- READ_BUF(NFS4_VERIFIER_SIZE);
- COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE);
+ return nfs_ok;
+}
- status = nfsd4_decode_opaque(argp, &exid->clname);
+/*
+ * This implementation currently does not support SP4_SSV.
+ * This decoder simply skips over these arguments.
+ */
+static noinline __be32
+nfsd4_decode_ssv_sp_parms(struct nfsd4_compoundargs *argp,
+ struct nfsd4_exchange_id *exid)
+{
+ u32 count, window, num_gss_handles;
+ __be32 status;
+
+ /* ssp_ops */
+ status = nfsd4_decode_state_protect_ops(argp, exid);
if (status)
+ return status;
+
+ /* ssp_hash_algs<> */
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
return nfserr_bad_xdr;
+ while (count--) {
+ status = nfsd4_decode_ignored_string(argp, 0);
+ if (status)
+ return status;
+ }
- READ_BUF(4);
- exid->flags = be32_to_cpup(p++);
+ /* ssp_encr_algs<> */
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
+ while (count--) {
+ status = nfsd4_decode_ignored_string(argp, 0);
+ if (status)
+ return status;
+ }
- /* Ignore state_protect4_a */
- READ_BUF(4);
- exid->spa_how = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &window) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &num_gss_handles) < 0)
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_state_protect4_a(struct nfsd4_compoundargs *argp,
+ struct nfsd4_exchange_id *exid)
+{
+ __be32 status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &exid->spa_how) < 0)
+ return nfserr_bad_xdr;
switch (exid->spa_how) {
case SP4_NONE:
break;
case SP4_MACH_CRED:
- /* spo_must_enforce */
- status = nfsd4_decode_bitmap(argp,
- exid->spo_must_enforce);
- if (status)
- goto out;
- /* spo_must_allow */
- status = nfsd4_decode_bitmap(argp, exid->spo_must_allow);
+ status = nfsd4_decode_state_protect_ops(argp, exid);
if (status)
- goto out;
+ return status;
break;
case SP4_SSV:
- /* ssp_ops */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy * 4);
- p += dummy;
-
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy * 4);
- p += dummy;
-
- /* ssp_hash_algs<> */
- READ_BUF(4);
- tmp = be32_to_cpup(p++);
- while (tmp--) {
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
- }
+ status = nfsd4_decode_ssv_sp_parms(argp, exid);
+ if (status)
+ return status;
+ break;
+ default:
+ return nfserr_bad_xdr;
+ }
- /* ssp_encr_algs<> */
- READ_BUF(4);
- tmp = be32_to_cpup(p++);
- while (tmp--) {
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
- }
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_exchange_id *exid)
+{
+ __be32 status;
+ u32 count;
- /* ignore ssp_window and ssp_num_gss_handles: */
- READ_BUF(8);
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
+ switch (count) {
+ case 0:
+ break;
+ case 1:
+ /* Note that RFC 8881 places no length limit on
+ * nii_domain, but this implementation permits no
+ * more than NFS4_OPAQUE_LIMIT bytes */
+ status = nfsd4_decode_opaque(argp, &exid->nii_domain);
+ if (status)
+ return status;
+ /* Note that RFC 8881 places no length limit on
+ * nii_name, but this implementation permits no
+ * more than NFS4_OPAQUE_LIMIT bytes */
+ status = nfsd4_decode_opaque(argp, &exid->nii_name);
+ if (status)
+ return status;
+ status = nfsd4_decode_nfstime4(argp, &exid->nii_time);
+ if (status)
+ return status;
break;
default:
- goto xdr_error;
+ return nfserr_bad_xdr;
}
- /* Ignore Implementation ID */
- READ_BUF(4); /* nfs_impl_id4 array length */
- dummy = be32_to_cpup(p++);
-
- if (dummy > 1)
- goto xdr_error;
-
- if (dummy == 1) {
- /* nii_domain */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
-
- /* nii_name */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
-
- /* nii_date */
- READ_BUF(12);
- p += 3;
- }
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
- struct nfsd4_create_session *sess)
+nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
- u32 dummy;
+ struct nfsd4_exchange_id *exid = &u->exchange_id;
+ __be32 status;
- READ_BUF(16);
- COPYMEM(&sess->clientid, 8);
- sess->seqid = be32_to_cpup(p++);
- sess->flags = be32_to_cpup(p++);
-
- /* Fore channel attrs */
- READ_BUF(28);
- dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
- sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
- sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
- sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
- sess->fore_channel.maxops = be32_to_cpup(p++);
- sess->fore_channel.maxreqs = be32_to_cpup(p++);
- sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++);
- if (sess->fore_channel.nr_rdma_attrs == 1) {
- READ_BUF(4);
- sess->fore_channel.rdma_attrs = be32_to_cpup(p++);
- } else if (sess->fore_channel.nr_rdma_attrs > 1) {
- dprintk("Too many fore channel attr bitmaps!\n");
- goto xdr_error;
- }
+ memset(exid, 0, sizeof(*exid));
+ status = nfsd4_decode_verifier4(argp, &exid->verifier);
+ if (status)
+ return status;
+ status = nfsd4_decode_opaque(argp, &exid->clname);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &exid->flags) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_state_protect4_a(argp, exid);
+ if (status)
+ return status;
+ return nfsd4_decode_nfs_impl_id4(argp, exid);
+}
- /* Back channel attrs */
- READ_BUF(28);
- dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
- sess->back_channel.maxreq_sz = be32_to_cpup(p++);
- sess->back_channel.maxresp_sz = be32_to_cpup(p++);
- sess->back_channel.maxresp_cached = be32_to_cpup(p++);
- sess->back_channel.maxops = be32_to_cpup(p++);
- sess->back_channel.maxreqs = be32_to_cpup(p++);
- sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++);
- if (sess->back_channel.nr_rdma_attrs == 1) {
- READ_BUF(4);
- sess->back_channel.rdma_attrs = be32_to_cpup(p++);
- } else if (sess->back_channel.nr_rdma_attrs > 1) {
- dprintk("Too many back channel attr bitmaps!\n");
- goto xdr_error;
+static __be32
+nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_channel_attrs *ca)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(argp->xdr, XDR_UNIT * 7);
+ if (!p)
+ return nfserr_bad_xdr;
+
+ /* headerpadsz is ignored */
+ p++;
+ ca->maxreq_sz = be32_to_cpup(p++);
+ ca->maxresp_sz = be32_to_cpup(p++);
+ ca->maxresp_cached = be32_to_cpup(p++);
+ ca->maxops = be32_to_cpup(p++);
+ ca->maxreqs = be32_to_cpup(p++);
+ ca->nr_rdma_attrs = be32_to_cpup(p);
+ switch (ca->nr_rdma_attrs) {
+ case 0:
+ break;
+ case 1:
+ if (xdr_stream_decode_u32(argp->xdr, &ca->rdma_attrs) < 0)
+ return nfserr_bad_xdr;
+ break;
+ default:
+ return nfserr_bad_xdr;
}
- READ_BUF(4);
- sess->callback_prog = be32_to_cpup(p++);
- nfsd4_decode_cb_sec(argp, &sess->cb_sec);
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp,
- struct nfsd4_destroy_session *destroy_session)
+nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
- READ_BUF(NFS4_MAX_SESSIONID_LEN);
- COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+ struct nfsd4_create_session *sess = &u->create_session;
+ __be32 status;
- DECODE_TAIL;
+ memset(sess, 0, sizeof(*sess));
+ status = nfsd4_decode_clientid4(argp, &sess->clientid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &sess->seqid) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &sess->flags) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_channel_attrs4(argp, &sess->fore_channel);
+ if (status)
+ return status;
+ status = nfsd4_decode_channel_attrs4(argp, &sess->back_channel);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &sess->callback_prog) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_cb_sec(argp, &sess->cb_sec);
+}
+
+static __be32
+nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_destroy_session *destroy_session = &u->destroy_session;
+ return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid);
}
static __be32
nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,
- struct nfsd4_free_stateid *free_stateid)
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_free_stateid *free_stateid = &u->free_stateid;
+ return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid);
+}
- READ_BUF(sizeof(stateid_t));
- free_stateid->fr_stateid.si_generation = be32_to_cpup(p++);
- COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t));
+static __be32
+nfsd4_decode_get_dir_delegation(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation;
+ __be32 status;
- DECODE_TAIL;
+ memset(gdd, 0, sizeof(*gdd));
+
+ if (xdr_stream_decode_bool(argp->xdr, &gdd->gdda_signal_deleg_avail) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_bitmap4(argp, gdd->gdda_notification_types,
+ ARRAY_SIZE(gdd->gdda_notification_types));
+ if (status)
+ return status;
+ status = nfsd4_decode_nfstime4(argp, &gdd->gdda_child_attr_delay);
+ if (status)
+ return status;
+ status = nfsd4_decode_nfstime4(argp, &gdd->gdda_dir_attr_delay);
+ if (status)
+ return status;
+ status = nfsd4_decode_bitmap4(argp, gdd->gdda_child_attributes,
+ ARRAY_SIZE(gdd->gdda_child_attributes));
+ if (status)
+ return status;
+ return nfsd4_decode_bitmap4(argp, gdd->gdda_dir_attributes,
+ ARRAY_SIZE(gdd->gdda_dir_attributes));
+}
+
+#ifdef CONFIG_NFSD_PNFS
+static __be32
+nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo;
+ __be32 status;
+
+ memset(gdev, 0, sizeof(*gdev));
+ status = nfsd4_decode_deviceid4(argp->xdr, &gdev->gd_devid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_layout_type) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_maxcount) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_uint32_array(argp->xdr,
+ &gdev->gd_notify_types, 1) < 0)
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
+ __be32 *p, status;
+
+ memset(lcp, 0, sizeof(*lcp));
+ if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.length) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_bool(argp->xdr, &lcp->lc_reclaim) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &lcp->lc_sid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_bool(argp->xdr, &lcp->lc_newoffset) < 0)
+ return nfserr_bad_xdr;
+ if (lcp->lc_newoffset) {
+ if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_last_wr) < 0)
+ return nfserr_bad_xdr;
+ } else
+ lcp->lc_last_wr = 0;
+ p = xdr_inline_decode(argp->xdr, XDR_UNIT);
+ if (!p)
+ return nfserr_bad_xdr;
+ if (xdr_item_is_present(p)) {
+ status = nfsd4_decode_nfstime4(argp, &lcp->lc_mtime);
+ if (status)
+ return status;
+ } else {
+ lcp->lc_mtime.tv_nsec = UTIME_NOW;
+ }
+ return nfsd4_decode_layoutupdate4(argp, lcp);
+}
+
+static __be32
+nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_layoutget *lgp = &u->layoutget;
+ __be32 status;
+
+ memset(lgp, 0, sizeof(*lgp));
+ if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_signal) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_layout_type) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_seg.iomode) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.length) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_minlength) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &lgp->lg_sid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_maxcount) < 0)
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
+ memset(lrp, 0, sizeof(*lrp));
+ if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_layout_type) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_seg.iomode) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_layoutreturn4(argp, lrp);
+}
+#endif /* CONFIG_NFSD_PNFS */
+
+static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_secinfo_no_name *sin = &u->secinfo_no_name;
+ if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0)
+ return nfserr_bad_xdr;
+
+ sin->sin_exp = NULL;
+ return nfs_ok;
}
static __be32
nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
- struct nfsd4_sequence *seq)
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_sequence *seq = &u->sequence;
+ __be32 *p, status;
- READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
- COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+ status = nfsd4_decode_sessionid4(argp, &seq->sessionid);
+ if (status)
+ return status;
+ p = xdr_inline_decode(argp->xdr, XDR_UNIT * 4);
+ if (!p)
+ return nfserr_bad_xdr;
seq->seqid = be32_to_cpup(p++);
seq->slotid = be32_to_cpup(p++);
- seq->maxslots = be32_to_cpup(p++);
- seq->cachethis = be32_to_cpup(p++);
+ /* sa_highest_slotid counts from 0 but maxslots counts from 1 ... */
+ seq->maxslots = be32_to_cpup(p++) + 1;
+ seq->cachethis = be32_to_cpup(p);
- DECODE_TAIL;
+ seq->status_flags = 0;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid)
+nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- int i;
- __be32 *p, status;
+ struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
struct nfsd4_test_stateid_id *stateid;
+ __be32 status;
+ u32 i;
- READ_BUF(4);
- test_stateid->ts_num_ids = ntohl(*p++);
+ memset(test_stateid, 0, sizeof(*test_stateid));
+ if (xdr_stream_decode_u32(argp->xdr, &test_stateid->ts_num_ids) < 0)
+ return nfserr_bad_xdr;
INIT_LIST_HEAD(&test_stateid->ts_stateid_list);
-
for (i = 0; i < test_stateid->ts_num_ids; i++) {
stateid = svcxdr_tmpalloc(argp, sizeof(*stateid));
- if (!stateid) {
- status = nfserrno(-ENOMEM);
- goto out;
- }
-
+ if (!stateid)
+ return nfserr_jukebox;
INIT_LIST_HEAD(&stateid->ts_id_list);
list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
-
- status = nfsd4_decode_stateid(argp, &stateid->ts_id_stateid);
+ status = nfsd4_decode_stateid4(argp, &stateid->ts_id_stateid);
if (status)
- goto out;
+ return status;
}
- status = 0;
-out:
- return status;
-xdr_error:
- dprintk("NFSD: xdr error (%s:%d)\n", __FILE__, __LINE__);
- status = nfserr_bad_xdr;
- goto out;
+ return nfs_ok;
+}
+
+static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_destroy_clientid *dc = &u->destroy_clientid;
+ return nfsd4_decode_clientid4(argp, &dc->clientid);
}
-static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc)
+static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_reclaim_complete *rc = &u->reclaim_complete;
+ if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0)
+ return nfserr_bad_xdr;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_fallocate *fallocate = &u->allocate;
+ __be32 status;
- READ_BUF(8);
- COPYMEM(&dc->clientid, 8);
+ status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_length) < 0)
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
-static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc)
+static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
+ struct nl4_server *ns)
{
- DECODE_HEAD;
+ struct nfs42_netaddr *naddr;
+ __be32 *p;
+
+ if (xdr_stream_decode_u32(argp->xdr, &ns->nl4_type) < 0)
+ return nfserr_bad_xdr;
- READ_BUF(4);
- rc->rca_one_fs = be32_to_cpup(p++);
+ /* currently support for 1 inter-server source server */
+ switch (ns->nl4_type) {
+ case NL4_NETADDR:
+ naddr = &ns->u.nl4_addr;
- DECODE_TAIL;
+ if (xdr_stream_decode_u32(argp->xdr, &naddr->netid_len) < 0)
+ return nfserr_bad_xdr;
+ if (naddr->netid_len > RPCBIND_MAXNETIDLEN)
+ return nfserr_bad_xdr;
+
+ p = xdr_inline_decode(argp->xdr, naddr->netid_len);
+ if (!p)
+ return nfserr_bad_xdr;
+ memcpy(naddr->netid, p, naddr->netid_len);
+
+ if (xdr_stream_decode_u32(argp->xdr, &naddr->addr_len) < 0)
+ return nfserr_bad_xdr;
+ if (naddr->addr_len > RPCBIND_MAXUADDRLEN)
+ return nfserr_bad_xdr;
+
+ p = xdr_inline_decode(argp->xdr, naddr->addr_len);
+ if (!p)
+ return nfserr_bad_xdr;
+ memcpy(naddr->addr, p, naddr->addr_len);
+ break;
+ default:
+ return nfserr_bad_xdr;
+ }
+
+ return nfs_ok;
}
-#ifdef CONFIG_NFSD_PNFS
static __be32
-nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
- struct nfsd4_getdeviceinfo *gdev)
-{
- DECODE_HEAD;
- u32 num, i;
-
- READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4);
- COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid));
- gdev->gd_layout_type = be32_to_cpup(p++);
- gdev->gd_maxcount = be32_to_cpup(p++);
- num = be32_to_cpup(p++);
- if (num) {
- if (num > 1000)
- goto xdr_error;
- READ_BUF(4 * num);
- gdev->gd_notify_types = be32_to_cpup(p++);
- for (i = 1; i < num; i++) {
- if (be32_to_cpup(p++)) {
- status = nfserr_inval;
- goto out;
- }
+nfsd4_decode_copy(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+{
+ struct nfsd4_copy *copy = &u->copy;
+ u32 consecutive, i, count, sync;
+ struct nl4_server *ns_dummy;
+ __be32 status;
+
+ memset(copy, 0, sizeof(*copy));
+ status = nfsd4_decode_stateid4(argp, &copy->cp_src_stateid);
+ if (status)
+ return status;
+ status = nfsd4_decode_stateid4(argp, &copy->cp_dst_stateid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u64(argp->xdr, &copy->cp_src_pos) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &copy->cp_dst_pos) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &copy->cp_count) < 0)
+ return nfserr_bad_xdr;
+ /* ca_consecutive: we always do consecutive copies */
+ if (xdr_stream_decode_u32(argp->xdr, &consecutive) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_bool(argp->xdr, &sync) < 0)
+ return nfserr_bad_xdr;
+ nfsd4_copy_set_sync(copy, sync);
+
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
+ copy->cp_src = svcxdr_tmpalloc(argp, sizeof(*copy->cp_src));
+ if (copy->cp_src == NULL)
+ return nfserr_jukebox;
+ if (count == 0) { /* intra-server copy */
+ __set_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
+ return nfs_ok;
+ }
+
+ /* decode all the supplied server addresses but use only the first */
+ status = nfsd4_decode_nl4_server(argp, copy->cp_src);
+ if (status)
+ return status;
+
+ ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL);
+ if (ns_dummy == NULL)
+ return nfserr_jukebox;
+ for (i = 0; i < count - 1; i++) {
+ status = nfsd4_decode_nl4_server(argp, ns_dummy);
+ if (status) {
+ kfree(ns_dummy);
+ return status;
}
}
- DECODE_TAIL;
+ kfree(ns_dummy);
+
+ return nfs_ok;
}
static __be32
-nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
- struct nfsd4_layoutget *lgp)
+nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_copy_notify *cn = &u->copy_notify;
+ __be32 status;
- READ_BUF(36);
- lgp->lg_signal = be32_to_cpup(p++);
- lgp->lg_layout_type = be32_to_cpup(p++);
- lgp->lg_seg.iomode = be32_to_cpup(p++);
- p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
- p = xdr_decode_hyper(p, &lgp->lg_seg.length);
- p = xdr_decode_hyper(p, &lgp->lg_minlength);
+ memset(cn, 0, sizeof(*cn));
+ cn->cpn_src = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_src));
+ if (cn->cpn_src == NULL)
+ return nfserr_jukebox;
+ cn->cpn_dst = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_dst));
+ if (cn->cpn_dst == NULL)
+ return nfserr_jukebox;
- status = nfsd4_decode_stateid(argp, &lgp->lg_sid);
+ status = nfsd4_decode_stateid4(argp, &cn->cpn_src_stateid);
if (status)
return status;
-
- READ_BUF(4);
- lgp->lg_maxcount = be32_to_cpup(p++);
-
- DECODE_TAIL;
+ return nfsd4_decode_nl4_server(argp, cn->cpn_dst);
}
static __be32
-nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
- struct nfsd4_layoutcommit *lcp)
+nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
- u32 timechange;
+ struct nfsd4_offload_status *os = &u->offload_status;
+ os->count = 0;
+ os->status = 0;
+ return nfsd4_decode_stateid4(argp, &os->stateid);
+}
- READ_BUF(20);
- p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
- p = xdr_decode_hyper(p, &lcp->lc_seg.length);
- lcp->lc_reclaim = be32_to_cpup(p++);
+static __be32
+nfsd4_decode_seek(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+{
+ struct nfsd4_seek *seek = &u->seek;
+ __be32 status;
- status = nfsd4_decode_stateid(argp, &lcp->lc_sid);
+ status = nfsd4_decode_stateid4(argp, &seek->seek_stateid);
if (status)
return status;
+ if (xdr_stream_decode_u64(argp->xdr, &seek->seek_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &seek->seek_whence) < 0)
+ return nfserr_bad_xdr;
- READ_BUF(4);
- lcp->lc_newoffset = be32_to_cpup(p++);
- if (lcp->lc_newoffset) {
- READ_BUF(8);
- p = xdr_decode_hyper(p, &lcp->lc_last_wr);
- } else
- lcp->lc_last_wr = 0;
- READ_BUF(4);
- timechange = be32_to_cpup(p++);
- if (timechange) {
- status = nfsd4_decode_time(argp, &lcp->lc_mtime);
- if (status)
- return status;
- } else {
- lcp->lc_mtime.tv_nsec = UTIME_NOW;
- }
- READ_BUF(8);
- lcp->lc_layout_type = be32_to_cpup(p++);
+ seek->seek_eof = 0;
+ seek->seek_pos = 0;
+ return nfs_ok;
+}
- /*
- * Save the layout update in XDR format and let the layout driver deal
- * with it later.
- */
- lcp->lc_up_len = be32_to_cpup(p++);
- if (lcp->lc_up_len > 0) {
- READ_BUF(lcp->lc_up_len);
- READMEM(lcp->lc_up_layout, lcp->lc_up_len);
- }
+static __be32
+nfsd4_decode_clone(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u)
+{
+ struct nfsd4_clone *clone = &u->clone;
+ __be32 status;
- DECODE_TAIL;
+ status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid);
+ if (status)
+ return status;
+ status = nfsd4_decode_stateid4(argp, &clone->cl_dst_stateid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u64(argp->xdr, &clone->cl_src_pos) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &clone->cl_dst_pos) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &clone->cl_count) < 0)
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
}
+/*
+ * XDR data that is more than PAGE_SIZE in size is normally part of a
+ * read or write. However, the size of extended attributes is limited
+ * by the maximum request size, and then further limited by the underlying
+ * filesystem limits. This can exceed PAGE_SIZE (currently, XATTR_SIZE_MAX
+ * is 64k). Since there is no kvec- or page-based interface to xattrs,
+ * and we're not dealing with contiguous pages, we need to do some copying.
+ */
+
+/*
+ * Decode data into buffer.
+ */
static __be32
-nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
- struct nfsd4_layoutreturn *lrp)
+nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr,
+ char **bufp, size_t buflen)
{
- DECODE_HEAD;
+ struct page **pages = xdr->pages;
+ struct kvec *head = xdr->head;
+ char *tmp, *dp;
+ u32 len;
- READ_BUF(16);
- lrp->lr_reclaim = be32_to_cpup(p++);
- lrp->lr_layout_type = be32_to_cpup(p++);
- lrp->lr_seg.iomode = be32_to_cpup(p++);
- lrp->lr_return_type = be32_to_cpup(p++);
- if (lrp->lr_return_type == RETURN_FILE) {
- READ_BUF(16);
- p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
- p = xdr_decode_hyper(p, &lrp->lr_seg.length);
+ if (buflen <= head->iov_len) {
+ /*
+ * We're in luck, the head has enough space. Just return
+ * the head, no need for copying.
+ */
+ *bufp = head->iov_base;
+ return 0;
+ }
- status = nfsd4_decode_stateid(argp, &lrp->lr_sid);
- if (status)
- return status;
+ tmp = svcxdr_tmpalloc(argp, buflen);
+ if (tmp == NULL)
+ return nfserr_jukebox;
- READ_BUF(4);
- lrp->lrf_body_len = be32_to_cpup(p++);
- if (lrp->lrf_body_len > 0) {
- READ_BUF(lrp->lrf_body_len);
- READMEM(lrp->lrf_body, lrp->lrf_body_len);
- }
- } else {
- lrp->lr_seg.offset = 0;
- lrp->lr_seg.length = NFS4_MAX_UINT64;
+ dp = tmp;
+ memcpy(dp, head->iov_base, head->iov_len);
+ buflen -= head->iov_len;
+ dp += head->iov_len;
+
+ while (buflen > 0) {
+ len = min_t(u32, buflen, PAGE_SIZE);
+ memcpy(dp, page_address(*pages), len);
+
+ buflen -= len;
+ dp += len;
+ pages++;
}
- DECODE_TAIL;
+ *bufp = tmp;
+ return 0;
}
-#endif /* CONFIG_NFSD_PNFS */
+/*
+ * Get a user extended attribute name from the XDR buffer.
+ * It will not have the "user." prefix, so prepend it.
+ * Lastly, check for nul characters in the name.
+ */
static __be32
-nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
- struct nfsd4_fallocate *fallocate)
+nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep)
{
- DECODE_HEAD;
+ char *name, *sp, *dp;
+ u32 namelen, cnt;
+ __be32 *p;
- status = nfsd4_decode_stateid(argp, &fallocate->falloc_stateid);
- if (status)
- return status;
+ if (xdr_stream_decode_u32(argp->xdr, &namelen) < 0)
+ return nfserr_bad_xdr;
+ if (namelen > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN))
+ return nfserr_nametoolong;
+ if (namelen == 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, namelen);
+ if (!p)
+ return nfserr_bad_xdr;
+ name = svcxdr_tmpalloc(argp, namelen + XATTR_USER_PREFIX_LEN + 1);
+ if (!name)
+ return nfserr_jukebox;
+ memcpy(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
- READ_BUF(16);
- p = xdr_decode_hyper(p, &fallocate->falloc_offset);
- xdr_decode_hyper(p, &fallocate->falloc_length);
+ /*
+ * Copy the extended attribute name over while checking for 0
+ * characters.
+ */
+ sp = (char *)p;
+ dp = name + XATTR_USER_PREFIX_LEN;
+ cnt = namelen;
+
+ while (cnt-- > 0) {
+ if (*sp == '\0')
+ return nfserr_bad_xdr;
+ *dp++ = *sp++;
+ }
+ *dp = '\0';
- DECODE_TAIL;
+ *namep = name;
+
+ return nfs_ok;
}
+/*
+ * A GETXATTR op request comes without a length specifier. We just set the
+ * maximum length for the reply based on XATTR_SIZE_MAX and the maximum
+ * channel reply size. nfsd_getxattr will probe the length of the xattr,
+ * check it against getxa_len, and allocate + return the value.
+ */
static __be32
-nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
+nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_getxattr *getxattr = &u->getxattr;
+ __be32 status;
+ u32 maxcount;
- status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid);
- if (status)
- return status;
- status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid);
+ memset(getxattr, 0, sizeof(*getxattr));
+ status = nfsd4_decode_xattr_name(argp, &getxattr->getxa_name);
if (status)
return status;
- READ_BUF(8 + 8 + 8);
- p = xdr_decode_hyper(p, &clone->cl_src_pos);
- p = xdr_decode_hyper(p, &clone->cl_dst_pos);
- p = xdr_decode_hyper(p, &clone->cl_count);
- DECODE_TAIL;
+ maxcount = svc_max_payload(argp->rqstp);
+ maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount);
+
+ getxattr->getxa_len = maxcount;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
- unsigned int tmp;
+ struct nfsd4_setxattr *setxattr = &u->setxattr;
+ u32 flags, maxcount, size;
+ __be32 status;
- status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
- if (status)
- return status;
- status = nfsd4_decode_stateid(argp, &copy->cp_dst_stateid);
+ memset(setxattr, 0, sizeof(*setxattr));
+
+ if (xdr_stream_decode_u32(argp->xdr, &flags) < 0)
+ return nfserr_bad_xdr;
+
+ if (flags > SETXATTR4_REPLACE)
+ return nfserr_inval;
+ setxattr->setxa_flags = flags;
+
+ status = nfsd4_decode_xattr_name(argp, &setxattr->setxa_name);
if (status)
return status;
- READ_BUF(8 + 8 + 8 + 4 + 4 + 4);
- p = xdr_decode_hyper(p, &copy->cp_src_pos);
- p = xdr_decode_hyper(p, &copy->cp_dst_pos);
- p = xdr_decode_hyper(p, &copy->cp_count);
- p++; /* ca_consecutive: we always do consecutive copies */
- copy->cp_synchronous = be32_to_cpup(p++);
- tmp = be32_to_cpup(p); /* Source server list not supported */
+ maxcount = svc_max_payload(argp->rqstp);
+ maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount);
- DECODE_TAIL;
-}
+ if (xdr_stream_decode_u32(argp->xdr, &size) < 0)
+ return nfserr_bad_xdr;
+ if (size > maxcount)
+ return nfserr_xattr2big;
-static __be32
-nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
- struct nfsd4_offload_status *os)
-{
- return nfsd4_decode_stateid(argp, &os->stateid);
+ setxattr->setxa_len = size;
+ if (size > 0) {
+ struct xdr_buf payload;
+
+ if (!xdr_stream_subsegment(argp->xdr, &payload, size))
+ return nfserr_bad_xdr;
+ status = nfsd4_vbuf_from_vector(argp, &payload,
+ &setxattr->setxa_buf, size);
+ }
+
+ return nfs_ok;
}
static __be32
-nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
+nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
{
- DECODE_HEAD;
+ struct nfsd4_listxattrs *listxattrs = &u->listxattrs;
+ u32 maxcount;
- status = nfsd4_decode_stateid(argp, &seek->seek_stateid);
- if (status)
- return status;
+ memset(listxattrs, 0, sizeof(*listxattrs));
+
+ if (xdr_stream_decode_u64(argp->xdr, &listxattrs->lsxa_cookie) < 0)
+ return nfserr_bad_xdr;
+
+ /*
+ * If the cookie is too large to have even one user.x attribute
+ * plus trailing '\0' left in a maximum size buffer, it's invalid.
+ */
+ if (listxattrs->lsxa_cookie >=
+ (XATTR_LIST_MAX / (XATTR_USER_PREFIX_LEN + 2)))
+ return nfserr_badcookie;
+
+ if (xdr_stream_decode_u32(argp->xdr, &maxcount) < 0)
+ return nfserr_bad_xdr;
+ if (maxcount < 8)
+ /* Always need at least 2 words (length and one character) */
+ return nfserr_inval;
- READ_BUF(8 + 4);
- p = xdr_decode_hyper(p, &seek->seek_offset);
- seek->seek_whence = be32_to_cpup(p);
+ maxcount = min(maxcount, svc_max_payload(argp->rqstp));
+ listxattrs->lsxa_maxcount = maxcount;
- DECODE_TAIL;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_removexattr *removexattr = &u->removexattr;
+ memset(removexattr, 0, sizeof(*removexattr));
+ return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name);
}
static __be32
-nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
+nfsd4_decode_noop(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
{
return nfs_ok;
}
static __be32
-nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
+nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, union nfsd4_op_u *p)
{
return nfserr_notsupp;
}
-typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
+typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, union nfsd4_op_u *u);
static const nfsd4_dec nfsd4_dec_ops[] = {
- [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
- [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
- [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
- [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
- [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
- [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
- [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
- [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
- [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
- [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
- [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
- [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
- [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
- [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm,
- [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
- [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
- [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_putpubfh,
- [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_READ] = (nfsd4_dec)nfsd4_decode_read,
- [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
- [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
- [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
- [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew,
- [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
- [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
- [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
- [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid,
- [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm,
- [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
- [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
- [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner,
+ [OP_ACCESS] = nfsd4_decode_access,
+ [OP_CLOSE] = nfsd4_decode_close,
+ [OP_COMMIT] = nfsd4_decode_commit,
+ [OP_CREATE] = nfsd4_decode_create,
+ [OP_DELEGPURGE] = nfsd4_decode_notsupp,
+ [OP_DELEGRETURN] = nfsd4_decode_delegreturn,
+ [OP_GETATTR] = nfsd4_decode_getattr,
+ [OP_GETFH] = nfsd4_decode_noop,
+ [OP_LINK] = nfsd4_decode_link,
+ [OP_LOCK] = nfsd4_decode_lock,
+ [OP_LOCKT] = nfsd4_decode_lockt,
+ [OP_LOCKU] = nfsd4_decode_locku,
+ [OP_LOOKUP] = nfsd4_decode_lookup,
+ [OP_LOOKUPP] = nfsd4_decode_noop,
+ [OP_NVERIFY] = nfsd4_decode_verify,
+ [OP_OPEN] = nfsd4_decode_open,
+ [OP_OPENATTR] = nfsd4_decode_notsupp,
+ [OP_OPEN_CONFIRM] = nfsd4_decode_open_confirm,
+ [OP_OPEN_DOWNGRADE] = nfsd4_decode_open_downgrade,
+ [OP_PUTFH] = nfsd4_decode_putfh,
+ [OP_PUTPUBFH] = nfsd4_decode_noop,
+ [OP_PUTROOTFH] = nfsd4_decode_noop,
+ [OP_READ] = nfsd4_decode_read,
+ [OP_READDIR] = nfsd4_decode_readdir,
+ [OP_READLINK] = nfsd4_decode_noop,
+ [OP_REMOVE] = nfsd4_decode_remove,
+ [OP_RENAME] = nfsd4_decode_rename,
+ [OP_RENEW] = nfsd4_decode_renew,
+ [OP_RESTOREFH] = nfsd4_decode_noop,
+ [OP_SAVEFH] = nfsd4_decode_noop,
+ [OP_SECINFO] = nfsd4_decode_secinfo,
+ [OP_SETATTR] = nfsd4_decode_setattr,
+ [OP_SETCLIENTID] = nfsd4_decode_setclientid,
+ [OP_SETCLIENTID_CONFIRM] = nfsd4_decode_setclientid_confirm,
+ [OP_VERIFY] = nfsd4_decode_verify,
+ [OP_WRITE] = nfsd4_decode_write,
+ [OP_RELEASE_LOCKOWNER] = nfsd4_decode_release_lockowner,
/* new operations for NFSv4.1 */
- [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_backchannel_ctl,
- [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_bind_conn_to_session,
- [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id,
- [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session,
- [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
- [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid,
- [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_BACKCHANNEL_CTL] = nfsd4_decode_backchannel_ctl,
+ [OP_BIND_CONN_TO_SESSION] = nfsd4_decode_bind_conn_to_session,
+ [OP_EXCHANGE_ID] = nfsd4_decode_exchange_id,
+ [OP_CREATE_SESSION] = nfsd4_decode_create_session,
+ [OP_DESTROY_SESSION] = nfsd4_decode_destroy_session,
+ [OP_FREE_STATEID] = nfsd4_decode_free_stateid,
+ [OP_GET_DIR_DELEGATION] = nfsd4_decode_get_dir_delegation,
#ifdef CONFIG_NFSD_PNFS
- [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo,
- [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit,
- [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget,
- [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn,
+ [OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo,
+ [OP_GETDEVICELIST] = nfsd4_decode_notsupp,
+ [OP_LAYOUTCOMMIT] = nfsd4_decode_layoutcommit,
+ [OP_LAYOUTGET] = nfsd4_decode_layoutget,
+ [OP_LAYOUTRETURN] = nfsd4_decode_layoutreturn,
#else
- [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_GETDEVICEINFO] = nfsd4_decode_notsupp,
+ [OP_GETDEVICELIST] = nfsd4_decode_notsupp,
+ [OP_LAYOUTCOMMIT] = nfsd4_decode_notsupp,
+ [OP_LAYOUTGET] = nfsd4_decode_notsupp,
+ [OP_LAYOUTRETURN] = nfsd4_decode_notsupp,
#endif
- [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name,
- [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
- [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid,
- [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid,
- [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
+ [OP_SECINFO_NO_NAME] = nfsd4_decode_secinfo_no_name,
+ [OP_SEQUENCE] = nfsd4_decode_sequence,
+ [OP_SET_SSV] = nfsd4_decode_notsupp,
+ [OP_TEST_STATEID] = nfsd4_decode_test_stateid,
+ [OP_WANT_DELEGATION] = nfsd4_decode_notsupp,
+ [OP_DESTROY_CLIENTID] = nfsd4_decode_destroy_clientid,
+ [OP_RECLAIM_COMPLETE] = nfsd4_decode_reclaim_complete,
/* new operations for NFSv4.2 */
- [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
- [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
- [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
- [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
- [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status,
- [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
- [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
+ [OP_ALLOCATE] = nfsd4_decode_fallocate,
+ [OP_COPY] = nfsd4_decode_copy,
+ [OP_COPY_NOTIFY] = nfsd4_decode_copy_notify,
+ [OP_DEALLOCATE] = nfsd4_decode_fallocate,
+ [OP_IO_ADVISE] = nfsd4_decode_notsupp,
+ [OP_LAYOUTERROR] = nfsd4_decode_notsupp,
+ [OP_LAYOUTSTATS] = nfsd4_decode_notsupp,
+ [OP_OFFLOAD_CANCEL] = nfsd4_decode_offload_status,
+ [OP_OFFLOAD_STATUS] = nfsd4_decode_offload_status,
+ [OP_READ_PLUS] = nfsd4_decode_read,
+ [OP_SEEK] = nfsd4_decode_seek,
+ [OP_WRITE_SAME] = nfsd4_decode_notsupp,
+ [OP_CLONE] = nfsd4_decode_clone,
+ /* RFC 8276 extended atributes operations */
+ [OP_GETXATTR] = nfsd4_decode_getxattr,
+ [OP_SETXATTR] = nfsd4_decode_setxattr,
+ [OP_LISTXATTRS] = nfsd4_decode_listxattrs,
+ [OP_REMOVEXATTR] = nfsd4_decode_removexattr,
};
static inline bool
@@ -1902,43 +2458,46 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op)
return true;
}
-static __be32
+static bool
nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
{
- DECODE_HEAD;
struct nfsd4_op *op;
bool cachethis = false;
int auth_slack= argp->rqstp->rq_auth_slack;
int max_reply = auth_slack + 8; /* opcnt, status */
int readcount = 0;
int readbytes = 0;
+ __be32 *p;
int i;
- READ_BUF(4);
- argp->taglen = be32_to_cpup(p++);
- READ_BUF(argp->taglen);
- SAVEMEM(argp->tag, argp->taglen);
- READ_BUF(8);
- argp->minorversion = be32_to_cpup(p++);
- argp->opcnt = be32_to_cpup(p++);
- max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2);
-
- if (argp->taglen > NFSD4_MAX_TAGLEN)
- goto xdr_error;
- /*
- * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS
- * here, so we return success at the xdr level so that
- * nfsd4_proc can handle this is an NFS-level error.
- */
- if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
- return 0;
+ if (xdr_stream_decode_u32(argp->xdr, &argp->taglen) < 0)
+ return false;
+ max_reply += XDR_UNIT;
+ argp->tag = NULL;
+ if (unlikely(argp->taglen)) {
+ if (argp->taglen > NFSD4_MAX_TAGLEN)
+ return false;
+ p = xdr_inline_decode(argp->xdr, argp->taglen);
+ if (!p)
+ return false;
+ argp->tag = svcxdr_savemem(argp, p, argp->taglen);
+ if (!argp->tag)
+ return false;
+ max_reply += xdr_align_size(argp->taglen);
+ }
+
+ if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0)
+ return false;
+ if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0)
+ return false;
+ argp->opcnt = min_t(u32, argp->client_opcnt,
+ NFSD_MAX_OPS_PER_COMPOUND);
if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
- argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
+ argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops));
if (!argp->ops) {
argp->ops = argp->iops;
- dprintk("nfsd: couldn't allocate room for COMPOUND\n");
- goto xdr_error;
+ return false;
}
}
@@ -1948,24 +2507,30 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
for (i = 0; i < argp->opcnt; i++) {
op = &argp->ops[i];
op->replay = NULL;
+ op->opdesc = NULL;
- READ_BUF(4);
- op->opnum = be32_to_cpup(p++);
-
- if (nfsd4_opnum_in_range(argp, op))
+ if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0)
+ return false;
+ if (nfsd4_opnum_in_range(argp, op)) {
+ op->opdesc = OPDESC(op);
op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);
- else {
+ if (op->status != nfs_ok)
+ trace_nfsd_compound_decode_err(argp->rqstp,
+ argp->opcnt, i,
+ op->opnum,
+ op->status);
+ } else {
op->opnum = OP_ILLEGAL;
op->status = nfserr_op_illegal;
}
- op->opdesc = OPDESC(op);
+
/*
* We'll try to cache the result in the DRC if any one
* op in the compound wants to be cached:
*/
cachethis |= nfsd4_cache_this_op(op);
- if (op->opnum == OP_READ) {
+ if (op->opnum == OP_READ || op->opnum == OP_READ_PLUS) {
readcount++;
readbytes += nfsd4_max_reply(argp->rqstp, op);
} else
@@ -1987,68 +2552,72 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
/* Sessions make the DRC unnecessary: */
if (argp->minorversion)
cachethis = false;
- svc_reserve(argp->rqstp, max_reply + readbytes);
+ svc_reserve_auth(argp->rqstp, max_reply + readbytes);
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
+ argp->splice_ok = nfsd_read_splice_ok(argp->rqstp);
if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
- clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
+ argp->splice_ok = false;
- DECODE_TAIL;
+ return true;
}
-static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
- struct svc_export *exp)
+static __be32 nfsd4_encode_nfs_fh4(struct xdr_stream *xdr,
+ struct knfsd_fh *fh_handle)
{
- if (exp->ex_flags & NFSEXP_V4ROOT) {
- *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
- *p++ = 0;
- } else if (IS_I_VERSION(inode)) {
- p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode));
- } else {
- *p++ = cpu_to_be32(stat->ctime.tv_sec);
- *p++ = cpu_to_be32(stat->ctime.tv_nsec);
- }
- return p;
+ return nfsd4_encode_opaque(xdr, fh_handle->fh_raw, fh_handle->fh_size);
}
-/*
- * ctime (in NFSv4, time_metadata) is not writeable, and the client
- * doesn't really care what resolution could theoretically be stored by
- * the filesystem.
- *
- * The client cares how close together changes can be while still
- * guaranteeing ctime changes. For most filesystems (which have
- * timestamps with nanosecond fields) that is limited by the resolution
- * of the time returned from current_time() (which I'm assuming to be
- * 1/HZ).
- */
-static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
+/* This is a frequently-encoded type; open-coded for speed */
+static __be32 nfsd4_encode_nfstime4(struct xdr_stream *xdr,
+ const struct timespec64 *tv)
{
- struct timespec ts;
- u32 ns;
+ __be32 *p;
- ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran);
- ts = ns_to_timespec(ns);
+ p = xdr_reserve_space(xdr, XDR_UNIT * 3);
+ if (!p)
+ return nfserr_resource;
+ p = xdr_encode_hyper(p, tv->tv_sec);
+ *p = cpu_to_be32(tv->tv_nsec);
+ return nfs_ok;
+}
- p = xdr_encode_hyper(p, ts.tv_sec);
- *p++ = cpu_to_be32(ts.tv_nsec);
+static __be32 nfsd4_encode_specdata4(struct xdr_stream *xdr,
+ unsigned int major, unsigned int minor)
+{
+ __be32 status;
- return p;
+ status = nfsd4_encode_uint32_t(xdr, major);
+ if (status != nfs_ok)
+ return status;
+ return nfsd4_encode_uint32_t(xdr, minor);
}
-static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
+static __be32
+nfsd4_encode_change_info4(struct xdr_stream *xdr, const struct nfsd4_change_info *c)
{
- *p++ = cpu_to_be32(c->atomic);
- if (c->change_supported) {
- p = xdr_encode_hyper(p, c->before_change);
- p = xdr_encode_hyper(p, c->after_change);
- } else {
- *p++ = cpu_to_be32(c->before_ctime_sec);
- *p++ = cpu_to_be32(c->before_ctime_nsec);
- *p++ = cpu_to_be32(c->after_ctime_sec);
- *p++ = cpu_to_be32(c->after_ctime_nsec);
- }
- return p;
+ __be32 status;
+
+ status = nfsd4_encode_bool(xdr, c->atomic);
+ if (status != nfs_ok)
+ return status;
+ status = nfsd4_encode_changeid4(xdr, c->before_change);
+ if (status != nfs_ok)
+ return status;
+ return nfsd4_encode_changeid4(xdr, c->after_change);
+}
+
+static __be32 nfsd4_encode_netaddr4(struct xdr_stream *xdr,
+ const struct nfs42_netaddr *addr)
+{
+ __be32 status;
+
+ /* na_r_netid */
+ status = nfsd4_encode_opaque(xdr, addr->netid, addr->netid_len);
+ if (status != nfs_ok)
+ return status;
+ /* na_r_addr */
+ return nfsd4_encode_opaque(xdr, addr->addr, addr->addr_len);
}
/* Encode as an array of strings the string given with components
@@ -2061,10 +2630,8 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep,
__be32 *p;
__be32 pathlen;
int pathlen_offset;
- int strlen, count=0;
char *str, *end, *next;
-
- dprintk("nfsd4_encode_components(%s)\n", components);
+ int count = 0;
pathlen_offset = xdr->buf->len;
p = xdr_reserve_space(xdr, 4);
@@ -2091,15 +2658,11 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep,
for (; *end && (*end != sep); end++)
/* find sep or end of string */;
- strlen = end - str;
- if (strlen) {
- p = xdr_reserve_space(xdr, strlen + 4);
- if (!p)
+ if (end > str) {
+ if (xdr_stream_encode_opaque(xdr, str, end - str) < 0)
return nfserr_resource;
- p = xdr_encode_opaque(p, str, strlen);
count++;
- }
- else
+ } else
end++;
if (found_esc)
end = next;
@@ -2120,9 +2683,6 @@ static __be32 nfsd4_encode_components(struct xdr_stream *xdr, char sep,
return nfsd4_encode_components_esc(xdr, sep, components, 0, 0);
}
-/*
- * encode a location element of a fs_locations structure
- */
static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr,
struct nfsd4_fs_location *location)
{
@@ -2135,18 +2695,14 @@ static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr,
status = nfsd4_encode_components(xdr, '/', location->path);
if (status)
return status;
- return 0;
+ return nfs_ok;
}
-/*
- * Encode a path in RFC3530 'pathname4' format
- */
-static __be32 nfsd4_encode_path(struct xdr_stream *xdr,
- const struct path *root,
- const struct path *path)
+static __be32 nfsd4_encode_pathname4(struct xdr_stream *xdr,
+ const struct path *root,
+ const struct path *path)
{
struct path cur = *path;
- __be32 *p;
struct dentry **components = NULL;
unsigned int ncomponents = 0;
__be32 err = nfserr_jukebox;
@@ -2177,24 +2733,19 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr,
components[ncomponents++] = cur.dentry;
cur.dentry = dget_parent(cur.dentry);
}
+
err = nfserr_resource;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
+ if (xdr_stream_encode_u32(xdr, ncomponents) != XDR_UNIT)
goto out_free;
- *p++ = cpu_to_be32(ncomponents);
-
while (ncomponents) {
struct dentry *dentry = components[ncomponents - 1];
- unsigned int len;
spin_lock(&dentry->d_lock);
- len = dentry->d_name.len;
- p = xdr_reserve_space(xdr, len + 4);
- if (!p) {
+ if (xdr_stream_encode_opaque(xdr, dentry->d_name.name,
+ dentry->d_name.len) < 0) {
spin_unlock(&dentry->d_lock);
goto out_free;
}
- p = xdr_encode_opaque(p, dentry->d_name.name, len);
dprintk("/%pd", dentry);
spin_unlock(&dentry->d_lock);
dput(dentry);
@@ -2211,89 +2762,59 @@ out_free:
return err;
}
-static __be32 nfsd4_encode_fsloc_fsroot(struct xdr_stream *xdr,
- struct svc_rqst *rqstp, const struct path *path)
+static __be32 nfsd4_encode_fs_locations4(struct xdr_stream *xdr,
+ struct svc_rqst *rqstp,
+ struct svc_export *exp)
{
+ struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
struct svc_export *exp_ps;
- __be32 res;
+ unsigned int i;
+ __be32 status;
+ /* fs_root */
exp_ps = rqst_find_fsidzero_export(rqstp);
if (IS_ERR(exp_ps))
return nfserrno(PTR_ERR(exp_ps));
- res = nfsd4_encode_path(xdr, &exp_ps->ex_path, path);
+ status = nfsd4_encode_pathname4(xdr, &exp_ps->ex_path, &exp->ex_path);
exp_put(exp_ps);
- return res;
-}
-
-/*
- * encode a fs_locations structure
- */
-static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr,
- struct svc_rqst *rqstp, struct svc_export *exp)
-{
- __be32 status;
- int i;
- __be32 *p;
- struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
-
- status = nfsd4_encode_fsloc_fsroot(xdr, rqstp, &exp->ex_path);
- if (status)
+ if (status != nfs_ok)
return status;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
+
+ /* locations<> */
+ if (xdr_stream_encode_u32(xdr, fslocs->locations_count) != XDR_UNIT)
return nfserr_resource;
- *p++ = cpu_to_be32(fslocs->locations_count);
- for (i=0; i<fslocs->locations_count; i++) {
+ for (i = 0; i < fslocs->locations_count; i++) {
status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]);
- if (status)
+ if (status != nfs_ok)
return status;
}
- return 0;
-}
-static u32 nfs4_file_type(umode_t mode)
-{
- switch (mode & S_IFMT) {
- case S_IFIFO: return NF4FIFO;
- case S_IFCHR: return NF4CHR;
- case S_IFDIR: return NF4DIR;
- case S_IFBLK: return NF4BLK;
- case S_IFLNK: return NF4LNK;
- case S_IFREG: return NF4REG;
- case S_IFSOCK: return NF4SOCK;
- default: return NF4BAD;
- };
+ return nfs_ok;
}
-static inline __be32
-nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp,
- struct nfs4_ace *ace)
+static __be32 nfsd4_encode_nfsace4(struct xdr_stream *xdr, struct svc_rqst *rqstp,
+ struct nfs4_ace *ace)
{
+ __be32 status;
+
+ /* type */
+ status = nfsd4_encode_acetype4(xdr, ace->type);
+ if (status != nfs_ok)
+ return nfserr_resource;
+ /* flag */
+ status = nfsd4_encode_aceflag4(xdr, ace->flag);
+ if (status != nfs_ok)
+ return nfserr_resource;
+ /* access mask */
+ status = nfsd4_encode_acemask4(xdr, ace->access_mask & NFS4_ACE_MASK_ALL);
+ if (status != nfs_ok)
+ return nfserr_resource;
+ /* who */
if (ace->whotype != NFS4_ACL_WHO_NAMED)
return nfs4_acl_write_who(xdr, ace->whotype);
- else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
+ if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
return nfsd4_encode_group(xdr, rqstp, ace->who_gid);
- else
- return nfsd4_encode_user(xdr, rqstp, ace->who_uid);
-}
-
-static inline __be32
-nfsd4_encode_layout_types(struct xdr_stream *xdr, u32 layout_types)
-{
- __be32 *p;
- unsigned long i = hweight_long(layout_types);
-
- p = xdr_reserve_space(xdr, 4 + 4 * i);
- if (!p)
- return nfserr_resource;
-
- *p++ = cpu_to_be32(i);
-
- for (i = LAYOUT_NFSV4_1_FILES; i < LAYOUT_TYPE_MAX; ++i)
- if (layout_types & (1 << i))
- *p++ = cpu_to_be32(i);
-
- return 0;
+ return nfsd4_encode_user(xdr, rqstp, ace->who_uid);
}
#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
@@ -2304,11 +2825,11 @@ nfsd4_encode_layout_types(struct xdr_stream *xdr, u32 layout_types)
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
static inline __be32
nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
- void *context, int len)
+ const struct lsm_context *context)
{
__be32 *p;
- p = xdr_reserve_space(xdr, len + 4 + 4 + 4);
+ p = xdr_reserve_space(xdr, context->len + 4 + 4 + 4);
if (!p)
return nfserr_resource;
@@ -2318,13 +2839,13 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
*/
*p++ = cpu_to_be32(0); /* lfs */
*p++ = cpu_to_be32(0); /* pi */
- p = xdr_encode_opaque(p, context, len);
+ p = xdr_encode_opaque(p, context->context, context->len);
return 0;
}
#else
static inline __be32
nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
- void *context, int len)
+ struct lsm_context *context)
{ return 0; }
#endif
@@ -2346,9 +2867,10 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32
}
-static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
+static int nfsd4_get_mounted_on_ino(struct svc_export *exp, u64 *pino)
{
struct path path = exp->ex_path;
+ struct kstat stat;
int err;
path_get(&path);
@@ -2356,18 +2878,20 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
if (path.dentry != path.mnt->mnt_root)
break;
}
- err = vfs_getattr(&path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+ err = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
path_put(&path);
+ if (!err)
+ *pino = stat.ino;
return err;
}
static __be32
-nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
+nfsd4_encode_bitmap4(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
{
__be32 *p;
if (bmval2) {
- p = xdr_reserve_space(xdr, 16);
+ p = xdr_reserve_space(xdr, XDR_UNIT * 4);
if (!p)
goto out_resource;
*p++ = cpu_to_be32(3);
@@ -2375,83 +2899,778 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
*p++ = cpu_to_be32(bmval1);
*p++ = cpu_to_be32(bmval2);
} else if (bmval1) {
- p = xdr_reserve_space(xdr, 12);
+ p = xdr_reserve_space(xdr, XDR_UNIT * 3);
if (!p)
goto out_resource;
*p++ = cpu_to_be32(2);
*p++ = cpu_to_be32(bmval0);
*p++ = cpu_to_be32(bmval1);
} else {
- p = xdr_reserve_space(xdr, 8);
+ p = xdr_reserve_space(xdr, XDR_UNIT * 2);
if (!p)
goto out_resource;
*p++ = cpu_to_be32(1);
*p++ = cpu_to_be32(bmval0);
}
- return 0;
+ return nfs_ok;
out_resource:
return nfserr_resource;
}
+struct nfsd4_fattr_args {
+ struct svc_rqst *rqstp;
+ struct svc_fh *fhp;
+ struct svc_export *exp;
+ struct dentry *dentry;
+ struct kstat stat;
+ struct kstatfs statfs;
+ struct nfs4_acl *acl;
+ u64 change_attr;
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+ struct lsm_context context;
+#endif
+ u32 rdattr_err;
+ bool contextsupport;
+ bool ignore_crossmnt;
+};
+
+typedef __be32(*nfsd4_enc_attr)(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args);
+
+static __be32 nfsd4_encode_fattr4__inval(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfserr_inval;
+}
+
+static __be32 nfsd4_encode_fattr4__noop(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfs_ok;
+}
+
+static __be32 nfsd4_encode_fattr4__true(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_bool(xdr, true);
+}
+
+static __be32 nfsd4_encode_fattr4__false(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_bool(xdr, false);
+}
+
+static __be32 nfsd4_encode_fattr4_supported_attrs(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ struct nfsd4_compoundres *resp = args->rqstp->rq_resp;
+ u32 minorversion = resp->cstate.minorversion;
+ u32 supp[3];
+
+ memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp));
+ if (!IS_POSIXACL(d_inode(args->dentry)))
+ supp[0] &= ~FATTR4_WORD0_ACL;
+ if (!args->contextsupport)
+ supp[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+
+ return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]);
+}
+
+static __be32 nfsd4_encode_fattr4_type(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, XDR_UNIT);
+ if (!p)
+ return nfserr_resource;
+
+ switch (args->stat.mode & S_IFMT) {
+ case S_IFIFO:
+ *p = cpu_to_be32(NF4FIFO);
+ break;
+ case S_IFCHR:
+ *p = cpu_to_be32(NF4CHR);
+ break;
+ case S_IFDIR:
+ *p = cpu_to_be32(NF4DIR);
+ break;
+ case S_IFBLK:
+ *p = cpu_to_be32(NF4BLK);
+ break;
+ case S_IFLNK:
+ *p = cpu_to_be32(NF4LNK);
+ break;
+ case S_IFREG:
+ *p = cpu_to_be32(NF4REG);
+ break;
+ case S_IFSOCK:
+ *p = cpu_to_be32(NF4SOCK);
+ break;
+ default:
+ return nfserr_serverfault;
+ }
+
+ return nfs_ok;
+}
+
+static __be32 nfsd4_encode_fattr4_fh_expire_type(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ u32 mask;
+
+ mask = NFS4_FH_PERSISTENT;
+ if (!(args->exp->ex_flags & NFSEXP_NOSUBTREECHECK))
+ mask |= NFS4_FH_VOL_RENAME;
+ return nfsd4_encode_uint32_t(xdr, mask);
+}
+
+static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ const struct svc_export *exp = args->exp;
+
+ if (unlikely(exp->ex_flags & NFSEXP_V4ROOT)) {
+ u32 flush_time = convert_to_wallclock(exp->cd->flush_time);
+
+ if (xdr_stream_encode_u32(xdr, flush_time) != XDR_UNIT)
+ return nfserr_resource;
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+ return nfsd4_encode_changeid4(xdr, args->change_attr);
+}
+
+static __be32 nfsd4_encode_fattr4_size(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint64_t(xdr, args->stat.size);
+}
+
+static __be32 nfsd4_encode_fattr4_fsid(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, XDR_UNIT * 2 + XDR_UNIT * 2);
+ if (!p)
+ return nfserr_resource;
+
+ if (unlikely(args->exp->ex_fslocs.migrated)) {
+ p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR);
+ xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MINOR);
+ return nfs_ok;
+ }
+ switch (fsid_source(args->fhp)) {
+ case FSIDSOURCE_FSID:
+ p = xdr_encode_hyper(p, (u64)args->exp->ex_fsid);
+ xdr_encode_hyper(p, (u64)0);
+ break;
+ case FSIDSOURCE_DEV:
+ *p++ = xdr_zero;
+ *p++ = cpu_to_be32(MAJOR(args->stat.dev));
+ *p++ = xdr_zero;
+ *p = cpu_to_be32(MINOR(args->stat.dev));
+ break;
+ case FSIDSOURCE_UUID:
+ xdr_encode_opaque_fixed(p, args->exp->ex_uuid, EX_UUID_LEN);
+ break;
+ }
+
+ return nfs_ok;
+}
+
+static __be32 nfsd4_encode_fattr4_lease_time(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ struct nfsd_net *nn = net_generic(SVC_NET(args->rqstp), nfsd_net_id);
+
+ return nfsd4_encode_nfs_lease4(xdr, nn->nfsd4_lease);
+}
+
+static __be32 nfsd4_encode_fattr4_rdattr_error(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint32_t(xdr, args->rdattr_err);
+}
+
+static __be32 nfsd4_encode_fattr4_aclsupport(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ u32 mask;
+
+ mask = 0;
+ if (IS_POSIXACL(d_inode(args->dentry)))
+ mask = ACL4_SUPPORT_ALLOW_ACL | ACL4_SUPPORT_DENY_ACL;
+ return nfsd4_encode_uint32_t(xdr, mask);
+}
+
+static __be32 nfsd4_encode_fattr4_acl(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ struct nfs4_acl *acl = args->acl;
+ struct nfs4_ace *ace;
+ __be32 status;
+
+ /* nfsace4<> */
+ if (!acl) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ } else {
+ if (xdr_stream_encode_u32(xdr, acl->naces) != XDR_UNIT)
+ return nfserr_resource;
+ for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) {
+ status = nfsd4_encode_nfsace4(xdr, args->rqstp, ace);
+ if (status != nfs_ok)
+ return status;
+ }
+ }
+ return nfs_ok;
+}
+
+static __be32 nfsd4_encode_fattr4_filehandle(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_nfs_fh4(xdr, &args->fhp->fh_handle);
+}
+
+static __be32 nfsd4_encode_fattr4_fileid(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint64_t(xdr, args->stat.ino);
+}
+
+static __be32 nfsd4_encode_fattr4_files_avail(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint64_t(xdr, args->statfs.f_ffree);
+}
+
+static __be32 nfsd4_encode_fattr4_files_free(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint64_t(xdr, args->statfs.f_ffree);
+}
+
+static __be32 nfsd4_encode_fattr4_files_total(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint64_t(xdr, args->statfs.f_files);
+}
+
+static __be32 nfsd4_encode_fattr4_fs_locations(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_fs_locations4(xdr, args->rqstp, args->exp);
+}
+
+static __be32 nfsd4_encode_fattr4_maxfilesize(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ struct super_block *sb = args->exp->ex_path.mnt->mnt_sb;
+
+ return nfsd4_encode_uint64_t(xdr, sb->s_maxbytes);
+}
+
+static __be32 nfsd4_encode_fattr4_maxlink(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint32_t(xdr, 255);
+}
+
+static __be32 nfsd4_encode_fattr4_maxname(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint32_t(xdr, args->statfs.f_namelen);
+}
+
+static __be32 nfsd4_encode_fattr4_maxread(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint64_t(xdr, svc_max_payload(args->rqstp));
+}
+
+static __be32 nfsd4_encode_fattr4_maxwrite(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint64_t(xdr, svc_max_payload(args->rqstp));
+}
+
+static __be32 nfsd4_encode_fattr4_mode(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_mode4(xdr, args->stat.mode & S_IALLUGO);
+}
+
+static __be32 nfsd4_encode_fattr4_numlinks(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint32_t(xdr, args->stat.nlink);
+}
+
+static __be32 nfsd4_encode_fattr4_owner(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_user(xdr, args->rqstp, args->stat.uid);
+}
+
+static __be32 nfsd4_encode_fattr4_owner_group(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_group(xdr, args->rqstp, args->stat.gid);
+}
+
+static __be32 nfsd4_encode_fattr4_rawdev(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_specdata4(xdr, MAJOR(args->stat.rdev),
+ MINOR(args->stat.rdev));
+}
+
+static __be32 nfsd4_encode_fattr4_space_avail(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ u64 avail = (u64)args->statfs.f_bavail * (u64)args->statfs.f_bsize;
+
+ return nfsd4_encode_uint64_t(xdr, avail);
+}
+
+static __be32 nfsd4_encode_fattr4_space_free(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ u64 free = (u64)args->statfs.f_bfree * (u64)args->statfs.f_bsize;
+
+ return nfsd4_encode_uint64_t(xdr, free);
+}
+
+static __be32 nfsd4_encode_fattr4_space_total(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ u64 total = (u64)args->statfs.f_blocks * (u64)args->statfs.f_bsize;
+
+ return nfsd4_encode_uint64_t(xdr, total);
+}
+
+static __be32 nfsd4_encode_fattr4_space_used(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint64_t(xdr, (u64)args->stat.blocks << 9);
+}
+
+static __be32 nfsd4_encode_fattr4_time_access(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_nfstime4(xdr, &args->stat.atime);
+}
+
+static __be32 nfsd4_encode_fattr4_time_create(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_nfstime4(xdr, &args->stat.btime);
+}
+
+/*
+ * ctime (in NFSv4, time_metadata) is not writeable, and the client
+ * doesn't really care what resolution could theoretically be stored by
+ * the filesystem.
+ *
+ * The client cares how close together changes can be while still
+ * guaranteeing ctime changes. For most filesystems (which have
+ * timestamps with nanosecond fields) that is limited by the resolution
+ * of the time returned from current_time() (which I'm assuming to be
+ * 1/HZ).
+ */
+static __be32 nfsd4_encode_fattr4_time_delta(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ const struct inode *inode = d_inode(args->dentry);
+ u32 ns = max_t(u32, NSEC_PER_SEC/HZ, inode->i_sb->s_time_gran);
+ struct timespec64 ts = ns_to_timespec64(ns);
+
+ return nfsd4_encode_nfstime4(xdr, &ts);
+}
+
+static __be32 nfsd4_encode_fattr4_time_metadata(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_nfstime4(xdr, &args->stat.ctime);
+}
+
+static __be32 nfsd4_encode_fattr4_time_modify(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_nfstime4(xdr, &args->stat.mtime);
+}
+
+static __be32 nfsd4_encode_fattr4_mounted_on_fileid(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ u64 ino;
+ int err;
+
+ if (!args->ignore_crossmnt &&
+ args->dentry == args->exp->ex_path.mnt->mnt_root) {
+ err = nfsd4_get_mounted_on_ino(args->exp, &ino);
+ if (err)
+ return nfserrno(err);
+ } else
+ ino = args->stat.ino;
+
+ return nfsd4_encode_uint64_t(xdr, ino);
+}
+
+#ifdef CONFIG_NFSD_PNFS
+
+static __be32 nfsd4_encode_fattr4_fs_layout_types(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ unsigned long mask = args->exp->ex_layout_types;
+ int i;
+
+ /* Hamming weight of @mask is the number of layout types to return */
+ if (xdr_stream_encode_u32(xdr, hweight_long(mask)) != XDR_UNIT)
+ return nfserr_resource;
+ for (i = LAYOUT_NFSV4_1_FILES; i < LAYOUT_TYPE_MAX; ++i)
+ if (mask & BIT(i)) {
+ /* layouttype4 */
+ if (xdr_stream_encode_u32(xdr, i) != XDR_UNIT)
+ return nfserr_resource;
+ }
+ return nfs_ok;
+}
+
+static __be32 nfsd4_encode_fattr4_layout_types(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ unsigned long mask = args->exp->ex_layout_types;
+ int i;
+
+ /* Hamming weight of @mask is the number of layout types to return */
+ if (xdr_stream_encode_u32(xdr, hweight_long(mask)) != XDR_UNIT)
+ return nfserr_resource;
+ for (i = LAYOUT_NFSV4_1_FILES; i < LAYOUT_TYPE_MAX; ++i)
+ if (mask & BIT(i)) {
+ /* layouttype4 */
+ if (xdr_stream_encode_u32(xdr, i) != XDR_UNIT)
+ return nfserr_resource;
+ }
+ return nfs_ok;
+}
+
+static __be32 nfsd4_encode_fattr4_layout_blksize(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_uint32_t(xdr, args->stat.blksize);
+}
+
+#endif
+
+static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ struct nfsd4_compoundres *resp = args->rqstp->rq_resp;
+ u32 supp[3];
+
+ memcpy(supp, nfsd_suppattrs[resp->cstate.minorversion], sizeof(supp));
+ supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0;
+ supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1;
+ supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2;
+
+ return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]);
+}
+
+/*
+ * Copied from generic_remap_checks/generic_remap_file_range_prep.
+ *
+ * These generic functions use the file system's s_blocksize, but
+ * individual file systems aren't required to use
+ * generic_remap_file_range_prep. Until there is a mechanism for
+ * determining a particular file system's (or file's) clone block
+ * size, this is the best NFSD can do.
+ */
+static __be32 nfsd4_encode_fattr4_clone_blksize(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ struct inode *inode = d_inode(args->dentry);
+
+ return nfsd4_encode_uint32_t(xdr, inode->i_sb->s_blocksize);
+}
+
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+static __be32 nfsd4_encode_fattr4_sec_label(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfsd4_encode_security_label(xdr, args->rqstp, &args->context);
+}
+#endif
+
+static __be32 nfsd4_encode_fattr4_xattr_support(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ int err = xattr_supports_user_prefix(d_inode(args->dentry));
+
+ return nfsd4_encode_bool(xdr, err == 0);
+}
+
+#define NFSD_OA_SHARE_ACCESS (BIT(OPEN_ARGS_SHARE_ACCESS_READ) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WRITE) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_BOTH))
+
+#define NFSD_OA_SHARE_DENY (BIT(OPEN_ARGS_SHARE_DENY_NONE) | \
+ BIT(OPEN_ARGS_SHARE_DENY_READ) | \
+ BIT(OPEN_ARGS_SHARE_DENY_WRITE) | \
+ BIT(OPEN_ARGS_SHARE_DENY_BOTH))
+
+#define NFSD_OA_SHARE_ACCESS_WANT (BIT(OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS) | \
+ BIT(OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION))
+
+#define NFSD_OA_OPEN_CLAIM (BIT(OPEN_ARGS_OPEN_CLAIM_NULL) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_PREVIOUS) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV)| \
+ BIT(OPEN_ARGS_OPEN_CLAIM_FH) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH) | \
+ BIT(OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH))
+
+#define NFSD_OA_CREATE_MODE (BIT(OPEN_ARGS_CREATEMODE_UNCHECKED4) | \
+ BIT(OPEN_ARGS_CREATE_MODE_GUARDED) | \
+ BIT(OPEN_ARGS_CREATEMODE_EXCLUSIVE4) | \
+ BIT(OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1))
+
+static uint32_t oa_share_access = NFSD_OA_SHARE_ACCESS;
+static uint32_t oa_share_deny = NFSD_OA_SHARE_DENY;
+static uint32_t oa_share_access_want = NFSD_OA_SHARE_ACCESS_WANT;
+static uint32_t oa_open_claim = NFSD_OA_OPEN_CLAIM;
+static uint32_t oa_create_mode = NFSD_OA_CREATE_MODE;
+
+static const struct open_arguments4 nfsd_open_arguments = {
+ .oa_share_access = { .count = 1, .element = &oa_share_access },
+ .oa_share_deny = { .count = 1, .element = &oa_share_deny },
+ .oa_share_access_want = { .count = 1, .element = &oa_share_access_want },
+ .oa_open_claim = { .count = 1, .element = &oa_open_claim },
+ .oa_create_mode = { .count = 1, .element = &oa_create_mode },
+};
+
+static __be32 nfsd4_encode_fattr4_open_arguments(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ if (!xdrgen_encode_fattr4_open_arguments(xdr, &nfsd_open_arguments))
+ return nfserr_resource;
+ return nfs_ok;
+}
+
+static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
+ [FATTR4_SUPPORTED_ATTRS] = nfsd4_encode_fattr4_supported_attrs,
+ [FATTR4_TYPE] = nfsd4_encode_fattr4_type,
+ [FATTR4_FH_EXPIRE_TYPE] = nfsd4_encode_fattr4_fh_expire_type,
+ [FATTR4_CHANGE] = nfsd4_encode_fattr4_change,
+ [FATTR4_SIZE] = nfsd4_encode_fattr4_size,
+ [FATTR4_LINK_SUPPORT] = nfsd4_encode_fattr4__true,
+ [FATTR4_SYMLINK_SUPPORT] = nfsd4_encode_fattr4__true,
+ [FATTR4_NAMED_ATTR] = nfsd4_encode_fattr4__false,
+ [FATTR4_FSID] = nfsd4_encode_fattr4_fsid,
+ [FATTR4_UNIQUE_HANDLES] = nfsd4_encode_fattr4__true,
+ [FATTR4_LEASE_TIME] = nfsd4_encode_fattr4_lease_time,
+ [FATTR4_RDATTR_ERROR] = nfsd4_encode_fattr4_rdattr_error,
+ [FATTR4_ACL] = nfsd4_encode_fattr4_acl,
+ [FATTR4_ACLSUPPORT] = nfsd4_encode_fattr4_aclsupport,
+ [FATTR4_ARCHIVE] = nfsd4_encode_fattr4__noop,
+ [FATTR4_CANSETTIME] = nfsd4_encode_fattr4__true,
+ [FATTR4_CASE_INSENSITIVE] = nfsd4_encode_fattr4__false,
+ [FATTR4_CASE_PRESERVING] = nfsd4_encode_fattr4__true,
+ [FATTR4_CHOWN_RESTRICTED] = nfsd4_encode_fattr4__true,
+ [FATTR4_FILEHANDLE] = nfsd4_encode_fattr4_filehandle,
+ [FATTR4_FILEID] = nfsd4_encode_fattr4_fileid,
+ [FATTR4_FILES_AVAIL] = nfsd4_encode_fattr4_files_avail,
+ [FATTR4_FILES_FREE] = nfsd4_encode_fattr4_files_free,
+ [FATTR4_FILES_TOTAL] = nfsd4_encode_fattr4_files_total,
+ [FATTR4_FS_LOCATIONS] = nfsd4_encode_fattr4_fs_locations,
+ [FATTR4_HIDDEN] = nfsd4_encode_fattr4__noop,
+ [FATTR4_HOMOGENEOUS] = nfsd4_encode_fattr4__true,
+ [FATTR4_MAXFILESIZE] = nfsd4_encode_fattr4_maxfilesize,
+ [FATTR4_MAXLINK] = nfsd4_encode_fattr4_maxlink,
+ [FATTR4_MAXNAME] = nfsd4_encode_fattr4_maxname,
+ [FATTR4_MAXREAD] = nfsd4_encode_fattr4_maxread,
+ [FATTR4_MAXWRITE] = nfsd4_encode_fattr4_maxwrite,
+ [FATTR4_MIMETYPE] = nfsd4_encode_fattr4__noop,
+ [FATTR4_MODE] = nfsd4_encode_fattr4_mode,
+ [FATTR4_NO_TRUNC] = nfsd4_encode_fattr4__true,
+ [FATTR4_NUMLINKS] = nfsd4_encode_fattr4_numlinks,
+ [FATTR4_OWNER] = nfsd4_encode_fattr4_owner,
+ [FATTR4_OWNER_GROUP] = nfsd4_encode_fattr4_owner_group,
+ [FATTR4_QUOTA_AVAIL_HARD] = nfsd4_encode_fattr4__noop,
+ [FATTR4_QUOTA_AVAIL_SOFT] = nfsd4_encode_fattr4__noop,
+ [FATTR4_QUOTA_USED] = nfsd4_encode_fattr4__noop,
+ [FATTR4_RAWDEV] = nfsd4_encode_fattr4_rawdev,
+ [FATTR4_SPACE_AVAIL] = nfsd4_encode_fattr4_space_avail,
+ [FATTR4_SPACE_FREE] = nfsd4_encode_fattr4_space_free,
+ [FATTR4_SPACE_TOTAL] = nfsd4_encode_fattr4_space_total,
+ [FATTR4_SPACE_USED] = nfsd4_encode_fattr4_space_used,
+ [FATTR4_SYSTEM] = nfsd4_encode_fattr4__noop,
+ [FATTR4_TIME_ACCESS] = nfsd4_encode_fattr4_time_access,
+ [FATTR4_TIME_ACCESS_SET] = nfsd4_encode_fattr4__noop,
+ [FATTR4_TIME_BACKUP] = nfsd4_encode_fattr4__noop,
+ [FATTR4_TIME_CREATE] = nfsd4_encode_fattr4_time_create,
+ [FATTR4_TIME_DELTA] = nfsd4_encode_fattr4_time_delta,
+ [FATTR4_TIME_METADATA] = nfsd4_encode_fattr4_time_metadata,
+ [FATTR4_TIME_MODIFY] = nfsd4_encode_fattr4_time_modify,
+ [FATTR4_TIME_MODIFY_SET] = nfsd4_encode_fattr4__noop,
+ [FATTR4_MOUNTED_ON_FILEID] = nfsd4_encode_fattr4_mounted_on_fileid,
+ [FATTR4_DIR_NOTIF_DELAY] = nfsd4_encode_fattr4__noop,
+ [FATTR4_DIRENT_NOTIF_DELAY] = nfsd4_encode_fattr4__noop,
+ [FATTR4_DACL] = nfsd4_encode_fattr4__noop,
+ [FATTR4_SACL] = nfsd4_encode_fattr4__noop,
+ [FATTR4_CHANGE_POLICY] = nfsd4_encode_fattr4__noop,
+ [FATTR4_FS_STATUS] = nfsd4_encode_fattr4__noop,
+
+#ifdef CONFIG_NFSD_PNFS
+ [FATTR4_FS_LAYOUT_TYPES] = nfsd4_encode_fattr4_fs_layout_types,
+ [FATTR4_LAYOUT_HINT] = nfsd4_encode_fattr4__noop,
+ [FATTR4_LAYOUT_TYPES] = nfsd4_encode_fattr4_layout_types,
+ [FATTR4_LAYOUT_BLKSIZE] = nfsd4_encode_fattr4_layout_blksize,
+ [FATTR4_LAYOUT_ALIGNMENT] = nfsd4_encode_fattr4__noop,
+#else
+ [FATTR4_FS_LAYOUT_TYPES] = nfsd4_encode_fattr4__noop,
+ [FATTR4_LAYOUT_HINT] = nfsd4_encode_fattr4__noop,
+ [FATTR4_LAYOUT_TYPES] = nfsd4_encode_fattr4__noop,
+ [FATTR4_LAYOUT_BLKSIZE] = nfsd4_encode_fattr4__noop,
+ [FATTR4_LAYOUT_ALIGNMENT] = nfsd4_encode_fattr4__noop,
+#endif
+
+ [FATTR4_FS_LOCATIONS_INFO] = nfsd4_encode_fattr4__noop,
+ [FATTR4_MDSTHRESHOLD] = nfsd4_encode_fattr4__noop,
+ [FATTR4_RETENTION_GET] = nfsd4_encode_fattr4__noop,
+ [FATTR4_RETENTION_SET] = nfsd4_encode_fattr4__noop,
+ [FATTR4_RETENTEVT_GET] = nfsd4_encode_fattr4__noop,
+ [FATTR4_RETENTEVT_SET] = nfsd4_encode_fattr4__noop,
+ [FATTR4_RETENTION_HOLD] = nfsd4_encode_fattr4__noop,
+ [FATTR4_MODE_SET_MASKED] = nfsd4_encode_fattr4__noop,
+ [FATTR4_SUPPATTR_EXCLCREAT] = nfsd4_encode_fattr4_suppattr_exclcreat,
+ [FATTR4_FS_CHARSET_CAP] = nfsd4_encode_fattr4__noop,
+ [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4_clone_blksize,
+ [FATTR4_SPACE_FREED] = nfsd4_encode_fattr4__noop,
+ [FATTR4_CHANGE_ATTR_TYPE] = nfsd4_encode_fattr4__noop,
+
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+ [FATTR4_SEC_LABEL] = nfsd4_encode_fattr4_sec_label,
+#else
+ [FATTR4_SEC_LABEL] = nfsd4_encode_fattr4__noop,
+#endif
+
+ [FATTR4_MODE_UMASK] = nfsd4_encode_fattr4__noop,
+ [FATTR4_XATTR_SUPPORT] = nfsd4_encode_fattr4_xattr_support,
+ [FATTR4_TIME_DELEG_ACCESS] = nfsd4_encode_fattr4__inval,
+ [FATTR4_TIME_DELEG_MODIFY] = nfsd4_encode_fattr4__inval,
+ [FATTR4_OPEN_ARGUMENTS] = nfsd4_encode_fattr4_open_arguments,
+};
+
/*
* Note: @fhp can be NULL; in this case, we might have to compose the filehandle
* ourselves.
*/
static __be32
-nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
- struct svc_export *exp,
- struct dentry *dentry, u32 *bmval,
- struct svc_rqst *rqstp, int ignore_crossmnt)
-{
- u32 bmval0 = bmval[0];
- u32 bmval1 = bmval[1];
- u32 bmval2 = bmval[2];
- struct kstat stat;
+nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ struct svc_fh *fhp, struct svc_export *exp,
+ struct dentry *dentry, const u32 *bmval,
+ int ignore_crossmnt)
+{
+ DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+ struct nfs4_delegation *dp = NULL;
+ struct nfsd4_fattr_args args;
struct svc_fh *tempfh = NULL;
- struct kstatfs statfs;
- __be32 *p;
int starting_len = xdr->buf->len;
- int attrlen_offset;
- __be32 attrlen;
- u32 dummy;
- u64 dummy64;
- u32 rdattr_err = 0;
- __be32 status;
+ unsigned int attrlen_offset;
+ __be32 attrlen, status;
+ u32 attrmask[3];
int err;
- struct nfs4_acl *acl = NULL;
- void *context = NULL;
- int contextlen;
- bool contextsupport = false;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
u32 minorversion = resp->cstate.minorversion;
struct path path = {
.mnt = exp->ex_path.mnt,
.dentry = dentry,
};
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ unsigned long bit;
- BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
- BUG_ON(!nfsd_attrs_supported(minorversion, bmval));
+ WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
+ WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
+ args.rqstp = rqstp;
+ args.exp = exp;
+ args.dentry = dentry;
+ args.ignore_crossmnt = (ignore_crossmnt != 0);
+ args.acl = NULL;
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+ args.context.context = NULL;
+#endif
+
+ /*
+ * Make a local copy of the attribute bitmap that can be modified.
+ */
+ attrmask[0] = bmval[0];
+ attrmask[1] = bmval[1];
+ attrmask[2] = bmval[2];
+
+ args.rdattr_err = 0;
if (exp->ex_fslocs.migrated) {
- status = fattr_handle_absent_fs(&bmval0, &bmval1, &bmval2, &rdattr_err);
+ status = fattr_handle_absent_fs(&attrmask[0], &attrmask[1],
+ &attrmask[2], &args.rdattr_err);
+ if (status)
+ goto out;
+ }
+ if ((attrmask[0] & (FATTR4_WORD0_CHANGE |
+ FATTR4_WORD0_SIZE)) ||
+ (attrmask[1] & (FATTR4_WORD1_TIME_ACCESS |
+ FATTR4_WORD1_TIME_MODIFY |
+ FATTR4_WORD1_TIME_METADATA))) {
+ status = nfsd4_deleg_getattr_conflict(rqstp, dentry, &dp);
if (status)
goto out;
}
- err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+ err = vfs_getattr(&path, &args.stat,
+ STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
+ AT_STATX_SYNC_AS_STAT);
+ if (dp) {
+ struct nfs4_cb_fattr *ncf = &dp->dl_cb_fattr;
+
+ if (ncf->ncf_file_modified) {
+ ++ncf->ncf_initial_cinfo;
+ args.stat.size = ncf->ncf_cur_fsize;
+ if (!timespec64_is_epoch(&ncf->ncf_cb_mtime))
+ args.stat.mtime = ncf->ncf_cb_mtime;
+ }
+ args.change_attr = ncf->ncf_initial_cinfo;
+
+ if (!timespec64_is_epoch(&ncf->ncf_cb_atime))
+ args.stat.atime = ncf->ncf_cb_atime;
+
+ nfs4_put_stid(&dp->dl_stid);
+ } else {
+ args.change_attr = nfsd4_change_attribute(&args.stat);
+ }
+
if (err)
goto out_nfserr;
- if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+
+ if (!(args.stat.result_mask & STATX_BTIME))
+ /* underlying FS does not offer btime so we can't share it */
+ attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
+ if ((attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
- (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
+ (attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
FATTR4_WORD1_SPACE_TOTAL))) {
- err = vfs_statfs(&path, &statfs);
+ err = vfs_statfs(&path, &args.statfs);
if (err)
goto out_nfserr;
}
- if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) {
+ if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
+ !fhp) {
tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
status = nfserr_jukebox;
if (!tempfh)
@@ -2460,12 +3679,14 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
status = fh_compose(tempfh, exp, dentry, NULL);
if (status)
goto out;
- fhp = tempfh;
- }
- if (bmval0 & FATTR4_WORD0_ACL) {
- err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
+ args.fhp = tempfh;
+ } else
+ args.fhp = fhp;
+
+ if (attrmask[0] & FATTR4_WORD0_ACL) {
+ err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl);
if (err == -EOPNOTSUPP)
- bmval0 &= ~FATTR4_WORD0_ACL;
+ attrmask[0] &= ~FATTR4_WORD0_ACL;
else if (err == -EINVAL) {
status = nfserr_attrnotsupp;
goto out;
@@ -2473,456 +3694,54 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
goto out_nfserr;
}
+ args.contextsupport = false;
+
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
- if ((bmval2 & FATTR4_WORD2_SECURITY_LABEL) ||
- bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
+ attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
err = security_inode_getsecctx(d_inode(dentry),
- &context, &contextlen);
+ &args.context);
else
err = -EOPNOTSUPP;
- contextsupport = (err == 0);
- if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
+ args.contextsupport = (err == 0);
+ if (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
if (err == -EOPNOTSUPP)
- bmval2 &= ~FATTR4_WORD2_SECURITY_LABEL;
+ attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
else if (err)
goto out_nfserr;
}
}
#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
- status = nfsd4_encode_bitmap(xdr, bmval0, bmval1, bmval2);
+ /* attrmask */
+ status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1],
+ attrmask[2]);
if (status)
goto out;
+ /* attr_vals */
attrlen_offset = xdr->buf->len;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
goto out_resource;
- p++; /* to be backfilled later */
-
- if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
- u32 supp[3];
-
- memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp));
-
- if (!IS_POSIXACL(dentry->d_inode))
- supp[0] &= ~FATTR4_WORD0_ACL;
- if (!contextsupport)
- supp[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
- if (!supp[2]) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(2);
- *p++ = cpu_to_be32(supp[0]);
- *p++ = cpu_to_be32(supp[1]);
- } else {
- p = xdr_reserve_space(xdr, 16);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(3);
- *p++ = cpu_to_be32(supp[0]);
- *p++ = cpu_to_be32(supp[1]);
- *p++ = cpu_to_be32(supp[2]);
- }
- }
- if (bmval0 & FATTR4_WORD0_TYPE) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- dummy = nfs4_file_type(stat.mode);
- if (dummy == NF4BAD) {
- status = nfserr_serverfault;
- goto out;
- }
- *p++ = cpu_to_be32(dummy);
- }
- if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
- *p++ = cpu_to_be32(NFS4_FH_PERSISTENT);
- else
- *p++ = cpu_to_be32(NFS4_FH_PERSISTENT|
- NFS4_FH_VOL_RENAME);
- }
- if (bmval0 & FATTR4_WORD0_CHANGE) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- p = encode_change(p, &stat, d_inode(dentry), exp);
- }
- if (bmval0 & FATTR4_WORD0_SIZE) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, stat.size);
- }
- if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(1);
- }
- if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(1);
- }
- if (bmval0 & FATTR4_WORD0_NAMED_ATTR) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(0);
- }
- if (bmval0 & FATTR4_WORD0_FSID) {
- p = xdr_reserve_space(xdr, 16);
- if (!p)
- goto out_resource;
- if (exp->ex_fslocs.migrated) {
- p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR);
- p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MINOR);
- } else switch(fsid_source(fhp)) {
- case FSIDSOURCE_FSID:
- p = xdr_encode_hyper(p, (u64)exp->ex_fsid);
- p = xdr_encode_hyper(p, (u64)0);
- break;
- case FSIDSOURCE_DEV:
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(MAJOR(stat.dev));
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(MINOR(stat.dev));
- break;
- case FSIDSOURCE_UUID:
- p = xdr_encode_opaque_fixed(p, exp->ex_uuid,
- EX_UUID_LEN);
- break;
- }
- }
- if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(0);
- }
- if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(nn->nfsd4_lease);
- }
- if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(rdattr_err);
- }
- if (bmval0 & FATTR4_WORD0_ACL) {
- struct nfs4_ace *ace;
-
- if (acl == NULL) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
-
- *p++ = cpu_to_be32(0);
- goto out_acl;
- }
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(acl->naces);
-
- for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) {
- p = xdr_reserve_space(xdr, 4*3);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(ace->type);
- *p++ = cpu_to_be32(ace->flag);
- *p++ = cpu_to_be32(ace->access_mask &
- NFS4_ACE_MASK_ALL);
- status = nfsd4_encode_aclname(xdr, rqstp, ace);
- if (status)
- goto out;
- }
- }
-out_acl:
- if (bmval0 & FATTR4_WORD0_ACLSUPPORT) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(IS_POSIXACL(dentry->d_inode) ?
- ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);
- }
- if (bmval0 & FATTR4_WORD0_CANSETTIME) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(1);
- }
- if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(0);
- }
- if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(1);
- }
- if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(1);
- }
- if (bmval0 & FATTR4_WORD0_FILEHANDLE) {
- p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4);
- if (!p)
- goto out_resource;
- p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base,
- fhp->fh_handle.fh_size);
- }
- if (bmval0 & FATTR4_WORD0_FILEID) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, stat.ino);
- }
- if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (u64) statfs.f_ffree);
- }
- if (bmval0 & FATTR4_WORD0_FILES_FREE) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (u64) statfs.f_ffree);
- }
- if (bmval0 & FATTR4_WORD0_FILES_TOTAL) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (u64) statfs.f_files);
- }
- if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
- status = nfsd4_encode_fs_locations(xdr, rqstp, exp);
- if (status)
+ bitmap_from_arr32(attr_bitmap, attrmask,
+ ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+ for_each_set_bit(bit, attr_bitmap,
+ ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) {
+ status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args);
+ if (status != nfs_ok)
goto out;
}
- if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(1);
- }
- if (bmval0 & FATTR4_WORD0_MAXFILESIZE) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, exp->ex_path.mnt->mnt_sb->s_maxbytes);
- }
- if (bmval0 & FATTR4_WORD0_MAXLINK) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(255);
- }
- if (bmval0 & FATTR4_WORD0_MAXNAME) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(statfs.f_namelen);
- }
- if (bmval0 & FATTR4_WORD0_MAXREAD) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp));
- }
- if (bmval0 & FATTR4_WORD0_MAXWRITE) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp));
- }
- if (bmval1 & FATTR4_WORD1_MODE) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(stat.mode & S_IALLUGO);
- }
- if (bmval1 & FATTR4_WORD1_NO_TRUNC) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(1);
- }
- if (bmval1 & FATTR4_WORD1_NUMLINKS) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(stat.nlink);
- }
- if (bmval1 & FATTR4_WORD1_OWNER) {
- status = nfsd4_encode_user(xdr, rqstp, stat.uid);
- if (status)
- goto out;
- }
- if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
- status = nfsd4_encode_group(xdr, rqstp, stat.gid);
- if (status)
- goto out;
- }
- if (bmval1 & FATTR4_WORD1_RAWDEV) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32((u32) MAJOR(stat.rdev));
- *p++ = cpu_to_be32((u32) MINOR(stat.rdev));
- }
- if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize;
- p = xdr_encode_hyper(p, dummy64);
- }
- if (bmval1 & FATTR4_WORD1_SPACE_FREE) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize;
- p = xdr_encode_hyper(p, dummy64);
- }
- if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize;
- p = xdr_encode_hyper(p, dummy64);
- }
- if (bmval1 & FATTR4_WORD1_SPACE_USED) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- dummy64 = (u64)stat.blocks << 9;
- p = xdr_encode_hyper(p, dummy64);
- }
- if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec);
- *p++ = cpu_to_be32(stat.atime.tv_nsec);
- }
- if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = encode_time_delta(p, d_inode(dentry));
- }
- if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec);
- *p++ = cpu_to_be32(stat.ctime.tv_nsec);
- }
- if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
- p = xdr_reserve_space(xdr, 12);
- if (!p)
- goto out_resource;
- p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
- *p++ = cpu_to_be32(stat.mtime.tv_nsec);
- }
- if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
- struct kstat parent_stat;
- u64 ino = stat.ino;
-
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- /*
- * Get parent's attributes if not ignoring crossmount
- * and this is the root of a cross-mounted filesystem.
- */
- if (ignore_crossmnt == 0 &&
- dentry == exp->ex_path.mnt->mnt_root) {
- err = get_parent_attributes(exp, &parent_stat);
- if (err)
- goto out_nfserr;
- ino = parent_stat.ino;
- }
- p = xdr_encode_hyper(p, ino);
- }
-#ifdef CONFIG_NFSD_PNFS
- if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
- status = nfsd4_encode_layout_types(xdr, exp->ex_layout_types);
- if (status)
- goto out;
- }
-
- if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) {
- status = nfsd4_encode_layout_types(xdr, exp->ex_layout_types);
- if (status)
- goto out;
- }
-
- if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(stat.blksize);
- }
-#endif /* CONFIG_NFSD_PNFS */
- if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
- u32 supp[3];
-
- memcpy(supp, nfsd_suppattrs[minorversion], sizeof(supp));
- supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0;
- supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1;
- supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2;
-
- status = nfsd4_encode_bitmap(xdr, supp[0], supp[1], supp[2]);
- if (status)
- goto out;
- }
-
- if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- if (IS_I_VERSION(d_inode(dentry)))
- *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR);
- else
- *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
- }
-
- if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
- status = nfsd4_encode_security_label(xdr, rqstp, context,
- contextlen);
- if (status)
- goto out;
- }
-
- attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
- write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
+ attrlen = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT);
+ write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, XDR_UNIT);
status = nfs_ok;
out:
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
- if (context)
- security_release_secctx(context, contextlen);
+ if (args.context.context)
+ security_release_secctx(&args.context);
#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
- kfree(acl);
+ kfree(args.acl);
if (tempfh) {
fh_put(tempfh);
kfree(tempfh);
@@ -2963,12 +3782,28 @@ __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
__be32 ret;
svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2);
- ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp,
- ignore_crossmnt);
+ ret = nfsd4_encode_fattr4(rqstp, &xdr, fhp, exp, dentry, bmval,
+ ignore_crossmnt);
*p = xdr.p;
return ret;
}
+/*
+ * The buffer space for this field was reserved during a previous
+ * call to nfsd4_encode_entry4().
+ */
+static void nfsd4_encode_entry4_nfs_cookie4(const struct nfsd4_readdir *readdir,
+ u64 offset)
+{
+ __be64 cookie = cpu_to_be64(offset);
+ struct xdr_stream *xdr = readdir->xdr;
+
+ if (!readdir->cookie_offset)
+ return;
+ write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset, &cookie,
+ sizeof(cookie));
+}
+
static inline int attributes_need_mount(u32 *bmval)
{
if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME))
@@ -2979,26 +3814,19 @@ static inline int attributes_need_mount(u32 *bmval)
}
static __be32
-nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
- const char *name, int namlen)
+nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name,
+ int namlen)
{
struct svc_export *exp = cd->rd_fhp->fh_export;
struct dentry *dentry;
__be32 nfserr;
int ignore_crossmnt = 0;
- dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
+ dentry = lookup_one_positive_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, namlen),
+ cd->rd_fhp->fh_dentry);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
- if (d_really_is_negative(dentry)) {
- /*
- * we're not holding the i_mutex here, so there's
- * a window where this directory entry could have gone
- * away.
- */
- dput(dentry);
- return nfserr_noent;
- }
exp_get(exp);
/*
@@ -3026,39 +3854,40 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
nfserr = nfserrno(err);
goto out_put;
}
- nfserr = check_nfsd_access(exp, cd->rd_rqstp);
+ nfserr = check_nfsd_access(exp, cd->rd_rqstp, false);
if (nfserr)
goto out_put;
}
out_encode:
- nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
- cd->rd_rqstp, ignore_crossmnt);
+ nfserr = nfsd4_encode_fattr4(cd->rd_rqstp, cd->xdr, NULL, exp, dentry,
+ cd->rd_bmval, ignore_crossmnt);
out_put:
dput(dentry);
exp_put(exp);
return nfserr;
}
-static __be32 *
-nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
+static __be32
+nfsd4_encode_entry4_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
{
- __be32 *p;
-
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return NULL;
- *p++ = htonl(2);
- *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
- *p++ = htonl(0); /* bmval1 */
+ __be32 status;
- *p++ = htonl(4); /* attribute length */
- *p++ = nfserr; /* no htonl */
- return p;
+ /* attrmask */
+ status = nfsd4_encode_bitmap4(xdr, FATTR4_WORD0_RDATTR_ERROR, 0, 0);
+ if (status != nfs_ok)
+ return status;
+ /* attr_vals */
+ if (xdr_stream_encode_u32(xdr, XDR_UNIT) != XDR_UNIT)
+ return nfserr_resource;
+ /* rdattr_error */
+ if (xdr_stream_encode_be32(xdr, nfserr) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
}
static int
-nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
+nfsd4_encode_entry4(void *ccdv, const char *name, int namlen,
loff_t offset, u64 ino, unsigned int d_type)
{
struct readdir_cd *ccd = ccdv;
@@ -3069,8 +3898,6 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
u32 name_and_cookie;
int entry_bytes;
__be32 nfserr = nfserr_toosmall;
- __be64 wire_offset;
- __be32 *p;
/* In nfsv4, "." and ".." never make it onto the wire.. */
if (name && isdotent(name, namlen)) {
@@ -3078,24 +3905,19 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
return 0;
}
- if (cd->cookie_offset) {
- wire_offset = cpu_to_be64(offset);
- write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset,
- &wire_offset, 8);
- }
+ /* Encode the previous entry's cookie value */
+ nfsd4_encode_entry4_nfs_cookie4(cd, offset);
- p = xdr_reserve_space(xdr, 4);
- if (!p)
+ if (xdr_stream_encode_item_present(xdr) != XDR_UNIT)
goto fail;
- *p++ = xdr_one; /* mark entry present */
+
+ /* Reserve send buffer space for this entry's cookie value. */
cookie_offset = xdr->buf->len;
- p = xdr_reserve_space(xdr, 3*4 + namlen);
- if (!p)
+ if (nfsd4_encode_nfs_cookie4(xdr, OFFSET_MAX) != nfs_ok)
goto fail;
- p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
- p = xdr_encode_array(p, name, namlen); /* name length & name */
-
- nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
+ if (nfsd4_encode_component4(xdr, name, namlen) != nfs_ok)
+ goto fail;
+ nfserr = nfsd4_encode_entry4_fattr(cd, name, namlen);
switch (nfserr) {
case nfs_ok:
break;
@@ -3105,6 +3927,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
case nfserr_noent:
xdr_truncate_encode(xdr, start_offset);
goto skip_entry;
+ case nfserr_jukebox:
+ /*
+ * The pseudoroot should only display dentries that lead to
+ * exports. If we get EJUKEBOX here, then we can't tell whether
+ * this entry should be included. Just fail the whole READDIR
+ * with NFS4ERR_DELAY in that case, and hope that the situation
+ * will resolve itself by the client's next attempt.
+ */
+ if (cd->rd_fhp->fh_export->ex_flags & NFSEXP_V4ROOT)
+ goto fail;
+ fallthrough;
default:
/*
* If the client requested the RDATTR_ERROR attribute,
@@ -3115,8 +3948,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
*/
if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
goto fail;
- p = nfsd4_encode_rdattr_error(xdr, nfserr);
- if (p == NULL) {
+ if (nfsd4_encode_entry4_rdattr_error(xdr, nfserr)) {
nfserr = nfserr_toosmall;
goto fail;
}
@@ -3127,15 +3959,18 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
goto fail;
cd->rd_maxcount -= entry_bytes;
/*
- * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so
- * let's always let through the first entry, at least:
+ * RFC 3530 14.2.24 describes rd_dircount as only a "hint", and
+ * notes that it could be zero. If it is zero, then the server
+ * should enforce only the rd_maxcount value.
*/
- if (!cd->rd_dircount)
- goto fail;
- name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8;
- if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
- goto fail;
- cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
+ if (cd->rd_dircount) {
+ name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8;
+ if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
+ goto fail;
+ cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
+ if (!cd->rd_dircount)
+ cd->rd_maxcount = 0;
+ }
cd->cookie_offset = cookie_offset;
skip_entry:
@@ -3148,647 +3983,782 @@ fail:
}
static __be32
-nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
+nfsd4_encode_verifier4(struct xdr_stream *xdr, const nfs4_verifier *verf)
{
__be32 *p;
- p = xdr_reserve_space(xdr, sizeof(stateid_t));
+ p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
if (!p)
return nfserr_resource;
- *p++ = cpu_to_be32(sid->si_generation);
- p = xdr_encode_opaque_fixed(p, &sid->si_opaque,
- sizeof(stateid_opaque_t));
- return 0;
+ memcpy(p, verf->data, sizeof(verf->data));
+ return nfs_ok;
}
static __be32
-nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
+nfsd4_encode_clientid4(struct xdr_stream *xdr, const clientid_t *clientid)
{
- struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- p = xdr_reserve_space(xdr, 8);
+ p = xdr_reserve_space(xdr, sizeof(__be64));
if (!p)
return nfserr_resource;
- *p++ = cpu_to_be32(access->ac_supported);
- *p++ = cpu_to_be32(access->ac_resp_access);
- return 0;
+ memcpy(p, clientid, sizeof(*clientid));
+ return nfs_ok;
}
-static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts)
+/* This is a frequently-encoded item; open-coded for speed */
+static __be32
+nfsd4_encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid)
{
- struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8);
+ p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE);
if (!p)
return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, bcts->sessionid.data,
- NFS4_MAX_SESSIONID_LEN);
- *p++ = cpu_to_be32(bcts->dir);
- /* Upshifting from TCP to RDMA is not supported */
- *p++ = cpu_to_be32(0);
- return 0;
+ *p++ = cpu_to_be32(sid->si_generation);
+ memcpy(p, &sid->si_opaque, sizeof(sid->si_opaque));
+ return nfs_ok;
}
static __be32
-nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
+nfsd4_encode_sessionid4(struct xdr_stream *xdr,
+ const struct nfs4_sessionid *sessionid)
{
- struct xdr_stream *xdr = &resp->xdr;
+ return nfsd4_encode_opaque_fixed(xdr, sessionid->data,
+ NFS4_MAX_SESSIONID_LEN);
+}
- return nfsd4_encode_stateid(xdr, &close->cl_stateid);
+static __be32
+nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_access *access = &u->access;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 status;
+
+ /* supported */
+ status = nfsd4_encode_uint32_t(xdr, access->ac_supported);
+ if (status != nfs_ok)
+ return status;
+ /* access */
+ return nfsd4_encode_uint32_t(xdr, access->ac_resp_access);
}
+static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_bind_conn_to_session *bcts = &u->bind_conn_to_session;
+ struct xdr_stream *xdr = resp->xdr;
+
+ /* bctsr_sessid */
+ nfserr = nfsd4_encode_sessionid4(xdr, &bcts->sessionid);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* bctsr_dir */
+ if (xdr_stream_encode_u32(xdr, bcts->dir) != XDR_UNIT)
+ return nfserr_resource;
+ /* bctsr_use_conn_in_rdma_mode */
+ return nfsd4_encode_bool(xdr, false);
+}
static __be32
-nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
+nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd4_close *close = &u->close;
+ struct xdr_stream *xdr = resp->xdr;
- p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, commit->co_verf.data,
- NFS4_VERIFIER_SIZE);
- return 0;
+ /* open_stateid */
+ return nfsd4_encode_stateid4(xdr, &close->cl_stateid);
}
+
static __be32
-nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
+nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd4_commit *commit = &u->commit;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- encode_cinfo(p, &create->cr_cinfo);
- nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
- create->cr_bmval[1], create->cr_bmval[2]);
- return 0;
+ return nfsd4_encode_verifier4(resp->xdr, &commit->co_verf);
}
static __be32
-nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr)
+nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
+ struct nfsd4_create *create = &u->create;
+ struct xdr_stream *xdr = resp->xdr;
+
+ /* cinfo */
+ nfserr = nfsd4_encode_change_info4(xdr, &create->cr_cinfo);
+ if (nfserr)
+ return nfserr;
+ /* attrset */
+ return nfsd4_encode_bitmap4(xdr, create->cr_bmval[0],
+ create->cr_bmval[1], create->cr_bmval[2]);
+}
+
+static __be32
+nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_getattr *getattr = &u->getattr;
struct svc_fh *fhp = getattr->ga_fhp;
- struct xdr_stream *xdr = &resp->xdr;
+ struct xdr_stream *xdr = resp->xdr;
- return nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry,
- getattr->ga_bmval, resp->rqstp, 0);
+ /* obj_attributes */
+ return nfsd4_encode_fattr4(resp->rqstp, xdr, fhp, fhp->fh_export,
+ fhp->fh_dentry, getattr->ga_bmval, 0);
}
static __be32
-nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
+nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- struct svc_fh *fhp = *fhpp;
- unsigned int len;
- __be32 *p;
+ struct xdr_stream *xdr = resp->xdr;
+ struct svc_fh *fhp = u->getfh;
- len = fhp->fh_handle.fh_size;
- p = xdr_reserve_space(xdr, len + 4);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len);
- return 0;
+ /* object */
+ return nfsd4_encode_nfs_fh4(xdr, &fhp->fh_handle);
}
-/*
-* Including all fields other than the name, a LOCK4denied structure requires
-* 8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes.
-*/
static __be32
-nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld)
+nfsd4_encode_lock_owner4(struct xdr_stream *xdr, const clientid_t *clientid,
+ const struct xdr_netobj *owner)
{
- struct xdr_netobj *conf = &ld->ld_owner;
- __be32 *p;
+ __be32 status;
-again:
- p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len));
- if (!p) {
- /*
- * Don't fail to return the result just because we can't
- * return the conflicting open:
- */
- if (conf->len) {
- kfree(conf->data);
- conf->len = 0;
- conf->data = NULL;
- goto again;
- }
+ /* clientid */
+ status = nfsd4_encode_clientid4(xdr, clientid);
+ if (status != nfs_ok)
+ return status;
+ /* owner */
+ return nfsd4_encode_opaque(xdr, owner->data, owner->len);
+}
+
+static __be32
+nfsd4_encode_lock4denied(struct xdr_stream *xdr,
+ const struct nfsd4_lock_denied *ld)
+{
+ __be32 status;
+
+ /* offset */
+ status = nfsd4_encode_offset4(xdr, ld->ld_start);
+ if (status != nfs_ok)
+ return status;
+ /* length */
+ status = nfsd4_encode_length4(xdr, ld->ld_length);
+ if (status != nfs_ok)
+ return status;
+ /* locktype */
+ if (xdr_stream_encode_u32(xdr, ld->ld_type) != XDR_UNIT)
return nfserr_resource;
- }
- p = xdr_encode_hyper(p, ld->ld_start);
- p = xdr_encode_hyper(p, ld->ld_length);
- *p++ = cpu_to_be32(ld->ld_type);
- if (conf->len) {
- p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8);
- p = xdr_encode_opaque(p, conf->data, conf->len);
- kfree(conf->data);
- } else { /* non - nfsv4 lock in conflict, no clientid nor owner */
- p = xdr_encode_hyper(p, (u64)0); /* clientid */
- *p++ = cpu_to_be32(0); /* length of owner name */
- }
- return nfserr_denied;
+ /* owner */
+ return nfsd4_encode_lock_owner4(xdr, &ld->ld_clientid,
+ &ld->ld_owner);
}
static __be32
-nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
+nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
+ struct nfsd4_lock *lock = &u->lock;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 status;
- if (!nfserr)
- nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid);
- else if (nfserr == nfserr_denied)
- nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied);
+ switch (nfserr) {
+ case nfs_ok:
+ /* resok4 */
+ status = nfsd4_encode_stateid4(xdr, &lock->lk_resp_stateid);
+ break;
+ case nfserr_denied:
+ /* denied */
+ status = nfsd4_encode_lock4denied(xdr, &lock->lk_denied);
+ break;
+ default:
+ return nfserr;
+ }
+ return status != nfs_ok ? status : nfserr;
+}
+
+static __be32
+nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_lockt *lockt = &u->lockt;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 status;
+ if (nfserr == nfserr_denied) {
+ /* denied */
+ status = nfsd4_encode_lock4denied(xdr, &lockt->lt_denied);
+ if (status != nfs_ok)
+ return status;
+ }
return nfserr;
}
static __be32
-nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
+nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
+ struct nfsd4_locku *locku = &u->locku;
+ struct xdr_stream *xdr = resp->xdr;
- if (nfserr == nfserr_denied)
- nfsd4_encode_lock_denied(xdr, &lockt->lt_denied);
- return nfserr;
+ /* lock_stateid */
+ return nfsd4_encode_stateid4(xdr, &locku->lu_stateid);
}
+
static __be32
-nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
+nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
+ struct nfsd4_link *link = &u->link;
+ struct xdr_stream *xdr = resp->xdr;
- return nfsd4_encode_stateid(xdr, &locku->lu_stateid);
+ return nfsd4_encode_change_info4(xdr, &link->li_cinfo);
}
+/*
+ * This implementation does not yet support returning an ACE in an
+ * OPEN that offers a delegation.
+ */
+static __be32
+nfsd4_encode_open_nfsace4(struct xdr_stream *xdr)
+{
+ __be32 status;
+
+ /* type */
+ status = nfsd4_encode_acetype4(xdr, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
+ if (status != nfs_ok)
+ return nfserr_resource;
+ /* flag */
+ status = nfsd4_encode_aceflag4(xdr, 0);
+ if (status != nfs_ok)
+ return nfserr_resource;
+ /* access mask */
+ status = nfsd4_encode_acemask4(xdr, 0);
+ if (status != nfs_ok)
+ return nfserr_resource;
+ /* who - empty for now */
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+}
static __be32
-nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
+nfsd4_encode_open_read_delegation4(struct xdr_stream *xdr, struct nfsd4_open *open)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ __be32 status;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
+ /* stateid */
+ status = nfsd4_encode_stateid4(xdr, &open->op_delegate_stateid);
+ if (status != nfs_ok)
+ return status;
+ /* recall */
+ status = nfsd4_encode_bool(xdr, open->op_recall);
+ if (status != nfs_ok)
+ return status;
+ /* permissions */
+ return nfsd4_encode_open_nfsace4(xdr);
+}
+
+static __be32
+nfsd4_encode_nfs_space_limit4(struct xdr_stream *xdr, u64 filesize)
+{
+ /* limitby */
+ if (xdr_stream_encode_u32(xdr, NFS4_LIMIT_SIZE) != XDR_UNIT)
return nfserr_resource;
- p = encode_cinfo(p, &link->li_cinfo);
- return 0;
+ /* filesize */
+ return nfsd4_encode_uint64_t(xdr, filesize);
}
+static __be32
+nfsd4_encode_open_write_delegation4(struct xdr_stream *xdr,
+ struct nfsd4_open *open)
+{
+ __be32 status;
+
+ /* stateid */
+ status = nfsd4_encode_stateid4(xdr, &open->op_delegate_stateid);
+ if (status != nfs_ok)
+ return status;
+ /* recall */
+ status = nfsd4_encode_bool(xdr, open->op_recall);
+ if (status != nfs_ok)
+ return status;
+ /* space_limit */
+ status = nfsd4_encode_nfs_space_limit4(xdr, 0);
+ if (status != nfs_ok)
+ return status;
+ return nfsd4_encode_open_nfsace4(xdr);
+}
static __be32
-nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
+nfsd4_encode_open_none_delegation4(struct xdr_stream *xdr,
+ struct nfsd4_open *open)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ __be32 status = nfs_ok;
- nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid);
- if (nfserr)
- return nfserr;
- p = xdr_reserve_space(xdr, 24);
- if (!p)
+ /* ond_why */
+ if (xdr_stream_encode_u32(xdr, open->op_why_no_deleg) != XDR_UNIT)
return nfserr_resource;
- p = encode_cinfo(p, &open->op_cinfo);
- *p++ = cpu_to_be32(open->op_rflags);
+ switch (open->op_why_no_deleg) {
+ case WND4_CONTENTION:
+ /* ond_server_will_push_deleg */
+ status = nfsd4_encode_bool(xdr, false);
+ break;
+ case WND4_RESOURCE:
+ /* ond_server_will_signal_avail */
+ status = nfsd4_encode_bool(xdr, false);
+ }
+ return status;
+}
- nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1],
- open->op_bmval[2]);
- if (nfserr)
- return nfserr;
+static __be32
+nfsd4_encode_open_delegation4(struct xdr_stream *xdr, struct nfsd4_open *open)
+{
+ __be32 status;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
+ /* delegation_type */
+ if (xdr_stream_encode_u32(xdr, open->op_delegate_type) != XDR_UNIT)
return nfserr_resource;
-
- *p++ = cpu_to_be32(open->op_delegate_type);
switch (open->op_delegate_type) {
- case NFS4_OPEN_DELEGATE_NONE:
+ case OPEN_DELEGATE_NONE:
+ status = nfs_ok;
break;
- case NFS4_OPEN_DELEGATE_READ:
- nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid);
- if (nfserr)
- return nfserr;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(open->op_recall);
-
- /*
- * TODO: ACE's in delegations
- */
- *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */
+ case OPEN_DELEGATE_READ:
+ case OPEN_DELEGATE_READ_ATTRS_DELEG:
+ /* read */
+ status = nfsd4_encode_open_read_delegation4(xdr, open);
break;
- case NFS4_OPEN_DELEGATE_WRITE:
- nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid);
- if (nfserr)
- return nfserr;
- p = xdr_reserve_space(xdr, 32);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(0);
-
- /*
- * TODO: space_limit's in delegations
- */
- *p++ = cpu_to_be32(NFS4_LIMIT_SIZE);
- *p++ = cpu_to_be32(~(u32)0);
- *p++ = cpu_to_be32(~(u32)0);
-
- /*
- * TODO: ACE's in delegations
- */
- *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */
+ case OPEN_DELEGATE_WRITE:
+ case OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ /* write */
+ status = nfsd4_encode_open_write_delegation4(xdr, open);
break;
- case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */
- switch (open->op_why_no_deleg) {
- case WND4_CONTENTION:
- case WND4_RESOURCE:
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(open->op_why_no_deleg);
- /* deleg signaling not supported yet: */
- *p++ = cpu_to_be32(0);
- break;
- default:
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(open->op_why_no_deleg);
- }
+ case OPEN_DELEGATE_NONE_EXT:
+ /* od_whynone */
+ status = nfsd4_encode_open_none_delegation4(xdr, open);
break;
default:
- BUG();
+ status = nfserr_serverfault;
}
- /* XXX save filehandle here */
- return 0;
+
+ return status;
+}
+
+static __be32
+nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_open *open = &u->open;
+ struct xdr_stream *xdr = resp->xdr;
+
+ /* stateid */
+ nfserr = nfsd4_encode_stateid4(xdr, &open->op_stateid);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* cinfo */
+ nfserr = nfsd4_encode_change_info4(xdr, &open->op_cinfo);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* rflags */
+ nfserr = nfsd4_encode_uint32_t(xdr, open->op_rflags);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* attrset */
+ nfserr = nfsd4_encode_bitmap4(xdr, open->op_bmval[0],
+ open->op_bmval[1], open->op_bmval[2]);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* delegation */
+ return nfsd4_encode_open_delegation4(xdr, open);
}
static __be32
-nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
+nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
+ struct nfsd4_open_confirm *oc = &u->open_confirm;
+ struct xdr_stream *xdr = resp->xdr;
- return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid);
+ /* open_stateid */
+ return nfsd4_encode_stateid4(xdr, &oc->oc_resp_stateid);
}
static __be32
-nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
+nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
+ struct nfsd4_open_downgrade *od = &u->open_downgrade;
+ struct xdr_stream *xdr = resp->xdr;
- return nfsd4_encode_stateid(xdr, &od->od_stateid);
+ /* open_stateid */
+ return nfsd4_encode_stateid4(xdr, &od->od_stateid);
}
+/*
+ * The operation of this function assumes that this is the only
+ * READ operation in the COMPOUND. If there are multiple READs,
+ * we use nfsd4_encode_readv().
+ */
static __be32 nfsd4_encode_splice_read(
struct nfsd4_compoundres *resp,
struct nfsd4_read *read,
struct file *file, unsigned long maxcount)
{
- struct xdr_stream *xdr = &resp->xdr;
+ struct xdr_stream *xdr = resp->xdr;
struct xdr_buf *buf = xdr->buf;
- u32 eof;
- long len;
- int space_left;
+ int status, space_left;
__be32 nfserr;
- __be32 *p = xdr->p - 2;
- /* Make sure there will be room for padding if needed */
- if (xdr->end - xdr->p < 1)
+ /*
+ * Splice read doesn't work if encoding has already wandered
+ * into the XDR buf's page array.
+ */
+ if (unlikely(xdr->buf->page_len)) {
+ WARN_ON_ONCE(1);
+ return nfserr_serverfault;
+ }
+
+ /*
+ * Make sure there is room at the end of buf->head for
+ * svcxdr_encode_opaque_pages() to create a tail buffer
+ * to XDR-pad the payload.
+ */
+ if (xdr->iov != xdr->buf->head || xdr->end - xdr->p < 1)
return nfserr_resource;
- len = maxcount;
nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
- file, read->rd_offset, &maxcount);
+ file, read->rd_offset, &maxcount,
+ &read->rd_eof);
read->rd_length = maxcount;
- if (nfserr) {
- /*
- * nfsd_splice_actor may have already messed with the
- * page length; reset it so as not to confuse
- * xdr_truncate_encode:
- */
- buf->page_len = 0;
- return nfserr;
- }
-
- eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
- d_inode(read->rd_fhp->fh_dentry)->i_size);
-
- *(p++) = htonl(eof);
- *(p++) = htonl(maxcount);
-
- buf->page_len = maxcount;
- buf->len += maxcount;
- xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1)
- / PAGE_SIZE;
-
- /* Use rest of head for padding and remaining ops: */
- buf->tail[0].iov_base = xdr->p;
- buf->tail[0].iov_len = 0;
- xdr->iov = buf->tail;
- if (maxcount&3) {
- int pad = 4 - (maxcount&3);
-
- *(xdr->p++) = 0;
-
- buf->tail[0].iov_base += maxcount&3;
- buf->tail[0].iov_len = pad;
- buf->len += pad;
+ if (nfserr)
+ goto out_err;
+ svcxdr_encode_opaque_pages(read->rd_rqstp, xdr, buf->pages,
+ buf->page_base, maxcount);
+ status = svc_encode_result_payload(read->rd_rqstp,
+ buf->head[0].iov_len, maxcount);
+ if (status) {
+ nfserr = nfserrno(status);
+ goto out_err;
}
+ /*
+ * Prepare to encode subsequent operations.
+ *
+ * xdr_truncate_encode() is not safe to use after a successful
+ * splice read has been done, so the following stream
+ * manipulations are open-coded.
+ */
space_left = min_t(int, (void *)xdr->end - (void *)xdr->p,
buf->buflen - buf->len);
buf->buflen = buf->len + space_left;
xdr->end = (__be32 *)((void *)xdr->end + space_left);
- return 0;
+ return nfs_ok;
+
+out_err:
+ /*
+ * nfsd_splice_actor may have already messed with the
+ * page length; reset it so as not to confuse
+ * xdr_truncate_encode in our caller.
+ */
+ buf->page_len = 0;
+ return nfserr;
}
static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
struct nfsd4_read *read,
- struct file *file, unsigned long maxcount)
-{
- struct xdr_stream *xdr = &resp->xdr;
- u32 eof;
- int v;
- int starting_len = xdr->buf->len - 8;
- long len;
- int thislen;
+ unsigned long maxcount)
+{
+ struct xdr_stream *xdr = resp->xdr;
+ unsigned int base = xdr->buf->page_len & ~PAGE_MASK;
+ unsigned int starting_len = xdr->buf->len;
+ __be32 zero = xdr_zero;
__be32 nfserr;
- __be32 tmp;
- __be32 *p;
- u32 zzz = 0;
- int pad;
-
- len = maxcount;
- v = 0;
-
- thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p));
- p = xdr_reserve_space(xdr, (thislen+3)&~3);
- WARN_ON_ONCE(!p);
- resp->rqstp->rq_vec[v].iov_base = p;
- resp->rqstp->rq_vec[v].iov_len = thislen;
- v++;
- len -= thislen;
-
- while (len) {
- thislen = min_t(long, len, PAGE_SIZE);
- p = xdr_reserve_space(xdr, (thislen+3)&~3);
- WARN_ON_ONCE(!p);
- resp->rqstp->rq_vec[v].iov_base = p;
- resp->rqstp->rq_vec[v].iov_len = thislen;
- v++;
- len -= thislen;
- }
- read->rd_vlen = v;
- len = maxcount;
- nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
- resp->rqstp->rq_vec, read->rd_vlen, &maxcount);
+ nfserr = nfsd_iter_read(resp->rqstp, read->rd_fhp, read->rd_nf,
+ read->rd_offset, &maxcount, base,
+ &read->rd_eof);
read->rd_length = maxcount;
if (nfserr)
return nfserr;
- xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
-
- eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
- d_inode(read->rd_fhp->fh_dentry)->i_size);
- tmp = htonl(eof);
- write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
- tmp = htonl(maxcount);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
+ /*
+ * svcxdr_encode_opaque_pages() is not used here because
+ * we don't want to encode subsequent results in this
+ * COMPOUND into the xdr->buf's tail, but rather those
+ * results should follow the NFS READ payload in the
+ * buf's pages.
+ */
+ if (xdr_reserve_space_vec(xdr, maxcount) < 0)
+ return nfserr_resource;
- pad = (maxcount&3) ? 4 - (maxcount&3) : 0;
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount,
- &zzz, pad);
- return 0;
+ /*
+ * Mark the buffer location of the NFS READ payload so that
+ * direct placement-capable transports send only the
+ * payload bytes out-of-band.
+ */
+ if (svc_encode_result_payload(resp->rqstp, starting_len, maxcount))
+ return nfserr_io;
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &zero,
+ xdr_pad_size(maxcount));
+ return nfs_ok;
}
static __be32
nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_read *read)
+ union nfsd4_op_u *u)
{
+ struct nfsd4_compoundargs *argp = resp->rqstp->rq_argp;
+ struct nfsd4_read *read = &u->read;
+ struct xdr_stream *xdr = resp->xdr;
+ bool splice_ok = argp->splice_ok;
+ unsigned int eof_offset;
unsigned long maxcount;
- struct xdr_stream *xdr = &resp->xdr;
- struct file *file = read->rd_filp;
- int starting_len = xdr->buf->len;
- struct raparms *ra = NULL;
- __be32 *p;
+ __be32 wire_data[2];
+ struct file *file;
- p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
- if (!p) {
- WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
- return nfserr_resource;
- }
- if (resp->xdr.buf->page_len &&
- test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
- WARN_ON_ONCE(1);
+ if (nfserr)
+ return nfserr;
+
+ eof_offset = xdr->buf->len;
+ file = read->rd_nf->nf_file;
+
+ /* Reserve space for the eof flag and byte count */
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2))) {
+ WARN_ON_ONCE(splice_ok);
return nfserr_resource;
}
xdr_commit_encode(xdr);
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(unsigned long, maxcount,
+ maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
- maxcount = min_t(unsigned long, maxcount, read->rd_length);
-
- if (read->rd_tmp_file)
- ra = nfsd_init_raparms(file);
- if (file->f_op->splice_read &&
- test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
+ if (file->f_op->splice_read && splice_ok)
nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
else
- nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
-
- if (ra)
- nfsd_put_raparams(file, ra);
-
- if (nfserr)
- xdr_truncate_encode(xdr, starting_len);
+ nfserr = nfsd4_encode_readv(resp, read, maxcount);
+ if (nfserr) {
+ xdr_truncate_encode(xdr, eof_offset);
+ return nfserr;
+ }
- return nfserr;
+ wire_data[0] = read->rd_eof ? xdr_one : xdr_zero;
+ wire_data[1] = cpu_to_be32(read->rd_length);
+ write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2);
+ return nfs_ok;
}
static __be32
-nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink)
-{
- int maxcount;
- __be32 wire_count;
- int zero = 0;
- struct xdr_stream *xdr = &resp->xdr;
- int length_offset = xdr->buf->len;
- __be32 *p;
-
- p = xdr_reserve_space(xdr, 4);
- if (!p)
+nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_readlink *readlink = &u->readlink;
+ __be32 *p, wire_count, zero = xdr_zero;
+ struct xdr_stream *xdr = resp->xdr;
+ unsigned int length_offset;
+ int maxcount, status;
+
+ /* linktext4.count */
+ length_offset = xdr->buf->len;
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
return nfserr_resource;
- maxcount = PAGE_SIZE;
+ /* linktext4.data */
+ maxcount = PAGE_SIZE;
p = xdr_reserve_space(xdr, maxcount);
if (!p)
return nfserr_resource;
- /*
- * XXX: By default, vfs_readlink() will truncate symlinks if they
- * would overflow the buffer. Is this kosher in NFSv4? If not, one
- * easy fix is: if vfs_readlink() precisely fills the buffer, assume
- * that truncation occurred, and return NFS4ERR_RESOURCE.
- */
nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp,
(char *)p, &maxcount);
if (nfserr == nfserr_isdir)
nfserr = nfserr_inval;
- if (nfserr) {
- xdr_truncate_encode(xdr, length_offset);
- return nfserr;
- }
+ if (nfserr)
+ goto out_err;
+ status = svc_encode_result_payload(readlink->rl_rqstp, length_offset,
+ maxcount);
+ if (status) {
+ nfserr = nfserrno(status);
+ goto out_err;
+ }
+
+ wire_count = cpu_to_be32(maxcount);
+ write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, XDR_UNIT);
+ xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount));
+ write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero,
+ xdr_pad_size(maxcount));
+ return nfs_ok;
- wire_count = htonl(maxcount);
- write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4);
- xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4));
- if (maxcount & 3)
- write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount,
- &zero, 4 - (maxcount&3));
- return 0;
+out_err:
+ xdr_truncate_encode(xdr, length_offset);
+ return nfserr;
}
-static __be32
-nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
+static __be32 nfsd4_encode_dirlist4(struct xdr_stream *xdr,
+ struct nfsd4_readdir *readdir,
+ u32 max_payload)
{
- int maxcount;
- int bytes_left;
+ int bytes_left, maxcount, starting_len = xdr->buf->len;
loff_t offset;
- __be64 wire_offset;
- struct xdr_stream *xdr = &resp->xdr;
- int starting_len = xdr->buf->len;
- __be32 *p;
-
- p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
- if (!p)
- return nfserr_resource;
-
- /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0);
- resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
- - (char *)resp->xdr.buf->head[0].iov_base;
+ __be32 status;
/*
* Number of bytes left for directory entries allowing for the
- * final 8 bytes of the readdir and a following failed op:
+ * final 8 bytes of the readdir and a following failed op.
*/
- bytes_left = xdr->buf->buflen - xdr->buf->len
- - COMPOUND_ERR_SLACK_SPACE - 8;
- if (bytes_left < 0) {
- nfserr = nfserr_resource;
- goto err_no_verf;
- }
- maxcount = svc_max_payload(resp->rqstp);
- maxcount = min_t(u32, readdir->rd_maxcount, maxcount);
+ bytes_left = xdr->buf->buflen - xdr->buf->len -
+ COMPOUND_ERR_SLACK_SPACE - XDR_UNIT * 2;
+ if (bytes_left < 0)
+ return nfserr_resource;
+ maxcount = min_t(u32, readdir->rd_maxcount, max_payload);
+
/*
- * Note the rfc defines rd_maxcount as the size of the
- * READDIR4resok structure, which includes the verifier above
- * and the 8 bytes encoded at the end of this function:
+ * The RFC defines rd_maxcount as the size of the
+ * READDIR4resok structure, which includes the verifier
+ * and the 8 bytes encoded at the end of this function.
*/
- if (maxcount < 16) {
- nfserr = nfserr_toosmall;
- goto err_no_verf;
- }
- maxcount = min_t(int, maxcount-16, bytes_left);
+ if (maxcount < XDR_UNIT * 4)
+ return nfserr_toosmall;
+ maxcount = min_t(int, maxcount - XDR_UNIT * 4, bytes_left);
- /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */
+ /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0 */
if (!readdir->rd_dircount)
- readdir->rd_dircount = svc_max_payload(resp->rqstp);
+ readdir->rd_dircount = max_payload;
+ /* *entries */
readdir->xdr = xdr;
readdir->rd_maxcount = maxcount;
readdir->common.err = 0;
readdir->cookie_offset = 0;
-
offset = readdir->rd_cookie;
- nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
- &offset,
- &readdir->common, nfsd4_encode_dirent);
- if (nfserr == nfs_ok &&
- readdir->common.err == nfserr_toosmall &&
- xdr->buf->len == starting_len + 8) {
- /* nothing encoded; which limit did we hit?: */
- if (maxcount - 16 < bytes_left)
- /* It was the fault of rd_maxcount: */
- nfserr = nfserr_toosmall;
- else
- /* We ran out of buffer space: */
- nfserr = nfserr_resource;
+ status = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, &offset,
+ &readdir->common, nfsd4_encode_entry4);
+ if (status)
+ return status;
+ if (readdir->common.err == nfserr_toosmall &&
+ xdr->buf->len == starting_len) {
+ /* No entries were encoded. Which limit did we hit? */
+ if (maxcount - XDR_UNIT * 4 < bytes_left)
+ /* It was the fault of rd_maxcount */
+ return nfserr_toosmall;
+ /* We ran out of buffer space */
+ return nfserr_resource;
}
- if (nfserr)
- goto err_no_verf;
+ /* Encode the final entry's cookie value */
+ nfsd4_encode_entry4_nfs_cookie4(readdir, offset);
+ /* No entries follow */
+ if (xdr_stream_encode_item_absent(xdr) != XDR_UNIT)
+ return nfserr_resource;
- if (readdir->cookie_offset) {
- wire_offset = cpu_to_be64(offset);
- write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
- &wire_offset, 8);
- }
+ /* eof */
+ return nfsd4_encode_bool(xdr, readdir->common.err == nfserr_eof);
+}
- p = xdr_reserve_space(xdr, 8);
- if (!p) {
- WARN_ON_ONCE(1);
- goto err_no_verf;
- }
- *p++ = 0; /* no more entries */
- *p++ = htonl(readdir->common.err == nfserr_eof);
+static __be32
+nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_readdir *readdir = &u->readdir;
+ struct xdr_stream *xdr = resp->xdr;
+ int starting_len = xdr->buf->len;
- return 0;
-err_no_verf:
- xdr_truncate_encode(xdr, starting_len);
+ /* cookieverf */
+ nfserr = nfsd4_encode_verifier4(xdr, &readdir->rd_verf);
+ if (nfserr != nfs_ok)
+ return nfserr;
+
+ /* reply */
+ nfserr = nfsd4_encode_dirlist4(xdr, readdir, svc_max_payload(resp->rqstp));
+ if (nfserr != nfs_ok)
+ xdr_truncate_encode(xdr, starting_len);
return nfserr;
}
static __be32
-nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
+nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd4_remove *remove = &u->remove;
+ struct xdr_stream *xdr = resp->xdr;
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- p = encode_cinfo(p, &remove->rm_cinfo);
- return 0;
+ return nfsd4_encode_change_info4(xdr, &remove->rm_cinfo);
}
static __be32
-nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
+nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd4_rename *rename = &u->rename;
+ struct xdr_stream *xdr = resp->xdr;
- p = xdr_reserve_space(xdr, 40);
- if (!p)
+ nfserr = nfsd4_encode_change_info4(xdr, &rename->rn_sinfo);
+ if (nfserr)
+ return nfserr;
+ return nfsd4_encode_change_info4(xdr, &rename->rn_tinfo);
+}
+
+static __be32
+nfsd4_encode_rpcsec_gss_info(struct xdr_stream *xdr,
+ struct rpcsec_gss_info *info)
+{
+ __be32 status;
+
+ /* oid */
+ if (xdr_stream_encode_opaque(xdr, info->oid.data, info->oid.len) < 0)
return nfserr_resource;
- p = encode_cinfo(p, &rename->rn_sinfo);
- p = encode_cinfo(p, &rename->rn_tinfo);
- return 0;
+ /* qop */
+ status = nfsd4_encode_qop4(xdr, info->qop);
+ if (status != nfs_ok)
+ return status;
+ /* service */
+ if (xdr_stream_encode_u32(xdr, info->service) != XDR_UNIT)
+ return nfserr_resource;
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_secinfo4(struct xdr_stream *xdr, rpc_authflavor_t pf,
+ u32 *supported)
+{
+ struct rpcsec_gss_info info;
+ __be32 status;
+
+ if (rpcauth_get_gssinfo(pf, &info) == 0) {
+ (*supported)++;
+
+ /* flavor */
+ status = nfsd4_encode_uint32_t(xdr, RPC_AUTH_GSS);
+ if (status != nfs_ok)
+ return status;
+ /* flavor_info */
+ status = nfsd4_encode_rpcsec_gss_info(xdr, &info);
+ if (status != nfs_ok)
+ return status;
+ } else if (pf < RPC_AUTH_MAXFLAVOR) {
+ (*supported)++;
+
+ /* flavor */
+ status = nfsd4_encode_uint32_t(xdr, pf);
+ if (status != nfs_ok)
+ return status;
+ }
+ return nfs_ok;
}
static __be32
-nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
+nfsd4_encode_SECINFO4resok(struct xdr_stream *xdr, struct svc_export *exp)
{
u32 i, nflavs, supported;
struct exp_flavor_info *flavs;
struct exp_flavor_info def_flavs[2];
- __be32 *p, *flavorsp;
- static bool report = true;
+ unsigned int count_offset;
+ __be32 status, wire_count;
if (exp->ex_nflavors) {
flavs = exp->ex_flavors;
@@ -3810,508 +4780,965 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
}
}
- supported = 0;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
+ count_offset = xdr->buf->len;
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
return nfserr_resource;
- flavorsp = p++; /* to be backfilled later */
-
- for (i = 0; i < nflavs; i++) {
- rpc_authflavor_t pf = flavs[i].pseudoflavor;
- struct rpcsec_gss_info info;
- if (rpcauth_get_gssinfo(pf, &info) == 0) {
- supported++;
- p = xdr_reserve_space(xdr, 4 + 4 +
- XDR_LEN(info.oid.len) + 4 + 4);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(RPC_AUTH_GSS);
- p = xdr_encode_opaque(p, info.oid.data, info.oid.len);
- *p++ = cpu_to_be32(info.qop);
- *p++ = cpu_to_be32(info.service);
- } else if (pf < RPC_AUTH_MAXFLAVOR) {
- supported++;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(pf);
- } else {
- if (report)
- pr_warn("NFS: SECINFO: security flavor %u "
- "is not supported\n", pf);
- }
+ for (i = 0, supported = 0; i < nflavs; i++) {
+ status = nfsd4_encode_secinfo4(xdr, flavs[i].pseudoflavor,
+ &supported);
+ if (status != nfs_ok)
+ return status;
}
- if (nflavs != supported)
- report = false;
- *flavorsp = htonl(supported);
+ wire_count = cpu_to_be32(supported);
+ write_bytes_to_xdr_buf(xdr->buf, count_offset, &wire_count,
+ XDR_UNIT);
return 0;
}
static __be32
nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_secinfo *secinfo)
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
+ struct nfsd4_secinfo *secinfo = &u->secinfo;
+ struct xdr_stream *xdr = resp->xdr;
- return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp);
+ return nfsd4_encode_SECINFO4resok(xdr, secinfo->si_exp);
}
static __be32
nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_secinfo_no_name *secinfo)
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
+ struct nfsd4_secinfo_no_name *secinfo = &u->secinfo_no_name;
+ struct xdr_stream *xdr = resp->xdr;
- return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp);
+ return nfsd4_encode_SECINFO4resok(xdr, secinfo->sin_exp);
}
-/*
- * The SETATTR encode routine is special -- it always encodes a bitmap,
- * regardless of the error status.
- */
static __be32
-nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
+nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd4_setattr *setattr = &u->setattr;
+ __be32 status;
- p = xdr_reserve_space(xdr, 16);
- if (!p)
- return nfserr_resource;
- if (nfserr) {
- *p++ = cpu_to_be32(3);
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0);
- }
- else {
- *p++ = cpu_to_be32(3);
- *p++ = cpu_to_be32(setattr->sa_bmval[0]);
- *p++ = cpu_to_be32(setattr->sa_bmval[1]);
- *p++ = cpu_to_be32(setattr->sa_bmval[2]);
+ switch (nfserr) {
+ case nfs_ok:
+ /* attrsset */
+ status = nfsd4_encode_bitmap4(resp->xdr, setattr->sa_bmval[0],
+ setattr->sa_bmval[1],
+ setattr->sa_bmval[2]);
+ break;
+ default:
+ /* attrsset */
+ status = nfsd4_encode_bitmap4(resp->xdr, 0, 0, 0);
}
- return nfserr;
+ return status != nfs_ok ? status : nfserr;
}
static __be32
-nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
+nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd4_setclientid *scd = &u->setclientid;
+ struct xdr_stream *xdr = resp->xdr;
if (!nfserr) {
- p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8);
- p = xdr_encode_opaque_fixed(p, &scd->se_confirm,
- NFS4_VERIFIER_SIZE);
- }
- else if (nfserr == nfserr_clid_inuse) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(0);
+ nfserr = nfsd4_encode_clientid4(xdr, &scd->se_clientid);
+ if (nfserr != nfs_ok)
+ goto out;
+ nfserr = nfsd4_encode_verifier4(xdr, &scd->se_confirm);
+ } else if (nfserr == nfserr_clid_inuse) {
+ /* empty network id */
+ if (xdr_stream_encode_u32(xdr, 0) < 0) {
+ nfserr = nfserr_resource;
+ goto out;
+ }
+ /* empty universal address */
+ if (xdr_stream_encode_u32(xdr, 0) < 0) {
+ nfserr = nfserr_resource;
+ goto out;
+ }
}
+out:
return nfserr;
}
static __be32
-nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
+nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd4_write *write = &u->write;
+ struct xdr_stream *xdr = resp->xdr;
- p = xdr_reserve_space(xdr, 16);
- if (!p)
+ /* count */
+ nfserr = nfsd4_encode_count4(xdr, write->wr_bytes_written);
+ if (nfserr)
+ return nfserr;
+ /* committed */
+ if (xdr_stream_encode_u32(xdr, write->wr_how_written) != XDR_UNIT)
return nfserr_resource;
- *p++ = cpu_to_be32(write->wr_bytes_written);
- *p++ = cpu_to_be32(write->wr_how_written);
- p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
- NFS4_VERIFIER_SIZE);
- return 0;
+ /* writeverf */
+ return nfsd4_encode_verifier4(xdr, &write->wr_verifier);
}
static __be32
-nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_exchange_id *exid)
+nfsd4_encode_state_protect_ops4(struct xdr_stream *xdr,
+ struct nfsd4_exchange_id *exid)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
- char *major_id;
- char *server_scope;
- int major_id_sz;
- int server_scope_sz;
- uint64_t minor_id = 0;
-
- major_id = utsname()->nodename;
- major_id_sz = strlen(major_id);
- server_scope = utsname()->nodename;
- server_scope_sz = strlen(server_scope);
-
- p = xdr_reserve_space(xdr,
- 8 /* eir_clientid */ +
- 4 /* eir_sequenceid */ +
- 4 /* eir_flags */ +
- 4 /* spr_how */);
- if (!p)
- return nfserr_resource;
+ __be32 status;
- p = xdr_encode_opaque_fixed(p, &exid->clientid, 8);
- *p++ = cpu_to_be32(exid->seqid);
- *p++ = cpu_to_be32(exid->flags);
+ /* spo_must_enforce */
+ status = nfsd4_encode_bitmap4(xdr, exid->spo_must_enforce[0],
+ exid->spo_must_enforce[1],
+ exid->spo_must_enforce[2]);
+ if (status != nfs_ok)
+ return status;
+ /* spo_must_allow */
+ return nfsd4_encode_bitmap4(xdr, exid->spo_must_allow[0],
+ exid->spo_must_allow[1],
+ exid->spo_must_allow[2]);
+}
- *p++ = cpu_to_be32(exid->spa_how);
+static __be32
+nfsd4_encode_state_protect4_r(struct xdr_stream *xdr, struct nfsd4_exchange_id *exid)
+{
+ __be32 status;
+ if (xdr_stream_encode_u32(xdr, exid->spa_how) != XDR_UNIT)
+ return nfserr_resource;
switch (exid->spa_how) {
case SP4_NONE:
+ status = nfs_ok;
break;
case SP4_MACH_CRED:
- /* spo_must_enforce bitmap: */
- nfserr = nfsd4_encode_bitmap(xdr,
- exid->spo_must_enforce[0],
- exid->spo_must_enforce[1],
- exid->spo_must_enforce[2]);
- if (nfserr)
- return nfserr;
- /* spo_must_allow bitmap: */
- nfserr = nfsd4_encode_bitmap(xdr,
- exid->spo_must_allow[0],
- exid->spo_must_allow[1],
- exid->spo_must_allow[2]);
- if (nfserr)
- return nfserr;
+ /* spr_mach_ops */
+ status = nfsd4_encode_state_protect_ops4(xdr, exid);
break;
default:
- WARN_ON_ONCE(1);
+ status = nfserr_serverfault;
}
+ return status;
+}
- p = xdr_reserve_space(xdr,
- 8 /* so_minor_id */ +
- 4 /* so_major_id.len */ +
- (XDR_QUADLEN(major_id_sz) * 4) +
- 4 /* eir_server_scope.len */ +
- (XDR_QUADLEN(server_scope_sz) * 4) +
- 4 /* eir_server_impl_id.count (0) */);
- if (!p)
- return nfserr_resource;
+static __be32
+nfsd4_encode_server_owner4(struct xdr_stream *xdr, struct svc_rqst *rqstp)
+{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ __be32 status;
- /* The server_owner struct */
- p = xdr_encode_hyper(p, minor_id); /* Minor id */
- /* major id */
- p = xdr_encode_opaque(p, major_id, major_id_sz);
+ /* so_minor_id */
+ status = nfsd4_encode_uint64_t(xdr, 0);
+ if (status != nfs_ok)
+ return status;
+ /* so_major_id */
+ return nfsd4_encode_opaque(xdr, nn->nfsd_name, strlen(nn->nfsd_name));
+}
- /* Server scope */
- p = xdr_encode_opaque(p, server_scope, server_scope_sz);
+static __be32
+nfsd4_encode_nfs_impl_id4(struct xdr_stream *xdr, struct nfsd4_exchange_id *exid)
+{
+ __be32 status;
- /* Implementation id */
- *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */
- return 0;
+ /* nii_domain */
+ status = nfsd4_encode_opaque(xdr, exid->nii_domain.data,
+ exid->nii_domain.len);
+ if (status != nfs_ok)
+ return status;
+ /* nii_name */
+ status = nfsd4_encode_opaque(xdr, exid->nii_name.data,
+ exid->nii_name.len);
+ if (status != nfs_ok)
+ return status;
+ /* nii_time */
+ return nfsd4_encode_nfstime4(xdr, &exid->nii_time);
}
static __be32
-nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_create_session *sess)
+nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd_net *nn = net_generic(SVC_NET(resp->rqstp), nfsd_net_id);
+ struct nfsd4_exchange_id *exid = &u->exchange_id;
+ struct xdr_stream *xdr = resp->xdr;
- p = xdr_reserve_space(xdr, 24);
- if (!p)
+ /* eir_clientid */
+ nfserr = nfsd4_encode_clientid4(xdr, &exid->clientid);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* eir_sequenceid */
+ nfserr = nfsd4_encode_sequenceid4(xdr, exid->seqid);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* eir_flags */
+ nfserr = nfsd4_encode_uint32_t(xdr, exid->flags);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* eir_state_protect */
+ nfserr = nfsd4_encode_state_protect4_r(xdr, exid);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* eir_server_owner */
+ nfserr = nfsd4_encode_server_owner4(xdr, resp->rqstp);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* eir_server_scope */
+ nfserr = nfsd4_encode_opaque(xdr, nn->nfsd_name,
+ strlen(nn->nfsd_name));
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* eir_server_impl_id<1> */
+ if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT)
return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, sess->sessionid.data,
- NFS4_MAX_SESSIONID_LEN);
- *p++ = cpu_to_be32(sess->seqid);
- *p++ = cpu_to_be32(sess->flags);
+ nfserr = nfsd4_encode_nfs_impl_id4(xdr, exid);
+ if (nfserr != nfs_ok)
+ return nfserr;
- p = xdr_reserve_space(xdr, 28);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(0); /* headerpadsz */
- *p++ = cpu_to_be32(sess->fore_channel.maxreq_sz);
- *p++ = cpu_to_be32(sess->fore_channel.maxresp_sz);
- *p++ = cpu_to_be32(sess->fore_channel.maxresp_cached);
- *p++ = cpu_to_be32(sess->fore_channel.maxops);
- *p++ = cpu_to_be32(sess->fore_channel.maxreqs);
- *p++ = cpu_to_be32(sess->fore_channel.nr_rdma_attrs);
-
- if (sess->fore_channel.nr_rdma_attrs) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(sess->fore_channel.rdma_attrs);
- }
+ return nfs_ok;
+}
- p = xdr_reserve_space(xdr, 28);
- if (!p)
+static __be32
+nfsd4_encode_channel_attrs4(struct xdr_stream *xdr,
+ const struct nfsd4_channel_attrs *attrs)
+{
+ __be32 status;
+
+ /* ca_headerpadsize */
+ status = nfsd4_encode_count4(xdr, 0);
+ if (status != nfs_ok)
+ return status;
+ /* ca_maxrequestsize */
+ status = nfsd4_encode_count4(xdr, attrs->maxreq_sz);
+ if (status != nfs_ok)
+ return status;
+ /* ca_maxresponsesize */
+ status = nfsd4_encode_count4(xdr, attrs->maxresp_sz);
+ if (status != nfs_ok)
+ return status;
+ /* ca_maxresponsesize_cached */
+ status = nfsd4_encode_count4(xdr, attrs->maxresp_cached);
+ if (status != nfs_ok)
+ return status;
+ /* ca_maxoperations */
+ status = nfsd4_encode_count4(xdr, attrs->maxops);
+ if (status != nfs_ok)
+ return status;
+ /* ca_maxrequests */
+ status = nfsd4_encode_count4(xdr, attrs->maxreqs);
+ if (status != nfs_ok)
+ return status;
+ /* ca_rdma_ird<1> */
+ if (xdr_stream_encode_u32(xdr, attrs->nr_rdma_attrs) != XDR_UNIT)
return nfserr_resource;
- *p++ = cpu_to_be32(0); /* headerpadsz */
- *p++ = cpu_to_be32(sess->back_channel.maxreq_sz);
- *p++ = cpu_to_be32(sess->back_channel.maxresp_sz);
- *p++ = cpu_to_be32(sess->back_channel.maxresp_cached);
- *p++ = cpu_to_be32(sess->back_channel.maxops);
- *p++ = cpu_to_be32(sess->back_channel.maxreqs);
- *p++ = cpu_to_be32(sess->back_channel.nr_rdma_attrs);
-
- if (sess->back_channel.nr_rdma_attrs) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(sess->back_channel.rdma_attrs);
- }
- return 0;
+ if (attrs->nr_rdma_attrs)
+ return nfsd4_encode_uint32_t(xdr, attrs->rdma_attrs);
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_create_session *sess = &u->create_session;
+ struct xdr_stream *xdr = resp->xdr;
+
+ /* csr_sessionid */
+ nfserr = nfsd4_encode_sessionid4(xdr, &sess->sessionid);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* csr_sequence */
+ nfserr = nfsd4_encode_sequenceid4(xdr, sess->seqid);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* csr_flags */
+ nfserr = nfsd4_encode_uint32_t(xdr, sess->flags);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* csr_fore_chan_attrs */
+ nfserr = nfsd4_encode_channel_attrs4(xdr, &sess->fore_channel);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* csr_back_chan_attrs */
+ return nfsd4_encode_channel_attrs4(xdr, &sess->back_channel);
}
static __be32
nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_sequence *seq)
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd4_sequence *seq = &u->sequence;
+ struct xdr_stream *xdr = resp->xdr;
- p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, seq->sessionid.data,
- NFS4_MAX_SESSIONID_LEN);
- *p++ = cpu_to_be32(seq->seqid);
- *p++ = cpu_to_be32(seq->slotid);
+ /* sr_sessionid */
+ nfserr = nfsd4_encode_sessionid4(xdr, &seq->sessionid);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* sr_sequenceid */
+ nfserr = nfsd4_encode_sequenceid4(xdr, seq->seqid);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* sr_slotid */
+ nfserr = nfsd4_encode_slotid4(xdr, seq->slotid);
+ if (nfserr != nfs_ok)
+ return nfserr;
/* Note slotid's are numbered from zero: */
- *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_highest_slotid */
- *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_target_highest_slotid */
- *p++ = cpu_to_be32(seq->status_flags);
+ /* sr_highest_slotid */
+ nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots_response - 1);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* sr_target_highest_slotid */
+ nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* sr_status_flags */
+ nfserr = nfsd4_encode_uint32_t(xdr, seq->status_flags);
+ if (nfserr != nfs_ok)
+ return nfserr;
resp->cstate.data_offset = xdr->buf->len; /* DRC cache data pointer */
- return 0;
+ return nfs_ok;
}
static __be32
nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_test_stateid *test_stateid)
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
+ struct nfsd4_test_stateid *test_stateid = &u->test_stateid;
struct nfsd4_test_stateid_id *stateid, *next;
- __be32 *p;
+ struct xdr_stream *xdr = resp->xdr;
- p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids));
- if (!p)
+ /* tsr_status_codes<> */
+ if (xdr_stream_encode_u32(xdr, test_stateid->ts_num_ids) != XDR_UNIT)
return nfserr_resource;
- *p++ = htonl(test_stateid->ts_num_ids);
-
- list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) {
- *p++ = stateid->ts_id_status;
+ list_for_each_entry_safe(stateid, next,
+ &test_stateid->ts_stateid_list, ts_id_list) {
+ if (xdr_stream_encode_be32(xdr, stateid->ts_id_status) != XDR_UNIT)
+ return nfserr_resource;
}
+ return nfs_ok;
+}
- return 0;
+static __be32
+nfsd4_encode_get_dir_delegation(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 status = nfserr_resource;
+
+ switch(gdd->gddrnf_status) {
+ case GDD4_OK:
+ if (xdr_stream_encode_u32(xdr, GDD4_OK) != XDR_UNIT)
+ break;
+ status = nfsd4_encode_verifier4(xdr, &gdd->gddr_cookieverf);
+ if (status)
+ break;
+ status = nfsd4_encode_stateid4(xdr, &gdd->gddr_stateid);
+ if (status)
+ break;
+ status = nfsd4_encode_bitmap4(xdr, gdd->gddr_notification[0], 0, 0);
+ if (status)
+ break;
+ status = nfsd4_encode_bitmap4(xdr, gdd->gddr_child_attributes[0],
+ gdd->gddr_child_attributes[1],
+ gdd->gddr_child_attributes[2]);
+ if (status)
+ break;
+ status = nfsd4_encode_bitmap4(xdr, gdd->gddr_dir_attributes[0],
+ gdd->gddr_dir_attributes[1],
+ gdd->gddr_dir_attributes[2]);
+ break;
+ default:
+ pr_warn("nfsd: bad gddrnf_status (%u)\n", gdd->gddrnf_status);
+ gdd->gddrnf_will_signal_deleg_avail = 0;
+ fallthrough;
+ case GDD4_UNAVAIL:
+ if (xdr_stream_encode_u32(xdr, GDD4_UNAVAIL) != XDR_UNIT)
+ break;
+ status = nfsd4_encode_bool(xdr, gdd->gddrnf_will_signal_deleg_avail);
+ break;
+ }
+ return status;
}
#ifdef CONFIG_NFSD_PNFS
static __be32
-nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_getdeviceinfo *gdev)
+nfsd4_encode_device_addr4(struct xdr_stream *xdr,
+ const struct nfsd4_getdeviceinfo *gdev)
{
- struct xdr_stream *xdr = &resp->xdr;
+ u32 needed_len, starting_len = xdr->buf->len;
const struct nfsd4_layout_ops *ops;
- u32 starting_len = xdr->buf->len, needed_len;
- __be32 *p;
+ __be32 status;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
+ /* da_layout_type */
+ if (xdr_stream_encode_u32(xdr, gdev->gd_layout_type) != XDR_UNIT)
return nfserr_resource;
-
- *p++ = cpu_to_be32(gdev->gd_layout_type);
-
- /* If maxcount is 0 then just update notifications */
- if (gdev->gd_maxcount != 0) {
- ops = nfsd4_layout_ops[gdev->gd_layout_type];
- nfserr = ops->encode_getdeviceinfo(xdr, gdev);
- if (nfserr) {
- /*
- * We don't bother to burden the layout drivers with
- * enforcing gd_maxcount, just tell the client to
- * come back with a bigger buffer if it's not enough.
- */
- if (xdr->buf->len + 4 > gdev->gd_maxcount)
- goto toosmall;
- return nfserr;
- }
+ /* da_addr_body */
+ ops = nfsd4_layout_ops[gdev->gd_layout_type];
+ status = ops->encode_getdeviceinfo(xdr, gdev);
+ if (status != nfs_ok) {
+ /*
+ * Don't burden the layout drivers with enforcing
+ * gd_maxcount. Just tell the client to come back
+ * with a bigger buffer if it's not enough.
+ */
+ if (xdr->buf->len + XDR_UNIT > gdev->gd_maxcount)
+ goto toosmall;
+ return status;
}
- if (gdev->gd_notify_types) {
- p = xdr_reserve_space(xdr, 4 + 4);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(1); /* bitmap length */
- *p++ = cpu_to_be32(gdev->gd_notify_types);
- } else {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- return nfserr_resource;
- *p++ = 0;
- }
+ return nfs_ok;
- return 0;
toosmall:
- dprintk("%s: maxcount too small\n", __func__);
- needed_len = xdr->buf->len + 4 /* notifications */;
+ needed_len = xdr->buf->len + XDR_UNIT; /* notifications */
xdr_truncate_encode(xdr, starting_len);
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(needed_len);
+
+ status = nfsd4_encode_count4(xdr, needed_len);
+ if (status != nfs_ok)
+ return status;
return nfserr_toosmall;
}
static __be32
-nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_layoutget *lgp)
+nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- const struct nfsd4_layout_ops *ops;
- __be32 *p;
+ struct nfsd4_getdeviceinfo *gdev = &u->getdeviceinfo;
+ struct xdr_stream *xdr = resp->xdr;
- p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t));
- if (!p)
- return nfserr_resource;
-
- *p++ = cpu_to_be32(1); /* we always set return-on-close */
- *p++ = cpu_to_be32(lgp->lg_sid.si_generation);
- p = xdr_encode_opaque_fixed(p, &lgp->lg_sid.si_opaque,
- sizeof(stateid_opaque_t));
+ /* gdir_device_addr */
+ nfserr = nfsd4_encode_device_addr4(xdr, gdev);
+ if (nfserr)
+ return nfserr;
+ /* gdir_notification */
+ return nfsd4_encode_bitmap4(xdr, gdev->gd_notify_types, 0, 0);
+}
- *p++ = cpu_to_be32(1); /* we always return a single layout */
- p = xdr_encode_hyper(p, lgp->lg_seg.offset);
- p = xdr_encode_hyper(p, lgp->lg_seg.length);
- *p++ = cpu_to_be32(lgp->lg_seg.iomode);
- *p++ = cpu_to_be32(lgp->lg_layout_type);
+static __be32
+nfsd4_encode_layout4(struct xdr_stream *xdr, const struct nfsd4_layoutget *lgp)
+{
+ const struct nfsd4_layout_ops *ops = nfsd4_layout_ops[lgp->lg_layout_type];
+ __be32 status;
- ops = nfsd4_layout_ops[lgp->lg_layout_type];
+ /* lo_offset */
+ status = nfsd4_encode_offset4(xdr, lgp->lg_seg.offset);
+ if (status != nfs_ok)
+ return status;
+ /* lo_length */
+ status = nfsd4_encode_length4(xdr, lgp->lg_seg.length);
+ if (status != nfs_ok)
+ return status;
+ /* lo_iomode */
+ if (xdr_stream_encode_u32(xdr, lgp->lg_seg.iomode) != XDR_UNIT)
+ return nfserr_resource;
+ /* lo_content */
+ if (xdr_stream_encode_u32(xdr, lgp->lg_layout_type) != XDR_UNIT)
+ return nfserr_resource;
return ops->encode_layoutget(xdr, lgp);
}
static __be32
-nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_layoutcommit *lcp)
+nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd4_layoutget *lgp = &u->layoutget;
+ struct xdr_stream *xdr = resp->xdr;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
+ /* logr_return_on_close */
+ nfserr = nfsd4_encode_bool(xdr, true);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* logr_stateid */
+ nfserr = nfsd4_encode_stateid4(xdr, &lgp->lg_sid);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* logr_layout<> */
+ if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT)
return nfserr_resource;
- *p++ = cpu_to_be32(lcp->lc_size_chg);
- if (lcp->lc_size_chg) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_hyper(p, lcp->lc_newsize);
- }
+ return nfsd4_encode_layout4(xdr, lgp);
+}
- return 0;
+static __be32
+nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_layoutcommit *lcp = &u->layoutcommit;
+ struct xdr_stream *xdr = resp->xdr;
+
+ /* ns_sizechanged */
+ nfserr = nfsd4_encode_bool(xdr, lcp->lc_size_chg);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ if (lcp->lc_size_chg)
+ /* ns_size */
+ return nfsd4_encode_length4(xdr, lcp->lc_newsize);
+ return nfs_ok;
}
static __be32
nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_layoutreturn *lrp)
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd4_layoutreturn *lrp = &u->layoutreturn;
+ struct xdr_stream *xdr = resp->xdr;
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(lrp->lrs_present);
+ /* lrs_present */
+ nfserr = nfsd4_encode_bool(xdr, lrp->lrs_present);
+ if (nfserr != nfs_ok)
+ return nfserr;
if (lrp->lrs_present)
- return nfsd4_encode_stateid(xdr, &lrp->lr_sid);
- return 0;
+ /* lrs_stateid */
+ return nfsd4_encode_stateid4(xdr, &lrp->lr_sid);
+ return nfs_ok;
}
#endif /* CONFIG_NFSD_PNFS */
static __be32
-nfsd42_encode_write_res(struct nfsd4_compoundres *resp,
- struct nfsd42_write_res *write, bool sync)
+nfsd4_encode_write_response4(struct xdr_stream *xdr,
+ const struct nfsd4_copy *copy)
{
- __be32 *p;
- p = xdr_reserve_space(&resp->xdr, 4);
- if (!p)
- return nfserr_resource;
+ const struct nfsd42_write_res *write = &copy->cp_res;
+ u32 count = nfsd4_copy_is_sync(copy) ? 0 : 1;
+ __be32 status;
- if (sync)
- *p++ = cpu_to_be32(0);
- else {
- __be32 nfserr;
- *p++ = cpu_to_be32(1);
- nfserr = nfsd4_encode_stateid(&resp->xdr, &write->cb_stateid);
- if (nfserr)
- return nfserr;
+ /* wr_callback_id<1> */
+ if (xdr_stream_encode_u32(xdr, count) != XDR_UNIT)
+ return nfserr_resource;
+ if (count) {
+ status = nfsd4_encode_stateid4(xdr, &write->cb_stateid);
+ if (status != nfs_ok)
+ return status;
}
- p = xdr_reserve_space(&resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE);
- if (!p)
+
+ /* wr_count */
+ status = nfsd4_encode_length4(xdr, write->wr_bytes_written);
+ if (status != nfs_ok)
+ return status;
+ /* wr_committed */
+ if (xdr_stream_encode_u32(xdr, write->wr_stable_how) != XDR_UNIT)
return nfserr_resource;
+ /* wr_writeverf */
+ return nfsd4_encode_verifier4(xdr, &write->wr_verifier);
+}
- p = xdr_encode_hyper(p, write->wr_bytes_written);
- *p++ = cpu_to_be32(write->wr_stable_how);
- p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
- NFS4_VERIFIER_SIZE);
- return nfs_ok;
+static __be32 nfsd4_encode_copy_requirements4(struct xdr_stream *xdr,
+ const struct nfsd4_copy *copy)
+{
+ __be32 status;
+
+ /* cr_consecutive */
+ status = nfsd4_encode_bool(xdr, true);
+ if (status != nfs_ok)
+ return status;
+ /* cr_synchronous */
+ return nfsd4_encode_bool(xdr, nfsd4_copy_is_sync(copy));
}
static __be32
nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_copy *copy)
+ union nfsd4_op_u *u)
{
- __be32 *p;
+ struct nfsd4_copy *copy = &u->copy;
- nfserr = nfsd42_encode_write_res(resp, &copy->cp_res,
- copy->cp_synchronous);
- if (nfserr)
+ nfserr = nfsd4_encode_write_response4(resp->xdr, copy);
+ if (nfserr != nfs_ok)
return nfserr;
+ return nfsd4_encode_copy_requirements4(resp->xdr, copy);
+}
- p = xdr_reserve_space(&resp->xdr, 4 + 4);
- *p++ = xdr_one; /* cr_consecutive */
- *p++ = cpu_to_be32(copy->cp_synchronous);
- return 0;
+static __be32
+nfsd4_encode_netloc4(struct xdr_stream *xdr, const struct nl4_server *ns)
+{
+ __be32 status;
+
+ if (xdr_stream_encode_u32(xdr, ns->nl4_type) != XDR_UNIT)
+ return nfserr_resource;
+ switch (ns->nl4_type) {
+ case NL4_NETADDR:
+ /* nl_addr */
+ status = nfsd4_encode_netaddr4(xdr, &ns->u.nl4_addr);
+ break;
+ default:
+ status = nfserr_serverfault;
+ }
+ return status;
+}
+
+static __be32
+nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_copy_notify *cn = &u->copy_notify;
+ struct xdr_stream *xdr = resp->xdr;
+
+ /* cnr_lease_time */
+ nfserr = nfsd4_encode_nfstime4(xdr, &cn->cpn_lease_time);
+ if (nfserr)
+ return nfserr;
+ /* cnr_stateid */
+ nfserr = nfsd4_encode_stateid4(xdr, &cn->cpn_cnr_stateid);
+ if (nfserr)
+ return nfserr;
+ /* cnr_source_server<> */
+ if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT)
+ return nfserr_resource;
+ return nfsd4_encode_netloc4(xdr, cn->cpn_src);
}
static __be32
nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_offload_status *os)
+ union nfsd4_op_u *u)
{
- struct xdr_stream *xdr = &resp->xdr;
- __be32 *p;
+ struct nfsd4_offload_status *os = &u->offload_status;
+ struct xdr_stream *xdr = resp->xdr;
- p = xdr_reserve_space(xdr, 8 + 4);
- if (!p)
+ /* osr_count */
+ nfserr = nfsd4_encode_length4(xdr, os->count);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* osr_complete<1> */
+ if (os->completed) {
+ if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT)
+ return nfserr_resource;
+ if (xdr_stream_encode_be32(xdr, os->status) != XDR_UNIT)
+ return nfserr_resource;
+ } else if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
return nfserr_resource;
- p = xdr_encode_hyper(p, os->count);
- *p++ = cpu_to_be32(0);
+ return nfs_ok;
+}
+static __be32
+nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
+ struct nfsd4_read *read)
+{
+ struct nfsd4_compoundargs *argp = resp->rqstp->rq_argp;
+ struct file *file = read->rd_nf->nf_file;
+ struct xdr_stream *xdr = resp->xdr;
+ bool splice_ok = argp->splice_ok;
+ unsigned int offset_offset;
+ __be32 nfserr, wire_count;
+ unsigned long maxcount;
+ __be64 wire_offset;
+
+ if (xdr_stream_encode_u32(xdr, NFS4_CONTENT_DATA) != XDR_UNIT)
+ return nfserr_io;
+
+ offset_offset = xdr->buf->len;
+
+ /* Reserve space for the byte offset and count */
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 3)))
+ return nfserr_io;
+ xdr_commit_encode(xdr);
+
+ maxcount = min_t(unsigned long, read->rd_length,
+ (xdr->buf->buflen - xdr->buf->len));
+
+ if (file->f_op->splice_read && splice_ok)
+ nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
+ else
+ nfserr = nfsd4_encode_readv(resp, read, maxcount);
+ if (nfserr)
+ return nfserr;
+
+ wire_offset = cpu_to_be64(read->rd_offset);
+ write_bytes_to_xdr_buf(xdr->buf, offset_offset, &wire_offset,
+ XDR_UNIT * 2);
+ wire_count = cpu_to_be32(read->rd_length);
+ write_bytes_to_xdr_buf(xdr->buf, offset_offset + XDR_UNIT * 2,
+ &wire_count, XDR_UNIT);
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_read *read = &u->read;
+ struct file *file = read->rd_nf->nf_file;
+ struct xdr_stream *xdr = resp->xdr;
+ unsigned int eof_offset;
+ __be32 wire_data[2];
+ u32 segments = 0;
+
+ if (nfserr)
+ return nfserr;
+
+ eof_offset = xdr->buf->len;
+
+ /* Reserve space for the eof flag and segment count */
+ if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT * 2)))
+ return nfserr_io;
+ xdr_commit_encode(xdr);
+
+ read->rd_eof = read->rd_offset >= i_size_read(file_inode(file));
+ if (read->rd_eof)
+ goto out;
+
+ nfserr = nfsd4_encode_read_plus_data(resp, read);
+ if (nfserr) {
+ xdr_truncate_encode(xdr, eof_offset);
+ return nfserr;
+ }
+
+ segments++;
+
+out:
+ wire_data[0] = read->rd_eof ? xdr_one : xdr_zero;
+ wire_data[1] = cpu_to_be32(segments);
+ write_bytes_to_xdr_buf(xdr->buf, eof_offset, &wire_data, XDR_UNIT * 2);
return nfserr;
}
static __be32
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_seek *seek)
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_seek *seek = &u->seek;
+ struct xdr_stream *xdr = resp->xdr;
+
+ /* sr_eof */
+ nfserr = nfsd4_encode_bool(xdr, seek->seek_eof);
+ if (nfserr != nfs_ok)
+ return nfserr;
+ /* sr_offset */
+ return nfsd4_encode_offset4(xdr, seek->seek_pos);
+}
+
+static __be32
+nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *p)
+{
+ return nfserr;
+}
+
+/*
+ * Encode kmalloc-ed buffer in to XDR stream.
+ */
+static __be32
+nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen)
{
+ u32 cplen;
__be32 *p;
- p = xdr_reserve_space(&resp->xdr, 4 + 8);
- *p++ = cpu_to_be32(seek->seek_eof);
- p = xdr_encode_hyper(p, seek->seek_pos);
+ cplen = min_t(unsigned long, buflen,
+ ((void *)xdr->end - (void *)xdr->p));
+ p = xdr_reserve_space(xdr, cplen);
+ if (!p)
+ return nfserr_resource;
+
+ memcpy(p, buf, cplen);
+ buf += cplen;
+ buflen -= cplen;
+
+ while (buflen) {
+ cplen = min_t(u32, buflen, PAGE_SIZE);
+ p = xdr_reserve_space(xdr, cplen);
+ if (!p)
+ return nfserr_resource;
+
+ memcpy(p, buf, cplen);
+
+ if (cplen < PAGE_SIZE) {
+ /*
+ * We're done, with a length that wasn't page
+ * aligned, so possibly not word aligned. Pad
+ * any trailing bytes with 0.
+ */
+ xdr_encode_opaque_fixed(p, NULL, cplen);
+ break;
+ }
+
+ buflen -= PAGE_SIZE;
+ buf += PAGE_SIZE;
+ }
return 0;
}
static __be32
-nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
+nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
{
- return nfserr;
+ struct nfsd4_getxattr *getxattr = &u->getxattr;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 *p, err;
+
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ return nfserr_resource;
+
+ *p = cpu_to_be32(getxattr->getxa_len);
+
+ if (getxattr->getxa_len == 0)
+ return 0;
+
+ err = nfsd4_vbuf_to_stream(xdr, getxattr->getxa_buf,
+ getxattr->getxa_len);
+
+ kvfree(getxattr->getxa_buf);
+
+ return err;
+}
+
+static __be32
+nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_setxattr *setxattr = &u->setxattr;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_encode_change_info4(xdr, &setxattr->setxa_cinfo);
+}
+
+/*
+ * See if there are cookie values that can be rejected outright.
+ */
+static __be32
+nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs,
+ u32 *offsetp)
+{
+ u64 cookie = listxattrs->lsxa_cookie;
+
+ /*
+ * If the cookie is larger than the maximum number we can fit
+ * in the buffer we just got back from vfs_listxattr, it's invalid.
+ */
+ if (cookie > (listxattrs->lsxa_len) / (XATTR_USER_PREFIX_LEN + 2))
+ return nfserr_badcookie;
+
+ *offsetp = (u32)cookie;
+ return 0;
+}
+
+static __be32
+nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_listxattrs *listxattrs = &u->listxattrs;
+ struct xdr_stream *xdr = resp->xdr;
+ u32 cookie_offset, count_offset, eof;
+ u32 left, xdrleft, slen, count;
+ u32 xdrlen, offset;
+ u64 cookie;
+ char *sp;
+ __be32 status, tmp;
+ __be64 wire_cookie;
+ __be32 *p;
+ u32 nuser;
+
+ eof = 1;
+
+ status = nfsd4_listxattr_validate_cookie(listxattrs, &offset);
+ if (status)
+ goto out;
+
+ /*
+ * Reserve space for the cookie and the name array count. Record
+ * the offsets to save them later.
+ */
+ cookie_offset = xdr->buf->len;
+ count_offset = cookie_offset + 8;
+ p = xdr_reserve_space(xdr, XDR_UNIT * 3);
+ if (!p) {
+ status = nfserr_resource;
+ goto out;
+ }
+
+ count = 0;
+ left = listxattrs->lsxa_len;
+ sp = listxattrs->lsxa_buf;
+ nuser = 0;
+
+ /* Bytes left is maxcount - 8 (cookie) - 4 (array count) */
+ xdrleft = listxattrs->lsxa_maxcount - XDR_UNIT * 3;
+
+ while (left > 0 && xdrleft > 0) {
+ slen = strlen(sp);
+
+ /*
+ * Check if this is a "user." attribute, skip it if not.
+ */
+ if (strncmp(sp, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ goto contloop;
+
+ slen -= XATTR_USER_PREFIX_LEN;
+ xdrlen = 4 + ((slen + 3) & ~3);
+ /* Check if both entry and eof can fit in the XDR buffer */
+ if (xdrlen + XDR_UNIT > xdrleft) {
+ if (count == 0) {
+ /*
+ * Can't even fit the first attribute name.
+ */
+ status = nfserr_toosmall;
+ goto out;
+ }
+ eof = 0;
+ goto wreof;
+ }
+
+ left -= XATTR_USER_PREFIX_LEN;
+ sp += XATTR_USER_PREFIX_LEN;
+ if (nuser++ < offset)
+ goto contloop;
+
+
+ p = xdr_reserve_space(xdr, xdrlen);
+ if (!p) {
+ status = nfserr_resource;
+ goto out;
+ }
+
+ xdr_encode_opaque(p, sp, slen);
+
+ xdrleft -= xdrlen;
+ count++;
+contloop:
+ sp += slen + 1;
+ left -= slen + 1;
+ }
+
+ /*
+ * If there were user attributes to copy, but we didn't copy
+ * any, the offset was too large (e.g. the cookie was invalid).
+ */
+ if (nuser > 0 && count == 0) {
+ status = nfserr_badcookie;
+ goto out;
+ }
+
+wreof:
+ p = xdr_reserve_space(xdr, 4);
+ if (!p) {
+ status = nfserr_resource;
+ goto out;
+ }
+ *p = cpu_to_be32(eof);
+
+ cookie = offset + count;
+
+ wire_cookie = cpu_to_be64(cookie);
+ write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &wire_cookie, 8);
+ tmp = cpu_to_be32(count);
+ write_bytes_to_xdr_buf(xdr->buf, count_offset, &tmp, 4);
+out:
+ if (listxattrs->lsxa_len)
+ kvfree(listxattrs->lsxa_buf);
+ return status;
}
-typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
+static __be32
+nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_removexattr *removexattr = &u->removexattr;
+ struct xdr_stream *xdr = resp->xdr;
+
+ return nfsd4_encode_change_info4(xdr, &removexattr->rmxa_cinfo);
+}
+
+typedef __be32(*nfsd4_enc)(struct nfsd4_compoundres *, __be32, union nfsd4_op_u *u);
/*
* Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1
@@ -4319,87 +5746,93 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
* done in the decoding phase.
*/
static const nfsd4_enc nfsd4_enc_ops[] = {
- [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access,
- [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close,
- [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit,
- [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create,
- [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr,
- [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh,
- [OP_LINK] = (nfsd4_enc)nfsd4_encode_link,
- [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock,
- [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt,
- [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku,
- [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
- [OP_OPENATTR] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
- [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
- [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_READ] = (nfsd4_enc)nfsd4_encode_read,
- [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir,
- [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink,
- [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove,
- [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename,
- [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo,
- [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr,
- [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid,
- [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write,
- [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_ACCESS] = nfsd4_encode_access,
+ [OP_CLOSE] = nfsd4_encode_close,
+ [OP_COMMIT] = nfsd4_encode_commit,
+ [OP_CREATE] = nfsd4_encode_create,
+ [OP_DELEGPURGE] = nfsd4_encode_noop,
+ [OP_DELEGRETURN] = nfsd4_encode_noop,
+ [OP_GETATTR] = nfsd4_encode_getattr,
+ [OP_GETFH] = nfsd4_encode_getfh,
+ [OP_LINK] = nfsd4_encode_link,
+ [OP_LOCK] = nfsd4_encode_lock,
+ [OP_LOCKT] = nfsd4_encode_lockt,
+ [OP_LOCKU] = nfsd4_encode_locku,
+ [OP_LOOKUP] = nfsd4_encode_noop,
+ [OP_LOOKUPP] = nfsd4_encode_noop,
+ [OP_NVERIFY] = nfsd4_encode_noop,
+ [OP_OPEN] = nfsd4_encode_open,
+ [OP_OPENATTR] = nfsd4_encode_noop,
+ [OP_OPEN_CONFIRM] = nfsd4_encode_open_confirm,
+ [OP_OPEN_DOWNGRADE] = nfsd4_encode_open_downgrade,
+ [OP_PUTFH] = nfsd4_encode_noop,
+ [OP_PUTPUBFH] = nfsd4_encode_noop,
+ [OP_PUTROOTFH] = nfsd4_encode_noop,
+ [OP_READ] = nfsd4_encode_read,
+ [OP_READDIR] = nfsd4_encode_readdir,
+ [OP_READLINK] = nfsd4_encode_readlink,
+ [OP_REMOVE] = nfsd4_encode_remove,
+ [OP_RENAME] = nfsd4_encode_rename,
+ [OP_RENEW] = nfsd4_encode_noop,
+ [OP_RESTOREFH] = nfsd4_encode_noop,
+ [OP_SAVEFH] = nfsd4_encode_noop,
+ [OP_SECINFO] = nfsd4_encode_secinfo,
+ [OP_SETATTR] = nfsd4_encode_setattr,
+ [OP_SETCLIENTID] = nfsd4_encode_setclientid,
+ [OP_SETCLIENTID_CONFIRM] = nfsd4_encode_noop,
+ [OP_VERIFY] = nfsd4_encode_noop,
+ [OP_WRITE] = nfsd4_encode_write,
+ [OP_RELEASE_LOCKOWNER] = nfsd4_encode_noop,
/* NFSv4.1 operations */
- [OP_BACKCHANNEL_CTL] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session,
- [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id,
- [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session,
- [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_BACKCHANNEL_CTL] = nfsd4_encode_noop,
+ [OP_BIND_CONN_TO_SESSION] = nfsd4_encode_bind_conn_to_session,
+ [OP_EXCHANGE_ID] = nfsd4_encode_exchange_id,
+ [OP_CREATE_SESSION] = nfsd4_encode_create_session,
+ [OP_DESTROY_SESSION] = nfsd4_encode_noop,
+ [OP_FREE_STATEID] = nfsd4_encode_noop,
+ [OP_GET_DIR_DELEGATION] = nfsd4_encode_get_dir_delegation,
#ifdef CONFIG_NFSD_PNFS
- [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo,
- [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit,
- [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget,
- [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn,
+ [OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo,
+ [OP_GETDEVICELIST] = nfsd4_encode_noop,
+ [OP_LAYOUTCOMMIT] = nfsd4_encode_layoutcommit,
+ [OP_LAYOUTGET] = nfsd4_encode_layoutget,
+ [OP_LAYOUTRETURN] = nfsd4_encode_layoutreturn,
#else
- [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_GETDEVICEINFO] = nfsd4_encode_noop,
+ [OP_GETDEVICELIST] = nfsd4_encode_noop,
+ [OP_LAYOUTCOMMIT] = nfsd4_encode_noop,
+ [OP_LAYOUTGET] = nfsd4_encode_noop,
+ [OP_LAYOUTRETURN] = nfsd4_encode_noop,
#endif
- [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name,
- [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
- [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid,
- [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_SECINFO_NO_NAME] = nfsd4_encode_secinfo_no_name,
+ [OP_SEQUENCE] = nfsd4_encode_sequence,
+ [OP_SET_SSV] = nfsd4_encode_noop,
+ [OP_TEST_STATEID] = nfsd4_encode_test_stateid,
+ [OP_WANT_DELEGATION] = nfsd4_encode_noop,
+ [OP_DESTROY_CLIENTID] = nfsd4_encode_noop,
+ [OP_RECLAIM_COMPLETE] = nfsd4_encode_noop,
/* NFSv4.2 operations */
- [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
- [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status,
- [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
- [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_ALLOCATE] = nfsd4_encode_noop,
+ [OP_COPY] = nfsd4_encode_copy,
+ [OP_COPY_NOTIFY] = nfsd4_encode_copy_notify,
+ [OP_DEALLOCATE] = nfsd4_encode_noop,
+ [OP_IO_ADVISE] = nfsd4_encode_noop,
+ [OP_LAYOUTERROR] = nfsd4_encode_noop,
+ [OP_LAYOUTSTATS] = nfsd4_encode_noop,
+ [OP_OFFLOAD_CANCEL] = nfsd4_encode_noop,
+ [OP_OFFLOAD_STATUS] = nfsd4_encode_offload_status,
+ [OP_READ_PLUS] = nfsd4_encode_read_plus,
+ [OP_SEEK] = nfsd4_encode_seek,
+ [OP_WRITE_SAME] = nfsd4_encode_noop,
+ [OP_CLONE] = nfsd4_encode_noop,
+
+ /* RFC 8276 extended atributes operations */
+ [OP_GETXATTR] = nfsd4_encode_getxattr,
+ [OP_SETXATTR] = nfsd4_encode_setxattr,
+ [OP_LISTXATTRS] = nfsd4_encode_listxattrs,
+ [OP_REMOVEXATTR] = nfsd4_encode_removexattr,
};
/*
@@ -4431,36 +5864,50 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize)
return nfserr_rep_too_big;
}
+static __be32 nfsd4_map_status(__be32 status, u32 minor)
+{
+ switch (status) {
+ case nfs_ok:
+ break;
+ case nfserr_wrong_type:
+ /* RFC 8881 - 15.1.2.9 */
+ if (minor == 0)
+ status = nfserr_inval;
+ break;
+ case nfserr_symlink_not_dir:
+ status = nfserr_symlink;
+ break;
+ }
+ return status;
+}
+
void
nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
{
- struct xdr_stream *xdr = &resp->xdr;
+ struct xdr_stream *xdr = resp->xdr;
struct nfs4_stateowner *so = resp->cstate.replay_owner;
struct svc_rqst *rqstp = resp->rqstp;
const struct nfsd4_operation *opdesc = op->opdesc;
- int post_err_offset;
+ unsigned int op_status_offset;
nfsd4_enc encoder;
- __be32 *p;
- p = xdr_reserve_space(xdr, 8);
- if (!p) {
- WARN_ON_ONCE(1);
- return;
- }
- *p++ = cpu_to_be32(op->opnum);
- post_err_offset = xdr->buf->len;
+ if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT)
+ goto release;
+ op_status_offset = xdr->buf->len;
+ if (!xdr_reserve_space(xdr, XDR_UNIT))
+ goto release;
if (op->opnum == OP_ILLEGAL)
goto status;
if (op->status && opdesc &&
!(opdesc->op_flags & OP_NONTRIVIAL_ERROR_ENCODE))
goto status;
- BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
+ BUG_ON(op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
!nfsd4_enc_ops[op->opnum]);
encoder = nfsd4_enc_ops[op->opnum];
op->status = encoder(resp, op->status, &op->u);
- if (opdesc && opdesc->op_release)
- opdesc->op_release(&op->u);
+ if (op->status)
+ trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
xdr_commit_encode(xdr);
/* nfsd4_check_resp_size guarantees enough room for error status */
@@ -4489,50 +5936,49 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
* bug if we had to do this on a non-idempotent op:
*/
warn_on_nonidempotent_op(op);
- xdr_truncate_encode(xdr, post_err_offset);
- }
- if (so) {
- int len = xdr->buf->len - post_err_offset;
+ xdr_truncate_encode(xdr, op_status_offset + XDR_UNIT);
+ } else if (so) {
+ int len = xdr->buf->len - (op_status_offset + XDR_UNIT);
so->so_replay.rp_status = op->status;
so->so_replay.rp_buflen = len;
- read_bytes_from_xdr_buf(xdr->buf, post_err_offset,
+ read_bytes_from_xdr_buf(xdr->buf, op_status_offset + XDR_UNIT,
so->so_replay.rp_buf, len);
}
status:
- /* Note that op->status is already in network byte order: */
- write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4);
+ op->status = nfsd4_map_status(op->status,
+ resp->cstate.minorversion);
+ write_bytes_to_xdr_buf(xdr->buf, op_status_offset,
+ &op->status, XDR_UNIT);
+release:
+ if (opdesc && opdesc->op_release)
+ opdesc->op_release(&op->u);
+
+ /*
+ * Account for pages consumed while encoding this operation.
+ * The xdr_stream primitives don't manage rq_next_page.
+ */
+ rqstp->rq_next_page = xdr->page_ptr + 1;
}
-/*
- * Encode the reply stored in the stateowner reply cache
- *
- * XDR note: do not encode rp->rp_buflen: the buffer contains the
- * previously sent already encoded operation.
+/**
+ * nfsd4_encode_replay - encode a result stored in the stateowner reply cache
+ * @xdr: send buffer's XDR stream
+ * @op: operation being replayed
+ *
+ * @op->replay->rp_buf contains the previously-sent already-encoded result.
*/
-void
-nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
+void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
{
- __be32 *p;
struct nfs4_replay *rp = op->replay;
- BUG_ON(!rp);
+ trace_nfsd_stateowner_replay(op->opnum, rp);
- p = xdr_reserve_space(xdr, 8 + rp->rp_buflen);
- if (!p) {
- WARN_ON_ONCE(1);
+ if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT)
return;
- }
- *p++ = cpu_to_be32(op->opnum);
- *p++ = rp->rp_status; /* already xdr'ed */
-
- p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen);
-}
-
-int
-nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
-{
- return xdr_ressize_check(rqstp, p);
+ if (xdr_stream_encode_be32(xdr, rp->rp_status) != XDR_UNIT)
+ return;
+ xdr_stream_encode_opaque_fixed(xdr, rp->rp_buf, rp->rp_buflen);
}
void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
@@ -4540,11 +5986,9 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
struct nfsd4_compoundargs *args = rqstp->rq_argp;
if (args->ops != args->iops) {
- kfree(args->ops);
+ vfree(args->ops);
args->ops = args->iops;
}
- kfree(args->tmpp);
- args->tmpp = NULL;
while (args->to_free) {
struct svcxdr_tmpbuf *tb = args->to_free;
args->to_free = tb->next;
@@ -4552,56 +5996,39 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
}
}
-int
-nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd4_compoundargs *args = rqstp->rq_argp;
- if (rqstp->rq_arg.head[0].iov_len % 4) {
- /* client is nuts */
- dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
- __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid));
- return 0;
- }
- args->p = p;
- args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
- args->pagelist = rqstp->rq_arg.pages;
- args->pagelen = rqstp->rq_arg.page_len;
- args->tail = false;
- args->tmpp = NULL;
+ /* svcxdr_tmp_alloc */
args->to_free = NULL;
+
+ args->xdr = xdr;
args->ops = args->iops;
args->rqstp = rqstp;
- return !nfsd4_decode_compound(args);
+ return nfsd4_decode_compound(args);
}
-int
-nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
- /*
- * All that remains is to write the tag and operation count...
- */
struct nfsd4_compoundres *resp = rqstp->rq_resp;
- struct xdr_buf *buf = resp->xdr.buf;
-
- WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
- buf->tail[0].iov_len);
+ __be32 *p;
- rqstp->rq_next_page = resp->xdr.page_ptr + 1;
+ /*
+ * Send buffer space for the following items is reserved
+ * at the top of nfsd4_proc_compound().
+ */
+ p = resp->statusp;
- p = resp->tagp;
+ *p++ = resp->cstate.status;
*p++ = htonl(resp->taglen);
memcpy(p, resp->tag, resp->taglen);
p += XDR_QUADLEN(resp->taglen);
*p++ = htonl(resp->opcnt);
nfsd4_sequence_done(resp);
- return 1;
+ return true;
}
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
diff --git a/fs/nfsd/nfs4xdr_gen.c b/fs/nfsd/nfs4xdr_gen.c
new file mode 100644
index 000000000000..a17b5d8e60b3
--- /dev/null
+++ b/fs/nfsd/nfs4xdr_gen.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+// Generated by xdrgen. Manual edits will be lost.
+// XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x
+// XDR specification modification time: Mon Oct 14 09:10:13 2024
+
+#include <linux/sunrpc/svc.h>
+
+#include "nfs4xdr_gen.h"
+
+static bool __maybe_unused
+xdrgen_decode_int64_t(struct xdr_stream *xdr, int64_t *ptr)
+{
+ return xdrgen_decode_hyper(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_uint32_t(struct xdr_stream *xdr, uint32_t *ptr)
+{
+ return xdrgen_decode_unsigned_int(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_bitmap4(struct xdr_stream *xdr, bitmap4 *ptr)
+{
+ if (xdr_stream_decode_u32(xdr, &ptr->count) < 0)
+ return false;
+ for (u32 i = 0; i < ptr->count; i++)
+ if (!xdrgen_decode_uint32_t(xdr, &ptr->element[i]))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_nfstime4(struct xdr_stream *xdr, struct nfstime4 *ptr)
+{
+ if (!xdrgen_decode_int64_t(xdr, &ptr->seconds))
+ return false;
+ if (!xdrgen_decode_uint32_t(xdr, &ptr->nseconds))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_fattr4_offline(struct xdr_stream *xdr, fattr4_offline *ptr)
+{
+ return xdrgen_decode_bool(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_open_arguments4(struct xdr_stream *xdr, struct open_arguments4 *ptr)
+{
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_deny))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_share_access_want))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_open_claim))
+ return false;
+ if (!xdrgen_decode_bitmap4(xdr, &ptr->oa_create_mode))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+bool
+xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr)
+{
+ return xdrgen_decode_open_arguments4(xdr, ptr);
+};
+
+bool
+xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr)
+{
+ return xdrgen_decode_nfstime4(xdr, ptr);
+};
+
+bool
+xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr)
+{
+ return xdrgen_decode_nfstime4(xdr, ptr);
+};
+
+static bool __maybe_unused
+xdrgen_decode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_int64_t(struct xdr_stream *xdr, const int64_t value)
+{
+ return xdrgen_encode_hyper(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_uint32_t(struct xdr_stream *xdr, const uint32_t value)
+{
+ return xdrgen_encode_unsigned_int(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_bitmap4(struct xdr_stream *xdr, const bitmap4 value)
+{
+ if (xdr_stream_encode_u32(xdr, value.count) != XDR_UNIT)
+ return false;
+ for (u32 i = 0; i < value.count; i++)
+ if (!xdrgen_encode_uint32_t(xdr, value.element[i]))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_nfstime4(struct xdr_stream *xdr, const struct nfstime4 *value)
+{
+ if (!xdrgen_encode_int64_t(xdr, value->seconds))
+ return false;
+ if (!xdrgen_encode_uint32_t(xdr, value->nseconds))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_fattr4_offline(struct xdr_stream *xdr, const fattr4_offline value)
+{
+ return xdrgen_encode_bool(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_open_arguments4(struct xdr_stream *xdr, const struct open_arguments4 *value)
+{
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_share_deny))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_share_access_want))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_open_claim))
+ return false;
+ if (!xdrgen_encode_bitmap4(xdr, value->oa_create_mode))
+ return false;
+ return true;
+};
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_access4(struct xdr_stream *xdr, open_args_share_access4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_deny4(struct xdr_stream *xdr, open_args_share_deny4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_share_access_want4(struct xdr_stream *xdr, open_args_share_access_want4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_open_claim4(struct xdr_stream *xdr, open_args_open_claim4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_open_args_createmode4(struct xdr_stream *xdr, open_args_createmode4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+bool
+xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value)
+{
+ return xdrgen_encode_open_arguments4(xdr, value);
+};
+
+bool
+xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value)
+{
+ return xdrgen_encode_nfstime4(xdr, value);
+};
+
+bool
+xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value)
+{
+ return xdrgen_encode_nfstime4(xdr, value);
+};
+
+static bool __maybe_unused
+xdrgen_encode_open_delegation_type4(struct xdr_stream *xdr, open_delegation_type4 value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
diff --git a/fs/nfsd/nfs4xdr_gen.h b/fs/nfsd/nfs4xdr_gen.h
new file mode 100644
index 000000000000..41a0033b7256
--- /dev/null
+++ b/fs/nfsd/nfs4xdr_gen.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Generated by xdrgen. Manual edits will be lost. */
+/* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */
+/* XDR specification modification time: Mon Oct 14 09:10:13 2024 */
+
+#ifndef _LINUX_XDRGEN_NFS4_1_DECL_H
+#define _LINUX_XDRGEN_NFS4_1_DECL_H
+
+#include <linux/types.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/xdrgen/_defs.h>
+#include <linux/sunrpc/xdrgen/_builtins.h>
+#include <linux/sunrpc/xdrgen/nfs4_1.h>
+
+bool xdrgen_decode_fattr4_open_arguments(struct xdr_stream *xdr, fattr4_open_arguments *ptr);
+bool xdrgen_encode_fattr4_open_arguments(struct xdr_stream *xdr, const fattr4_open_arguments *value);
+
+bool xdrgen_decode_fattr4_time_deleg_access(struct xdr_stream *xdr, fattr4_time_deleg_access *ptr);
+bool xdrgen_encode_fattr4_time_deleg_access(struct xdr_stream *xdr, const fattr4_time_deleg_access *value);
+
+bool xdrgen_decode_fattr4_time_deleg_modify(struct xdr_stream *xdr, fattr4_time_deleg_modify *ptr);
+bool xdrgen_encode_fattr4_time_deleg_modify(struct xdr_stream *xdr, const fattr4_time_deleg_modify *value);
+
+#endif /* _LINUX_XDRGEN_NFS4_1_DECL_H */
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index da52b594362a..ab13ee9c7fd8 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -9,6 +9,7 @@
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sunrpc/addr.h>
@@ -19,15 +20,14 @@
#include "nfsd.h"
#include "cache.h"
-
-#define NFSDDBG_FACILITY NFSDDBG_REPCACHE
+#include "trace.h"
/*
* We use this value to determine the number of hash buckets from the max
* cache size, the idea being that when the cache is at its maximum number
* of entries, then this should be the average number of entries per bucket.
*/
-#define TARGET_BUCKET_SIZE 64
+#define TARGET_BUCKET_SIZE 8
struct nfsd_drc_bucket {
struct rb_root rb_head;
@@ -35,48 +35,14 @@ struct nfsd_drc_bucket {
spinlock_t cache_lock;
};
-static struct nfsd_drc_bucket *drc_hashtbl;
static struct kmem_cache *drc_slab;
-/* max number of entries allowed in the cache */
-static unsigned int max_drc_entries;
-
-/* number of significant bits in the hash value */
-static unsigned int maskbits;
-static unsigned int drc_hashsize;
-
-/*
- * Stats and other tracking of on the duplicate reply cache. All of these and
- * the "rc" fields in nfsdstats are protected by the cache_lock
- */
-
-/* total number of entries */
-static atomic_t num_drc_entries;
-
-/* cache misses due only to checksum comparison failures */
-static unsigned int payload_misses;
-
-/* amount of memory (in bytes) currently consumed by the DRC */
-static unsigned int drc_mem_usage;
-
-/* longest hash chain seen */
-static unsigned int longest_chain;
-
-/* size of cache when we saw the longest hash chain */
-static unsigned int longest_chain_cachesize;
-
static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
-static struct shrinker nfsd_reply_cache_shrinker = {
- .scan_objects = nfsd_reply_cache_scan,
- .count_objects = nfsd_reply_cache_count,
- .seeks = 1,
-};
-
/*
* Put a cap on the size of the DRC based on the amount of available
* low memory in the machine.
@@ -94,6 +60,9 @@ static struct shrinker nfsd_reply_cache_shrinker = {
* ...with a hard cap of 256k entries. In the worst case, each entry will be
* ~1k, so the above numbers should give a rough max of the amount of memory
* used in k.
+ *
+ * XXX: these limits are per-container, so memory used will increase
+ * linearly with number of containers. Maybe that's OK.
*/
static unsigned int
nfsd_cache_size_limit(void)
@@ -115,16 +84,11 @@ nfsd_hashsize(unsigned int limit)
return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
}
-static u32
-nfsd_cache_hash(__be32 xid)
-{
- return hash_32(be32_to_cpu(xid), maskbits);
-}
-
-static struct svc_cacherep *
-nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum)
+static struct nfsd_cacherep *
+nfsd_cacherep_alloc(struct svc_rqst *rqstp, __wsum csum,
+ struct nfsd_net *nn)
{
- struct svc_cacherep *rp;
+ struct nfsd_cacherep *rp;
rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
if (rp) {
@@ -146,187 +110,276 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum)
return rp;
}
-static void
-nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+static void nfsd_cacherep_free(struct nfsd_cacherep *rp)
{
- if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
- drc_mem_usage -= rp->c_replvec.iov_len;
+ if (rp->c_type == RC_REPLBUFF)
kfree(rp->c_replvec.iov_base);
+ kmem_cache_free(drc_slab, rp);
+}
+
+static unsigned long
+nfsd_cacherep_dispose(struct list_head *dispose)
+{
+ struct nfsd_cacherep *rp;
+ unsigned long freed = 0;
+
+ while (!list_empty(dispose)) {
+ rp = list_first_entry(dispose, struct nfsd_cacherep, c_lru);
+ list_del(&rp->c_lru);
+ nfsd_cacherep_free(rp);
+ freed++;
}
+ return freed;
+}
+
+static void
+nfsd_cacherep_unlink_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
+ struct nfsd_cacherep *rp)
+{
+ if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base)
+ nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len);
if (rp->c_state != RC_UNUSED) {
rb_erase(&rp->c_node, &b->rb_head);
list_del(&rp->c_lru);
- atomic_dec(&num_drc_entries);
- drc_mem_usage -= sizeof(*rp);
+ atomic_dec(&nn->num_drc_entries);
+ nfsd_stats_drc_mem_usage_sub(nn, sizeof(*rp));
}
- kmem_cache_free(drc_slab, rp);
}
static void
-nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
+ struct nfsd_net *nn)
+{
+ nfsd_cacherep_unlink_locked(nn, b, rp);
+ nfsd_cacherep_free(rp);
+}
+
+static void
+nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
+ struct nfsd_net *nn)
{
spin_lock(&b->cache_lock);
- nfsd_reply_cache_free_locked(b, rp);
+ nfsd_cacherep_unlink_locked(nn, b, rp);
spin_unlock(&b->cache_lock);
+ nfsd_cacherep_free(rp);
+}
+
+int nfsd_drc_slab_create(void)
+{
+ drc_slab = KMEM_CACHE(nfsd_cacherep, 0);
+ return drc_slab ? 0: -ENOMEM;
+}
+
+void nfsd_drc_slab_free(void)
+{
+ kmem_cache_destroy(drc_slab);
}
-int nfsd_reply_cache_init(void)
+int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
unsigned int i;
- int status = 0;
-
- max_drc_entries = nfsd_cache_size_limit();
- atomic_set(&num_drc_entries, 0);
- hashsize = nfsd_hashsize(max_drc_entries);
- maskbits = ilog2(hashsize);
-
- status = register_shrinker(&nfsd_reply_cache_shrinker);
- if (status)
- return status;
-
- drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
- 0, 0, NULL);
- if (!drc_slab)
- goto out_nomem;
-
- drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
- if (!drc_hashtbl) {
- drc_hashtbl = vzalloc(array_size(hashsize,
- sizeof(*drc_hashtbl)));
- if (!drc_hashtbl)
- goto out_nomem;
- }
+
+ nn->max_drc_entries = nfsd_cache_size_limit();
+ atomic_set(&nn->num_drc_entries, 0);
+ hashsize = nfsd_hashsize(nn->max_drc_entries);
+ nn->maskbits = ilog2(hashsize);
+
+ nn->drc_hashtbl = kvzalloc(array_size(hashsize,
+ sizeof(*nn->drc_hashtbl)), GFP_KERNEL);
+ if (!nn->drc_hashtbl)
+ return -ENOMEM;
+
+ nn->nfsd_reply_cache_shrinker = shrinker_alloc(0, "nfsd-reply:%s",
+ nn->nfsd_name);
+ if (!nn->nfsd_reply_cache_shrinker)
+ goto out_shrinker;
+
+ nn->nfsd_reply_cache_shrinker->scan_objects = nfsd_reply_cache_scan;
+ nn->nfsd_reply_cache_shrinker->count_objects = nfsd_reply_cache_count;
+ nn->nfsd_reply_cache_shrinker->seeks = 1;
+ nn->nfsd_reply_cache_shrinker->private_data = nn;
+
+ shrinker_register(nn->nfsd_reply_cache_shrinker);
for (i = 0; i < hashsize; i++) {
- INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
- spin_lock_init(&drc_hashtbl[i].cache_lock);
+ INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head);
+ spin_lock_init(&nn->drc_hashtbl[i].cache_lock);
}
- drc_hashsize = hashsize;
+ nn->drc_hashsize = hashsize;
return 0;
-out_nomem:
+out_shrinker:
+ kvfree(nn->drc_hashtbl);
printk(KERN_ERR "nfsd: failed to allocate reply cache\n");
- nfsd_reply_cache_shutdown();
return -ENOMEM;
}
-void nfsd_reply_cache_shutdown(void)
+void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
{
- struct svc_cacherep *rp;
+ struct nfsd_cacherep *rp;
unsigned int i;
- unregister_shrinker(&nfsd_reply_cache_shrinker);
+ shrinker_free(nn->nfsd_reply_cache_shrinker);
- for (i = 0; i < drc_hashsize; i++) {
- struct list_head *head = &drc_hashtbl[i].lru_head;
+ for (i = 0; i < nn->drc_hashsize; i++) {
+ struct list_head *head = &nn->drc_hashtbl[i].lru_head;
while (!list_empty(head)) {
- rp = list_first_entry(head, struct svc_cacherep, c_lru);
- nfsd_reply_cache_free_locked(&drc_hashtbl[i], rp);
+ rp = list_first_entry(head, struct nfsd_cacherep, c_lru);
+ nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i],
+ rp, nn);
}
}
- kvfree(drc_hashtbl);
- drc_hashtbl = NULL;
- drc_hashsize = 0;
+ kvfree(nn->drc_hashtbl);
+ nn->drc_hashtbl = NULL;
+ nn->drc_hashsize = 0;
- kmem_cache_destroy(drc_slab);
- drc_slab = NULL;
}
-/*
- * Move cache entry to end of LRU list, and queue the cleaner to run if it's
- * not already scheduled.
- */
static void
-lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+lru_put_end(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp)
{
rp->c_timestamp = jiffies;
list_move_tail(&rp->c_lru, &b->lru_head);
}
-static long
-prune_bucket(struct nfsd_drc_bucket *b)
+static noinline struct nfsd_drc_bucket *
+nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn)
{
- struct svc_cacherep *rp, *tmp;
- long freed = 0;
+ unsigned int hash = hash_32((__force u32)xid, nn->maskbits);
- list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
- /*
- * Don't free entries attached to calls that are still
- * in-progress, but do keep scanning the list.
- */
- if (rp->c_state == RC_INPROG)
- continue;
- if (atomic_read(&num_drc_entries) <= max_drc_entries &&
- time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
- break;
- nfsd_reply_cache_free_locked(b, rp);
- freed++;
- }
- return freed;
+ return &nn->drc_hashtbl[hash];
}
/*
- * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
- * Also prune the oldest ones when the total exceeds the max number of entries.
+ * Remove and return no more than @max expired entries in bucket @b.
+ * If @max is zero, do not limit the number of removed entries.
*/
-static long
-prune_cache_entries(void)
+static void
+nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
+ unsigned int max, struct list_head *dispose)
{
- unsigned int i;
- long freed = 0;
+ unsigned long expiry = jiffies - RC_EXPIRE;
+ struct nfsd_cacherep *rp, *tmp;
+ unsigned int freed = 0;
- for (i = 0; i < drc_hashsize; i++) {
- struct nfsd_drc_bucket *b = &drc_hashtbl[i];
+ lockdep_assert_held(&b->cache_lock);
- if (list_empty(&b->lru_head))
- continue;
- spin_lock(&b->cache_lock);
- freed += prune_bucket(b);
- spin_unlock(&b->cache_lock);
+ /* The bucket LRU is ordered oldest-first. */
+ list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
+ if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries &&
+ time_before(expiry, rp->c_timestamp))
+ break;
+
+ nfsd_cacherep_unlink_locked(nn, b, rp);
+ list_add(&rp->c_lru, dispose);
+
+ if (max && ++freed > max)
+ break;
}
- return freed;
}
+/**
+ * nfsd_reply_cache_count - count_objects method for the DRC shrinker
+ * @shrink: our registered shrinker context
+ * @sc: garbage collection parameters
+ *
+ * Returns the total number of entries in the duplicate reply cache. To
+ * keep things simple and quick, this is not the number of expired entries
+ * in the cache (ie, the number that would be removed by a call to
+ * nfsd_reply_cache_scan).
+ */
static unsigned long
nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
- return atomic_read(&num_drc_entries);
+ struct nfsd_net *nn = shrink->private_data;
+
+ return atomic_read(&nn->num_drc_entries);
}
+/**
+ * nfsd_reply_cache_scan - scan_objects method for the DRC shrinker
+ * @shrink: our registered shrinker context
+ * @sc: garbage collection parameters
+ *
+ * Free expired entries on each bucket's LRU list until we've released
+ * nr_to_scan freed objects. Nothing will be released if the cache
+ * has not exceeded it's max_drc_entries limit.
+ *
+ * Returns the number of entries released by this call.
+ */
static unsigned long
nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
- return prune_cache_entries();
+ struct nfsd_net *nn = shrink->private_data;
+ unsigned long freed = 0;
+ LIST_HEAD(dispose);
+ unsigned int i;
+
+ for (i = 0; i < nn->drc_hashsize; i++) {
+ struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i];
+
+ if (list_empty(&b->lru_head))
+ continue;
+
+ spin_lock(&b->cache_lock);
+ nfsd_prune_bucket_locked(nn, b, 0, &dispose);
+ spin_unlock(&b->cache_lock);
+
+ freed += nfsd_cacherep_dispose(&dispose);
+ if (freed > sc->nr_to_scan)
+ break;
+ }
+ return freed;
}
-/*
- * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
+
+/**
+ * nfsd_cache_csum - Checksum incoming NFS Call arguments
+ * @buf: buffer containing a whole RPC Call message
+ * @start: starting byte of the NFS Call header
+ * @remaining: size of the NFS Call header, in bytes
+ *
+ * Compute a weak checksum of the leading bytes of an NFS procedure
+ * call header to help verify that a retransmitted Call matches an
+ * entry in the duplicate reply cache.
+ *
+ * To avoid assumptions about how the RPC message is laid out in
+ * @buf and what else it might contain (eg, a GSS MIC suffix), the
+ * caller passes us the exact location and length of the NFS Call
+ * header.
+ *
+ * Returns a 32-bit checksum value, as defined in RFC 793.
*/
-static __wsum
-nfsd_cache_csum(struct svc_rqst *rqstp)
+static __wsum nfsd_cache_csum(struct xdr_buf *buf, unsigned int start,
+ unsigned int remaining)
{
+ unsigned int base, len;
+ struct xdr_buf subbuf;
+ __wsum csum = 0;
+ void *p;
int idx;
- unsigned int base;
- __wsum csum;
- struct xdr_buf *buf = &rqstp->rq_arg;
- const unsigned char *p = buf->head[0].iov_base;
- size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len,
- RC_CSUMLEN);
- size_t len = min(buf->head[0].iov_len, csum_len);
+
+ if (remaining > RC_CSUMLEN)
+ remaining = RC_CSUMLEN;
+ if (xdr_buf_subsegment(buf, &subbuf, start, remaining))
+ return csum;
/* rq_arg.head first */
- csum = csum_partial(p, len, 0);
- csum_len -= len;
+ if (subbuf.head[0].iov_len) {
+ len = min_t(unsigned int, subbuf.head[0].iov_len, remaining);
+ csum = csum_partial(subbuf.head[0].iov_base, len, csum);
+ remaining -= len;
+ }
/* Continue into page array */
- idx = buf->page_base / PAGE_SIZE;
- base = buf->page_base & ~PAGE_MASK;
- while (csum_len) {
- p = page_address(buf->pages[idx]) + base;
- len = min_t(size_t, PAGE_SIZE - base, csum_len);
+ idx = subbuf.page_base / PAGE_SIZE;
+ base = subbuf.page_base & ~PAGE_MASK;
+ while (remaining) {
+ p = page_address(subbuf.pages[idx]) + base;
+ len = min_t(unsigned int, PAGE_SIZE - base, remaining);
csum = csum_partial(p, len, csum);
- csum_len -= len;
+ remaining -= len;
base = 0;
++idx;
}
@@ -334,11 +387,14 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
}
static int
-nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp)
+nfsd_cache_key_cmp(const struct nfsd_cacherep *key,
+ const struct nfsd_cacherep *rp, struct nfsd_net *nn)
{
if (key->c_key.k_xid == rp->c_key.k_xid &&
- key->c_key.k_csum != rp->c_key.k_csum)
- ++payload_misses;
+ key->c_key.k_csum != rp->c_key.k_csum) {
+ nfsd_stats_payload_misses_inc(nn);
+ trace_nfsd_drc_mismatch(nn, key, rp);
+ }
return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key));
}
@@ -348,10 +404,11 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp
* Must be called with cache_lock held. Returns the found entry or
* inserts an empty key on failure.
*/
-static struct svc_cacherep *
-nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
+static struct nfsd_cacherep *
+nfsd_cache_insert(struct nfsd_drc_bucket *b, struct nfsd_cacherep *key,
+ struct nfsd_net *nn)
{
- struct svc_cacherep *rp, *ret = key;
+ struct nfsd_cacherep *rp, *ret = key;
struct rb_node **p = &b->rb_head.rb_node,
*parent = NULL;
unsigned int entries = 0;
@@ -360,9 +417,9 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
while (*p != NULL) {
++entries;
parent = *p;
- rp = rb_entry(parent, struct svc_cacherep, c_node);
+ rp = rb_entry(parent, struct nfsd_cacherep, c_node);
- cmp = nfsd_cache_key_cmp(key, rp);
+ cmp = nfsd_cache_key_cmp(key, rp, nn);
if (cmp < 0)
p = &parent->rb_left;
else if (cmp > 0)
@@ -376,117 +433,130 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key)
rb_insert_color(&key->c_node, &b->rb_head);
out:
/* tally hash chain length stats */
- if (entries > longest_chain) {
- longest_chain = entries;
- longest_chain_cachesize = atomic_read(&num_drc_entries);
- } else if (entries == longest_chain) {
+ if (entries > nn->longest_chain) {
+ nn->longest_chain = entries;
+ nn->longest_chain_cachesize = atomic_read(&nn->num_drc_entries);
+ } else if (entries == nn->longest_chain) {
/* prefer to keep the smallest cachesize possible here */
- longest_chain_cachesize = min_t(unsigned int,
- longest_chain_cachesize,
- atomic_read(&num_drc_entries));
+ nn->longest_chain_cachesize = min_t(unsigned int,
+ nn->longest_chain_cachesize,
+ atomic_read(&nn->num_drc_entries));
}
-
- lru_put_end(b, ret);
return ret;
}
-/*
+/**
+ * nfsd_cache_lookup - Find an entry in the duplicate reply cache
+ * @rqstp: Incoming Call to find
+ * @start: starting byte in @rqstp->rq_arg of the NFS Call header
+ * @len: size of the NFS Call header, in bytes
+ * @cacherep: OUT: DRC entry for this request
+ *
* Try to find an entry matching the current call in the cache. When none
* is found, we try to grab the oldest expired entry off the LRU list. If
* a suitable one isn't there, then drop the cache_lock and allocate a
* new one, then search again in case one got inserted while this thread
* didn't hold the lock.
+ *
+ * Return values:
+ * %RC_DOIT: Process the request normally
+ * %RC_REPLY: Reply from cache
+ * %RC_DROPIT: Do not process the request further
*/
-int
-nfsd_cache_lookup(struct svc_rqst *rqstp)
+int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
+ unsigned int len, struct nfsd_cacherep **cacherep)
{
- struct svc_cacherep *rp, *found;
- __be32 xid = rqstp->rq_xid;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfsd_cacherep *rp, *found;
__wsum csum;
- u32 hash = nfsd_cache_hash(xid);
- struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
+ struct nfsd_drc_bucket *b;
int type = rqstp->rq_cachetype;
+ LIST_HEAD(dispose);
int rtn = RC_DOIT;
- rqstp->rq_cacherep = NULL;
if (type == RC_NOCACHE) {
- nfsdstats.rcnocache++;
- return rtn;
+ nfsd_stats_rc_nocache_inc(nn);
+ goto out;
}
- csum = nfsd_cache_csum(rqstp);
+ csum = nfsd_cache_csum(&rqstp->rq_arg, start, len);
/*
* Since the common case is a cache miss followed by an insert,
* preallocate an entry.
*/
- rp = nfsd_reply_cache_alloc(rqstp, csum);
- if (!rp) {
- dprintk("nfsd: unable to allocate DRC entry!\n");
- return rtn;
- }
+ rp = nfsd_cacherep_alloc(rqstp, csum, nn);
+ if (!rp)
+ goto out;
+ b = nfsd_cache_bucket_find(rqstp->rq_xid, nn);
spin_lock(&b->cache_lock);
- found = nfsd_cache_insert(b, rp);
- if (found != rp) {
- nfsd_reply_cache_free_locked(NULL, rp);
- rp = found;
+ found = nfsd_cache_insert(b, rp, nn);
+ if (found != rp)
goto found_entry;
- }
-
- nfsdstats.rcmisses++;
- rqstp->rq_cacherep = rp;
+ *cacherep = rp;
rp->c_state = RC_INPROG;
+ nfsd_prune_bucket_locked(nn, b, 3, &dispose);
+ spin_unlock(&b->cache_lock);
- atomic_inc(&num_drc_entries);
- drc_mem_usage += sizeof(*rp);
+ nfsd_cacherep_dispose(&dispose);
- /* go ahead and prune the cache */
- prune_bucket(b);
- out:
- spin_unlock(&b->cache_lock);
- return rtn;
+ nfsd_stats_rc_misses_inc(nn);
+ atomic_inc(&nn->num_drc_entries);
+ nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
+ goto out;
found_entry:
/* We found a matching entry which is either in progress or done. */
- nfsdstats.rchits++;
+ nfsd_reply_cache_free_locked(NULL, rp, nn);
+ nfsd_stats_rc_hits_inc(nn);
rtn = RC_DROPIT;
+ rp = found;
/* Request being processed */
if (rp->c_state == RC_INPROG)
- goto out;
+ goto out_trace;
/* From the hall of fame of impractical attacks:
* Is this a user who tries to snoop on the cache? */
rtn = RC_DOIT;
if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
- goto out;
+ goto out_trace;
/* Compose RPC reply header */
switch (rp->c_type) {
case RC_NOCACHE:
break;
case RC_REPLSTAT:
- svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
+ xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat);
rtn = RC_REPLY;
break;
case RC_REPLBUFF:
if (!nfsd_cache_append(rqstp, &rp->c_replvec))
- goto out; /* should not happen */
+ goto out_unlock; /* should not happen */
rtn = RC_REPLY;
break;
default:
- printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
- nfsd_reply_cache_free_locked(b, rp);
+ WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type);
}
- goto out;
+out_trace:
+ trace_nfsd_drc_found(nn, rqstp, rtn);
+out_unlock:
+ spin_unlock(&b->cache_lock);
+out:
+ return rtn;
}
-/*
- * Update a cache entry. This is called from nfsd_dispatch when
- * the procedure has been executed and the complete reply is in
- * rqstp->rq_res.
+/**
+ * nfsd_cache_update - Update an entry in the duplicate reply cache.
+ * @rqstp: svc_rqst with a finished Reply
+ * @rp: IN: DRC entry for this request
+ * @cachetype: which cache to update
+ * @statp: pointer to Reply's NFS status code, or NULL
+ *
+ * This is called from nfsd_dispatch when the procedure has been
+ * executed and the complete reply is in rqstp->rq_res.
*
* We're copying around data here rather than swapping buffers because
* the toplevel loop requires max-sized buffers, which would be a waste
@@ -499,12 +569,11 @@ found_entry:
* nfsd failed to encode a reply that otherwise would have been cached.
* In this case, nfsd_cache_update is called with statp == NULL.
*/
-void
-nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
+void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
+ int cachetype, __be32 *statp)
{
- struct svc_cacherep *rp = rqstp->rq_cacherep;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
- u32 hash;
struct nfsd_drc_bucket *b;
int len;
size_t bufsize = 0;
@@ -512,15 +581,14 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
if (!rp)
return;
- hash = nfsd_cache_hash(rp->c_key.k_xid);
- b = &drc_hashtbl[hash];
+ b = nfsd_cache_bucket_find(rp->c_key.k_xid, nn);
len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
len >>= 2;
/* Don't cache excessive amounts of data and XDR failures */
if (!statp || len > (256 >> 2)) {
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
@@ -535,18 +603,18 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
bufsize = len << 2;
cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
if (!cachv->iov_base) {
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
cachv->iov_len = bufsize;
memcpy(cachv->iov_base, statp, bufsize);
break;
case RC_NOCACHE:
- nfsd_reply_cache_free(b, rp);
+ nfsd_reply_cache_free(b, rp, nn);
return;
}
spin_lock(&b->cache_lock);
- drc_mem_usage += bufsize;
+ nfsd_stats_drc_mem_usage_add(nn, bufsize);
lru_put_end(b, rp);
rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags);
rp->c_type = cachetype;
@@ -555,24 +623,17 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
return;
}
-/*
- * Copy cached reply to current reply buffer. Should always fit.
- * FIXME as reply is in a page, we should just attach the page, and
- * keep a refcount....
- */
static int
nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
{
- struct kvec *vec = &rqstp->rq_res.head[0];
-
- if (vec->iov_len + data->iov_len > PAGE_SIZE) {
- printk(KERN_WARNING "nfsd: cached reply too large (%zd).\n",
- data->iov_len);
- return 0;
- }
- memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
- vec->iov_len += data->iov_len;
- return 1;
+ __be32 *p;
+
+ p = xdr_reserve_space(&rqstp->rq_res_stream, data->iov_len);
+ if (unlikely(!p))
+ return false;
+ memcpy(p, data->iov_base, data->iov_len);
+ xdr_commit_encode(&rqstp->rq_res_stream);
+ return true;
}
/*
@@ -580,23 +641,26 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
* scraping this file for info should test the labels to ensure they're
* getting the correct field.
*/
-static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
+int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
{
- seq_printf(m, "max entries: %u\n", max_drc_entries);
+ struct nfsd_net *nn = net_generic(file_inode(m->file)->i_sb->s_fs_info,
+ nfsd_net_id);
+
+ seq_printf(m, "max entries: %u\n", nn->max_drc_entries);
seq_printf(m, "num entries: %u\n",
- atomic_read(&num_drc_entries));
- seq_printf(m, "hash buckets: %u\n", 1 << maskbits);
- seq_printf(m, "mem usage: %u\n", drc_mem_usage);
- seq_printf(m, "cache hits: %u\n", nfsdstats.rchits);
- seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses);
- seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache);
- seq_printf(m, "payload misses: %u\n", payload_misses);
- seq_printf(m, "longest chain len: %u\n", longest_chain);
- seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize);
+ atomic_read(&nn->num_drc_entries));
+ seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
+ seq_printf(m, "mem usage: %lld\n",
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_DRC_MEM_USAGE]));
+ seq_printf(m, "cache hits: %lld\n",
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]));
+ seq_printf(m, "cache misses: %lld\n",
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]));
+ seq_printf(m, "not cached: %lld\n",
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]));
+ seq_printf(m, "payload misses: %lld\n",
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]));
+ seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
+ seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0;
}
-
-int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file)
-{
- return single_open(file, nfsd_reply_cache_stats_show, NULL);
-}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b33f9785b756..5ce9a49e76ba 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Syscall interface to knfsd.
*
@@ -7,14 +8,17 @@
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/ctype.h>
+#include <linux/fs_context.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/lockd/lockd.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/gss_api.h>
-#include <linux/sunrpc/gss_krb5_enctypes.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/sunrpc/svc.h>
#include <linux/module.h>
+#include <linux/fsnotify.h>
+#include <linux/nfslocalio.h>
#include "idmap.h"
#include "nfsd.h"
@@ -22,6 +26,9 @@
#include "state.h"
#include "netns.h"
#include "pnfs.h"
+#include "filecache.h"
+#include "trace.h"
+#include "netlink.h"
/*
* We have a single directory with several nodes in it.
@@ -29,6 +36,7 @@
enum {
NFSD_Root = 1,
NFSD_List,
+ NFSD_Export_Stats,
NFSD_Export_features,
NFSD_Fh,
NFSD_FO_UnlockIP,
@@ -40,18 +48,12 @@ enum {
NFSD_Versions,
NFSD_Ports,
NFSD_MaxBlkSize,
- NFSD_MaxConnections,
- NFSD_SupportedEnctypes,
- /*
- * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
- * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
- */
-#ifdef CONFIG_NFSD_V4
+ NFSD_Filecache,
NFSD_Leasetime,
NFSD_Gracetime,
NFSD_RecoveryDir,
NFSD_V4EndGrace,
-#endif
+ NFSD_MaxReserved
};
/*
@@ -65,11 +67,12 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
static ssize_t write_versions(struct file *file, char *buf, size_t size);
static ssize_t write_ports(struct file *file, char *buf, size_t size);
static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
-static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
#ifdef CONFIG_NFSD_V4
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
+#endif
static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size);
#endif
@@ -82,11 +85,12 @@ static ssize_t (*const write_op[])(struct file *, char *, size_t) = {
[NFSD_Versions] = write_versions,
[NFSD_Ports] = write_ports,
[NFSD_MaxBlkSize] = write_maxblksize,
- [NFSD_MaxConnections] = write_maxconn,
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = write_leasetime,
[NFSD_Gracetime] = write_gracetime,
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
[NFSD_RecoveryDir] = write_recoverydir,
+#endif
[NFSD_V4EndGrace] = write_v4_end_grace,
#endif
};
@@ -104,12 +108,12 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
if (IS_ERR(data))
return PTR_ERR(data);
- rv = write_op[ino](file, data, size);
- if (rv >= 0) {
- simple_transaction_set(file, rv);
- rv = size;
- }
- return rv;
+ rv = write_op[ino](file, data, size);
+ if (rv < 0)
+ return rv;
+
+ simple_transaction_set(file, rv);
+ return size;
}
static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
@@ -148,18 +152,6 @@ static int exports_net_open(struct net *net, struct file *file)
return 0;
}
-static int exports_proc_open(struct inode *inode, struct file *file)
-{
- return exports_net_open(current->nsproxy->net_ns, file);
-}
-
-static const struct file_operations exports_proc_operations = {
- .open = exports_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
static int exports_nfsd_open(struct inode *inode, struct file *file)
{
return exports_net_open(inode->i_sb->s_fs_info, file);
@@ -178,51 +170,25 @@ static int export_features_show(struct seq_file *m, void *v)
return 0;
}
-static int export_features_open(struct inode *inode, struct file *file)
-{
- return single_open(file, export_features_show, NULL);
-}
-
-static const struct file_operations export_features_operations = {
- .open = export_features_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(export_features);
-#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
-static int supported_enctypes_show(struct seq_file *m, void *v)
+static int nfsd_pool_stats_open(struct inode *inode, struct file *file)
{
- seq_printf(m, KRB5_SUPPORTED_ENCTYPES);
- return 0;
-}
+ struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
-static int supported_enctypes_open(struct inode *inode, struct file *file)
-{
- return single_open(file, supported_enctypes_show, NULL);
+ return svc_pool_stats_open(&nn->nfsd_info, file);
}
-static const struct file_operations supported_enctypes_ops = {
- .open = supported_enctypes_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
-
static const struct file_operations pool_stats_operations = {
.open = nfsd_pool_stats_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = nfsd_pool_stats_release,
+ .release = seq_release,
};
-static const struct file_operations reply_cache_stats_operations = {
- .open = nfsd_reply_cache_stats_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats);
+
+DEFINE_SHOW_ATTRIBUTE(nfsd_file_cache_stats);
/*----------------------------------------------------------------------------*/
/*
@@ -234,7 +200,7 @@ static inline struct net *netns(struct file *file)
return file_inode(file)->i_sb->s_fs_info;
}
-/**
+/*
* write_unlock_ip - Release all locks used by a client
*
* Experimental.
@@ -270,10 +236,11 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
if (rpc_pton(net, fo_path, size, sap, salen) == 0)
return -EINVAL;
+ trace_nfsd_ctl_unlock_ip(net, buf);
return nlmsvc_unlock_all_by_ip(sap);
}
-/**
+/*
* write_unlock_fs - Release all locks on a local file system
*
* Experimental.
@@ -303,7 +270,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
fo_path = buf;
if (qword_get(&buf, fo_path, size) < 0)
return -EINVAL;
-
+ trace_nfsd_ctl_unlock_fs(netns(file), fo_path);
error = kern_path(fo_path, 0, &path);
if (error)
return error;
@@ -318,12 +285,13 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
* 3. Is that directory the root of an exported file system?
*/
error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
+ nfsd4_revoke_states(netns(file), path.dentry->d_sb);
path_put(&path);
return error;
}
-/**
+/*
* write_filehandle - Get a variable-length NFS file handle by path
*
* On input, the buffer contains a '\n'-terminated C string comprised of
@@ -347,7 +315,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
{
char *dname, *path;
- int uninitialized_var(maxsize);
+ int maxsize;
char *mesg = buf;
int len;
struct auth_domain *dom;
@@ -364,7 +332,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
len = qword_get(&mesg, dname, size);
if (len <= 0)
return -EINVAL;
-
+
path = dname+len+1;
len = qword_get(&mesg, path, size);
if (len <= 0)
@@ -378,27 +346,29 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
return -EINVAL;
maxsize = min(maxsize, NFS3_FHSIZE);
- if (qword_get(&mesg, mesg, size)>0)
+ if (qword_get(&mesg, mesg, size) > 0)
return -EINVAL;
+ trace_nfsd_ctl_filehandle(netns(file), dname, path, maxsize);
+
/* we have all the words, they are in buf.. */
dom = unix_domain_find(dname);
if (!dom)
return -ENOMEM;
- len = exp_rootfh(netns(file), dom, path, &fh, maxsize);
+ len = exp_rootfh(netns(file), dom, path, &fh, maxsize);
auth_domain_put(dom);
if (len)
return len;
-
+
mesg = buf;
len = SIMPLE_TRANSACTION_LIMIT;
- qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size);
+ qword_addhex(&mesg, &len, fh.fh_raw, fh.fh_size);
mesg[-1] = '\n';
- return mesg - buf;
+ return mesg - buf;
}
-/**
+/*
* write_threads - Start NFSD, or report the current number of running threads
*
* Input:
@@ -439,7 +409,10 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
return rv;
if (newthreads < 0)
return -EINVAL;
- rv = nfsd_svc(newthreads, net);
+ trace_nfsd_ctl_threads(net, newthreads);
+ mutex_lock(&nfsd_mutex);
+ rv = nfsd_svc(1, &newthreads, net, file->f_cred, NULL);
+ mutex_unlock(&nfsd_mutex);
if (rv < 0)
return rv;
} else
@@ -448,7 +421,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv);
}
-/**
+/*
* write_pool_threads - Set or report the current number of threads per pool
*
* Input:
@@ -458,8 +431,8 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
* OR
*
* Input:
- * buf: C string containing whitespace-
- * separated unsigned integer values
+ * buf: C string containing whitespace-
+ * separated unsigned integer values
* representing the number of NFSD
* threads to start in each pool
* size: non-zero length of C string in @buf
@@ -511,7 +484,16 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
rv = -EINVAL;
if (nthreads[i] < 0)
goto out_free;
+ trace_nfsd_ctl_pool_threads(net, i, nthreads[i]);
}
+
+ /*
+ * There must always be a thread in pool 0; the admin
+ * can't shut down NFS completely using pool_threads.
+ */
+ if (nthreads[0] == 0)
+ nthreads[0] = 1;
+
rv = nfsd_set_nrthreads(i, nthreads, net);
if (rv)
goto out_free;
@@ -537,14 +519,14 @@ out_free:
}
static ssize_t
-nfsd_print_version_support(char *buf, int remaining, const char *sep,
- unsigned vers, int minor)
+nfsd_print_version_support(struct nfsd_net *nn, char *buf, int remaining,
+ const char *sep, unsigned vers, int minor)
{
const char *format = minor < 0 ? "%s%c%u" : "%s%c%u.%u";
- bool supported = !!nfsd_vers(vers, NFSD_TEST);
+ bool supported = !!nfsd_vers(nn, vers, NFSD_TEST);
if (vers == 4 && minor >= 0 &&
- !nfsd_minorversion(minor, NFSD_TEST))
+ !nfsd_minorversion(nn, minor, NFSD_TEST))
supported = false;
if (minor == 0 && supported)
/*
@@ -566,7 +548,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
char *sep;
struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
- if (size>0) {
+ if (size > 0) {
if (nn->nfsd_serv)
/* Cannot change versions without updating
* nn->nfsd_serv->sv_xdrsize, and reallocing
@@ -576,6 +558,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
if (buf[size-1] != '\n')
return -EINVAL;
buf[size-1] = 0;
+ trace_nfsd_ctl_version(netns(file), buf);
vers = mesg;
len = qword_get(&mesg, vers, size);
@@ -597,48 +580,51 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
cmd = sign == '-' ? NFSD_CLEAR : NFSD_SET;
switch(num) {
+#ifdef CONFIG_NFSD_V2
case 2:
+#endif
case 3:
- nfsd_vers(num, cmd);
+ nfsd_vers(nn, num, cmd);
break;
case 4:
if (*minorp == '.') {
- if (nfsd_minorversion(minor, cmd) < 0)
+ if (nfsd_minorversion(nn, minor, cmd) < 0)
return -EINVAL;
- } else if ((cmd == NFSD_SET) != nfsd_vers(num, NFSD_TEST)) {
+ } else if ((cmd == NFSD_SET) != nfsd_vers(nn, num, NFSD_TEST)) {
/*
* Either we have +4 and no minors are enabled,
* or we have -4 and at least one minor is enabled.
* In either case, propagate 'cmd' to all minors.
*/
minor = 0;
- while (nfsd_minorversion(minor, cmd) >= 0)
+ while (nfsd_minorversion(nn, minor, cmd) >= 0)
minor++;
}
break;
default:
- return -EINVAL;
+ /* Ignore requests to disable non-existent versions */
+ if (cmd == NFSD_SET)
+ return -EINVAL;
}
vers += len + 1;
} while ((len = qword_get(&mesg, vers, size)) > 0);
/* If all get turned off, turn them back on, as
* having no versions is BAD
*/
- nfsd_reset_versions();
+ nfsd_reset_versions(nn);
}
/* Now write current state into reply buffer */
- len = 0;
sep = "";
remaining = SIMPLE_TRANSACTION_LIMIT;
for (num=2 ; num <= 4 ; num++) {
int minor;
- if (!nfsd_vers(num, NFSD_AVAIL))
+ if (!nfsd_vers(nn, num, NFSD_AVAIL))
continue;
minor = -1;
do {
- len = nfsd_print_version_support(buf, remaining,
+ len = nfsd_print_version_support(nn, buf, remaining,
sep, num, minor);
if (len >= remaining)
goto out;
@@ -657,7 +643,7 @@ out:
return tlen + len;
}
-/**
+/*
* write_versions - Set or report the available NFS protocol versions
*
* Input:
@@ -674,11 +660,11 @@ out:
* OR
*
* Input:
- * buf: C string containing whitespace-
- * separated positive or negative
- * integer values representing NFS
- * protocol versions to enable ("+n")
- * or disable ("-n")
+ * buf: C string containing whitespace-
+ * separated positive or negative
+ * integer values representing NFS
+ * protocol versions to enable ("+n")
+ * or disable ("-n")
* size: non-zero length of C string in @buf
* Output:
* On success: status of zero or more protocol versions has
@@ -717,78 +703,76 @@ static ssize_t __write_ports_names(char *buf, struct net *net)
* a socket of a supported family/protocol, and we use it as an
* nfsd listener.
*/
-static ssize_t __write_ports_addfd(char *buf, struct net *net)
+static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred *cred)
{
char *mesg = buf;
int fd, err;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv;
err = get_int(&mesg, &fd);
if (err != 0 || fd < 0)
return -EINVAL;
-
- if (svc_alien_sock(net, fd)) {
- printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
- return -EINVAL;
- }
+ trace_nfsd_ctl_ports_addfd(net, fd);
err = nfsd_create_serv(net);
if (err != 0)
return err;
- err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
- if (err < 0) {
- nfsd_destroy(net);
- return err;
- }
+ serv = nn->nfsd_serv;
+ err = svc_addsock(serv, net, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred);
+
+ if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks))
+ nfsd_destroy_serv(net);
- /* Decrease the count, but don't shut down the service */
- nn->nfsd_serv->sv_nrthreads--;
return err;
}
/*
- * A transport listener is added by writing it's transport name and
+ * A transport listener is added by writing its transport name and
* a port number.
*/
-static ssize_t __write_ports_addxprt(char *buf, struct net *net)
+static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cred *cred)
{
char transport[16];
struct svc_xprt *xprt;
int port, err;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv;
if (sscanf(buf, "%15s %5u", transport, &port) != 2)
return -EINVAL;
if (port < 1 || port > USHRT_MAX)
return -EINVAL;
+ trace_nfsd_ctl_ports_addxprt(net, transport, port);
err = nfsd_create_serv(net);
if (err != 0)
return err;
- err = svc_create_xprt(nn->nfsd_serv, transport, net,
- PF_INET, port, SVC_SOCK_ANONYMOUS);
+ serv = nn->nfsd_serv;
+ err = svc_xprt_create(serv, transport, net,
+ PF_INET, port, SVC_SOCK_ANONYMOUS, cred);
if (err < 0)
goto out_err;
- err = svc_create_xprt(nn->nfsd_serv, transport, net,
- PF_INET6, port, SVC_SOCK_ANONYMOUS);
+ err = svc_xprt_create(serv, transport, net,
+ PF_INET6, port, SVC_SOCK_ANONYMOUS, cred);
if (err < 0 && err != -EAFNOSUPPORT)
goto out_close;
- /* Decrease the count, but don't shut down the service */
- nn->nfsd_serv->sv_nrthreads--;
return 0;
out_close:
- xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port);
+ xprt = svc_find_xprt(serv, transport, net, PF_INET, port);
if (xprt != NULL) {
- svc_close_xprt(xprt);
+ svc_xprt_close(xprt);
svc_xprt_put(xprt);
}
out_err:
- nfsd_destroy(net);
+ if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks))
+ nfsd_destroy_serv(net);
+
return err;
}
@@ -799,15 +783,15 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size,
return __write_ports_names(buf, net);
if (isdigit(buf[0]))
- return __write_ports_addfd(buf, net);
+ return __write_ports_addfd(buf, net, file->f_cred);
if (isalpha(buf[0]))
- return __write_ports_addxprt(buf, net);
+ return __write_ports_addxprt(buf, net, file->f_cred);
return -EINVAL;
}
-/**
+/*
* write_ports - Pass a socket file descriptor or transport name to listen on
*
* Input:
@@ -863,7 +847,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
int nfsd_max_blksize;
-/**
+/*
* write_maxblksize - Set or report the current NFS blksize
*
* Input:
@@ -873,9 +857,9 @@ int nfsd_max_blksize;
* OR
*
* Input:
- * buf: C string containing an unsigned
- * integer value representing the new
- * NFS blksize
+ * buf: C string containing an unsigned
+ * integer value representing the new
+ * NFS blksize
* size: non-zero length of C string in @buf
* Output:
* On success: passed-in buffer filled with '\n'-terminated C string
@@ -894,6 +878,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
int rv = get_int(&mesg, &bsize);
if (rv)
return rv;
+ trace_nfsd_ctl_maxblksize(netns(file), bsize);
+
/* force bsize into allowed range and
* required alignment.
*/
@@ -913,47 +899,11 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
nfsd_max_blksize);
}
-/**
- * write_maxconn - Set or report the current max number of connections
- *
- * Input:
- * buf: ignored
- * size: zero
- * OR
- *
- * Input:
- * buf: C string containing an unsigned
- * integer value representing the new
- * number of max connections
- * size: non-zero length of C string in @buf
- * Output:
- * On success: passed-in buffer filled with '\n'-terminated C string
- * containing numeric value of max_connections setting
- * for this net namespace;
- * return code is the size in bytes of the string
- * On error: return code is zero or a negative errno value
- */
-static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
-{
- char *mesg = buf;
- struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
- unsigned int maxconn = nn->max_connections;
-
- if (size > 0) {
- int rv = get_uint(&mesg, &maxconn);
-
- if (rv)
- return rv;
- nn->max_connections = maxconn;
- }
-
- return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn);
-}
-
#ifdef CONFIG_NFSD_V4
static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
- time_t *time, struct nfsd_net *nn)
+ time64_t *time, struct nfsd_net *nn)
{
+ struct dentry *dentry = file_dentry(file);
char *mesg = buf;
int rv, i;
@@ -963,6 +913,9 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
rv = get_int(&mesg, &i);
if (rv)
return rv;
+ trace_nfsd_ctl_time(netns(file), dentry->d_name.name,
+ dentry->d_name.len, i);
+
/*
* Some sanity checking. We don't have a reason for
* these particular numbers, but problems with the
@@ -980,11 +933,11 @@ static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
*time = i;
}
- return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", *time);
+ return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%lld\n", *time);
}
static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size,
- time_t *time, struct nfsd_net *nn)
+ time64_t *time, struct nfsd_net *nn)
{
ssize_t rv;
@@ -994,7 +947,7 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size,
return rv;
}
-/**
+/*
* write_leasetime - Set or report the current NFSv4 lease time
*
* Input:
@@ -1021,7 +974,7 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn);
}
-/**
+/*
* write_gracetime - Set or report current NFSv4 grace period time
*
* As above, but sets the time of the NFSv4 grace period.
@@ -1037,6 +990,7 @@ static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn);
}
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
struct nfsd_net *nn)
{
@@ -1055,6 +1009,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
len = qword_get(&mesg, recdir, size);
if (len <= 0)
return -EINVAL;
+ trace_nfsd_ctl_recoverydir(netns(file), recdir);
status = nfs4_reset_recoverydir(recdir);
if (status)
@@ -1065,7 +1020,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
nfs4_recoverydir());
}
-/**
+/*
* write_recoverydir - Set or report the pathname of the recovery directory
*
* Input:
@@ -1096,8 +1051,9 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
mutex_unlock(&nfsd_mutex);
return rv;
}
+#endif
-/**
+/*
* write_v4_end_grace - release grace period for nfsd's v4.x lock manager
*
* Input:
@@ -1106,7 +1062,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
* OR
*
* Input:
- * buf: any value
+ * buf: any value
* size: non-zero length of C string in @buf
* Output:
* passed-in buffer filled with "Y" or "N" with a newline
@@ -1126,8 +1082,9 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case 'Y':
case 'y':
case '1':
- if (nn->nfsd_serv)
+ if (!nn->nfsd_serv)
return -EBUSY;
+ trace_nfsd_end_grace(netns(file));
nfsd4_end_grace(nn);
break;
default:
@@ -1146,12 +1103,182 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
* populating the filesystem.
*/
-static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
+static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
{
+ struct inode *inode = new_inode(sb);
+ if (inode) {
+ /* Following advice from simple_fill_super documentation: */
+ inode->i_ino = iunique(sb, NFSD_MaxReserved);
+ inode->i_mode = mode;
+ simple_inode_init_ts(inode);
+ }
+ return inode;
+}
+
+static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *ncl, char *name)
+{
+ struct inode *dir = parent->d_inode;
+ struct dentry *dentry;
+ struct inode *inode;
+
+ inode = nfsd_get_inode(parent->d_sb, S_IFDIR | 0600);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry)) {
+ iput(inode);
+ return dentry;
+ }
+ inode->i_fop = &simple_dir_operations;
+ inode->i_op = &simple_dir_inode_operations;
+ inc_nlink(inode);
+ if (ncl) {
+ inode->i_private = ncl;
+ kref_get(&ncl->cl_ref);
+ }
+ d_make_persistent(dentry, inode);
+ inc_nlink(dir);
+ fsnotify_mkdir(dir, dentry);
+ simple_done_creating(dentry);
+ return dentry; // borrowed
+}
+
+#if IS_ENABLED(CONFIG_SUNRPC_GSS)
+/*
+ * @content is assumed to be a NUL-terminated string that lives
+ * longer than the symlink itself.
+ */
+static void _nfsd_symlink(struct dentry *parent, const char *name,
+ const char *content)
+{
+ struct inode *dir = parent->d_inode;
+ struct inode *inode;
+ struct dentry *dentry;
+
+ inode = nfsd_get_inode(dir->i_sb, S_IFLNK | 0777);
+ if (!inode)
+ return;
+
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry)) {
+ iput(inode);
+ return;
+ }
+
+ inode->i_op = &simple_symlink_inode_operations;
+ inode->i_link = (char *)content;
+ inode->i_size = strlen(content);
+
+ d_make_persistent(dentry, inode);
+ fsnotify_create(dir, dentry);
+ simple_done_creating(dentry);
+}
+#else
+static inline void _nfsd_symlink(struct dentry *parent, const char *name,
+ const char *content)
+{
+}
+
+#endif
+
+static void clear_ncl(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+ struct nfsdfs_client *ncl = inode->i_private;
+
+ spin_lock(&inode->i_lock);
+ inode->i_private = NULL;
+ spin_unlock(&inode->i_lock);
+ kref_put(&ncl->cl_ref, ncl->cl_release);
+}
+
+struct nfsdfs_client *get_nfsdfs_client(struct inode *inode)
+{
+ struct nfsdfs_client *nc;
+
+ spin_lock(&inode->i_lock);
+ nc = inode->i_private;
+ if (nc)
+ kref_get(&nc->cl_ref);
+ spin_unlock(&inode->i_lock);
+ return nc;
+}
+
+/* XXX: cut'n'paste from simple_fill_super; figure out if we could share
+ * code instead. */
+static int nfsdfs_create_files(struct dentry *root,
+ const struct tree_descr *files,
+ struct nfsdfs_client *ncl,
+ struct dentry **fdentries)
+{
+ struct inode *dir = d_inode(root);
+ struct dentry *dentry;
+
+ for (int i = 0; files->name && files->name[0]; i++, files++) {
+ struct inode *inode = nfsd_get_inode(root->d_sb,
+ S_IFREG | files->mode);
+ if (!inode)
+ return -ENOMEM;
+ dentry = simple_start_creating(root, files->name);
+ if (IS_ERR(dentry)) {
+ iput(inode);
+ return PTR_ERR(dentry);
+ }
+ kref_get(&ncl->cl_ref);
+ inode->i_fop = files->ops;
+ inode->i_private = ncl;
+ d_make_persistent(dentry, inode);
+ fsnotify_create(dir, dentry);
+ if (fdentries)
+ fdentries[i] = dentry; // borrowed
+ simple_done_creating(dentry);
+ }
+ return 0;
+}
+
+/* on success, returns positive number unique to that client. */
+struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
+ struct nfsdfs_client *ncl, u32 id,
+ const struct tree_descr *files,
+ struct dentry **fdentries)
+{
+ struct dentry *dentry;
+ char name[11];
+ int ret;
+
+ sprintf(name, "%u", id);
+
+ dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name);
+ if (IS_ERR(dentry)) /* XXX: tossing errors? */
+ return NULL;
+ ret = nfsdfs_create_files(dentry, files, ncl, fdentries);
+ if (ret) {
+ nfsd_client_rmdir(dentry);
+ return NULL;
+ }
+ return dentry;
+}
+
+/* Taken from __rpc_rmdir: */
+void nfsd_client_rmdir(struct dentry *dentry)
+{
+ simple_recursive_removal(dentry, clear_ncl);
+}
+
+static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ struct dentry *dentry;
+ int ret;
+
static const struct tree_descr nfsd_files[] = {
[NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO},
+ /* Per-export io stats use same ops as exports file */
+ [NFSD_Export_Stats] = {"export_stats", &exports_nfsd_operations, S_IRUGO},
[NFSD_Export_features] = {"export_features",
- &export_features_operations, S_IRUGO},
+ &export_features_fops, S_IRUGO},
[NFSD_FO_UnlockIP] = {"unlock_ip",
&transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_FO_UnlockFS] = {"unlock_filesystem",
@@ -1160,50 +1287,91 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO},
- [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO},
+ [NFSD_Reply_Cache_Stats] = {"reply_cache_stats",
+ &nfsd_reply_cache_stats_fops, S_IRUGO},
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
- [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
-#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
- [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
-#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
+ [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+#endif
[NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO},
#endif
/* last one */ {""}
};
- get_net(sb->s_fs_info);
- return simple_fill_super(sb, 0x6e667364, nfsd_files);
+
+ ret = simple_fill_super(sb, 0x6e667364, nfsd_files);
+ if (ret)
+ return ret;
+ _nfsd_symlink(sb->s_root, "supported_krb5_enctypes",
+ "/proc/net/rpc/gss_krb5_enctypes");
+ dentry = nfsd_mkdir(sb->s_root, NULL, "clients");
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ nn->nfsd_client_dir = dentry;
+ return 0;
}
-static struct dentry *nfsd_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int nfsd_fs_get_tree(struct fs_context *fc)
{
- struct net *net = current->nsproxy->net_ns;
- return mount_ns(fs_type, flags, data, net, net->user_ns, nfsd_fill_super);
+ return get_tree_keyed(fc, nfsd_fill_super, get_net(fc->net_ns));
+}
+
+static void nfsd_fs_free_fc(struct fs_context *fc)
+{
+ if (fc->s_fs_info)
+ put_net(fc->s_fs_info);
+}
+
+static const struct fs_context_operations nfsd_fs_context_ops = {
+ .free = nfsd_fs_free_fc,
+ .get_tree = nfsd_fs_get_tree,
+};
+
+static int nfsd_init_fs_context(struct fs_context *fc)
+{
+ put_user_ns(fc->user_ns);
+ fc->user_ns = get_user_ns(fc->net_ns->user_ns);
+ fc->ops = &nfsd_fs_context_ops;
+ return 0;
}
static void nfsd_umount(struct super_block *sb)
{
struct net *net = sb->s_fs_info;
- kill_litter_super(sb);
+ nfsd_shutdown_threads(net);
+
+ kill_anon_super(sb);
put_net(net);
}
static struct file_system_type nfsd_fs_type = {
.owner = THIS_MODULE,
.name = "nfsd",
- .mount = nfsd_mount,
+ .init_fs_context = nfsd_init_fs_context,
.kill_sb = nfsd_umount,
};
MODULE_ALIAS_FS("nfsd");
#ifdef CONFIG_PROC_FS
+
+static int exports_proc_open(struct inode *inode, struct file *file)
+{
+ return exports_net_open(current->nsproxy->net_ns, file);
+}
+
+static const struct proc_ops exports_proc_ops = {
+ .proc_open = exports_proc_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
+};
+
static int create_proc_exports_entry(void)
{
struct proc_dir_entry *entry;
@@ -1211,8 +1379,7 @@ static int create_proc_exports_entry(void)
entry = proc_mkdir("fs/nfs", NULL);
if (!entry)
return -ENOMEM;
- entry = proc_create("exports", 0, entry,
- &exports_proc_operations);
+ entry = proc_create("exports", 0, entry, &exports_proc_ops);
if (!entry) {
remove_proc_entry("fs/nfs", NULL);
return -ENOMEM;
@@ -1228,10 +1395,755 @@ static int create_proc_exports_entry(void)
unsigned int nfsd_net_id;
-static __net_init int nfsd_init_net(struct net *net)
+static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct nfsd_genl_rqstp *genl_rqstp)
+{
+ void *hdr;
+ u32 i;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &nfsd_nl_family, 0, NFSD_CMD_RPC_STATUS_GET);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put_be32(skb, NFSD_A_RPC_STATUS_XID, genl_rqstp->rq_xid) ||
+ nla_put_u32(skb, NFSD_A_RPC_STATUS_FLAGS, genl_rqstp->rq_flags) ||
+ nla_put_u32(skb, NFSD_A_RPC_STATUS_PROG, genl_rqstp->rq_prog) ||
+ nla_put_u32(skb, NFSD_A_RPC_STATUS_PROC, genl_rqstp->rq_proc) ||
+ nla_put_u8(skb, NFSD_A_RPC_STATUS_VERSION, genl_rqstp->rq_vers) ||
+ nla_put_s64(skb, NFSD_A_RPC_STATUS_SERVICE_TIME,
+ ktime_to_us(genl_rqstp->rq_stime),
+ NFSD_A_RPC_STATUS_PAD))
+ return -ENOBUFS;
+
+ switch (genl_rqstp->rq_saddr.sa_family) {
+ case AF_INET: {
+ const struct sockaddr_in *s_in, *d_in;
+
+ s_in = (const struct sockaddr_in *)&genl_rqstp->rq_saddr;
+ d_in = (const struct sockaddr_in *)&genl_rqstp->rq_daddr;
+ if (nla_put_in_addr(skb, NFSD_A_RPC_STATUS_SADDR4,
+ s_in->sin_addr.s_addr) ||
+ nla_put_in_addr(skb, NFSD_A_RPC_STATUS_DADDR4,
+ d_in->sin_addr.s_addr) ||
+ nla_put_be16(skb, NFSD_A_RPC_STATUS_SPORT,
+ s_in->sin_port) ||
+ nla_put_be16(skb, NFSD_A_RPC_STATUS_DPORT,
+ d_in->sin_port))
+ return -ENOBUFS;
+ break;
+ }
+ case AF_INET6: {
+ const struct sockaddr_in6 *s_in, *d_in;
+
+ s_in = (const struct sockaddr_in6 *)&genl_rqstp->rq_saddr;
+ d_in = (const struct sockaddr_in6 *)&genl_rqstp->rq_daddr;
+ if (nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_SADDR6,
+ &s_in->sin6_addr) ||
+ nla_put_in6_addr(skb, NFSD_A_RPC_STATUS_DADDR6,
+ &d_in->sin6_addr) ||
+ nla_put_be16(skb, NFSD_A_RPC_STATUS_SPORT,
+ s_in->sin6_port) ||
+ nla_put_be16(skb, NFSD_A_RPC_STATUS_DPORT,
+ d_in->sin6_port))
+ return -ENOBUFS;
+ break;
+ }
+ }
+
+ for (i = 0; i < genl_rqstp->rq_opcnt; i++)
+ if (nla_put_u32(skb, NFSD_A_RPC_STATUS_COMPOUND_OPS,
+ genl_rqstp->rq_opnum[i]))
+ return -ENOBUFS;
+
+ genlmsg_end(skb, hdr);
+ return 0;
+}
+
+/**
+ * nfsd_nl_rpc_status_get_dumpit - Handle rpc_status_get dumpit
+ * @skb: reply buffer
+ * @cb: netlink metadata and command arguments
+ *
+ * Returns the size of the reply or a negative errno.
+ */
+int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ int i, ret, rqstp_index = 0;
+ struct nfsd_net *nn;
+
+ mutex_lock(&nfsd_mutex);
+
+ nn = net_generic(sock_net(skb->sk), nfsd_net_id);
+ if (!nn->nfsd_serv) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ rcu_read_lock();
+
+ for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) {
+ struct svc_rqst *rqstp;
+
+ if (i < cb->args[0]) /* already consumed */
+ continue;
+
+ rqstp_index = 0;
+ list_for_each_entry_rcu(rqstp,
+ &nn->nfsd_serv->sv_pools[i].sp_all_threads,
+ rq_all) {
+ struct nfsd_genl_rqstp genl_rqstp;
+ unsigned int status_counter;
+
+ if (rqstp_index++ < cb->args[1]) /* already consumed */
+ continue;
+ /*
+ * Acquire rq_status_counter before parsing the rqst
+ * fields. rq_status_counter is set to an odd value in
+ * order to notify the consumers the rqstp fields are
+ * meaningful.
+ */
+ status_counter =
+ smp_load_acquire(&rqstp->rq_status_counter);
+ if (!(status_counter & 1))
+ continue;
+
+ genl_rqstp.rq_xid = rqstp->rq_xid;
+ genl_rqstp.rq_flags = rqstp->rq_flags;
+ genl_rqstp.rq_vers = rqstp->rq_vers;
+ genl_rqstp.rq_prog = rqstp->rq_prog;
+ genl_rqstp.rq_proc = rqstp->rq_proc;
+ genl_rqstp.rq_stime = rqstp->rq_stime;
+ genl_rqstp.rq_opcnt = 0;
+ memcpy(&genl_rqstp.rq_daddr, svc_daddr(rqstp),
+ sizeof(struct sockaddr));
+ memcpy(&genl_rqstp.rq_saddr, svc_addr(rqstp),
+ sizeof(struct sockaddr));
+
+#ifdef CONFIG_NFSD_V4
+ if (rqstp->rq_vers == NFS4_VERSION &&
+ rqstp->rq_proc == NFSPROC4_COMPOUND) {
+ /* NFSv4 compound */
+ struct nfsd4_compoundargs *args;
+ int j;
+
+ args = rqstp->rq_argp;
+ genl_rqstp.rq_opcnt = min_t(u32, args->opcnt,
+ ARRAY_SIZE(genl_rqstp.rq_opnum));
+ for (j = 0; j < genl_rqstp.rq_opcnt; j++)
+ genl_rqstp.rq_opnum[j] =
+ args->ops[j].opnum;
+ }
+#endif /* CONFIG_NFSD_V4 */
+
+ /*
+ * Acquire rq_status_counter before reporting the rqst
+ * fields to the user.
+ */
+ if (smp_load_acquire(&rqstp->rq_status_counter) !=
+ status_counter)
+ continue;
+
+ ret = nfsd_genl_rpc_status_compose_msg(skb, cb,
+ &genl_rqstp);
+ if (ret)
+ goto out;
+ }
+ }
+
+ cb->args[0] = i;
+ cb->args[1] = rqstp_index;
+ ret = skb->len;
+out:
+ rcu_read_unlock();
+out_unlock:
+ mutex_unlock(&nfsd_mutex);
+
+ return ret;
+}
+
+/**
+ * nfsd_nl_threads_set_doit - set the number of running threads
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem;
+ struct net *net = genl_info_net(info);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ const struct nlattr *attr;
+ const char *scope = NULL;
+
+ if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_THREADS))
+ return -EINVAL;
+
+ /* count number of SERVER_THREADS values */
+ nlmsg_for_each_attr_type(attr, NFSD_A_SERVER_THREADS, info->nlhdr,
+ GENL_HDRLEN, rem)
+ nrpools++;
+
+ mutex_lock(&nfsd_mutex);
+
+ nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL);
+ if (!nthreads) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ i = 0;
+ nlmsg_for_each_attr_type(attr, NFSD_A_SERVER_THREADS, info->nlhdr,
+ GENL_HDRLEN, rem) {
+ nthreads[i++] = nla_get_u32(attr);
+ if (i >= nrpools)
+ break;
+ }
+
+ if (info->attrs[NFSD_A_SERVER_GRACETIME] ||
+ info->attrs[NFSD_A_SERVER_LEASETIME] ||
+ info->attrs[NFSD_A_SERVER_SCOPE]) {
+ ret = -EBUSY;
+ if (nn->nfsd_serv && nn->nfsd_serv->sv_nrthreads)
+ goto out_unlock;
+
+ ret = -EINVAL;
+ attr = info->attrs[NFSD_A_SERVER_GRACETIME];
+ if (attr) {
+ u32 gracetime = nla_get_u32(attr);
+
+ if (gracetime < 10 || gracetime > 3600)
+ goto out_unlock;
+
+ nn->nfsd4_grace = gracetime;
+ }
+
+ attr = info->attrs[NFSD_A_SERVER_LEASETIME];
+ if (attr) {
+ u32 leasetime = nla_get_u32(attr);
+
+ if (leasetime < 10 || leasetime > 3600)
+ goto out_unlock;
+
+ nn->nfsd4_lease = leasetime;
+ }
+
+ attr = info->attrs[NFSD_A_SERVER_SCOPE];
+ if (attr)
+ scope = nla_data(attr);
+ }
+
+ ret = nfsd_svc(nrpools, nthreads, net, get_current_cred(), scope);
+ if (ret > 0)
+ ret = 0;
+out_unlock:
+ mutex_unlock(&nfsd_mutex);
+ kfree(nthreads);
+ return ret;
+}
+
+/**
+ * nfsd_nl_threads_get_doit - get the number of running threads
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ void *hdr;
+ int err;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_free_msg;
+ }
+
+ mutex_lock(&nfsd_mutex);
+
+ err = nla_put_u32(skb, NFSD_A_SERVER_GRACETIME,
+ nn->nfsd4_grace) ||
+ nla_put_u32(skb, NFSD_A_SERVER_LEASETIME,
+ nn->nfsd4_lease) ||
+ nla_put_string(skb, NFSD_A_SERVER_SCOPE,
+ nn->nfsd_name);
+ if (err)
+ goto err_unlock;
+
+ if (nn->nfsd_serv) {
+ int i;
+
+ for (i = 0; i < nfsd_nrpools(net); ++i) {
+ struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i];
+
+ err = nla_put_u32(skb, NFSD_A_SERVER_THREADS,
+ sp->sp_nrthreads);
+ if (err)
+ goto err_unlock;
+ }
+ } else {
+ err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, 0);
+ if (err)
+ goto err_unlock;
+ }
+
+ mutex_unlock(&nfsd_mutex);
+
+ genlmsg_end(skb, hdr);
+
+ return genlmsg_reply(skb, info);
+
+err_unlock:
+ mutex_unlock(&nfsd_mutex);
+err_free_msg:
+ nlmsg_free(skb);
+
+ return err;
+}
+
+/**
+ * nfsd_nl_version_set_doit - set the nfs enabled versions
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ const struct nlattr *attr;
+ struct nfsd_net *nn;
+ int i, rem;
+
+ if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_PROTO_VERSION))
+ return -EINVAL;
+
+ mutex_lock(&nfsd_mutex);
+
+ nn = net_generic(genl_info_net(info), nfsd_net_id);
+ if (nn->nfsd_serv) {
+ mutex_unlock(&nfsd_mutex);
+ return -EBUSY;
+ }
+
+ /* clear current supported versions. */
+ nfsd_vers(nn, 2, NFSD_CLEAR);
+ nfsd_vers(nn, 3, NFSD_CLEAR);
+ for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++)
+ nfsd_minorversion(nn, i, NFSD_CLEAR);
+
+ nlmsg_for_each_attr_type(attr, NFSD_A_SERVER_PROTO_VERSION, info->nlhdr,
+ GENL_HDRLEN, rem) {
+ struct nlattr *tb[NFSD_A_VERSION_MAX + 1];
+ u32 major, minor = 0;
+ bool enabled;
+
+ if (nla_parse_nested(tb, NFSD_A_VERSION_MAX, attr,
+ nfsd_version_nl_policy, info->extack) < 0)
+ continue;
+
+ if (!tb[NFSD_A_VERSION_MAJOR])
+ continue;
+
+ major = nla_get_u32(tb[NFSD_A_VERSION_MAJOR]);
+ if (tb[NFSD_A_VERSION_MINOR])
+ minor = nla_get_u32(tb[NFSD_A_VERSION_MINOR]);
+
+ enabled = nla_get_flag(tb[NFSD_A_VERSION_ENABLED]);
+
+ switch (major) {
+ case 4:
+ nfsd_minorversion(nn, minor, enabled ? NFSD_SET : NFSD_CLEAR);
+ break;
+ case 3:
+ case 2:
+ if (!minor)
+ nfsd_vers(nn, major, enabled ? NFSD_SET : NFSD_CLEAR);
+ break;
+ default:
+ break;
+ }
+ }
+
+ mutex_unlock(&nfsd_mutex);
+
+ return 0;
+}
+
+/**
+ * nfsd_nl_version_get_doit - get the enabled status for all supported nfs versions
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nfsd_net *nn;
+ int i, err;
+ void *hdr;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_free_msg;
+ }
+
+ mutex_lock(&nfsd_mutex);
+ nn = net_generic(genl_info_net(info), nfsd_net_id);
+
+ for (i = 2; i <= 4; i++) {
+ int j;
+
+ for (j = 0; j <= NFSD_SUPPORTED_MINOR_VERSION; j++) {
+ struct nlattr *attr;
+
+ /* Don't record any versions the kernel doesn't have
+ * compiled in
+ */
+ if (!nfsd_support_version(i))
+ continue;
+
+ /* NFSv{2,3} does not support minor numbers */
+ if (i < 4 && j)
+ continue;
+
+ attr = nla_nest_start(skb,
+ NFSD_A_SERVER_PROTO_VERSION);
+ if (!attr) {
+ err = -EINVAL;
+ goto err_nfsd_unlock;
+ }
+
+ if (nla_put_u32(skb, NFSD_A_VERSION_MAJOR, i) ||
+ nla_put_u32(skb, NFSD_A_VERSION_MINOR, j)) {
+ err = -EINVAL;
+ goto err_nfsd_unlock;
+ }
+
+ /* Set the enabled flag if the version is enabled */
+ if (nfsd_vers(nn, i, NFSD_TEST) &&
+ (i < 4 || nfsd_minorversion(nn, j, NFSD_TEST)) &&
+ nla_put_flag(skb, NFSD_A_VERSION_ENABLED)) {
+ err = -EINVAL;
+ goto err_nfsd_unlock;
+ }
+
+ nla_nest_end(skb, attr);
+ }
+ }
+
+ mutex_unlock(&nfsd_mutex);
+ genlmsg_end(skb, hdr);
+
+ return genlmsg_reply(skb, info);
+
+err_nfsd_unlock:
+ mutex_unlock(&nfsd_mutex);
+err_free_msg:
+ nlmsg_free(skb);
+
+ return err;
+}
+
+/**
+ * nfsd_nl_listener_set_doit - set the nfs running sockets
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct svc_xprt *xprt, *tmp;
+ const struct nlattr *attr;
+ struct svc_serv *serv;
+ LIST_HEAD(permsocks);
+ struct nfsd_net *nn;
+ bool delete = false;
+ int err, rem;
+
+ mutex_lock(&nfsd_mutex);
+
+ err = nfsd_create_serv(net);
+ if (err) {
+ mutex_unlock(&nfsd_mutex);
+ return err;
+ }
+
+ nn = net_generic(net, nfsd_net_id);
+ serv = nn->nfsd_serv;
+
+ spin_lock_bh(&serv->sv_lock);
+
+ /* Move all of the old listener sockets to a temp list */
+ list_splice_init(&serv->sv_permsocks, &permsocks);
+
+ /*
+ * Walk the list of server_socks from userland and move any that match
+ * back to sv_permsocks
+ */
+ nlmsg_for_each_attr_type(attr, NFSD_A_SERVER_SOCK_ADDR, info->nlhdr,
+ GENL_HDRLEN, rem) {
+ struct nlattr *tb[NFSD_A_SOCK_MAX + 1];
+ const char *xcl_name;
+ struct sockaddr *sa;
+
+ if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr,
+ nfsd_sock_nl_policy, info->extack) < 0)
+ continue;
+
+ if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME])
+ continue;
+
+ if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa))
+ continue;
+
+ xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]);
+ sa = nla_data(tb[NFSD_A_SOCK_ADDR]);
+
+ /* Put back any matching sockets */
+ list_for_each_entry_safe(xprt, tmp, &permsocks, xpt_list) {
+ /* This shouldn't be possible */
+ if (WARN_ON_ONCE(xprt->xpt_net != net)) {
+ list_move(&xprt->xpt_list, &serv->sv_permsocks);
+ continue;
+ }
+
+ /* If everything matches, put it back */
+ if (!strcmp(xprt->xpt_class->xcl_name, xcl_name) &&
+ rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local)) {
+ list_move(&xprt->xpt_list, &serv->sv_permsocks);
+ break;
+ }
+ }
+ }
+
+ /*
+ * If there are listener transports remaining on the permsocks list,
+ * it means we were asked to remove a listener.
+ */
+ if (!list_empty(&permsocks)) {
+ list_splice_init(&permsocks, &serv->sv_permsocks);
+ delete = true;
+ }
+ spin_unlock_bh(&serv->sv_lock);
+
+ /* Do not remove listeners while there are active threads. */
+ if (serv->sv_nrthreads) {
+ err = -EBUSY;
+ goto out_unlock_mtx;
+ }
+
+ /*
+ * Since we can't delete an arbitrary llist entry, destroy the
+ * remaining listeners and recreate the list.
+ */
+ if (delete)
+ svc_xprt_destroy_all(serv, net, false);
+
+ /* walk list of addrs again, open any that still don't exist */
+ nlmsg_for_each_attr_type(attr, NFSD_A_SERVER_SOCK_ADDR, info->nlhdr,
+ GENL_HDRLEN, rem) {
+ struct nlattr *tb[NFSD_A_SOCK_MAX + 1];
+ const char *xcl_name;
+ struct sockaddr *sa;
+ int ret;
+
+ if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr,
+ nfsd_sock_nl_policy, info->extack) < 0)
+ continue;
+
+ if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME])
+ continue;
+
+ if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa))
+ continue;
+
+ xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]);
+ sa = nla_data(tb[NFSD_A_SOCK_ADDR]);
+
+ xprt = svc_find_listener(serv, xcl_name, net, sa);
+ if (xprt) {
+ if (delete)
+ WARN_ONCE(1, "Transport type=%s already exists\n",
+ xcl_name);
+ svc_xprt_put(xprt);
+ continue;
+ }
+
+ ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0,
+ get_current_cred());
+ /* always save the latest error */
+ if (ret < 0)
+ err = ret;
+ }
+
+ if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks))
+ nfsd_destroy_serv(net);
+
+out_unlock_mtx:
+ mutex_unlock(&nfsd_mutex);
+
+ return err;
+}
+
+/**
+ * nfsd_nl_listener_get_doit - get the nfs running listeners
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct svc_xprt *xprt;
+ struct svc_serv *serv;
+ struct nfsd_net *nn;
+ void *hdr;
+ int err;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_free_msg;
+ }
+
+ mutex_lock(&nfsd_mutex);
+ nn = net_generic(genl_info_net(info), nfsd_net_id);
+
+ /* no nfs server? Just send empty socket list */
+ if (!nn->nfsd_serv)
+ goto out_unlock_mtx;
+
+ serv = nn->nfsd_serv;
+ spin_lock_bh(&serv->sv_lock);
+ list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
+ struct nlattr *attr;
+
+ attr = nla_nest_start(skb, NFSD_A_SERVER_SOCK_ADDR);
+ if (!attr) {
+ err = -EINVAL;
+ goto err_serv_unlock;
+ }
+
+ if (nla_put_string(skb, NFSD_A_SOCK_TRANSPORT_NAME,
+ xprt->xpt_class->xcl_name) ||
+ nla_put(skb, NFSD_A_SOCK_ADDR,
+ sizeof(struct sockaddr_storage),
+ &xprt->xpt_local)) {
+ err = -EINVAL;
+ goto err_serv_unlock;
+ }
+
+ nla_nest_end(skb, attr);
+ }
+ spin_unlock_bh(&serv->sv_lock);
+out_unlock_mtx:
+ mutex_unlock(&nfsd_mutex);
+ genlmsg_end(skb, hdr);
+
+ return genlmsg_reply(skb, info);
+
+err_serv_unlock:
+ spin_unlock_bh(&serv->sv_lock);
+ mutex_unlock(&nfsd_mutex);
+err_free_msg:
+ nlmsg_free(skb);
+
+ return err;
+}
+
+/**
+ * nfsd_nl_pool_mode_set_doit - set the number of running threads
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_pool_mode_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ const struct nlattr *attr;
+
+ if (GENL_REQ_ATTR_CHECK(info, NFSD_A_POOL_MODE_MODE))
+ return -EINVAL;
+
+ attr = info->attrs[NFSD_A_POOL_MODE_MODE];
+ return sunrpc_set_pool_mode(nla_data(attr));
+}
+
+/**
+ * nfsd_nl_pool_mode_get_doit - get info about pool_mode
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_pool_mode_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ char buf[16];
+ void *hdr;
+ int err;
+
+ if (sunrpc_get_pool_mode(buf, ARRAY_SIZE(buf)) >= ARRAY_SIZE(buf))
+ return -ERANGE;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ err = -EMSGSIZE;
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr)
+ goto err_free_msg;
+
+ err = nla_put_string(skb, NFSD_A_POOL_MODE_MODE, buf) |
+ nla_put_u32(skb, NFSD_A_POOL_MODE_NPOOLS, nfsd_nrpools(net));
+ if (err)
+ goto err_free_msg;
+
+ genlmsg_end(skb, hdr);
+ return genlmsg_reply(skb, info);
+
+err_free_msg:
+ nlmsg_free(skb);
+ return err;
+}
+
+/**
+ * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace
+ * @net: a freshly-created network namespace
+ *
+ * This information stays around as long as the network namespace is
+ * alive whether or not there is an NFSD instance running in the
+ * namespace.
+ *
+ * Returns zero on success, or a negative errno otherwise.
+ */
+static __net_init int nfsd_net_init(struct net *net)
{
- int retval;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ int retval;
+ int i;
retval = nfsd_export_init(net);
if (retval)
@@ -1239,32 +2151,81 @@ static __net_init int nfsd_init_net(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
- nn->nfsd4_lease = 45; /* default lease time */
- nn->nfsd4_grace = 45;
- nn->somebody_reclaimed = false;
- nn->clverifier_counter = prandom_u32();
- nn->clientid_counter = prandom_u32();
- nn->s2s_cp_cl_id = nn->clientid_counter++;
-
- atomic_set(&nn->ntf_refcnt, 0);
- init_waitqueue_head(&nn->ntf_wq);
+ retval = percpu_counter_init_many(nn->counter, 0, GFP_KERNEL,
+ NFSD_STATS_COUNTERS_NUM);
+ if (retval)
+ goto out_repcache_error;
+
+ memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats));
+ nn->nfsd_svcstats.program = &nfsd_programs[0];
+ if (!nfsd_proc_stat_init(net)) {
+ retval = -ENOMEM;
+ goto out_proc_error;
+ }
+
+ for (i = 0; i < sizeof(nn->nfsd_versions); i++)
+ nn->nfsd_versions[i] = nfsd_support_version(i);
+ for (i = 0; i < sizeof(nn->nfsd4_minorversions); i++)
+ nn->nfsd4_minorversions[i] = nfsd_support_version(4);
+ nn->nfsd_info.mutex = &nfsd_mutex;
+ nn->nfsd_serv = NULL;
+ nfsd4_init_leases_net(nn);
+ get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
+ seqlock_init(&nn->writeverf_lock);
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ spin_lock_init(&nn->local_clients_lock);
+ INIT_LIST_HEAD(&nn->local_clients);
+#endif
return 0;
+out_proc_error:
+ percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM);
+out_repcache_error:
+ nfsd_idmap_shutdown(net);
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
return retval;
}
-static __net_exit void nfsd_exit_net(struct net *net)
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+/**
+ * nfsd_net_pre_exit - Disconnect localio clients from net namespace
+ * @net: a network namespace that is about to be destroyed
+ *
+ * This invalidates ->net pointers held by localio clients
+ * while they can still safely access nn->counter.
+ */
+static __net_exit void nfsd_net_pre_exit(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfs_localio_invalidate_clients(&nn->local_clients,
+ &nn->local_clients_lock);
+}
+#endif
+
+/**
+ * nfsd_net_exit - Release the nfsd_net portion of a net namespace
+ * @net: a network namespace that is about to be destroyed
+ *
+ */
+static __net_exit void nfsd_net_exit(struct net *net)
{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ nfsd_proc_stat_shutdown(net);
+ percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
}
static struct pernet_operations nfsd_net_ops = {
- .init = nfsd_init_net,
- .exit = nfsd_exit_net,
+ .init = nfsd_net_init,
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ .pre_exit = nfsd_net_pre_exit,
+#endif
+ .exit = nfsd_net_exit,
.id = &nfsd_net_id,
.size = sizeof(struct nfsd_net),
};
@@ -1272,71 +2233,79 @@ static struct pernet_operations nfsd_net_ops = {
static int __init init_nfsd(void)
{
int retval;
- printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
- retval = register_pernet_subsys(&nfsd_net_ops);
- if (retval < 0)
- return retval;
- retval = register_cld_notifier();
- if (retval)
- goto out_unregister_pernet;
+ nfsd_debugfs_init();
+
retval = nfsd4_init_slabs();
if (retval)
- goto out_unregister_notifier;
+ return retval;
retval = nfsd4_init_pnfs();
if (retval)
goto out_free_slabs;
- retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
+ retval = nfsd_drc_slab_create();
if (retval)
- goto out_exit_pnfs;
- nfsd_stat_init(); /* Statistics */
- retval = nfsd_reply_cache_init();
- if (retval)
- goto out_free_stat;
+ goto out_free_pnfs;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
- retval = create_proc_exports_entry();
- if (retval)
+ retval = register_pernet_subsys(&nfsd_net_ops);
+ if (retval < 0)
goto out_free_lockd;
+ retval = register_cld_notifier();
+ if (retval)
+ goto out_free_subsys;
+ retval = nfsd4_create_laundry_wq();
+ if (retval)
+ goto out_free_cld;
retval = register_filesystem(&nfsd_fs_type);
if (retval)
+ goto out_free_nfsd4;
+ retval = genl_register_family(&nfsd_nl_family);
+ if (retval)
+ goto out_free_filesystem;
+ retval = create_proc_exports_entry();
+ if (retval)
goto out_free_all;
+ nfsd_localio_ops_init();
+
return 0;
out_free_all:
- remove_proc_entry("fs/nfs/exports", NULL);
- remove_proc_entry("fs/nfs", NULL);
+ genl_unregister_family(&nfsd_nl_family);
+out_free_filesystem:
+ unregister_filesystem(&nfsd_fs_type);
+out_free_nfsd4:
+ nfsd4_destroy_laundry_wq();
+out_free_cld:
+ unregister_cld_notifier();
+out_free_subsys:
+ unregister_pernet_subsys(&nfsd_net_ops);
out_free_lockd:
nfsd_lockd_shutdown();
- nfsd_reply_cache_shutdown();
-out_free_stat:
- nfsd_stat_shutdown();
- nfsd_fault_inject_cleanup();
-out_exit_pnfs:
+ nfsd_drc_slab_free();
+out_free_pnfs:
nfsd4_exit_pnfs();
out_free_slabs:
nfsd4_free_slabs();
-out_unregister_notifier:
- unregister_cld_notifier();
-out_unregister_pernet:
- unregister_pernet_subsys(&nfsd_net_ops);
+ nfsd_debugfs_exit();
return retval;
}
static void __exit exit_nfsd(void)
{
- nfsd_reply_cache_shutdown();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
- nfsd_stat_shutdown();
- nfsd_lockd_shutdown();
- nfsd4_free_slabs();
- nfsd4_exit_pnfs();
- nfsd_fault_inject_cleanup();
+ genl_unregister_family(&nfsd_nl_family);
unregister_filesystem(&nfsd_fs_type);
+ nfsd4_destroy_laundry_wq();
unregister_cld_notifier();
unregister_pernet_subsys(&nfsd_net_ops);
+ nfsd_drc_slab_free();
+ nfsd_lockd_shutdown();
+ nfsd4_free_slabs();
+ nfsd4_exit_pnfs();
+ nfsd_debugfs_exit();
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_DESCRIPTION("In-kernel NFS server");
MODULE_LICENSE("GPL");
module_init(init_nfsd)
module_exit(exit_nfsd)
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 066899929863..e4263326ca4a 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -17,11 +17,12 @@
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/addr.h>
#include <uapi/linux/nfsd/debug.h>
-#include "stats.h"
#include "export.h"
#undef ifdebug
@@ -34,56 +35,90 @@
/*
* nfsd version
*/
+#define NFSD_MINVERS 2
+#define NFSD_MAXVERS 4
#define NFSD_SUPPORTED_MINOR_VERSION 2
-/*
- * Maximum blocksizes supported by daemon under various circumstances.
- */
-#define NFSSVC_MAXBLKSIZE RPCSVC_MAXPAYLOAD
-/* NFSv2 is limited by the protocol specification, see RFC 1094 */
-#define NFSSVC_MAXBLKSIZE_V2 (8*1024)
+bool nfsd_support_version(int vers);
+#include "netns.h"
+#include "stats.h"
/*
- * Largest number of bytes we need to allocate for an NFS
- * call or reply. Used to control buffer sizes. We use
- * the length of v3 WRITE, READDIR and READDIR replies
- * which are an RPC header, up to 26 XDR units of reply
- * data, and some page data.
- *
- * Note that accuracy here doesn't matter too much as the
- * size is rounded up to a page size when allocating space.
+ * Default and maximum payload size (NFS READ or WRITE), in bytes.
+ * The default is historical, and the maximum is an implementation
+ * limit.
*/
-#define NFSD_BUFSIZE ((RPC_MAX_HEADER_WITH_AUTH+26)*XDR_UNIT + NFSSVC_MAXBLKSIZE)
+enum {
+ NFSSVC_DEFBLKSIZE = 1 * 1024 * 1024,
+ NFSSVC_MAXBLKSIZE = RPCSVC_MAXPAYLOAD,
+};
struct readdir_cd {
__be32 err; /* 0, nfserr, or nfserr_eof */
};
+/* Maximum number of operations per session compound */
+#define NFSD_MAX_OPS_PER_COMPOUND 200
+
+struct nfsd_genl_rqstp {
+ struct sockaddr rq_daddr;
+ struct sockaddr rq_saddr;
+ unsigned long rq_flags;
+ ktime_t rq_stime;
+ __be32 rq_xid;
+ u32 rq_vers;
+ u32 rq_prog;
+ u32 rq_proc;
+
+ /* NFSv4 compound */
+ u32 rq_opcnt;
+ u32 rq_opnum[16];
+};
-extern struct svc_program nfsd_program;
-extern const struct svc_version nfsd_version2, nfsd_version3,
- nfsd_version4;
+extern struct svc_program nfsd_programs[];
+extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
extern struct mutex nfsd_mutex;
-extern spinlock_t nfsd_drc_lock;
-extern unsigned long nfsd_drc_max_mem;
-extern unsigned long nfsd_drc_mem_used;
+extern atomic_t nfsd_th_cnt; /* number of available threads */
extern const struct seq_operations nfs_exports_op;
/*
+ * Common void argument and result helpers
+ */
+struct nfsd_voidargs { };
+struct nfsd_voidres { };
+bool nfssvc_decode_voidarg(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr);
+bool nfssvc_encode_voidres(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr);
+
+/*
* Function prototypes.
*/
-int nfsd_svc(int nrservs, struct net *net);
-int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
+int nfsd_svc(int n, int *nservers, struct net *net,
+ const struct cred *cred, const char *scope);
+int nfsd_dispatch(struct svc_rqst *rqstp);
int nfsd_nrthreads(struct net *);
int nfsd_nrpools(struct net *);
int nfsd_get_nrthreads(int n, int *, struct net *);
int nfsd_set_nrthreads(int n, int *, struct net *);
-int nfsd_pool_stats_open(struct inode *, struct file *);
-int nfsd_pool_stats_release(struct inode *, struct file *);
+void nfsd_shutdown_threads(struct net *net);
+
+struct svc_rqst *nfsd_current_rqst(void);
+
+struct nfsdfs_client {
+ struct kref cl_ref;
+ void (*cl_release)(struct kref *kref);
+};
+
+struct nfsdfs_client *get_nfsdfs_client(struct inode *);
+struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
+ struct nfsdfs_client *ncl, u32 id,
+ const struct tree_descr *,
+ struct dentry **fdentries);
+void nfsd_client_rmdir(struct dentry *dentry);
-void nfsd_destroy(struct net *net);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
#ifdef CONFIG_NFSD_V2_ACL
@@ -98,17 +133,50 @@ extern const struct svc_version nfsd_acl_version3;
#endif
#endif
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+extern const struct svc_version localio_version1;
+#endif
+
+struct nfsd_net;
+
enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
-int nfsd_vers(int vers, enum vers_op change);
-int nfsd_minorversion(u32 minorversion, enum vers_op change);
-void nfsd_reset_versions(void);
+int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change);
+int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change);
+void nfsd_reset_versions(struct nfsd_net *nn);
int nfsd_create_serv(struct net *net);
+void nfsd_destroy_serv(struct net *net);
+
+#ifdef CONFIG_DEBUG_FS
+void nfsd_debugfs_init(void);
+void nfsd_debugfs_exit(void);
+#else
+static inline void nfsd_debugfs_init(void) {}
+static inline void nfsd_debugfs_exit(void) {}
+#endif
+
+extern bool nfsd_disable_splice_read __read_mostly;
+
+enum {
+ /* Any new NFSD_IO enum value must be added at the end */
+ NFSD_IO_BUFFERED,
+ NFSD_IO_DONTCACHE,
+ NFSD_IO_DIRECT,
+};
+
+extern u64 nfsd_io_cache_read __read_mostly;
+extern u64 nfsd_io_cache_write __read_mostly;
extern int nfsd_max_blksize;
static inline int nfsd_v4client(struct svc_rqst *rq)
{
- return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
+ return rq && rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
+}
+static inline struct user_namespace *
+nfsd_user_namespace(const struct svc_rqst *rqstp)
+{
+ const struct cred *cred = rqstp->rq_xprt->xpt_cred;
+ return cred ? cred->user_ns : &init_user_ns;
}
/*
@@ -122,10 +190,12 @@ int nfs4_state_start(void);
int nfs4_state_start_net(struct net *net);
void nfs4_state_shutdown(void);
void nfs4_state_shutdown_net(struct net *net);
-void nfs4_reset_lease(time_t leasetime);
int nfs4_reset_recoverydir(char *recdir);
char * nfs4_recoverydir(void);
bool nfsd4_spo_must_allow(struct svc_rqst *rqstp);
+int nfsd4_create_laundry_wq(void);
+void nfsd4_destroy_laundry_wq(void);
+bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode);
#else
static inline int nfsd4_init_slabs(void) { return 0; }
static inline void nfsd4_free_slabs(void) { }
@@ -133,13 +203,19 @@ static inline int nfs4_state_start(void) { return 0; }
static inline int nfs4_state_start_net(struct net *net) { return 0; }
static inline void nfs4_state_shutdown(void) { }
static inline void nfs4_state_shutdown_net(struct net *net) { }
-static inline void nfs4_reset_lease(time_t leasetime) { }
static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
static inline char * nfs4_recoverydir(void) {return NULL; }
static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp)
{
return false;
}
+static inline int nfsd4_create_laundry_wq(void) { return 0; };
+static inline void nfsd4_destroy_laundry_wq(void) {};
+static inline bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp,
+ struct inode *inode)
+{
+ return false;
+}
#endif
/*
@@ -169,7 +245,6 @@ void nfsd_lockd_shutdown(void);
#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC)
#define nfserr_rofs cpu_to_be32(NFSERR_ROFS)
#define nfserr_mlink cpu_to_be32(NFSERR_MLINK)
-#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP)
#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG)
#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY)
#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT)
@@ -214,9 +289,11 @@ void nfsd_lockd_shutdown(void);
#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
+#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
+#define nfserr_delay cpu_to_be32(NFS4ERR_DELAY)
#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE)
#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT)
#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
@@ -260,19 +337,35 @@ void nfsd_lockd_shutdown(void);
#define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP)
#define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED)
#define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS)
-#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL)
+#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL)
+#define nfserr_file_open cpu_to_be32(NFS4ERR_FILE_OPEN)
+#define nfserr_xattr2big cpu_to_be32(NFS4ERR_XATTR2BIG)
+#define nfserr_noxattr cpu_to_be32(NFS4ERR_NOXATTR)
-/* error codes for internal use */
-/* if a request fails due to kmalloc failure, it gets dropped.
- * Client should resend eventually
+/*
+ * Error codes for internal use. We use enum to choose numbers that are
+ * not already assigned, then covert to be32 resulting in a number that
+ * cannot conflict with any existing be32 nfserr value.
*/
-#define nfserr_dropit cpu_to_be32(30000)
+enum {
/* end-of-file indicator in readdir */
-#define nfserr_eof cpu_to_be32(30001)
+ NFSERR_EOF = NFS4ERR_FIRST_FREE,
+#define nfserr_eof cpu_to_be32(NFSERR_EOF)
+
/* replay detected */
-#define nfserr_replay_me cpu_to_be32(11001)
+ NFSERR_REPLAY_ME,
+#define nfserr_replay_me cpu_to_be32(NFSERR_REPLAY_ME)
+
/* nfs41 replay detected */
-#define nfserr_replay_cache cpu_to_be32(11002)
+ NFSERR_REPLAY_CACHE,
+#define nfserr_replay_cache cpu_to_be32(NFSERR_REPLAY_CACHE)
+
+/* symlink found where dir expected - handled differently to
+ * other symlink found errors by NFSv3.
+ */
+ NFSERR_SYMLINK_NOT_DIR,
+#define nfserr_symlink_not_dir cpu_to_be32(NFSERR_SYMLINK_NOT_DIR)
+};
/* Check for dir entries '.' and '..' */
#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
@@ -298,16 +391,20 @@ void nfsd_lockd_shutdown(void);
#define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */
#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
+#define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */
+#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128
+#define NFS4_CLIENTS_PER_GB 1024
+#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */
+#define NFSD_CB_GETATTR_TIMEOUT NFSD_DELEGRETURN_TIMEOUT
/*
- * The following attributes are currently not supported by the NFSv4 server:
+ * The following attributes are not implemented by NFSD:
* ARCHIVE (deprecated anyway)
* HIDDEN (unlikely to be supported any time soon)
* MIMETYPE (unlikely to be supported any time soon)
* QUOTA_* (will be supported in a forthcoming patch)
* SYSTEM (unlikely to be supported any time soon)
* TIME_BACKUP (unlikely to be supported any time soon)
- * TIME_CREATE (unlikely to be supported any time soon)
*/
#define NFSD4_SUPPORTED_ATTRS_WORD0 \
(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \
@@ -326,7 +423,7 @@ void nfsd_lockd_shutdown(void);
| FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \
| FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \
| FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \
- | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \
+ | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_CREATE \
| FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
#define NFSD4_SUPPORTED_ATTRS_WORD2 0
@@ -360,12 +457,47 @@ void nfsd_lockd_shutdown(void);
#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
- FATTR4_WORD2_CHANGE_ATTR_TYPE | \
FATTR4_WORD2_MODE_UMASK | \
- NFSD4_2_SECURITY_ATTRS)
+ FATTR4_WORD2_CLONE_BLKSIZE | \
+ NFSD4_2_SECURITY_ATTRS | \
+ FATTR4_WORD2_XATTR_SUPPORT | \
+ FATTR4_WORD2_TIME_DELEG_ACCESS | \
+ FATTR4_WORD2_TIME_DELEG_MODIFY | \
+ FATTR4_WORD2_OPEN_ARGUMENTS)
extern const u32 nfsd_suppattrs[3][3];
+static inline __be32 nfsd4_set_netaddr(struct sockaddr *addr,
+ struct nfs42_netaddr *netaddr)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in *)addr;
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
+ unsigned int port;
+ size_t ret_addr, ret_port;
+
+ switch (addr->sa_family) {
+ case AF_INET:
+ port = ntohs(sin->sin_port);
+ sprintf(netaddr->netid, "tcp");
+ netaddr->netid_len = 3;
+ break;
+ case AF_INET6:
+ port = ntohs(sin6->sin6_port);
+ sprintf(netaddr->netid, "tcp6");
+ netaddr->netid_len = 4;
+ break;
+ default:
+ return nfserr_inval;
+ }
+ ret_addr = rpc_ntop(addr, netaddr->addr, sizeof(netaddr->addr));
+ ret_port = snprintf(netaddr->addr + ret_addr,
+ RPCBIND_MAXUADDRLEN + 1 - ret_addr,
+ ".%u.%u", port >> 8, port & 0xff);
+ WARN_ON(ret_port >= RPCBIND_MAXUADDRLEN + 1 - ret_addr);
+ netaddr->addr_len = ret_addr + ret_port;
+ return 0;
+}
+
static inline bool bmval_is_subset(const u32 *bm1, const u32 *bm2)
{
return !((bm1[0] & ~bm2[0]) ||
@@ -391,7 +523,8 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
#define NFSD_WRITEABLE_ATTRS_WORD1 \
(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
- | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_CREATE \
+ | FATTR4_WORD1_TIME_MODIFY_SET)
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#define MAYBE_FATTR4_WORD2_SECURITY_LABEL \
FATTR4_WORD2_SECURITY_LABEL
@@ -400,7 +533,10 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
#endif
#define NFSD_WRITEABLE_ATTRS_WORD2 \
(FATTR4_WORD2_MODE_UMASK \
- | MAYBE_FATTR4_WORD2_SECURITY_LABEL)
+ | MAYBE_FATTR4_WORD2_SECURITY_LABEL \
+ | FATTR4_WORD2_TIME_DELEG_ACCESS \
+ | FATTR4_WORD2_TIME_DELEG_MODIFY \
+ )
#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
NFSD_WRITEABLE_ATTRS_WORD0
@@ -417,12 +553,20 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval)
extern int nfsd4_is_junction(struct dentry *dentry);
extern int register_cld_notifier(void);
extern void unregister_cld_notifier(void);
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
+#endif
+
+extern void nfsd4_init_leases_net(struct nfsd_net *nn);
+
#else /* CONFIG_NFSD_V4 */
static inline int nfsd4_is_junction(struct dentry *dentry)
{
return 0;
}
+static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
+
#define register_cld_notifier() 0
#define unregister_cld_notifier() do { } while(0)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index b319080288c3..ed85dd43da18 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -14,6 +14,7 @@
#include "nfsd.h"
#include "vfs.h"
#include "auth.h"
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_FH
@@ -39,7 +40,8 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
/* make sure parents give x permission to user */
int err;
parent = dget_parent(tdentry);
- err = inode_permission(d_inode(parent), MAY_EXEC);
+ err = inode_permission(&nop_mnt_idmap,
+ d_inode(parent), MAY_EXEC);
if (err < 0) {
dput(parent);
break;
@@ -60,8 +62,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
* the write call).
*/
static inline __be32
-nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry,
- umode_t requested)
+nfsd_mode_check(struct dentry *dentry, umode_t requested)
{
umode_t mode = d_inode(dentry)->i_mode & S_IFMT;
@@ -74,36 +75,36 @@ nfsd_mode_check(struct svc_rqst *rqstp, struct dentry *dentry,
}
return nfs_ok;
}
- /*
- * v4 has an error more specific than err_notdir which we should
- * return in preference to err_notdir:
- */
- if (rqstp->rq_vers == 4 && mode == S_IFLNK)
+ if (mode == S_IFLNK) {
+ if (requested == S_IFDIR)
+ return nfserr_symlink_not_dir;
return nfserr_symlink;
+ }
if (requested == S_IFDIR)
return nfserr_notdir;
if (mode == S_IFDIR)
return nfserr_isdir;
- return nfserr_inval;
+ return nfserr_wrong_type;
}
-static bool nfsd_originating_port_ok(struct svc_rqst *rqstp, int flags)
+static bool nfsd_originating_port_ok(struct svc_rqst *rqstp,
+ struct svc_cred *cred,
+ struct svc_export *exp)
{
- if (flags & NFSEXP_INSECURE_PORT)
+ if (nfsexp_flags(cred, exp) & NFSEXP_INSECURE_PORT)
return true;
/* We don't require gss requests to use low ports: */
- if (rqstp->rq_cred.cr_flavor >= RPC_AUTH_GSS)
+ if (cred->cr_flavor >= RPC_AUTH_GSS)
return true;
return test_bit(RQ_SECURE, &rqstp->rq_flags);
}
static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
+ struct svc_cred *cred,
struct svc_export *exp)
{
- int flags = nfsexp_flags(rqstp, exp);
-
/* Check if the request originated from a secure port. */
- if (!nfsd_originating_port_ok(rqstp, flags)) {
+ if (rqstp && !nfsd_originating_port_ok(rqstp, cred, exp)) {
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("nfsd: request from insecure port %s!\n",
svc_print_addr(rqstp, buf, sizeof(buf)));
@@ -111,23 +112,15 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
}
/* Set user creds for this exportpoint */
- return nfserrno(nfsd_setuser(rqstp, exp));
+ return nfserrno(nfsd_setuser(cred, exp));
}
-static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
- struct dentry *dentry, struct svc_export *exp)
+static inline __be32 check_pseudo_root(struct dentry *dentry,
+ struct svc_export *exp)
{
if (!(exp->ex_flags & NFSEXP_V4ROOT))
return nfs_ok;
/*
- * v2/v3 clients have no need for the V4ROOT export--they use
- * the mount protocl instead; also, further V4ROOT checks may be
- * in v4-specific code, in which case v2/v3 clients could bypass
- * them.
- */
- if (!nfsd_v4client(rqstp))
- return nfserr_stale;
- /*
* We're exposing only the directories and symlinks that have to be
* traversed on the way to real exports:
*/
@@ -149,71 +142,68 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
* dentry. On success, the results are used to set fh_export and
* fh_dentry.
*/
-static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
+static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net,
+ struct svc_cred *cred,
+ struct auth_domain *client,
+ struct auth_domain *gssclient,
+ struct svc_fh *fhp)
{
struct knfsd_fh *fh = &fhp->fh_handle;
- struct fid *fid = NULL, sfid;
+ struct fid *fid = NULL;
struct svc_export *exp;
struct dentry *dentry;
int fileid_type;
int data_left = fh->fh_size/4;
+ int len;
__be32 error;
- error = nfserr_stale;
- if (rqstp->rq_vers > 2)
- error = nfserr_badhandle;
- if (rqstp->rq_vers == 4 && fh->fh_size == 0)
+ error = nfserr_badhandle;
+ if (fh->fh_size == 0)
return nfserr_nofilehandle;
- if (fh->fh_version == 1) {
- int len;
+ if (fh->fh_version != 1)
+ return error;
- if (--data_left < 0)
- return error;
- if (fh->fh_auth_type != 0)
- return error;
- len = key_len(fh->fh_fsid_type) / 4;
- if (len == 0)
- return error;
- if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
- /* deprecated, convert to type 3 */
- len = key_len(FSID_ENCODE_DEV)/4;
- fh->fh_fsid_type = FSID_ENCODE_DEV;
- /*
- * struct knfsd_fh uses host-endian fields, which are
- * sometimes used to hold net-endian values. This
- * confuses sparse, so we must use __force here to
- * keep it from complaining.
- */
- fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
- ntohl((__force __be32)fh->fh_fsid[1])));
- fh->fh_fsid[1] = fh->fh_fsid[2];
- }
- data_left -= len;
- if (data_left < 0)
- return error;
- exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
- fid = (struct fid *)(fh->fh_fsid + len);
- } else {
- __u32 tfh[2];
- dev_t xdev;
- ino_t xino;
+ if (--data_left < 0)
+ return error;
+ if (fh->fh_auth_type != 0)
+ return error;
+ len = key_len(fh->fh_fsid_type) / 4;
+ if (len == 0)
+ return error;
+ if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
+ u32 *fsid = fh_fsid(fh);
- if (fh->fh_size != NFS_FHSIZE)
- return error;
- /* assume old filehandle format */
- xdev = old_decode_dev(fh->ofh_xdev);
- xino = u32_to_ino_t(fh->ofh_xino);
- mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL);
- exp = rqst_exp_find(rqstp, FSID_DEV, tfh);
+ /* deprecated, convert to type 3 */
+ len = key_len(FSID_ENCODE_DEV)/4;
+ fh->fh_fsid_type = FSID_ENCODE_DEV;
+ /*
+ * struct knfsd_fh uses host-endian fields, which are
+ * sometimes used to hold net-endian values. This
+ * confuses sparse, so we must use __force here to
+ * keep it from complaining.
+ */
+ fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fsid[0]),
+ ntohl((__force __be32)fsid[1])));
+ fsid[1] = fsid[2];
}
+ data_left -= len;
+ if (data_left < 0)
+ return error;
+ exp = rqst_exp_find(rqstp ? &rqstp->rq_chandle : NULL,
+ net, client, gssclient,
+ fh->fh_fsid_type, fh_fsid(fh));
+ fid = (struct fid *)(fh_fsid(fh) + len);
error = nfserr_stale;
- if (PTR_ERR(exp) == -ENOENT)
- return error;
+ if (IS_ERR(exp)) {
+ trace_nfsd_set_fh_dentry_badexport(rqstp, fhp, PTR_ERR(exp));
+
+ if (PTR_ERR(exp) == -ENOENT)
+ return error;
- if (IS_ERR(exp))
return nfserrno(PTR_ERR(exp));
+ }
if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
/* Elevate privileges so that the lack of 'r' or 'x'
@@ -234,9 +224,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
cap_raise_nfsd_set(new->cap_effective,
new->cap_permitted);
put_cred(override_creds(new));
- put_cred(new);
} else {
- error = nfsd_setuser_and_check_port(rqstp, exp);
+ error = nfsd_setuser_and_check_port(rqstp, cred, exp);
if (error)
goto out;
}
@@ -244,29 +233,27 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
/*
* Look up the dentry using the NFS file handle.
*/
- error = nfserr_stale;
- if (rqstp->rq_vers > 2)
- error = nfserr_badhandle;
-
- if (fh->fh_version != 1) {
- sfid.i32.ino = fh->ofh_ino;
- sfid.i32.gen = fh->ofh_generation;
- sfid.i32.parent_ino = fh->ofh_dirino;
- fid = &sfid;
- data_left = 3;
- if (fh->ofh_dirino == 0)
- fileid_type = FILEID_INO32_GEN;
- else
- fileid_type = FILEID_INO32_GEN_PARENT;
- } else
- fileid_type = fh->fh_fileid_type;
+ error = nfserr_badhandle;
+
+ fileid_type = fh->fh_fileid_type;
if (fileid_type == FILEID_ROOT)
dentry = dget(exp->ex_path.dentry);
else {
- dentry = exportfs_decode_fh(exp->ex_path.mnt, fid,
- data_left, fileid_type,
- nfsd_acceptable, exp);
+ dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid,
+ data_left, fileid_type, 0,
+ nfsd_acceptable, exp);
+ if (IS_ERR_OR_NULL(dentry)) {
+ trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp,
+ dentry ? PTR_ERR(dentry) : -ESTALE);
+ switch (PTR_ERR(dentry)) {
+ case -ENOMEM:
+ case -ETIMEDOUT:
+ break;
+ default:
+ dentry = ERR_PTR(-ESTALE);
+ }
+ }
}
if (dentry == NULL)
goto out;
@@ -282,8 +269,30 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
dentry);
}
+ switch (fhp->fh_maxsize) {
+ case NFS4_FHSIZE:
+ if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR)
+ fhp->fh_no_atomic_attr = true;
+ fhp->fh_64bit_cookies = true;
+ break;
+ case NFS3_FHSIZE:
+ if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC)
+ fhp->fh_no_wcc = true;
+ fhp->fh_64bit_cookies = true;
+ if (exp->ex_flags & NFSEXP_V4ROOT)
+ goto out;
+ break;
+ case NFS_FHSIZE:
+ fhp->fh_no_wcc = true;
+ if (EX_WGATHER(exp))
+ fhp->fh_use_wgather = true;
+ if (exp->ex_flags & NFSEXP_V4ROOT)
+ goto out;
+ }
+
fhp->fh_dentry = dentry;
fhp->fh_export = exp;
+
return 0;
out:
exp_put(exp);
@@ -291,48 +300,42 @@ out:
}
/**
- * fh_verify - filehandle lookup and access checking
- * @rqstp: pointer to current rpc request
+ * __fh_verify - filehandle lookup and access checking
+ * @rqstp: RPC transaction context, or NULL
+ * @net: net namespace in which to perform the export lookup
+ * @cred: RPC user credential
+ * @client: RPC auth domain
+ * @gssclient: RPC GSS auth domain, or NULL
* @fhp: filehandle to be verified
* @type: expected type of object pointed to by filehandle
* @access: type of access needed to object
*
- * Look up a dentry from the on-the-wire filehandle, check the client's
- * access to the export, and set the current task's credentials.
- *
- * Regardless of success or failure of fh_verify(), fh_put() should be
- * called on @fhp when the caller is finished with the filehandle.
- *
- * fh_verify() may be called multiple times on a given filehandle, for
- * example, when processing an NFSv4 compound. The first call will look
- * up a dentry using the on-the-wire filehandle. Subsequent calls will
- * skip the lookup and just perform the other checks and possibly change
- * the current task's credentials.
- *
- * @type specifies the type of object expected using one of the S_IF*
- * constants defined in include/linux/stat.h. The caller may use zero
- * to indicate that it doesn't care, or a negative integer to indicate
- * that it expects something not of the given type.
- *
- * @access is formed from the NFSD_MAY_* constants defined in
- * fs/nfsd/vfs.h.
+ * See fh_verify() for further descriptions of @fhp, @type, and @access.
*/
-__be32
-fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+static __be32
+__fh_verify(struct svc_rqst *rqstp,
+ struct net *net, struct svc_cred *cred,
+ struct auth_domain *client,
+ struct auth_domain *gssclient,
+ struct svc_fh *fhp, umode_t type, int access)
{
- struct svc_export *exp;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_export *exp = NULL;
+ bool may_bypass_gss = false;
struct dentry *dentry;
__be32 error;
- dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp));
-
if (!fhp->fh_dentry) {
- error = nfsd_set_fh_dentry(rqstp, fhp);
+ error = nfsd_set_fh_dentry(rqstp, net, cred, client,
+ gssclient, fhp);
if (error)
goto out;
}
dentry = fhp->fh_dentry;
exp = fhp->fh_export;
+
+ trace_nfsd_fh_verify(rqstp, fhp, type, access);
+
/*
* We still have to do all these permission checks, even when
* fh_dentry is already set:
@@ -349,25 +352,44 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
* (for example, if different id-squashing options are in
* effect on the new filesystem).
*/
- error = check_pseudo_root(rqstp, dentry, exp);
+ error = check_pseudo_root(dentry, exp);
if (error)
goto out;
- error = nfsd_setuser_and_check_port(rqstp, exp);
+ error = nfsd_setuser_and_check_port(rqstp, cred, exp);
if (error)
goto out;
- error = nfsd_mode_check(rqstp, dentry, type);
+ error = nfsd_mode_check(dentry, type);
if (error)
goto out;
/*
- * pseudoflavor restrictions are not enforced on NLM,
- * which clients virtually always use auth_sys for,
- * even while using RPCSEC_GSS for NFS.
+ * If rqstp is NULL, this is a LOCALIO request which will only
+ * ever use a filehandle/credential pair for which access has
+ * been affirmed (by ACCESS or OPEN NFS requests) over the
+ * wire. Skip both the xprtsec policy and the security flavor
+ * checks.
*/
- if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS)
- goto skip_pseudoflavor_check;
+ if (!rqstp)
+ goto check_permissions;
+
+ if ((access & NFSD_MAY_NLM) && (exp->ex_flags & NFSEXP_NOAUTHNLM))
+ /* NLM is allowed to fully bypass authentication */
+ goto out;
+
+ /*
+ * NLM is allowed to bypass the xprtsec policy check because lockd
+ * doesn't support xprtsec.
+ */
+ if (!(access & NFSD_MAY_NLM)) {
+ error = check_xprtsec_policy(exp, rqstp);
+ if (error)
+ goto out;
+ }
+
+ if (access & NFSD_MAY_BYPASS_GSS)
+ may_bypass_gss = true;
/*
* Clients may expect to be able to use auth_sys during mount,
* even if they use gss for everything else; see section 2.3.2
@@ -375,28 +397,81 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
*/
if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT
&& exp->ex_path.dentry == dentry)
- goto skip_pseudoflavor_check;
+ may_bypass_gss = true;
- error = check_nfsd_access(exp, rqstp);
+ error = check_security_flavor(exp, rqstp, may_bypass_gss);
if (error)
goto out;
-skip_pseudoflavor_check:
- /* Finally, check access permissions. */
- error = nfsd_permission(rqstp, exp, dentry, access);
+ svc_xprt_set_valid(rqstp->rq_xprt);
- if (error) {
- dprintk("fh_verify: %pd2 permission failure, "
- "acc=%x, error=%d\n",
- dentry,
- access, ntohl(error));
- }
+check_permissions:
+ /* Finally, check access permissions. */
+ error = nfsd_permission(cred, exp, dentry, access);
out:
+ trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
if (error == nfserr_stale)
- nfsdstats.fh_stale++;
+ nfsd_stats_fh_stale_inc(nn, exp);
return error;
}
+/**
+ * fh_verify_local - filehandle lookup and access checking
+ * @net: net namespace in which to perform the export lookup
+ * @cred: RPC user credential
+ * @client: RPC auth domain
+ * @fhp: filehandle to be verified
+ * @type: expected type of object pointed to by filehandle
+ * @access: type of access needed to object
+ *
+ * This API can be used by callers who do not have an RPC
+ * transaction context (ie are not running in an nfsd thread).
+ *
+ * See fh_verify() for further descriptions of @fhp, @type, and @access.
+ */
+__be32
+fh_verify_local(struct net *net, struct svc_cred *cred,
+ struct auth_domain *client, struct svc_fh *fhp,
+ umode_t type, int access)
+{
+ return __fh_verify(NULL, net, cred, client, NULL,
+ fhp, type, access);
+}
+
+/**
+ * fh_verify - filehandle lookup and access checking
+ * @rqstp: pointer to current rpc request
+ * @fhp: filehandle to be verified
+ * @type: expected type of object pointed to by filehandle
+ * @access: type of access needed to object
+ *
+ * Look up a dentry from the on-the-wire filehandle, check the client's
+ * access to the export, and set the current task's credentials.
+ *
+ * Regardless of success or failure of fh_verify(), fh_put() should be
+ * called on @fhp when the caller is finished with the filehandle.
+ *
+ * fh_verify() may be called multiple times on a given filehandle, for
+ * example, when processing an NFSv4 compound. The first call will look
+ * up a dentry using the on-the-wire filehandle. Subsequent calls will
+ * skip the lookup and just perform the other checks and possibly change
+ * the current task's credentials.
+ *
+ * @type specifies the type of object expected using one of the S_IF*
+ * constants defined in include/linux/stat.h. The caller may use zero
+ * to indicate that it doesn't care, or a negative integer to indicate
+ * that it expects something not of the given type.
+ *
+ * @access is formed from the NFSD_MAY_* constants defined in
+ * fs/nfsd/vfs.h.
+ */
+__be32
+fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
+{
+ return __fh_verify(rqstp, SVC_NET(rqstp), &rqstp->rq_cred,
+ rqstp->rq_client, rqstp->rq_gssclient,
+ fhp, type, access);
+}
/*
* Compose a file handle for an NFS reply.
@@ -410,32 +485,21 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
{
if (dentry != exp->ex_path.dentry) {
struct fid *fid = (struct fid *)
- (fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1);
+ (fh_fsid(&fhp->fh_handle) + fhp->fh_handle.fh_size/4 - 1);
int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
- int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK);
+ int fh_flags = (exp->ex_flags & NFSEXP_NOSUBTREECHECK) ? 0 :
+ EXPORT_FH_CONNECTABLE;
+ int fileid_type =
+ exportfs_encode_fh(dentry, fid, &maxsize, fh_flags);
fhp->fh_handle.fh_fileid_type =
- exportfs_encode_fh(dentry, fid, &maxsize, subtreecheck);
+ fileid_type > 0 ? fileid_type : FILEID_INVALID;
fhp->fh_handle.fh_size += maxsize * 4;
} else {
fhp->fh_handle.fh_fileid_type = FILEID_ROOT;
}
}
-/*
- * for composing old style file handles
- */
-static inline void _fh_update_old(struct dentry *dentry,
- struct svc_export *exp,
- struct knfsd_fh *fh)
-{
- fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino);
- fh->ofh_generation = d_inode(dentry)->i_generation;
- if (d_is_dir(dentry) ||
- (exp->ex_flags & NFSEXP_NOSUBTREECHECK))
- fh->ofh_dirino = 0;
-}
-
static bool is_root_export(struct svc_export *exp)
{
return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root;
@@ -452,7 +516,7 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
case FSID_DEV:
if (!old_valid_dev(exp_sb(exp)->s_dev))
return false;
- /* FALL THROUGH */
+ fallthrough;
case FSID_MAJOR_MINOR:
case FSID_ENCODE_DEV:
return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV;
@@ -462,7 +526,7 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
case FSID_UUID16:
if (!is_root_export(exp))
return false;
- /* fall through */
+ fallthrough;
case FSID_UUID4_INUM:
case FSID_UUID16_INUM:
return exp->ex_uuid != NULL;
@@ -532,9 +596,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
/* ref_fh is a reference file handle.
* if it is non-null and for the same filesystem, then we should compose
* a filehandle which is of the same version, where possible.
- * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
- * Then create a 32byte filehandle using nfs_fhbase_old
- *
*/
struct inode * inode = d_inode(dentry);
@@ -552,10 +613,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
*/
set_version_and_fsid_type(fhp, exp, ref_fh);
+ /* If we have a ref_fh, then copy the fh_no_wcc setting from it. */
+ fhp->fh_no_wcc = ref_fh ? ref_fh->fh_no_wcc : false;
+
if (ref_fh == fhp)
fh_put(ref_fh);
- if (fhp->fh_locked || fhp->fh_dentry) {
+ if (fhp->fh_dentry) {
printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n",
dentry);
}
@@ -567,35 +631,21 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
fhp->fh_dentry = dget(dentry); /* our internal copy */
fhp->fh_export = exp_get(exp);
- if (fhp->fh_handle.fh_version == 0xca) {
- /* old style filehandle please */
- memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
- fhp->fh_handle.fh_size = NFS_FHSIZE;
- fhp->fh_handle.ofh_dcookie = 0xfeebbaca;
- fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev);
- fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
- fhp->fh_handle.ofh_xino =
- ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino);
- fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
- if (inode)
- _fh_update_old(dentry, exp, &fhp->fh_handle);
- } else {
- fhp->fh_handle.fh_size =
- key_len(fhp->fh_handle.fh_fsid_type) + 4;
- fhp->fh_handle.fh_auth_type = 0;
-
- mk_fsid(fhp->fh_handle.fh_fsid_type,
- fhp->fh_handle.fh_fsid,
- ex_dev,
- d_inode(exp->ex_path.dentry)->i_ino,
- exp->ex_fsid, exp->ex_uuid);
-
- if (inode)
- _fh_update(fhp, exp, dentry);
- if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
- fh_put(fhp);
- return nfserr_opnotsupp;
- }
+ fhp->fh_handle.fh_size =
+ key_len(fhp->fh_handle.fh_fsid_type) + 4;
+ fhp->fh_handle.fh_auth_type = 0;
+
+ mk_fsid(fhp->fh_handle.fh_fsid_type,
+ fh_fsid(&fhp->fh_handle),
+ ex_dev,
+ d_inode(exp->ex_path.dentry)->i_ino,
+ exp->ex_fsid, exp->ex_uuid);
+
+ if (inode)
+ _fh_update(fhp, exp, dentry);
+ if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
+ fh_put(fhp);
+ return nfserr_stale;
}
return 0;
@@ -616,16 +666,12 @@ fh_update(struct svc_fh *fhp)
dentry = fhp->fh_dentry;
if (d_really_is_negative(dentry))
goto out_negative;
- if (fhp->fh_handle.fh_version != 1) {
- _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);
- } else {
- if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT)
- return 0;
+ if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT)
+ return 0;
- _fh_update(fhp, fhp->fh_export, dentry);
- if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
- return nfserr_opnotsupp;
- }
+ _fh_update(fhp, fhp->fh_export, dentry);
+ if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
+ return nfserr_stale;
return 0;
out_bad:
printk(KERN_ERR "fh_update: fh not verified!\n");
@@ -636,6 +682,111 @@ out_negative:
return nfserr_serverfault;
}
+/**
+ * fh_getattr - Retrieve attributes on a local file
+ * @fhp: File handle of target file
+ * @stat: Caller-supplied kstat buffer to be filled in
+ *
+ * Returns nfs_ok on success, otherwise an NFS status code is
+ * returned.
+ */
+__be32 fh_getattr(const struct svc_fh *fhp, struct kstat *stat)
+{
+ struct path p = {
+ .mnt = fhp->fh_export->ex_path.mnt,
+ .dentry = fhp->fh_dentry,
+ };
+ struct inode *inode = d_inode(p.dentry);
+ u32 request_mask = STATX_BASIC_STATS;
+
+ if (S_ISREG(inode->i_mode))
+ request_mask |= (STATX_DIOALIGN | STATX_DIO_READ_ALIGN);
+
+ if (fhp->fh_maxsize == NFS4_FHSIZE)
+ request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
+
+ return nfserrno(vfs_getattr(&p, stat, request_mask,
+ AT_STATX_SYNC_AS_STAT));
+}
+
+/**
+ * fh_fill_pre_attrs - Fill in pre-op attributes
+ * @fhp: file handle to be updated
+ *
+ */
+__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp)
+{
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+ struct kstat stat;
+ __be32 err;
+
+ if (fhp->fh_no_wcc || fhp->fh_pre_saved)
+ return nfs_ok;
+
+ err = fh_getattr(fhp, &stat);
+ if (err)
+ return err;
+
+ if (v4)
+ fhp->fh_pre_change = nfsd4_change_attribute(&stat);
+
+ fhp->fh_pre_mtime = stat.mtime;
+ fhp->fh_pre_ctime = stat.ctime;
+ fhp->fh_pre_size = stat.size;
+ fhp->fh_pre_saved = true;
+ return nfs_ok;
+}
+
+/**
+ * fh_fill_post_attrs - Fill in post-op attributes
+ * @fhp: file handle to be updated
+ *
+ */
+__be32 fh_fill_post_attrs(struct svc_fh *fhp)
+{
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+ __be32 err;
+
+ if (fhp->fh_no_wcc)
+ return nfs_ok;
+
+ if (fhp->fh_post_saved)
+ printk("nfsd: inode locked twice during operation.\n");
+
+ err = fh_getattr(fhp, &fhp->fh_post_attr);
+ if (err)
+ return err;
+
+ fhp->fh_post_saved = true;
+ if (v4)
+ fhp->fh_post_change =
+ nfsd4_change_attribute(&fhp->fh_post_attr);
+ return nfs_ok;
+}
+
+/**
+ * fh_fill_both_attrs - Fill pre-op and post-op attributes
+ * @fhp: file handle to be updated
+ *
+ * This is used when the directory wasn't changed, but wcc attributes
+ * are needed anyway.
+ */
+__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp)
+{
+ __be32 err;
+
+ err = fh_fill_post_attrs(fhp);
+ if (err)
+ return err;
+
+ fhp->fh_pre_change = fhp->fh_post_change;
+ fhp->fh_pre_mtime = fhp->fh_post_attr.mtime;
+ fhp->fh_pre_ctime = fhp->fh_post_attr.ctime;
+ fhp->fh_pre_size = fhp->fh_post_attr.size;
+ fhp->fh_pre_saved = true;
+ return nfs_ok;
+}
+
/*
* Release a file handle.
*/
@@ -645,16 +796,16 @@ fh_put(struct svc_fh *fhp)
struct dentry * dentry = fhp->fh_dentry;
struct svc_export * exp = fhp->fh_export;
if (dentry) {
- fh_unlock(fhp);
fhp->fh_dentry = NULL;
dput(dentry);
- fh_clear_wcc(fhp);
+ fh_clear_pre_post_attrs(fhp);
}
fh_drop_write(fhp);
if (exp) {
exp_put(exp);
fhp->fh_export = NULL;
}
+ fhp->fh_no_wcc = false;
return;
}
@@ -664,20 +815,15 @@ fh_put(struct svc_fh *fhp)
char * SVCFH_fmt(struct svc_fh *fhp)
{
struct knfsd_fh *fh = &fhp->fh_handle;
+ static char buf[2+1+1+64*3+1];
- static char buf[80];
- sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x",
- fh->fh_size,
- fh->fh_base.fh_pad[0],
- fh->fh_base.fh_pad[1],
- fh->fh_base.fh_pad[2],
- fh->fh_base.fh_pad[3],
- fh->fh_base.fh_pad[4],
- fh->fh_base.fh_pad[5]);
+ if (fh->fh_size > 64)
+ return "bad-fh";
+ sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw);
return buf;
}
-enum fsid_source fsid_source(struct svc_fh *fhp)
+enum fsid_source fsid_source(const struct svc_fh *fhp)
{
if (fhp->fh_handle.fh_version != 1)
return FSIDSOURCE_DEV;
@@ -704,3 +850,44 @@ enum fsid_source fsid_source(struct svc_fh *fhp)
return FSIDSOURCE_UUID;
return FSIDSOURCE_DEV;
}
+
+/**
+ * nfsd4_change_attribute - Generate an NFSv4 change_attribute value
+ * @stat: inode attributes
+ *
+ * Caller must fill in @stat before calling, typically by invoking
+ * vfs_getattr() with STATX_MODE, STATX_CTIME, and STATX_CHANGE_COOKIE.
+ * Returns an unsigned 64-bit changeid4 value (RFC 8881 Section 3.2).
+ *
+ * We could use i_version alone as the change attribute. However, i_version
+ * can go backwards on a regular file after an unclean shutdown. On its own
+ * that doesn't necessarily cause a problem, but if i_version goes backwards
+ * and then is incremented again it could reuse a value that was previously
+ * used before boot, and a client who queried the two values might incorrectly
+ * assume nothing changed.
+ *
+ * By using both ctime and the i_version counter we guarantee that as long as
+ * time doesn't go backwards we never reuse an old value. If the filesystem
+ * advertises STATX_ATTR_CHANGE_MONOTONIC, then this mitigation is not
+ * needed.
+ *
+ * We only need to do this for regular files as well. For directories, we
+ * assume that the new change attr is always logged to stable storage in some
+ * fashion before the results can be seen.
+ */
+u64 nfsd4_change_attribute(const struct kstat *stat)
+{
+ u64 chattr;
+
+ if (stat->result_mask & STATX_CHANGE_COOKIE) {
+ chattr = stat->change_cookie;
+ if (S_ISREG(stat->mode) &&
+ !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
+ chattr += (u64)stat->ctime.tv_sec << 30;
+ chattr += stat->ctime.tv_nsec;
+ }
+ } else {
+ chattr = time_to_chattr(&stat->ctime);
+ }
+ return chattr;
+}
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 755e256a9103..5ef7191f8ad8 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -10,8 +10,59 @@
#include <linux/crc32.h>
#include <linux/sunrpc/svc.h>
-#include <uapi/linux/nfsd/nfsfh.h>
#include <linux/iversion.h>
+#include <linux/exportfs.h>
+#include <linux/nfs4.h>
+
+#include "export.h"
+
+/*
+ * The file handle starts with a sequence of four-byte words.
+ * The first word contains a version number (1) and three descriptor bytes
+ * that tell how the remaining 3 variable length fields should be handled.
+ * These three bytes are auth_type, fsid_type and fileid_type.
+ *
+ * All four-byte values are in host-byte-order.
+ *
+ * The auth_type field is deprecated and must be set to 0.
+ *
+ * The fsid_type identifies how the filesystem (or export point) is
+ * encoded.
+ * Current values:
+ * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number
+ * NOTE: we cannot use the kdev_t device id value, because kdev_t.h
+ * says we mustn't. We must break it up and reassemble.
+ * 1 - 4 byte user specified identifier
+ * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED
+ * 3 - 4 byte device id, encoded for user-space, 4 byte inode number
+ * 4 - 4 byte inode number and 4 byte uuid
+ * 5 - 8 byte uuid
+ * 6 - 16 byte uuid
+ * 7 - 8 byte inode number and 16 byte uuid
+ *
+ * The fileid_type identifies how the file within the filesystem is encoded.
+ * The values for this field are filesystem specific, exccept that
+ * filesystems must not use the values '0' or '0xff'. 'See enum fid_type'
+ * in include/linux/exportfs.h for currently registered values.
+ */
+
+struct knfsd_fh {
+ unsigned int fh_size; /*
+ * Points to the current size while
+ * building a new file handle.
+ */
+ u8 fh_raw[NFS4_FHSIZE];
+};
+
+#define fh_version fh_raw[0]
+#define fh_auth_type fh_raw[1]
+#define fh_fsid_type fh_raw[2]
+#define fh_fileid_type fh_raw[3]
+
+static inline u32 *fh_fsid(const struct knfsd_fh *fh)
+{
+ return (u32 *)&fh->fh_raw[4];
+}
static inline __u32 ino_t_to_u32(ino_t ino)
{
@@ -33,29 +84,36 @@ typedef struct svc_fh {
struct dentry * fh_dentry; /* validated dentry */
struct svc_export * fh_export; /* export pointer */
- bool fh_locked; /* inode locked by us */
bool fh_want_write; /* remount protection taken */
-
-#ifdef CONFIG_NFSD_V3
+ bool fh_no_wcc; /* no wcc data needed */
+ bool fh_no_atomic_attr;
+ /*
+ * wcc data is not atomic with
+ * operation
+ */
+ bool fh_use_wgather; /* NFSv2 wgather option */
+ bool fh_64bit_cookies;/* readdir cookie size */
+ int fh_flags; /* FH flags */
bool fh_post_saved; /* post-op attrs saved */
bool fh_pre_saved; /* pre-op attrs saved */
- /* Pre-op attributes saved during fh_lock */
+ /* Pre-op attributes saved when inode is locked */
__u64 fh_pre_size; /* size before operation */
- struct timespec fh_pre_mtime; /* mtime before oper */
- struct timespec fh_pre_ctime; /* ctime before oper */
+ struct timespec64 fh_pre_mtime; /* mtime before oper */
+ struct timespec64 fh_pre_ctime; /* ctime before oper */
/*
* pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
* to find out if it is valid.
*/
u64 fh_pre_change;
- /* Post-op attributes saved in fh_unlock */
+ /* Post-op attributes saved in fh_fill_post_attrs() */
struct kstat fh_post_attr; /* full attrs after operation */
u64 fh_post_change; /* nfsv4 change; see above */
-#endif /* CONFIG_NFSD_V3 */
-
} svc_fh;
+#define NFSD4_FH_FOREIGN (1<<0)
+#define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f))
+#define HAS_FH_FLAG(c, f) ((c)->fh_flags & (f))
enum nfsd_fsid {
FSID_DEV = 0,
@@ -73,7 +131,7 @@ enum fsid_source {
FSIDSOURCE_FSID,
FSIDSOURCE_UUID,
};
-extern enum fsid_source fsid_source(struct svc_fh *fhp);
+extern enum fsid_source fsid_source(const struct svc_fh *fhp);
/*
@@ -162,24 +220,27 @@ extern char * SVCFH_fmt(struct svc_fh *fhp);
* Function prototypes
*/
__be32 fh_verify(struct svc_rqst *, struct svc_fh *, umode_t, int);
+__be32 fh_verify_local(struct net *, struct svc_cred *, struct auth_domain *,
+ struct svc_fh *, umode_t, int);
+__be32 fh_getattr(const struct svc_fh *fhp, struct kstat *stat);
__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
__be32 fh_update(struct svc_fh *);
void fh_put(struct svc_fh *);
static __inline__ struct svc_fh *
-fh_copy(struct svc_fh *dst, struct svc_fh *src)
+fh_copy(struct svc_fh *dst, const struct svc_fh *src)
{
- WARN_ON(src->fh_dentry || src->fh_locked);
-
+ WARN_ON(src->fh_dentry);
+
*dst = *src;
return dst;
}
static inline void
-fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
+fh_copy_shallow(struct knfsd_fh *dst, const struct knfsd_fh *src)
{
dst->fh_size = src->fh_size;
- memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
+ memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size);
}
static __inline__ struct svc_fh *
@@ -190,134 +251,89 @@ fh_init(struct svc_fh *fhp, int maxsize)
return fhp;
}
-static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
+static inline bool fh_match(const struct knfsd_fh *fh1,
+ const struct knfsd_fh *fh2)
{
if (fh1->fh_size != fh2->fh_size)
return false;
- if (memcmp(fh1->fh_base.fh_pad, fh2->fh_base.fh_pad, fh1->fh_size) != 0)
+ if (memcmp(fh1->fh_raw, fh2->fh_raw, fh1->fh_size) != 0)
return false;
return true;
}
-static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
+static inline bool fh_fsid_match(const struct knfsd_fh *fh1,
+ const struct knfsd_fh *fh2)
{
+ u32 *fsid1 = fh_fsid(fh1);
+ u32 *fsid2 = fh_fsid(fh2);
+
if (fh1->fh_fsid_type != fh2->fh_fsid_type)
return false;
- if (memcmp(fh1->fh_fsid, fh2->fh_fsid, key_len(fh1->fh_fsid_type)) != 0)
+ if (memcmp(fsid1, fsid2, key_len(fh1->fh_fsid_type)) != 0)
return false;
return true;
}
-#ifdef CONFIG_CRC32
/**
- * knfsd_fh_hash - calculate the crc32 hash for the filehandle
- * @fh - pointer to filehandle
+ * fh_want_write - Get write access to an export
+ * @fhp: File handle of file to be written
*
- * returns a crc32 hash for the filehandle that is compatible with
- * the one displayed by "wireshark".
- */
-
-static inline u32
-knfsd_fh_hash(struct knfsd_fh *fh)
-{
- return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size);
-}
-#else
-static inline u32
-knfsd_fh_hash(struct knfsd_fh *fh)
-{
- return 0;
-}
-#endif
-
-#ifdef CONFIG_NFSD_V3
-/*
- * The wcc data stored in current_fh should be cleared
- * between compound ops.
- */
-static inline void
-fh_clear_wcc(struct svc_fh *fhp)
-{
- fhp->fh_post_saved = false;
- fhp->fh_pre_saved = false;
-}
-
-/*
- * We could use i_version alone as the change attribute. However,
- * i_version can go backwards after a reboot. On its own that doesn't
- * necessarily cause a problem, but if i_version goes backwards and then
- * is incremented again it could reuse a value that was previously used
- * before boot, and a client who queried the two values might
- * incorrectly assume nothing changed.
+ * Caller must invoke fh_drop_write() when its write operation
+ * is complete.
*
- * By using both ctime and the i_version counter we guarantee that as
- * long as time doesn't go backwards we never reuse an old value.
+ * Returns 0 if the file handle's export can be written to. Otherwise
+ * the export is not prepared for updates, and the returned negative
+ * errno value reflects the reason for the failure.
*/
-static inline u64 nfsd4_change_attribute(struct kstat *stat,
- struct inode *inode)
+static inline int fh_want_write(struct svc_fh *fhp)
{
- u64 chattr;
-
- chattr = stat->ctime.tv_sec;
- chattr <<= 30;
- chattr += stat->ctime.tv_nsec;
- chattr += inode_query_iversion(inode);
- return chattr;
+ int ret;
+
+ if (fhp->fh_want_write)
+ return 0;
+ ret = mnt_want_write(fhp->fh_export->ex_path.mnt);
+ if (!ret)
+ fhp->fh_want_write = true;
+ return ret;
}
-extern void fill_pre_wcc(struct svc_fh *fhp);
-extern void fill_post_wcc(struct svc_fh *fhp);
-#else
-#define fh_clear_wcc(ignored)
-#define fill_pre_wcc(ignored)
-#define fill_post_wcc(notused)
-#endif /* CONFIG_NFSD_V3 */
-
-
-/*
- * Lock a file handle/inode
- * NOTE: both fh_lock and fh_unlock are done "by hand" in
- * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
- * so, any changes here should be reflected there.
+/**
+ * fh_drop_write - Release write access on an export
+ * @fhp: File handle of file on which fh_want_write() was previously called
*/
-
-static inline void
-fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
+static inline void fh_drop_write(struct svc_fh *fhp)
{
- struct dentry *dentry = fhp->fh_dentry;
- struct inode *inode;
-
- BUG_ON(!dentry);
-
- if (fhp->fh_locked) {
- printk(KERN_WARNING "fh_lock: %pd2 already locked!\n",
- dentry);
- return;
+ if (fhp->fh_want_write) {
+ fhp->fh_want_write = false;
+ mnt_drop_write(fhp->fh_export->ex_path.mnt);
}
-
- inode = d_inode(dentry);
- inode_lock_nested(inode, subclass);
- fill_pre_wcc(fhp);
- fhp->fh_locked = true;
}
-static inline void
-fh_lock(struct svc_fh *fhp)
+/**
+ * knfsd_fh_hash - calculate the crc32 hash for the filehandle
+ * @fh - pointer to filehandle
+ *
+ * returns a crc32 hash for the filehandle that is compatible with
+ * the one displayed by "wireshark".
+ */
+static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh)
{
- fh_lock_nested(fhp, I_MUTEX_NORMAL);
+ return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size);
}
-/*
- * Unlock a file handle/inode
+/**
+ * fh_clear_pre_post_attrs - Reset pre/post attributes
+ * @fhp: file handle to be updated
+ *
*/
-static inline void
-fh_unlock(struct svc_fh *fhp)
+static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
{
- if (fhp->fh_locked) {
- fill_post_wcc(fhp);
- inode_unlock(d_inode(fhp->fh_dentry));
- fhp->fh_locked = false;
- }
+ fhp->fh_post_saved = false;
+ fhp->fh_pre_saved = false;
}
+u64 nfsd4_change_attribute(const struct kstat *stat);
+__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp);
+__be32 fh_fill_post_attrs(struct svc_fh *fhp);
+__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp);
#endif /* _LINUX_NFSD_NFSFH_H */
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0d20fd161225..481e789a7697 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -10,31 +10,41 @@
#include "cache.h"
#include "xdr.h"
#include "vfs.h"
-
-typedef struct svc_rqst svc_rqst;
-typedef struct svc_buf svc_buf;
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
-
-static __be32
-nfsd_proc_null(struct svc_rqst *rqstp)
+static __be32 nfsd_map_status(__be32 status)
{
- return nfs_ok;
+ switch (status) {
+ case nfs_ok:
+ break;
+ case nfserr_nofilehandle:
+ case nfserr_badhandle:
+ status = nfserr_stale;
+ break;
+ case nfserr_wrongsec:
+ case nfserr_xdev:
+ case nfserr_file_open:
+ status = nfserr_acces;
+ break;
+ case nfserr_symlink_not_dir:
+ status = nfserr_notdir;
+ break;
+ case nfserr_symlink:
+ case nfserr_wrong_type:
+ status = nfserr_inval;
+ break;
+ }
+ return status;
}
static __be32
-nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp)
-{
- if (err) return err;
- return fh_getattr(&resp->fh, &resp->stat);
-}
-static __be32
-nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp)
+nfsd_proc_null(struct svc_rqst *rqstp)
{
- if (err) return err;
- return fh_getattr(&resp->fh, &resp->stat);
+ return rpc_success;
}
+
/*
* Get a file's attributes
* N.B. After this call resp->fh needs an fh_put
@@ -44,13 +54,18 @@ nfsd_proc_getattr(struct svc_rqst *rqstp)
{
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd_attrstat *resp = rqstp->rq_resp;
- __be32 nfserr;
- dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
+
+ trace_nfsd_vfs_getattr(rqstp, &argp->fh);
fh_copy(&resp->fh, &argp->fh);
- nfserr = fh_verify(rqstp, &resp->fh, 0,
- NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT);
- return nfsd_return_attrs(nfserr, resp);
+ resp->status = fh_verify(rqstp, &resp->fh, 0,
+ NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT);
+ if (resp->status != nfs_ok)
+ goto out;
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+out:
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -63,8 +78,10 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
struct nfsd_sattrargs *argp = rqstp->rq_argp;
struct nfsd_attrstat *resp = rqstp->rq_resp;
struct iattr *iap = &argp->attrs;
+ struct nfsd_attrs attrs = {
+ .na_iattr = iap,
+ };
struct svc_fh *fhp;
- __be32 nfserr;
dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n",
SVCFH_fmt(&argp->fh),
@@ -94,16 +111,16 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
* Solaris, at least, doesn't seem to care what the time
* request is. We require it be within 30 minutes of now.
*/
- time_t delta = iap->ia_atime.tv_sec - get_seconds();
+ time64_t delta = iap->ia_atime.tv_sec - ktime_get_real_seconds();
- nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
- if (nfserr)
- goto done;
+ resp->status = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
+ if (resp->status != nfs_ok)
+ goto out;
if (delta < 0)
delta = -delta;
if (delta < MAX_TOUCH_TIME_ERROR &&
- setattr_prepare(fhp->fh_dentry, iap) != 0) {
+ setattr_prepare(&nop_mnt_idmap, fhp->fh_dentry, iap) != 0) {
/*
* Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
* This will cause notify_change to set these times
@@ -113,9 +130,21 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
}
}
- nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0);
-done:
- return nfsd_return_attrs(nfserr, resp);
+ resp->status = nfsd_setattr(rqstp, fhp, &attrs, NULL);
+ if (resp->status != nfs_ok)
+ goto out;
+
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+out:
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
+}
+
+/* Obsolete, replaced by MNTPROC_MNT. */
+static __be32
+nfsd_proc_root(struct svc_rqst *rqstp)
+{
+ return rpc_success;
}
/*
@@ -129,17 +158,21 @@ nfsd_proc_lookup(struct svc_rqst *rqstp)
{
struct nfsd_diropargs *argp = rqstp->rq_argp;
struct nfsd_diropres *resp = rqstp->rq_resp;
- __be32 nfserr;
dprintk("nfsd: LOOKUP %s %.*s\n",
SVCFH_fmt(&argp->fh), argp->len, argp->name);
fh_init(&resp->fh, NFS_FHSIZE);
- nfserr = nfsd_lookup(rqstp, &argp->fh, argp->name, argp->len,
- &resp->fh);
-
+ resp->status = nfsd_lookup(rqstp, &argp->fh, argp->name, argp->len,
+ &resp->fh);
fh_put(&argp->fh);
- return nfsd_return_dirop(nfserr, resp);
+ if (resp->status != nfs_ok)
+ goto out;
+
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+out:
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -148,18 +181,20 @@ nfsd_proc_lookup(struct svc_rqst *rqstp)
static __be32
nfsd_proc_readlink(struct svc_rqst *rqstp)
{
- struct nfsd_readlinkargs *argp = rqstp->rq_argp;
+ struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd_readlinkres *resp = rqstp->rq_resp;
- __be32 nfserr;
dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
/* Read the symlink. */
resp->len = NFS_MAXPATHLEN;
- nfserr = nfsd_readlink(rqstp, &argp->fh, argp->buffer, &resp->len);
+ resp->page = *(rqstp->rq_next_page++);
+ resp->status = nfsd_readlink(rqstp, &argp->fh,
+ page_address(resp->page), &resp->len);
fh_put(&argp->fh);
- return nfserr;
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -171,34 +206,39 @@ nfsd_proc_read(struct svc_rqst *rqstp)
{
struct nfsd_readargs *argp = rqstp->rq_argp;
struct nfsd_readres *resp = rqstp->rq_resp;
- __be32 nfserr;
+ u32 eof;
dprintk("nfsd: READ %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->count, argp->offset);
+ argp->count = min_t(u32, argp->count, NFS_MAXDATA);
+ argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen);
+
+ resp->pages = rqstp->rq_next_page;
+
/* Obtain buffer pointer for payload. 19 is 1 word for
* status, 17 words for fattr, and 1 word for the byte count.
*/
-
- if (NFSSVC_MAXBLKSIZE_V2 < argp->count) {
- char buf[RPC_MAX_ADDRBUFLEN];
- printk(KERN_NOTICE
- "oversized read request from %s (%d bytes)\n",
- svc_print_addr(rqstp, buf, sizeof(buf)),
- argp->count);
- argp->count = NFSSVC_MAXBLKSIZE_V2;
- }
svc_reserve_auth(rqstp, (19<<2) + argp->count + 4);
resp->count = argp->count;
- nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
- argp->offset,
- rqstp->rq_vec, argp->vlen,
- &resp->count);
+ fh_copy(&resp->fh, &argp->fh);
+ resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
+ &resp->count, &eof);
+ if (resp->status == nfs_ok)
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+ else if (resp->status == nfserr_jukebox)
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
+}
- if (nfserr) return nfserr;
- return fh_getattr(&resp->fh, &resp->stat);
+/* Reserved */
+static __be32
+nfsd_proc_writecache(struct svc_rqst *rqstp)
+{
+ return rpc_success;
}
/*
@@ -210,22 +250,21 @@ nfsd_proc_write(struct svc_rqst *rqstp)
{
struct nfsd_writeargs *argp = rqstp->rq_argp;
struct nfsd_attrstat *resp = rqstp->rq_resp;
- __be32 nfserr;
unsigned long cnt = argp->len;
- unsigned int nvecs;
- dprintk("nfsd: WRITE %s %d bytes at %d\n",
+ dprintk("nfsd: WRITE %s %u bytes at %d\n",
SVCFH_fmt(&argp->fh),
argp->len, argp->offset);
- nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
- &argp->first, cnt);
- if (!nvecs)
- return nfserr_io;
- nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
- argp->offset, rqstp->rq_vec, nvecs,
- &cnt, NFS_DATA_SYNC);
- return nfsd_return_attrs(nfserr, resp);
+ fh_copy(&resp->fh, &argp->fh);
+ resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
+ &argp->payload, &cnt, NFS_DATA_SYNC, NULL);
+ if (resp->status == nfs_ok)
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+ else if (resp->status == nfserr_jukebox)
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -242,53 +281,51 @@ nfsd_proc_create(struct svc_rqst *rqstp)
svc_fh *dirfhp = &argp->fh;
svc_fh *newfhp = &resp->fh;
struct iattr *attr = &argp->attrs;
+ struct nfsd_attrs attrs = {
+ .na_iattr = attr,
+ };
struct inode *inode;
struct dentry *dchild;
int type, mode;
- __be32 nfserr;
int hosterr;
dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size);
- dprintk("nfsd: CREATE %s %.*s\n",
- SVCFH_fmt(dirfhp), argp->len, argp->name);
-
/* First verify the parent file handle */
- nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC);
- if (nfserr)
+ resp->status = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC);
+ if (resp->status != nfs_ok)
goto done; /* must fh_put dirfhp even on error */
/* Check for NFSD_MAY_WRITE in nfsd_create if necessary */
- nfserr = nfserr_exist;
+ resp->status = nfserr_exist;
if (isdotent(argp->name, argp->len))
goto done;
hosterr = fh_want_write(dirfhp);
if (hosterr) {
- nfserr = nfserrno(hosterr);
+ resp->status = nfserrno(hosterr);
goto done;
}
- fh_lock_nested(dirfhp, I_MUTEX_PARENT);
- dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
+ dchild = start_creating(&nop_mnt_idmap, dirfhp->fh_dentry,
+ &QSTR_LEN(argp->name, argp->len));
if (IS_ERR(dchild)) {
- nfserr = nfserrno(PTR_ERR(dchild));
- goto out_unlock;
+ resp->status = nfserrno(PTR_ERR(dchild));
+ goto out_write;
}
fh_init(newfhp, NFS_FHSIZE);
- nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp);
- if (!nfserr && d_really_is_negative(dchild))
- nfserr = nfserr_noent;
- dput(dchild);
- if (nfserr) {
- if (nfserr != nfserr_noent)
+ resp->status = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp);
+ if (!resp->status && d_really_is_negative(dchild))
+ resp->status = nfserr_noent;
+ if (resp->status) {
+ if (resp->status != nfserr_noent)
goto out_unlock;
/*
* If the new file handle wasn't verified, we can't tell
* whether the file exists or not. Time to bail ...
*/
- nfserr = nfserr_acces;
+ resp->status = nfserr_acces;
if (!newfhp->fh_dentry) {
- printk(KERN_WARNING
+ printk(KERN_WARNING
"nfsd_proc_create: file handle not verified\n");
goto out_unlock;
}
@@ -312,18 +349,19 @@ nfsd_proc_create(struct svc_rqst *rqstp)
rdev = inode->i_rdev;
attr->ia_valid |= ATTR_SIZE;
- /* FALLTHROUGH */
+ fallthrough;
case S_IFIFO:
/* this is probably a permission check..
* at least IRIX implements perm checking on
* echo thing > device-special-file-or-pipe
* by doing a CREATE with type==0
*/
- nfserr = nfsd_permission(rqstp,
- newfhp->fh_export,
- newfhp->fh_dentry,
- NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS);
- if (nfserr && nfserr != nfserr_rofs)
+ resp->status = nfsd_permission(
+ &rqstp->rq_cred,
+ newfhp->fh_export,
+ newfhp->fh_dentry,
+ NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS);
+ if (resp->status && resp->status != nfserr_rofs)
goto out_unlock;
}
} else
@@ -359,16 +397,19 @@ nfsd_proc_create(struct svc_rqst *rqstp)
attr->ia_valid &= ~ATTR_SIZE;
/* Make sure the type and device matches */
- nfserr = nfserr_exist;
- if (inode && type != (inode->i_mode & S_IFMT))
+ resp->status = nfserr_exist;
+ if (inode && inode_wrong_type(inode, type))
goto out_unlock;
}
- nfserr = 0;
+ resp->status = nfs_ok;
if (!inode) {
/* File doesn't exist. Create it and set attrs */
- nfserr = nfsd_create_locked(rqstp, dirfhp, argp->name,
- argp->len, attr, type, rdev, newfhp);
+ resp->status = nfsd_create_locked(rqstp, dirfhp, &attrs, type,
+ rdev, newfhp);
+ /* nfsd_create_locked() unlocked the parent */
+ dput(dchild);
+ goto out_write;
} else if (type == S_IFREG) {
dprintk("nfsd: existing %s, valid=%x, size=%ld\n",
argp->name, attr->ia_valid, (long) attr->ia_size);
@@ -378,99 +419,99 @@ nfsd_proc_create(struct svc_rqst *rqstp)
*/
attr->ia_valid &= ATTR_SIZE;
if (attr->ia_valid)
- nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0);
+ resp->status = nfsd_setattr(rqstp, newfhp, &attrs,
+ NULL);
}
out_unlock:
- /* We don't really need to unlock, as fh_put does it. */
- fh_unlock(dirfhp);
+ end_creating(dchild);
+out_write:
fh_drop_write(dirfhp);
done:
fh_put(dirfhp);
- return nfsd_return_dirop(nfserr, resp);
+ if (resp->status != nfs_ok)
+ goto out;
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+out:
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
static __be32
nfsd_proc_remove(struct svc_rqst *rqstp)
{
struct nfsd_diropargs *argp = rqstp->rq_argp;
- __be32 nfserr;
-
- dprintk("nfsd: REMOVE %s %.*s\n", SVCFH_fmt(&argp->fh),
- argp->len, argp->name);
+ struct nfsd_stat *resp = rqstp->rq_resp;
/* Unlink. -SIFDIR means file must not be a directory */
- nfserr = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR, argp->name, argp->len);
+ resp->status = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR,
+ argp->name, argp->len);
fh_put(&argp->fh);
- return nfserr;
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
static __be32
nfsd_proc_rename(struct svc_rqst *rqstp)
{
struct nfsd_renameargs *argp = rqstp->rq_argp;
- __be32 nfserr;
+ struct nfsd_stat *resp = rqstp->rq_resp;
- dprintk("nfsd: RENAME %s %.*s -> \n",
- SVCFH_fmt(&argp->ffh), argp->flen, argp->fname);
- dprintk("nfsd: -> %s %.*s\n",
- SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname);
-
- nfserr = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen,
- &argp->tfh, argp->tname, argp->tlen);
+ resp->status = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen,
+ &argp->tfh, argp->tname, argp->tlen);
fh_put(&argp->ffh);
fh_put(&argp->tfh);
- return nfserr;
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
static __be32
nfsd_proc_link(struct svc_rqst *rqstp)
{
struct nfsd_linkargs *argp = rqstp->rq_argp;
- __be32 nfserr;
-
- dprintk("nfsd: LINK %s ->\n",
- SVCFH_fmt(&argp->ffh));
- dprintk("nfsd: %s %.*s\n",
- SVCFH_fmt(&argp->tfh),
- argp->tlen,
- argp->tname);
+ struct nfsd_stat *resp = rqstp->rq_resp;
- nfserr = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen,
- &argp->ffh);
+ resp->status = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen,
+ &argp->ffh);
fh_put(&argp->ffh);
fh_put(&argp->tfh);
- return nfserr;
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
static __be32
nfsd_proc_symlink(struct svc_rqst *rqstp)
{
struct nfsd_symlinkargs *argp = rqstp->rq_argp;
+ struct nfsd_stat *resp = rqstp->rq_resp;
+ struct nfsd_attrs attrs = {
+ .na_iattr = &argp->attrs,
+ };
struct svc_fh newfh;
- __be32 nfserr;
- if (argp->tlen > NFS_MAXPATHLEN)
- return nfserr_nametoolong;
+ if (argp->tlen > NFS_MAXPATHLEN) {
+ resp->status = nfserr_nametoolong;
+ goto out;
+ }
argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first,
page_address(rqstp->rq_arg.pages[0]),
argp->tlen);
- if (IS_ERR(argp->tname))
- return nfserrno(PTR_ERR(argp->tname));
-
- dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n",
- SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
- argp->tlen, argp->tname);
+ if (IS_ERR(argp->tname)) {
+ resp->status = nfserrno(PTR_ERR(argp->tname));
+ goto out;
+ }
fh_init(&newfh, NFS_FHSIZE);
- nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
- argp->tname, &newfh);
+ resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
+ argp->tname, &attrs, &newfh);
kfree(argp->tname);
fh_put(&argp->ffh);
fh_put(&newfh);
- return nfserr;
+out:
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -482,9 +523,9 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp)
{
struct nfsd_createargs *argp = rqstp->rq_argp;
struct nfsd_diropres *resp = rqstp->rq_resp;
- __be32 nfserr;
-
- dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
+ struct nfsd_attrs attrs = {
+ .na_iattr = &argp->attrs,
+ };
if (resp->fh.fh_dentry) {
printk(KERN_WARNING
@@ -493,10 +534,16 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp)
argp->attrs.ia_valid &= ~ATTR_SIZE;
fh_init(&resp->fh, NFS_FHSIZE);
- nfserr = nfsd_create(rqstp, &argp->fh, argp->name, argp->len,
- &argp->attrs, S_IFDIR, 0, &resp->fh);
+ resp->status = nfsd_create(rqstp, &argp->fh, argp->name, argp->len,
+ &attrs, S_IFDIR, 0, &resp->fh);
fh_put(&argp->fh);
- return nfsd_return_dirop(nfserr, resp);
+ if (resp->status != nfs_ok)
+ goto out;
+
+ resp->status = fh_getattr(&resp->fh, &resp->stat);
+out:
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -506,13 +553,31 @@ static __be32
nfsd_proc_rmdir(struct svc_rqst *rqstp)
{
struct nfsd_diropargs *argp = rqstp->rq_argp;
- __be32 nfserr;
+ struct nfsd_stat *resp = rqstp->rq_resp;
- dprintk("nfsd: RMDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
-
- nfserr = nfsd_unlink(rqstp, &argp->fh, S_IFDIR, argp->name, argp->len);
+ resp->status = nfsd_unlink(rqstp, &argp->fh, S_IFDIR,
+ argp->name, argp->len);
fh_put(&argp->fh);
- return nfserr;
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
+}
+
+static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp,
+ struct nfsd_readdirres *resp,
+ u32 count)
+{
+ struct xdr_buf *buf = &resp->dirlist;
+ struct xdr_stream *xdr = &resp->xdr;
+
+ memset(buf, 0, sizeof(*buf));
+
+ /* Reserve room for the NULL ptr & eof flag (-2 words) */
+ buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), (u32)PAGE_SIZE);
+ buf->buflen -= XDR_UNIT * 2;
+ buf->pages = rqstp->rq_next_page;
+ rqstp->rq_next_page++;
+
+ xdr_init_encode_pages(xdr, buf);
}
/*
@@ -523,37 +588,22 @@ nfsd_proc_readdir(struct svc_rqst *rqstp)
{
struct nfsd_readdirargs *argp = rqstp->rq_argp;
struct nfsd_readdirres *resp = rqstp->rq_resp;
- int count;
- __be32 nfserr;
loff_t offset;
- dprintk("nfsd: READDIR %s %d bytes at %d\n",
- SVCFH_fmt(&argp->fh),
- argp->count, argp->cookie);
-
- /* Shrink to the client read size */
- count = (argp->count >> 2) - 2;
+ trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie);
- /* Make sure we've room for the NULL ptr & eof flag */
- count -= 2;
- if (count < 0)
- count = 0;
+ nfsd_init_dirlist_pages(rqstp, resp, argp->count);
- resp->buffer = argp->buffer;
- resp->offset = NULL;
- resp->buflen = count;
resp->common.err = nfs_ok;
- /* Read directory and encode entries on the fly */
+ resp->cookie_offset = 0;
offset = argp->cookie;
- nfserr = nfsd_readdir(rqstp, &argp->fh, &offset,
- &resp->common, nfssvc_encode_entry);
-
- resp->count = resp->buffer - argp->buffer;
- if (resp->offset)
- *resp->offset = htonl(offset);
+ resp->status = nfsd_readdir(rqstp, &argp->fh, &offset,
+ &resp->common, nfssvc_encode_entry);
+ nfssvc_encode_nfscookie(resp, offset);
fh_put(&argp->fh);
- return nfserr;
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
/*
@@ -564,21 +614,18 @@ nfsd_proc_statfs(struct svc_rqst *rqstp)
{
struct nfsd_fhandle *argp = rqstp->rq_argp;
struct nfsd_statfsres *resp = rqstp->rq_resp;
- __be32 nfserr;
-
- dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh));
- nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats,
- NFSD_MAY_BYPASS_GSS_ON_ROOT);
+ resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats,
+ NFSD_MAY_BYPASS_GSS_ON_ROOT);
fh_put(&argp->fh);
- return nfserr;
+ resp->status = nfsd_map_status(resp->status);
+ return rpc_success;
}
/*
* NFSv2 Server procedures.
* Only the results of non-idempotent operations are cached.
*/
-struct nfsd_void { int dummy; };
#define ST 1 /* status */
#define FH 8 /* filehandle */
@@ -587,237 +634,217 @@ struct nfsd_void { int dummy; };
static const struct svc_procedure nfsd_procedures2[18] = {
[NFSPROC_NULL] = {
.pc_func = nfsd_proc_null,
- .pc_decode = nfssvc_decode_void,
- .pc_encode = nfssvc_encode_void,
- .pc_argsize = sizeof(struct nfsd_void),
- .pc_ressize = sizeof(struct nfsd_void),
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
- .pc_xdrressize = ST,
+ .pc_xdrressize = 0,
+ .pc_name = "NULL",
},
[NFSPROC_GETATTR] = {
.pc_func = nfsd_proc_getattr,
- .pc_decode = nfssvc_decode_fhandle,
- .pc_encode = nfssvc_encode_attrstat,
- .pc_release = nfssvc_release_fhandle,
+ .pc_decode = nfssvc_decode_fhandleargs,
+ .pc_encode = nfssvc_encode_attrstatres,
+ .pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd_attrstat),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+AT,
+ .pc_name = "GETATTR",
},
[NFSPROC_SETATTR] = {
.pc_func = nfsd_proc_setattr,
.pc_decode = nfssvc_decode_sattrargs,
- .pc_encode = nfssvc_encode_attrstat,
- .pc_release = nfssvc_release_fhandle,
+ .pc_encode = nfssvc_encode_attrstatres,
+ .pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd_sattrargs),
+ .pc_argzero = sizeof(struct nfsd_sattrargs),
.pc_ressize = sizeof(struct nfsd_attrstat),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+AT,
+ .pc_name = "SETATTR",
},
[NFSPROC_ROOT] = {
- .pc_decode = nfssvc_decode_void,
- .pc_encode = nfssvc_encode_void,
- .pc_argsize = sizeof(struct nfsd_void),
- .pc_ressize = sizeof(struct nfsd_void),
+ .pc_func = nfsd_proc_root,
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
- .pc_xdrressize = ST,
+ .pc_xdrressize = 0,
+ .pc_name = "ROOT",
},
[NFSPROC_LOOKUP] = {
.pc_func = nfsd_proc_lookup,
.pc_decode = nfssvc_decode_diropargs,
.pc_encode = nfssvc_encode_diropres,
- .pc_release = nfssvc_release_fhandle,
+ .pc_release = nfssvc_release_diropres,
.pc_argsize = sizeof(struct nfsd_diropargs),
+ .pc_argzero = sizeof(struct nfsd_diropargs),
.pc_ressize = sizeof(struct nfsd_diropres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+FH+AT,
+ .pc_name = "LOOKUP",
},
[NFSPROC_READLINK] = {
.pc_func = nfsd_proc_readlink,
- .pc_decode = nfssvc_decode_readlinkargs,
+ .pc_decode = nfssvc_decode_fhandleargs,
.pc_encode = nfssvc_encode_readlinkres,
- .pc_argsize = sizeof(struct nfsd_readlinkargs),
+ .pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd_readlinkres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+1+NFS_MAXPATHLEN/4,
+ .pc_name = "READLINK",
},
[NFSPROC_READ] = {
.pc_func = nfsd_proc_read,
.pc_decode = nfssvc_decode_readargs,
.pc_encode = nfssvc_encode_readres,
- .pc_release = nfssvc_release_fhandle,
+ .pc_release = nfssvc_release_readres,
.pc_argsize = sizeof(struct nfsd_readargs),
+ .pc_argzero = sizeof(struct nfsd_readargs),
.pc_ressize = sizeof(struct nfsd_readres),
.pc_cachetype = RC_NOCACHE,
- .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
+ .pc_xdrressize = ST+AT+1+NFS_MAXDATA/4,
+ .pc_name = "READ",
},
[NFSPROC_WRITECACHE] = {
- .pc_decode = nfssvc_decode_void,
- .pc_encode = nfssvc_encode_void,
- .pc_argsize = sizeof(struct nfsd_void),
- .pc_ressize = sizeof(struct nfsd_void),
+ .pc_func = nfsd_proc_writecache,
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_argzero = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
- .pc_xdrressize = ST,
+ .pc_xdrressize = 0,
+ .pc_name = "WRITECACHE",
},
[NFSPROC_WRITE] = {
.pc_func = nfsd_proc_write,
.pc_decode = nfssvc_decode_writeargs,
- .pc_encode = nfssvc_encode_attrstat,
- .pc_release = nfssvc_release_fhandle,
+ .pc_encode = nfssvc_encode_attrstatres,
+ .pc_release = nfssvc_release_attrstat,
.pc_argsize = sizeof(struct nfsd_writeargs),
+ .pc_argzero = sizeof(struct nfsd_writeargs),
.pc_ressize = sizeof(struct nfsd_attrstat),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+AT,
+ .pc_name = "WRITE",
},
[NFSPROC_CREATE] = {
.pc_func = nfsd_proc_create,
.pc_decode = nfssvc_decode_createargs,
.pc_encode = nfssvc_encode_diropres,
- .pc_release = nfssvc_release_fhandle,
+ .pc_release = nfssvc_release_diropres,
.pc_argsize = sizeof(struct nfsd_createargs),
+ .pc_argzero = sizeof(struct nfsd_createargs),
.pc_ressize = sizeof(struct nfsd_diropres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+FH+AT,
+ .pc_name = "CREATE",
},
[NFSPROC_REMOVE] = {
.pc_func = nfsd_proc_remove,
.pc_decode = nfssvc_decode_diropargs,
- .pc_encode = nfssvc_encode_void,
+ .pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_diropargs),
- .pc_ressize = sizeof(struct nfsd_void),
+ .pc_argzero = sizeof(struct nfsd_diropargs),
+ .pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
.pc_xdrressize = ST,
+ .pc_name = "REMOVE",
},
[NFSPROC_RENAME] = {
.pc_func = nfsd_proc_rename,
.pc_decode = nfssvc_decode_renameargs,
- .pc_encode = nfssvc_encode_void,
+ .pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_renameargs),
- .pc_ressize = sizeof(struct nfsd_void),
+ .pc_argzero = sizeof(struct nfsd_renameargs),
+ .pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
.pc_xdrressize = ST,
+ .pc_name = "RENAME",
},
[NFSPROC_LINK] = {
.pc_func = nfsd_proc_link,
.pc_decode = nfssvc_decode_linkargs,
- .pc_encode = nfssvc_encode_void,
+ .pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_linkargs),
- .pc_ressize = sizeof(struct nfsd_void),
+ .pc_argzero = sizeof(struct nfsd_linkargs),
+ .pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
.pc_xdrressize = ST,
+ .pc_name = "LINK",
},
[NFSPROC_SYMLINK] = {
.pc_func = nfsd_proc_symlink,
.pc_decode = nfssvc_decode_symlinkargs,
- .pc_encode = nfssvc_encode_void,
+ .pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_symlinkargs),
- .pc_ressize = sizeof(struct nfsd_void),
+ .pc_argzero = sizeof(struct nfsd_symlinkargs),
+ .pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
.pc_xdrressize = ST,
+ .pc_name = "SYMLINK",
},
[NFSPROC_MKDIR] = {
.pc_func = nfsd_proc_mkdir,
.pc_decode = nfssvc_decode_createargs,
.pc_encode = nfssvc_encode_diropres,
- .pc_release = nfssvc_release_fhandle,
+ .pc_release = nfssvc_release_diropres,
.pc_argsize = sizeof(struct nfsd_createargs),
+ .pc_argzero = sizeof(struct nfsd_createargs),
.pc_ressize = sizeof(struct nfsd_diropres),
.pc_cachetype = RC_REPLBUFF,
.pc_xdrressize = ST+FH+AT,
+ .pc_name = "MKDIR",
},
[NFSPROC_RMDIR] = {
.pc_func = nfsd_proc_rmdir,
.pc_decode = nfssvc_decode_diropargs,
- .pc_encode = nfssvc_encode_void,
+ .pc_encode = nfssvc_encode_statres,
.pc_argsize = sizeof(struct nfsd_diropargs),
- .pc_ressize = sizeof(struct nfsd_void),
+ .pc_argzero = sizeof(struct nfsd_diropargs),
+ .pc_ressize = sizeof(struct nfsd_stat),
.pc_cachetype = RC_REPLSTAT,
.pc_xdrressize = ST,
+ .pc_name = "RMDIR",
},
[NFSPROC_READDIR] = {
.pc_func = nfsd_proc_readdir,
.pc_decode = nfssvc_decode_readdirargs,
.pc_encode = nfssvc_encode_readdirres,
.pc_argsize = sizeof(struct nfsd_readdirargs),
+ .pc_argzero = sizeof(struct nfsd_readdirargs),
.pc_ressize = sizeof(struct nfsd_readdirres),
.pc_cachetype = RC_NOCACHE,
+ .pc_name = "READDIR",
},
[NFSPROC_STATFS] = {
.pc_func = nfsd_proc_statfs,
- .pc_decode = nfssvc_decode_fhandle,
+ .pc_decode = nfssvc_decode_fhandleargs,
.pc_encode = nfssvc_encode_statfsres,
.pc_argsize = sizeof(struct nfsd_fhandle),
+ .pc_argzero = sizeof(struct nfsd_fhandle),
.pc_ressize = sizeof(struct nfsd_statfsres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST+5,
+ .pc_name = "STATFS",
},
};
-
-static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)];
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nfsd_count2[ARRAY_SIZE(nfsd_procedures2)]);
const struct svc_version nfsd_version2 = {
.vs_vers = 2,
- .vs_nproc = 18,
+ .vs_nproc = ARRAY_SIZE(nfsd_procedures2),
.vs_proc = nfsd_procedures2,
.vs_count = nfsd_count2,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS2_SVC_XDRSIZE,
};
-
-/*
- * Map errnos to NFS errnos.
- */
-__be32
-nfserrno (int errno)
-{
- static struct {
- __be32 nfserr;
- int syserr;
- } nfs_errtbl[] = {
- { nfs_ok, 0 },
- { nfserr_perm, -EPERM },
- { nfserr_noent, -ENOENT },
- { nfserr_io, -EIO },
- { nfserr_nxio, -ENXIO },
- { nfserr_fbig, -E2BIG },
- { nfserr_acces, -EACCES },
- { nfserr_exist, -EEXIST },
- { nfserr_xdev, -EXDEV },
- { nfserr_mlink, -EMLINK },
- { nfserr_nodev, -ENODEV },
- { nfserr_notdir, -ENOTDIR },
- { nfserr_isdir, -EISDIR },
- { nfserr_inval, -EINVAL },
- { nfserr_fbig, -EFBIG },
- { nfserr_nospc, -ENOSPC },
- { nfserr_rofs, -EROFS },
- { nfserr_mlink, -EMLINK },
- { nfserr_nametoolong, -ENAMETOOLONG },
- { nfserr_notempty, -ENOTEMPTY },
-#ifdef EDQUOT
- { nfserr_dquot, -EDQUOT },
-#endif
- { nfserr_stale, -ESTALE },
- { nfserr_jukebox, -ETIMEDOUT },
- { nfserr_jukebox, -ERESTARTSYS },
- { nfserr_jukebox, -EAGAIN },
- { nfserr_jukebox, -EWOULDBLOCK },
- { nfserr_jukebox, -ENOMEM },
- { nfserr_io, -ETXTBSY },
- { nfserr_notsupp, -EOPNOTSUPP },
- { nfserr_toosmall, -ETOOSMALL },
- { nfserr_serverfault, -ESERVERFAULT },
- { nfserr_serverfault, -ENFILE },
- { nfserr_io, -EUCLEAN },
- { nfserr_perm, -ENOKEY },
- };
- int i;
-
- for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
- if (nfs_errtbl[i].syserr == errno)
- return nfs_errtbl[i].nfserr;
- }
- WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
- return nfserr_io;
-}
-
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 89cb484f1cfb..b08ae85d53ef 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -12,12 +12,14 @@
#include <linux/module.h>
#include <linux/fs_struct.h>
#include <linux/swap.h>
+#include <linux/siphash.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/lockd/bind.h>
#include <linux/nfsacl.h>
+#include <linux/nfslocalio.h>
#include <linux/seq_file.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>
@@ -27,25 +29,36 @@
#include "cache.h"
#include "vfs.h"
#include "netns.h"
+#include "filecache.h"
+
+#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_SVC
-extern struct svc_program nfsd_program;
+atomic_t nfsd_th_cnt = ATOMIC_INIT(0);
static int nfsd(void *vrqstp);
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+static int nfsd_acl_rpcbind_set(struct net *,
+ const struct svc_program *,
+ u32, int,
+ unsigned short,
+ unsigned short);
+static __be32 nfsd_acl_init_request(struct svc_rqst *,
+ const struct svc_program *,
+ struct svc_process_info *);
+#endif
+static int nfsd_rpcbind_set(struct net *,
+ const struct svc_program *,
+ u32, int,
+ unsigned short,
+ unsigned short);
+static __be32 nfsd_init_request(struct svc_rqst *,
+ const struct svc_program *,
+ struct svc_process_info *);
/*
- * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members
- * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
- * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
- *
- * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a
- * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
- * of nfsd threads must exist and each must listed in ->sp_all_threads in each
- * entry of ->sv_pools[].
- *
- * Transitions of the thread count between zero and non-zero are of particular
- * interest since the svc_serv needs to be created and initialized at that
- * point, or freed.
+ * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members
+ * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks.
*
* Finally, the nfsd_mutex also protects some of the global variables that are
* accessed when nfsd starts and that are settable via the write_* routines in
@@ -57,137 +70,168 @@ static int nfsd(void *vrqstp);
*/
DEFINE_MUTEX(nfsd_mutex);
-/*
- * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
- * nfsd_drc_max_pages limits the total amount of memory available for
- * version 4.1 DRC caches.
- * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
- */
-spinlock_t nfsd_drc_lock;
-unsigned long nfsd_drc_max_mem;
-unsigned long nfsd_drc_mem_used;
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+static const struct svc_version *localio_versions[] = {
+ [1] = &localio_version1,
+};
+
+#define NFSD_LOCALIO_NRVERS ARRAY_SIZE(localio_versions)
+
+#endif /* CONFIG_NFS_LOCALIO */
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
-static struct svc_stat nfsd_acl_svcstats;
static const struct svc_version *nfsd_acl_version[] = {
+# if defined(CONFIG_NFSD_V2_ACL)
[2] = &nfsd_acl_version2,
+# endif
+# if defined(CONFIG_NFSD_V3_ACL)
[3] = &nfsd_acl_version3,
+# endif
};
-#define NFSD_ACL_MINVERS 2
+#define NFSD_ACL_MINVERS 2
#define NFSD_ACL_NRVERS ARRAY_SIZE(nfsd_acl_version)
-static const struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];
-static struct svc_program nfsd_acl_program = {
- .pg_prog = NFS_ACL_PROGRAM,
- .pg_nvers = NFSD_ACL_NRVERS,
- .pg_vers = nfsd_acl_versions,
- .pg_name = "nfsacl",
- .pg_class = "nfsd",
- .pg_stats = &nfsd_acl_svcstats,
- .pg_authenticate = &svc_set_client,
-};
-
-static struct svc_stat nfsd_acl_svcstats = {
- .program = &nfsd_acl_program,
-};
#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
-static const struct svc_version *nfsd_version[] = {
+static const struct svc_version *nfsd_version[NFSD_MAXVERS+1] = {
+#if defined(CONFIG_NFSD_V2)
[2] = &nfsd_version2,
-#if defined(CONFIG_NFSD_V3)
- [3] = &nfsd_version3,
#endif
+ [3] = &nfsd_version3,
#if defined(CONFIG_NFSD_V4)
[4] = &nfsd_version4,
#endif
};
-#define NFSD_MINVERS 2
-#define NFSD_NRVERS ARRAY_SIZE(nfsd_version)
-static const struct svc_version *nfsd_versions[NFSD_NRVERS];
-
-struct svc_program nfsd_program = {
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- .pg_next = &nfsd_acl_program,
-#endif
+struct svc_program nfsd_programs[] = {
+ {
.pg_prog = NFS_PROGRAM, /* program number */
- .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */
- .pg_vers = nfsd_versions, /* version table */
+ .pg_nvers = NFSD_MAXVERS+1, /* nr of entries in nfsd_version */
+ .pg_vers = nfsd_version, /* version table */
.pg_name = "nfsd", /* program name */
.pg_class = "nfsd", /* authentication class */
- .pg_stats = &nfsd_svcstats, /* version table */
- .pg_authenticate = &svc_set_client, /* export authentication */
-
+ .pg_authenticate = svc_set_client, /* export authentication */
+ .pg_init_request = nfsd_init_request,
+ .pg_rpcbind_set = nfsd_rpcbind_set,
+ },
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+ {
+ .pg_prog = NFS_ACL_PROGRAM,
+ .pg_nvers = NFSD_ACL_NRVERS,
+ .pg_vers = nfsd_acl_version,
+ .pg_name = "nfsacl",
+ .pg_class = "nfsd",
+ .pg_authenticate = svc_set_client,
+ .pg_init_request = nfsd_acl_init_request,
+ .pg_rpcbind_set = nfsd_acl_rpcbind_set,
+ },
+#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ {
+ .pg_prog = NFS_LOCALIO_PROGRAM,
+ .pg_nvers = NFSD_LOCALIO_NRVERS,
+ .pg_vers = localio_versions,
+ .pg_name = "nfslocalio",
+ .pg_class = "nfsd",
+ .pg_authenticate = svc_set_client,
+ .pg_init_request = svc_generic_init_request,
+ .pg_rpcbind_set = svc_generic_rpcbind_set,
+ }
+#endif /* CONFIG_NFS_LOCALIO */
};
-static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = {
- [0] = 1,
- [1] = 1,
- [2] = 1,
-};
+bool nfsd_support_version(int vers)
+{
+ if (vers >= NFSD_MINVERS && vers <= NFSD_MAXVERS)
+ return nfsd_version[vers] != NULL;
+ return false;
+}
-int nfsd_vers(int vers, enum vers_op change)
+int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change)
{
- if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
+ if (vers < NFSD_MINVERS || vers > NFSD_MAXVERS)
return 0;
switch(change) {
case NFSD_SET:
- nfsd_versions[vers] = nfsd_version[vers];
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- if (vers < NFSD_ACL_NRVERS)
- nfsd_acl_versions[vers] = nfsd_acl_version[vers];
-#endif
+ nn->nfsd_versions[vers] = nfsd_support_version(vers);
break;
case NFSD_CLEAR:
- nfsd_versions[vers] = NULL;
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
- if (vers < NFSD_ACL_NRVERS)
- nfsd_acl_versions[vers] = NULL;
-#endif
+ nn->nfsd_versions[vers] = false;
break;
case NFSD_TEST:
- return nfsd_versions[vers] != NULL;
+ return nn->nfsd_versions[vers];
case NFSD_AVAIL:
- return nfsd_version[vers] != NULL;
+ return nfsd_support_version(vers);
}
return 0;
}
static void
-nfsd_adjust_nfsd_versions4(void)
+nfsd_adjust_nfsd_versions4(struct nfsd_net *nn)
{
unsigned i;
for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++) {
- if (nfsd_supported_minorversions[i])
+ if (nn->nfsd4_minorversions[i])
return;
}
- nfsd_vers(4, NFSD_CLEAR);
+ nfsd_vers(nn, 4, NFSD_CLEAR);
}
-int nfsd_minorversion(u32 minorversion, enum vers_op change)
+int nfsd_minorversion(struct nfsd_net *nn, u32 minorversion, enum vers_op change)
{
if (minorversion > NFSD_SUPPORTED_MINOR_VERSION &&
change != NFSD_AVAIL)
return -1;
+
switch(change) {
case NFSD_SET:
- nfsd_supported_minorversions[minorversion] = true;
- nfsd_vers(4, NFSD_SET);
+ nfsd_vers(nn, 4, NFSD_SET);
+ nn->nfsd4_minorversions[minorversion] =
+ nfsd_vers(nn, 4, NFSD_TEST);
break;
case NFSD_CLEAR:
- nfsd_supported_minorversions[minorversion] = false;
- nfsd_adjust_nfsd_versions4();
+ nn->nfsd4_minorversions[minorversion] = false;
+ nfsd_adjust_nfsd_versions4(nn);
break;
case NFSD_TEST:
- return nfsd_supported_minorversions[minorversion];
+ return nn->nfsd4_minorversions[minorversion];
case NFSD_AVAIL:
- return minorversion <= NFSD_SUPPORTED_MINOR_VERSION;
+ return minorversion <= NFSD_SUPPORTED_MINOR_VERSION &&
+ nfsd_vers(nn, 4, NFSD_AVAIL);
}
return 0;
}
+bool nfsd_net_try_get(struct net *net) __must_hold(rcu)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ return (nn && percpu_ref_tryget_live(&nn->nfsd_net_ref));
+}
+
+void nfsd_net_put(struct net *net) __must_hold(rcu)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ percpu_ref_put(&nn->nfsd_net_ref);
+}
+
+static void nfsd_net_done(struct percpu_ref *ref)
+{
+ struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref);
+
+ complete(&nn->nfsd_net_confirm_done);
+}
+
+static void nfsd_net_free(struct percpu_ref *ref)
+{
+ struct nfsd_net *nn = container_of(ref, struct nfsd_net, nfsd_net_ref);
+
+ complete(&nn->nfsd_net_free_done);
+}
+
/*
* Maximum number of nfsd processes
*/
@@ -205,52 +249,26 @@ int nfsd_nrthreads(struct net *net)
return rv;
}
-static int nfsd_init_socks(struct net *net)
-{
- int error;
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
- if (!list_empty(&nn->nfsd_serv->sv_permsocks))
- return 0;
-
- error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT,
- SVC_SOCK_DEFAULTS);
- if (error < 0)
- return error;
-
- error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT,
- SVC_SOCK_DEFAULTS);
- if (error < 0)
- return error;
-
- return 0;
-}
-
static int nfsd_users = 0;
-static int nfsd_startup_generic(int nrservs)
+static int nfsd_startup_generic(void)
{
int ret;
if (nfsd_users++)
return 0;
- /*
- * Readahead param cache - will no-op if it already exists.
- * (Note therefore results will be suboptimal if number of
- * threads is modified after nfsd start.)
- */
- ret = nfsd_racache_init(2*nrservs);
+ ret = nfsd_file_cache_init();
if (ret)
goto dec_users;
ret = nfs4_state_start();
if (ret)
- goto out_racache;
+ goto out_file_cache;
return 0;
-out_racache:
- nfsd_racache_shutdown();
+out_file_cache:
+ nfsd_file_cache_shutdown();
dec_users:
nfsd_users--;
return ret;
@@ -262,19 +280,72 @@ static void nfsd_shutdown_generic(void)
return;
nfs4_state_shutdown();
- nfsd_racache_shutdown();
+ nfsd_file_cache_shutdown();
}
-static bool nfsd_needs_lockd(void)
+static bool nfsd_needs_lockd(struct nfsd_net *nn)
{
-#if defined(CONFIG_NFSD_V3)
- return (nfsd_versions[2] != NULL) || (nfsd_versions[3] != NULL);
-#else
- return (nfsd_versions[2] != NULL);
-#endif
+ return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST);
+}
+
+/**
+ * nfsd_copy_write_verifier - Atomically copy a write verifier
+ * @verf: buffer in which to receive the verifier cookie
+ * @nn: NFS net namespace
+ *
+ * This function provides a wait-free mechanism for copying the
+ * namespace's write verifier without tearing it.
+ */
+void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn)
+{
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&nn->writeverf_lock);
+ memcpy(verf, nn->writeverf, sizeof(nn->writeverf));
+ } while (read_seqretry(&nn->writeverf_lock, seq));
+}
+
+static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn)
+{
+ struct timespec64 now;
+ u64 verf;
+
+ /*
+ * Because the time value is hashed, y2038 time_t overflow
+ * is irrelevant in this usage.
+ */
+ ktime_get_raw_ts64(&now);
+ verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key);
+ memcpy(nn->writeverf, &verf, sizeof(nn->writeverf));
+}
+
+/**
+ * nfsd_reset_write_verifier - Generate a new write verifier
+ * @nn: NFS net namespace
+ *
+ * This function updates the ->writeverf field of @nn. This field
+ * contains an opaque cookie that, according to Section 18.32.3 of
+ * RFC 8881, "the client can use to determine whether a server has
+ * changed instance state (e.g., server restart) between a call to
+ * WRITE and a subsequent call to either WRITE or COMMIT. This
+ * cookie MUST be unchanged during a single instance of the NFSv4.1
+ * server and MUST be unique between instances of the NFSv4.1
+ * server."
+ */
+void nfsd_reset_write_verifier(struct nfsd_net *nn)
+{
+ write_seqlock(&nn->writeverf_lock);
+ nfsd_reset_write_verifier_locked(nn);
+ write_sequnlock(&nn->writeverf_lock);
}
-static int nfsd_startup_net(int nrservs, struct net *net)
+/*
+ * Crank up a set of per-namespace resources for a new NFSD instance,
+ * including lockd, a duplicate reply cache, an open file cache
+ * instance, and a cache of NFSv4 state objects.
+ */
+static int nfsd_startup_net(struct net *net, const struct cred *cred)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int ret;
@@ -282,31 +353,49 @@ static int nfsd_startup_net(int nrservs, struct net *net)
if (nn->nfsd_net_up)
return 0;
- ret = nfsd_startup_generic(nrservs);
+ ret = nfsd_startup_generic();
if (ret)
return ret;
- ret = nfsd_init_socks(net);
- if (ret)
+
+ if (list_empty(&nn->nfsd_serv->sv_permsocks)) {
+ pr_warn("NFSD: Failed to start, no listeners configured.\n");
+ ret = -EIO;
goto out_socks;
+ }
- if (nfsd_needs_lockd() && !nn->lockd_up) {
- ret = lockd_up(net);
+ if (nfsd_needs_lockd(nn) && !nn->lockd_up) {
+ ret = lockd_up(net, cred);
if (ret)
goto out_socks;
- nn->lockd_up = 1;
+ nn->lockd_up = true;
}
- ret = nfs4_state_start_net(net);
+ ret = nfsd_file_cache_start_net(net);
if (ret)
goto out_lockd;
+ ret = nfsd_reply_cache_init(nn);
+ if (ret)
+ goto out_filecache;
+
+#ifdef CONFIG_NFSD_V4_2_INTER_SSC
+ nfsd4_ssc_init_umount_work(nn);
+#endif
+ ret = nfs4_state_start_net(net);
+ if (ret)
+ goto out_reply_cache;
+
nn->nfsd_net_up = true;
return 0;
+out_reply_cache:
+ nfsd_reply_cache_shutdown(nn);
+out_filecache:
+ nfsd_file_cache_shutdown_net(net);
out_lockd:
if (nn->lockd_up) {
lockd_down(net);
- nn->lockd_up = 0;
+ nn->lockd_up = false;
}
out_socks:
nfsd_shutdown_generic();
@@ -317,15 +406,29 @@ static void nfsd_shutdown_net(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ if (!nn->nfsd_net_up)
+ return;
+
+ percpu_ref_kill_and_confirm(&nn->nfsd_net_ref, nfsd_net_done);
+ wait_for_completion(&nn->nfsd_net_confirm_done);
+
+ nfsd_export_flush(net);
nfs4_state_shutdown_net(net);
+ nfsd_reply_cache_shutdown(nn);
+ nfsd_file_cache_shutdown_net(net);
if (nn->lockd_up) {
lockd_down(net);
- nn->lockd_up = 0;
+ nn->lockd_up = false;
}
+
+ wait_for_completion(&nn->nfsd_net_free_done);
+ percpu_ref_exit(&nn->nfsd_net_ref);
+
nn->nfsd_net_up = false;
nfsd_shutdown_generic();
}
+static DEFINE_SPINLOCK(nfsd_notifier_lock);
static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
@@ -335,18 +438,17 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in sin;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nn->ntf_refcnt))
+ if (event != NETDEV_DOWN || !nn->nfsd_serv)
goto out;
+ spin_lock(&nfsd_notifier_lock);
if (nn->nfsd_serv) {
dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ifa->ifa_local;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin);
}
- atomic_dec(&nn->ntf_refcnt);
- wake_up(&nn->ntf_wq);
+ spin_unlock(&nfsd_notifier_lock);
out:
return NOTIFY_DONE;
@@ -366,10 +468,10 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct sockaddr_in6 sin6;
- if ((event != NETDEV_DOWN) ||
- !atomic_inc_not_zero(&nn->ntf_refcnt))
+ if (event != NETDEV_DOWN || !nn->nfsd_serv)
goto out;
+ spin_lock(&nfsd_notifier_lock);
if (nn->nfsd_serv) {
dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr);
sin6.sin6_family = AF_INET6;
@@ -378,8 +480,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this,
sin6.sin6_scope_id = ifa->idev->dev->ifindex;
svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6);
}
- atomic_dec(&nn->ntf_refcnt);
- wake_up(&nn->ntf_wq);
+ spin_unlock(&nfsd_notifier_lock);
+
out:
return NOTIFY_DONE;
}
@@ -392,11 +494,21 @@ static struct notifier_block nfsd_inet6addr_notifier = {
/* Only used under nfsd_mutex, so this atomic may be overkill: */
static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0);
-static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
+/**
+ * nfsd_destroy_serv - tear down NFSD's svc_serv for a namespace
+ * @net: network namespace the NFS service is associated with
+ */
+void nfsd_destroy_serv(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv = nn->nfsd_serv;
+
+ lockdep_assert_held(&nfsd_mutex);
+
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = NULL;
+ spin_unlock(&nfsd_notifier_lock);
- atomic_dec(&nn->ntf_refcnt);
/* check if the notifier still has clients */
if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
@@ -404,64 +516,36 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
unregister_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
- wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0);
/*
* write_ports can create the server without actually starting
- * any threads--if we get shut down before any threads are
- * started, then nfsd_last_thread will be run before any of this
+ * any threads. If we get shut down before any threads are
+ * started, then nfsd_destroy_serv will be run before any of this
* other initialization has been done except the rpcb information.
*/
- svc_rpcb_cleanup(serv, net);
- if (!nn->nfsd_net_up)
- return;
-
+ svc_xprt_destroy_all(serv, net, true);
nfsd_shutdown_net(net);
- printk(KERN_WARNING "nfsd: last server has exited, flushing export "
- "cache\n");
- nfsd_export_flush(net);
+ svc_destroy(&serv);
}
-void nfsd_reset_versions(void)
+void nfsd_reset_versions(struct nfsd_net *nn)
{
int i;
- for (i = 0; i < NFSD_NRVERS; i++)
- if (nfsd_vers(i, NFSD_TEST))
+ for (i = 0; i <= NFSD_MAXVERS; i++)
+ if (nfsd_vers(nn, i, NFSD_TEST))
return;
- for (i = 0; i < NFSD_NRVERS; i++)
+ for (i = 0; i <= NFSD_MAXVERS; i++)
if (i != 4)
- nfsd_vers(i, NFSD_SET);
+ nfsd_vers(nn, i, NFSD_SET);
else {
int minor = 0;
- while (nfsd_minorversion(minor, NFSD_SET) >= 0)
+ while (nfsd_minorversion(nn, minor, NFSD_SET) >= 0)
minor++;
}
}
-/*
- * Each session guarantees a negotiated per slot memory cache for replies
- * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
- * NFSv4.1 server might want to use more memory for a DRC than a machine
- * with mutiple services.
- *
- * Impose a hard limit on the number of pages for the DRC which varies
- * according to the machines free pages. This is of course only a default.
- *
- * For now this is a #defined shift which could be under admin control
- * in the future.
- */
-static void set_max_drc(void)
-{
- #define NFSD_DRC_SIZE_SHIFT 7
- nfsd_drc_max_mem = (nr_free_buffer_pages()
- >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
- nfsd_drc_mem_used = 0;
- spin_lock_init(&nfsd_drc_lock);
- dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
-}
-
static int nfsd_get_default_max_blksize(void)
{
struct sysinfo i;
@@ -477,46 +561,72 @@ static int nfsd_get_default_max_blksize(void)
*/
target >>= 12;
- ret = NFSSVC_MAXBLKSIZE;
+ ret = NFSSVC_DEFBLKSIZE;
while (ret > target && ret >= 8*1024*2)
ret /= 2;
return ret;
}
-static const struct svc_serv_ops nfsd_thread_sv_ops = {
- .svo_shutdown = nfsd_last_thread,
- .svo_function = nfsd,
- .svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_setup = svc_set_num_threads,
- .svo_module = THIS_MODULE,
-};
+void nfsd_shutdown_threads(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv;
+
+ mutex_lock(&nfsd_mutex);
+ serv = nn->nfsd_serv;
+ if (serv == NULL) {
+ mutex_unlock(&nfsd_mutex);
+ return;
+ }
+
+ /* Kill outstanding nfsd threads */
+ svc_set_num_threads(serv, NULL, 0);
+ nfsd_destroy_serv(net);
+ mutex_unlock(&nfsd_mutex);
+}
+
+struct svc_rqst *nfsd_current_rqst(void)
+{
+ if (kthread_func(current) == nfsd)
+ return kthread_data(current);
+ return NULL;
+}
int nfsd_create_serv(struct net *net)
{
int error;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv;
WARN_ON(!mutex_is_locked(&nfsd_mutex));
- if (nn->nfsd_serv) {
- svc_get(nn->nfsd_serv);
+ if (nn->nfsd_serv)
return 0;
- }
+
+ error = percpu_ref_init(&nn->nfsd_net_ref, nfsd_net_free,
+ 0, GFP_KERNEL);
+ if (error)
+ return error;
+ init_completion(&nn->nfsd_net_free_done);
+ init_completion(&nn->nfsd_net_confirm_done);
+
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
- nfsd_reset_versions();
- nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
- &nfsd_thread_sv_ops);
- if (nn->nfsd_serv == NULL)
+ nfsd_reset_versions(nn);
+ serv = svc_create_pooled(nfsd_programs, ARRAY_SIZE(nfsd_programs),
+ &nn->nfsd_svcstats,
+ nfsd_max_blksize, nfsd);
+ if (serv == NULL)
return -ENOMEM;
- nn->nfsd_serv->sv_maxconn = nn->max_connections;
- error = svc_bind(nn->nfsd_serv, net);
+ error = svc_bind(serv, net);
if (error < 0) {
- svc_destroy(nn->nfsd_serv);
+ svc_destroy(&serv);
return error;
}
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = serv;
+ spin_unlock(&nfsd_notifier_lock);
- set_max_drc();
/* check if the notifier is already set */
if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
register_inetaddr_notifier(&nfsd_inetaddr_notifier);
@@ -524,8 +634,7 @@ int nfsd_create_serv(struct net *net)
register_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
- atomic_inc(&nn->ntf_refcnt);
- ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */
+ nfsd_reset_write_verifier(nn);
return 0;
}
@@ -541,29 +650,29 @@ int nfsd_nrpools(struct net *net)
int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
{
- int i = 0;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv = nn->nfsd_serv;
+ int i;
- if (nn->nfsd_serv != NULL) {
- for (i = 0; i < nn->nfsd_serv->sv_nrpools && i < n; i++)
- nthreads[i] = nn->nfsd_serv->sv_pools[i].sp_nrthreads;
- }
-
+ if (serv)
+ for (i = 0; i < serv->sv_nrpools && i < n; i++)
+ nthreads[i] = serv->sv_pools[i].sp_nrthreads;
return 0;
}
-void nfsd_destroy(struct net *net)
-{
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- int destroy = (nn->nfsd_serv->sv_nrthreads == 1);
-
- if (destroy)
- svc_shutdown_net(nn->nfsd_serv, net);
- svc_destroy(nn->nfsd_serv);
- if (destroy)
- nn->nfsd_serv = NULL;
-}
-
+/**
+ * nfsd_set_nrthreads - set the number of running threads in the net's service
+ * @n: number of array members in @nthreads
+ * @nthreads: array of thread counts for each pool
+ * @net: network namespace to operate within
+ *
+ * This function alters the number of running threads for the given network
+ * namespace in each pool. If passed an array longer then the number of pools
+ * the extra pool settings are ignored. If passed an array shorter than the
+ * number of pools, the missing values are interpreted as 0's.
+ *
+ * Returns 0 on success or a negative errno on error.
+ */
int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
{
int i = 0;
@@ -571,11 +680,18 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
int err = 0;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- WARN_ON(!mutex_is_locked(&nfsd_mutex));
+ lockdep_assert_held(&nfsd_mutex);
if (nn->nfsd_serv == NULL || n <= 0)
return 0;
+ /*
+ * Special case: When n == 1, pass in NULL for the pool, so that the
+ * change is distributed equally among them.
+ */
+ if (n == 1)
+ return svc_set_num_threads(nn->nfsd_serv, NULL, nthreads[0]);
+
if (n > nn->nfsd_serv->sv_nrpools)
n = nn->nfsd_serv->sv_nrpools;
@@ -588,7 +704,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
if (tot > NFSD_MAXSERVS) {
/* total too large: scale down requested numbers */
for (i = 0; i < n && tot > 0; i++) {
- int new = nthreads[i] * NFSD_MAXSERVS / tot;
+ int new = nthreads[i] * NFSD_MAXSERVS / tot;
tot -= (nthreads[i] - new);
nthreads[i] = new;
}
@@ -598,75 +714,166 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
}
}
- /*
- * There must always be a thread in pool 0; the admin
- * can't shut down NFS completely using pool_threads.
- */
- if (nthreads[0] == 0)
- nthreads[0] = 1;
-
/* apply the new numbers */
- svc_get(nn->nfsd_serv);
for (i = 0; i < n; i++) {
- err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
- &nn->nfsd_serv->sv_pools[i], nthreads[i]);
+ err = svc_set_num_threads(nn->nfsd_serv,
+ &nn->nfsd_serv->sv_pools[i],
+ nthreads[i]);
if (err)
- break;
+ goto out;
}
- nfsd_destroy(net);
+
+ /* Anything undefined in array is considered to be 0 */
+ for (i = n; i < nn->nfsd_serv->sv_nrpools; ++i) {
+ err = svc_set_num_threads(nn->nfsd_serv,
+ &nn->nfsd_serv->sv_pools[i],
+ 0);
+ if (err)
+ goto out;
+ }
+out:
return err;
}
-/*
- * Adjust the number of threads and return the new number of threads.
- * This is also the function that starts the server if necessary, if
- * this is the first time nrservs is nonzero.
+/**
+ * nfsd_svc: start up or shut down the nfsd server
+ * @n: number of array members in @nthreads
+ * @nthreads: array of thread counts for each pool
+ * @net: network namespace to operate within
+ * @cred: credentials to use for xprt creation
+ * @scope: server scope value (defaults to nodename)
+ *
+ * Adjust the number of threads in each pool and return the new
+ * total number of threads in the service.
*/
int
-nfsd_svc(int nrservs, struct net *net)
+nfsd_svc(int n, int *nthreads, struct net *net, const struct cred *cred, const char *scope)
{
int error;
- bool nfsd_up_before;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv;
+
+ lockdep_assert_held(&nfsd_mutex);
- mutex_lock(&nfsd_mutex);
dprintk("nfsd: creating service\n");
- nrservs = max(nrservs, 0);
- nrservs = min(nrservs, NFSD_MAXSERVS);
- error = 0;
-
- if (nrservs == 0 && nn->nfsd_serv == NULL)
- goto out;
+ strscpy(nn->nfsd_name, scope ? scope : utsname()->nodename,
+ sizeof(nn->nfsd_name));
error = nfsd_create_serv(net);
if (error)
goto out;
+ serv = nn->nfsd_serv;
- nfsd_up_before = nn->nfsd_net_up;
-
- error = nfsd_startup_net(nrservs, net);
+ error = nfsd_startup_net(net, cred);
if (error)
- goto out_destroy;
- error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
- NULL, nrservs);
+ goto out_put;
+ error = nfsd_set_nrthreads(n, nthreads, net);
if (error)
- goto out_shutdown;
- /* We are holding a reference to nn->nfsd_serv which
- * we don't want to count in the return value,
- * so subtract 1
- */
- error = nn->nfsd_serv->sv_nrthreads - 1;
-out_shutdown:
- if (error < 0 && !nfsd_up_before)
- nfsd_shutdown_net(net);
-out_destroy:
- nfsd_destroy(net); /* Release server */
+ goto out_put;
+ error = serv->sv_nrthreads;
+out_put:
+ if (serv->sv_nrthreads == 0)
+ nfsd_destroy_serv(net);
out:
- mutex_unlock(&nfsd_mutex);
return error;
}
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+static bool
+nfsd_support_acl_version(int vers)
+{
+ if (vers >= NFSD_ACL_MINVERS && vers < NFSD_ACL_NRVERS)
+ return nfsd_acl_version[vers] != NULL;
+ return false;
+}
+
+static int
+nfsd_acl_rpcbind_set(struct net *net, const struct svc_program *progp,
+ u32 version, int family, unsigned short proto,
+ unsigned short port)
+{
+ if (!nfsd_support_acl_version(version) ||
+ !nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST))
+ return 0;
+ return svc_generic_rpcbind_set(net, progp, version, family,
+ proto, port);
+}
+
+static __be32
+nfsd_acl_init_request(struct svc_rqst *rqstp,
+ const struct svc_program *progp,
+ struct svc_process_info *ret)
+{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ int i;
+
+ if (likely(nfsd_support_acl_version(rqstp->rq_vers) &&
+ nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST)))
+ return svc_generic_init_request(rqstp, progp, ret);
+
+ ret->mismatch.lovers = NFSD_ACL_NRVERS;
+ for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) {
+ if (nfsd_support_acl_version(rqstp->rq_vers) &&
+ nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.lovers = i;
+ break;
+ }
+ }
+ if (ret->mismatch.lovers == NFSD_ACL_NRVERS)
+ return rpc_prog_unavail;
+ ret->mismatch.hivers = NFSD_ACL_MINVERS;
+ for (i = NFSD_ACL_NRVERS - 1; i >= NFSD_ACL_MINVERS; i--) {
+ if (nfsd_support_acl_version(rqstp->rq_vers) &&
+ nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.hivers = i;
+ break;
+ }
+ }
+ return rpc_prog_mismatch;
+}
+#endif
+
+static int
+nfsd_rpcbind_set(struct net *net, const struct svc_program *progp,
+ u32 version, int family, unsigned short proto,
+ unsigned short port)
+{
+ if (!nfsd_vers(net_generic(net, nfsd_net_id), version, NFSD_TEST))
+ return 0;
+ return svc_generic_rpcbind_set(net, progp, version, family,
+ proto, port);
+}
+
+static __be32
+nfsd_init_request(struct svc_rqst *rqstp,
+ const struct svc_program *progp,
+ struct svc_process_info *ret)
+{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ int i;
+
+ if (likely(nfsd_vers(nn, rqstp->rq_vers, NFSD_TEST)))
+ return svc_generic_init_request(rqstp, progp, ret);
+
+ ret->mismatch.lovers = NFSD_MAXVERS + 1;
+ for (i = NFSD_MINVERS; i <= NFSD_MAXVERS; i++) {
+ if (nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.lovers = i;
+ break;
+ }
+ }
+ if (ret->mismatch.lovers > NFSD_MAXVERS)
+ return rpc_prog_unavail;
+ ret->mismatch.hivers = NFSD_MINVERS;
+ for (i = NFSD_MAXVERS; i >= NFSD_MINVERS; i--) {
+ if (nfsd_vers(nn, i, NFSD_TEST)) {
+ ret->mismatch.hivers = i;
+ break;
+ }
+ }
+ return rpc_prog_mismatch;
+}
/*
* This is the NFS server kernel thread
@@ -678,216 +885,146 @@ nfsd(void *vrqstp)
struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
struct net *net = perm_sock->xpt_net;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- int err;
-
- /* Lock module and set up kernel thread */
- mutex_lock(&nfsd_mutex);
/* At this point, the thread shares current->fs
* with the init process. We need to create files with the
- * umask as defined by the client instead of init's umask. */
- if (unshare_fs_struct() < 0) {
- printk("Unable to start nfsd thread: out of memory\n");
- goto out;
- }
+ * umask as defined by the client instead of init's umask.
+ */
+ svc_thread_init_status(rqstp, unshare_fs_struct());
current->fs->umask = 0;
- /*
- * thread is spawned with all signals set to SIG_IGN, re-enable
- * the ones that will bring down the thread
- */
- allow_signal(SIGKILL);
- allow_signal(SIGHUP);
- allow_signal(SIGINT);
- allow_signal(SIGQUIT);
-
- nfsdstats.th_cnt++;
- mutex_unlock(&nfsd_mutex);
+ atomic_inc(&nfsd_th_cnt);
set_freezable();
/*
* The main request loop
*/
- for (;;) {
- /* Update sv_maxconn if it has changed */
- rqstp->rq_server->sv_maxconn = nn->max_connections;
-
- /*
- * Find a socket with data available and call its
- * recvfrom routine.
- */
- while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
- ;
- if (err == -EINTR)
- break;
- validate_process_creds();
- svc_process(rqstp);
- validate_process_creds();
+ while (!svc_thread_should_stop(rqstp)) {
+ svc_recv(rqstp);
+ nfsd_file_net_dispose(nn);
}
- /* Clear signals before calling svc_exit_thread() */
- flush_signals(current);
-
- mutex_lock(&nfsd_mutex);
- nfsdstats.th_cnt --;
-
-out:
- rqstp->rq_server = NULL;
+ atomic_dec(&nfsd_th_cnt);
/* Release the thread */
svc_exit_thread(rqstp);
-
- nfsd_destroy(net);
-
- /* Release module */
- mutex_unlock(&nfsd_mutex);
- module_put_and_exit(0);
return 0;
}
-static __be32 map_new_errors(u32 vers, __be32 nfserr)
-{
- if (nfserr == nfserr_jukebox && vers == 2)
- return nfserr_dropit;
- if (nfserr == nfserr_wrongsec && vers < 4)
- return nfserr_acces;
- return nfserr;
-}
-
-/*
- * A write procedure can have a large argument, and a read procedure can
- * have a large reply, but no NFSv2 or NFSv3 procedure has argument and
- * reply that can both be larger than a page. The xdr code has taken
- * advantage of this assumption to be a sloppy about bounds checking in
- * some cases. Pending a rewrite of the NFSv2/v3 xdr code to fix that
- * problem, we enforce these assumptions here:
+/**
+ * nfsd_dispatch - Process an NFS or NFSACL or LOCALIO Request
+ * @rqstp: incoming request
+ *
+ * This RPC dispatcher integrates the NFS server's duplicate reply cache.
+ *
+ * Return values:
+ * %0: Processing complete; do not send a Reply
+ * %1: Processing complete; send Reply in rqstp->rq_res
*/
-static bool nfs_request_too_big(struct svc_rqst *rqstp,
- const struct svc_procedure *proc)
+int nfsd_dispatch(struct svc_rqst *rqstp)
{
+ const struct svc_procedure *proc = rqstp->rq_procinfo;
+ __be32 *statp = rqstp->rq_accept_statp;
+ struct nfsd_cacherep *rp;
+ unsigned int start, len;
+ __be32 *nfs_reply;
+
/*
- * The ACL code has more careful bounds-checking and is not
- * susceptible to this problem:
+ * Give the xdr decoder a chance to change this if it wants
+ * (necessary in the NFSv4.0 compound case)
*/
- if (rqstp->rq_prog != NFS_PROGRAM)
- return false;
+ rqstp->rq_cachetype = proc->pc_cachetype;
+
/*
- * Ditto NFSv4 (which can in theory have argument and reply both
- * more than a page):
+ * ->pc_decode advances the argument stream past the NFS
+ * Call header, so grab the header's starting location and
+ * size now for the call to nfsd_cache_lookup().
*/
- if (rqstp->rq_vers >= 4)
- return false;
- /* The reply will be small, we're OK: */
- if (proc->pc_xdrressize > 0 &&
- proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE))
- return false;
-
- return rqstp->rq_arg.len > PAGE_SIZE;
-}
-
-int
-nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
-{
- const struct svc_procedure *proc;
- __be32 nfserr;
- __be32 *nfserrp;
-
- dprintk("nfsd_dispatch: vers %d proc %d\n",
- rqstp->rq_vers, rqstp->rq_proc);
- proc = rqstp->rq_procinfo;
+ start = xdr_stream_pos(&rqstp->rq_arg_stream);
+ len = xdr_stream_remaining(&rqstp->rq_arg_stream);
+ if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
+ goto out_decode_err;
- if (nfs_request_too_big(rqstp, proc)) {
- dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers);
- *statp = rpc_garbage_args;
- return 1;
- }
/*
- * Give the xdr decoder a chance to change this if it wants
- * (necessary in the NFSv4.0 compound case)
+ * Release rq_status_counter setting it to an odd value after the rpc
+ * request has been properly parsed. rq_status_counter is used to
+ * notify the consumers if the rqstp fields are stable
+ * (rq_status_counter is odd) or not meaningful (rq_status_counter
+ * is even).
*/
- rqstp->rq_cachetype = proc->pc_cachetype;
- /* Decode arguments */
- if (proc->pc_decode &&
- !proc->pc_decode(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base)) {
- dprintk("nfsd: failed to decode arguments!\n");
- *statp = rpc_garbage_args;
- return 1;
- }
+ smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
- /* Check whether we have this call in the cache. */
- switch (nfsd_cache_lookup(rqstp)) {
- case RC_DROPIT:
- return 0;
+ rp = NULL;
+ switch (nfsd_cache_lookup(rqstp, start, len, &rp)) {
+ case RC_DOIT:
+ break;
case RC_REPLY:
- return 1;
- case RC_DOIT:;
- /* do it */
+ goto out_cached_reply;
+ case RC_DROPIT:
+ goto out_dropit;
}
- /* need to grab the location to store the status, as
- * nfsv4 does some encoding while processing
- */
- nfserrp = rqstp->rq_res.head[0].iov_base
- + rqstp->rq_res.head[0].iov_len;
- rqstp->rq_res.head[0].iov_len += sizeof(__be32);
-
- /* Now call the procedure handler, and encode NFS status. */
- nfserr = proc->pc_func(rqstp);
- nfserr = map_new_errors(rqstp->rq_vers, nfserr);
- if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) {
- dprintk("nfsd: Dropping request; may be revisited later\n");
- nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
- return 0;
- }
+ nfs_reply = xdr_inline_decode(&rqstp->rq_res_stream, 0);
+ *statp = proc->pc_func(rqstp);
+ if (test_bit(RQ_DROPME, &rqstp->rq_flags))
+ goto out_update_drop;
- if (rqstp->rq_proc != 0)
- *nfserrp++ = nfserr;
+ if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
+ goto out_encode_err;
- /* Encode result.
- * For NFSv2, additional info is never returned in case of an error.
+ /*
+ * Release rq_status_counter setting it to an even value after the rpc
+ * request has been properly processed.
*/
- if (!(nfserr && rqstp->rq_vers == 2)) {
- if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp)) {
- /* Failed to encode result. Release cache entry */
- dprintk("nfsd: failed to encode result!\n");
- nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
- *statp = rpc_system_err;
- return 1;
- }
- }
+ smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
- /* Store reply in cache. */
- nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
+ nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply);
+out_cached_reply:
return 1;
-}
-int nfsd_pool_stats_open(struct inode *inode, struct file *file)
-{
- int ret;
- struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
+out_decode_err:
+ trace_nfsd_garbage_args_err(rqstp);
+ *statp = rpc_garbage_args;
+ return 1;
- mutex_lock(&nfsd_mutex);
- if (nn->nfsd_serv == NULL) {
- mutex_unlock(&nfsd_mutex);
- return -ENODEV;
- }
- /* bump up the psudo refcount while traversing */
- svc_get(nn->nfsd_serv);
- ret = svc_pool_stats_open(nn->nfsd_serv, file);
- mutex_unlock(&nfsd_mutex);
- return ret;
+out_update_drop:
+ nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL);
+out_dropit:
+ return 0;
+
+out_encode_err:
+ trace_nfsd_cant_encode_err(rqstp);
+ nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL);
+ *statp = rpc_system_err;
+ return 1;
}
-int nfsd_pool_stats_release(struct inode *inode, struct file *file)
+/**
+ * nfssvc_decode_voidarg - Decode void arguments
+ * @rqstp: Server RPC transaction context
+ * @xdr: XDR stream positioned at arguments to decode
+ *
+ * Return values:
+ * %false: Arguments were not valid
+ * %true: Decoding was successful
+ */
+bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
- int ret = seq_release(inode, file);
- struct net *net = inode->i_sb->s_fs_info;
+ return true;
+}
- mutex_lock(&nfsd_mutex);
- /* this function really, really should have been called svc_put() */
- nfsd_destroy(net);
- mutex_unlock(&nfsd_mutex);
- return ret;
+/**
+ * nfssvc_encode_voidres - Encode void results
+ * @rqstp: Server RPC transaction context
+ * @xdr: XDR stream into which to encode results
+ *
+ * Return values:
+ * %false: Local error while encoding
+ * %true: Encoding was successful
+ */
+bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ return true;
}
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 6b2e8b73d36e..fc262ceafca9 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -9,12 +9,10 @@
#include "xdr.h"
#include "auth.h"
-#define NFSDDBG_FACILITY NFSDDBG_XDR
-
/*
* Mapping of S_IF* types to NFS file types
*/
-static u32 nfs_ftypes[] = {
+static const u32 nfs_ftypes[] = {
NFNON, NFCHR, NFCHR, NFBAD,
NFDIR, NFBAD, NFBLK, NFBAD,
NFREG, NFBAD, NFLNK, NFBAD,
@@ -23,93 +21,168 @@ static u32 nfs_ftypes[] = {
/*
- * XDR functions for basic NFS types
+ * Basic NFSv2 data types (RFC 1094 Section 2.3)
*/
-static __be32 *
-decode_fh(__be32 *p, struct svc_fh *fhp)
+
+/**
+ * svcxdr_encode_stat - Encode an NFSv2 status code
+ * @xdr: XDR stream
+ * @status: status value to encode
+ *
+ * Return values:
+ * %false: Send buffer space was exhausted
+ * %true: Success
+ */
+bool
+svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status)
{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, sizeof(status));
+ if (!p)
+ return false;
+ *p = status;
+
+ return true;
+}
+
+/**
+ * svcxdr_decode_fhandle - Decode an NFSv2 file handle
+ * @xdr: XDR stream positioned at an encoded NFSv2 FH
+ * @fhp: OUT: filled-in server file handle
+ *
+ * Return values:
+ * %false: The encoded file handle was not valid
+ * %true: @fhp has been initialized
+ */
+bool
+svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, NFS_FHSIZE);
+ if (!p)
+ return false;
fh_init(fhp, NFS_FHSIZE);
- memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE);
+ memcpy(&fhp->fh_handle.fh_raw, p, NFS_FHSIZE);
fhp->fh_handle.fh_size = NFS_FHSIZE;
- /* FIXME: Look up export pointer here and verify
- * Sun Secure RPC if requested */
- return p + (NFS_FHSIZE >> 2);
+ return true;
}
-/* Helper function for NFSv2 ACL code */
-__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp)
+static bool
+svcxdr_encode_fhandle(struct xdr_stream *xdr, const struct svc_fh *fhp)
{
- return decode_fh(p, fhp);
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, NFS_FHSIZE);
+ if (!p)
+ return false;
+ memcpy(p, &fhp->fh_handle.fh_raw, NFS_FHSIZE);
+
+ return true;
}
static __be32 *
-encode_fh(__be32 *p, struct svc_fh *fhp)
+encode_timeval(__be32 *p, const struct timespec64 *time)
{
- memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE);
- return p + (NFS_FHSIZE>> 2);
+ *p++ = cpu_to_be32((u32)time->tv_sec);
+ if (time->tv_nsec)
+ *p++ = cpu_to_be32(time->tv_nsec / NSEC_PER_USEC);
+ else
+ *p++ = xdr_zero;
+ return p;
}
-/*
- * Decode a file name and make sure that the path contains
- * no slashes or null bytes.
- */
-static __be32 *
-decode_filename(__be32 *p, char **namp, unsigned int *lenp)
+static bool
+svcxdr_decode_filename(struct xdr_stream *xdr, char **name, unsigned int *len)
{
- char *name;
- unsigned int i;
-
- if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) {
- for (i = 0, name = *namp; i < *lenp; i++, name++) {
- if (*name == '\0' || *name == '/')
- return NULL;
- }
- }
+ u32 size, i;
+ __be32 *p;
+ char *c;
+
+ if (xdr_stream_decode_u32(xdr, &size) < 0)
+ return false;
+ if (size == 0 || size > NFS_MAXNAMLEN)
+ return false;
+ p = xdr_inline_decode(xdr, size);
+ if (!p)
+ return false;
- return p;
+ *len = size;
+ *name = (char *)p;
+ for (i = 0, c = *name; i < size; i++, c++)
+ if (*c == '\0' || *c == '/')
+ return false;
+
+ return true;
}
-static __be32 *
-decode_sattr(__be32 *p, struct iattr *iap)
+static bool
+svcxdr_decode_diropargs(struct xdr_stream *xdr, struct svc_fh *fhp,
+ char **name, unsigned int *len)
{
- u32 tmp, tmp1;
+ return svcxdr_decode_fhandle(xdr, fhp) &&
+ svcxdr_decode_filename(xdr, name, len);
+}
+
+static bool
+svcxdr_decode_sattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ struct iattr *iap)
+{
+ u32 tmp1, tmp2;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, XDR_UNIT * 8);
+ if (!p)
+ return false;
iap->ia_valid = 0;
- /* Sun client bug compatibility check: some sun clients seem to
- * put 0xffff in the mode field when they mean 0xffffffff.
- * Quoting the 4.4BSD nfs server code: Nah nah nah nah na nah.
+ /*
+ * Some Sun clients put 0xffff in the mode field when they
+ * mean 0xffffffff.
*/
- if ((tmp = ntohl(*p++)) != (u32)-1 && tmp != 0xffff) {
+ tmp1 = be32_to_cpup(p++);
+ if (tmp1 != (u32)-1 && tmp1 != 0xffff) {
iap->ia_valid |= ATTR_MODE;
- iap->ia_mode = tmp;
+ iap->ia_mode = tmp1;
}
- if ((tmp = ntohl(*p++)) != (u32)-1) {
- iap->ia_uid = make_kuid(&init_user_ns, tmp);
+
+ tmp1 = be32_to_cpup(p++);
+ if (tmp1 != (u32)-1) {
+ iap->ia_uid = make_kuid(nfsd_user_namespace(rqstp), tmp1);
if (uid_valid(iap->ia_uid))
iap->ia_valid |= ATTR_UID;
}
- if ((tmp = ntohl(*p++)) != (u32)-1) {
- iap->ia_gid = make_kgid(&init_user_ns, tmp);
+
+ tmp1 = be32_to_cpup(p++);
+ if (tmp1 != (u32)-1) {
+ iap->ia_gid = make_kgid(nfsd_user_namespace(rqstp), tmp1);
if (gid_valid(iap->ia_gid))
iap->ia_valid |= ATTR_GID;
}
- if ((tmp = ntohl(*p++)) != (u32)-1) {
+
+ tmp1 = be32_to_cpup(p++);
+ if (tmp1 != (u32)-1) {
iap->ia_valid |= ATTR_SIZE;
- iap->ia_size = tmp;
+ iap->ia_size = tmp1;
}
- tmp = ntohl(*p++); tmp1 = ntohl(*p++);
- if (tmp != (u32)-1 && tmp1 != (u32)-1) {
+
+ tmp1 = be32_to_cpup(p++);
+ tmp2 = be32_to_cpup(p++);
+ if (tmp1 != (u32)-1 && tmp2 != (u32)-1) {
iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
- iap->ia_atime.tv_sec = tmp;
- iap->ia_atime.tv_nsec = tmp1 * 1000;
+ iap->ia_atime.tv_sec = tmp1;
+ iap->ia_atime.tv_nsec = tmp2 * NSEC_PER_USEC;
}
- tmp = ntohl(*p++); tmp1 = ntohl(*p++);
- if (tmp != (u32)-1 && tmp1 != (u32)-1) {
+
+ tmp1 = be32_to_cpup(p++);
+ tmp2 = be32_to_cpup(p++);
+ if (tmp1 != (u32)-1 && tmp2 != (u32)-1) {
iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
- iap->ia_mtime.tv_sec = tmp;
- iap->ia_mtime.tv_nsec = tmp1 * 1000;
+ iap->ia_mtime.tv_sec = tmp1;
+ iap->ia_mtime.tv_nsec = tmp2 * NSEC_PER_USEC;
/*
* Passing the invalid value useconds=1000000 for mtime
* is a Sun convention for "set both mtime and atime to
@@ -119,451 +192,472 @@ decode_sattr(__be32 *p, struct iattr *iap)
* sattr in section 6.1 of "NFS Illustrated" by
* Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
*/
- if (tmp1 == 1000000)
+ if (tmp2 == 1000000)
iap->ia_valid &= ~(ATTR_ATIME_SET|ATTR_MTIME_SET);
}
- return p;
+
+ return true;
}
-static __be32 *
-encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
- struct kstat *stat)
+/**
+ * svcxdr_encode_fattr - Encode NFSv2 file attributes
+ * @rqstp: Context of a completed RPC transaction
+ * @xdr: XDR stream
+ * @fhp: File handle to encode
+ * @stat: Attributes to encode
+ *
+ * Return values:
+ * %false: Send buffer space was exhausted
+ * %true: Success
+ */
+bool
+svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ const struct svc_fh *fhp, const struct kstat *stat)
{
- struct dentry *dentry = fhp->fh_dentry;
- int type;
+ struct user_namespace *userns = nfsd_user_namespace(rqstp);
+ struct dentry *dentry = fhp->fh_dentry;
+ int type = stat->mode & S_IFMT;
struct timespec64 time;
- u32 f;
+ __be32 *p;
+ u32 fsid;
- type = (stat->mode & S_IFMT);
+ p = xdr_reserve_space(xdr, XDR_UNIT * 17);
+ if (!p)
+ return false;
- *p++ = htonl(nfs_ftypes[type >> 12]);
- *p++ = htonl((u32) stat->mode);
- *p++ = htonl((u32) stat->nlink);
- *p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
- *p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
+ *p++ = cpu_to_be32(nfs_ftypes[type >> 12]);
+ *p++ = cpu_to_be32((u32)stat->mode);
+ *p++ = cpu_to_be32((u32)stat->nlink);
+ *p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid));
+ *p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid));
- if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
- *p++ = htonl(NFS_MAXPATHLEN);
- } else {
- *p++ = htonl((u32) stat->size);
- }
- *p++ = htonl((u32) stat->blksize);
+ if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN)
+ *p++ = cpu_to_be32(NFS_MAXPATHLEN);
+ else
+ *p++ = cpu_to_be32((u32) stat->size);
+ *p++ = cpu_to_be32((u32) stat->blksize);
if (S_ISCHR(type) || S_ISBLK(type))
- *p++ = htonl(new_encode_dev(stat->rdev));
+ *p++ = cpu_to_be32(new_encode_dev(stat->rdev));
else
- *p++ = htonl(0xffffffff);
- *p++ = htonl((u32) stat->blocks);
+ *p++ = cpu_to_be32(0xffffffff);
+ *p++ = cpu_to_be32((u32)stat->blocks);
+
switch (fsid_source(fhp)) {
- default:
- case FSIDSOURCE_DEV:
- *p++ = htonl(new_encode_dev(stat->dev));
- break;
case FSIDSOURCE_FSID:
- *p++ = htonl((u32) fhp->fh_export->ex_fsid);
+ fsid = (u32)fhp->fh_export->ex_fsid;
break;
case FSIDSOURCE_UUID:
- f = ((u32*)fhp->fh_export->ex_uuid)[0];
- f ^= ((u32*)fhp->fh_export->ex_uuid)[1];
- f ^= ((u32*)fhp->fh_export->ex_uuid)[2];
- f ^= ((u32*)fhp->fh_export->ex_uuid)[3];
- *p++ = htonl(f);
+ fsid = ((u32 *)fhp->fh_export->ex_uuid)[0];
+ fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[1];
+ fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[2];
+ fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[3];
+ break;
+ default:
+ fsid = new_encode_dev(stat->dev);
break;
}
- *p++ = htonl((u32) stat->ino);
- *p++ = htonl((u32) stat->atime.tv_sec);
- *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0);
+ *p++ = cpu_to_be32(fsid);
+
+ *p++ = cpu_to_be32((u32)stat->ino);
+ p = encode_timeval(p, &stat->atime);
time = stat->mtime;
- lease_get_mtime(d_inode(dentry), &time);
- *p++ = htonl((u32) time.tv_sec);
- *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0);
- *p++ = htonl((u32) stat->ctime.tv_sec);
- *p++ = htonl(stat->ctime.tv_nsec ? stat->ctime.tv_nsec / 1000 : 0);
+ lease_get_mtime(d_inode(dentry), &time);
+ p = encode_timeval(p, &time);
+ encode_timeval(p, &stat->ctime);
- return p;
-}
-
-/* Helper function for NFSv2 ACL code */
-__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat)
-{
- return encode_fattr(rqstp, p, fhp, stat);
+ return true;
}
/*
* XDR decode functions
*/
-int
-nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
-{
- return xdr_argsize_check(rqstp, p);
-}
-int
-nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_fhandle *args = rqstp->rq_argp;
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_fhandle(xdr, &args->fh);
}
-int
-nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_sattrargs *args = rqstp->rq_argp;
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- p = decode_sattr(p, &args->attrs);
-
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_fhandle(xdr, &args->fh) &&
+ svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
}
-int
-nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_diropargs *args = rqstp->rq_argp;
- if (!(p = decode_fh(p, &args->fh))
- || !(p = decode_filename(p, &args->name, &args->len)))
- return 0;
-
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len);
}
-int
-nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_readargs *args = rqstp->rq_argp;
- unsigned int len;
- int v;
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
-
- args->offset = ntohl(*p++);
- len = args->count = ntohl(*p++);
- p++; /* totalcount - unused */
-
- len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
-
- /* set up somewhere to store response.
- * We take pages, put them on reslist and include in iovec
- */
- v=0;
- while (len > 0) {
- struct page *p = *(rqstp->rq_next_page++);
-
- rqstp->rq_vec[v].iov_base = page_address(p);
- rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
- len -= rqstp->rq_vec[v].iov_len;
- v++;
- }
- args->vlen = v;
- return xdr_argsize_check(rqstp, p);
+ u32 totalcount;
+
+ if (!svcxdr_decode_fhandle(xdr, &args->fh))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->offset) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+ return false;
+ /* totalcount is ignored */
+ if (xdr_stream_decode_u32(xdr, &totalcount) < 0)
+ return false;
+
+ return true;
}
-int
-nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_writeargs *args = rqstp->rq_argp;
- unsigned int len, hdr, dlen;
- struct kvec *head = rqstp->rq_arg.head;
-
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
-
- p++; /* beginoffset */
- args->offset = ntohl(*p++); /* offset */
- p++; /* totalcount */
- len = args->len = ntohl(*p++);
- /*
- * The protocol specifies a maximum of 8192 bytes.
- */
- if (len > NFSSVC_MAXBLKSIZE_V2)
- return 0;
-
- /*
- * Check to make sure that we got the right number of
- * bytes.
- */
- hdr = (void*)p - head->iov_base;
- if (hdr > head->iov_len)
- return 0;
- dlen = head->iov_len + rqstp->rq_arg.page_len - hdr;
-
- /*
- * Round the length of the data which was specified up to
- * the next multiple of XDR units and then compare that
- * against the length which was actually received.
- * Note that when RPCSEC/GSS (for example) is used, the
- * data buffer can be padded so dlen might be larger
- * than required. It must never be smaller.
- */
- if (dlen < XDR_QUADLEN(len)*4)
- return 0;
-
- args->first.iov_base = (void *)p;
- args->first.iov_len = head->iov_len - hdr;
- return 1;
+ u32 beginoffset, totalcount;
+
+ if (!svcxdr_decode_fhandle(xdr, &args->fh))
+ return false;
+ /* beginoffset is ignored */
+ if (xdr_stream_decode_u32(xdr, &beginoffset) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->offset) < 0)
+ return false;
+ /* totalcount is ignored */
+ if (xdr_stream_decode_u32(xdr, &totalcount) < 0)
+ return false;
+
+ /* opaque data */
+ if (xdr_stream_decode_u32(xdr, &args->len) < 0)
+ return false;
+ if (args->len > NFS_MAXDATA)
+ return false;
+
+ return xdr_stream_subsegment(xdr, &args->payload, args->len);
}
-int
-nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_createargs *args = rqstp->rq_argp;
- if ( !(p = decode_fh(p, &args->fh))
- || !(p = decode_filename(p, &args->name, &args->len)))
- return 0;
- p = decode_sattr(p, &args->attrs);
-
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_diropargs(xdr, &args->fh,
+ &args->name, &args->len) &&
+ svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
}
-int
-nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_renameargs *args = rqstp->rq_argp;
- if (!(p = decode_fh(p, &args->ffh))
- || !(p = decode_filename(p, &args->fname, &args->flen))
- || !(p = decode_fh(p, &args->tfh))
- || !(p = decode_filename(p, &args->tname, &args->tlen)))
- return 0;
-
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_diropargs(xdr, &args->ffh,
+ &args->fname, &args->flen) &&
+ svcxdr_decode_diropargs(xdr, &args->tfh,
+ &args->tname, &args->tlen);
}
-int
-nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p)
-{
- struct nfsd_readlinkargs *args = rqstp->rq_argp;
-
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- args->buffer = page_address(*(rqstp->rq_next_page++));
-
- return xdr_argsize_check(rqstp, p);
-}
-
-int
-nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_linkargs *args = rqstp->rq_argp;
- if (!(p = decode_fh(p, &args->ffh))
- || !(p = decode_fh(p, &args->tfh))
- || !(p = decode_filename(p, &args->tname, &args->tlen)))
- return 0;
-
- return xdr_argsize_check(rqstp, p);
+ return svcxdr_decode_fhandle(xdr, &args->ffh) &&
+ svcxdr_decode_diropargs(xdr, &args->tfh,
+ &args->tname, &args->tlen);
}
-int
-nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_symlinkargs *args = rqstp->rq_argp;
- char *base = (char *)p;
- size_t xdrlen;
-
- if ( !(p = decode_fh(p, &args->ffh))
- || !(p = decode_filename(p, &args->fname, &args->flen)))
- return 0;
+ struct kvec *head = rqstp->rq_arg.head;
- args->tlen = ntohl(*p++);
+ if (!svcxdr_decode_diropargs(xdr, &args->ffh, &args->fname, &args->flen))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->tlen) < 0)
+ return false;
if (args->tlen == 0)
- return 0;
+ return false;
- args->first.iov_base = p;
- args->first.iov_len = rqstp->rq_arg.head[0].iov_len;
- args->first.iov_len -= (char *)p - base;
-
- /* This request is never larger than a page. Therefore,
- * transport will deliver either:
- * 1. pathname in the pagelist -> sattr is in the tail.
- * 2. everything in the head buffer -> sattr is in the head.
- */
- if (rqstp->rq_arg.page_len) {
- if (args->tlen != rqstp->rq_arg.page_len)
- return 0;
- p = rqstp->rq_arg.tail[0].iov_base;
- } else {
- xdrlen = XDR_QUADLEN(args->tlen);
- if (xdrlen > args->first.iov_len - (8 * sizeof(__be32)))
- return 0;
- p += xdrlen;
- }
- decode_sattr(p, &args->attrs);
-
- return 1;
+ args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
+ args->first.iov_base = xdr_inline_decode(xdr, args->tlen);
+ if (!args->first.iov_base)
+ return false;
+ return svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
}
-int
-nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_readdirargs *args = rqstp->rq_argp;
- p = decode_fh(p, &args->fh);
- if (!p)
- return 0;
- args->cookie = ntohl(*p++);
- args->count = ntohl(*p++);
- args->count = min_t(u32, args->count, PAGE_SIZE);
- args->buffer = page_address(*(rqstp->rq_next_page++));
+ if (!svcxdr_decode_fhandle(xdr, &args->fh))
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->cookie) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &args->count) < 0)
+ return false;
- return xdr_argsize_check(rqstp, p);
+ return true;
}
/*
* XDR encode functions
*/
-int
-nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
+
+bool
+nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
- return xdr_ressize_check(rqstp, p);
+ struct nfsd_stat *resp = rqstp->rq_resp;
+
+ return svcxdr_encode_stat(xdr, resp->status);
}
-int
-nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_attrstat *resp = rqstp->rq_resp;
- p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
- return xdr_ressize_check(rqstp, p);
+ if (!svcxdr_encode_stat(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
+ return false;
+ break;
+ }
+
+ return true;
}
-int
-nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_diropres *resp = rqstp->rq_resp;
- p = encode_fh(p, &resp->fh);
- p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
- return xdr_ressize_check(rqstp, p);
+ if (!svcxdr_encode_stat(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_fhandle(xdr, &resp->fh))
+ return false;
+ if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
+ return false;
+ break;
+ }
+
+ return true;
}
-int
-nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_readlinkres *resp = rqstp->rq_resp;
-
- *p++ = htonl(resp->len);
- xdr_ressize_check(rqstp, p);
- rqstp->rq_res.page_len = resp->len;
- if (resp->len & 3) {
- /* need to pad the tail */
- rqstp->rq_res.tail[0].iov_base = p;
- *p = 0;
- rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
+ struct kvec *head = rqstp->rq_res.head;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (xdr_stream_encode_u32(xdr, resp->len) < 0)
+ return false;
+ svcxdr_encode_opaque_pages(rqstp, xdr, &resp->page, 0,
+ resp->len);
+ if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
+ return false;
+ break;
}
- return 1;
+
+ return true;
}
-int
-nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_readres *resp = rqstp->rq_resp;
-
- p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
- *p++ = htonl(resp->count);
- xdr_ressize_check(rqstp, p);
-
- /* now update rqstp->rq_res to reflect data as well */
- rqstp->rq_res.page_len = resp->count;
- if (resp->count & 3) {
- /* need to pad the tail */
- rqstp->rq_res.tail[0].iov_base = p;
- *p = 0;
- rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
+ struct kvec *head = rqstp->rq_res.head;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
+ return false;
+ if (xdr_stream_encode_u32(xdr, resp->count) < 0)
+ return false;
+ svcxdr_encode_opaque_pages(rqstp, xdr, resp->pages,
+ rqstp->rq_res.page_base,
+ resp->count);
+ if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
+ return false;
+ break;
}
- return 1;
+
+ return true;
}
-int
-nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_readdirres *resp = rqstp->rq_resp;
+ struct xdr_buf *dirlist = &resp->dirlist;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ svcxdr_encode_opaque_pages(rqstp, xdr, dirlist->pages, 0,
+ dirlist->len);
+ /* no more entries */
+ if (xdr_stream_encode_item_absent(xdr) < 0)
+ return false;
+ if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0)
+ return false;
+ break;
+ }
- xdr_ressize_check(rqstp, p);
- p = resp->buffer;
- *p++ = 0; /* no more entries */
- *p++ = htonl((resp->common.err == nfserr_eof));
- rqstp->rq_res.page_len = (((unsigned long)p-1) & ~PAGE_MASK)+1;
-
- return 1;
+ return true;
}
-int
-nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p)
+bool
+nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
struct nfsd_statfsres *resp = rqstp->rq_resp;
struct kstatfs *stat = &resp->stats;
+ __be32 *p;
+
+ if (!svcxdr_encode_stat(xdr, resp->status))
+ return false;
+ switch (resp->status) {
+ case nfs_ok:
+ p = xdr_reserve_space(xdr, XDR_UNIT * 5);
+ if (!p)
+ return false;
+ *p++ = cpu_to_be32(NFS_MAXDATA);
+ *p++ = cpu_to_be32(stat->f_bsize);
+ *p++ = cpu_to_be32(stat->f_blocks);
+ *p++ = cpu_to_be32(stat->f_bfree);
+ *p = cpu_to_be32(stat->f_bavail);
+ break;
+ }
- *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */
- *p++ = htonl(stat->f_bsize);
- *p++ = htonl(stat->f_blocks);
- *p++ = htonl(stat->f_bfree);
- *p++ = htonl(stat->f_bavail);
- return xdr_ressize_check(rqstp, p);
+ return true;
}
-int
-nfssvc_encode_entry(void *ccdv, const char *name,
- int namlen, loff_t offset, u64 ino, unsigned int d_type)
+/**
+ * nfssvc_encode_nfscookie - Encode a directory offset cookie
+ * @resp: readdir result context
+ * @offset: offset cookie to encode
+ *
+ * The buffer space for the offset cookie has already been reserved
+ * by svcxdr_encode_entry_common().
+ */
+void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset)
{
- struct readdir_cd *ccd = ccdv;
- struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common);
- __be32 *p = cd->buffer;
- int buflen, slen;
+ __be32 cookie = cpu_to_be32(offset);
- /*
- dprintk("nfsd: entry(%.*s off %ld ino %ld)\n",
- namlen, name, offset, ino);
- */
+ if (!resp->cookie_offset)
+ return;
- if (offset > ~((u32) 0)) {
- cd->common.err = nfserr_fbig;
- return -EINVAL;
- }
- if (cd->offset)
- *cd->offset = htonl(offset);
+ write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie,
+ sizeof(cookie));
+ resp->cookie_offset = 0;
+}
- /* truncate filename */
- namlen = min(namlen, NFS2_MAXNAMLEN);
- slen = XDR_QUADLEN(namlen);
+static bool
+svcxdr_encode_entry_common(struct nfsd_readdirres *resp, const char *name,
+ int namlen, loff_t offset, u64 ino)
+{
+ struct xdr_buf *dirlist = &resp->dirlist;
+ struct xdr_stream *xdr = &resp->xdr;
+
+ if (xdr_stream_encode_item_present(xdr) < 0)
+ return false;
+ /* fileid */
+ if (xdr_stream_encode_u32(xdr, (u32)ino) < 0)
+ return false;
+ /* name */
+ if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS2_MAXNAMLEN)) < 0)
+ return false;
+ /* cookie */
+ resp->cookie_offset = dirlist->len;
+ if (xdr_stream_encode_u32(xdr, ~0U) < 0)
+ return false;
+
+ return true;
+}
- if ((buflen = cd->buflen - slen - 4) < 0) {
- cd->common.err = nfserr_toosmall;
- return -EINVAL;
- }
- if (ino > ~((u32) 0)) {
- cd->common.err = nfserr_fbig;
- return -EINVAL;
- }
- *p++ = xdr_one; /* mark entry present */
- *p++ = htonl((u32) ino); /* file id */
- p = xdr_encode_array(p, name, namlen);/* name length & name */
- cd->offset = p; /* remember pointer */
- *p++ = htonl(~0U); /* offset of next entry */
-
- cd->buflen = buflen;
- cd->buffer = p;
- cd->common.err = nfs_ok;
+/**
+ * nfssvc_encode_entry - encode one NFSv2 READDIR entry
+ * @data: directory context
+ * @name: name of the object to be encoded
+ * @namlen: length of that name, in bytes
+ * @offset: the offset of the previous entry
+ * @ino: the fileid of this entry
+ * @d_type: unused
+ *
+ * Return values:
+ * %0: Entry was successfully encoded.
+ * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err
+ *
+ * On exit, the following fields are updated:
+ * - resp->xdr
+ * - resp->common.err
+ * - resp->cookie_offset
+ */
+int nfssvc_encode_entry(void *data, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type)
+{
+ struct readdir_cd *ccd = data;
+ struct nfsd_readdirres *resp = container_of(ccd,
+ struct nfsd_readdirres,
+ common);
+ unsigned int starting_length = resp->dirlist.len;
+
+ /* The offset cookie for the previous entry */
+ nfssvc_encode_nfscookie(resp, offset);
+
+ if (!svcxdr_encode_entry_common(resp, name, namlen, offset, ino))
+ goto out_toosmall;
+
+ xdr_commit_encode(&resp->xdr);
+ resp->common.err = nfs_ok;
return 0;
+
+out_toosmall:
+ resp->cookie_offset = 0;
+ resp->common.err = nfserr_toosmall;
+ resp->dirlist.len = starting_length;
+ return -EINVAL;
}
/*
* XDR release functions
*/
-void
-nfssvc_release_fhandle(struct svc_rqst *rqstp)
+void nfssvc_release_attrstat(struct svc_rqst *rqstp)
+{
+ struct nfsd_attrstat *resp = rqstp->rq_resp;
+
+ fh_put(&resp->fh);
+}
+
+void nfssvc_release_diropres(struct svc_rqst *rqstp)
{
- struct nfsd_fhandle *resp = rqstp->rq_resp;
+ struct nfsd_diropres *resp = rqstp->rq_resp;
+
+ fh_put(&resp->fh);
+}
+
+void nfssvc_release_readres(struct svc_rqst *rqstp)
+{
+ struct nfsd_readres *resp = rqstp->rq_resp;
fh_put(&resp->fh);
}
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index 4f4282d4eeca..db9af780438b 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -27,17 +27,19 @@ struct nfsd4_layout_ops {
struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdevp);
__be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
- struct nfsd4_getdeviceinfo *gdevp);
+ const struct nfsd4_getdeviceinfo *gdevp);
- __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
- struct nfsd4_layoutget *lgp);
- __be32 (*encode_layoutget)(struct xdr_stream *,
- struct nfsd4_layoutget *lgp);
+ __be32 (*proc_layoutget)(struct svc_rqst *rqstp, struct inode *inode,
+ const struct svc_fh *fhp, struct nfsd4_layoutget *lgp);
+ __be32 (*encode_layoutget)(struct xdr_stream *xdr,
+ const struct nfsd4_layoutget *lgp);
__be32 (*proc_layoutcommit)(struct inode *inode,
+ struct svc_rqst *rqstp,
struct nfsd4_layoutcommit *lcp);
- void (*fence_client)(struct nfs4_layout_stateid *ls);
+ void (*fence_client)(struct nfs4_layout_stateid *ls,
+ struct nfsd_file *file);
};
extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
@@ -72,11 +74,13 @@ void nfsd4_setup_layout_type(struct svc_export *exp);
void nfsd4_return_all_client_layouts(struct nfs4_client *);
void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
struct nfs4_file *fp);
+void nfsd4_close_layout(struct nfs4_layout_stateid *ls);
int nfsd4_init_pnfs(void);
void nfsd4_exit_pnfs(void);
#else
struct nfs4_client;
struct nfs4_file;
+struct nfs4_layout_stateid;
static inline void nfsd4_setup_layout_type(struct svc_export *exp)
{
@@ -89,6 +93,9 @@ static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
struct nfs4_file *fp)
{
}
+static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
+{
+}
static inline void nfsd4_exit_pnfs(void)
{
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 396c76755b03..b052c1effdc5 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -35,10 +35,12 @@
#ifndef _NFSD4_STATE_H
#define _NFSD4_STATE_H
+#include <crypto/md5.h>
#include <linux/idr.h>
#include <linux/refcount.h>
#include <linux/sunrpc/svc_xprt.h>
#include "nfsfh.h"
+#include "nfsd.h"
typedef struct {
u32 cl_boot;
@@ -55,27 +57,51 @@ typedef struct {
stateid_opaque_t si_opaque;
} stateid_t;
-#define STATEID_FMT "(%08x/%08x/%08x/%08x)"
-#define STATEID_VAL(s) \
- (s)->si_opaque.so_clid.cl_boot, \
- (s)->si_opaque.so_clid.cl_id, \
- (s)->si_opaque.so_id, \
- (s)->si_generation
+typedef struct {
+ stateid_t cs_stid;
+#define NFS4_COPY_STID 1
+#define NFS4_COPYNOTIFY_STID 2
+ unsigned char cs_type;
+ refcount_t cs_count;
+} copy_stateid_t;
+
+struct nfsd4_referring_call {
+ struct list_head __list;
+
+ u32 rc_sequenceid;
+ u32 rc_slotid;
+};
+
+struct nfsd4_referring_call_list {
+ struct list_head __list;
+
+ struct nfs4_sessionid rcl_sessionid;
+ int __nr_referring_calls;
+ struct list_head rcl_referring_calls;
+};
struct nfsd4_callback {
struct nfs4_client *cb_clp;
struct rpc_message cb_msg;
+#define NFSD4_CALLBACK_RUNNING (0)
+#define NFSD4_CALLBACK_WAKE (1)
+#define NFSD4_CALLBACK_REQUEUE (2)
+ unsigned long cb_flags;
const struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work;
int cb_seq_status;
int cb_status;
- bool cb_need_restart;
+ int cb_held_slot;
+
+ int cb_nr_referring_call_list;
+ struct list_head cb_referring_call_list;
};
struct nfsd4_callback_ops {
void (*prepare)(struct nfsd4_callback *);
int (*done)(struct nfsd4_callback *, struct rpc_task *);
void (*release)(struct nfsd4_callback *);
+ uint32_t opcode;
};
/*
@@ -85,16 +111,36 @@ struct nfsd4_callback_ops {
*/
struct nfs4_stid {
refcount_t sc_count;
-#define NFS4_OPEN_STID 1
-#define NFS4_LOCK_STID 2
-#define NFS4_DELEG_STID 4
-/* For an open stateid kept around *only* to process close replays: */
-#define NFS4_CLOSED_STID 8
+
+ /* A new stateid is added to the cl_stateids idr early before it
+ * is fully initialised. Its sc_type is then zero. After
+ * initialisation the sc_type it set under cl_lock, and then
+ * never changes.
+ */
+#define SC_TYPE_OPEN BIT(0)
+#define SC_TYPE_LOCK BIT(1)
+#define SC_TYPE_DELEG BIT(2)
+#define SC_TYPE_LAYOUT BIT(3)
+ unsigned short sc_type;
+
+/* state_lock protects sc_status for delegation stateids.
+ * ->cl_lock protects sc_status for open and lock stateids.
+ * ->st_mutex also protect sc_status for open stateids.
+ * ->ls_lock protects sc_status for layout stateids.
+ */
+/*
+ * For an open stateid kept around *only* to process close replays.
+ * For deleg stateid, kept in idr until last reference is dropped.
+ */
+#define SC_STATUS_CLOSED BIT(0)
/* For a deleg stateid kept around only to process free_stateid's: */
-#define NFS4_REVOKED_DELEG_STID 16
-#define NFS4_CLOSED_DELEG_STID 32
-#define NFS4_LAYOUT_STID 64
- unsigned char sc_type;
+#define SC_STATUS_REVOKED BIT(1)
+#define SC_STATUS_ADMIN_REVOKED BIT(2)
+#define SC_STATUS_FREEABLE BIT(3)
+#define SC_STATUS_FREED BIT(4)
+ unsigned short sc_status;
+
+ struct list_head sc_cp_list;
stateid_t sc_stateid;
spinlock_t sc_lock;
struct nfs4_client *sc_client;
@@ -102,6 +148,47 @@ struct nfs4_stid {
void (*sc_free)(struct nfs4_stid *);
};
+/* Keep a list of stateids issued by the COPY_NOTIFY, associate it with the
+ * parent OPEN/LOCK/DELEG stateid.
+ */
+struct nfs4_cpntf_state {
+ copy_stateid_t cp_stateid;
+ struct list_head cp_list; /* per parent nfs4_stid */
+ stateid_t cp_p_stateid; /* copy of parent's stateid */
+ clientid_t cp_p_clid; /* copy of parent's clid */
+ time64_t cpntf_time; /* last time stateid used */
+};
+
+/*
+ * RFC 7862 Section 4.8 states:
+ *
+ * | A copy offload stateid will be valid until either (A) the client
+ * | or server restarts or (B) the client returns the resource by
+ * | issuing an OFFLOAD_CANCEL operation or the client replies to a
+ * | CB_OFFLOAD operation.
+ *
+ * Because a client might not reply to a CB_OFFLOAD, or a reply
+ * might get lost due to connection loss, NFSD purges async copy
+ * state after a short period to prevent it from accumulating
+ * over time.
+ */
+#define NFSD_COPY_INITIAL_TTL 10
+
+struct nfs4_cb_fattr {
+ struct nfsd4_callback ncf_getattr;
+ u32 ncf_cb_status;
+
+ /* from CB_GETATTR reply */
+ u64 ncf_cb_change;
+ u64 ncf_cb_fsize;
+ struct timespec64 ncf_cb_mtime;
+ struct timespec64 ncf_cb_atime;
+
+ bool ncf_file_modified;
+ u64 ncf_initial_cinfo;
+ u64 ncf_cur_fsize;
+};
+
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@@ -129,13 +216,43 @@ struct nfs4_delegation {
struct list_head dl_perclnt;
struct list_head dl_recall_lru; /* delegation recalled */
struct nfs4_clnt_odstate *dl_clnt_odstate;
+ time64_t dl_time;
u32 dl_type;
- time_t dl_time;
-/* For recall: */
+ /* For recall: */
int dl_retries;
struct nfsd4_callback dl_recall;
+ bool dl_recalled;
+ bool dl_written;
+ bool dl_setattr;
+
+ /* for CB_GETATTR */
+ struct nfs4_cb_fattr dl_cb_fattr;
+
+ /* For delegated timestamps */
+ struct timespec64 dl_atime;
+ struct timespec64 dl_mtime;
+ struct timespec64 dl_ctime;
};
+static inline bool deleg_is_read(u32 dl_type)
+{
+ return (dl_type == OPEN_DELEGATE_READ || dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG);
+}
+
+static inline bool deleg_is_write(u32 dl_type)
+{
+ return (dl_type == OPEN_DELEGATE_WRITE || dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG);
+}
+
+static inline bool deleg_attrs_deleg(u32 dl_type)
+{
+ return dl_type == OPEN_DELEGATE_READ_ATTRS_DELEG ||
+ dl_type == OPEN_DELEGATE_WRITE_ATTRS_DELEG;
+}
+
+bool nfsd4_vet_deleg_time(struct timespec64 *cb, const struct timespec64 *orig,
+ const struct timespec64 *now);
+
#define cb_to_delegation(cb) \
container_of(cb, struct nfs4_delegation, dl_recall)
@@ -156,10 +273,11 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
return container_of(s, struct nfs4_delegation, dl_stid);
}
-/* Maximum number of slots per session. 160 is useful for long haul TCP */
-#define NFSD_MAX_SLOTS_PER_SESSION 160
-/* Maximum number of operations per session compound */
-#define NFSD_MAX_OPS_PER_COMPOUND 16
+/* Maximum number of slots per session. This is for sanity-check only.
+ * It could be increased if we had a mechanism to shutdown misbehaving clients.
+ * A large number can be needed to get good throughput on high-latency servers.
+ */
+#define NFSD_MAX_SLOTS_PER_SESSION 2048
/* Maximum session per slot cache size */
#define NFSD_SLOT_CACHE_SIZE 2048
/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
@@ -171,12 +289,15 @@ struct nfsd4_slot {
u32 sl_seqid;
__be32 sl_status;
struct svc_cred sl_cred;
+ u32 sl_index;
u32 sl_datalen;
u16 sl_opcnt;
+ u16 sl_generation;
#define NFSD4_SLOT_INUSE (1 << 0)
#define NFSD4_SLOT_CACHETHIS (1 << 1)
#define NFSD4_SLOT_INITIALIZED (1 << 2)
#define NFSD4_SLOT_CACHED (1 << 3)
+#define NFSD4_SLOT_REUSED (1 << 4)
u8 sl_flags;
char sl_data[];
};
@@ -235,6 +356,9 @@ struct nfsd4_conn {
unsigned char cn_flags;
};
+/* Maximum number of slots that nfsd will use in the backchannel */
+#define NFSD_BC_SLOT_TABLE_SIZE (sizeof(u32) * 8)
+
/*
* Representation of a v4.1+ session. These are refcounted in a similar fashion
* to the nfs4_client. References are only taken when the server is actively
@@ -242,20 +366,23 @@ struct nfsd4_conn {
*/
struct nfsd4_session {
atomic_t se_ref;
+ spinlock_t se_lock;
+ u32 se_cb_slot_avail; /* bitmap of available slots */
+ u32 se_cb_highest_slot; /* highest slot client wants */
+ u32 se_cb_prog;
struct list_head se_hash; /* hash by sessionid */
struct list_head se_perclnt;
-/* See SESSION4_PERSIST, etc. for standard flags; this is internal-only: */
-#define NFS4_SESSION_DEAD 0x010
- u32 se_flags;
+ struct list_head se_all_sessions;/* global list of sessions */
struct nfs4_client *se_client;
struct nfs4_sessionid se_sessionid;
struct nfsd4_channel_attrs se_fchannel;
- struct nfsd4_channel_attrs se_bchannel;
struct nfsd4_cb_sec se_cb_sec;
struct list_head se_conns;
- u32 se_cb_prog;
- u32 se_cb_seq_nr;
- struct nfsd4_slot *se_slots[]; /* forward channel slots */
+ u32 se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
+ struct xarray se_slots; /* forward channel slots */
+ u16 se_slot_gen;
+ bool se_dead;
+ u32 se_target_maxslots;
};
/* formatted contents of nfs4_sessionid */
@@ -265,7 +392,30 @@ struct nfsd4_sessionid {
u32 reserved;
};
-#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */
+/* Length of MD5 digest as hex, plus terminating '\0' */
+#define HEXDIR_LEN (2 * MD5_DIGEST_SIZE + 1)
+
+/*
+ * State Meaning Where set
+ * --------------------------------------------------------------------------
+ * | NFSD4_ACTIVE | Confirmed, active | Default |
+ * |------------------- ----------------------------------------------------|
+ * | NFSD4_COURTESY | Courtesy state. | nfs4_get_client_reaplist |
+ * | | Lease/lock/share | |
+ * | | reservation conflict | |
+ * | | can cause Courtesy | |
+ * | | client to be expired | |
+ * |------------------------------------------------------------------------|
+ * | NFSD4_EXPIRABLE | Courtesy client to be| nfs4_laundromat |
+ * | | expired by Laundromat| try_to_expire_client |
+ * | | due to conflict | |
+ * |------------------------------------------------------------------------|
+ */
+enum {
+ NFSD4_ACTIVE = 0,
+ NFSD4_COURTESY,
+ NFSD4_EXPIRABLE,
+};
/*
* struct nfs4_client - one per client. Clientids live here.
@@ -281,8 +431,9 @@ struct nfsd4_sessionid {
* 0. If they are not renewed within a lease period, they become eligible for
* destruction by the laundromat.
*
- * These objects can also be destroyed prematurely by the fault injection code,
- * or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates.
+ * These objects can also be destroyed if the client sends certain forms of
+ * SETCLIENTID or EXCHANGE_ID operations.
+ *
* Care is taken *not* to do this however when the objects have an elevated
* refcount.
*
@@ -290,7 +441,7 @@ struct nfsd4_sessionid {
*
* o Each nfs4_clients is also hashed by name (the opaque quantity initially
* sent by the client to identify itself).
- *
+ *
* o cl_perclient list is used to ensure no dangling stateowner references
* when we expire the nfs4_client
*/
@@ -308,13 +459,18 @@ struct nfs4_client {
#endif
struct xdr_netobj cl_name; /* id generated by client */
nfs4_verifier cl_verifier; /* generated by client */
- time_t cl_time; /* time of last lease renewal */
+ time64_t cl_time; /* time of last lease renewal */
struct sockaddr_storage cl_addr; /* client ipaddress */
bool cl_mach_cred; /* SP4_MACH_CRED in force */
struct svc_cred cl_cred; /* setclientid principal */
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
u32 cl_minorversion;
+ atomic_t cl_admin_revoked; /* count of admin-revoked states */
+ /* NFSv4.1 client implementation id: */
+ struct xdr_netobj cl_nii_domain;
+ struct xdr_netobj cl_nii_name;
+ struct timespec64 cl_nii_time;
/* for v4.0 and v4.1 callbacks: */
struct nfs4_cb_conn cl_cb_conn;
@@ -327,6 +483,8 @@ struct nfs4_client {
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL)
unsigned long cl_flags;
+
+ struct workqueue_struct *cl_callback_wq;
const struct cred *cl_cb_cred;
struct rpc_clnt *cl_cb_client;
u32 cl_cb_ident;
@@ -346,17 +504,29 @@ struct nfs4_client {
struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
u32 cl_exchange_flags;
/* number of rpc's in progress over an associated session: */
- atomic_t cl_refcount;
+ atomic_t cl_rpc_users;
+ struct nfsdfs_client cl_nfsdfs;
struct nfs4_op_map cl_spo_must_allow;
- /* for nfs41 callbacks */
- /* We currently support a single back channel with a single slot */
- unsigned long cl_cb_slot_busy;
+ /* debugging info directory under nfsd/clients/ : */
+ struct dentry *cl_nfsd_dentry;
+ /* 'info' file within that directory. Ref is not counted,
+ * but will remain valid iff cl_nfsd_dentry != NULL
+ */
+ struct dentry *cl_nfsd_info_dentry;
+
struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
/* wait here for slots */
struct net *net;
struct list_head async_copies; /* list of async copies */
spinlock_t async_lock; /* lock for async copies */
+ atomic_t cl_cb_inflight; /* Outstanding callbacks */
+
+ unsigned int cl_state;
+ atomic_t cl_delegs_in_recall;
+
+ struct nfsd4_cb_recall_any *cl_ra;
+ time64_t cl_ra_time;
};
/* struct nfs4_client_reset
@@ -367,7 +537,8 @@ struct nfs4_client {
struct nfs4_client_reclaim {
struct list_head cr_strhash; /* hash by cr_name */
struct nfs4_client *cr_clp; /* pointer to associated clp */
- char cr_recdir[HEXDIR_LEN]; /* recover dir */
+ struct xdr_netobj cr_name; /* recovery dir name */
+ struct xdr_netobj cr_princhash;
};
/* A reasonable value for REPLAY_ISIZE was estimated as follows:
@@ -388,7 +559,7 @@ struct nfs4_replay {
unsigned int rp_buflen;
char *rp_buf;
struct knfsd_fh rp_openfh;
- struct mutex rp_mutex;
+ int rp_locked;
char rp_ibuf[NFSD4_REPLAY_ISIZE];
};
@@ -437,7 +608,7 @@ struct nfs4_openowner {
*/
struct list_head oo_close_lru;
struct nfs4_ol_stateid *oo_last_closed_stid;
- time_t oo_time; /* time of placement on so_close_lru */
+ time64_t oo_time; /* time of placement on so_close_lru */
#define NFS4_OO_CONFIRMED 1
unsigned char oo_flags;
};
@@ -481,14 +652,13 @@ struct nfs4_clnt_odstate {
* inode can have multiple filehandles associated with it, so there is
* (potentially) a many to one relationship between this struct and struct
* inode.
- *
- * These are hashed by filehandle in the file_hashtbl, which is protected by
- * the global state_lock spinlock.
*/
struct nfs4_file {
refcount_t fi_ref;
+ struct inode * fi_inode;
+ bool fi_aliased;
spinlock_t fi_lock;
- struct hlist_node fi_hash; /* hash on fi_fhandle */
+ struct rhlist_head fi_rlist;
struct list_head fi_stateids;
union {
struct list_head fi_delegations;
@@ -496,7 +666,7 @@ struct nfs4_file {
};
struct list_head fi_clnt_odstate;
/* One each for O_RDONLY, O_WRONLY, O_RDWR: */
- struct file * fi_fds[3];
+ struct nfsd_file *fi_fds[3];
/*
* Each open or lock stateid contributes 0-4 to the counts
* below depending on which bits are set in st_access_bitmap:
@@ -506,7 +676,8 @@ struct nfs4_file {
*/
atomic_t fi_access[2];
u32 fi_share_deny;
- struct file *fi_deleg_file;
+ struct nfsd_file *fi_deleg_file;
+ struct nfsd_file *fi_rdeleg_file;
int fi_delegees;
struct knfsd_fh fi_fhandle;
bool fi_had_conflict;
@@ -537,6 +708,10 @@ struct nfs4_ol_stateid {
struct list_head st_locks;
struct nfs4_stateowner *st_stateowner;
struct nfs4_clnt_odstate *st_clnt_odstate;
+/*
+ * These bitmasks use 3 separate bits for READ, ALLOW, and BOTH; see the
+ * comment above bmap_to_share_mode() for explanation:
+ */
unsigned char st_access_bmap;
unsigned char st_deny_bmap;
struct nfs4_ol_stateid *st_openstp;
@@ -555,7 +730,7 @@ struct nfs4_layout_stateid {
spinlock_t ls_lock;
struct list_head ls_layouts;
u32 ls_layout_type;
- struct file *ls_file;
+ struct nfsd_file *ls_file;
struct nfsd4_callback ls_recall;
stateid_t ls_recall_sid;
bool ls_recalled;
@@ -578,6 +753,8 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_OFFLOAD,
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
+ NFSPROC4_CLNT_CB_RECALL_ANY,
+ NFSPROC4_CLNT_CB_GETATTR,
};
/* Returns true iff a is later than b: */
@@ -594,10 +771,11 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
struct nfsd4_blocked_lock {
struct list_head nbl_list;
struct list_head nbl_lru;
- unsigned long nbl_time;
+ time64_t nbl_time;
struct file_lock nbl_lock;
struct knfsd_fh nbl_fh;
struct nfsd4_callback nbl_cb;
+ struct kref nbl_kref;
};
struct nfsd4_compound_state;
@@ -606,48 +784,69 @@ struct nfsd4_copy;
extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
- stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
+ stateid_t *stateid, int flags, struct nfsd_file **filp,
+ struct nfs4_stid **cstid);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
- stateid_t *stateid, unsigned char typemask,
- struct nfs4_stid **s, struct nfsd_net *nn);
+ stateid_t *stateid, unsigned short typemask,
+ unsigned short statusmask,
+ struct nfs4_stid **s, struct nfsd_net *nn);
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
void (*sc_free)(struct nfs4_stid *));
-int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
-void nfs4_free_cp_state(struct nfsd4_copy *copy);
-void nfs4_unhash_stid(struct nfs4_stid *s);
+int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
+void nfs4_free_copy_state(struct nfsd4_copy *copy);
+struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
+ struct nfs4_stid *p_stid);
void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
extern void nfs4_release_reclaim(struct nfsd_net *);
-extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
+extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct xdr_netobj name,
struct nfsd_net *nn);
-extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
- struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
+extern __be32 nfs4_check_open_reclaim(struct nfs4_client *);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
+extern void nfsd41_cb_referring_call(struct nfsd4_callback *cb,
+ struct nfs4_sessionid *sessionid,
+ u32 slotid, u32 seqno);
+extern void nfsd41_cb_destroy_referring_call_list(struct nfsd4_callback *cb);
extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
-extern void nfsd4_run_cb(struct nfsd4_callback *cb);
-extern int nfsd4_create_callback_queue(void);
-extern void nfsd4_destroy_callback_queue(void);
+extern bool nfsd4_run_cb(struct nfsd4_callback *cb);
+
+static inline void nfsd4_try_run_cb(struct nfsd4_callback *cb)
+{
+ if (!test_and_set_bit(NFSD4_CALLBACK_RUNNING, &cb->cb_flags))
+ WARN_ON_ONCE(!nfsd4_run_cb(cb));
+}
+
extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
-extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
-extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
- struct nfsd_net *nn);
-extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
+void nfsd4_async_copy_reaper(struct nfsd_net *nn);
+bool nfsd4_has_active_async_copies(struct nfs4_client *clp);
+extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
+ struct xdr_netobj princhash, struct nfsd_net *nn);
+extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
-struct nfs4_file *find_file(struct knfsd_fh *fh);
void put_nfs4_file(struct nfs4_file *fi);
-extern void nfs4_put_copy(struct nfsd4_copy *copy);
-extern struct nfsd4_copy *
-find_async_copy(struct nfs4_client *clp, stateid_t *staetid);
+extern void nfs4_put_cpntf_state(struct nfsd_net *nn,
+ struct nfs4_cpntf_state *cps);
+extern __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st,
+ struct nfs4_client *clp,
+ struct nfs4_cpntf_state **cps);
static inline void get_nfs4_file(struct nfs4_file *fi)
{
refcount_inc(&fi->fi_ref);
}
-struct file *find_any_file(struct nfs4_file *f);
+struct nfsd_file *find_any_file(struct nfs4_file *f);
+
+#ifdef CONFIG_NFSD_V4
+void nfsd4_revoke_states(struct net *net, struct super_block *sb);
+#else
+static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+}
+#endif
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);
@@ -660,31 +859,17 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
extern int nfsd4_client_record_check(struct nfs4_client *clp);
extern void nfsd4_record_grace_done(struct nfsd_net *nn);
-/* nfs fault injection functions */
-#ifdef CONFIG_NFSD_FAULT_INJECTION
-int nfsd_fault_inject_init(void);
-void nfsd_fault_inject_cleanup(void);
-
-u64 nfsd_inject_print_clients(void);
-u64 nfsd_inject_forget_client(struct sockaddr_storage *, size_t);
-u64 nfsd_inject_forget_clients(u64);
-
-u64 nfsd_inject_print_locks(void);
-u64 nfsd_inject_forget_client_locks(struct sockaddr_storage *, size_t);
-u64 nfsd_inject_forget_locks(u64);
-
-u64 nfsd_inject_print_openowners(void);
-u64 nfsd_inject_forget_client_openowners(struct sockaddr_storage *, size_t);
-u64 nfsd_inject_forget_openowners(u64);
-
-u64 nfsd_inject_print_delegations(void);
-u64 nfsd_inject_forget_client_delegations(struct sockaddr_storage *, size_t);
-u64 nfsd_inject_forget_delegations(u64);
-u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t);
-u64 nfsd_inject_recall_delegations(u64);
-#else /* CONFIG_NFSD_FAULT_INJECTION */
-static inline int nfsd_fault_inject_init(void) { return 0; }
-static inline void nfsd_fault_inject_cleanup(void) {}
-#endif /* CONFIG_NFSD_FAULT_INJECTION */
+static inline bool try_to_expire_client(struct nfs4_client *clp)
+{
+ cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
+ return clp->cl_state == NFSD4_EXPIRABLE;
+}
+
+extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
+ struct dentry *dentry, struct nfs4_delegation **pdp);
+struct nfsd4_get_dir_delegation;
+struct nfs4_delegation *nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
+ struct nfsd4_get_dir_delegation *gdd,
+ struct nfsd_file *nf);
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 9bce3b913189..f7eaf95e20fc 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -7,16 +7,14 @@
* Format:
* rc <hits> <misses> <nocache>
* Statistsics for the reply cache
- * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
+ * fh <stale> <deprecated filehandle cache stats>
* statistics for filehandle lookup
* io <bytes-read> <bytes-written>
* statistics for IO throughput
- * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%>
- * time (seconds) when nfsd thread usage above thresholds
- * and number of times that all threads were in use
- * ra cache-size <10% <20% <30% ... <100% not-found
- * number of times that read-ahead entry was found that deep in
- * the cache.
+ * th <threads> <deprecated thread usage histogram stats>
+ * number of threads
+ * ra <deprecated ra-cache stats>
+ *
* plus generic RPC stats (see net/sunrpc/stats.c)
*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
@@ -29,49 +27,43 @@
#include "nfsd.h"
-struct nfsd_stats nfsdstats;
-struct svc_stat nfsd_svcstats = {
- .program = &nfsd_program,
-};
-
-static int nfsd_proc_show(struct seq_file *seq, void *v)
+static int nfsd_show(struct seq_file *seq, void *v)
{
+ struct net *net = pde_data(file_inode(seq->file));
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int i;
- seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n",
- nfsdstats.rchits,
- nfsdstats.rcmisses,
- nfsdstats.rcnocache,
- nfsdstats.fh_stale,
- nfsdstats.fh_lookup,
- nfsdstats.fh_anon,
- nfsdstats.fh_nocache_dir,
- nfsdstats.fh_nocache_nondir,
- nfsdstats.io_read,
- nfsdstats.io_write);
+ seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n",
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_FH_STALE]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_READ]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_WRITE]));
+
/* thread usage: */
- seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt);
- for (i=0; i<10; i++) {
- unsigned int jifs = nfsdstats.th_usage[i];
- unsigned int sec = jifs / HZ, msec = (jifs % HZ)*1000/HZ;
- seq_printf(seq, " %u.%03u", sec, msec);
- }
+ seq_printf(seq, "th %u 0", atomic_read(&nfsd_th_cnt));
+
+ /* deprecated thread usage histogram stats */
+ for (i = 0; i < 10; i++)
+ seq_puts(seq, " 0.000");
+
+ /* deprecated ra-cache stats */
+ seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n");
- /* newline and ra-cache */
- seq_printf(seq, "\nra %u", nfsdstats.ra_size);
- for (i=0; i<11; i++)
- seq_printf(seq, " %u", nfsdstats.ra_depth[i]);
- seq_putc(seq, '\n');
-
/* show my rpc info */
- svc_seq_show(seq, &nfsd_svcstats);
+ svc_seq_show(seq, &nn->nfsd_svcstats);
#ifdef CONFIG_NFSD_V4
/* Show count for individual nfsv4 operations */
/* Writing operation numbers 0 1 2 also for maintaining uniformity */
- seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1);
- for (i = 0; i <= LAST_NFS4_OP; i++)
- seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]);
+ seq_printf(seq, "proc4ops %u", LAST_NFS4_OP + 1);
+ for (i = 0; i <= LAST_NFS4_OP; i++) {
+ seq_printf(seq, " %lld",
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_NFS4_OP(i)]));
+ }
+ seq_printf(seq, "\nwdeleg_getattr %lld",
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_WDELEG_GETATTR]));
seq_putc(seq, '\n');
#endif
@@ -79,26 +71,16 @@ static int nfsd_proc_show(struct seq_file *seq, void *v)
return 0;
}
-static int nfsd_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, nfsd_proc_show, NULL);
-}
-
-static const struct file_operations nfsd_proc_fops = {
- .open = nfsd_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_PROC_SHOW_ATTRIBUTE(nfsd);
-void
-nfsd_stat_init(void)
+struct proc_dir_entry *nfsd_proc_stat_init(struct net *net)
{
- svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ return svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
}
-void
-nfsd_stat_shutdown(void)
+void nfsd_proc_stat_shutdown(struct net *net)
{
- svc_proc_unregister(&init_net, "nfsd");
+ svc_proc_unregister(net, "nfsd");
}
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index b23fdac69820..e4efb0e4e56d 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -8,37 +8,69 @@
#define _NFSD_STATS_H
#include <uapi/linux/nfsd/stats.h>
+#include <linux/percpu_counter.h>
+struct proc_dir_entry *nfsd_proc_stat_init(struct net *net);
+void nfsd_proc_stat_shutdown(struct net *net);
-struct nfsd_stats {
- unsigned int rchits; /* repcache hits */
- unsigned int rcmisses; /* repcache hits */
- unsigned int rcnocache; /* uncached reqs */
- unsigned int fh_stale; /* FH stale error */
- unsigned int fh_lookup; /* dentry cached */
- unsigned int fh_anon; /* anon file dentry returned */
- unsigned int fh_nocache_dir; /* filehandle not found in dcache */
- unsigned int fh_nocache_nondir; /* filehandle not found in dcache */
- unsigned int io_read; /* bytes returned to read requests */
- unsigned int io_write; /* bytes passed in write requests */
- unsigned int th_cnt; /* number of available threads */
- unsigned int th_usage[10]; /* number of ticks during which n perdeciles
- * of available threads were in use */
- unsigned int th_fullcnt; /* number of times last free thread was used */
- unsigned int ra_size; /* size of ra cache */
- unsigned int ra_depth[11]; /* number of times ra entry was found that deep
- * in the cache (10percentiles). [10] = not found */
-#ifdef CONFIG_NFSD_V4
- unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */
-#endif
+static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn)
+{
+ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_HITS]);
+}
+
+static inline void nfsd_stats_rc_misses_inc(struct nfsd_net *nn)
+{
+ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_MISSES]);
+}
+
+static inline void nfsd_stats_rc_nocache_inc(struct nfsd_net *nn)
+{
+ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_NOCACHE]);
+}
-};
+static inline void nfsd_stats_fh_stale_inc(struct nfsd_net *nn,
+ struct svc_export *exp)
+{
+ percpu_counter_inc(&nn->counter[NFSD_STATS_FH_STALE]);
+ if (exp && exp->ex_stats)
+ percpu_counter_inc(&exp->ex_stats->counter[EXP_STATS_FH_STALE]);
+}
+static inline void nfsd_stats_io_read_add(struct nfsd_net *nn,
+ struct svc_export *exp, s64 amount)
+{
+ percpu_counter_add(&nn->counter[NFSD_STATS_IO_READ], amount);
+ if (exp && exp->ex_stats)
+ percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_READ], amount);
+}
-extern struct nfsd_stats nfsdstats;
-extern struct svc_stat nfsd_svcstats;
+static inline void nfsd_stats_io_write_add(struct nfsd_net *nn,
+ struct svc_export *exp, s64 amount)
+{
+ percpu_counter_add(&nn->counter[NFSD_STATS_IO_WRITE], amount);
+ if (exp && exp->ex_stats)
+ percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_WRITE], amount);
+}
-void nfsd_stat_init(void);
-void nfsd_stat_shutdown(void);
+static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn)
+{
+ percpu_counter_inc(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]);
+}
+static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount)
+{
+ percpu_counter_add(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
+}
+
+static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
+{
+ percpu_counter_sub(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
+}
+
+#ifdef CONFIG_NFSD_V4
+static inline void nfsd_stats_wdeleg_getattr_inc(struct nfsd_net *nn)
+{
+ percpu_counter_inc(&nn->counter[NFSD_STATS_WDELEG_GETATTR]);
+}
+#endif
#endif /* _NFSD_STATS_H */
diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c
index 90967466a1e5..f008b95ceec2 100644
--- a/fs/nfsd/trace.c
+++ b/fs/nfsd/trace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 80933e4334d8..5ae2a611e57f 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,22 +9,126 @@
#define _NFSD_TRACE_H
#include <linux/tracepoint.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprt.h>
+#include <trace/misc/fs.h>
+#include <trace/misc/nfs.h>
+#include <trace/misc/sunrpc.h>
+
+#include "export.h"
#include "nfsfh.h"
+#include "xdr4.h"
+
+#define NFSD_TRACE_PROC_CALL_FIELDS(r) \
+ __field(unsigned int, netns_ino) \
+ __field(u32, xid) \
+ __sockaddr(server, (r)->rq_xprt->xpt_locallen) \
+ __sockaddr(client, (r)->rq_xprt->xpt_remotelen)
+
+#define NFSD_TRACE_PROC_CALL_ASSIGNMENTS(r) \
+ do { \
+ struct svc_xprt *xprt = (r)->rq_xprt; \
+ __entry->netns_ino = SVC_NET(r)->ns.inum; \
+ __entry->xid = be32_to_cpu((r)->rq_xid); \
+ __assign_sockaddr(server, &xprt->xpt_local, \
+ xprt->xpt_locallen); \
+ __assign_sockaddr(client, &xprt->xpt_remote, \
+ xprt->xpt_remotelen); \
+ } while (0)
+
+#define NFSD_TRACE_PROC_RES_FIELDS(r) \
+ __field(unsigned int, netns_ino) \
+ __field(u32, xid) \
+ __field(unsigned long, status) \
+ __sockaddr(server, (r)->rq_xprt->xpt_locallen) \
+ __sockaddr(client, (r)->rq_xprt->xpt_remotelen)
+
+#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(r, error) \
+ do { \
+ struct svc_xprt *xprt = (r)->rq_xprt; \
+ __entry->netns_ino = SVC_NET(r)->ns.inum; \
+ __entry->xid = be32_to_cpu((r)->rq_xid); \
+ __entry->status = be32_to_cpu(error); \
+ __assign_sockaddr(server, &xprt->xpt_local, \
+ xprt->xpt_locallen); \
+ __assign_sockaddr(client, &xprt->xpt_remote, \
+ xprt->xpt_remotelen); \
+ } while (0);
+
+DECLARE_EVENT_CLASS(nfsd_xdr_err_class,
+ TP_PROTO(
+ const struct svc_rqst *rqstp
+ ),
+ TP_ARGS(rqstp),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(u32, xid)
+ __field(u32, vers)
+ __field(u32, proc)
+ __sockaddr(server, rqstp->rq_xprt->xpt_locallen)
+ __sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
+ ),
+ TP_fast_assign(
+ const struct svc_xprt *xprt = rqstp->rq_xprt;
+
+ __entry->netns_ino = xprt->xpt_net->ns.inum;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->vers = rqstp->rq_vers;
+ __entry->proc = rqstp->rq_proc;
+ __assign_sockaddr(server, &xprt->xpt_local, xprt->xpt_locallen);
+ __assign_sockaddr(client, &xprt->xpt_remote, xprt->xpt_remotelen);
+ ),
+ TP_printk("xid=0x%08x vers=%u proc=%u",
+ __entry->xid, __entry->vers, __entry->proc
+ )
+);
+
+#define DEFINE_NFSD_XDR_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_xdr_err_class, nfsd_##name##_err, \
+ TP_PROTO(const struct svc_rqst *rqstp), \
+ TP_ARGS(rqstp))
+
+DEFINE_NFSD_XDR_ERR_EVENT(garbage_args);
+DEFINE_NFSD_XDR_ERR_EVENT(cant_encode);
+
+#define show_nfsd_may_flags(x) \
+ __print_flags(x, "|", \
+ { NFSD_MAY_EXEC, "EXEC" }, \
+ { NFSD_MAY_WRITE, "WRITE" }, \
+ { NFSD_MAY_READ, "READ" }, \
+ { NFSD_MAY_SATTR, "SATTR" }, \
+ { NFSD_MAY_TRUNC, "TRUNC" }, \
+ { NFSD_MAY_NLM, "NLM" }, \
+ { NFSD_MAY_OWNER_OVERRIDE, "OWNER_OVERRIDE" }, \
+ { NFSD_MAY_LOCAL_ACCESS, "LOCAL_ACCESS" }, \
+ { NFSD_MAY_BYPASS_GSS_ON_ROOT, "BYPASS_GSS_ON_ROOT" }, \
+ { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }, \
+ { NFSD_MAY_BYPASS_GSS, "BYPASS_GSS" }, \
+ { NFSD_MAY_READ_IF_EXEC, "READ_IF_EXEC" }, \
+ { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }, \
+ { NFSD_MAY_LOCALIO, "LOCALIO" })
TRACE_EVENT(nfsd_compound,
- TP_PROTO(const struct svc_rqst *rqst,
- u32 args_opcnt),
- TP_ARGS(rqst, args_opcnt),
+ TP_PROTO(
+ const struct svc_rqst *rqst,
+ const char *tag,
+ u32 taglen,
+ u32 opcnt
+ ),
+ TP_ARGS(rqst, tag, taglen, opcnt),
TP_STRUCT__entry(
__field(u32, xid)
- __field(u32, args_opcnt)
+ __field(u32, opcnt)
+ __string_len(tag, tag, taglen)
),
TP_fast_assign(
__entry->xid = be32_to_cpu(rqst->rq_xid);
- __entry->args_opcnt = args_opcnt;
+ __entry->opcnt = opcnt;
+ __assign_str(tag);
),
- TP_printk("xid=0x%08x opcnt=%u",
- __entry->xid, __entry->args_opcnt)
+ TP_printk("xid=0x%08x opcnt=%u tag=%s",
+ __entry->xid, __entry->opcnt, __get_str(tag)
+ )
)
TRACE_EVENT(nfsd_compound_status,
@@ -43,24 +147,300 @@ TRACE_EVENT(nfsd_compound_status,
__entry->args_opcnt = args_opcnt;
__entry->resp_opcnt = resp_opcnt;
__entry->status = be32_to_cpu(status);
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("op=%u/%u %s status=%d",
__entry->resp_opcnt, __entry->args_opcnt,
__get_str(name), __entry->status)
)
+TRACE_EVENT(nfsd_compound_decode_err,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ u32 args_opcnt,
+ u32 resp_opcnt,
+ u32 opnum,
+ __be32 status
+ ),
+ TP_ARGS(rqstp, args_opcnt, resp_opcnt, opnum, status),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_RES_FIELDS(rqstp)
+
+ __field(u32, args_opcnt)
+ __field(u32, resp_opcnt)
+ __field(u32, opnum)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status)
+
+ __entry->args_opcnt = args_opcnt;
+ __entry->resp_opcnt = resp_opcnt;
+ __entry->opnum = opnum;
+ ),
+ TP_printk("op=%u/%u opnum=%u status=%lu",
+ __entry->resp_opcnt, __entry->args_opcnt,
+ __entry->opnum, __entry->status)
+);
+
+DECLARE_EVENT_CLASS(nfsd_compound_err_class,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ u32 opnum,
+ __be32 status
+ ),
+ TP_ARGS(rqstp, opnum, status),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_RES_FIELDS(rqstp)
+
+ __field(u32, opnum)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status)
+
+ __entry->opnum = opnum;
+ ),
+ TP_printk("opnum=%u status=%lu",
+ __entry->opnum, __entry->status)
+);
+
+#define DEFINE_NFSD_COMPOUND_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_compound_err_class, nfsd_compound_##name##_err, \
+ TP_PROTO( \
+ const struct svc_rqst *rqstp, \
+ u32 opnum, \
+ __be32 status \
+ ), \
+ TP_ARGS(rqstp, opnum, status))
+
+DEFINE_NFSD_COMPOUND_ERR_EVENT(op);
+DEFINE_NFSD_COMPOUND_ERR_EVENT(encode);
+
+#define show_fs_file_type(x) \
+ __print_symbolic(x, \
+ { S_IFLNK, "LNK" }, \
+ { S_IFREG, "REG" }, \
+ { S_IFDIR, "DIR" }, \
+ { S_IFCHR, "CHR" }, \
+ { S_IFBLK, "BLK" }, \
+ { S_IFIFO, "FIFO" }, \
+ { S_IFSOCK, "SOCK" })
+
+TRACE_EVENT_CONDITION(nfsd_fh_verify,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ umode_t type,
+ int access
+ ),
+ TP_ARGS(rqstp, fhp, type, access),
+ TP_CONDITION(rqstp != NULL),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __sockaddr(server, rqstp->rq_xprt->xpt_remotelen)
+ __sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
+ __field(u32, xid)
+ __field(u32, fh_hash)
+ __field(const void *, inode)
+ __field(unsigned long, type)
+ __field(unsigned long, access)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = SVC_NET(rqstp)->ns.inum;
+ __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local,
+ rqstp->rq_xprt->xpt_locallen);
+ __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote,
+ rqstp->rq_xprt->xpt_remotelen);
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->inode = d_inode(fhp->fh_dentry);
+ __entry->type = type;
+ __entry->access = access;
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s",
+ __entry->xid, __entry->fh_hash,
+ show_fs_file_type(__entry->type),
+ show_nfsd_may_flags(__entry->access)
+ )
+);
+
+TRACE_EVENT_CONDITION(nfsd_fh_verify_err,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ umode_t type,
+ int access,
+ __be32 error
+ ),
+ TP_ARGS(rqstp, fhp, type, access, error),
+ TP_CONDITION(rqstp != NULL && error),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __sockaddr(server, rqstp->rq_xprt->xpt_remotelen)
+ __sockaddr(client, rqstp->rq_xprt->xpt_remotelen)
+ __field(u32, xid)
+ __field(u32, fh_hash)
+ __field(const void *, inode)
+ __field(unsigned long, type)
+ __field(unsigned long, access)
+ __field(int, error)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = SVC_NET(rqstp)->ns.inum;
+ __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local,
+ rqstp->rq_xprt->xpt_locallen);
+ __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote,
+ rqstp->rq_xprt->xpt_remotelen);
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ if (fhp->fh_dentry)
+ __entry->inode = d_inode(fhp->fh_dentry);
+ else
+ __entry->inode = NULL;
+ __entry->type = type;
+ __entry->access = access;
+ __entry->error = be32_to_cpu(error);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s error=%d",
+ __entry->xid, __entry->fh_hash,
+ show_fs_file_type(__entry->type),
+ show_nfsd_may_flags(__entry->access),
+ __entry->error
+ )
+);
+
+DECLARE_EVENT_CLASS(nfsd_fh_err_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *fhp,
+ int status),
+ TP_ARGS(rqstp, fhp, status),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, fh_hash)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->status = status;
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x status=%d",
+ __entry->xid, __entry->fh_hash,
+ __entry->status)
+)
+
+#define DEFINE_NFSD_FH_ERR_EVENT(name) \
+DEFINE_EVENT_CONDITION(nfsd_fh_err_class, nfsd_##name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *fhp, \
+ int status), \
+ TP_ARGS(rqstp, fhp, status), \
+ TP_CONDITION(rqstp != NULL))
+
+DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badexport);
+DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badhandle);
+
+TRACE_EVENT(nfsd_exp_find_key,
+ TP_PROTO(const struct svc_expkey *key,
+ int status),
+ TP_ARGS(key, status),
+ TP_STRUCT__entry(
+ __field(u8, fsidtype)
+ __array(u32, fsid, 6)
+ __string(auth_domain, key->ek_client->name)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->fsidtype = key->ek_fsidtype;
+ memcpy(__entry->fsid, key->ek_fsid, 4*6);
+ __assign_str(auth_domain);
+ __entry->status = status;
+ ),
+ TP_printk("fsid=%x::%s domain=%s status=%d",
+ __entry->fsidtype,
+ __print_array(__entry->fsid, 6, 4),
+ __get_str(auth_domain),
+ __entry->status
+ )
+);
+
+TRACE_EVENT(nfsd_expkey_update,
+ TP_PROTO(const struct svc_expkey *key, const char *exp_path),
+ TP_ARGS(key, exp_path),
+ TP_STRUCT__entry(
+ __field(u8, fsidtype)
+ __array(u32, fsid, 6)
+ __string(auth_domain, key->ek_client->name)
+ __string(path, exp_path)
+ __field(bool, cache)
+ ),
+ TP_fast_assign(
+ __entry->fsidtype = key->ek_fsidtype;
+ memcpy(__entry->fsid, key->ek_fsid, 4*6);
+ __assign_str(auth_domain);
+ __assign_str(path);
+ __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags);
+ ),
+ TP_printk("fsid=%x::%s domain=%s path=%s cache=%s",
+ __entry->fsidtype,
+ __print_array(__entry->fsid, 6, 4),
+ __get_str(auth_domain),
+ __get_str(path),
+ __entry->cache ? "pos" : "neg"
+ )
+);
+
+TRACE_EVENT(nfsd_exp_get_by_name,
+ TP_PROTO(const struct svc_export *key,
+ int status),
+ TP_ARGS(key, status),
+ TP_STRUCT__entry(
+ __string(path, key->ex_path.dentry->d_name.name)
+ __string(auth_domain, key->ex_client->name)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __assign_str(path);
+ __assign_str(auth_domain);
+ __entry->status = status;
+ ),
+ TP_printk("path=%s domain=%s status=%d",
+ __get_str(path),
+ __get_str(auth_domain),
+ __entry->status
+ )
+);
+
+TRACE_EVENT(nfsd_export_update,
+ TP_PROTO(const struct svc_export *key),
+ TP_ARGS(key),
+ TP_STRUCT__entry(
+ __string(path, key->ex_path.dentry->d_name.name)
+ __string(auth_domain, key->ex_client->name)
+ __field(bool, cache)
+ ),
+ TP_fast_assign(
+ __assign_str(path);
+ __assign_str(auth_domain);
+ __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags);
+ ),
+ TP_printk("path=%s domain=%s cache=%s",
+ __get_str(path),
+ __get_str(auth_domain),
+ __entry->cache ? "pos" : "neg"
+ )
+);
+
DECLARE_EVENT_CLASS(nfsd_io_class,
TP_PROTO(struct svc_rqst *rqstp,
struct svc_fh *fhp,
- loff_t offset,
- unsigned long len),
+ u64 offset,
+ u32 len),
TP_ARGS(rqstp, fhp, offset, len),
TP_STRUCT__entry(
__field(u32, xid)
__field(u32, fh_hash)
- __field(loff_t, offset)
- __field(unsigned long, len)
+ __field(u64, offset)
+ __field(u32, len)
),
TP_fast_assign(
__entry->xid = be32_to_cpu(rqstp->rq_xid);
@@ -68,7 +448,7 @@ DECLARE_EVENT_CLASS(nfsd_io_class,
__entry->offset = offset;
__entry->len = len;
),
- TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu",
+ TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu len=%u",
__entry->xid, __entry->fh_hash,
__entry->offset, __entry->len)
)
@@ -77,19 +457,24 @@ DECLARE_EVENT_CLASS(nfsd_io_class,
DEFINE_EVENT(nfsd_io_class, nfsd_##name, \
TP_PROTO(struct svc_rqst *rqstp, \
struct svc_fh *fhp, \
- loff_t offset, \
- unsigned long len), \
+ u64 offset, \
+ u32 len), \
TP_ARGS(rqstp, fhp, offset, len))
DEFINE_NFSD_IO_EVENT(read_start);
DEFINE_NFSD_IO_EVENT(read_splice);
DEFINE_NFSD_IO_EVENT(read_vector);
+DEFINE_NFSD_IO_EVENT(read_direct);
DEFINE_NFSD_IO_EVENT(read_io_done);
DEFINE_NFSD_IO_EVENT(read_done);
DEFINE_NFSD_IO_EVENT(write_start);
DEFINE_NFSD_IO_EVENT(write_opened);
+DEFINE_NFSD_IO_EVENT(write_direct);
+DEFINE_NFSD_IO_EVENT(write_vector);
DEFINE_NFSD_IO_EVENT(write_io_done);
DEFINE_NFSD_IO_EVENT(write_done);
+DEFINE_NFSD_IO_EVENT(commit_start);
+DEFINE_NFSD_IO_EVENT(commit_done);
DECLARE_EVENT_CLASS(nfsd_err_class,
TP_PROTO(struct svc_rqst *rqstp,
@@ -125,7 +510,103 @@ DEFINE_EVENT(nfsd_err_class, nfsd_##name, \
DEFINE_NFSD_ERR_EVENT(read_err);
DEFINE_NFSD_ERR_EVENT(write_err);
+TRACE_EVENT(nfsd_dirent,
+ TP_PROTO(struct svc_fh *fhp,
+ u64 ino,
+ const char *name,
+ int namlen),
+ TP_ARGS(fhp, ino, name, namlen),
+ TP_STRUCT__entry(
+ __field(u32, fh_hash)
+ __field(u64, ino)
+ __string_len(name, name, namlen)
+ ),
+ TP_fast_assign(
+ __entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0;
+ __entry->ino = ino;
+ __assign_str(name);
+ ),
+ TP_printk("fh_hash=0x%08x ino=%llu name=%s",
+ __entry->fh_hash, __entry->ino, __get_str(name)
+ )
+)
+
+DECLARE_EVENT_CLASS(nfsd_copy_err_class,
+ TP_PROTO(struct svc_rqst *rqstp,
+ struct svc_fh *src_fhp,
+ loff_t src_offset,
+ struct svc_fh *dst_fhp,
+ loff_t dst_offset,
+ u64 count,
+ int status),
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(u32, src_fh_hash)
+ __field(loff_t, src_offset)
+ __field(u32, dst_fh_hash)
+ __field(loff_t, dst_offset)
+ __field(u64, count)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle);
+ __entry->src_offset = src_offset;
+ __entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle);
+ __entry->dst_offset = dst_offset;
+ __entry->count = count;
+ __entry->status = status;
+ ),
+ TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld "
+ "dst_fh_hash=0x%08x dst_offset=%lld "
+ "count=%llu status=%d",
+ __entry->xid, __entry->src_fh_hash, __entry->src_offset,
+ __entry->dst_fh_hash, __entry->dst_offset,
+ (unsigned long long)__entry->count,
+ __entry->status)
+)
+
+#define DEFINE_NFSD_COPY_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name, \
+ TP_PROTO(struct svc_rqst *rqstp, \
+ struct svc_fh *src_fhp, \
+ loff_t src_offset, \
+ struct svc_fh *dst_fhp, \
+ loff_t dst_offset, \
+ u64 count, \
+ int status), \
+ TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \
+ count, status))
+
+DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err);
+
#include "state.h"
+#include "filecache.h"
+#include "vfs.h"
+
+TRACE_EVENT(nfsd_delegret_wakeup,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct inode *inode,
+ long timeo
+ ),
+ TP_ARGS(rqstp, inode, timeo),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(const void *, inode)
+ __field(long, timeo)
+ ),
+ TP_fast_assign(
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->inode = inode;
+ __entry->timeo = timeo;
+ ),
+ TP_printk("xid=0x%08x inode=%p%s",
+ __entry->xid, __entry->inode,
+ __entry->timeo == 0 ? " (timed out)" : ""
+ )
+);
DECLARE_EVENT_CLASS(nfsd_stateid_class,
TP_PROTO(stateid_t *stp),
@@ -153,6 +634,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class,
DEFINE_EVENT(nfsd_stateid_class, nfsd_##name, \
TP_PROTO(stateid_t *stp), \
TP_ARGS(stp))
+
DEFINE_STATEID_EVENT(layoutstate_alloc);
DEFINE_STATEID_EVENT(layoutstate_unhash);
DEFINE_STATEID_EVENT(layoutstate_free);
@@ -164,6 +646,2014 @@ DEFINE_STATEID_EVENT(layout_recall_done);
DEFINE_STATEID_EVENT(layout_recall_fail);
DEFINE_STATEID_EVENT(layout_recall_release);
+DEFINE_STATEID_EVENT(open);
+DEFINE_STATEID_EVENT(deleg_read);
+DEFINE_STATEID_EVENT(deleg_write);
+DEFINE_STATEID_EVENT(deleg_return);
+
+DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
+ TP_PROTO(u32 seqid, const stateid_t *stp),
+ TP_ARGS(seqid, stp),
+ TP_STRUCT__entry(
+ __field(u32, seqid)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, si_id)
+ __field(u32, si_generation)
+ ),
+ TP_fast_assign(
+ __entry->seqid = seqid;
+ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
+ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
+ __entry->si_id = stp->si_opaque.so_id;
+ __entry->si_generation = stp->si_generation;
+ ),
+ TP_printk("seqid=%u client %08x:%08x stateid %08x:%08x",
+ __entry->seqid, __entry->cl_boot, __entry->cl_id,
+ __entry->si_id, __entry->si_generation)
+)
+
+#define DEFINE_STATESEQID_EVENT(name) \
+DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
+ TP_PROTO(u32 seqid, const stateid_t *stp), \
+ TP_ARGS(seqid, stp))
+
+DEFINE_STATESEQID_EVENT(preprocess);
+DEFINE_STATESEQID_EVENT(open_confirm);
+
+#define show_stid_type(x) \
+ __print_flags(x, "|", \
+ { SC_TYPE_OPEN, "OPEN" }, \
+ { SC_TYPE_LOCK, "LOCK" }, \
+ { SC_TYPE_DELEG, "DELEG" }, \
+ { SC_TYPE_LAYOUT, "LAYOUT" })
+
+#define show_stid_status(x) \
+ __print_flags(x, "|", \
+ { SC_STATUS_CLOSED, "CLOSED" }, \
+ { SC_STATUS_REVOKED, "REVOKED" }, \
+ { SC_STATUS_ADMIN_REVOKED, "ADMIN_REVOKED" })
+
+DECLARE_EVENT_CLASS(nfsd_stid_class,
+ TP_PROTO(
+ const struct nfs4_stid *stid
+ ),
+ TP_ARGS(stid),
+ TP_STRUCT__entry(
+ __field(unsigned long, sc_type)
+ __field(unsigned long, sc_status)
+ __field(int, sc_count)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, si_id)
+ __field(u32, si_generation)
+ ),
+ TP_fast_assign(
+ const stateid_t *stp = &stid->sc_stateid;
+
+ __entry->sc_type = stid->sc_type;
+ __entry->sc_status = stid->sc_status;
+ __entry->sc_count = refcount_read(&stid->sc_count);
+ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
+ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
+ __entry->si_id = stp->si_opaque.so_id;
+ __entry->si_generation = stp->si_generation;
+ ),
+ TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s state=%s",
+ __entry->cl_boot, __entry->cl_id,
+ __entry->si_id, __entry->si_generation,
+ __entry->sc_count, show_stid_type(__entry->sc_type),
+ show_stid_status(__entry->sc_status)
+ )
+);
+
+#define DEFINE_STID_EVENT(name) \
+DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \
+ TP_PROTO(const struct nfs4_stid *stid), \
+ TP_ARGS(stid))
+
+DEFINE_STID_EVENT(revoke);
+
+TRACE_EVENT(nfsd_stateowner_replay,
+ TP_PROTO(
+ u32 opnum,
+ const struct nfs4_replay *rp
+ ),
+ TP_ARGS(opnum, rp),
+ TP_STRUCT__entry(
+ __field(unsigned long, status)
+ __field(u32, opnum)
+ ),
+ TP_fast_assign(
+ __entry->status = be32_to_cpu(rp->rp_status);
+ __entry->opnum = opnum;
+ ),
+ TP_printk("opnum=%u status=%lu",
+ __entry->opnum, __entry->status)
+);
+
+TRACE_EVENT_CONDITION(nfsd_seq4_status,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct nfsd4_sequence *sequence
+ ),
+ TP_ARGS(rqstp, sequence),
+ TP_CONDITION(sequence->status_flags),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(u32, xid)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, seqno)
+ __field(u32, reserved)
+ __field(unsigned long, status_flags)
+ ),
+ TP_fast_assign(
+ const struct nfsd4_sessionid *sid =
+ (struct nfsd4_sessionid *)&sequence->sessionid;
+
+ __entry->netns_ino = SVC_NET(rqstp)->ns.inum;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->cl_boot = sid->clientid.cl_boot;
+ __entry->cl_id = sid->clientid.cl_id;
+ __entry->seqno = sid->sequence;
+ __entry->reserved = sid->reserved;
+ __entry->status_flags = sequence->status_flags;
+ ),
+ TP_printk("xid=0x%08x sessionid=%08x:%08x:%08x:%08x status_flags=%s",
+ __entry->xid, __entry->cl_boot, __entry->cl_id,
+ __entry->seqno, __entry->reserved,
+ show_nfs4_seq4_status(__entry->status_flags)
+ )
+);
+
+DECLARE_EVENT_CLASS(nfsd_cs_slot_class,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct nfsd4_create_session *cs
+ ),
+ TP_ARGS(clp, cs),
+ TP_STRUCT__entry(
+ __field(u32, seqid)
+ __field(u32, slot_seqid)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ const struct nfsd4_clid_slot *slot = &clp->cl_cs_slot;
+
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
+ __entry->seqid = cs->seqid;
+ __entry->slot_seqid = slot->sl_seqid;
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x seqid=%u slot_seqid=%u",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->seqid, __entry->slot_seqid
+ )
+);
+
+#define DEFINE_CS_SLOT_EVENT(name) \
+DEFINE_EVENT(nfsd_cs_slot_class, nfsd_##name, \
+ TP_PROTO( \
+ const struct nfs4_client *clp, \
+ const struct nfsd4_create_session *cs \
+ ), \
+ TP_ARGS(clp, cs))
+
+DEFINE_CS_SLOT_EVENT(slot_seqid_conf);
+DEFINE_CS_SLOT_EVENT(slot_seqid_unconf);
+
+#define show_nfs_slot_flags(val) \
+ __print_flags(val, "|", \
+ { NFSD4_SLOT_INUSE, "INUSE" }, \
+ { NFSD4_SLOT_CACHETHIS, "CACHETHIS" }, \
+ { NFSD4_SLOT_INITIALIZED, "INITIALIZED" }, \
+ { NFSD4_SLOT_CACHED, "CACHED" }, \
+ { NFSD4_SLOT_REUSED, "REUSED" })
+
+TRACE_EVENT(nfsd_slot_seqid_sequence,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct nfsd4_sequence *seq,
+ const struct nfsd4_slot *slot
+ ),
+ TP_ARGS(clp, seq, slot),
+ TP_STRUCT__entry(
+ __field(u32, seqid)
+ __field(u32, slot_seqid)
+ __field(u32, slot_index)
+ __field(unsigned long, slot_flags)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
+ __entry->seqid = seq->seqid;
+ __entry->slot_seqid = slot->sl_seqid;
+ __entry->slot_index = seq->slotid;
+ __entry->slot_flags = slot->sl_flags;
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x idx=%u seqid=%u slot_seqid=%u flags=%s",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->slot_index, __entry->seqid, __entry->slot_seqid,
+ show_nfs_slot_flags(__entry->slot_flags)
+ )
+);
+
+DECLARE_EVENT_CLASS(nfsd_clientid_class,
+ TP_PROTO(const clientid_t *clid),
+ TP_ARGS(clid),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clid->cl_boot;
+ __entry->cl_id = clid->cl_id;
+ ),
+ TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id)
+)
+
+#define DEFINE_CLIENTID_EVENT(name) \
+DEFINE_EVENT(nfsd_clientid_class, nfsd_clid_##name, \
+ TP_PROTO(const clientid_t *clid), \
+ TP_ARGS(clid))
+
+DEFINE_CLIENTID_EVENT(expire_unconf);
+DEFINE_CLIENTID_EVENT(reclaim_complete);
+DEFINE_CLIENTID_EVENT(confirmed);
+DEFINE_CLIENTID_EVENT(destroyed);
+DEFINE_CLIENTID_EVENT(admin_expired);
+DEFINE_CLIENTID_EVENT(replaced);
+DEFINE_CLIENTID_EVENT(purged);
+DEFINE_CLIENTID_EVENT(renew);
+DEFINE_CLIENTID_EVENT(stale);
+
+TRACE_EVENT(nfsd_mark_client_expired,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ int cl_rpc_users
+ ),
+ TP_ARGS(clp, cl_rpc_users),
+ TP_STRUCT__entry(
+ __field(int, cl_rpc_users)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_rpc_users = cl_rpc_users;
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x cl_rpc_users=%d",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->cl_rpc_users)
+);
+
+DECLARE_EVENT_CLASS(nfsd_net_class,
+ TP_PROTO(const struct nfsd_net *nn),
+ TP_ARGS(nn),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ ),
+ TP_printk("boot_time=%16llx", __entry->boot_time)
+)
+
+#define DEFINE_NET_EVENT(name) \
+DEFINE_EVENT(nfsd_net_class, nfsd_##name, \
+ TP_PROTO(const struct nfsd_net *nn), \
+ TP_ARGS(nn))
+
+DEFINE_NET_EVENT(grace_start);
+DEFINE_NET_EVENT(grace_complete);
+
+TRACE_EVENT(nfsd_writeverf_reset,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ const struct svc_rqst *rqstp,
+ int error
+ ),
+ TP_ARGS(nn, rqstp, error),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(u32, xid)
+ __field(int, error)
+ __array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->error = error;
+
+ /* avoid seqlock inside TP_fast_assign */
+ memcpy(__entry->verifier, nn->writeverf,
+ NFS4_VERIFIER_SIZE);
+ ),
+ TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s",
+ __entry->boot_time, __entry->xid, __entry->error,
+ __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
+ )
+);
+
+TRACE_EVENT(nfsd_clid_cred_mismatch,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct svc_rqst *rqstp
+ ),
+ TP_ARGS(clp, rqstp),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(unsigned long, cl_flavor)
+ __field(unsigned long, new_flavor)
+ __sockaddr(addr, rqstp->rq_xprt->xpt_remotelen)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __entry->cl_flavor = clp->cl_cred.cr_flavor;
+ __entry->new_flavor = rqstp->rq_cred.cr_flavor;
+ __assign_sockaddr(addr, &rqstp->rq_xprt->xpt_remote,
+ rqstp->rq_xprt->xpt_remotelen);
+ ),
+ TP_printk("client %08x:%08x flavor=%s, conflict=%s from addr=%pISpc",
+ __entry->cl_boot, __entry->cl_id,
+ show_nfsd_authflavor(__entry->cl_flavor),
+ show_nfsd_authflavor(__entry->new_flavor),
+ __get_sockaddr(addr)
+ )
+)
+
+TRACE_EVENT(nfsd_clid_verf_mismatch,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct svc_rqst *rqstp,
+ const nfs4_verifier *verf
+ ),
+ TP_ARGS(clp, rqstp, verf),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __array(unsigned char, cl_verifier, NFS4_VERIFIER_SIZE)
+ __array(unsigned char, new_verifier, NFS4_VERIFIER_SIZE)
+ __sockaddr(addr, rqstp->rq_xprt->xpt_remotelen)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ memcpy(__entry->cl_verifier, (void *)&clp->cl_verifier,
+ NFS4_VERIFIER_SIZE);
+ memcpy(__entry->new_verifier, (void *)verf,
+ NFS4_VERIFIER_SIZE);
+ __assign_sockaddr(addr, &rqstp->rq_xprt->xpt_remote,
+ rqstp->rq_xprt->xpt_remotelen);
+ ),
+ TP_printk("client %08x:%08x verf=0x%s, updated=0x%s from addr=%pISpc",
+ __entry->cl_boot, __entry->cl_id,
+ __print_hex_str(__entry->cl_verifier, NFS4_VERIFIER_SIZE),
+ __print_hex_str(__entry->new_verifier, NFS4_VERIFIER_SIZE),
+ __get_sockaddr(addr)
+ )
+);
+
+DECLARE_EVENT_CLASS(nfsd_clid_class,
+ TP_PROTO(const struct nfs4_client *clp),
+ TP_ARGS(clp),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __field(unsigned long, flavor)
+ __array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
+ __string_len(name, clp->cl_name.data, clp->cl_name.len)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ memcpy(__entry->addr, &clp->cl_addr,
+ sizeof(struct sockaddr_in6));
+ __entry->flavor = clp->cl_cred.cr_flavor;
+ memcpy(__entry->verifier, (void *)&clp->cl_verifier,
+ NFS4_VERIFIER_SIZE);
+ __assign_str(name);
+ ),
+ TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x",
+ __entry->addr, __get_str(name),
+ __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE),
+ show_nfsd_authflavor(__entry->flavor),
+ __entry->cl_boot, __entry->cl_id)
+);
+
+#define DEFINE_CLID_EVENT(name) \
+DEFINE_EVENT(nfsd_clid_class, nfsd_clid_##name, \
+ TP_PROTO(const struct nfs4_client *clp), \
+ TP_ARGS(clp))
+
+DEFINE_CLID_EVENT(fresh);
+DEFINE_CLID_EVENT(confirmed_r);
+
+/*
+ * from fs/nfsd/filecache.h
+ */
+#define show_nf_flags(val) \
+ __print_flags(val, "|", \
+ { 1 << NFSD_FILE_HASHED, "HASHED" }, \
+ { 1 << NFSD_FILE_PENDING, "PENDING" }, \
+ { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \
+ { 1 << NFSD_FILE_RECENT, "RECENT" }, \
+ { 1 << NFSD_FILE_GC, "GC" })
+
+DECLARE_EVENT_CLASS(nfsd_file_class,
+ TP_PROTO(struct nfsd_file *nf),
+ TP_ARGS(nf),
+ TP_STRUCT__entry(
+ __field(void *, nf_inode)
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned char, nf_may)
+ __field(struct file *, nf_file)
+ ),
+ TP_fast_assign(
+ __entry->nf_inode = nf->nf_inode;
+ __entry->nf_ref = refcount_read(&nf->nf_ref);
+ __entry->nf_flags = nf->nf_flags;
+ __entry->nf_may = nf->nf_may;
+ __entry->nf_file = nf->nf_file;
+ ),
+ TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p",
+ __entry->nf_inode,
+ __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ show_nfsd_may_flags(__entry->nf_may),
+ __entry->nf_file)
+)
+
+#define DEFINE_NFSD_FILE_EVENT(name) \
+DEFINE_EVENT(nfsd_file_class, name, \
+ TP_PROTO(struct nfsd_file *nf), \
+ TP_ARGS(nf))
+
+DEFINE_NFSD_FILE_EVENT(nfsd_file_free);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_closing);
+
+TRACE_EVENT(nfsd_file_alloc,
+ TP_PROTO(
+ const struct nfsd_file *nf
+ ),
+ TP_ARGS(nf),
+ TP_STRUCT__entry(
+ __field(const void *, nf_inode)
+ __field(unsigned long, nf_flags)
+ __field(unsigned long, nf_may)
+ __field(unsigned int, nf_ref)
+ ),
+ TP_fast_assign(
+ __entry->nf_inode = nf->nf_inode;
+ __entry->nf_flags = nf->nf_flags;
+ __entry->nf_ref = refcount_read(&nf->nf_ref);
+ __entry->nf_may = nf->nf_may;
+ ),
+ TP_printk("inode=%p ref=%u flags=%s may=%s",
+ __entry->nf_inode, __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ show_nfsd_may_flags(__entry->nf_may)
+ )
+);
+
+TRACE_EVENT(nfsd_file_get_dio_attrs,
+ TP_PROTO(
+ const struct inode *inode,
+ const struct kstat *stat
+ ),
+ TP_ARGS(inode, stat),
+ TP_STRUCT__entry(
+ __field(const void *, inode)
+ __field(unsigned long, mask)
+ __field(u32, mem_align)
+ __field(u32, offset_align)
+ __field(u32, read_offset_align)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+ __entry->mask = stat->result_mask;
+ __entry->mem_align = stat->dio_mem_align;
+ __entry->offset_align = stat->dio_offset_align;
+ __entry->read_offset_align = stat->dio_read_offset_align;
+ ),
+ TP_printk("inode=%p flags=%s mem_align=%u offset_align=%u read_offset_align=%u",
+ __entry->inode, show_statx_mask(__entry->mask),
+ __entry->mem_align, __entry->offset_align,
+ __entry->read_offset_align
+ )
+);
+
+TRACE_EVENT(nfsd_file_acquire,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct inode *inode,
+ unsigned int may_flags,
+ const struct nfsd_file *nf,
+ __be32 status
+ ),
+
+ TP_ARGS(rqstp, inode, may_flags, nf, status),
+
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(const void *, inode)
+ __field(unsigned long, may_flags)
+ __field(unsigned int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned long, nf_may)
+ __field(const void *, nf_file)
+ __field(u32, status)
+ ),
+
+ TP_fast_assign(
+ __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0;
+ __entry->inode = inode;
+ __entry->may_flags = may_flags;
+ __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0;
+ __entry->nf_flags = nf ? nf->nf_flags : 0;
+ __entry->nf_may = nf ? nf->nf_may : 0;
+ __entry->nf_file = nf ? nf->nf_file : NULL;
+ __entry->status = be32_to_cpu(status);
+ ),
+
+ TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p status=%u",
+ __entry->xid, __entry->inode,
+ show_nfsd_may_flags(__entry->may_flags),
+ __entry->nf_ref, show_nf_flags(__entry->nf_flags),
+ show_nfsd_may_flags(__entry->nf_may),
+ __entry->nf_file, __entry->status
+ )
+);
+
+TRACE_EVENT(nfsd_file_insert_err,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct inode *inode,
+ unsigned int may_flags,
+ long error
+ ),
+ TP_ARGS(rqstp, inode, may_flags, error),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(const void *, inode)
+ __field(unsigned long, may_flags)
+ __field(long, error)
+ ),
+ TP_fast_assign(
+ __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0;
+ __entry->inode = inode;
+ __entry->may_flags = may_flags;
+ __entry->error = error;
+ ),
+ TP_printk("xid=0x%x inode=%p may_flags=%s error=%ld",
+ __entry->xid, __entry->inode,
+ show_nfsd_may_flags(__entry->may_flags),
+ __entry->error
+ )
+);
+
+TRACE_EVENT(nfsd_file_cons_err,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct inode *inode,
+ unsigned int may_flags,
+ const struct nfsd_file *nf
+ ),
+ TP_ARGS(rqstp, inode, may_flags, nf),
+ TP_STRUCT__entry(
+ __field(u32, xid)
+ __field(const void *, inode)
+ __field(unsigned long, may_flags)
+ __field(unsigned int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned long, nf_may)
+ __field(const void *, nf_file)
+ ),
+ TP_fast_assign(
+ __entry->xid = rqstp ? be32_to_cpu(rqstp->rq_xid) : 0;
+ __entry->inode = inode;
+ __entry->may_flags = may_flags;
+ __entry->nf_ref = refcount_read(&nf->nf_ref);
+ __entry->nf_flags = nf->nf_flags;
+ __entry->nf_may = nf->nf_may;
+ __entry->nf_file = nf->nf_file;
+ ),
+ TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
+ __entry->xid, __entry->inode,
+ show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ show_nfsd_may_flags(__entry->nf_may), __entry->nf_file
+ )
+);
+
+DECLARE_EVENT_CLASS(nfsd_file_open_class,
+ TP_PROTO(const struct nfsd_file *nf, __be32 status),
+ TP_ARGS(nf, status),
+ TP_STRUCT__entry(
+ __field(void *, nf_inode) /* cannot be dereferenced */
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+ __field(unsigned long, nf_may)
+ __field(void *, nf_file) /* cannot be dereferenced */
+ ),
+ TP_fast_assign(
+ __entry->nf_inode = nf->nf_inode;
+ __entry->nf_ref = refcount_read(&nf->nf_ref);
+ __entry->nf_flags = nf->nf_flags;
+ __entry->nf_may = nf->nf_may;
+ __entry->nf_file = nf->nf_file;
+ ),
+ TP_printk("inode=%p ref=%d flags=%s may=%s file=%p",
+ __entry->nf_inode,
+ __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ show_nfsd_may_flags(__entry->nf_may),
+ __entry->nf_file)
+)
+
+#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \
+DEFINE_EVENT(nfsd_file_open_class, name, \
+ TP_PROTO( \
+ const struct nfsd_file *nf, \
+ __be32 status \
+ ), \
+ TP_ARGS(nf, status))
+
+DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open);
+DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened);
+
+TRACE_EVENT(nfsd_file_is_cached,
+ TP_PROTO(
+ const struct inode *inode,
+ int found
+ ),
+ TP_ARGS(inode, found),
+ TP_STRUCT__entry(
+ __field(const struct inode *, inode)
+ __field(int, found)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+ __entry->found = found;
+ ),
+ TP_printk("inode=%p is %scached",
+ __entry->inode,
+ __entry->found ? "" : "not "
+ )
+);
+
+TRACE_EVENT(nfsd_file_fsnotify_handle_event,
+ TP_PROTO(struct inode *inode, u32 mask),
+ TP_ARGS(inode, mask),
+ TP_STRUCT__entry(
+ __field(struct inode *, inode)
+ __field(unsigned int, nlink)
+ __field(umode_t, mode)
+ __field(u32, mask)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+ __entry->nlink = inode->i_nlink;
+ __entry->mode = inode->i_mode;
+ __entry->mask = mask;
+ ),
+ TP_printk("inode=%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
+ __entry->nlink, __entry->mode, __entry->mask)
+);
+
+DECLARE_EVENT_CLASS(nfsd_file_gc_class,
+ TP_PROTO(
+ const struct nfsd_file *nf
+ ),
+ TP_ARGS(nf),
+ TP_STRUCT__entry(
+ __field(void *, nf_inode)
+ __field(void *, nf_file)
+ __field(int, nf_ref)
+ __field(unsigned long, nf_flags)
+ ),
+ TP_fast_assign(
+ __entry->nf_inode = nf->nf_inode;
+ __entry->nf_file = nf->nf_file;
+ __entry->nf_ref = refcount_read(&nf->nf_ref);
+ __entry->nf_flags = nf->nf_flags;
+ ),
+ TP_printk("inode=%p ref=%d nf_flags=%s nf_file=%p",
+ __entry->nf_inode, __entry->nf_ref,
+ show_nf_flags(__entry->nf_flags),
+ __entry->nf_file
+ )
+);
+
+#define DEFINE_NFSD_FILE_GC_EVENT(name) \
+DEFINE_EVENT(nfsd_file_gc_class, name, \
+ TP_PROTO( \
+ const struct nfsd_file *nf \
+ ), \
+ TP_ARGS(nf))
+
+DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add);
+DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del);
+DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use);
+DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback);
+DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced);
+DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_aged);
+DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed);
+
+DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class,
+ TP_PROTO(
+ unsigned long removed,
+ unsigned long remaining
+ ),
+ TP_ARGS(removed, remaining),
+ TP_STRUCT__entry(
+ __field(unsigned long, removed)
+ __field(unsigned long, remaining)
+ ),
+ TP_fast_assign(
+ __entry->removed = removed;
+ __entry->remaining = remaining;
+ ),
+ TP_printk("%lu entries removed, %lu remaining",
+ __entry->removed, __entry->remaining)
+);
+
+#define DEFINE_NFSD_FILE_LRUWALK_EVENT(name) \
+DEFINE_EVENT(nfsd_file_lruwalk_class, name, \
+ TP_PROTO( \
+ unsigned long removed, \
+ unsigned long remaining \
+ ), \
+ TP_ARGS(removed, remaining))
+
+DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed);
+DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed);
+
+TRACE_EVENT(nfsd_file_close,
+ TP_PROTO(
+ const struct inode *inode
+ ),
+ TP_ARGS(inode),
+ TP_STRUCT__entry(
+ __field(const void *, inode)
+ ),
+ TP_fast_assign(
+ __entry->inode = inode;
+ ),
+ TP_printk("inode=%p",
+ __entry->inode
+ )
+);
+
+#include "cache.h"
+
+TRACE_DEFINE_ENUM(RC_DROPIT);
+TRACE_DEFINE_ENUM(RC_REPLY);
+TRACE_DEFINE_ENUM(RC_DOIT);
+
+#define show_drc_retval(x) \
+ __print_symbolic(x, \
+ { RC_DROPIT, "DROPIT" }, \
+ { RC_REPLY, "REPLY" }, \
+ { RC_DOIT, "DOIT" })
+
+TRACE_EVENT(nfsd_drc_found,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ const struct svc_rqst *rqstp,
+ int result
+ ),
+ TP_ARGS(nn, rqstp, result),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(unsigned long, result)
+ __field(u32, xid)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->result = result;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ ),
+ TP_printk("boot_time=%16llx xid=0x%08x result=%s",
+ __entry->boot_time, __entry->xid,
+ show_drc_retval(__entry->result))
+
+);
+
+TRACE_EVENT(nfsd_drc_mismatch,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ const struct nfsd_cacherep *key,
+ const struct nfsd_cacherep *rp
+ ),
+ TP_ARGS(nn, key, rp),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(u32, xid)
+ __field(u32, cached)
+ __field(u32, ingress)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->xid = be32_to_cpu(key->c_key.k_xid);
+ __entry->cached = (__force u32)key->c_key.k_csum;
+ __entry->ingress = (__force u32)rp->c_key.k_csum;
+ ),
+ TP_printk("boot_time=%16llx xid=0x%08x cached-csum=0x%08x ingress-csum=0x%08x",
+ __entry->boot_time, __entry->xid, __entry->cached,
+ __entry->ingress)
+);
+
+TRACE_EVENT(nfsd_cb_args,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct nfs4_cb_conn *conn
+ ),
+ TP_ARGS(clp, conn),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, prog)
+ __field(u32, ident)
+ __sockaddr(addr, conn->cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __entry->prog = conn->cb_prog;
+ __entry->ident = conn->cb_ident;
+ __assign_sockaddr(addr, &conn->cb_addr, conn->cb_addrlen);
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x prog=%u ident=%u",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->prog, __entry->ident)
+);
+
+TRACE_EVENT(nfsd_cb_nodelegs,
+ TP_PROTO(const struct nfs4_client *clp),
+ TP_ARGS(clp),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ ),
+ TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id)
+)
+
+#define show_cb_state(val) \
+ __print_symbolic(val, \
+ { NFSD4_CB_UP, "UP" }, \
+ { NFSD4_CB_UNKNOWN, "UNKNOWN" }, \
+ { NFSD4_CB_DOWN, "DOWN" }, \
+ { NFSD4_CB_FAULT, "FAULT"})
+
+DECLARE_EVENT_CLASS(nfsd_cb_class,
+ TP_PROTO(const struct nfs4_client *clp),
+ TP_ARGS(clp),
+ TP_STRUCT__entry(
+ __field(unsigned long, state)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->state = clp->cl_cb_state;
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x state=%s",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ show_cb_state(__entry->state))
+);
+
+#define DEFINE_NFSD_CB_EVENT(name) \
+DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \
+ TP_PROTO(const struct nfs4_client *clp), \
+ TP_ARGS(clp))
+
+DEFINE_NFSD_CB_EVENT(start);
+DEFINE_NFSD_CB_EVENT(new_state);
+DEFINE_NFSD_CB_EVENT(probe);
+DEFINE_NFSD_CB_EVENT(lost);
+DEFINE_NFSD_CB_EVENT(shutdown);
+DEFINE_NFSD_CB_EVENT(rpc_prepare);
+DEFINE_NFSD_CB_EVENT(rpc_done);
+DEFINE_NFSD_CB_EVENT(rpc_release);
+
+TRACE_DEFINE_ENUM(RPC_AUTH_NULL);
+TRACE_DEFINE_ENUM(RPC_AUTH_UNIX);
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS);
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5);
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5I);
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5P);
+
+#define show_nfsd_authflavor(val) \
+ __print_symbolic(val, \
+ { RPC_AUTH_NULL, "none" }, \
+ { RPC_AUTH_UNIX, "sys" }, \
+ { RPC_AUTH_GSS, "gss" }, \
+ { RPC_AUTH_GSS_KRB5, "krb5" }, \
+ { RPC_AUTH_GSS_KRB5I, "krb5i" }, \
+ { RPC_AUTH_GSS_KRB5P, "krb5p" })
+
+TRACE_EVENT(nfsd_cb_setup,
+ TP_PROTO(const struct nfs4_client *clp,
+ const char *netid,
+ rpc_authflavor_t authflavor
+ ),
+ TP_ARGS(clp, netid, authflavor),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(unsigned long, authflavor)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ __string(netid, netid)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_str(netid);
+ __entry->authflavor = authflavor;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x proto=%s flavor=%s",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __get_str(netid), show_nfsd_authflavor(__entry->authflavor))
+);
+
+TRACE_EVENT(nfsd_cb_setup_err,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ long error
+ ),
+ TP_ARGS(clp, error),
+ TP_STRUCT__entry(
+ __field(long, error)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x error=%ld",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->error)
+);
+
+/* Not a real opcode, but there is no 0 operation. */
+#define _CB_NULL 0
+
+#define show_nfsd_cb_opcode(val) \
+ __print_symbolic(val, \
+ { _CB_NULL, "CB_NULL" }, \
+ { OP_CB_GETATTR, "CB_GETATTR" }, \
+ { OP_CB_RECALL, "CB_RECALL" }, \
+ { OP_CB_LAYOUTRECALL, "CB_LAYOUTRECALL" }, \
+ { OP_CB_RECALL_ANY, "CB_RECALL_ANY" }, \
+ { OP_CB_NOTIFY_LOCK, "CB_NOTIFY_LOCK" }, \
+ { OP_CB_OFFLOAD, "CB_OFFLOAD" })
+
+DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct nfsd4_callback *cb
+ ),
+ TP_ARGS(clp, cb),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(const void *, cb)
+ __field(unsigned long, opcode)
+ __field(bool, need_restart)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __entry->cb = cb;
+ __entry->opcode = cb->cb_ops ? cb->cb_ops->opcode : _CB_NULL;
+ __entry->need_restart = test_bit(NFSD4_CALLBACK_REQUEUE, &cb->cb_flags);
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x cb=%p%s opcode=%s",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, __entry->cb,
+ __entry->need_restart ? " (need restart)" : " (first try)",
+ show_nfsd_cb_opcode(__entry->opcode)
+ )
+);
+
+#define DEFINE_NFSD_CB_LIFETIME_EVENT(name) \
+DEFINE_EVENT(nfsd_cb_lifetime_class, nfsd_cb_##name, \
+ TP_PROTO( \
+ const struct nfs4_client *clp, \
+ const struct nfsd4_callback *cb \
+ ), \
+ TP_ARGS(clp, cb))
+
+DEFINE_NFSD_CB_LIFETIME_EVENT(queue);
+DEFINE_NFSD_CB_LIFETIME_EVENT(destroy);
+DEFINE_NFSD_CB_LIFETIME_EVENT(restart);
+DEFINE_NFSD_CB_LIFETIME_EVENT(bc_update);
+DEFINE_NFSD_CB_LIFETIME_EVENT(bc_shutdown);
+
+TRACE_EVENT(nfsd_cb_seq_status,
+ TP_PROTO(
+ const struct rpc_task *task,
+ const struct nfsd4_callback *cb
+ ),
+ TP_ARGS(task, cb),
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, seqno)
+ __field(u32, reserved)
+ __field(int, tk_status)
+ __field(int, seq_status)
+ ),
+ TP_fast_assign(
+ const struct nfs4_client *clp = cb->cb_clp;
+ const struct nfsd4_session *session = clp->cl_cb_session;
+ const struct nfsd4_sessionid *sid =
+ (struct nfsd4_sessionid *)&session->se_sessionid;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client ?
+ task->tk_client->cl_clid : -1;
+ __entry->cl_boot = sid->clientid.cl_boot;
+ __entry->cl_id = sid->clientid.cl_id;
+ __entry->seqno = sid->sequence;
+ __entry->reserved = sid->reserved;
+ __entry->tk_status = task->tk_status;
+ __entry->seq_status = cb->cb_seq_status;
+ ),
+ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
+ " sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d",
+ __entry->task_id, __entry->client_id,
+ __entry->cl_boot, __entry->cl_id,
+ __entry->seqno, __entry->reserved,
+ __entry->tk_status, __entry->seq_status
+ )
+);
+
+TRACE_EVENT(nfsd_cb_free_slot,
+ TP_PROTO(
+ const struct rpc_task *task,
+ const struct nfsd4_callback *cb
+ ),
+ TP_ARGS(task, cb),
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, seqno)
+ __field(u32, reserved)
+ __field(u32, slot_seqno)
+ ),
+ TP_fast_assign(
+ const struct nfs4_client *clp = cb->cb_clp;
+ const struct nfsd4_session *session = clp->cl_cb_session;
+ const struct nfsd4_sessionid *sid =
+ (struct nfsd4_sessionid *)&session->se_sessionid;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client ?
+ task->tk_client->cl_clid : -1;
+ __entry->cl_boot = sid->clientid.cl_boot;
+ __entry->cl_id = sid->clientid.cl_id;
+ __entry->seqno = sid->sequence;
+ __entry->reserved = sid->reserved;
+ __entry->slot_seqno = session->se_cb_seq_nr[cb->cb_held_slot];
+ ),
+ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
+ " sessionid=%08x:%08x:%08x:%08x new slot seqno=%u",
+ __entry->task_id, __entry->client_id,
+ __entry->cl_boot, __entry->cl_id,
+ __entry->seqno, __entry->reserved,
+ __entry->slot_seqno
+ )
+);
+
+TRACE_EVENT_CONDITION(nfsd_cb_recall,
+ TP_PROTO(
+ const struct nfs4_stid *stid
+ ),
+ TP_ARGS(stid),
+ TP_CONDITION(stid->sc_client),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, si_id)
+ __field(u32, si_generation)
+ __sockaddr(addr, stid->sc_client->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ const stateid_t *stp = &stid->sc_stateid;
+ const struct nfs4_client *clp = stid->sc_client;
+
+ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
+ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
+ __entry->si_id = stp->si_opaque.so_id;
+ __entry->si_generation = stp->si_generation;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->si_id, __entry->si_generation)
+);
+
+TRACE_EVENT(nfsd_cb_notify_lock,
+ TP_PROTO(
+ const struct nfs4_lockowner *lo,
+ const struct nfsd4_blocked_lock *nbl
+ ),
+ TP_ARGS(lo, nbl),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, fh_hash)
+ __sockaddr(addr, lo->lo_owner.so_client->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ const struct nfs4_client *clp = lo->lo_owner.so_client;
+
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __entry->fh_hash = knfsd_fh_hash(&nbl->nbl_fh);
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x fh_hash=0x%08x",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->fh_hash)
+);
+
+TRACE_EVENT(nfsd_cb_offload,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const stateid_t *stp,
+ const struct knfsd_fh *fh,
+ u64 count,
+ __be32 status
+ ),
+ TP_ARGS(clp, stp, fh, count, status),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, si_id)
+ __field(u32, si_generation)
+ __field(u32, fh_hash)
+ __field(int, status)
+ __field(u64, count)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
+ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
+ __entry->si_id = stp->si_opaque.so_id;
+ __entry->si_generation = stp->si_generation;
+ __entry->fh_hash = knfsd_fh_hash(fh);
+ __entry->status = be32_to_cpu(status);
+ __entry->count = count;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x fh_hash=0x%08x count=%llu status=%d",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->si_id, __entry->si_generation,
+ __entry->fh_hash, __entry->count, __entry->status)
+);
+
+TRACE_EVENT(nfsd_cb_recall_any,
+ TP_PROTO(
+ const struct nfsd4_cb_recall_any *ra
+ ),
+ TP_ARGS(ra),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, keep)
+ __field(unsigned long, bmval0)
+ __sockaddr(addr, ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
+ __entry->keep = ra->ra_keep;
+ __entry->bmval0 = ra->ra_bmval[0];
+ __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr,
+ ra->ra_cb.cb_clp->cl_cb_conn.cb_addrlen);
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x keep=%u bmval0=%s",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->keep, show_rca_mask(__entry->bmval0)
+ )
+);
+
+DECLARE_EVENT_CLASS(nfsd_cb_done_class,
+ TP_PROTO(
+ const stateid_t *stp,
+ const struct rpc_task *task
+ ),
+ TP_ARGS(stp, task),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, si_id)
+ __field(u32, si_generation)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
+ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
+ __entry->si_id = stp->si_opaque.so_id;
+ __entry->si_generation = stp->si_generation;
+ __entry->status = task->tk_status;
+ ),
+ TP_printk("client %08x:%08x stateid %08x:%08x status=%d",
+ __entry->cl_boot, __entry->cl_id, __entry->si_id,
+ __entry->si_generation, __entry->status
+ )
+);
+
+#define DEFINE_NFSD_CB_DONE_EVENT(name) \
+DEFINE_EVENT(nfsd_cb_done_class, name, \
+ TP_PROTO( \
+ const stateid_t *stp, \
+ const struct rpc_task *task \
+ ), \
+ TP_ARGS(stp, task))
+
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_getattr_done);
+
+TRACE_EVENT(nfsd_cb_recall_any_done,
+ TP_PROTO(
+ const struct nfsd4_callback *cb,
+ const struct rpc_task *task
+ ),
+ TP_ARGS(cb, task),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->status = task->tk_status;
+ __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
+ ),
+ TP_printk("client %08x:%08x status=%d",
+ __entry->cl_boot, __entry->cl_id, __entry->status
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_unlock_ip,
+ TP_PROTO(
+ const struct net *net,
+ const char *address
+ ),
+ TP_ARGS(net, address),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(address, address)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(address);
+ ),
+ TP_printk("address=%s",
+ __get_str(address)
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_unlock_fs,
+ TP_PROTO(
+ const struct net *net,
+ const char *path
+ ),
+ TP_ARGS(net, path),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(path, path)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(path);
+ ),
+ TP_printk("path=%s",
+ __get_str(path)
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_filehandle,
+ TP_PROTO(
+ const struct net *net,
+ const char *domain,
+ const char *path,
+ int maxsize
+ ),
+ TP_ARGS(net, domain, path, maxsize),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, maxsize)
+ __string(domain, domain)
+ __string(path, path)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->maxsize = maxsize;
+ __assign_str(domain);
+ __assign_str(path);
+ ),
+ TP_printk("domain=%s path=%s maxsize=%d",
+ __get_str(domain), __get_str(path), __entry->maxsize
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_threads,
+ TP_PROTO(
+ const struct net *net,
+ int newthreads
+ ),
+ TP_ARGS(net, newthreads),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, newthreads)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->newthreads = newthreads;
+ ),
+ TP_printk("newthreads=%d",
+ __entry->newthreads
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_pool_threads,
+ TP_PROTO(
+ const struct net *net,
+ int pool,
+ int nrthreads
+ ),
+ TP_ARGS(net, pool, nrthreads),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, pool)
+ __field(int, nrthreads)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->pool = pool;
+ __entry->nrthreads = nrthreads;
+ ),
+ TP_printk("pool=%d nrthreads=%d",
+ __entry->pool, __entry->nrthreads
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_version,
+ TP_PROTO(
+ const struct net *net,
+ const char *mesg
+ ),
+ TP_ARGS(net, mesg),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(mesg, mesg)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(mesg);
+ ),
+ TP_printk("%s",
+ __get_str(mesg)
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_ports_addfd,
+ TP_PROTO(
+ const struct net *net,
+ int fd
+ ),
+ TP_ARGS(net, fd),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, fd)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->fd = fd;
+ ),
+ TP_printk("fd=%d",
+ __entry->fd
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_ports_addxprt,
+ TP_PROTO(
+ const struct net *net,
+ const char *transport,
+ int port
+ ),
+ TP_ARGS(net, transport, port),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, port)
+ __string(transport, transport)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->port = port;
+ __assign_str(transport);
+ ),
+ TP_printk("transport=%s port=%d",
+ __get_str(transport), __entry->port
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_maxblksize,
+ TP_PROTO(
+ const struct net *net,
+ int bsize
+ ),
+ TP_ARGS(net, bsize),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, bsize)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->bsize = bsize;
+ ),
+ TP_printk("bsize=%d",
+ __entry->bsize
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_time,
+ TP_PROTO(
+ const struct net *net,
+ const char *name,
+ size_t namelen,
+ int time
+ ),
+ TP_ARGS(net, name, namelen, time),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(int, time)
+ __string_len(name, name, namelen)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __entry->time = time;
+ __assign_str(name);
+ ),
+ TP_printk("file=%s time=%d",
+ __get_str(name), __entry->time
+ )
+);
+
+TRACE_EVENT(nfsd_ctl_recoverydir,
+ TP_PROTO(
+ const struct net *net,
+ const char *recdir
+ ),
+ TP_ARGS(net, recdir),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __string(recdir, recdir)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ __assign_str(recdir);
+ ),
+ TP_printk("recdir=%s",
+ __get_str(recdir)
+ )
+);
+
+TRACE_EVENT(nfsd_end_grace,
+ TP_PROTO(
+ const struct net *net
+ ),
+ TP_ARGS(net),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ ),
+ TP_fast_assign(
+ __entry->netns_ino = net->ns.inum;
+ ),
+ TP_printk("nn=%d", __entry->netns_ino
+ )
+);
+
+DECLARE_EVENT_CLASS(nfsd_copy_class,
+ TP_PROTO(
+ const struct nfsd4_copy *copy
+ ),
+ TP_ARGS(copy),
+ TP_STRUCT__entry(
+ __field(bool, intra)
+ __field(bool, async)
+ __field(u32, src_cl_boot)
+ __field(u32, src_cl_id)
+ __field(u32, src_so_id)
+ __field(u32, src_si_generation)
+ __field(u32, dst_cl_boot)
+ __field(u32, dst_cl_id)
+ __field(u32, dst_so_id)
+ __field(u32, dst_si_generation)
+ __field(u32, cb_cl_boot)
+ __field(u32, cb_cl_id)
+ __field(u32, cb_so_id)
+ __field(u32, cb_si_generation)
+ __field(u64, src_cp_pos)
+ __field(u64, dst_cp_pos)
+ __field(u64, cp_count)
+ __sockaddr(addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ const stateid_t *src_stp = &copy->cp_src_stateid;
+ const stateid_t *dst_stp = &copy->cp_dst_stateid;
+ const stateid_t *cb_stp = &copy->cp_res.cb_stateid;
+
+ __entry->intra = test_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
+ __entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
+ __entry->src_cl_boot = src_stp->si_opaque.so_clid.cl_boot;
+ __entry->src_cl_id = src_stp->si_opaque.so_clid.cl_id;
+ __entry->src_so_id = src_stp->si_opaque.so_id;
+ __entry->src_si_generation = src_stp->si_generation;
+ __entry->dst_cl_boot = dst_stp->si_opaque.so_clid.cl_boot;
+ __entry->dst_cl_id = dst_stp->si_opaque.so_clid.cl_id;
+ __entry->dst_so_id = dst_stp->si_opaque.so_id;
+ __entry->dst_si_generation = dst_stp->si_generation;
+ __entry->cb_cl_boot = cb_stp->si_opaque.so_clid.cl_boot;
+ __entry->cb_cl_id = cb_stp->si_opaque.so_clid.cl_id;
+ __entry->cb_so_id = cb_stp->si_opaque.so_id;
+ __entry->cb_si_generation = cb_stp->si_generation;
+ __entry->src_cp_pos = copy->cp_src_pos;
+ __entry->dst_cp_pos = copy->cp_dst_pos;
+ __entry->cp_count = copy->cp_count;
+ __assign_sockaddr(addr, &copy->cp_clp->cl_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("client=%pISpc intra=%d async=%d "
+ "src_client %08x:%08x src_stateid %08x:%08x "
+ "dst_client %08x:%08x dst_stateid %08x:%08x "
+ "cb_client %08x:%08x cb_stateid %08x:%08x "
+ "cp_src_pos=%llu cp_dst_pos=%llu cp_count=%llu",
+ __get_sockaddr(addr), __entry->intra, __entry->async,
+ __entry->src_cl_boot, __entry->src_cl_id,
+ __entry->src_so_id, __entry->src_si_generation,
+ __entry->dst_cl_boot, __entry->dst_cl_id,
+ __entry->dst_so_id, __entry->dst_si_generation,
+ __entry->cb_cl_boot, __entry->cb_cl_id,
+ __entry->cb_so_id, __entry->cb_si_generation,
+ __entry->src_cp_pos, __entry->dst_cp_pos, __entry->cp_count
+ )
+);
+
+#define DEFINE_COPY_EVENT(name) \
+DEFINE_EVENT(nfsd_copy_class, nfsd_copy_##name, \
+ TP_PROTO(const struct nfsd4_copy *copy), \
+ TP_ARGS(copy))
+
+DEFINE_COPY_EVENT(inter);
+DEFINE_COPY_EVENT(intra);
+DEFINE_COPY_EVENT(async);
+
+TRACE_EVENT(nfsd_copy_done,
+ TP_PROTO(
+ const struct nfsd4_copy *copy,
+ __be32 status
+ ),
+ TP_ARGS(copy, status),
+ TP_STRUCT__entry(
+ __field(int, status)
+ __field(bool, intra)
+ __field(bool, async)
+ __sockaddr(addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ __entry->status = be32_to_cpu(status);
+ __entry->intra = test_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
+ __entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
+ __assign_sockaddr(addr, &copy->cp_clp->cl_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("addr=%pISpc status=%d intra=%d async=%d",
+ __get_sockaddr(addr), __entry->status, __entry->intra, __entry->async
+ )
+);
+
+DECLARE_EVENT_CLASS(nfsd_copy_async_done_class,
+ TP_PROTO(
+ const struct nfsd4_copy *copy
+ ),
+ TP_ARGS(copy),
+ TP_STRUCT__entry(
+ __field(int, status)
+ __field(bool, intra)
+ __field(bool, async)
+ __field(u32, src_cl_boot)
+ __field(u32, src_cl_id)
+ __field(u32, src_so_id)
+ __field(u32, src_si_generation)
+ __field(u32, dst_cl_boot)
+ __field(u32, dst_cl_id)
+ __field(u32, dst_so_id)
+ __field(u32, dst_si_generation)
+ __field(u32, cb_cl_boot)
+ __field(u32, cb_cl_id)
+ __field(u32, cb_so_id)
+ __field(u32, cb_si_generation)
+ __field(u64, src_cp_pos)
+ __field(u64, dst_cp_pos)
+ __field(u64, cp_count)
+ __sockaddr(addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ const stateid_t *src_stp = &copy->cp_src_stateid;
+ const stateid_t *dst_stp = &copy->cp_dst_stateid;
+ const stateid_t *cb_stp = &copy->cp_res.cb_stateid;
+
+ __entry->status = be32_to_cpu(copy->nfserr);
+ __entry->intra = test_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
+ __entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
+ __entry->src_cl_boot = src_stp->si_opaque.so_clid.cl_boot;
+ __entry->src_cl_id = src_stp->si_opaque.so_clid.cl_id;
+ __entry->src_so_id = src_stp->si_opaque.so_id;
+ __entry->src_si_generation = src_stp->si_generation;
+ __entry->dst_cl_boot = dst_stp->si_opaque.so_clid.cl_boot;
+ __entry->dst_cl_id = dst_stp->si_opaque.so_clid.cl_id;
+ __entry->dst_so_id = dst_stp->si_opaque.so_id;
+ __entry->dst_si_generation = dst_stp->si_generation;
+ __entry->cb_cl_boot = cb_stp->si_opaque.so_clid.cl_boot;
+ __entry->cb_cl_id = cb_stp->si_opaque.so_clid.cl_id;
+ __entry->cb_so_id = cb_stp->si_opaque.so_id;
+ __entry->cb_si_generation = cb_stp->si_generation;
+ __entry->src_cp_pos = copy->cp_src_pos;
+ __entry->dst_cp_pos = copy->cp_dst_pos;
+ __entry->cp_count = copy->cp_count;
+ __assign_sockaddr(addr, &copy->cp_clp->cl_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("client=%pISpc status=%d intra=%d async=%d "
+ "src_client %08x:%08x src_stateid %08x:%08x "
+ "dst_client %08x:%08x dst_stateid %08x:%08x "
+ "cb_client %08x:%08x cb_stateid %08x:%08x "
+ "cp_src_pos=%llu cp_dst_pos=%llu cp_count=%llu",
+ __get_sockaddr(addr),
+ __entry->status, __entry->intra, __entry->async,
+ __entry->src_cl_boot, __entry->src_cl_id,
+ __entry->src_so_id, __entry->src_si_generation,
+ __entry->dst_cl_boot, __entry->dst_cl_id,
+ __entry->dst_so_id, __entry->dst_si_generation,
+ __entry->cb_cl_boot, __entry->cb_cl_id,
+ __entry->cb_so_id, __entry->cb_si_generation,
+ __entry->src_cp_pos, __entry->dst_cp_pos, __entry->cp_count
+ )
+);
+
+#define DEFINE_COPY_ASYNC_DONE_EVENT(name) \
+DEFINE_EVENT(nfsd_copy_async_done_class, \
+ nfsd_copy_async_##name, \
+ TP_PROTO(const struct nfsd4_copy *copy), \
+ TP_ARGS(copy))
+
+DEFINE_COPY_ASYNC_DONE_EVENT(done);
+DEFINE_COPY_ASYNC_DONE_EVENT(cancel);
+
+TRACE_EVENT(nfsd_vfs_setattr,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const struct iattr *iap,
+ const struct timespec64 *guardtime
+ ),
+ TP_ARGS(rqstp, fhp, iap, guardtime),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __field(s64, gtime_tv_sec)
+ __field(u32, gtime_tv_nsec)
+ __field(unsigned int, ia_valid)
+ __field(loff_t, ia_size)
+ __field(uid_t, ia_uid)
+ __field(gid_t, ia_gid)
+ __field(umode_t, ia_mode)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->gtime_tv_sec = guardtime ? guardtime->tv_sec : 0;
+ __entry->gtime_tv_nsec = guardtime ? guardtime->tv_nsec : 0;
+ __entry->ia_valid = iap->ia_valid;
+ __entry->ia_size = iap->ia_size;
+ __entry->ia_uid = __kuid_val(iap->ia_uid);
+ __entry->ia_gid = __kgid_val(iap->ia_gid);
+ __entry->ia_mode = iap->ia_mode;
+ ),
+ TP_printk(
+ "xid=0x%08x fh_hash=0x%08x ia_valid=%s ia_size=%llu ia_mode=0%o ia_uid=%u ia_gid=%u guard_time=%lld.%u",
+ __entry->xid, __entry->fh_hash, show_ia_valid_flags(__entry->ia_valid),
+ __entry->ia_size, __entry->ia_mode, __entry->ia_uid, __entry->ia_gid,
+ __entry->gtime_tv_sec, __entry->gtime_tv_nsec
+ )
+)
+
+TRACE_EVENT(nfsd_vfs_lookup,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const char *name,
+ unsigned int len
+ ),
+ TP_ARGS(rqstp, fhp, name, len),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __string_len(name, name, len)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x name=%s",
+ __entry->xid, __entry->fh_hash, __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_create,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ umode_t type,
+ const char *name,
+ unsigned int len
+ ),
+ TP_ARGS(rqstp, fhp, type, name, len),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __field(umode_t, type)
+ __string_len(name, name, len)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->type = type;
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x type=%s name=%s",
+ __entry->xid, __entry->fh_hash,
+ show_fs_file_type(__entry->type), __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_symlink,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const char *name,
+ unsigned int namelen,
+ const char *target
+ ),
+ TP_ARGS(rqstp, fhp, name, namelen, target),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __string_len(name, name, namelen)
+ __string(target, target)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __assign_str(name);
+ __assign_str(target);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x name=%s target=%s",
+ __entry->xid, __entry->fh_hash,
+ __get_str(name), __get_str(target)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_link,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *sfhp,
+ const struct svc_fh *tfhp,
+ const char *name,
+ unsigned int namelen
+ ),
+ TP_ARGS(rqstp, sfhp, tfhp, name, namelen),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, sfh_hash)
+ __field(u32, tfh_hash)
+ __string_len(name, name, namelen)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->sfh_hash = knfsd_fh_hash(&sfhp->fh_handle);
+ __entry->tfh_hash = knfsd_fh_hash(&tfhp->fh_handle);
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x src_fh=0x%08x tgt_fh=0x%08x name=%s",
+ __entry->xid, __entry->sfh_hash, __entry->tfh_hash,
+ __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_unlink,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ const char *name,
+ unsigned int len
+ ),
+ TP_ARGS(rqstp, fhp, name, len),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __string_len(name, name, len)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __assign_str(name);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x name=%s",
+ __entry->xid, __entry->fh_hash,
+ __get_str(name)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_rename,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *sfhp,
+ const struct svc_fh *tfhp,
+ const char *source,
+ unsigned int sourcelen,
+ const char *target,
+ unsigned int targetlen
+ ),
+ TP_ARGS(rqstp, sfhp, tfhp, source, sourcelen, target, targetlen),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, sfh_hash)
+ __field(u32, tfh_hash)
+ __string_len(source, source, sourcelen)
+ __string_len(target, target, targetlen)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->sfh_hash = knfsd_fh_hash(&sfhp->fh_handle);
+ __entry->tfh_hash = knfsd_fh_hash(&tfhp->fh_handle);
+ __assign_str(source);
+ __assign_str(target);
+ ),
+ TP_printk("xid=0x%08x sfh_hash=0x%08x tfh_hash=0x%08x source=%s target=%s",
+ __entry->xid, __entry->sfh_hash, __entry->tfh_hash,
+ __get_str(source), __get_str(target)
+ )
+);
+
+TRACE_EVENT(nfsd_vfs_readdir,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp,
+ u32 count,
+ u64 offset
+ ),
+ TP_ARGS(rqstp, fhp, count, offset),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ __field(u32, count)
+ __field(u64, offset)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ __entry->count = count;
+ __entry->offset = offset;
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu count=%u",
+ __entry->xid, __entry->fh_hash,
+ __entry->offset, __entry->count
+ )
+);
+
+DECLARE_EVENT_CLASS(nfsd_vfs_getattr_class,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct svc_fh *fhp
+ ),
+ TP_ARGS(rqstp, fhp),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_CALL_FIELDS(rqstp)
+ __field(u32, fh_hash)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp);
+ __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle);
+ ),
+ TP_printk("xid=0x%08x fh_hash=0x%08x",
+ __entry->xid, __entry->fh_hash
+ )
+);
+
+#define DEFINE_NFSD_VFS_GETATTR_EVENT(__name) \
+DEFINE_EVENT(nfsd_vfs_getattr_class, __name, \
+ TP_PROTO( \
+ const struct svc_rqst *rqstp, \
+ const struct svc_fh *fhp \
+ ), \
+ TP_ARGS(rqstp, fhp))
+
+DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_getattr);
+DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_statfs);
+
+DECLARE_EVENT_CLASS(nfsd_pnfs_class,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const char *dev,
+ int error
+ ),
+ TP_ARGS(clp, dev, error),
+ TP_STRUCT__entry(
+ __sockaddr(addr, sizeof(struct sockaddr_in6))
+ __field(unsigned int, netns_ino)
+ __string(dev, dev)
+ __field(int, error)
+ ),
+ TP_fast_assign(
+ __assign_sockaddr(addr, &clp->cl_addr,
+ sizeof(struct sockaddr_in6));
+ __entry->netns_ino = clp->net->ns.inum;
+ __assign_str(dev);
+ __entry->error = error;
+ ),
+ TP_printk("client=%pISpc nn=%d dev=%s error=%d",
+ __get_sockaddr(addr),
+ __entry->netns_ino,
+ __get_str(dev),
+ __entry->error
+ )
+);
+
+#define DEFINE_NFSD_PNFS_ERR_EVENT(name) \
+DEFINE_EVENT(nfsd_pnfs_class, nfsd_pnfs_##name, \
+ TP_PROTO( \
+ const struct nfs4_client *clp, \
+ const char *dev, \
+ int error \
+ ), \
+ TP_ARGS(clp, dev, error))
+
+DEFINE_NFSD_PNFS_ERR_EVENT(fence);
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9824e32b2f23..964cf922ad83 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -25,56 +25,95 @@
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
#include <linux/jhash.h>
-#include <linux/ima.h>
+#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/exportfs.h>
#include <linux/writeback.h>
#include <linux/security.h>
+#include <linux/sunrpc/xdr.h>
-#ifdef CONFIG_NFSD_V3
#include "xdr3.h"
-#endif /* CONFIG_NFSD_V3 */
#ifdef CONFIG_NFSD_V4
-#include "../internal.h"
#include "acl.h"
#include "idmap.h"
+#include "xdr4.h"
#endif /* CONFIG_NFSD_V4 */
#include "nfsd.h"
#include "vfs.h"
+#include "filecache.h"
#include "trace.h"
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
+bool nfsd_disable_splice_read __read_mostly;
+u64 nfsd_io_cache_read __read_mostly = NFSD_IO_BUFFERED;
+u64 nfsd_io_cache_write __read_mostly = NFSD_IO_BUFFERED;
-/*
- * This is a cache of readahead params that help us choose the proper
- * readahead strategy. Initially, we set all readahead parameters to 0
- * and let the VFS handle things.
- * If you increase the number of cached files very much, you'll need to
- * add a hash table here.
+/**
+ * nfserrno - Map Linux errnos to NFS errnos
+ * @errno: POSIX(-ish) error code to be mapped
+ *
+ * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If
+ * it's an error we don't expect, log it once and return nfserr_io.
*/
-struct raparms {
- struct raparms *p_next;
- unsigned int p_count;
- ino_t p_ino;
- dev_t p_dev;
- int p_set;
- struct file_ra_state p_ra;
- unsigned int p_hindex;
-};
-
-struct raparm_hbucket {
- struct raparms *pb_head;
- spinlock_t pb_lock;
-} ____cacheline_aligned_in_smp;
+__be32
+nfserrno (int errno)
+{
+ static struct {
+ __be32 nfserr;
+ int syserr;
+ } nfs_errtbl[] = {
+ { nfs_ok, 0 },
+ { nfserr_perm, -EPERM },
+ { nfserr_noent, -ENOENT },
+ { nfserr_io, -EIO },
+ { nfserr_nxio, -ENXIO },
+ { nfserr_fbig, -E2BIG },
+ { nfserr_stale, -EBADF },
+ { nfserr_acces, -EACCES },
+ { nfserr_exist, -EEXIST },
+ { nfserr_xdev, -EXDEV },
+ { nfserr_nodev, -ENODEV },
+ { nfserr_notdir, -ENOTDIR },
+ { nfserr_isdir, -EISDIR },
+ { nfserr_inval, -EINVAL },
+ { nfserr_fbig, -EFBIG },
+ { nfserr_nospc, -ENOSPC },
+ { nfserr_rofs, -EROFS },
+ { nfserr_mlink, -EMLINK },
+ { nfserr_nametoolong, -ENAMETOOLONG },
+ { nfserr_notempty, -ENOTEMPTY },
+ { nfserr_dquot, -EDQUOT },
+ { nfserr_stale, -ESTALE },
+ { nfserr_jukebox, -ETIMEDOUT },
+ { nfserr_jukebox, -ERESTARTSYS },
+ { nfserr_jukebox, -EAGAIN },
+ { nfserr_jukebox, -EWOULDBLOCK },
+ { nfserr_jukebox, -ENOMEM },
+ { nfserr_io, -ETXTBSY },
+ { nfserr_notsupp, -EOPNOTSUPP },
+ { nfserr_toosmall, -ETOOSMALL },
+ { nfserr_serverfault, -ESERVERFAULT },
+ { nfserr_serverfault, -ENFILE },
+ { nfserr_io, -EREMOTEIO },
+ { nfserr_stale, -EOPENSTALE },
+ { nfserr_io, -EUCLEAN },
+ { nfserr_perm, -ENOKEY },
+ { nfserr_no_grace, -ENOGRACE},
+ { nfserr_io, -EBADMSG },
+ };
+ int i;
-#define RAPARM_HASH_BITS 4
-#define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
-#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
-static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
+ for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
+ if (nfs_errtbl[i].syserr == errno)
+ return nfs_errtbl[i].nfserr;
+ }
+ WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
+ return nfserr_io;
+}
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
@@ -90,9 +129,13 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct dentry *dentry = *dpp;
struct path path = {.mnt = mntget(exp->ex_path.mnt),
.dentry = dget(dentry)};
+ unsigned int follow_flags = 0;
int err = 0;
- err = follow_down(&path);
+ if (exp->ex_flags & NFSEXP_CROSSMOUNT)
+ follow_flags = LOOKUP_AUTOMOUNT;
+
+ err = follow_down(&path, follow_flags);
if (err < 0)
goto out;
if (path.mnt == exp->ex_path.mnt && path.dentry == dentry &&
@@ -187,7 +230,7 @@ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
return 1;
if (nfsd4_is_junction(dentry))
return 1;
- if (d_mountpoint(dentry))
+ if (d_managed(dentry))
/*
* Might only be a mountpoint in a different namespace,
* but we need to check.
@@ -206,7 +249,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct dentry *dentry;
int host_err;
- dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
+ trace_nfsd_vfs_lookup(rqstp, fhp, name, len);
dparent = fhp->fh_dentry;
exp = exp_get(fhp->fh_export);
@@ -226,27 +269,14 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out_nfserr;
}
} else {
- /*
- * In the nfsd4_open() case, this may be held across
- * subsequent open and delegation acquisition which may
- * need to take the child's i_mutex:
- */
- fh_lock_nested(fhp, I_MUTEX_PARENT);
- dentry = lookup_one_len(name, dparent, len);
+ dentry = lookup_one_unlocked(&nop_mnt_idmap,
+ &QSTR_LEN(name, len), dparent);
host_err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_nfserr;
if (nfsd_mountpoint(dentry, exp)) {
- /*
- * We don't need the i_mutex after all. It's
- * still possible we could open this (regular
- * files can be mountpoints too), but the
- * i_mutex is just there to prevent renames of
- * something that we might be about to delegate,
- * and a mountpoint won't be renamed:
- */
- fh_unlock(fhp);
- if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
+ host_err = nfsd_cross_mnt(rqstp, &dentry, &exp);
+ if (host_err) {
dput(dentry);
goto out_nfserr;
}
@@ -261,7 +291,15 @@ out_nfserr:
return nfserrno(host_err);
}
-/*
+/**
+ * nfsd_lookup - look up a single path component for nfsd
+ *
+ * @rqstp: the request context
+ * @fhp: the file handle of the directory
+ * @name: the component name, or %NULL to look up parent
+ * @len: length of name to examine
+ * @resfh: pointer to pre-initialised filehandle to hold result.
+ *
* Look up one component of a pathname.
* N.B. After this call _both_ fhp and resfh need an fh_put
*
@@ -271,11 +309,11 @@ out_nfserr:
* returned. Otherwise the covered directory is returned.
* NOTE: this mountpoint crossing is not supported properly by all
* clients and is explicitly disallowed for NFSv3
- * NeilBrown <neilb@cse.unsw.edu.au>
+ *
*/
__be32
nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
- unsigned int len, struct svc_fh *resfh)
+ unsigned int len, struct svc_fh *resfh)
{
struct svc_export *exp;
struct dentry *dentry;
@@ -287,7 +325,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
if (err)
return err;
- err = check_nfsd_access(exp, rqstp);
+ err = check_nfsd_access(exp, rqstp, false);
if (err)
goto out;
/*
@@ -303,23 +341,47 @@ out:
return err;
}
+static void
+commit_reset_write_verifier(struct nfsd_net *nn, struct svc_rqst *rqstp,
+ int err)
+{
+ switch (err) {
+ case -EAGAIN:
+ case -ESTALE:
+ /*
+ * Neither of these are the result of a problem with
+ * durable storage, so avoid a write verifier reset.
+ */
+ break;
+ default:
+ nfsd_reset_write_verifier(nn);
+ trace_nfsd_writeverf_reset(nn, rqstp, err);
+ }
+}
+
/*
* Commit metadata changes to stable storage.
*/
static int
-commit_metadata(struct svc_fh *fhp)
+commit_inode_metadata(struct inode *inode)
{
- struct inode *inode = d_inode(fhp->fh_dentry);
const struct export_operations *export_ops = inode->i_sb->s_export_op;
- if (!EX_ISSYNC(fhp->fh_export))
- return 0;
-
if (export_ops->commit_metadata)
return export_ops->commit_metadata(inode);
return sync_inode_metadata(inode, 1);
}
+static int
+commit_metadata(struct svc_fh *fhp)
+{
+ struct inode *inode = d_inode(fhp->fh_dentry);
+
+ if (!EX_ISSYNC(fhp->fh_export))
+ return 0;
+ return commit_inode_metadata(inode);
+}
+
/*
* Go over the attributes and take care of the small differences between
* NFS semantics and what Linux expects.
@@ -327,6 +389,10 @@ commit_metadata(struct svc_fh *fhp)
static void
nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
+ /* Ignore mode updates on symlinks */
+ if (S_ISLNK(inode->i_mode))
+ iap->ia_valid &= ~ATTR_MODE;
+
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
iap->ia_mode &= S_IALLUGO;
@@ -344,7 +410,9 @@ nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
iap->ia_mode &= ~S_ISGID;
} else {
/* set ATTR_KILL_* bits and let VFS handle it */
- iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
+ iap->ia_valid |= ATTR_KILL_SUID;
+ iap->ia_valid |=
+ setattr_should_drop_sgid(&nop_mnt_idmap, inode);
}
}
}
@@ -354,47 +422,98 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct iattr *iap)
{
struct inode *inode = d_inode(fhp->fh_dentry);
- int host_err;
if (iap->ia_size < inode->i_size) {
__be32 err;
- err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
- NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
+ err = nfsd_permission(&rqstp->rq_cred,
+ fhp->fh_export, fhp->fh_dentry,
+ NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
if (err)
return err;
}
+ return nfserrno(get_write_access(inode));
+}
+
+static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
+{
+ int host_err;
+
+ if (iap->ia_valid & ATTR_SIZE) {
+ /*
+ * RFC5661, Section 18.30.4:
+ * Changing the size of a file with SETATTR indirectly
+ * changes the time_modify and change attributes.
+ *
+ * (and similar for the older RFCs)
+ */
+ struct iattr size_attr = {
+ .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+ .ia_size = iap->ia_size,
+ };
+
+ if (iap->ia_size < 0)
+ return -EFBIG;
+
+ host_err = notify_change(&nop_mnt_idmap, dentry, &size_attr, NULL);
+ if (host_err)
+ return host_err;
+ iap->ia_valid &= ~ATTR_SIZE;
+
+ /*
+ * Avoid the additional setattr call below if the only other
+ * attribute that the client sends is the mtime, as we update
+ * it as part of the size change above.
+ */
+ if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+ return 0;
+ }
- host_err = get_write_access(inode);
- if (host_err)
- goto out_nfserrno;
+ if ((iap->ia_valid & ~ATTR_DELEG) == 0)
+ return 0;
- host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
- if (host_err)
- goto out_put_write_access;
- return 0;
+ /*
+ * If ATTR_DELEG is set, then this is an update from a client that
+ * holds a delegation. If this is an update for only the atime, the
+ * ctime should not be changed. If the update contains the mtime
+ * too, then ATTR_CTIME should already be set.
+ */
+ if (!(iap->ia_valid & ATTR_DELEG))
+ iap->ia_valid |= ATTR_CTIME;
-out_put_write_access:
- put_write_access(inode);
-out_nfserrno:
- return nfserrno(host_err);
+ return notify_change(&nop_mnt_idmap, dentry, iap, NULL);
}
-/*
- * Set various file attributes. After this call fhp needs an fh_put.
+/**
+ * nfsd_setattr - Set various file attributes.
+ * @rqstp: controlling RPC transaction
+ * @fhp: filehandle of target
+ * @attr: attributes to set
+ * @guardtime: do not act if ctime.tv_sec does not match this timestamp
+ *
+ * This call may adjust the contents of @attr (in particular, this
+ * call may change the bits in the na_iattr.ia_valid field).
+ *
+ * Returns nfs_ok on success, otherwise an NFS status code is
+ * returned. Caller must release @fhp by calling fh_put in either
+ * case.
*/
__be32
-nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
- int check_guard, time_t guardtime)
+nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_attrs *attr, const struct timespec64 *guardtime)
{
struct dentry *dentry;
struct inode *inode;
+ struct iattr *iap = attr->na_iattr;
int accmode = NFSD_MAY_SATTR;
umode_t ftype = 0;
__be32 err;
- int host_err;
+ int host_err = 0;
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
+ int retries;
+
+ trace_nfsd_vfs_setattr(rqstp, fhp, iap, guardtime);
if (iap->ia_valid & ATTR_SIZE) {
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -404,7 +523,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/*
* If utimes(2) and friends are called with times not NULL, we should
* not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
- * will return EACCESS, when the caller's effective UID does not match
+ * will return EACCES, when the caller's effective UID does not match
* the owner of the file, and the caller is not privileged. In this
* situation, we should return EPERM(notify_change will return this).
*/
@@ -430,18 +549,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
dentry = fhp->fh_dentry;
inode = d_inode(dentry);
- /* Ignore any mode updates on symlinks */
- if (S_ISLNK(inode->i_mode))
- iap->ia_valid &= ~ATTR_MODE;
-
- if (!iap->ia_valid)
- return 0;
-
nfsd_sanitize_attrs(inode, iap);
- if (check_guard && guardtime != inode->i_ctime.tv_sec)
- return nfserr_notsync;
-
/*
* The size case is special, it changes the file in addition to the
* attributes, and file systems don't expect it to be mixed with
@@ -455,45 +564,64 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
return err;
}
- fh_lock(fhp);
- if (size_change) {
- /*
- * RFC5661, Section 18.30.4:
- * Changing the size of a file with SETATTR indirectly
- * changes the time_modify and change attributes.
- *
- * (and similar for the older RFCs)
- */
- struct iattr size_attr = {
- .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
- .ia_size = iap->ia_size,
- };
+ inode_lock(inode);
+ err = fh_fill_pre_attrs(fhp);
+ if (err)
+ goto out_unlock;
+
+ if (guardtime) {
+ struct timespec64 ctime = inode_get_ctime(inode);
+ if ((u32)guardtime->tv_sec != (u32)ctime.tv_sec ||
+ guardtime->tv_nsec != ctime.tv_nsec) {
+ err = nfserr_notsync;
+ goto out_fill_attrs;
+ }
+ }
- host_err = notify_change(dentry, &size_attr, NULL);
- if (host_err)
- goto out_unlock;
- iap->ia_valid &= ~ATTR_SIZE;
+ for (retries = 1;;) {
+ struct iattr attrs;
/*
- * Avoid the additional setattr call below if the only other
- * attribute that the client sends is the mtime, as we update
- * it as part of the size change above.
+ * notify_change() can alter its iattr argument, making
+ * @iap unsuitable for submission multiple times. Make a
+ * copy for every loop iteration.
*/
- if ((iap->ia_valid & ~ATTR_MTIME) == 0)
- goto out_unlock;
+ attrs = *iap;
+ host_err = __nfsd_setattr(dentry, &attrs);
+ if (host_err != -EAGAIN || !retries--)
+ break;
+ if (!nfsd_wait_for_delegreturn(rqstp, inode))
+ break;
}
-
- iap->ia_valid |= ATTR_CTIME;
- host_err = notify_change(dentry, iap, NULL);
-
+ if (attr->na_seclabel && attr->na_seclabel->len)
+ attr->na_labelerr = security_inode_setsecctx(dentry,
+ attr->na_seclabel->data, attr->na_seclabel->len);
+ if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl)
+ attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
+ dentry, ACL_TYPE_ACCESS,
+ attr->na_pacl);
+ if (IS_ENABLED(CONFIG_FS_POSIX_ACL) &&
+ !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode))
+ attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
+ dentry, ACL_TYPE_DEFAULT,
+ attr->na_dpacl);
+out_fill_attrs:
+ /*
+ * RFC 1813 Section 3.3.2 does not mandate that an NFS server
+ * returns wcc_data for SETATTR. Some client implementations
+ * depend on receiving wcc_data, however, to sort out partial
+ * updates (eg., the client requested that size and mode be
+ * modified, but the server changed only the file mode).
+ */
+ fh_fill_post_attrs(fhp);
out_unlock:
- fh_unlock(fhp);
+ inode_unlock(inode);
if (size_change)
put_write_access(inode);
out:
if (!host_err)
host_err = commit_metadata(fhp);
- return nfserrno(host_err);
+ return err != 0 ? err : nfserrno(host_err);
}
#if defined(CONFIG_NFSD_V4)
@@ -520,51 +648,68 @@ int nfsd4_is_junction(struct dentry *dentry)
return 0;
if (!(inode->i_mode & S_ISVTX))
return 0;
- if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
+ if (vfs_getxattr(&nop_mnt_idmap, dentry, NFSD_JUNCTION_XATTR_NAME,
+ NULL, 0) <= 0)
return 0;
return 1;
}
-#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
-__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct xdr_netobj *label)
-{
- __be32 error;
- int host_error;
- struct dentry *dentry;
-
- error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
- if (error)
- return error;
-
- dentry = fhp->fh_dentry;
- inode_lock(d_inode(dentry));
- host_error = security_inode_setsecctx(dentry, label->data, label->len);
- inode_unlock(d_inode(dentry));
- return nfserrno(host_error);
-}
-#else
-__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct xdr_netobj *label)
+static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp)
{
- return nfserr_notsupp;
+ return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate;
}
-#endif
-__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
- u64 dst_pos, u64 count)
+__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos,
+ u64 count, bool sync)
{
+ struct file *src = nf_src->nf_file;
+ struct file *dst = nf_dst->nf_file;
+ errseq_t since;
loff_t cloned;
+ __be32 ret = 0;
+ since = READ_ONCE(dst->f_wb_err);
cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
- if (count && cloned != count)
- cloned = -EINVAL;
- return nfserrno(cloned < 0 ? cloned : 0);
+ if (cloned < 0) {
+ ret = nfserrno(cloned);
+ goto out_err;
+ }
+ if (count && cloned != count) {
+ ret = nfserrno(-EINVAL);
+ goto out_err;
+ }
+ if (sync) {
+ loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
+ int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
+
+ if (!status)
+ status = filemap_check_wb_err(dst->f_mapping, since);
+ if (!status)
+ status = commit_inode_metadata(file_inode(src));
+ if (status < 0) {
+ struct nfsd_net *nn = net_generic(nf_dst->nf_net,
+ nfsd_net_id);
+
+ trace_nfsd_clone_file_range_err(rqstp,
+ &nfsd4_get_cstate(rqstp)->save_fh,
+ src_pos,
+ &nfsd4_get_cstate(rqstp)->current_fh,
+ dst_pos,
+ count, status);
+ commit_reset_write_verifier(nn, rqstp, status);
+ ret = nfserrno(status);
+ }
+ }
+out_err:
+ return ret;
}
ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
u64 dst_pos, u64 count)
{
+ ssize_t ret;
/*
* Limit copy to 4MB to prevent indefinitely blocking an nfsd
@@ -575,7 +720,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
* limit like this and pipeline multiple COPY requests.
*/
count = min_t(u64, count, 1 << 22);
- return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
+
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
+ COPY_FILE_SPLICE);
+ return ret;
}
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -595,7 +745,6 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif /* defined(CONFIG_NFSD_V4) */
-#ifdef CONFIG_NFSD_V3
/*
* Check server access rights to a file system object
*/
@@ -609,6 +758,12 @@ static struct accessmap nfs3_regaccess[] = {
{ NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC },
{ NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE },
+#ifdef CONFIG_NFSD_V4
+ { NFS4_ACCESS_XAREAD, NFSD_MAY_READ },
+ { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE },
+ { NFS4_ACCESS_XALIST, NFSD_MAY_READ },
+#endif
+
{ 0, 0 }
};
@@ -619,6 +774,12 @@ static struct accessmap nfs3_diraccess[] = {
{ NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE },
{ NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE },
+#ifdef CONFIG_NFSD_V4
+ { NFS4_ACCESS_XAREAD, NFSD_MAY_READ },
+ { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE },
+ { NFS4_ACCESS_XALIST, NFSD_MAY_READ },
+#endif
+
{ 0, 0 }
};
@@ -669,7 +830,8 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
sresult |= map->access;
- err2 = nfsd_permission(rqstp, export, dentry, map->how);
+ err2 = nfsd_permission(&rqstp->rq_cred, export,
+ dentry, map->how);
switch (err2) {
case nfs_ok:
result |= map->access;
@@ -695,9 +857,8 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
out:
return error;
}
-#endif /* CONFIG_NFSD_V3 */
-static int nfsd_open_break_lease(struct inode *inode, int access)
+int nfsd_open_break_lease(struct inode *inode, int access)
{
unsigned int mode;
@@ -713,60 +874,28 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
* and additional flags.
* N.B. After this call fhp needs an fh_put
*/
-__be32
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
- int may_flags, struct file **filp)
+static int
+__nfsd_open(struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp)
{
struct path path;
struct inode *inode;
struct file *file;
int flags = O_RDONLY|O_LARGEFILE;
- __be32 err;
- int host_err = 0;
-
- validate_process_creds();
-
- /*
- * If we get here, then the client has already done an "open",
- * and (hopefully) checked permission - so allow OWNER_OVERRIDE
- * in case a chmod has now revoked permission.
- *
- * Arguably we should also allow the owner override for
- * directories, but we never have and it doesn't seem to have
- * caused anyone a problem. If we were to change this, note
- * also that our filldir callbacks would need a variant of
- * lookup_one_len that doesn't check permissions.
- */
- if (type == S_IFREG)
- may_flags |= NFSD_MAY_OWNER_OVERRIDE;
- err = fh_verify(rqstp, fhp, type, may_flags);
- if (err)
- goto out;
+ int host_err = -EPERM;
path.mnt = fhp->fh_export->ex_path.mnt;
path.dentry = fhp->fh_dentry;
inode = d_inode(path.dentry);
- /* Disallow write access to files with the append-only bit set
- * or any access when mandatory locking enabled
- */
- err = nfserr_perm;
if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
goto out;
- /*
- * We must ignore files (but only files) which might have mandatory
- * locks on them because there is no way to know if the accesser has
- * the lock.
- */
- if (S_ISREG((inode)->i_mode) && mandatory_lock(inode))
- goto out;
if (!inode->i_fop)
goto out;
host_err = nfsd_open_break_lease(inode, may_flags);
if (host_err) /* NOMEM or WOULDBLOCK */
- goto out_nfserr;
+ goto out;
if (may_flags & NFSD_MAY_WRITE) {
if (may_flags & NFSD_MAY_READ)
@@ -778,119 +907,105 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
file = dentry_open(&path, flags, current_cred());
if (IS_ERR(file)) {
host_err = PTR_ERR(file);
- goto out_nfserr;
+ goto out;
}
- host_err = ima_file_check(file, may_flags);
+ host_err = security_file_post_open(file, may_flags);
if (host_err) {
fput(file);
- goto out_nfserr;
+ goto out;
}
- if (may_flags & NFSD_MAY_64BIT_COOKIE)
- file->f_mode |= FMODE_64BITHASH;
- else
- file->f_mode |= FMODE_32BITHASH;
-
*filp = file;
-out_nfserr:
- err = nfserrno(host_err);
out:
- validate_process_creds();
+ return host_err;
+}
+
+__be32
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+ int may_flags, struct file **filp)
+{
+ __be32 err;
+ int host_err;
+ bool retried = false;
+
+ /*
+ * If we get here, then the client has already done an "open",
+ * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+ * in case a chmod has now revoked permission.
+ *
+ * Arguably we should also allow the owner override for
+ * directories, but we never have and it doesn't seem to have
+ * caused anyone a problem. If we were to change this, note
+ * also that our filldir callbacks would need a variant of
+ * lookup_one_positive_unlocked() that doesn't check permissions.
+ */
+ if (type == S_IFREG)
+ may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+retry:
+ err = fh_verify(rqstp, fhp, type, may_flags);
+ if (!err) {
+ host_err = __nfsd_open(fhp, type, may_flags, filp);
+ if (host_err == -EOPENSTALE && !retried) {
+ retried = true;
+ fh_put(fhp);
+ goto retry;
+ }
+ err = nfserrno(host_err);
+ }
return err;
}
-struct raparms *
-nfsd_init_raparms(struct file *file)
+/**
+ * nfsd_open_verified - Open a regular file for the filecache
+ * @fhp: NFS filehandle of the file to open
+ * @type: S_IFMT inode type allowed (0 means any type is allowed)
+ * @may_flags: internal permission flags
+ * @filp: OUT: open "struct file *"
+ *
+ * Returns zero on success, or a negative errno value.
+ */
+int
+nfsd_open_verified(struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp)
{
- struct inode *inode = file_inode(file);
- dev_t dev = inode->i_sb->s_dev;
- ino_t ino = inode->i_ino;
- struct raparms *ra, **rap, **frap = NULL;
- int depth = 0;
- unsigned int hash;
- struct raparm_hbucket *rab;
-
- hash = jhash_2words(dev, ino, 0xfeedbeef) & RAPARM_HASH_MASK;
- rab = &raparm_hash[hash];
-
- spin_lock(&rab->pb_lock);
- for (rap = &rab->pb_head; (ra = *rap); rap = &ra->p_next) {
- if (ra->p_ino == ino && ra->p_dev == dev)
- goto found;
- depth++;
- if (ra->p_count == 0)
- frap = rap;
- }
- depth = nfsdstats.ra_size;
- if (!frap) {
- spin_unlock(&rab->pb_lock);
- return NULL;
- }
- rap = frap;
- ra = *frap;
- ra->p_dev = dev;
- ra->p_ino = ino;
- ra->p_set = 0;
- ra->p_hindex = hash;
-found:
- if (rap != &rab->pb_head) {
- *rap = ra->p_next;
- ra->p_next = rab->pb_head;
- rab->pb_head = ra;
- }
- ra->p_count++;
- nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
- spin_unlock(&rab->pb_lock);
-
- if (ra->p_set)
- file->f_ra = ra->p_ra;
- return ra;
-}
-
-void nfsd_put_raparams(struct file *file, struct raparms *ra)
-{
- struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
-
- spin_lock(&rab->pb_lock);
- ra->p_ra = file->f_ra;
- ra->p_set = 1;
- ra->p_count--;
- spin_unlock(&rab->pb_lock);
+ return __nfsd_open(fhp, type, may_flags, filp);
}
/*
* Grab and keep cached pages associated with a file in the svc_rqst
- * so that they can be passed to the network sendmsg/sendpage routines
+ * so that they can be passed to the network sendmsg routines
* directly. They will be released after the sending has completed.
+ *
+ * Return values: Number of bytes consumed, or -EIO if there are no
+ * remaining pages in rqstp->rq_pages.
*/
static int
nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct splice_desc *sd)
{
struct svc_rqst *rqstp = sd->u.data;
- struct page **pp = rqstp->rq_next_page;
- struct page *page = buf->page;
- size_t size;
-
- size = sd->len;
-
- if (rqstp->rq_res.page_len == 0) {
- get_page(page);
- put_page(*rqstp->rq_next_page);
- *(rqstp->rq_next_page++) = page;
- rqstp->rq_res.page_base = buf->offset;
- rqstp->rq_res.page_len = size;
- } else if (page != pp[-1]) {
- get_page(page);
- if (*rqstp->rq_next_page)
- put_page(*rqstp->rq_next_page);
- *(rqstp->rq_next_page++) = page;
- rqstp->rq_res.page_len += size;
- } else
- rqstp->rq_res.page_len += size;
+ struct page *page = buf->page; // may be a compound one
+ unsigned offset = buf->offset;
+ struct page *last_page;
- return size;
+ last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
+ for (page += offset / PAGE_SIZE; page <= last_page; page++) {
+ /*
+ * Skip page replacement when extending the contents of the
+ * current page. But note that we may get two zero_pages in a
+ * row from shmem.
+ */
+ if (page == *(rqstp->rq_next_page - 1) &&
+ offset_in_page(rqstp->rq_res.page_base +
+ rqstp->rq_res.page_len))
+ continue;
+ if (unlikely(!svc_rqst_replace_page(rqstp, page)))
+ return -EIO;
+ }
+ if (rqstp->rq_res.page_len == 0) // first call
+ rqstp->rq_res.page_base = offset % PAGE_SIZE;
+ rqstp->rq_res.page_len += sd->len;
+ return sd->len;
}
static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
@@ -899,12 +1014,25 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
}
+static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
+ size_t expected)
+{
+ if (expected != 0 && len == 0)
+ return 1;
+ if (offset+len >= i_size_read(file_inode(file)))
+ return 1;
+ return 0;
+}
+
static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- unsigned long *count, int host_err)
+ unsigned long *count, u32 *eof, ssize_t host_err)
{
if (host_err >= 0) {
- nfsdstats.io_read += host_err;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+ nfsd_stats_io_read_add(nn, fhp->fh_export, host_err);
+ *eof = nfsd_eof_on_read(file, offset, host_err, *count);
*count = host_err;
fsnotify_access(file);
trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
@@ -915,8 +1043,21 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
}
+/**
+ * nfsd_splice_read - Perform a VFS read using a splice pipe
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @file: opened struct file of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
+ */
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file *file, loff_t offset, unsigned long *count)
+ struct file *file, loff_t offset, unsigned long *count,
+ u32 *eof)
{
struct splice_desc sd = {
.len = 0,
@@ -924,25 +1065,157 @@ __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
.pos = offset,
.u.data = rqstp,
};
- int host_err;
+ ssize_t host_err;
trace_nfsd_read_splice(rqstp, fhp, offset, *count);
- rqstp->rq_next_page = rqstp->rq_respages + 1;
- host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+ host_err = rw_verify_area(READ, file, &offset, *count);
+ if (!host_err)
+ host_err = splice_direct_to_actor(file, &sd,
+ nfsd_direct_splice_actor);
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
-__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file *file, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *count)
+/*
+ * The byte range of the client's READ request is expanded on both ends
+ * until it meets the underlying file system's direct I/O alignment
+ * requirements. After the internal read is complete, the byte range of
+ * the NFS READ payload is reduced to the byte range that was originally
+ * requested.
+ *
+ * Note that a direct read can be done only when the xdr_buf containing
+ * the NFS READ reply does not already have contents in its .pages array.
+ * This is due to potentially restrictive alignment requirements on the
+ * read buffer. When .page_len and @base are zero, the .pages array is
+ * guaranteed to be page-aligned.
+ */
+static noinline_for_stack __be32
+nfsd_direct_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file *nf, loff_t offset, unsigned long *count,
+ u32 *eof)
{
+ u64 dio_start, dio_end;
+ unsigned long v, total;
struct iov_iter iter;
- int host_err;
+ struct kiocb kiocb;
+ ssize_t host_err;
+ size_t len;
+
+ init_sync_kiocb(&kiocb, nf->nf_file);
+ kiocb.ki_flags |= IOCB_DIRECT;
+
+ /* Read a properly-aligned region of bytes into rq_bvec */
+ dio_start = round_down(offset, nf->nf_dio_read_offset_align);
+ dio_end = round_up((u64)offset + *count, nf->nf_dio_read_offset_align);
+
+ kiocb.ki_pos = dio_start;
+
+ v = 0;
+ total = dio_end - dio_start;
+ while (total && v < rqstp->rq_maxpages &&
+ rqstp->rq_next_page < rqstp->rq_page_end) {
+ len = min_t(size_t, total, PAGE_SIZE);
+ bvec_set_page(&rqstp->rq_bvec[v], *rqstp->rq_next_page,
+ len, 0);
+
+ total -= len;
+ ++rqstp->rq_next_page;
+ ++v;
+ }
+
+ trace_nfsd_read_direct(rqstp, fhp, offset, *count - total);
+ iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v,
+ dio_end - dio_start - total);
+
+ host_err = vfs_iocb_iter_read(nf->nf_file, &kiocb, &iter);
+ if (host_err >= 0) {
+ unsigned int pad = offset - dio_start;
+
+ /* The returned payload starts after the pad */
+ rqstp->rq_res.page_base = pad;
+
+ /* Compute the count of bytes to be returned */
+ if (host_err > pad + *count)
+ host_err = *count;
+ else if (host_err > pad)
+ host_err -= pad;
+ else
+ host_err = 0;
+ } else if (unlikely(host_err == -EINVAL)) {
+ struct inode *inode = d_inode(fhp->fh_dentry);
+
+ pr_info_ratelimited("nfsd: Direct I/O alignment failure on %s/%ld\n",
+ inode->i_sb->s_id, inode->i_ino);
+ host_err = -ESERVERFAULT;
+ }
- trace_nfsd_read_vector(rqstp, fhp, offset, *count);
- iov_iter_kvec(&iter, READ, vec, vlen, *count);
- host_err = vfs_iter_read(file, &iter, &offset, 0);
- return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
+ return nfsd_finish_read(rqstp, fhp, nf->nf_file, offset, count,
+ eof, host_err);
+}
+
+/**
+ * nfsd_iter_read - Perform a VFS read using an iterator
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @nf: opened struct nfsd_file of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @base: offset in first page of read buffer
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * Some filesystems or situations cannot use nfsd_splice_read. This
+ * function is the slightly less-performant fallback for those cases.
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
+ */
+__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file *nf, loff_t offset, unsigned long *count,
+ unsigned int base, u32 *eof)
+{
+ struct file *file = nf->nf_file;
+ unsigned long v, total;
+ struct iov_iter iter;
+ struct kiocb kiocb;
+ ssize_t host_err;
+ size_t len;
+
+ init_sync_kiocb(&kiocb, file);
+
+ switch (nfsd_io_cache_read) {
+ case NFSD_IO_BUFFERED:
+ break;
+ case NFSD_IO_DIRECT:
+ /* When dio_read_offset_align is zero, dio is not supported */
+ if (nf->nf_dio_read_offset_align && !rqstp->rq_res.page_len)
+ return nfsd_direct_read(rqstp, fhp, nf, offset,
+ count, eof);
+ fallthrough;
+ case NFSD_IO_DONTCACHE:
+ if (file->f_op->fop_flags & FOP_DONTCACHE)
+ kiocb.ki_flags = IOCB_DONTCACHE;
+ break;
+ }
+
+ kiocb.ki_pos = offset;
+
+ v = 0;
+ total = *count;
+ while (total && v < rqstp->rq_maxpages &&
+ rqstp->rq_next_page < rqstp->rq_page_end) {
+ len = min_t(size_t, total, PAGE_SIZE - base);
+ bvec_set_page(&rqstp->rq_bvec[v], *rqstp->rq_next_page,
+ len, base);
+
+ total -= len;
+ ++rqstp->rq_next_page;
+ ++v;
+ base = 0;
+ }
+
+ trace_nfsd_read_vector(rqstp, fhp, offset, *count - total);
+ iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count - total);
+ host_err = vfs_iocb_iter_read(file, &kiocb, &iter);
+ return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
}
/*
@@ -973,7 +1246,7 @@ static int wait_for_concurrent_writes(struct file *file)
dprintk("nfsd: write resume %d\n", task_pid_nr(current));
}
- if (inode->i_state & I_DIRTY) {
+ if (inode_state_read_once(inode) & I_DIRTY) {
dprintk("nfsd: write sync %d\n", task_pid_nr(current));
err = vfs_fsync(file, 0);
}
@@ -982,49 +1255,247 @@ static int wait_for_concurrent_writes(struct file *file)
return err;
}
+struct nfsd_write_dio_seg {
+ struct iov_iter iter;
+ int flags;
+};
+
+static unsigned long
+iov_iter_bvec_offset(const struct iov_iter *iter)
+{
+ return (unsigned long)(iter->bvec->bv_offset + iter->iov_offset);
+}
+
+static void
+nfsd_write_dio_seg_init(struct nfsd_write_dio_seg *segment,
+ struct bio_vec *bvec, unsigned int nvecs,
+ unsigned long total, size_t start, size_t len,
+ struct kiocb *iocb)
+{
+ iov_iter_bvec(&segment->iter, ITER_SOURCE, bvec, nvecs, total);
+ if (start)
+ iov_iter_advance(&segment->iter, start);
+ iov_iter_truncate(&segment->iter, len);
+ segment->flags = iocb->ki_flags;
+}
+
+static unsigned int
+nfsd_write_dio_iters_init(struct nfsd_file *nf, struct bio_vec *bvec,
+ unsigned int nvecs, struct kiocb *iocb,
+ unsigned long total,
+ struct nfsd_write_dio_seg segments[3])
+{
+ u32 offset_align = nf->nf_dio_offset_align;
+ loff_t prefix_end, orig_end, middle_end;
+ u32 mem_align = nf->nf_dio_mem_align;
+ size_t prefix, middle, suffix;
+ loff_t offset = iocb->ki_pos;
+ unsigned int nsegs = 0;
+
+ /*
+ * Check if direct I/O is feasible for this write request.
+ * If alignments are not available, the write is too small,
+ * or no alignment can be found, fall back to buffered I/O.
+ */
+ if (unlikely(!mem_align || !offset_align) ||
+ unlikely(total < max(offset_align, mem_align)))
+ goto no_dio;
+
+ prefix_end = round_up(offset, offset_align);
+ orig_end = offset + total;
+ middle_end = round_down(orig_end, offset_align);
+
+ prefix = prefix_end - offset;
+ middle = middle_end - prefix_end;
+ suffix = orig_end - middle_end;
+
+ if (!middle)
+ goto no_dio;
+
+ if (prefix)
+ nfsd_write_dio_seg_init(&segments[nsegs++], bvec,
+ nvecs, total, 0, prefix, iocb);
+
+ nfsd_write_dio_seg_init(&segments[nsegs], bvec, nvecs,
+ total, prefix, middle, iocb);
+
+ /*
+ * Check if the bvec iterator is aligned for direct I/O.
+ *
+ * bvecs generated from RPC receive buffers are contiguous: After
+ * the first bvec, all subsequent bvecs start at bv_offset zero
+ * (page-aligned). Therefore, only the first bvec is checked.
+ */
+ if (iov_iter_bvec_offset(&segments[nsegs].iter) & (mem_align - 1))
+ goto no_dio;
+ segments[nsegs].flags |= IOCB_DIRECT;
+ nsegs++;
+
+ if (suffix)
+ nfsd_write_dio_seg_init(&segments[nsegs++], bvec, nvecs, total,
+ prefix + middle, suffix, iocb);
+
+ return nsegs;
+
+no_dio:
+ /* No DIO alignment possible - pack into single non-DIO segment. */
+ nfsd_write_dio_seg_init(&segments[0], bvec, nvecs, total, 0,
+ total, iocb);
+ return 1;
+}
+
+static noinline_for_stack int
+nfsd_direct_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file *nf, unsigned int nvecs,
+ unsigned long *cnt, struct kiocb *kiocb)
+{
+ struct nfsd_write_dio_seg segments[3];
+ struct file *file = nf->nf_file;
+ unsigned int nsegs, i;
+ ssize_t host_err;
+
+ nsegs = nfsd_write_dio_iters_init(nf, rqstp->rq_bvec, nvecs,
+ kiocb, *cnt, segments);
+
+ *cnt = 0;
+ for (i = 0; i < nsegs; i++) {
+ kiocb->ki_flags = segments[i].flags;
+ if (kiocb->ki_flags & IOCB_DIRECT)
+ trace_nfsd_write_direct(rqstp, fhp, kiocb->ki_pos,
+ segments[i].iter.count);
+ else {
+ trace_nfsd_write_vector(rqstp, fhp, kiocb->ki_pos,
+ segments[i].iter.count);
+ /*
+ * Mark the I/O buffer as evict-able to reduce
+ * memory contention.
+ */
+ if (nf->nf_file->f_op->fop_flags & FOP_DONTCACHE)
+ kiocb->ki_flags |= IOCB_DONTCACHE;
+ }
+
+ host_err = vfs_iocb_iter_write(file, kiocb, &segments[i].iter);
+ if (host_err < 0)
+ return host_err;
+ *cnt += host_err;
+ if (host_err < segments[i].iter.count)
+ break; /* partial write */
+ }
+
+ return 0;
+}
+
+/**
+ * nfsd_vfs_write - write data to an already-open file
+ * @rqstp: RPC execution context
+ * @fhp: File handle of file to write into
+ * @nf: An open file matching @fhp
+ * @offset: Byte offset of start
+ * @payload: xdr_buf containing the write payload
+ * @cnt: IN: number of bytes to write, OUT: number of bytes actually written
+ * @stable: An NFS stable_how value
+ * @verf: NFS WRITE verifier
+ *
+ * Upon return, caller must invoke fh_put on @fhp.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
+ */
__be32
-nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen,
- unsigned long *cnt, int stable)
+nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file *nf, loff_t offset,
+ const struct xdr_buf *payload, unsigned long *cnt,
+ int stable, __be32 *verf)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct file *file = nf->nf_file;
+ struct super_block *sb = file_inode(file)->i_sb;
+ struct kiocb kiocb;
struct svc_export *exp;
struct iov_iter iter;
+ errseq_t since;
__be32 nfserr;
int host_err;
- int use_wgather;
- loff_t pos = offset;
+ unsigned long exp_op_flags = 0;
unsigned int pflags = current->flags;
- rwf_t flags = 0;
+ bool restore_flags = false;
+ unsigned int nvecs;
trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
- if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
+ if (sb->s_export_op)
+ exp_op_flags = sb->s_export_op->flags;
+
+ if (test_bit(RQ_LOCAL, &rqstp->rq_flags) &&
+ !(exp_op_flags & EXPORT_OP_REMOTE_FS)) {
/*
- * We want less throttling in balance_dirty_pages()
- * and shrink_inactive_list() so that nfs to
+ * We want throttling in balance_dirty_pages()
+ * and shrink_inactive_list() to only consider
+ * the backingdev we are writing to, so that nfs to
* localhost doesn't cause nfsd to lock up due to all
* the client's dirty pages or its congested queue.
*/
- current->flags |= PF_LESS_THROTTLE;
+ current->flags |= PF_LOCAL_THROTTLE;
+ restore_flags = true;
+ }
exp = fhp->fh_export;
- use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
if (!EX_ISSYNC(exp))
stable = NFS_UNSTABLE;
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = offset;
+ if (likely(!fhp->fh_use_wgather)) {
+ switch (stable) {
+ case NFS_FILE_SYNC:
+ /* persist data and timestamps */
+ kiocb.ki_flags |= IOCB_DSYNC | IOCB_SYNC;
+ break;
+ case NFS_DATA_SYNC:
+ /* persist data only */
+ kiocb.ki_flags |= IOCB_DSYNC;
+ break;
+ }
+ }
- if (stable && !use_wgather)
- flags |= RWF_SYNC;
+ nvecs = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, payload);
- iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
- host_err = vfs_iter_write(file, &iter, &pos, flags);
- if (host_err < 0)
+ since = READ_ONCE(file->f_wb_err);
+ if (verf)
+ nfsd_copy_write_verifier(verf, nn);
+
+ switch (nfsd_io_cache_write) {
+ case NFSD_IO_DIRECT:
+ host_err = nfsd_direct_write(rqstp, fhp, nf, nvecs,
+ cnt, &kiocb);
+ break;
+ case NFSD_IO_DONTCACHE:
+ if (file->f_op->fop_flags & FOP_DONTCACHE)
+ kiocb.ki_flags |= IOCB_DONTCACHE;
+ fallthrough;
+ case NFSD_IO_BUFFERED:
+ iov_iter_bvec(&iter, ITER_SOURCE, rqstp->rq_bvec, nvecs, *cnt);
+ host_err = vfs_iocb_iter_write(file, &kiocb, &iter);
+ if (host_err < 0)
+ break;
+ *cnt = host_err;
+ break;
+ }
+ if (host_err < 0) {
+ commit_reset_write_verifier(nn, rqstp, host_err);
goto out_nfserr;
- nfsdstats.io_write += *cnt;
+ }
+ nfsd_stats_io_write_add(nn, exp, *cnt);
fsnotify_modify(file);
+ host_err = filemap_check_wb_err(file->f_mapping, since);
+ if (host_err < 0)
+ goto out_nfserr;
- if (stable && use_wgather)
+ if (stable && fhp->fh_use_wgather) {
host_err = wait_for_concurrent_writes(file);
+ if (host_err < 0)
+ commit_reset_write_verifier(nn, rqstp, host_err);
+ }
out_nfserr:
if (host_err >= 0) {
@@ -1034,122 +1505,207 @@ out_nfserr:
trace_nfsd_write_err(rqstp, fhp, offset, host_err);
nfserr = nfserrno(host_err);
}
- if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
- current_restore_flags(pflags, PF_LESS_THROTTLE);
+ if (restore_flags)
+ current_restore_flags(pflags, PF_LOCAL_THROTTLE);
return nfserr;
}
-/*
- * Read data from a file. count must contain the requested read count
- * on entry. On return, *count contains the number of bytes actually read.
+/**
+ * nfsd_read_splice_ok - check if spliced reading is supported
+ * @rqstp: RPC transaction context
+ *
+ * Return values:
+ * %true: nfsd_splice_read() may be used
+ * %false: nfsd_splice_read() must not be used
+ *
+ * NFS READ normally uses splice to send data in-place. However the
+ * data in cache can change after the reply's MIC is computed but
+ * before the RPC reply is sent. To prevent the client from
+ * rejecting the server-computed MIC in this somewhat rare case, do
+ * not use splice with the GSS integrity and privacy services.
+ */
+bool nfsd_read_splice_ok(struct svc_rqst *rqstp)
+{
+ if (nfsd_disable_splice_read)
+ return false;
+ switch (svc_auth_flavor(rqstp)) {
+ case RPC_AUTH_GSS_KRB5I:
+ case RPC_AUTH_GSS_KRB5P:
+ return false;
+ }
+ return true;
+}
+
+/**
+ * nfsd_read - Read data from a file
+ * @rqstp: RPC transaction context
+ * @fhp: file handle of file to be read
+ * @offset: starting byte offset
+ * @count: IN: requested number of bytes; OUT: number of bytes read
+ * @eof: OUT: set non-zero if operation reached the end of the file
+ *
+ * The caller must verify that there is enough space in @rqstp.rq_res
+ * to perform this operation.
+ *
* N.B. After this call fhp needs an fh_put
+ *
+ * Returns nfs_ok on success, otherwise an nfserr stat value is
+ * returned.
*/
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+ loff_t offset, unsigned long *count, u32 *eof)
{
+ struct nfsd_file *nf;
struct file *file;
- struct raparms *ra;
__be32 err;
trace_nfsd_read_start(rqstp, fhp, offset, *count);
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+ err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_READ, &nf);
if (err)
return err;
- ra = nfsd_init_raparms(file);
-
- if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
- err = nfsd_splice_read(rqstp, fhp, file, offset, count);
+ file = nf->nf_file;
+ if (file->f_op->splice_read && nfsd_read_splice_ok(rqstp))
+ err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
else
- err = nfsd_readv(rqstp, fhp, file, offset, vec, vlen, count);
-
- if (ra)
- nfsd_put_raparams(file, ra);
- fput(file);
+ err = nfsd_iter_read(rqstp, fhp, nf, offset, count, 0, eof);
+ nfsd_file_put(nf);
trace_nfsd_read_done(rqstp, fhp, offset, *count);
-
return err;
}
-/*
- * Write data to a file.
- * The stable flag requests synchronous writes.
- * N.B. After this call fhp needs an fh_put
+/**
+ * nfsd_write - open a file and write data to it
+ * @rqstp: RPC execution context
+ * @fhp: File handle of file to write into; nfsd_write() may modify it
+ * @offset: Byte offset of start
+ * @payload: xdr_buf containing the write payload
+ * @cnt: IN: number of bytes to write, OUT: number of bytes actually written
+ * @stable: An NFS stable_how value
+ * @verf: NFS WRITE verifier
+ *
+ * Upon return, caller must invoke fh_put on @fhp.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
*/
__be32
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *cnt, int stable)
+ const struct xdr_buf *payload, unsigned long *cnt, int stable,
+ __be32 *verf)
{
- struct file *file = NULL;
- __be32 err = 0;
+ struct nfsd_file *nf;
+ __be32 err;
trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+ err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf);
if (err)
goto out;
- err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, stable);
- fput(file);
+ err = nfsd_vfs_write(rqstp, fhp, nf, offset, payload, cnt,
+ stable, verf);
+ nfsd_file_put(nf);
out:
trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
return err;
}
-#ifdef CONFIG_NFSD_V3
-/*
- * Commit all pending writes to stable storage.
+/**
+ * nfsd_commit - Commit pending writes to stable storage
+ * @rqstp: RPC request being processed
+ * @fhp: NFS filehandle
+ * @nf: target file
+ * @offset: raw offset from beginning of file
+ * @count: raw count of bytes to sync
+ * @verf: filled in with the server's current write verifier
*
- * Note: we only guarantee that data that lies within the range specified
- * by the 'offset' and 'count' parameters will be synced.
+ * Note: we guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced. The server
+ * is permitted to sync data that lies outside this range at the
+ * same time.
*
* Unfortunately we cannot lock the file to make sure we return full WCC
* data to the client, as locking happens lower down in the filesystem.
+ *
+ * Return values:
+ * An nfsstat value in network byte order.
*/
__be32
-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, unsigned long count)
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
+ u64 offset, u32 count, __be32 *verf)
{
- struct file *file;
- loff_t end = LLONG_MAX;
- __be32 err = nfserr_inval;
+ __be32 err = nfs_ok;
+ u64 maxbytes;
+ loff_t start, end;
+ struct nfsd_net *nn;
- if (offset < 0)
- goto out;
- if (count != 0) {
- end = offset + (loff_t)count - 1;
- if (end < offset)
- goto out;
+ trace_nfsd_commit_start(rqstp, fhp, offset, count);
+
+ /*
+ * Convert the client-provided (offset, count) range to a
+ * (start, end) range. If the client-provided range falls
+ * outside the maximum file size of the underlying FS,
+ * clamp the sync range appropriately.
+ */
+ start = 0;
+ end = LLONG_MAX;
+ maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
+ if (offset < maxbytes) {
+ start = offset;
+ if (count && (offset + count - 1 < maxbytes))
+ end = offset + count - 1;
}
- err = nfsd_open(rqstp, fhp, S_IFREG,
- NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
- if (err)
- goto out;
+ nn = net_generic(nf->nf_net, nfsd_net_id);
if (EX_ISSYNC(fhp->fh_export)) {
- int err2 = vfs_fsync_range(file, offset, end, 0);
-
- if (err2 != -EINVAL)
+ errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
+ int err2;
+
+ err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
+ switch (err2) {
+ case 0:
+ nfsd_copy_write_verifier(verf, nn);
+ err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
+ since);
err = nfserrno(err2);
- else
+ break;
+ case -EINVAL:
err = nfserr_notsupp;
- }
+ break;
+ default:
+ commit_reset_write_verifier(nn, rqstp, err2);
+ err = nfserrno(err2);
+ }
+ } else
+ nfsd_copy_write_verifier(verf, nn);
- fput(file);
-out:
+ trace_nfsd_commit_done(rqstp, fhp, offset, count);
return err;
}
-#endif /* CONFIG_NFSD_V3 */
-static __be32
-nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
- struct iattr *iap)
+/**
+ * nfsd_create_setattr - Set a created file's attributes
+ * @rqstp: RPC transaction being executed
+ * @fhp: NFS filehandle of parent directory
+ * @resfhp: NFS filehandle of new object
+ * @attrs: requested attributes of new object
+ *
+ * Returns nfs_ok on success, or an nfsstat in network byte order.
+ */
+__be32
+nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct nfsd_attrs *attrs)
{
+ struct iattr *iap = attrs->na_iattr;
+ __be32 status;
+
/*
- * Mode has already been set earlier in create:
+ * Mode has already been set by file creation.
*/
iap->ia_valid &= ~ATTR_MODE;
+
/*
* Setting uid/gid works only for root. Irix appears to
* send along the gid on create when it tries to implement
@@ -1157,10 +1713,31 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
*/
if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
- if (iap->ia_valid)
- return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
- /* Callers expect file metadata to be committed here */
- return nfserrno(commit_metadata(resfhp));
+
+ /*
+ * Callers expect new file metadata to be committed even
+ * if the attributes have not changed.
+ */
+ if (nfsd_attrs_valid(attrs))
+ status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
+ else
+ status = nfserrno(commit_metadata(resfhp));
+
+ /*
+ * Transactional filesystems had a chance to commit changes
+ * for both parent and child simultaneously making the
+ * following commit_metadata a noop in many cases.
+ */
+ if (!status)
+ status = nfserrno(commit_metadata(fhp));
+
+ /*
+ * Update the new filehandle to pick up the new attributes.
+ */
+ if (!status)
+ status = fh_update(resfhp);
+
+ return status;
}
/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
@@ -1178,30 +1755,24 @@ nfsd_check_ignore_resizing(struct iattr *iap)
iap->ia_valid &= ~ATTR_SIZE;
}
-/* The parent directory should already be locked: */
+/* The parent directory should already be locked - we will unlock */
__be32
nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
- char *fname, int flen, struct iattr *iap,
- int type, dev_t rdev, struct svc_fh *resfhp)
+ struct nfsd_attrs *attrs,
+ int type, dev_t rdev, struct svc_fh *resfhp)
{
struct dentry *dentry, *dchild;
struct inode *dirp;
+ struct iattr *iap = attrs->na_iattr;
__be32 err;
- __be32 err2;
- int host_err;
+ int host_err = 0;
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
dchild = dget(resfhp->fh_dentry);
- if (!fhp->fh_locked) {
- WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n",
- dentry);
- err = nfserr_io;
- goto out;
- }
-
- err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE);
+ err = nfsd_permission(&rqstp->rq_cred, fhp->fh_export, dentry,
+ NFSD_MAY_CREATE);
if (err)
goto out;
@@ -1209,44 +1780,34 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_mode = 0;
iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
+ if (!IS_POSIXACL(dirp))
+ iap->ia_mode &= ~current_umask();
+
err = 0;
- host_err = 0;
switch (type) {
case S_IFREG:
- host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
+ host_err = vfs_create(&nop_mnt_idmap, dchild, iap->ia_mode, NULL);
if (!host_err)
nfsd_check_ignore_resizing(iap);
break;
case S_IFDIR:
- host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
- if (!host_err && unlikely(d_unhashed(dchild))) {
- struct dentry *d;
- d = lookup_one_len(dchild->d_name.name,
- dchild->d_parent,
- dchild->d_name.len);
- if (IS_ERR(d)) {
- host_err = PTR_ERR(d);
- break;
- }
- if (unlikely(d_is_negative(d))) {
- dput(d);
- err = nfserr_serverfault;
- goto out;
- }
+ dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode, NULL);
+ if (IS_ERR(dchild)) {
+ host_err = PTR_ERR(dchild);
+ } else if (d_is_negative(dchild)) {
+ err = nfserr_serverfault;
+ goto out;
+ } else if (unlikely(dchild != resfhp->fh_dentry)) {
dput(resfhp->fh_dentry);
- resfhp->fh_dentry = dget(d);
- err = fh_update(resfhp);
- dput(dchild);
- dchild = d;
- if (err)
- goto out;
+ resfhp->fh_dentry = dget(dchild);
}
break;
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
+ host_err = vfs_mknod(&nop_mnt_idmap, dirp, dchild,
+ iap->ia_mode, rdev, NULL);
break;
default:
printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
@@ -1256,24 +1817,12 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (host_err < 0)
goto out_nfserr;
- err = nfsd_create_setattr(rqstp, resfhp, iap);
+ err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
- /*
- * nfsd_create_setattr already committed the child. Transactional
- * filesystems had a chance to commit changes for both parent and
- * child simultaneously making the following commit_metadata a
- * noop.
- */
- err2 = nfserrno(commit_metadata(fhp));
- if (err2)
- err = err2;
- /*
- * Update the file handle to get the new inode info.
- */
- if (!err)
- err = fh_update(resfhp);
out:
- dput(dchild);
+ if (!err)
+ fh_fill_post_attrs(fhp);
+ end_creating(dchild);
return err;
out_nfserr:
@@ -1289,13 +1838,15 @@ out_nfserr:
*/
__be32
nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- char *fname, int flen, struct iattr *iap,
- int type, dev_t rdev, struct svc_fh *resfhp)
+ char *fname, int flen, struct nfsd_attrs *attrs,
+ int type, dev_t rdev, struct svc_fh *resfhp)
{
struct dentry *dentry, *dchild = NULL;
__be32 err;
int host_err;
+ trace_nfsd_vfs_create(rqstp, fhp, type, fname, flen);
+
if (isdotent(fname, flen))
return nfserr_exist;
@@ -1309,187 +1860,26 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (host_err)
return nfserrno(host_err);
- fh_lock_nested(fhp, I_MUTEX_PARENT);
- dchild = lookup_one_len(fname, dentry, flen);
+ dchild = start_creating(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
host_err = PTR_ERR(dchild);
if (IS_ERR(dchild))
return nfserrno(host_err);
- err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
- /*
- * We unconditionally drop our ref to dchild as fh_compose will have
- * already grabbed its own ref for it.
- */
- dput(dchild);
- if (err)
- return err;
- return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
- rdev, resfhp);
-}
-
-#ifdef CONFIG_NFSD_V3
-
-/*
- * NFSv3 and NFSv4 version of nfsd_create
- */
-__be32
-do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- char *fname, int flen, struct iattr *iap,
- struct svc_fh *resfhp, int createmode, u32 *verifier,
- bool *truncp, bool *created)
-{
- struct dentry *dentry, *dchild = NULL;
- struct inode *dirp;
- __be32 err;
- int host_err;
- __u32 v_mtime=0, v_atime=0;
-
- err = nfserr_perm;
- if (!flen)
- goto out;
- err = nfserr_exist;
- if (isdotent(fname, flen))
- goto out;
- if (!(iap->ia_valid & ATTR_MODE))
- iap->ia_mode = 0;
- err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
- if (err)
- goto out;
-
- dentry = fhp->fh_dentry;
- dirp = d_inode(dentry);
-
- host_err = fh_want_write(fhp);
- if (host_err)
- goto out_nfserr;
-
- fh_lock_nested(fhp, I_MUTEX_PARENT);
-
- /*
- * Compose the response file handle.
- */
- dchild = lookup_one_len(fname, dentry, flen);
- host_err = PTR_ERR(dchild);
- if (IS_ERR(dchild))
- goto out_nfserr;
-
- /* If file doesn't exist, check for permissions to create one */
- if (d_really_is_negative(dchild)) {
- err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
- if (err)
- goto out;
- }
err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
if (err)
- goto out;
-
- if (nfsd_create_is_exclusive(createmode)) {
- /* solaris7 gets confused (bugid 4218508) if these have
- * the high bit set, so just clear the high bits. If this is
- * ever changed to use different attrs for storing the
- * verifier, then do_open_lookup() will also need to be fixed
- * accordingly.
- */
- v_mtime = verifier[0]&0x7fffffff;
- v_atime = verifier[1]&0x7fffffff;
- }
-
- if (d_really_is_positive(dchild)) {
- err = 0;
-
- switch (createmode) {
- case NFS3_CREATE_UNCHECKED:
- if (! d_is_reg(dchild))
- goto out;
- else if (truncp) {
- /* in nfsv4, we need to treat this case a little
- * differently. we don't want to truncate the
- * file now; this would be wrong if the OPEN
- * fails for some other reason. furthermore,
- * if the size is nonzero, we should ignore it
- * according to spec!
- */
- *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
- }
- else {
- iap->ia_valid &= ATTR_SIZE;
- goto set_attr;
- }
- break;
- case NFS3_CREATE_EXCLUSIVE:
- if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
- && d_inode(dchild)->i_atime.tv_sec == v_atime
- && d_inode(dchild)->i_size == 0 ) {
- if (created)
- *created = 1;
- break;
- }
- /* fall through */
- case NFS4_CREATE_EXCLUSIVE4_1:
- if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
- && d_inode(dchild)->i_atime.tv_sec == v_atime
- && d_inode(dchild)->i_size == 0 ) {
- if (created)
- *created = 1;
- goto set_attr;
- }
- /* fall through */
- case NFS3_CREATE_GUARDED:
- err = nfserr_exist;
- }
- fh_drop_write(fhp);
- goto out;
- }
-
- host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
- if (host_err < 0) {
- fh_drop_write(fhp);
- goto out_nfserr;
- }
- if (created)
- *created = 1;
-
- nfsd_check_ignore_resizing(iap);
-
- if (nfsd_create_is_exclusive(createmode)) {
- /* Cram the verifier into atime/mtime */
- iap->ia_valid = ATTR_MTIME|ATTR_ATIME
- | ATTR_MTIME_SET|ATTR_ATIME_SET;
- /* XXX someone who knows this better please fix it for nsec */
- iap->ia_mtime.tv_sec = v_mtime;
- iap->ia_atime.tv_sec = v_atime;
- iap->ia_mtime.tv_nsec = 0;
- iap->ia_atime.tv_nsec = 0;
- }
-
- set_attr:
- err = nfsd_create_setattr(rqstp, resfhp, iap);
-
- /*
- * nfsd_create_setattr already committed the child
- * (and possibly also the parent).
- */
- if (!err)
- err = nfserrno(commit_metadata(fhp));
-
- /*
- * Update the filehandle to get the new inode info.
- */
- if (!err)
- err = fh_update(resfhp);
+ goto out_unlock;
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp);
+ /* nfsd_create_locked() unlocked the parent */
+ dput(dchild);
+ return err;
- out:
- fh_unlock(fhp);
- if (dchild && !IS_ERR(dchild))
- dput(dchild);
- fh_drop_write(fhp);
- return err;
-
- out_nfserr:
- err = nfserrno(host_err);
- goto out;
+out_unlock:
+ end_creating(dchild);
+ return err;
}
-#endif /* CONFIG_NFSD_V3 */
/*
* Read a symlink. On entry, *lenp must contain the maximum path length that
@@ -1529,20 +1919,32 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
return 0;
}
-/*
- * Create a symlink and look up its inode
+/**
+ * nfsd_symlink - Create a symlink and look up its inode
+ * @rqstp: RPC transaction being executed
+ * @fhp: NFS filehandle of parent directory
+ * @fname: filename of the new symlink
+ * @flen: length of @fname
+ * @path: content of the new symlink (NUL-terminated)
+ * @attrs: requested attributes of new object
+ * @resfhp: NFS filehandle of new object
+ *
* N.B. After this call _both_ fhp and resfhp need an fh_put
+ *
+ * Returns nfs_ok on success, or an nfsstat in network byte order.
*/
__be32
nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
- char *fname, int flen,
- char *path,
- struct svc_fh *resfhp)
+ char *fname, int flen,
+ char *path, struct nfsd_attrs *attrs,
+ struct svc_fh *resfhp)
{
struct dentry *dentry, *dnew;
__be32 err, cerr;
int host_err;
+ trace_nfsd_vfs_symlink(rqstp, fhp, fname, flen, path);
+
err = nfserr_noent;
if (!flen || path[0] == '\0')
goto out;
@@ -1555,38 +1957,49 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
host_err = fh_want_write(fhp);
- if (host_err)
- goto out_nfserr;
+ if (host_err) {
+ err = nfserrno(host_err);
+ goto out;
+ }
- fh_lock(fhp);
dentry = fhp->fh_dentry;
- dnew = lookup_one_len(fname, dentry, flen);
- host_err = PTR_ERR(dnew);
- if (IS_ERR(dnew))
- goto out_nfserr;
-
- host_err = vfs_symlink(d_inode(dentry), dnew, path);
+ dnew = start_creating(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
+ if (IS_ERR(dnew)) {
+ err = nfserrno(PTR_ERR(dnew));
+ goto out_drop_write;
+ }
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path, NULL);
err = nfserrno(host_err);
+ cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
+ if (!err)
+ nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
+ fh_fill_post_attrs(fhp);
+out_unlock:
+ end_creating(dnew);
if (!err)
err = nfserrno(commit_metadata(fhp));
- fh_unlock(fhp);
-
+ if (!err)
+ err = cerr;
+out_drop_write:
fh_drop_write(fhp);
-
- cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
- dput(dnew);
- if (err==0) err = cerr;
out:
return err;
-
-out_nfserr:
- err = nfserrno(host_err);
- goto out;
}
-/*
- * Create a hardlink
- * N.B. After this call _both_ ffhp and tfhp need an fh_put
+/**
+ * nfsd_link - create a link
+ * @rqstp: RPC transaction context
+ * @ffhp: the file handle of the directory where the new link is to be created
+ * @name: the filename of the new link
+ * @len: the length of @name in octets
+ * @tfhp: the file handle of an existing file object
+ *
+ * After this call _both_ ffhp and tfhp need an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
*/
__be32
nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
@@ -1594,9 +2007,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
{
struct dentry *ddir, *dnew, *dold;
struct inode *dirp;
+ int type;
__be32 err;
int host_err;
+ trace_nfsd_vfs_link(rqstp, ffhp, tfhp, name, len);
+
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
@@ -1613,62 +2029,101 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
if (isdotent(name, len))
goto out;
+ err = nfs_ok;
+ type = d_inode(tfhp->fh_dentry)->i_mode & S_IFMT;
host_err = fh_want_write(tfhp);
- if (host_err) {
- err = nfserrno(host_err);
+ if (host_err)
goto out;
- }
- fh_lock_nested(ffhp, I_MUTEX_PARENT);
ddir = ffhp->fh_dentry;
dirp = d_inode(ddir);
+ dnew = start_creating(&nop_mnt_idmap, ddir, &QSTR_LEN(name, len));
- dnew = lookup_one_len(name, ddir, len);
- host_err = PTR_ERR(dnew);
- if (IS_ERR(dnew))
- goto out_nfserr;
+ if (IS_ERR(dnew)) {
+ host_err = PTR_ERR(dnew);
+ goto out_drop_write;
+ }
dold = tfhp->fh_dentry;
err = nfserr_noent;
if (d_really_is_negative(dold))
- goto out_dput;
- host_err = vfs_link(dold, dirp, dnew, NULL);
+ goto out_unlock;
+ err = fh_fill_pre_attrs(ffhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL);
+ fh_fill_post_attrs(ffhp);
+out_unlock:
+ end_creating(dnew);
if (!host_err) {
- err = nfserrno(commit_metadata(ffhp));
- if (!err)
- err = nfserrno(commit_metadata(tfhp));
- } else {
- if (host_err == -EXDEV && rqstp->rq_vers == 2)
- err = nfserr_acces;
- else
- err = nfserrno(host_err);
+ host_err = commit_metadata(ffhp);
+ if (!host_err)
+ host_err = commit_metadata(tfhp);
}
-out_dput:
- dput(dnew);
-out_unlock:
- fh_unlock(ffhp);
+
+out_drop_write:
fh_drop_write(tfhp);
+ if (host_err == -EBUSY) {
+ /*
+ * See RFC 8881 Section 18.9.4 para 1-2: NFSv4 LINK
+ * wants a status unique to the object type.
+ */
+ if (type != S_IFDIR)
+ err = nfserr_file_open;
+ else
+ err = nfserr_acces;
+ }
out:
- return err;
+ return err != nfs_ok ? err : nfserrno(host_err);
+}
-out_nfserr:
- err = nfserrno(host_err);
- goto out_unlock;
+static void
+nfsd_close_cached_files(struct dentry *dentry)
+{
+ struct inode *inode = d_inode(dentry);
+
+ if (inode && S_ISREG(inode->i_mode))
+ nfsd_file_close_inode_sync(inode);
}
-/*
- * Rename a file
- * N.B. After this call _both_ ffhp and tfhp need an fh_put
+static bool
+nfsd_has_cached_files(struct dentry *dentry)
+{
+ bool ret = false;
+ struct inode *inode = d_inode(dentry);
+
+ if (inode && S_ISREG(inode->i_mode))
+ ret = nfsd_file_is_cached(inode);
+ return ret;
+}
+
+/**
+ * nfsd_rename - rename a directory entry
+ * @rqstp: RPC transaction context
+ * @ffhp: the file handle of parent directory containing the entry to be renamed
+ * @fname: the filename of directory entry to be renamed
+ * @flen: the length of @fname in octets
+ * @tfhp: the file handle of parent directory to contain the renamed entry
+ * @tname: the filename of the new entry
+ * @tlen: the length of @tlen in octets
+ *
+ * After this call _both_ ffhp and tfhp need an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
*/
__be32
nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
struct svc_fh *tfhp, char *tname, int tlen)
{
- struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap;
- struct inode *fdir, *tdir;
+ struct dentry *fdentry, *tdentry;
+ int type = S_IFDIR;
+ struct renamedata rd = {};
__be32 err;
int host_err;
+ struct dentry *close_cached;
+
+ trace_nfsd_vfs_rename(rqstp, ffhp, tfhp, fname, flen, tname, tlen);
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err)
@@ -1678,84 +2133,118 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out;
fdentry = ffhp->fh_dentry;
- fdir = d_inode(fdentry);
tdentry = tfhp->fh_dentry;
- tdir = d_inode(tdentry);
err = nfserr_perm;
if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
goto out;
+ err = nfserr_xdev;
+ if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
+ goto out;
+ if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
+ goto out;
+
+retry:
+ close_cached = NULL;
host_err = fh_want_write(ffhp);
if (host_err) {
err = nfserrno(host_err);
goto out;
}
- /* cannot use fh_lock as we need deadlock protective ordering
- * so do it by hand */
- trap = lock_rename(tdentry, fdentry);
- ffhp->fh_locked = tfhp->fh_locked = true;
- fill_pre_wcc(ffhp);
- fill_pre_wcc(tfhp);
+ rd.mnt_idmap = &nop_mnt_idmap;
+ rd.old_parent = fdentry;
+ rd.new_parent = tdentry;
- odentry = lookup_one_len(fname, fdentry, flen);
- host_err = PTR_ERR(odentry);
- if (IS_ERR(odentry))
- goto out_nfserr;
+ host_err = start_renaming(&rd, 0, &QSTR_LEN(fname, flen),
+ &QSTR_LEN(tname, tlen));
- host_err = -ENOENT;
- if (d_really_is_negative(odentry))
- goto out_dput_old;
- host_err = -EINVAL;
- if (odentry == trap)
- goto out_dput_old;
-
- ndentry = lookup_one_len(tname, tdentry, tlen);
- host_err = PTR_ERR(ndentry);
- if (IS_ERR(ndentry))
- goto out_dput_old;
- host_err = -ENOTEMPTY;
- if (ndentry == trap)
- goto out_dput_new;
-
- host_err = -EXDEV;
- if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
- goto out_dput_new;
- if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
- goto out_dput_new;
+ if (host_err) {
+ err = nfserrno(host_err);
+ goto out_want_write;
+ }
+ err = fh_fill_pre_attrs(ffhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ err = fh_fill_pre_attrs(tfhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+
+ type = d_inode(rd.old_dentry)->i_mode & S_IFMT;
+
+ if (d_inode(rd.new_dentry))
+ type = d_inode(rd.new_dentry)->i_mode & S_IFMT;
+
+ if ((rd.new_dentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) &&
+ nfsd_has_cached_files(rd.new_dentry)) {
+ close_cached = dget(rd.new_dentry);
+ goto out_unlock;
+ } else {
+ int retries;
- host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
- if (!host_err) {
- host_err = commit_metadata(tfhp);
- if (!host_err)
- host_err = commit_metadata(ffhp);
+ for (retries = 1;;) {
+ host_err = vfs_rename(&rd);
+ if (host_err != -EAGAIN || !retries--)
+ break;
+ if (!nfsd_wait_for_delegreturn(rqstp, d_inode(rd.old_dentry)))
+ break;
+ }
+ if (!host_err) {
+ host_err = commit_metadata(tfhp);
+ if (!host_err)
+ host_err = commit_metadata(ffhp);
+ }
}
- out_dput_new:
- dput(ndentry);
- out_dput_old:
- dput(odentry);
- out_nfserr:
- err = nfserrno(host_err);
- /*
- * We cannot rely on fh_unlock on the two filehandles,
- * as that would do the wrong thing if the two directories
- * were the same, so again we do it by hand.
- */
- fill_post_wcc(ffhp);
- fill_post_wcc(tfhp);
- unlock_rename(tdentry, fdentry);
- ffhp->fh_locked = tfhp->fh_locked = false;
+ if (host_err == -EBUSY) {
+ /*
+ * See RFC 8881 Section 18.26.4 para 1-3: NFSv4 RENAME
+ * wants a status unique to the object type.
+ */
+ if (type != S_IFDIR)
+ err = nfserr_file_open;
+ else
+ err = nfserr_acces;
+ } else {
+ err = nfserrno(host_err);
+ }
+
+ if (!close_cached) {
+ fh_fill_post_attrs(ffhp);
+ fh_fill_post_attrs(tfhp);
+ }
+out_unlock:
+ end_renaming(&rd);
+out_want_write:
fh_drop_write(ffhp);
+ /*
+ * If the target dentry has cached open files, then we need to
+ * try to close them prior to doing the rename. Final fput
+ * shouldn't be done with locks held however, so we delay it
+ * until this point and then reattempt the whole shebang.
+ */
+ if (close_cached) {
+ nfsd_close_cached_files(close_cached);
+ dput(close_cached);
+ goto retry;
+ }
out:
return err;
}
-/*
- * Unlink a file or directory
- * N.B. After this call fhp needs an fh_put
+/**
+ * nfsd_unlink - remove a directory entry
+ * @rqstp: RPC transaction context
+ * @fhp: the file handle of the parent directory to be modified
+ * @type: enforced file type of the object to be removed
+ * @fname: the name of directory entry to be removed
+ * @flen: length of @fname in octets
+ *
+ * After this call fhp needs an fh_put.
+ *
+ * Returns a generic NFS status code in network byte-order.
*/
__be32
nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
@@ -1763,9 +2252,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
{
struct dentry *dentry, *rdentry;
struct inode *dirp;
+ struct inode *rinode = NULL;
__be32 err;
int host_err;
+ trace_nfsd_vfs_unlink(rqstp, fhp, fname, flen);
+
err = nfserr_acces;
if (!flen || isdotent(fname, flen))
goto out;
@@ -1777,36 +2269,65 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (host_err)
goto out_nfserr;
- fh_lock_nested(fhp, I_MUTEX_PARENT);
dentry = fhp->fh_dentry;
dirp = d_inode(dentry);
- rdentry = lookup_one_len(fname, dentry, flen);
+ rdentry = start_removing(&nop_mnt_idmap, dentry, &QSTR_LEN(fname, flen));
+
host_err = PTR_ERR(rdentry);
if (IS_ERR(rdentry))
- goto out_nfserr;
+ goto out_drop_write;
- if (d_really_is_negative(rdentry)) {
- dput(rdentry);
- err = nfserr_noent;
- goto out;
- }
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+
+ rinode = d_inode(rdentry);
+ /* Prevent truncation until after locks dropped */
+ ihold(rinode);
if (!type)
type = d_inode(rdentry)->i_mode & S_IFMT;
- if (type != S_IFDIR)
- host_err = vfs_unlink(dirp, rdentry, NULL);
- else
- host_err = vfs_rmdir(dirp, rdentry);
- if (!host_err)
+ if (type != S_IFDIR) {
+ int retries;
+
+ if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK)
+ nfsd_close_cached_files(rdentry);
+
+ for (retries = 1;;) {
+ host_err = vfs_unlink(&nop_mnt_idmap, dirp, rdentry, NULL);
+ if (host_err != -EAGAIN || !retries--)
+ break;
+ if (!nfsd_wait_for_delegreturn(rqstp, rinode))
+ break;
+ }
+ } else {
+ host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry, NULL);
+ }
+ fh_fill_post_attrs(fhp);
+
+out_unlock:
+ end_removing(rdentry);
+ if (!err && !host_err)
host_err = commit_metadata(fhp);
- dput(rdentry);
+ iput(rinode); /* truncate the inode here */
+out_drop_write:
+ fh_drop_write(fhp);
out_nfserr:
- err = nfserrno(host_err);
+ if (host_err == -EBUSY) {
+ /*
+ * See RFC 8881 Section 18.25.4 para 4: NFSv4 REMOVE
+ * wants a status unique to the object type.
+ */
+ if (type != S_IFDIR)
+ err = nfserr_file_open;
+ else
+ err = nfserr_acces;
+ }
out:
- return err;
+ return err != nfs_ok ? err : nfserrno(host_err);
}
/*
@@ -1831,7 +2352,7 @@ struct readdir_data {
int full;
};
-static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
+static bool nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
int namlen, loff_t offset, u64 ino,
unsigned int d_type)
{
@@ -1843,7 +2364,7 @@ static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));
if (buf->used + reclen > PAGE_SIZE) {
buf->full = 1;
- return -EINVAL;
+ return false;
}
de->namlen = namlen;
@@ -1853,11 +2374,12 @@ static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
memcpy(de->name, name, namlen);
buf->used += reclen;
- return 0;
+ return true;
}
-static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
- struct readdir_cd *cdp, loff_t *offsetp)
+static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp,
+ nfsd_filldir_t func, struct readdir_cd *cdp,
+ loff_t *offsetp)
{
struct buffered_dirent *de;
int host_err;
@@ -1903,6 +2425,8 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
if (cdp->err != nfs_ok)
break;
+ trace_nfsd_dirent(fhp, de->ino, de->name, de->namlen);
+
reclen = ALIGN(sizeof(*de) + de->namlen,
sizeof(u64));
size -= reclen;
@@ -1923,9 +2447,23 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
return cdp->err;
}
-/*
- * Read entries from a directory.
- * The NFSv3/4 verifier we ignore for now.
+/**
+ * nfsd_readdir - Read entries from a directory
+ * @rqstp: RPC transaction context
+ * @fhp: NFS file handle of directory to be read
+ * @offsetp: OUT: seek offset of final entry that was read
+ * @cdp: OUT: an eof error value
+ * @func: entry filler actor
+ *
+ * This implementation ignores the NFSv3/4 verifier cookie.
+ *
+ * NB: normal system calls hold file->f_pos_lock when calling
+ * ->iterate_shared and ->llseek, but nfsd_readdir() does not.
+ * Because the struct file acquired here is not visible to other
+ * threads, it's internal state does not need mutex protection.
+ *
+ * Returns nfs_ok on success, otherwise an nfsstat code is
+ * returned.
*/
__be32
nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
@@ -1936,30 +2474,63 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
loff_t offset = *offsetp;
int may_flags = NFSD_MAY_READ;
- /* NFSv2 only supports 32 bit cookies */
- if (rqstp->rq_vers > 2)
- may_flags |= NFSD_MAY_64BIT_COOKIE;
-
err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
if (err)
goto out;
+ if (fhp->fh_64bit_cookies)
+ file->f_mode |= FMODE_64BITHASH;
+ else
+ file->f_mode |= FMODE_32BITHASH;
+
offset = vfs_llseek(file, offset, SEEK_SET);
if (offset < 0) {
err = nfserrno((int)offset);
goto out_close;
}
- err = nfsd_buffered_readdir(file, func, cdp, offsetp);
+ err = nfsd_buffered_readdir(file, fhp, func, cdp, offsetp);
if (err == nfserr_eof || err == nfserr_toosmall)
err = nfs_ok; /* can still be found in ->err */
out_close:
- fput(file);
+ nfsd_filp_close(file);
out:
return err;
}
+/**
+ * nfsd_filp_close: close a file synchronously
+ * @fp: the file to close
+ *
+ * nfsd_filp_close() is similar in behaviour to filp_close().
+ * The difference is that if this is the final close on the
+ * file, the that finalisation happens immediately, rather then
+ * being handed over to a work_queue, as it the case for
+ * filp_close().
+ * When a user-space process closes a file (even when using
+ * filp_close() the finalisation happens before returning to
+ * userspace, so it is effectively synchronous. When a kernel thread
+ * uses file_close(), on the other hand, the handling is completely
+ * asynchronous. This means that any cost imposed by that finalisation
+ * is not imposed on the nfsd thread, and nfsd could potentually
+ * close files more quickly than the work queue finalises the close,
+ * which would lead to unbounded growth in the queue.
+ *
+ * In some contexts is it not safe to synchronously wait for
+ * close finalisation (see comment for __fput_sync()), but nfsd
+ * does not match those contexts. In partcilarly it does not, at the
+ * time that this function is called, hold and locks and no finalisation
+ * of any file, socket, or device driver would have any cause to wait
+ * for nfsd to make progress.
+ */
+void nfsd_filp_close(struct file *fp)
+{
+ get_file(fp);
+ filp_close(fp, NULL);
+ __fput_sync(fp);
+}
+
/*
* Get file system stats
* N.B. After this call fhp needs an fh_put
@@ -1969,6 +2540,8 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, in
{
__be32 err;
+ trace_nfsd_vfs_statfs(rqstp, fhp);
+
err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
if (!err) {
struct path path = {
@@ -1981,17 +2554,254 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, in
return err;
}
-static int exp_rdonly(struct svc_rqst *rqstp, struct svc_export *exp)
+static int exp_rdonly(struct svc_cred *cred, struct svc_export *exp)
{
- return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY;
+ return nfsexp_flags(cred, exp) & NFSEXP_READONLY;
}
+#ifdef CONFIG_NFSD_V4
+/*
+ * Helper function to translate error numbers. In the case of xattr operations,
+ * some error codes need to be translated outside of the standard translations.
+ *
+ * ENODATA needs to be translated to nfserr_noxattr.
+ * E2BIG to nfserr_xattr2big.
+ *
+ * Additionally, vfs_listxattr can return -ERANGE. This means that the
+ * file has too many extended attributes to retrieve inside an
+ * XATTR_LIST_MAX sized buffer. This is a bug in the xattr implementation:
+ * filesystems will allow the adding of extended attributes until they hit
+ * their own internal limit. This limit may be larger than XATTR_LIST_MAX.
+ * So, at that point, the attributes are present and valid, but can't
+ * be retrieved using listxattr, since the upper level xattr code enforces
+ * the XATTR_LIST_MAX limit.
+ *
+ * This bug means that we need to deal with listxattr returning -ERANGE. The
+ * best mapping is to return TOOSMALL.
+ */
+static __be32
+nfsd_xattr_errno(int err)
+{
+ switch (err) {
+ case -ENODATA:
+ return nfserr_noxattr;
+ case -E2BIG:
+ return nfserr_xattr2big;
+ case -ERANGE:
+ return nfserr_toosmall;
+ }
+ return nfserrno(err);
+}
+
+/*
+ * Retrieve the specified user extended attribute. To avoid always
+ * having to allocate the maximum size (since we are not getting
+ * a maximum size from the RPC), do a probe + alloc. Hold a reader
+ * lock on i_rwsem to prevent the extended attribute from changing
+ * size while we're doing this.
+ */
+__be32
+nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
+ void **bufp, int *lenp)
+{
+ ssize_t len;
+ __be32 err;
+ char *buf;
+ struct inode *inode;
+ struct dentry *dentry;
+
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
+ if (err)
+ return err;
+
+ err = nfs_ok;
+ dentry = fhp->fh_dentry;
+ inode = d_inode(dentry);
+
+ inode_lock_shared(inode);
+
+ len = vfs_getxattr(&nop_mnt_idmap, dentry, name, NULL, 0);
+
+ /*
+ * Zero-length attribute, just return.
+ */
+ if (len == 0) {
+ *bufp = NULL;
+ *lenp = 0;
+ goto out;
+ }
+
+ if (len < 0) {
+ err = nfsd_xattr_errno(len);
+ goto out;
+ }
+
+ if (len > *lenp) {
+ err = nfserr_toosmall;
+ goto out;
+ }
+
+ buf = kvmalloc(len, GFP_KERNEL);
+ if (buf == NULL) {
+ err = nfserr_jukebox;
+ goto out;
+ }
+
+ len = vfs_getxattr(&nop_mnt_idmap, dentry, name, buf, len);
+ if (len <= 0) {
+ kvfree(buf);
+ buf = NULL;
+ err = nfsd_xattr_errno(len);
+ }
+
+ *lenp = len;
+ *bufp = buf;
+
+out:
+ inode_unlock_shared(inode);
+
+ return err;
+}
+
+/*
+ * Retrieve the xattr names. Since we can't know how many are
+ * user extended attributes, we must get all attributes here,
+ * and have the XDR encode filter out the "user." ones.
+ *
+ * While this could always just allocate an XATTR_LIST_MAX
+ * buffer, that's a waste, so do a probe + allocate. To
+ * avoid any changes between the probe and allocate, wrap
+ * this in inode_lock.
+ */
+__be32
+nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp,
+ int *lenp)
+{
+ ssize_t len;
+ __be32 err;
+ char *buf;
+ struct inode *inode;
+ struct dentry *dentry;
+
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
+ if (err)
+ return err;
+
+ dentry = fhp->fh_dentry;
+ inode = d_inode(dentry);
+ *lenp = 0;
+
+ inode_lock_shared(inode);
+
+ len = vfs_listxattr(dentry, NULL, 0);
+ if (len <= 0) {
+ err = nfsd_xattr_errno(len);
+ goto out;
+ }
+
+ if (len > XATTR_LIST_MAX) {
+ err = nfserr_xattr2big;
+ goto out;
+ }
+
+ buf = kvmalloc(len, GFP_KERNEL);
+ if (buf == NULL) {
+ err = nfserr_jukebox;
+ goto out;
+ }
+
+ len = vfs_listxattr(dentry, buf, len);
+ if (len <= 0) {
+ kvfree(buf);
+ err = nfsd_xattr_errno(len);
+ goto out;
+ }
+
+ *lenp = len;
+ *bufp = buf;
+
+ err = nfs_ok;
+out:
+ inode_unlock_shared(inode);
+
+ return err;
+}
+
+/**
+ * nfsd_removexattr - Remove an extended attribute
+ * @rqstp: RPC transaction being executed
+ * @fhp: NFS filehandle of object with xattr to remove
+ * @name: name of xattr to remove (NUL-terminate)
+ *
+ * Pass in a NULL pointer for delegated_inode, and let the client deal
+ * with NFS4ERR_DELAY (same as with e.g. setattr and remove).
+ *
+ * Returns nfs_ok on success, or an nfsstat in network byte order.
+ */
+__be32
+nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
+{
+ __be32 err;
+ int ret;
+
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE);
+ if (err)
+ return err;
+
+ ret = fh_want_write(fhp);
+ if (ret)
+ return nfserrno(ret);
+
+ inode_lock(fhp->fh_dentry->d_inode);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ ret = __vfs_removexattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
+ name, NULL);
+ err = nfsd_xattr_errno(ret);
+ fh_fill_post_attrs(fhp);
+out_unlock:
+ inode_unlock(fhp->fh_dentry->d_inode);
+ fh_drop_write(fhp);
+
+ return err;
+}
+
+__be32
+nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
+ void *buf, u32 len, u32 flags)
+{
+ __be32 err;
+ int ret;
+
+ err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE);
+ if (err)
+ return err;
+
+ ret = fh_want_write(fhp);
+ if (ret)
+ return nfserrno(ret);
+ inode_lock(fhp->fh_dentry->d_inode);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
+ name, buf, len, flags, NULL);
+ fh_fill_post_attrs(fhp);
+ err = nfsd_xattr_errno(ret);
+out_unlock:
+ inode_unlock(fhp->fh_dentry->d_inode);
+ fh_drop_write(fhp);
+ return err;
+}
+#endif
+
/*
* Check for a user's access permissions to this inode.
*/
__be32
-nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
- struct dentry *dentry, int acc)
+nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
+ struct dentry *dentry, int acc)
{
struct inode *inode = d_inode(dentry);
int err;
@@ -2006,7 +2816,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
(acc & NFSD_MAY_EXEC)? " exec" : "",
(acc & NFSD_MAY_SATTR)? " sattr" : "",
(acc & NFSD_MAY_TRUNC)? " trunc" : "",
- (acc & NFSD_MAY_LOCK)? " lock" : "",
+ (acc & NFSD_MAY_NLM)? " nlm" : "",
(acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
inode->i_mode,
IS_IMMUTABLE(inode)? " immut" : "",
@@ -2022,7 +2832,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
*/
if (!(acc & NFSD_MAY_LOCAL_ACCESS))
if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) {
- if (exp_rdonly(rqstp, exp) ||
+ if (exp_rdonly(cred, exp) ||
__mnt_is_readonly(exp->ex_path.mnt))
return nfserr_rofs;
if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
@@ -2031,16 +2841,6 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
return nfserr_perm;
- if (acc & NFSD_MAY_LOCK) {
- /* If we cannot rely on authentication in NLM requests,
- * just allow locks, otherwise require read permission, or
- * ownership
- */
- if (exp->ex_flags & NFSEXP_NOAUTHNLM)
- return 0;
- else
- acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
- }
/*
* The file owner always gets access permission for accesses that
* would normally be checked at open time. This is to make
@@ -2060,73 +2860,14 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
return 0;
/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
- err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC));
+ err = inode_permission(&nop_mnt_idmap, inode,
+ acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
/* Allow read access to binaries even when mode 111 */
if (err == -EACCES && S_ISREG(inode->i_mode) &&
(acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
- err = inode_permission(inode, MAY_EXEC);
+ err = inode_permission(&nop_mnt_idmap, inode, MAY_EXEC);
return err? nfserrno(err) : 0;
}
-
-void
-nfsd_racache_shutdown(void)
-{
- struct raparms *raparm, *last_raparm;
- unsigned int i;
-
- dprintk("nfsd: freeing readahead buffers.\n");
-
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- raparm = raparm_hash[i].pb_head;
- while(raparm) {
- last_raparm = raparm;
- raparm = raparm->p_next;
- kfree(last_raparm);
- }
- raparm_hash[i].pb_head = NULL;
- }
-}
-/*
- * Initialize readahead param cache
- */
-int
-nfsd_racache_init(int cache_size)
-{
- int i;
- int j = 0;
- int nperbucket;
- struct raparms **raparm = NULL;
-
-
- if (raparm_hash[0].pb_head)
- return 0;
- nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
- nperbucket = max(2, nperbucket);
- cache_size = nperbucket * RAPARM_HASH_SIZE;
-
- dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
-
- for (i = 0; i < RAPARM_HASH_SIZE; i++) {
- spin_lock_init(&raparm_hash[i].pb_lock);
-
- raparm = &raparm_hash[i].pb_head;
- for (j = 0; j < nperbucket; j++) {
- *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL);
- if (!*raparm)
- goto out_nomem;
- raparm = &(*raparm)->p_next;
- }
- *raparm = NULL;
- }
-
- nfsdstats.ra_size = cache_size;
- return 0;
-
-out_nomem:
- dprintk("nfsd: kmalloc failed, freeing readahead buffers\n");
- nfsd_racache_shutdown();
- return -ENOMEM;
-}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index a7e107309f76..ded2900d423f 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -6,6 +6,8 @@
#ifndef LINUX_NFSD_VFS_H
#define LINUX_NFSD_VFS_H
+#include <linux/fs.h>
+#include <linux/posix_acl.h>
#include "nfsfh.h"
#include "nfsd.h"
@@ -18,7 +20,7 @@
#define NFSD_MAY_READ 0x004 /* == MAY_READ */
#define NFSD_MAY_SATTR 0x008
#define NFSD_MAY_TRUNC 0x010
-#define NFSD_MAY_LOCK 0x020
+#define NFSD_MAY_NLM 0x020 /* request is from lockd */
#define NFSD_MAY_MASK 0x03f
/* extra hints to permission and open routines: */
@@ -31,17 +33,44 @@
#define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */
+#define NFSD_MAY_LOCALIO 0x2000 /* for tracing, reflects when localio used */
+
#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
+struct nfsd_file;
+
/*
* Callback function for readdir
*/
typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
/* nfsd/vfs.c */
-int nfsd_racache_init(int);
-void nfsd_racache_shutdown(void);
+struct nfsd_attrs {
+ struct iattr *na_iattr; /* input */
+ struct xdr_netobj *na_seclabel; /* input */
+ struct posix_acl *na_pacl; /* input */
+ struct posix_acl *na_dpacl; /* input */
+
+ int na_labelerr; /* output */
+ int na_aclerr; /* output */
+};
+
+static inline void nfsd_attrs_free(struct nfsd_attrs *attrs)
+{
+ posix_acl_release(attrs->na_pacl);
+ posix_acl_release(attrs->na_dpacl);
+}
+
+static inline bool nfsd_attrs_valid(struct nfsd_attrs *attrs)
+{
+ struct iattr *iap = attrs->na_iattr;
+
+ return (iap->ia_valid || (attrs->na_seclabel &&
+ attrs->na_seclabel->len));
+}
+
+__be32 nfserrno (int errno);
int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct svc_export **expp);
__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
@@ -50,54 +79,68 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
const char *, unsigned int,
struct svc_export **, struct dentry **);
__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
- struct iattr *, int, time_t);
+ struct nfsd_attrs *, const struct timespec64 *);
int nfsd_mountpoint(struct dentry *, struct svc_export *);
#ifdef CONFIG_NFSD_V4
-__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
- struct xdr_netobj *);
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
struct file *, loff_t, loff_t, int);
-__be32 nfsd4_clone_file_range(struct file *, u64, struct file *,
- u64, u64);
+__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
+ struct nfsd_file *nf_src, u64 src_pos,
+ struct nfsd_file *nf_dst, u64 dst_pos,
+ u64 count, bool sync);
#endif /* CONFIG_NFSD_V4 */
__be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *,
- char *name, int len, struct iattr *attrs,
- int type, dev_t rdev, struct svc_fh *res);
+ struct nfsd_attrs *attrs, int type, dev_t rdev,
+ struct svc_fh *res);
__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
- char *name, int len, struct iattr *attrs,
+ char *name, int len, struct nfsd_attrs *attrs,
int type, dev_t rdev, struct svc_fh *res);
-#ifdef CONFIG_NFSD_V3
__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
-__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
- char *name, int len, struct iattr *attrs,
- struct svc_fh *res, int createmode,
- u32 *verifier, bool *truncp, bool *created);
-__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
- loff_t, unsigned long);
-#endif /* CONFIG_NFSD_V3 */
+__be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct nfsd_attrs *iap);
+__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+ struct nfsd_file *nf, u64 offset, u32 count,
+ __be32 *verf);
+#ifdef CONFIG_NFSD_V4
+__be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ char *name, void **bufp, int *lenp);
+__be32 nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ char **bufp, int *lenp);
+__be32 nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ char *name);
+__be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ char *name, void *buf, u32 len, u32 flags);
+#endif
+int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
-struct raparms;
+int nfsd_open_verified(struct svc_fh *fhp, umode_t type, int may_flags,
+ struct file **filp);
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
- unsigned long *count);
-__be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file *file, loff_t offset,
- struct kvec *vec, int vlen,
- unsigned long *count);
-__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
- loff_t, struct kvec *, int, unsigned long *);
-__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t,
- struct kvec *, int, unsigned long *, int);
+ unsigned long *count,
+ u32 *eof);
+__be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfsd_file *nf, loff_t offset,
+ unsigned long *count, unsigned int base,
+ u32 *eof);
+bool nfsd_read_splice_ok(struct svc_rqst *rqstp);
+__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ loff_t offset, unsigned long *count,
+ u32 *eof);
+__be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ loff_t offset, const struct xdr_buf *payload,
+ unsigned long *cnt, int stable, __be32 *verf);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct file *file, loff_t offset,
- struct kvec *vec, int vlen, unsigned long *cnt,
- int stable);
+ struct nfsd_file *nf, loff_t offset,
+ const struct xdr_buf *payload,
+ unsigned long *cnt, int stable, __be32 *verf);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
- char *name, int len, char *path,
- struct svc_fh *res);
+ char *name, int len, char *path,
+ struct nfsd_attrs *attrs,
+ struct svc_fh *res);
__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
char *, int, struct svc_fh *);
ssize_t nfsd_copy_file_range(struct file *, u64,
@@ -112,60 +155,9 @@ __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
struct kstatfs *, int access);
-__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
- struct dentry *, int);
+__be32 nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
+ struct dentry *dentry, int acc);
-struct raparms *nfsd_init_raparms(struct file *file);
-void nfsd_put_raparams(struct file *file, struct raparms *ra);
-
-static inline int fh_want_write(struct svc_fh *fh)
-{
- int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
-
- if (!ret)
- fh->fh_want_write = true;
- return ret;
-}
-
-static inline void fh_drop_write(struct svc_fh *fh)
-{
- if (fh->fh_want_write) {
- fh->fh_want_write = false;
- mnt_drop_write(fh->fh_export->ex_path.mnt);
- }
-}
-
-static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat)
-{
- struct path p = {.mnt = fh->fh_export->ex_path.mnt,
- .dentry = fh->fh_dentry};
- return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS,
- AT_STATX_SYNC_AS_STAT));
-}
-
-static inline int nfsd_create_is_exclusive(int createmode)
-{
- return createmode == NFS3_CREATE_EXCLUSIVE
- || createmode == NFS4_CREATE_EXCLUSIVE4_1;
-}
-
-static inline bool nfsd_eof_on_read(long requested, long read,
- loff_t offset, loff_t size)
-{
- /* We assume a short read means eof: */
- if (requested > read)
- return true;
- /*
- * A non-short read might also reach end of file. The spec
- * still requires us to set eof in that case.
- *
- * Further operations may have modified the file size since
- * the read, so the following check is not atomic with the read.
- * We've only seen that cause a problem for a client in the case
- * where the read returned a count of 0 without setting eof.
- * That case was fixed by the addition of the above check.
- */
- return (offset + read >= size);
-}
+void nfsd_filp_close(struct file *fp);
#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index ea7cca3a64b7..852f71580bd0 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -27,14 +27,13 @@ struct nfsd_readargs {
struct svc_fh fh;
__u32 offset;
__u32 count;
- int vlen;
};
struct nfsd_writeargs {
svc_fh fh;
__u32 offset;
- int len;
- struct kvec first;
+ __u32 len;
+ struct xdr_buf payload;
};
struct nfsd_createargs {
@@ -53,11 +52,6 @@ struct nfsd_renameargs {
unsigned int tlen;
};
-struct nfsd_readlinkargs {
- struct svc_fh fh;
- char * buffer;
-};
-
struct nfsd_linkargs {
struct svc_fh ffh;
struct svc_fh tfh;
@@ -79,39 +73,53 @@ struct nfsd_readdirargs {
struct svc_fh fh;
__u32 cookie;
__u32 count;
- __be32 * buffer;
+};
+
+struct nfsd_stat {
+ __be32 status;
};
struct nfsd_attrstat {
+ __be32 status;
struct svc_fh fh;
struct kstat stat;
};
struct nfsd_diropres {
+ __be32 status;
struct svc_fh fh;
struct kstat stat;
};
struct nfsd_readlinkres {
+ __be32 status;
int len;
+ struct page *page;
};
struct nfsd_readres {
+ __be32 status;
struct svc_fh fh;
unsigned long count;
struct kstat stat;
+ struct page **pages;
};
struct nfsd_readdirres {
+ /* Components of the reply */
+ __be32 status;
+
int count;
+ /* Used to encode the reply's entry list */
+ struct xdr_stream xdr;
+ struct xdr_buf dirlist;
struct readdir_cd common;
- __be32 * buffer;
- int buflen;
- __be32 * offset;
+ unsigned int cookie_offset;
};
struct nfsd_statfsres {
+ __be32 status;
struct kstatfs stats;
};
@@ -133,33 +141,37 @@ union nfsd_xdrstore {
#define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore)
-int nfssvc_decode_void(struct svc_rqst *, __be32 *);
-int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *);
-int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_readargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_createargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
-int nfssvc_encode_void(struct svc_rqst *, __be32 *);
-int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *);
-int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
-int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *);
-int nfssvc_encode_readres(struct svc_rqst *, __be32 *);
-int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *);
-int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *);
-
-int nfssvc_encode_entry(void *, const char *name,
- int namlen, loff_t offset, u64 ino, unsigned int);
-
-void nfssvc_release_fhandle(struct svc_rqst *);
+bool nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+bool nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset);
+int nfssvc_encode_entry(void *data, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type);
+
+void nfssvc_release_attrstat(struct svc_rqst *rqstp);
+void nfssvc_release_diropres(struct svc_rqst *rqstp);
+void nfssvc_release_readres(struct svc_rqst *rqstp);
/* Helper functions for NFSv2 ACL code */
-__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat);
-__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
+bool svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp);
+bool svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status);
+bool svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ const struct svc_fh *fhp, const struct kstat *stat);
#endif /* LINUX_NFSD_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 2cb29e961a76..522067b7fd75 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -14,7 +14,7 @@ struct nfsd3_sattrargs {
struct svc_fh fh;
struct iattr attrs;
int check_guard;
- time_t guardtime;
+ struct timespec64 guardtime;
};
struct nfsd3_diropargs {
@@ -25,14 +25,13 @@ struct nfsd3_diropargs {
struct nfsd3_accessargs {
struct svc_fh fh;
- unsigned int access;
+ __u32 access;
};
struct nfsd3_readargs {
struct svc_fh fh;
__u64 offset;
__u32 count;
- int vlen;
};
struct nfsd3_writeargs {
@@ -41,7 +40,7 @@ struct nfsd3_writeargs {
__u32 count;
int stable;
__u32 len;
- struct kvec first;
+ struct xdr_buf payload;
};
struct nfsd3_createargs {
@@ -71,11 +70,6 @@ struct nfsd3_renameargs {
unsigned int tlen;
};
-struct nfsd3_readlinkargs {
- struct svc_fh fh;
- char * buffer;
-};
-
struct nfsd3_linkargs {
struct svc_fh ffh;
struct svc_fh tfh;
@@ -96,10 +90,8 @@ struct nfsd3_symlinkargs {
struct nfsd3_readdirargs {
struct svc_fh fh;
__u64 cookie;
- __u32 dircount;
__u32 count;
__be32 * verf;
- __be32 * buffer;
};
struct nfsd3_commitargs {
@@ -110,13 +102,13 @@ struct nfsd3_commitargs {
struct nfsd3_getaclargs {
struct svc_fh fh;
- int mask;
+ __u32 mask;
};
struct posix_acl;
struct nfsd3_setaclargs {
struct svc_fh fh;
- int mask;
+ __u32 mask;
struct posix_acl *acl_access;
struct posix_acl *acl_default;
};
@@ -145,13 +137,15 @@ struct nfsd3_readlinkres {
__be32 status;
struct svc_fh fh;
__u32 len;
+ struct page **pages;
};
struct nfsd3_readres {
__be32 status;
struct svc_fh fh;
unsigned long count;
- int eof;
+ __u32 eof;
+ struct page **pages;
};
struct nfsd3_writeres {
@@ -159,6 +153,7 @@ struct nfsd3_writeres {
struct svc_fh fh;
unsigned long count;
int committed;
+ __be32 verf[2];
};
struct nfsd3_renameres {
@@ -174,19 +169,17 @@ struct nfsd3_linkres {
};
struct nfsd3_readdirres {
+ /* Components of the reply */
__be32 status;
struct svc_fh fh;
- /* Just to save kmalloc on every readdirplus entry (svc_fh is a
- * little large for the stack): */
- struct svc_fh scratch;
- int count;
__be32 verf[2];
+ /* Used to encode the reply's entry list */
+ struct xdr_stream xdr;
+ struct xdr_buf dirlist;
+ struct svc_fh scratch;
struct readdir_cd common;
- __be32 * buffer;
- int buflen;
- __be32 * offset;
- __be32 * offset1;
+ unsigned int cookie_offset;
struct svc_rqst * rqstp;
};
@@ -223,6 +216,7 @@ struct nfsd3_pathconfres {
struct nfsd3_commitres {
__be32 status;
struct svc_fh fh;
+ __be32 verf[2];
};
struct nfsd3_getaclres {
@@ -271,51 +265,50 @@ union nfsd3_xdrstore {
#define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore)
-int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_readres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_createres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *);
+bool nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+bool nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
void nfs3svc_release_fhandle(struct svc_rqst *);
void nfs3svc_release_fhandle2(struct svc_rqst *);
-int nfs3svc_encode_entry(void *, const char *name,
- int namlen, loff_t offset, u64 ino,
- unsigned int);
-int nfs3svc_encode_entry_plus(void *, const char *name,
- int namlen, loff_t offset, u64 ino,
- unsigned int);
-/* Helper functions for NFSv3 ACL code */
-__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
- struct svc_fh *fhp);
-__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp);
+void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset);
+int nfs3svc_encode_entry3(void *data, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type);
+int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen,
+ loff_t offset, u64 ino, unsigned int d_type);
+/* Helper functions for NFSv3 ACL code */
+bool svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp);
+bool svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status);
+bool svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ const struct svc_fh *fhp);
#endif /* _LINUX_NFSD_XDR3_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index feeb6d4bdffd..ae75846b3cd7 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -46,9 +46,137 @@
#define CURRENT_STATE_ID_FLAG (1<<0)
#define SAVED_STATE_ID_FLAG (1<<1)
-#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f))
-#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f))
-#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f))
+#define SET_CSTATE_FLAG(c, f) ((c)->sid_flags |= (f))
+#define HAS_CSTATE_FLAG(c, f) ((c)->sid_flags & (f))
+#define CLEAR_CSTATE_FLAG(c, f) ((c)->sid_flags &= ~(f))
+
+/**
+ * nfsd4_encode_bool - Encode an XDR bool type result
+ * @xdr: target XDR stream
+ * @val: boolean value to encode
+ *
+ * Return values:
+ * %nfs_ok: @val encoded; @xdr advanced to next position
+ * %nfserr_resource: stream buffer space exhausted
+ */
+static __always_inline __be32
+nfsd4_encode_bool(struct xdr_stream *xdr, bool val)
+{
+ __be32 *p = xdr_reserve_space(xdr, XDR_UNIT);
+
+ if (unlikely(p == NULL))
+ return nfserr_resource;
+ *p = val ? xdr_one : xdr_zero;
+ return nfs_ok;
+}
+
+/**
+ * nfsd4_encode_uint32_t - Encode an XDR uint32_t type result
+ * @xdr: target XDR stream
+ * @val: integer value to encode
+ *
+ * Return values:
+ * %nfs_ok: @val encoded; @xdr advanced to next position
+ * %nfserr_resource: stream buffer space exhausted
+ */
+static __always_inline __be32
+nfsd4_encode_uint32_t(struct xdr_stream *xdr, u32 val)
+{
+ __be32 *p = xdr_reserve_space(xdr, XDR_UNIT);
+
+ if (unlikely(p == NULL))
+ return nfserr_resource;
+ *p = cpu_to_be32(val);
+ return nfs_ok;
+}
+
+#define nfsd4_encode_aceflag4(x, v) nfsd4_encode_uint32_t(x, v)
+#define nfsd4_encode_acemask4(x, v) nfsd4_encode_uint32_t(x, v)
+#define nfsd4_encode_acetype4(x, v) nfsd4_encode_uint32_t(x, v)
+#define nfsd4_encode_count4(x, v) nfsd4_encode_uint32_t(x, v)
+#define nfsd4_encode_mode4(x, v) nfsd4_encode_uint32_t(x, v)
+#define nfsd4_encode_nfs_lease4(x, v) nfsd4_encode_uint32_t(x, v)
+#define nfsd4_encode_qop4(x, v) nfsd4_encode_uint32_t(x, v)
+#define nfsd4_encode_sequenceid4(x, v) nfsd4_encode_uint32_t(x, v)
+#define nfsd4_encode_slotid4(x, v) nfsd4_encode_uint32_t(x, v)
+
+/**
+ * nfsd4_encode_uint64_t - Encode an XDR uint64_t type result
+ * @xdr: target XDR stream
+ * @val: integer value to encode
+ *
+ * Return values:
+ * %nfs_ok: @val encoded; @xdr advanced to next position
+ * %nfserr_resource: stream buffer space exhausted
+ */
+static __always_inline __be32
+nfsd4_encode_uint64_t(struct xdr_stream *xdr, u64 val)
+{
+ __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2);
+
+ if (unlikely(p == NULL))
+ return nfserr_resource;
+ put_unaligned_be64(val, p);
+ return nfs_ok;
+}
+
+#define nfsd4_encode_changeid4(x, v) nfsd4_encode_uint64_t(x, v)
+#define nfsd4_encode_nfs_cookie4(x, v) nfsd4_encode_uint64_t(x, v)
+#define nfsd4_encode_length4(x, v) nfsd4_encode_uint64_t(x, v)
+#define nfsd4_encode_offset4(x, v) nfsd4_encode_uint64_t(x, v)
+
+/**
+ * nfsd4_encode_opaque_fixed - Encode a fixed-length XDR opaque type result
+ * @xdr: target XDR stream
+ * @data: pointer to data
+ * @size: length of data in bytes
+ *
+ * Return values:
+ * %nfs_ok: @data encoded; @xdr advanced to next position
+ * %nfserr_resource: stream buffer space exhausted
+ */
+static __always_inline __be32
+nfsd4_encode_opaque_fixed(struct xdr_stream *xdr, const void *data,
+ size_t size)
+{
+ __be32 *p = xdr_reserve_space(xdr, xdr_align_size(size));
+ size_t pad = xdr_pad_size(size);
+
+ if (unlikely(p == NULL))
+ return nfserr_resource;
+ memcpy(p, data, size);
+ if (pad)
+ memset((char *)p + size, 0, pad);
+ return nfs_ok;
+}
+
+/**
+ * nfsd4_encode_opaque - Encode a variable-length XDR opaque type result
+ * @xdr: target XDR stream
+ * @data: pointer to data
+ * @size: length of data in bytes
+ *
+ * Return values:
+ * %nfs_ok: @data encoded; @xdr advanced to next position
+ * %nfserr_resource: stream buffer space exhausted
+ */
+static __always_inline __be32
+nfsd4_encode_opaque(struct xdr_stream *xdr, const void *data, size_t size)
+{
+ size_t pad = xdr_pad_size(size);
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(size));
+ if (unlikely(p == NULL))
+ return nfserr_resource;
+ *p++ = cpu_to_be32(size);
+ memcpy(p, data, size);
+ if (pad)
+ memset((char *)p + size, 0, pad);
+ return nfs_ok;
+}
+
+#define nfsd4_encode_component4(x, d, s) nfsd4_encode_opaque(x, d, s)
struct nfsd4_compound_state {
struct svc_fh current_fh;
@@ -76,12 +204,7 @@ static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
struct nfsd4_change_info {
u32 atomic;
- bool change_supported;
- u32 before_ctime_sec;
- u32 before_ctime_nsec;
u64 before_change;
- u32 after_ctime_sec;
- u32 after_ctime_nsec;
u64 after_change;
};
@@ -175,12 +298,8 @@ struct nfsd4_lock {
} v;
/* response */
- union {
- struct {
- stateid_t stateid;
- } ok;
- struct nfsd4_lock_denied denied;
- } u;
+ stateid_t lk_resp_stateid;
+ struct nfsd4_lock_denied lk_denied;
};
#define lk_new_open_seqid v.new.open_seqid
#define lk_new_open_stateid v.new.open_stateid
@@ -190,20 +309,15 @@ struct nfsd4_lock {
#define lk_old_lock_stateid v.old.lock_stateid
#define lk_old_lock_seqid v.old.lock_seqid
-#define lk_resp_stateid u.ok.stateid
-#define lk_denied u.denied
-
-
struct nfsd4_lockt {
u32 lt_type;
clientid_t lt_clientid;
struct xdr_netobj lt_owner;
u64 lt_offset;
u64 lt_length;
- struct nfsd4_lock_denied lt_denied;
+ struct nfsd4_lock_denied lt_denied;
};
-
struct nfsd4_locku {
u32 lu_type;
u32 lu_seqid;
@@ -221,11 +335,39 @@ struct nfsd4_lookup {
struct nfsd4_putfh {
u32 pf_fhlen; /* request */
char *pf_fhval; /* request */
+ bool no_verify; /* represents foreigh fh */
+};
+
+struct nfsd4_getxattr {
+ char *getxa_name; /* request */
+ u32 getxa_len; /* request */
+ void *getxa_buf;
+};
+
+struct nfsd4_setxattr {
+ u32 setxa_flags; /* request */
+ char *setxa_name; /* request */
+ char *setxa_buf; /* request */
+ u32 setxa_len; /* request */
+ struct nfsd4_change_info setxa_cinfo; /* response */
+};
+
+struct nfsd4_removexattr {
+ char *rmxa_name; /* request */
+ struct nfsd4_change_info rmxa_cinfo; /* response */
+};
+
+struct nfsd4_listxattrs {
+ u64 lsxa_cookie; /* request */
+ u32 lsxa_maxcount; /* request */
+ char *lsxa_buf; /* unfiltered buffer (reply) */
+ u32 lsxa_len; /* unfiltered len (reply) */
};
struct nfsd4_open {
u32 op_claim_type; /* request */
- struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */
+ u32 op_fnamelen;
+ char * op_fname; /* request - everything but CLAIM_PREV */
u32 op_delegate_type; /* request - CLAIM_PREV only */
stateid_t op_delegate_stateid; /* request - response */
u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */
@@ -244,17 +386,19 @@ struct nfsd4_open {
u32 op_deleg_want; /* request */
stateid_t op_stateid; /* response */
__be32 op_xdr_error; /* see nfsd4_open_omfg() */
- u32 op_recall; /* recall */
struct nfsd4_change_info op_cinfo; /* response */
u32 op_rflags; /* response */
+ bool op_recall; /* response */
bool op_truncate; /* used during processing */
bool op_created; /* used during processing */
struct nfs4_openowner *op_openowner; /* used during processing */
+ struct file *op_filp; /* used during processing */
struct nfs4_file *op_file; /* used during processing */
struct nfs4_ol_stateid *op_stp; /* used during processing */
struct nfs4_clnt_odstate *op_odstate; /* used during processing */
struct nfs4_acl *op_acl;
struct xdr_netobj op_label;
+ struct svc_rqst *op_rqstp;
};
struct nfsd4_open_confirm {
@@ -273,15 +417,15 @@ struct nfsd4_open_downgrade {
struct nfsd4_read {
- stateid_t rd_stateid; /* request */
- u64 rd_offset; /* request */
- u32 rd_length; /* request */
- int rd_vlen;
- struct file *rd_filp;
- bool rd_tmp_file;
-
- struct svc_rqst *rd_rqstp; /* response */
- struct svc_fh * rd_fhp; /* response */
+ stateid_t rd_stateid; /* request */
+ u64 rd_offset; /* request */
+ u32 rd_length; /* request */
+ int rd_vlen;
+ struct nfsd_file *rd_nf;
+
+ struct svc_rqst *rd_rqstp; /* response */
+ struct svc_fh *rd_fhp; /* response */
+ u32 rd_eof; /* response */
};
struct nfsd4_readdir {
@@ -359,13 +503,6 @@ struct nfsd4_setclientid_confirm {
nfs4_verifier sc_confirm;
};
-struct nfsd4_saved_compoundargs {
- __be32 *p;
- __be32 *end;
- int pagelen;
- struct page **pagelist;
-};
-
struct nfsd4_test_stateid_id {
__be32 ts_id_status;
stateid_t ts_id_stateid;
@@ -381,6 +518,24 @@ struct nfsd4_free_stateid {
stateid_t fr_stateid; /* request */
};
+struct nfsd4_get_dir_delegation {
+ /* request */
+ u32 gdda_signal_deleg_avail;
+ u32 gdda_notification_types[1];
+ struct timespec64 gdda_child_attr_delay;
+ struct timespec64 gdda_dir_attr_delay;
+ u32 gdda_child_attributes[3];
+ u32 gdda_dir_attributes[3];
+ /* response */
+ u32 gddrnf_status;
+ nfs4_verifier gddr_cookieverf;
+ stateid_t gddr_stateid;
+ u32 gddr_notification[1];
+ u32 gddr_child_attributes[3];
+ u32 gddr_dir_attributes[3];
+ bool gddrnf_will_signal_deleg_avail;
+};
+
/* also used for NVERIFY */
struct nfsd4_verify {
u32 ve_bmval[3]; /* request */
@@ -393,8 +548,7 @@ struct nfsd4_write {
u64 wr_offset; /* request */
u32 wr_stable_how; /* request */
u32 wr_buflen; /* request */
- struct kvec wr_head;
- struct page ** wr_pagelist; /* request */
+ struct xdr_buf wr_payload; /* request */
u32 wr_bytes_written; /* response */
u32 wr_how_written; /* response */
@@ -407,20 +561,23 @@ struct nfsd4_exchange_id {
u32 flags;
clientid_t clientid;
u32 seqid;
- int spa_how;
+ u32 spa_how;
u32 spo_must_enforce[3];
u32 spo_must_allow[3];
+ struct xdr_netobj nii_domain;
+ struct xdr_netobj nii_name;
+ struct timespec64 nii_time;
+ char *server_impl_name;
};
struct nfsd4_sequence {
struct nfs4_sessionid sessionid; /* request/response */
u32 seqid; /* request/response */
u32 slotid; /* request/response */
- u32 maxslots; /* request/response */
+ u32 maxslots; /* request */
u32 cachethis; /* request */
-#if 0
+ u32 maxslots_response; /* response */
u32 target_maxslots; /* response */
-#endif /* not yet */
u32 status_flags; /* response */
};
@@ -439,9 +596,43 @@ struct nfsd4_reclaim_complete {
struct nfsd4_deviceid {
u64 fsid_idx;
u32 generation;
- u32 pad;
};
+static inline __be32 *
+svcxdr_encode_deviceid4(__be32 *p, const struct nfsd4_deviceid *devid)
+{
+ __be64 *q = (__be64 *)p;
+
+ *q = (__force __be64)devid->fsid_idx;
+ p += 2;
+ *p++ = (__force __be32)devid->generation;
+ *p++ = xdr_zero;
+ return p;
+}
+
+static inline __be32 *
+svcxdr_decode_deviceid4(__be32 *p, struct nfsd4_deviceid *devid)
+{
+ __be64 *q = (__be64 *)p;
+
+ devid->fsid_idx = (__force u64)(*q);
+ p += 2;
+ devid->generation = (__force u32)(*p++);
+ p++; /* NFSD does not use the remaining octets */
+ return p;
+}
+
+static inline __be32
+nfsd4_decode_deviceid4(struct xdr_stream *xdr, struct nfsd4_deviceid *devid)
+{
+ __be32 *p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE);
+
+ if (unlikely(!p))
+ return nfserr_bad_xdr;
+ svcxdr_decode_deviceid4(p, devid);
+ return nfs_ok;
+}
+
struct nfsd4_layout_seg {
u32 iomode;
u64 offset;
@@ -472,11 +663,10 @@ struct nfsd4_layoutcommit {
u32 lc_reclaim; /* request */
u32 lc_newoffset; /* request */
u64 lc_last_wr; /* request */
- struct timespec lc_mtime; /* request */
+ struct timespec64 lc_mtime; /* request */
u32 lc_layout_type; /* request */
- u32 lc_up_len; /* layout length */
- void *lc_up_layout; /* decoded by callback */
- u32 lc_size_chg; /* boolean for response */
+ struct xdr_buf lc_up_layout; /* decoded by callback */
+ bool lc_size_chg; /* response */
u64 lc_newsize; /* response */
};
@@ -488,7 +678,7 @@ struct nfsd4_layoutreturn {
u32 lrf_body_len; /* request */
void *lrf_body; /* request */
stateid_t lr_sid; /* request/response */
- u32 lrs_present; /* response */
+ bool lrs_present; /* response */
};
struct nfsd4_fallocate {
@@ -514,38 +704,84 @@ struct nfsd42_write_res {
stateid_t cb_stateid;
};
+struct nfsd4_cb_offload {
+ struct nfsd4_callback co_cb;
+ struct nfsd42_write_res co_res;
+ __be32 co_nfserr;
+ unsigned int co_retries;
+ struct knfsd_fh co_fh;
+
+ struct nfs4_sessionid co_referring_sessionid;
+ u32 co_referring_slotid;
+ u32 co_referring_seqno;
+};
+
struct nfsd4_copy {
/* request */
- stateid_t cp_src_stateid;
- stateid_t cp_dst_stateid;
- u64 cp_src_pos;
- u64 cp_dst_pos;
- u64 cp_count;
-
- /* both */
- bool cp_synchronous;
+ stateid_t cp_src_stateid;
+ stateid_t cp_dst_stateid;
+ u64 cp_src_pos;
+ u64 cp_dst_pos;
+ u64 cp_count;
+ struct nl4_server *cp_src;
+
+ unsigned long cp_flags;
+#define NFSD4_COPY_F_STOPPED (0)
+#define NFSD4_COPY_F_INTRA (1)
+#define NFSD4_COPY_F_SYNCHRONOUS (2)
+#define NFSD4_COPY_F_COMMITTED (3)
+#define NFSD4_COPY_F_COMPLETED (4)
+#define NFSD4_COPY_F_OFFLOAD_DONE (5)
/* response */
- struct nfsd42_write_res cp_res;
-
- /* for cb_offload */
- struct nfsd4_callback cp_cb;
__be32 nfserr;
+ struct nfsd42_write_res cp_res;
struct knfsd_fh fh;
+ /* offload callback */
+ struct nfsd4_cb_offload cp_cb_offload;
+
struct nfs4_client *cp_clp;
- struct file *file_src;
- struct file *file_dst;
+ struct nfsd_file *nf_src;
+ struct nfsd_file *nf_dst;
- stateid_t cp_stateid;
+ copy_stateid_t cp_stateid;
struct list_head copies;
struct task_struct *copy_task;
refcount_t refcount;
- bool stopped;
+ unsigned int cp_ttl;
+
+ struct nfsd4_ssc_umount_item *ss_nsui;
+ struct nfs_fh c_fh;
+ nfs4_stateid stateid;
+ struct nfsd_net *cp_nn;
};
+static inline void nfsd4_copy_set_sync(struct nfsd4_copy *copy, bool sync)
+{
+ if (sync)
+ set_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
+ else
+ clear_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
+}
+
+static inline bool nfsd4_copy_is_sync(const struct nfsd4_copy *copy)
+{
+ return test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
+}
+
+static inline bool nfsd4_copy_is_async(const struct nfsd4_copy *copy)
+{
+ return !test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
+}
+
+static inline bool nfsd4_ssc_is_inter(const struct nfsd4_copy *copy)
+{
+ return !test_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
+}
+
struct nfsd4_seek {
/* request */
stateid_t seek_stateid;
@@ -563,13 +799,26 @@ struct nfsd4_offload_status {
/* response */
u64 count;
- u32 status;
+ __be32 status;
+ bool completed;
+};
+
+struct nfsd4_copy_notify {
+ /* request */
+ stateid_t cpn_src_stateid;
+ struct nl4_server *cpn_dst;
+
+ /* response */
+ stateid_t cpn_cnr_stateid;
+ struct timespec64 cpn_lease_time;
+ struct nl4_server *cpn_src;
};
struct nfsd4_op {
- int opnum;
- const struct nfsd4_operation * opdesc;
+ u32 opnum;
__be32 status;
+ const struct nfsd4_operation *opdesc;
+ struct nfs4_replay *replay;
union nfsd4_op_u {
struct nfsd4_access access;
struct nfsd4_close close;
@@ -613,6 +862,7 @@ struct nfsd4_op {
struct nfsd4_reclaim_complete reclaim_complete;
struct nfsd4_test_stateid test_stateid;
struct nfsd4_free_stateid free_stateid;
+ struct nfsd4_get_dir_delegation get_dir_delegation;
struct nfsd4_getdeviceinfo getdeviceinfo;
struct nfsd4_layoutget layoutget;
struct nfsd4_layoutcommit layoutcommit;
@@ -625,9 +875,14 @@ struct nfsd4_op {
struct nfsd4_clone clone;
struct nfsd4_copy copy;
struct nfsd4_offload_status offload_status;
+ struct nfsd4_copy_notify copy_notify;
struct nfsd4_seek seek;
+
+ struct nfsd4_getxattr getxattr;
+ struct nfsd4_setxattr setxattr;
+ struct nfsd4_listxattrs listxattrs;
+ struct nfsd4_removexattr removexattr;
} u;
- struct nfs4_replay * replay;
};
bool nfsd4_cache_this_op(struct nfsd4_op *);
@@ -642,59 +897,33 @@ struct svcxdr_tmpbuf {
struct nfsd4_compoundargs {
/* scratch variables for XDR decode */
- __be32 * p;
- __be32 * end;
- struct page ** pagelist;
- int pagelen;
- bool tail;
- __be32 tmp[8];
- __be32 * tmpp;
+ struct xdr_stream *xdr;
struct svcxdr_tmpbuf *to_free;
-
struct svc_rqst *rqstp;
- u32 taglen;
char * tag;
+ u32 taglen;
u32 minorversion;
+ u32 client_opcnt;
u32 opcnt;
+ bool splice_ok;
struct nfsd4_op *ops;
struct nfsd4_op iops[8];
- int cachetype;
};
struct nfsd4_compoundres {
/* scratch variables for XDR encode */
- struct xdr_stream xdr;
+ struct xdr_stream *xdr;
struct svc_rqst * rqstp;
- u32 taglen;
+ __be32 *statusp;
char * tag;
+ u32 taglen;
u32 opcnt;
- __be32 * tagp; /* tag, opcount encode location */
+
struct nfsd4_compound_state cstate;
};
-static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
-{
- struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
- return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
-}
-
-/*
- * The session reply cache only needs to cache replies that the client
- * actually asked us to. But it's almost free for us to cache compounds
- * consisting of only a SEQUENCE op, so we may as well cache those too.
- * Also, the protocol doesn't give us a convenient response in the case
- * of a replay of a solo SEQUENCE op that wasn't cached
- * (RETRY_UNCACHED_REP can only be returned in the second op of a
- * compound).
- */
-static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp)
-{
- return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
- || nfsd4_is_solo_sequence(resp);
-}
-
static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
{
struct nfsd4_compoundres *resp = rqstp->rq_resp;
@@ -709,27 +938,9 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op);
#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
-static inline void
-set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
-{
- BUG_ON(!fhp->fh_pre_saved);
- cinfo->atomic = (u32)fhp->fh_post_saved;
- cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry));
-
- cinfo->before_change = fhp->fh_pre_change;
- cinfo->after_change = fhp->fh_post_change;
- cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
- cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
- cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
- cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
-
-}
-
-
bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
-int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *);
-int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
-int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
+bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
@@ -741,6 +952,7 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *u);
extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *u);
+void nfsd4_exchange_id_release(union nfsd4_op_u *u);
extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *u);
extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *,
@@ -773,8 +985,10 @@ extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *u);
extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
union nfsd4_op_u *u);
+extern void nfsd4_lock_release(union nfsd4_op_u *u);
extern __be32 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
union nfsd4_op_u *u);
+extern void nfsd4_lockt_release(union nfsd4_op_u *u);
extern __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
union nfsd4_op_u *u);
extern __be32
@@ -830,18 +1044,18 @@ struct nfsd4_operation {
u32 op_flags;
char *op_name;
/* Try to get response size before operation */
- u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *);
+ u32 (*op_rsize_bop)(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op);
void (*op_get_currentstateid)(struct nfsd4_compound_state *,
union nfsd4_op_u *);
void (*op_set_currentstateid)(struct nfsd4_compound_state *,
union nfsd4_op_u *);
};
+struct nfsd4_cb_recall_any {
+ struct nfsd4_callback ra_cb;
+ u32 ra_keep;
+ u32 ra_bmval[1];
+};
#endif
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 547cf07cf4e0..f4e29c0c701c 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -6,8 +6,11 @@
#define cb_compound_enc_hdr_sz 4
#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2))
#define sessionid_sz (NFS4_MAX_SESSIONID_LEN >> 2)
+#define enc_referring_call4_sz (1 + 1)
+#define enc_referring_call_list4_sz (sessionid_sz + 1 + \
+ enc_referring_call4_sz)
#define cb_sequence_enc_sz (sessionid_sz + 4 + \
- 1 /* no referring calls list yet */)
+ enc_referring_call_list4_sz)
#define cb_sequence_dec_sz (op_dec_sz + sessionid_sz + 4)
#define op_enc_sz 1
@@ -48,3 +51,31 @@
#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + 1 + 1)
+#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)
+
+/*
+ * 1: CB_GETATTR opcode (32-bit)
+ * N: file_handle
+ * 1: number of entry in attribute array (32-bit)
+ * 3: entry 0-2 in attribute array (32-bit * 3)
+ */
+#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + enc_nfs4_fh_sz + 1 + 3)
+/*
+ * 4: fattr_bitmap_maxsz
+ * 1: attribute array len
+ * 2: change attr (64-bit)
+ * 2: size (64-bit)
+ * 2: atime.seconds (64-bit)
+ * 1: atime.nanoseconds (32-bit)
+ * 2: mtime.seconds (64-bit)
+ * 1: mtime.nanoseconds (32-bit)
+ */
+#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + 4 + 1 + 2 + 2 + 2 + 1 + 2 + 1 + op_dec_sz)