summaryrefslogtreecommitdiff
path: root/fs/nfs_common
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs_common')
-rw-r--r--fs/nfs_common/Makefile8
-rw-r--r--fs/nfs_common/common.c201
-rw-r--r--fs/nfs_common/grace.c11
-rw-r--r--fs/nfs_common/localio_trace.c10
-rw-r--r--fs/nfs_common/localio_trace.h56
-rw-r--r--fs/nfs_common/nfs_ssc.c91
-rw-r--r--fs/nfs_common/nfsacl.c133
-rw-r--r--fs/nfs_common/nfslocalio.c373
8 files changed, 878 insertions, 5 deletions
diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile
index d153ca3ea577..c10ead273ff2 100644
--- a/fs/nfs_common/Makefile
+++ b/fs/nfs_common/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Makefile for Linux filesystem routines that are shared by client and server.
#
@@ -5,4 +6,11 @@
obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o
nfs_acl-objs := nfsacl.o
+CFLAGS_localio_trace.o += -I$(src)
+obj-$(CONFIG_NFS_COMMON_LOCALIO_SUPPORT) += nfs_localio.o
+nfs_localio-objs := nfslocalio.o localio_trace.o
+
obj-$(CONFIG_GRACE_PERIOD) += grace.o
+obj-$(CONFIG_NFS_V4_2_SSC_HELPER) += nfs_ssc.o
+
+obj-$(CONFIG_NFS_COMMON) += common.o
diff --git a/fs/nfs_common/common.c b/fs/nfs_common/common.c
new file mode 100644
index 000000000000..af09aed09fd2
--- /dev/null
+++ b/fs/nfs_common/common.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/module.h>
+#include <linux/nfs_common.h>
+#include <linux/nfs4.h>
+
+/*
+ * We need to translate between nfs status return values and
+ * the local errno values which may not be the same.
+ */
+static const struct {
+ int stat;
+ int errno;
+} nfs_errtbl[] = {
+ { NFS_OK, 0 },
+ { NFSERR_PERM, -EPERM },
+ { NFSERR_NOENT, -ENOENT },
+ { NFSERR_IO, -EIO },
+ { NFSERR_NXIO, -ENXIO },
+/* { NFSERR_EAGAIN, -EAGAIN }, */
+ { NFSERR_ACCES, -EACCES },
+ { NFSERR_EXIST, -EEXIST },
+ { NFSERR_XDEV, -EXDEV },
+ { NFSERR_NODEV, -ENODEV },
+ { NFSERR_NOTDIR, -ENOTDIR },
+ { NFSERR_ISDIR, -EISDIR },
+ { NFSERR_INVAL, -EINVAL },
+ { NFSERR_FBIG, -EFBIG },
+ { NFSERR_NOSPC, -ENOSPC },
+ { NFSERR_ROFS, -EROFS },
+ { NFSERR_MLINK, -EMLINK },
+ { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
+ { NFSERR_NOTEMPTY, -ENOTEMPTY },
+ { NFSERR_DQUOT, -EDQUOT },
+ { NFSERR_STALE, -ESTALE },
+ { NFSERR_REMOTE, -EREMOTE },
+#ifdef EWFLUSH
+ { NFSERR_WFLUSH, -EWFLUSH },
+#endif
+ { NFSERR_BADHANDLE, -EBADHANDLE },
+ { NFSERR_NOT_SYNC, -ENOTSYNC },
+ { NFSERR_BAD_COOKIE, -EBADCOOKIE },
+ { NFSERR_NOTSUPP, -ENOTSUPP },
+ { NFSERR_TOOSMALL, -ETOOSMALL },
+ { NFSERR_SERVERFAULT, -EREMOTEIO },
+ { NFSERR_BADTYPE, -EBADTYPE },
+ { NFSERR_JUKEBOX, -EJUKEBOX },
+};
+
+/**
+ * nfs_stat_to_errno - convert an NFS status code to a local errno
+ * @status: NFS status code to convert
+ *
+ * Returns a local errno value, or -EIO if the NFS status code is
+ * not recognized. This function is used jointly by NFSv2 and NFSv3.
+ */
+int nfs_stat_to_errno(enum nfs_stat status)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
+ if (nfs_errtbl[i].stat == (int)status)
+ return nfs_errtbl[i].errno;
+ }
+ return -EIO;
+}
+EXPORT_SYMBOL_GPL(nfs_stat_to_errno);
+
+/*
+ * We need to translate between nfs v4 status return values and
+ * the local errno values which may not be the same.
+ *
+ * nfs4_errtbl_common[] is used before more specialized mappings
+ * available in nfs4_errtbl[] or nfs4_errtbl_localio[].
+ */
+static const struct {
+ int stat;
+ int errno;
+} nfs4_errtbl_common[] = {
+ { NFS4_OK, 0 },
+ { NFS4ERR_PERM, -EPERM },
+ { NFS4ERR_NOENT, -ENOENT },
+ { NFS4ERR_IO, -EIO },
+ { NFS4ERR_NXIO, -ENXIO },
+ { NFS4ERR_ACCESS, -EACCES },
+ { NFS4ERR_EXIST, -EEXIST },
+ { NFS4ERR_XDEV, -EXDEV },
+ { NFS4ERR_NOTDIR, -ENOTDIR },
+ { NFS4ERR_ISDIR, -EISDIR },
+ { NFS4ERR_INVAL, -EINVAL },
+ { NFS4ERR_FBIG, -EFBIG },
+ { NFS4ERR_NOSPC, -ENOSPC },
+ { NFS4ERR_ROFS, -EROFS },
+ { NFS4ERR_MLINK, -EMLINK },
+ { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
+ { NFS4ERR_NOTEMPTY, -ENOTEMPTY },
+ { NFS4ERR_DQUOT, -EDQUOT },
+ { NFS4ERR_STALE, -ESTALE },
+ { NFS4ERR_BADHANDLE, -EBADHANDLE },
+ { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
+ { NFS4ERR_NOTSUPP, -ENOTSUPP },
+ { NFS4ERR_TOOSMALL, -ETOOSMALL },
+ { NFS4ERR_BADTYPE, -EBADTYPE },
+ { NFS4ERR_SYMLINK, -ELOOP },
+ { NFS4ERR_DEADLOCK, -EDEADLK },
+};
+
+static const struct {
+ int stat;
+ int errno;
+} nfs4_errtbl[] = {
+ { NFS4ERR_SERVERFAULT, -EREMOTEIO },
+ { NFS4ERR_LOCKED, -EAGAIN },
+ { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
+ { NFS4ERR_NOXATTR, -ENODATA },
+ { NFS4ERR_XATTR2BIG, -E2BIG },
+};
+
+/*
+ * Convert an NFS error code to a local one.
+ * This one is used by NFSv4.
+ */
+int nfs4_stat_to_errno(int stat)
+{
+ int i;
+
+ /* First check nfs4_errtbl_common */
+ for (i = 0; i < ARRAY_SIZE(nfs4_errtbl_common); i++) {
+ if (nfs4_errtbl_common[i].stat == stat)
+ return nfs4_errtbl_common[i].errno;
+ }
+ /* Then check nfs4_errtbl */
+ for (i = 0; i < ARRAY_SIZE(nfs4_errtbl); i++) {
+ if (nfs4_errtbl[i].stat == stat)
+ return nfs4_errtbl[i].errno;
+ }
+ if (stat <= 10000 || stat > 10100) {
+ /* The server is looney tunes. */
+ return -EREMOTEIO;
+ }
+ /* If we cannot translate the error, the recovery routines should
+ * handle it.
+ * Note: remaining NFSv4 error codes have values > 10000, so should
+ * not conflict with native Linux error codes.
+ */
+ return -stat;
+}
+EXPORT_SYMBOL_GPL(nfs4_stat_to_errno);
+
+/*
+ * This table is useful for conversion from local errno to NFS error.
+ * It provides more logically correct mappings for use with LOCALIO
+ * (which is focused on converting from errno to NFS status).
+ */
+static const struct {
+ int stat;
+ int errno;
+} nfs4_errtbl_localio[] = {
+ /* Map errors differently than nfs4_errtbl */
+ { NFS4ERR_IO, -EREMOTEIO },
+ { NFS4ERR_DELAY, -EAGAIN },
+ { NFS4ERR_FBIG, -E2BIG },
+ /* Map errors not handled by nfs4_errtbl */
+ { NFS4ERR_STALE, -EBADF },
+ { NFS4ERR_STALE, -EOPENSTALE },
+ { NFS4ERR_DELAY, -ETIMEDOUT },
+ { NFS4ERR_DELAY, -ERESTARTSYS },
+ { NFS4ERR_DELAY, -ENOMEM },
+ { NFS4ERR_IO, -ETXTBSY },
+ { NFS4ERR_IO, -EBUSY },
+ { NFS4ERR_SERVERFAULT, -ESERVERFAULT },
+ { NFS4ERR_SERVERFAULT, -ENFILE },
+ { NFS4ERR_IO, -EUCLEAN },
+ { NFS4ERR_PERM, -ENOKEY },
+};
+
+/*
+ * Convert an errno to an NFS error code for LOCALIO.
+ */
+__u32 nfs_localio_errno_to_nfs4_stat(int errno)
+{
+ int i;
+
+ /* First check nfs4_errtbl_common */
+ for (i = 0; i < ARRAY_SIZE(nfs4_errtbl_common); i++) {
+ if (nfs4_errtbl_common[i].errno == errno)
+ return nfs4_errtbl_common[i].stat;
+ }
+ /* Then check nfs4_errtbl_localio */
+ for (i = 0; i < ARRAY_SIZE(nfs4_errtbl_localio); i++) {
+ if (nfs4_errtbl_localio[i].errno == errno)
+ return nfs4_errtbl_localio[i].stat;
+ }
+ /* If we cannot translate the error, the recovery routines should
+ * handle it.
+ * Note: remaining NFSv4 error codes have values > 10000, so should
+ * not conflict with native Linux error codes.
+ */
+ return NFS4ERR_SERVERFAULT;
+}
+EXPORT_SYMBOL_GPL(nfs_localio_errno_to_nfs4_stat);
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 5be08f02a76b..27cd0d13143b 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Common code for control of lockd and nfsv4 grace periods.
*
@@ -8,6 +9,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/fs.h>
+#include <linux/filelock.h>
static unsigned int grace_net_id;
static DEFINE_SPINLOCK(grace_lock);
@@ -41,7 +43,6 @@ EXPORT_SYMBOL_GPL(locks_start_grace);
/**
* locks_end_grace
- * @net: net namespace that this lock manager belongs to
* @lm: who this grace period is for
*
* Call this function to state that the given lock manager is ready to
@@ -68,15 +69,20 @@ __state_in_grace(struct net *net, bool open)
if (!open)
return !list_empty(grace_list);
+ spin_lock(&grace_lock);
list_for_each_entry(lm, grace_list, list) {
- if (lm->block_opens)
+ if (lm->block_opens) {
+ spin_unlock(&grace_lock);
return true;
+ }
}
+ spin_unlock(&grace_lock);
return false;
}
/**
* locks_in_grace
+ * @net: network namespace
*
* Lock managers call this function to determine when it is OK for them
* to answer ordinary lock requests, and when they should accept only
@@ -133,6 +139,7 @@ exit_grace(void)
}
MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>");
+MODULE_DESCRIPTION("NFS client and server infrastructure");
MODULE_LICENSE("GPL");
module_init(init_grace)
module_exit(exit_grace)
diff --git a/fs/nfs_common/localio_trace.c b/fs/nfs_common/localio_trace.c
new file mode 100644
index 000000000000..7decfe57abeb
--- /dev/null
+++ b/fs/nfs_common/localio_trace.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2024 Trond Myklebust <trond.myklebust@hammerspace.com>
+ * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
+ */
+#include <linux/nfs_fs.h>
+#include <linux/namei.h>
+
+#define CREATE_TRACE_POINTS
+#include "localio_trace.h"
diff --git a/fs/nfs_common/localio_trace.h b/fs/nfs_common/localio_trace.h
new file mode 100644
index 000000000000..4055aec9ff8d
--- /dev/null
+++ b/fs/nfs_common/localio_trace.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2024 Trond Myklebust <trond.myklebust@hammerspace.com>
+ * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nfs_localio
+
+#if !defined(_TRACE_NFS_COMMON_LOCALIO_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NFS_COMMON_LOCALIO_H
+
+#include <linux/tracepoint.h>
+
+#include <trace/misc/fs.h>
+#include <trace/misc/nfs.h>
+#include <trace/misc/sunrpc.h>
+
+DECLARE_EVENT_CLASS(nfs_local_client_event,
+ TP_PROTO(
+ const struct nfs_client *clp
+ ),
+
+ TP_ARGS(clp),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, protocol)
+ __string(server, clp->cl_hostname)
+ ),
+
+ TP_fast_assign(
+ __entry->protocol = clp->rpc_ops->version;
+ __assign_str(server);
+ ),
+
+ TP_printk(
+ "server=%s NFSv%u", __get_str(server), __entry->protocol
+ )
+);
+
+#define DEFINE_NFS_LOCAL_CLIENT_EVENT(name) \
+ DEFINE_EVENT(nfs_local_client_event, name, \
+ TP_PROTO( \
+ const struct nfs_client *clp \
+ ), \
+ TP_ARGS(clp))
+
+DEFINE_NFS_LOCAL_CLIENT_EVENT(nfs_localio_enable_client);
+DEFINE_NFS_LOCAL_CLIENT_EVENT(nfs_localio_disable_client);
+
+#endif /* _TRACE_NFS_COMMON_LOCALIO_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE localio_trace
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/fs/nfs_common/nfs_ssc.c b/fs/nfs_common/nfs_ssc.c
new file mode 100644
index 000000000000..832246b22c51
--- /dev/null
+++ b/fs/nfs_common/nfs_ssc.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helper for knfsd's SSC to access ops in NFS client modules
+ *
+ * Author: Dai Ngo <dai.ngo@oracle.com>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/nfs_ssc.h>
+#include "../nfs/nfs4_fs.h"
+
+
+struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl;
+EXPORT_SYMBOL_GPL(nfs_ssc_client_tbl);
+
+#ifdef CONFIG_NFS_V4_2
+/**
+ * nfs42_ssc_register - install the NFS_V4 client ops in the nfs_ssc_client_tbl
+ * @ops: NFS_V4 ops to be installed
+ *
+ * Return values:
+ * None
+ */
+void nfs42_ssc_register(const struct nfs4_ssc_client_ops *ops)
+{
+ nfs_ssc_client_tbl.ssc_nfs4_ops = ops;
+}
+EXPORT_SYMBOL_GPL(nfs42_ssc_register);
+
+/**
+ * nfs42_ssc_unregister - uninstall the NFS_V4 client ops from
+ * the nfs_ssc_client_tbl
+ * @ops: ops to be uninstalled
+ *
+ * Return values:
+ * None
+ */
+void nfs42_ssc_unregister(const struct nfs4_ssc_client_ops *ops)
+{
+ if (nfs_ssc_client_tbl.ssc_nfs4_ops != ops)
+ return;
+
+ nfs_ssc_client_tbl.ssc_nfs4_ops = NULL;
+}
+EXPORT_SYMBOL_GPL(nfs42_ssc_unregister);
+#endif /* CONFIG_NFS_V4_2 */
+
+#ifdef CONFIG_NFS_V4_2
+/**
+ * nfs_ssc_register - install the NFS_FS client ops in the nfs_ssc_client_tbl
+ * @ops: NFS_FS ops to be installed
+ *
+ * Return values:
+ * None
+ */
+void nfs_ssc_register(const struct nfs_ssc_client_ops *ops)
+{
+ nfs_ssc_client_tbl.ssc_nfs_ops = ops;
+}
+EXPORT_SYMBOL_GPL(nfs_ssc_register);
+
+/**
+ * nfs_ssc_unregister - uninstall the NFS_FS client ops from
+ * the nfs_ssc_client_tbl
+ * @ops: ops to be uninstalled
+ *
+ * Return values:
+ * None
+ */
+void nfs_ssc_unregister(const struct nfs_ssc_client_ops *ops)
+{
+ if (nfs_ssc_client_tbl.ssc_nfs_ops != ops)
+ return;
+ nfs_ssc_client_tbl.ssc_nfs_ops = NULL;
+}
+EXPORT_SYMBOL_GPL(nfs_ssc_unregister);
+
+#else
+void nfs_ssc_register(const struct nfs_ssc_client_ops *ops)
+{
+}
+EXPORT_SYMBOL_GPL(nfs_ssc_register);
+
+void nfs_ssc_unregister(const struct nfs_ssc_client_ops *ops)
+{
+}
+EXPORT_SYMBOL_GPL(nfs_ssc_unregister);
+#endif /* CONFIG_NFS_V4_2 */
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index 538f142935ea..e2eaac14fd8e 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* fs/nfs_common/nfsacl.c
*
@@ -28,6 +29,7 @@
#include <linux/nfs3.h>
#include <linux/sort.h>
+MODULE_DESCRIPTION("NFS ACL support");
MODULE_LICENSE("GPL");
struct nfsacl_encode_desc {
@@ -40,7 +42,7 @@ struct nfsacl_encode_desc {
};
struct nfsacl_simple_acl {
- struct posix_acl acl;
+ struct posix_acl_hdr acl;
struct posix_acl_entry ace[4];
};
@@ -110,7 +112,8 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
xdr_encode_word(buf, base, entries))
return -EINVAL;
if (encode_entries && acl && acl->a_count == 3) {
- struct posix_acl *acl2 = &aclbuf.acl;
+ struct posix_acl *acl2 =
+ container_of(&aclbuf.acl, struct posix_acl, hdr);
/* Avoid the use of posix_acl_alloc(). nfsacl_encode() is
* invoked in contexts where a memory allocation failure is
@@ -135,6 +138,78 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
}
EXPORT_SYMBOL_GPL(nfsacl_encode);
+/**
+ * nfs_stream_encode_acl - Encode an NFSv3 ACL
+ *
+ * @xdr: an xdr_stream positioned to receive an encoded ACL
+ * @inode: inode of file whose ACL this is
+ * @acl: posix_acl to encode
+ * @encode_entries: whether to encode ACEs as well
+ * @typeflag: ACL type: NFS_ACL_DEFAULT or zero
+ *
+ * Return values:
+ * %false: The ACL could not be encoded
+ * %true: @xdr is advanced to the next available position
+ */
+bool nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode,
+ struct posix_acl *acl, int encode_entries,
+ int typeflag)
+{
+ const size_t elem_size = XDR_UNIT * 3;
+ u32 entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0;
+ struct nfsacl_encode_desc nfsacl_desc = {
+ .desc = {
+ .elem_size = elem_size,
+ .array_len = encode_entries ? entries : 0,
+ .xcode = xdr_nfsace_encode,
+ },
+ .acl = acl,
+ .typeflag = typeflag,
+ .uid = inode->i_uid,
+ .gid = inode->i_gid,
+ };
+ struct nfsacl_simple_acl aclbuf;
+ unsigned int base;
+ int err;
+
+ if (entries > NFS_ACL_MAX_ENTRIES)
+ return false;
+ if (xdr_stream_encode_u32(xdr, entries) < 0)
+ return false;
+
+ if (encode_entries && acl && acl->a_count == 3) {
+ struct posix_acl *acl2 =
+ container_of(&aclbuf.acl, struct posix_acl, hdr);
+
+ /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is
+ * invoked in contexts where a memory allocation failure is
+ * fatal. Fortunately this fake ACL is small enough to
+ * construct on the stack. */
+ posix_acl_init(acl2, 4);
+
+ /* Insert entries in canonical order: other orders seem
+ to confuse Solaris VxFS. */
+ acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */
+ acl2->a_entries[1] = acl->a_entries[1]; /* ACL_GROUP_OBJ */
+ acl2->a_entries[2] = acl->a_entries[1]; /* ACL_MASK */
+ acl2->a_entries[2].e_tag = ACL_MASK;
+ acl2->a_entries[3] = acl->a_entries[2]; /* ACL_OTHER */
+ nfsacl_desc.acl = acl2;
+ }
+
+ base = xdr_stream_pos(xdr);
+ if (!xdr_reserve_space(xdr, XDR_UNIT +
+ elem_size * nfsacl_desc.desc.array_len))
+ return false;
+ err = xdr_encode_array2(xdr->buf, base, &nfsacl_desc.desc);
+ if (err)
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(nfs_stream_encode_acl);
+
+
struct nfsacl_decode_desc {
struct xdr_array2_desc desc;
unsigned int count;
@@ -236,7 +311,7 @@ posix_acl_from_nfsacl(struct posix_acl *acl)
break;
case ACL_MASK:
mask = pa;
- /* fall through */
+ fallthrough;
case ACL_OTHER:
break;
}
@@ -294,3 +369,55 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
nfsacl_desc.desc.array_len;
}
EXPORT_SYMBOL_GPL(nfsacl_decode);
+
+/**
+ * nfs_stream_decode_acl - Decode an NFSv3 ACL
+ *
+ * @xdr: an xdr_stream positioned at an encoded ACL
+ * @aclcnt: OUT: count of ACEs in decoded posix_acl
+ * @pacl: OUT: a dynamically-allocated buffer containing the decoded posix_acl
+ *
+ * Return values:
+ * %false: The encoded ACL is not valid
+ * %true: @pacl contains a decoded ACL, and @xdr is advanced
+ *
+ * On a successful return, caller must release *pacl using posix_acl_release().
+ */
+bool nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt,
+ struct posix_acl **pacl)
+{
+ const size_t elem_size = XDR_UNIT * 3;
+ struct nfsacl_decode_desc nfsacl_desc = {
+ .desc = {
+ .elem_size = elem_size,
+ .xcode = pacl ? xdr_nfsace_decode : NULL,
+ },
+ };
+ unsigned int base;
+ u32 entries;
+
+ if (xdr_stream_decode_u32(xdr, &entries) < 0)
+ return false;
+ if (entries > NFS_ACL_MAX_ENTRIES)
+ return false;
+
+ base = xdr_stream_pos(xdr);
+ if (!xdr_inline_decode(xdr, XDR_UNIT + elem_size * entries))
+ return false;
+ nfsacl_desc.desc.array_maxlen = entries;
+ if (xdr_decode_array2(xdr->buf, base, &nfsacl_desc.desc))
+ return false;
+
+ if (pacl) {
+ if (entries != nfsacl_desc.desc.array_len ||
+ posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) {
+ posix_acl_release(nfsacl_desc.acl);
+ return false;
+ }
+ *pacl = nfsacl_desc.acl;
+ }
+ if (aclcnt)
+ *aclcnt = entries;
+ return true;
+}
+EXPORT_SYMBOL_GPL(nfs_stream_decode_acl);
diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
new file mode 100644
index 000000000000..dd715cdb6c04
--- /dev/null
+++ b/fs/nfs_common/nfslocalio.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
+ * Copyright (C) 2024 NeilBrown <neilb@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/nfslocalio.h>
+#include <linux/nfs3.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <net/netns/generic.h>
+
+#include "localio_trace.h"
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("NFS localio protocol bypass support");
+
+static DEFINE_SPINLOCK(nfs_uuids_lock);
+
+/*
+ * Global list of nfs_uuid_t instances
+ * that is protected by nfs_uuids_lock.
+ */
+static LIST_HEAD(nfs_uuids);
+
+/*
+ * Lock ordering:
+ * 1: nfs_uuid->lock
+ * 2: nfs_uuids_lock
+ * 3: nfs_uuid->list_lock (aka nn->local_clients_lock)
+ *
+ * May skip locks in select cases, but never hold multiple
+ * locks out of order.
+ */
+
+void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
+{
+ RCU_INIT_POINTER(nfs_uuid->net, NULL);
+ nfs_uuid->dom = NULL;
+ nfs_uuid->list_lock = NULL;
+ INIT_LIST_HEAD(&nfs_uuid->list);
+ INIT_LIST_HEAD(&nfs_uuid->files);
+ spin_lock_init(&nfs_uuid->lock);
+ nfs_uuid->nfs3_localio_probe_count = 0;
+}
+EXPORT_SYMBOL_GPL(nfs_uuid_init);
+
+bool nfs_uuid_begin(nfs_uuid_t *nfs_uuid)
+{
+ spin_lock(&nfs_uuid->lock);
+ if (rcu_access_pointer(nfs_uuid->net)) {
+ /* This nfs_uuid is already in use */
+ spin_unlock(&nfs_uuid->lock);
+ return false;
+ }
+
+ spin_lock(&nfs_uuids_lock);
+ if (!list_empty(&nfs_uuid->list)) {
+ /* This nfs_uuid is already in use */
+ spin_unlock(&nfs_uuids_lock);
+ spin_unlock(&nfs_uuid->lock);
+ return false;
+ }
+ list_add_tail(&nfs_uuid->list, &nfs_uuids);
+ spin_unlock(&nfs_uuids_lock);
+
+ uuid_gen(&nfs_uuid->uuid);
+ spin_unlock(&nfs_uuid->lock);
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(nfs_uuid_begin);
+
+void nfs_uuid_end(nfs_uuid_t *nfs_uuid)
+{
+ if (!rcu_access_pointer(nfs_uuid->net)) {
+ spin_lock(&nfs_uuid->lock);
+ if (!rcu_access_pointer(nfs_uuid->net)) {
+ /* Not local, remove from nfs_uuids */
+ spin_lock(&nfs_uuids_lock);
+ list_del_init(&nfs_uuid->list);
+ spin_unlock(&nfs_uuids_lock);
+ }
+ spin_unlock(&nfs_uuid->lock);
+ }
+}
+EXPORT_SYMBOL_GPL(nfs_uuid_end);
+
+static nfs_uuid_t * nfs_uuid_lookup_locked(const uuid_t *uuid)
+{
+ nfs_uuid_t *nfs_uuid;
+
+ list_for_each_entry(nfs_uuid, &nfs_uuids, list)
+ if (uuid_equal(&nfs_uuid->uuid, uuid))
+ return nfs_uuid;
+
+ return NULL;
+}
+
+static struct module *nfsd_mod;
+
+void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list,
+ spinlock_t *list_lock, struct net *net,
+ struct auth_domain *dom, struct module *mod)
+{
+ nfs_uuid_t *nfs_uuid;
+
+ spin_lock(&nfs_uuids_lock);
+ nfs_uuid = nfs_uuid_lookup_locked(uuid);
+ if (!nfs_uuid) {
+ spin_unlock(&nfs_uuids_lock);
+ return;
+ }
+
+ /*
+ * We don't hold a ref on the net, but instead put
+ * ourselves on @list (nn->local_clients) so the net
+ * pointer can be invalidated.
+ */
+ spin_lock(list_lock); /* list_lock is nn->local_clients_lock */
+ list_move(&nfs_uuid->list, list);
+ spin_unlock(list_lock);
+
+ spin_unlock(&nfs_uuids_lock);
+ /* Once nfs_uuid is parented to @list, avoid global nfs_uuids_lock */
+ spin_lock(&nfs_uuid->lock);
+
+ __module_get(mod);
+ nfsd_mod = mod;
+
+ nfs_uuid->list_lock = list_lock;
+ kref_get(&dom->ref);
+ nfs_uuid->dom = dom;
+ rcu_assign_pointer(nfs_uuid->net, net);
+ spin_unlock(&nfs_uuid->lock);
+}
+EXPORT_SYMBOL_GPL(nfs_uuid_is_local);
+
+void nfs_localio_enable_client(struct nfs_client *clp)
+{
+ /* nfs_uuid_is_local() does the actual enablement */
+ trace_nfs_localio_enable_client(clp);
+}
+EXPORT_SYMBOL_GPL(nfs_localio_enable_client);
+
+/*
+ * Cleanup the nfs_uuid_t embedded in an nfs_client.
+ * This is the long-form of nfs_uuid_init().
+ */
+static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid)
+{
+ struct nfs_file_localio *nfl;
+
+ spin_lock(&nfs_uuid->lock);
+ if (unlikely(!rcu_access_pointer(nfs_uuid->net))) {
+ spin_unlock(&nfs_uuid->lock);
+ return false;
+ }
+ RCU_INIT_POINTER(nfs_uuid->net, NULL);
+
+ if (nfs_uuid->dom) {
+ auth_domain_put(nfs_uuid->dom);
+ nfs_uuid->dom = NULL;
+ }
+
+ /* Walk list of files and ensure their last references dropped */
+
+ while ((nfl = list_first_entry_or_null(&nfs_uuid->files,
+ struct nfs_file_localio,
+ list)) != NULL) {
+ /* If nfs_uuid is already NULL, nfs_close_local_fh is
+ * closing and we must wait, else we unlink and close.
+ */
+ if (rcu_access_pointer(nfl->nfs_uuid) == NULL) {
+ /* nfs_close_local_fh() is doing the
+ * close and we must wait. until it unlinks
+ */
+ wait_var_event_spinlock(nfs_uuid,
+ list_first_entry_or_null(
+ &nfs_uuid->files,
+ struct nfs_file_localio,
+ list) != nfl,
+ &nfs_uuid->lock);
+ continue;
+ }
+
+ /* Remove nfl from nfs_uuid->files list */
+ list_del_init(&nfl->list);
+ spin_unlock(&nfs_uuid->lock);
+
+ nfs_to_nfsd_file_put_local(&nfl->ro_file);
+ nfs_to_nfsd_file_put_local(&nfl->rw_file);
+ cond_resched();
+
+ spin_lock(&nfs_uuid->lock);
+ /* Now we can allow racing nfs_close_local_fh() to
+ * skip the locking.
+ */
+ store_release_wake_up(&nfl->nfs_uuid, RCU_INITIALIZER(NULL));
+ }
+
+ /* Remove client from nn->local_clients */
+ if (nfs_uuid->list_lock) {
+ spin_lock(nfs_uuid->list_lock);
+ BUG_ON(list_empty(&nfs_uuid->list));
+ list_del_init(&nfs_uuid->list);
+ spin_unlock(nfs_uuid->list_lock);
+ nfs_uuid->list_lock = NULL;
+ }
+
+ module_put(nfsd_mod);
+ spin_unlock(&nfs_uuid->lock);
+
+ return true;
+}
+
+void nfs_localio_disable_client(struct nfs_client *clp)
+{
+ if (nfs_uuid_put(&clp->cl_uuid))
+ trace_nfs_localio_disable_client(clp);
+}
+EXPORT_SYMBOL_GPL(nfs_localio_disable_client);
+
+void nfs_localio_invalidate_clients(struct list_head *nn_local_clients,
+ spinlock_t *nn_local_clients_lock)
+{
+ LIST_HEAD(local_clients);
+ nfs_uuid_t *nfs_uuid, *tmp;
+ struct nfs_client *clp;
+
+ spin_lock(nn_local_clients_lock);
+ list_splice_init(nn_local_clients, &local_clients);
+ spin_unlock(nn_local_clients_lock);
+ list_for_each_entry_safe(nfs_uuid, tmp, &local_clients, list) {
+ if (WARN_ON(nfs_uuid->list_lock != nn_local_clients_lock))
+ break;
+ clp = container_of(nfs_uuid, struct nfs_client, cl_uuid);
+ nfs_localio_disable_client(clp);
+ }
+}
+EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients);
+
+static int nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl)
+{
+ int ret = 0;
+
+ /* Add nfl to nfs_uuid->files if it isn't already */
+ spin_lock(&nfs_uuid->lock);
+ if (rcu_access_pointer(nfs_uuid->net) == NULL) {
+ ret = -ENXIO;
+ } else if (list_empty(&nfl->list)) {
+ rcu_assign_pointer(nfl->nfs_uuid, nfs_uuid);
+ list_add_tail(&nfl->list, &nfs_uuid->files);
+ }
+ spin_unlock(&nfs_uuid->lock);
+ return ret;
+}
+
+/*
+ * Caller is responsible for calling nfsd_net_put and
+ * nfsd_file_put (via nfs_to_nfsd_file_put_local).
+ */
+struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
+ struct rpc_clnt *rpc_clnt, const struct cred *cred,
+ const struct nfs_fh *nfs_fh, struct nfs_file_localio *nfl,
+ struct nfsd_file __rcu **pnf,
+ const fmode_t fmode)
+{
+ struct net *net;
+ struct nfsd_file *localio;
+
+ /*
+ * Not running in nfsd context, so must safely get reference on nfsd_serv.
+ * But the server may already be shutting down, if so disallow new localio.
+ * uuid->net is NOT a counted reference, but rcu_read_lock() ensures that
+ * if uuid->net is not NULL, then calling nfsd_net_try_get() is safe
+ * and if it succeeds we will have an implied reference to the net.
+ *
+ * Otherwise NFS may not have ref on NFSD and therefore cannot safely
+ * make 'nfs_to' calls.
+ */
+ rcu_read_lock();
+ net = rcu_dereference(uuid->net);
+ if (!net || !nfs_to->nfsd_net_try_get(net)) {
+ rcu_read_unlock();
+ return ERR_PTR(-ENXIO);
+ }
+ rcu_read_unlock();
+ /* We have an implied reference to net thanks to nfsd_net_try_get */
+ localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, cred,
+ nfs_fh, pnf, fmode);
+ if (!IS_ERR(localio) && nfs_uuid_add_file(uuid, nfl) < 0) {
+ /* Delete the cached file when racing with nfs_uuid_put() */
+ nfs_to_nfsd_file_put_local(pnf);
+ }
+ nfs_to_nfsd_net_put(net);
+
+ return localio;
+}
+EXPORT_SYMBOL_GPL(nfs_open_local_fh);
+
+void nfs_close_local_fh(struct nfs_file_localio *nfl)
+{
+ nfs_uuid_t *nfs_uuid;
+
+ rcu_read_lock();
+ nfs_uuid = rcu_dereference(nfl->nfs_uuid);
+ if (!nfs_uuid) {
+ /* regular (non-LOCALIO) NFS will hammer this */
+ rcu_read_unlock();
+ return;
+ }
+
+ spin_lock(&nfs_uuid->lock);
+ if (!rcu_access_pointer(nfl->nfs_uuid)) {
+ /* nfs_uuid_put has finished here */
+ spin_unlock(&nfs_uuid->lock);
+ rcu_read_unlock();
+ return;
+ }
+ if (list_empty(&nfl->list)) {
+ /* nfs_uuid_put() has started closing files, wait for it
+ * to finished
+ */
+ spin_unlock(&nfs_uuid->lock);
+ rcu_read_unlock();
+ wait_var_event(&nfl->nfs_uuid,
+ rcu_access_pointer(nfl->nfs_uuid) == NULL);
+ return;
+ }
+ /* tell nfs_uuid_put() to wait for us */
+ RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
+ spin_unlock(&nfs_uuid->lock);
+ rcu_read_unlock();
+
+ nfs_to_nfsd_file_put_local(&nfl->ro_file);
+ nfs_to_nfsd_file_put_local(&nfl->rw_file);
+
+ /* Remove nfl from nfs_uuid->files list and signal nfs_uuid_put()
+ * that we are done. The moment we drop the spinlock the
+ * nfs_uuid could be freed.
+ */
+ spin_lock(&nfs_uuid->lock);
+ list_del_init(&nfl->list);
+ wake_up_var_locked(nfs_uuid, &nfs_uuid->lock);
+ spin_unlock(&nfs_uuid->lock);
+}
+EXPORT_SYMBOL_GPL(nfs_close_local_fh);
+
+/*
+ * The NFS LOCALIO code needs to call into NFSD using various symbols,
+ * but cannot be statically linked, because that will make the NFS
+ * module always depend on the NFSD module.
+ *
+ * 'nfs_to' provides NFS access to NFSD functions needed for LOCALIO,
+ * its lifetime is tightly coupled to the NFSD module and will always
+ * be available to NFS LOCALIO because any successful client<->server
+ * LOCALIO handshake results in a reference on the NFSD module (above),
+ * so NFS implicitly holds a reference to the NFSD module and its
+ * functions in the 'nfs_to' nfsd_localio_operations cannot disappear.
+ *
+ * If the last NFS client using LOCALIO disconnects (and its reference
+ * on NFSD dropped) then NFSD could be unloaded, resulting in 'nfs_to'
+ * functions being invalid pointers. But if NFSD isn't loaded then NFS
+ * will not be able to handshake with NFSD and will have no cause to
+ * try to call 'nfs_to' function pointers. If/when NFSD is reloaded it
+ * will reinitialize the 'nfs_to' function pointers and make LOCALIO
+ * possible.
+ */
+const struct nfsd_localio_operations *nfs_to;
+EXPORT_SYMBOL_GPL(nfs_to);