diff options
Diffstat (limited to 'fs/nfs_common')
| -rw-r--r-- | fs/nfs_common/Makefile | 8 | ||||
| -rw-r--r-- | fs/nfs_common/common.c | 201 | ||||
| -rw-r--r-- | fs/nfs_common/grace.c | 11 | ||||
| -rw-r--r-- | fs/nfs_common/localio_trace.c | 10 | ||||
| -rw-r--r-- | fs/nfs_common/localio_trace.h | 56 | ||||
| -rw-r--r-- | fs/nfs_common/nfs_ssc.c | 91 | ||||
| -rw-r--r-- | fs/nfs_common/nfsacl.c | 133 | ||||
| -rw-r--r-- | fs/nfs_common/nfslocalio.c | 373 |
8 files changed, 878 insertions, 5 deletions
diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile index d153ca3ea577..c10ead273ff2 100644 --- a/fs/nfs_common/Makefile +++ b/fs/nfs_common/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Makefile for Linux filesystem routines that are shared by client and server. # @@ -5,4 +6,11 @@ obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o nfs_acl-objs := nfsacl.o +CFLAGS_localio_trace.o += -I$(src) +obj-$(CONFIG_NFS_COMMON_LOCALIO_SUPPORT) += nfs_localio.o +nfs_localio-objs := nfslocalio.o localio_trace.o + obj-$(CONFIG_GRACE_PERIOD) += grace.o +obj-$(CONFIG_NFS_V4_2_SSC_HELPER) += nfs_ssc.o + +obj-$(CONFIG_NFS_COMMON) += common.o diff --git a/fs/nfs_common/common.c b/fs/nfs_common/common.c new file mode 100644 index 000000000000..af09aed09fd2 --- /dev/null +++ b/fs/nfs_common/common.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/module.h> +#include <linux/nfs_common.h> +#include <linux/nfs4.h> + +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. + */ +static const struct { + int stat; + int errno; +} nfs_errtbl[] = { + { NFS_OK, 0 }, + { NFSERR_PERM, -EPERM }, + { NFSERR_NOENT, -ENOENT }, + { NFSERR_IO, -EIO }, + { NFSERR_NXIO, -ENXIO }, +/* { NFSERR_EAGAIN, -EAGAIN }, */ + { NFSERR_ACCES, -EACCES }, + { NFSERR_EXIST, -EEXIST }, + { NFSERR_XDEV, -EXDEV }, + { NFSERR_NODEV, -ENODEV }, + { NFSERR_NOTDIR, -ENOTDIR }, + { NFSERR_ISDIR, -EISDIR }, + { NFSERR_INVAL, -EINVAL }, + { NFSERR_FBIG, -EFBIG }, + { NFSERR_NOSPC, -ENOSPC }, + { NFSERR_ROFS, -EROFS }, + { NFSERR_MLINK, -EMLINK }, + { NFSERR_NAMETOOLONG, -ENAMETOOLONG }, + { NFSERR_NOTEMPTY, -ENOTEMPTY }, + { NFSERR_DQUOT, -EDQUOT }, + { NFSERR_STALE, -ESTALE }, + { NFSERR_REMOTE, -EREMOTE }, +#ifdef EWFLUSH + { NFSERR_WFLUSH, -EWFLUSH }, +#endif + { NFSERR_BADHANDLE, -EBADHANDLE }, + { NFSERR_NOT_SYNC, -ENOTSYNC }, + { NFSERR_BAD_COOKIE, -EBADCOOKIE }, + { NFSERR_NOTSUPP, -ENOTSUPP }, + { NFSERR_TOOSMALL, -ETOOSMALL }, + { NFSERR_SERVERFAULT, -EREMOTEIO }, + { NFSERR_BADTYPE, -EBADTYPE }, + { NFSERR_JUKEBOX, -EJUKEBOX }, +}; + +/** + * nfs_stat_to_errno - convert an NFS status code to a local errno + * @status: NFS status code to convert + * + * Returns a local errno value, or -EIO if the NFS status code is + * not recognized. This function is used jointly by NFSv2 and NFSv3. + */ +int nfs_stat_to_errno(enum nfs_stat status) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { + if (nfs_errtbl[i].stat == (int)status) + return nfs_errtbl[i].errno; + } + return -EIO; +} +EXPORT_SYMBOL_GPL(nfs_stat_to_errno); + +/* + * We need to translate between nfs v4 status return values and + * the local errno values which may not be the same. + * + * nfs4_errtbl_common[] is used before more specialized mappings + * available in nfs4_errtbl[] or nfs4_errtbl_localio[]. + */ +static const struct { + int stat; + int errno; +} nfs4_errtbl_common[] = { + { NFS4_OK, 0 }, + { NFS4ERR_PERM, -EPERM }, + { NFS4ERR_NOENT, -ENOENT }, + { NFS4ERR_IO, -EIO }, + { NFS4ERR_NXIO, -ENXIO }, + { NFS4ERR_ACCESS, -EACCES }, + { NFS4ERR_EXIST, -EEXIST }, + { NFS4ERR_XDEV, -EXDEV }, + { NFS4ERR_NOTDIR, -ENOTDIR }, + { NFS4ERR_ISDIR, -EISDIR }, + { NFS4ERR_INVAL, -EINVAL }, + { NFS4ERR_FBIG, -EFBIG }, + { NFS4ERR_NOSPC, -ENOSPC }, + { NFS4ERR_ROFS, -EROFS }, + { NFS4ERR_MLINK, -EMLINK }, + { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, -ENOTEMPTY }, + { NFS4ERR_DQUOT, -EDQUOT }, + { NFS4ERR_STALE, -ESTALE }, + { NFS4ERR_BADHANDLE, -EBADHANDLE }, + { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, + { NFS4ERR_NOTSUPP, -ENOTSUPP }, + { NFS4ERR_TOOSMALL, -ETOOSMALL }, + { NFS4ERR_BADTYPE, -EBADTYPE }, + { NFS4ERR_SYMLINK, -ELOOP }, + { NFS4ERR_DEADLOCK, -EDEADLK }, +}; + +static const struct { + int stat; + int errno; +} nfs4_errtbl[] = { + { NFS4ERR_SERVERFAULT, -EREMOTEIO }, + { NFS4ERR_LOCKED, -EAGAIN }, + { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, + { NFS4ERR_NOXATTR, -ENODATA }, + { NFS4ERR_XATTR2BIG, -E2BIG }, +}; + +/* + * Convert an NFS error code to a local one. + * This one is used by NFSv4. + */ +int nfs4_stat_to_errno(int stat) +{ + int i; + + /* First check nfs4_errtbl_common */ + for (i = 0; i < ARRAY_SIZE(nfs4_errtbl_common); i++) { + if (nfs4_errtbl_common[i].stat == stat) + return nfs4_errtbl_common[i].errno; + } + /* Then check nfs4_errtbl */ + for (i = 0; i < ARRAY_SIZE(nfs4_errtbl); i++) { + if (nfs4_errtbl[i].stat == stat) + return nfs4_errtbl[i].errno; + } + if (stat <= 10000 || stat > 10100) { + /* The server is looney tunes. */ + return -EREMOTEIO; + } + /* If we cannot translate the error, the recovery routines should + * handle it. + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. + */ + return -stat; +} +EXPORT_SYMBOL_GPL(nfs4_stat_to_errno); + +/* + * This table is useful for conversion from local errno to NFS error. + * It provides more logically correct mappings for use with LOCALIO + * (which is focused on converting from errno to NFS status). + */ +static const struct { + int stat; + int errno; +} nfs4_errtbl_localio[] = { + /* Map errors differently than nfs4_errtbl */ + { NFS4ERR_IO, -EREMOTEIO }, + { NFS4ERR_DELAY, -EAGAIN }, + { NFS4ERR_FBIG, -E2BIG }, + /* Map errors not handled by nfs4_errtbl */ + { NFS4ERR_STALE, -EBADF }, + { NFS4ERR_STALE, -EOPENSTALE }, + { NFS4ERR_DELAY, -ETIMEDOUT }, + { NFS4ERR_DELAY, -ERESTARTSYS }, + { NFS4ERR_DELAY, -ENOMEM }, + { NFS4ERR_IO, -ETXTBSY }, + { NFS4ERR_IO, -EBUSY }, + { NFS4ERR_SERVERFAULT, -ESERVERFAULT }, + { NFS4ERR_SERVERFAULT, -ENFILE }, + { NFS4ERR_IO, -EUCLEAN }, + { NFS4ERR_PERM, -ENOKEY }, +}; + +/* + * Convert an errno to an NFS error code for LOCALIO. + */ +__u32 nfs_localio_errno_to_nfs4_stat(int errno) +{ + int i; + + /* First check nfs4_errtbl_common */ + for (i = 0; i < ARRAY_SIZE(nfs4_errtbl_common); i++) { + if (nfs4_errtbl_common[i].errno == errno) + return nfs4_errtbl_common[i].stat; + } + /* Then check nfs4_errtbl_localio */ + for (i = 0; i < ARRAY_SIZE(nfs4_errtbl_localio); i++) { + if (nfs4_errtbl_localio[i].errno == errno) + return nfs4_errtbl_localio[i].stat; + } + /* If we cannot translate the error, the recovery routines should + * handle it. + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. + */ + return NFS4ERR_SERVERFAULT; +} +EXPORT_SYMBOL_GPL(nfs_localio_errno_to_nfs4_stat); diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 5be08f02a76b..27cd0d13143b 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Common code for control of lockd and nfsv4 grace periods. * @@ -8,6 +9,7 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/fs.h> +#include <linux/filelock.h> static unsigned int grace_net_id; static DEFINE_SPINLOCK(grace_lock); @@ -41,7 +43,6 @@ EXPORT_SYMBOL_GPL(locks_start_grace); /** * locks_end_grace - * @net: net namespace that this lock manager belongs to * @lm: who this grace period is for * * Call this function to state that the given lock manager is ready to @@ -68,15 +69,20 @@ __state_in_grace(struct net *net, bool open) if (!open) return !list_empty(grace_list); + spin_lock(&grace_lock); list_for_each_entry(lm, grace_list, list) { - if (lm->block_opens) + if (lm->block_opens) { + spin_unlock(&grace_lock); return true; + } } + spin_unlock(&grace_lock); return false; } /** * locks_in_grace + * @net: network namespace * * Lock managers call this function to determine when it is OK for them * to answer ordinary lock requests, and when they should accept only @@ -133,6 +139,7 @@ exit_grace(void) } MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>"); +MODULE_DESCRIPTION("NFS client and server infrastructure"); MODULE_LICENSE("GPL"); module_init(init_grace) module_exit(exit_grace) diff --git a/fs/nfs_common/localio_trace.c b/fs/nfs_common/localio_trace.c new file mode 100644 index 000000000000..7decfe57abeb --- /dev/null +++ b/fs/nfs_common/localio_trace.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 Trond Myklebust <trond.myklebust@hammerspace.com> + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + */ +#include <linux/nfs_fs.h> +#include <linux/namei.h> + +#define CREATE_TRACE_POINTS +#include "localio_trace.h" diff --git a/fs/nfs_common/localio_trace.h b/fs/nfs_common/localio_trace.h new file mode 100644 index 000000000000..4055aec9ff8d --- /dev/null +++ b/fs/nfs_common/localio_trace.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Trond Myklebust <trond.myklebust@hammerspace.com> + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nfs_localio + +#if !defined(_TRACE_NFS_COMMON_LOCALIO_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NFS_COMMON_LOCALIO_H + +#include <linux/tracepoint.h> + +#include <trace/misc/fs.h> +#include <trace/misc/nfs.h> +#include <trace/misc/sunrpc.h> + +DECLARE_EVENT_CLASS(nfs_local_client_event, + TP_PROTO( + const struct nfs_client *clp + ), + + TP_ARGS(clp), + + TP_STRUCT__entry( + __field(unsigned int, protocol) + __string(server, clp->cl_hostname) + ), + + TP_fast_assign( + __entry->protocol = clp->rpc_ops->version; + __assign_str(server); + ), + + TP_printk( + "server=%s NFSv%u", __get_str(server), __entry->protocol + ) +); + +#define DEFINE_NFS_LOCAL_CLIENT_EVENT(name) \ + DEFINE_EVENT(nfs_local_client_event, name, \ + TP_PROTO( \ + const struct nfs_client *clp \ + ), \ + TP_ARGS(clp)) + +DEFINE_NFS_LOCAL_CLIENT_EVENT(nfs_localio_enable_client); +DEFINE_NFS_LOCAL_CLIENT_EVENT(nfs_localio_disable_client); + +#endif /* _TRACE_NFS_COMMON_LOCALIO_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE localio_trace +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/fs/nfs_common/nfs_ssc.c b/fs/nfs_common/nfs_ssc.c new file mode 100644 index 000000000000..832246b22c51 --- /dev/null +++ b/fs/nfs_common/nfs_ssc.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Helper for knfsd's SSC to access ops in NFS client modules + * + * Author: Dai Ngo <dai.ngo@oracle.com> + * + * Copyright (c) 2020, Oracle and/or its affiliates. + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/nfs_ssc.h> +#include "../nfs/nfs4_fs.h" + + +struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl; +EXPORT_SYMBOL_GPL(nfs_ssc_client_tbl); + +#ifdef CONFIG_NFS_V4_2 +/** + * nfs42_ssc_register - install the NFS_V4 client ops in the nfs_ssc_client_tbl + * @ops: NFS_V4 ops to be installed + * + * Return values: + * None + */ +void nfs42_ssc_register(const struct nfs4_ssc_client_ops *ops) +{ + nfs_ssc_client_tbl.ssc_nfs4_ops = ops; +} +EXPORT_SYMBOL_GPL(nfs42_ssc_register); + +/** + * nfs42_ssc_unregister - uninstall the NFS_V4 client ops from + * the nfs_ssc_client_tbl + * @ops: ops to be uninstalled + * + * Return values: + * None + */ +void nfs42_ssc_unregister(const struct nfs4_ssc_client_ops *ops) +{ + if (nfs_ssc_client_tbl.ssc_nfs4_ops != ops) + return; + + nfs_ssc_client_tbl.ssc_nfs4_ops = NULL; +} +EXPORT_SYMBOL_GPL(nfs42_ssc_unregister); +#endif /* CONFIG_NFS_V4_2 */ + +#ifdef CONFIG_NFS_V4_2 +/** + * nfs_ssc_register - install the NFS_FS client ops in the nfs_ssc_client_tbl + * @ops: NFS_FS ops to be installed + * + * Return values: + * None + */ +void nfs_ssc_register(const struct nfs_ssc_client_ops *ops) +{ + nfs_ssc_client_tbl.ssc_nfs_ops = ops; +} +EXPORT_SYMBOL_GPL(nfs_ssc_register); + +/** + * nfs_ssc_unregister - uninstall the NFS_FS client ops from + * the nfs_ssc_client_tbl + * @ops: ops to be uninstalled + * + * Return values: + * None + */ +void nfs_ssc_unregister(const struct nfs_ssc_client_ops *ops) +{ + if (nfs_ssc_client_tbl.ssc_nfs_ops != ops) + return; + nfs_ssc_client_tbl.ssc_nfs_ops = NULL; +} +EXPORT_SYMBOL_GPL(nfs_ssc_unregister); + +#else +void nfs_ssc_register(const struct nfs_ssc_client_ops *ops) +{ +} +EXPORT_SYMBOL_GPL(nfs_ssc_register); + +void nfs_ssc_unregister(const struct nfs_ssc_client_ops *ops) +{ +} +EXPORT_SYMBOL_GPL(nfs_ssc_unregister); +#endif /* CONFIG_NFS_V4_2 */ diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 538f142935ea..e2eaac14fd8e 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * fs/nfs_common/nfsacl.c * @@ -28,6 +29,7 @@ #include <linux/nfs3.h> #include <linux/sort.h> +MODULE_DESCRIPTION("NFS ACL support"); MODULE_LICENSE("GPL"); struct nfsacl_encode_desc { @@ -40,7 +42,7 @@ struct nfsacl_encode_desc { }; struct nfsacl_simple_acl { - struct posix_acl acl; + struct posix_acl_hdr acl; struct posix_acl_entry ace[4]; }; @@ -110,7 +112,8 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, xdr_encode_word(buf, base, entries)) return -EINVAL; if (encode_entries && acl && acl->a_count == 3) { - struct posix_acl *acl2 = &aclbuf.acl; + struct posix_acl *acl2 = + container_of(&aclbuf.acl, struct posix_acl, hdr); /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is * invoked in contexts where a memory allocation failure is @@ -135,6 +138,78 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, } EXPORT_SYMBOL_GPL(nfsacl_encode); +/** + * nfs_stream_encode_acl - Encode an NFSv3 ACL + * + * @xdr: an xdr_stream positioned to receive an encoded ACL + * @inode: inode of file whose ACL this is + * @acl: posix_acl to encode + * @encode_entries: whether to encode ACEs as well + * @typeflag: ACL type: NFS_ACL_DEFAULT or zero + * + * Return values: + * %false: The ACL could not be encoded + * %true: @xdr is advanced to the next available position + */ +bool nfs_stream_encode_acl(struct xdr_stream *xdr, struct inode *inode, + struct posix_acl *acl, int encode_entries, + int typeflag) +{ + const size_t elem_size = XDR_UNIT * 3; + u32 entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; + struct nfsacl_encode_desc nfsacl_desc = { + .desc = { + .elem_size = elem_size, + .array_len = encode_entries ? entries : 0, + .xcode = xdr_nfsace_encode, + }, + .acl = acl, + .typeflag = typeflag, + .uid = inode->i_uid, + .gid = inode->i_gid, + }; + struct nfsacl_simple_acl aclbuf; + unsigned int base; + int err; + + if (entries > NFS_ACL_MAX_ENTRIES) + return false; + if (xdr_stream_encode_u32(xdr, entries) < 0) + return false; + + if (encode_entries && acl && acl->a_count == 3) { + struct posix_acl *acl2 = + container_of(&aclbuf.acl, struct posix_acl, hdr); + + /* Avoid the use of posix_acl_alloc(). nfsacl_encode() is + * invoked in contexts where a memory allocation failure is + * fatal. Fortunately this fake ACL is small enough to + * construct on the stack. */ + posix_acl_init(acl2, 4); + + /* Insert entries in canonical order: other orders seem + to confuse Solaris VxFS. */ + acl2->a_entries[0] = acl->a_entries[0]; /* ACL_USER_OBJ */ + acl2->a_entries[1] = acl->a_entries[1]; /* ACL_GROUP_OBJ */ + acl2->a_entries[2] = acl->a_entries[1]; /* ACL_MASK */ + acl2->a_entries[2].e_tag = ACL_MASK; + acl2->a_entries[3] = acl->a_entries[2]; /* ACL_OTHER */ + nfsacl_desc.acl = acl2; + } + + base = xdr_stream_pos(xdr); + if (!xdr_reserve_space(xdr, XDR_UNIT + + elem_size * nfsacl_desc.desc.array_len)) + return false; + err = xdr_encode_array2(xdr->buf, base, &nfsacl_desc.desc); + if (err) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(nfs_stream_encode_acl); + + struct nfsacl_decode_desc { struct xdr_array2_desc desc; unsigned int count; @@ -236,7 +311,7 @@ posix_acl_from_nfsacl(struct posix_acl *acl) break; case ACL_MASK: mask = pa; - /* fall through */ + fallthrough; case ACL_OTHER: break; } @@ -294,3 +369,55 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, nfsacl_desc.desc.array_len; } EXPORT_SYMBOL_GPL(nfsacl_decode); + +/** + * nfs_stream_decode_acl - Decode an NFSv3 ACL + * + * @xdr: an xdr_stream positioned at an encoded ACL + * @aclcnt: OUT: count of ACEs in decoded posix_acl + * @pacl: OUT: a dynamically-allocated buffer containing the decoded posix_acl + * + * Return values: + * %false: The encoded ACL is not valid + * %true: @pacl contains a decoded ACL, and @xdr is advanced + * + * On a successful return, caller must release *pacl using posix_acl_release(). + */ +bool nfs_stream_decode_acl(struct xdr_stream *xdr, unsigned int *aclcnt, + struct posix_acl **pacl) +{ + const size_t elem_size = XDR_UNIT * 3; + struct nfsacl_decode_desc nfsacl_desc = { + .desc = { + .elem_size = elem_size, + .xcode = pacl ? xdr_nfsace_decode : NULL, + }, + }; + unsigned int base; + u32 entries; + + if (xdr_stream_decode_u32(xdr, &entries) < 0) + return false; + if (entries > NFS_ACL_MAX_ENTRIES) + return false; + + base = xdr_stream_pos(xdr); + if (!xdr_inline_decode(xdr, XDR_UNIT + elem_size * entries)) + return false; + nfsacl_desc.desc.array_maxlen = entries; + if (xdr_decode_array2(xdr->buf, base, &nfsacl_desc.desc)) + return false; + + if (pacl) { + if (entries != nfsacl_desc.desc.array_len || + posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) { + posix_acl_release(nfsacl_desc.acl); + return false; + } + *pacl = nfsacl_desc.acl; + } + if (aclcnt) + *aclcnt = entries; + return true; +} +EXPORT_SYMBOL_GPL(nfs_stream_decode_acl); diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c new file mode 100644 index 000000000000..dd715cdb6c04 --- /dev/null +++ b/fs/nfs_common/nfslocalio.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> + * Copyright (C) 2024 NeilBrown <neilb@suse.de> + */ + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/nfslocalio.h> +#include <linux/nfs3.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include <net/netns/generic.h> + +#include "localio_trace.h" + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("NFS localio protocol bypass support"); + +static DEFINE_SPINLOCK(nfs_uuids_lock); + +/* + * Global list of nfs_uuid_t instances + * that is protected by nfs_uuids_lock. + */ +static LIST_HEAD(nfs_uuids); + +/* + * Lock ordering: + * 1: nfs_uuid->lock + * 2: nfs_uuids_lock + * 3: nfs_uuid->list_lock (aka nn->local_clients_lock) + * + * May skip locks in select cases, but never hold multiple + * locks out of order. + */ + +void nfs_uuid_init(nfs_uuid_t *nfs_uuid) +{ + RCU_INIT_POINTER(nfs_uuid->net, NULL); + nfs_uuid->dom = NULL; + nfs_uuid->list_lock = NULL; + INIT_LIST_HEAD(&nfs_uuid->list); + INIT_LIST_HEAD(&nfs_uuid->files); + spin_lock_init(&nfs_uuid->lock); + nfs_uuid->nfs3_localio_probe_count = 0; +} +EXPORT_SYMBOL_GPL(nfs_uuid_init); + +bool nfs_uuid_begin(nfs_uuid_t *nfs_uuid) +{ + spin_lock(&nfs_uuid->lock); + if (rcu_access_pointer(nfs_uuid->net)) { + /* This nfs_uuid is already in use */ + spin_unlock(&nfs_uuid->lock); + return false; + } + + spin_lock(&nfs_uuids_lock); + if (!list_empty(&nfs_uuid->list)) { + /* This nfs_uuid is already in use */ + spin_unlock(&nfs_uuids_lock); + spin_unlock(&nfs_uuid->lock); + return false; + } + list_add_tail(&nfs_uuid->list, &nfs_uuids); + spin_unlock(&nfs_uuids_lock); + + uuid_gen(&nfs_uuid->uuid); + spin_unlock(&nfs_uuid->lock); + + return true; +} +EXPORT_SYMBOL_GPL(nfs_uuid_begin); + +void nfs_uuid_end(nfs_uuid_t *nfs_uuid) +{ + if (!rcu_access_pointer(nfs_uuid->net)) { + spin_lock(&nfs_uuid->lock); + if (!rcu_access_pointer(nfs_uuid->net)) { + /* Not local, remove from nfs_uuids */ + spin_lock(&nfs_uuids_lock); + list_del_init(&nfs_uuid->list); + spin_unlock(&nfs_uuids_lock); + } + spin_unlock(&nfs_uuid->lock); + } +} +EXPORT_SYMBOL_GPL(nfs_uuid_end); + +static nfs_uuid_t * nfs_uuid_lookup_locked(const uuid_t *uuid) +{ + nfs_uuid_t *nfs_uuid; + + list_for_each_entry(nfs_uuid, &nfs_uuids, list) + if (uuid_equal(&nfs_uuid->uuid, uuid)) + return nfs_uuid; + + return NULL; +} + +static struct module *nfsd_mod; + +void nfs_uuid_is_local(const uuid_t *uuid, struct list_head *list, + spinlock_t *list_lock, struct net *net, + struct auth_domain *dom, struct module *mod) +{ + nfs_uuid_t *nfs_uuid; + + spin_lock(&nfs_uuids_lock); + nfs_uuid = nfs_uuid_lookup_locked(uuid); + if (!nfs_uuid) { + spin_unlock(&nfs_uuids_lock); + return; + } + + /* + * We don't hold a ref on the net, but instead put + * ourselves on @list (nn->local_clients) so the net + * pointer can be invalidated. + */ + spin_lock(list_lock); /* list_lock is nn->local_clients_lock */ + list_move(&nfs_uuid->list, list); + spin_unlock(list_lock); + + spin_unlock(&nfs_uuids_lock); + /* Once nfs_uuid is parented to @list, avoid global nfs_uuids_lock */ + spin_lock(&nfs_uuid->lock); + + __module_get(mod); + nfsd_mod = mod; + + nfs_uuid->list_lock = list_lock; + kref_get(&dom->ref); + nfs_uuid->dom = dom; + rcu_assign_pointer(nfs_uuid->net, net); + spin_unlock(&nfs_uuid->lock); +} +EXPORT_SYMBOL_GPL(nfs_uuid_is_local); + +void nfs_localio_enable_client(struct nfs_client *clp) +{ + /* nfs_uuid_is_local() does the actual enablement */ + trace_nfs_localio_enable_client(clp); +} +EXPORT_SYMBOL_GPL(nfs_localio_enable_client); + +/* + * Cleanup the nfs_uuid_t embedded in an nfs_client. + * This is the long-form of nfs_uuid_init(). + */ +static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid) +{ + struct nfs_file_localio *nfl; + + spin_lock(&nfs_uuid->lock); + if (unlikely(!rcu_access_pointer(nfs_uuid->net))) { + spin_unlock(&nfs_uuid->lock); + return false; + } + RCU_INIT_POINTER(nfs_uuid->net, NULL); + + if (nfs_uuid->dom) { + auth_domain_put(nfs_uuid->dom); + nfs_uuid->dom = NULL; + } + + /* Walk list of files and ensure their last references dropped */ + + while ((nfl = list_first_entry_or_null(&nfs_uuid->files, + struct nfs_file_localio, + list)) != NULL) { + /* If nfs_uuid is already NULL, nfs_close_local_fh is + * closing and we must wait, else we unlink and close. + */ + if (rcu_access_pointer(nfl->nfs_uuid) == NULL) { + /* nfs_close_local_fh() is doing the + * close and we must wait. until it unlinks + */ + wait_var_event_spinlock(nfs_uuid, + list_first_entry_or_null( + &nfs_uuid->files, + struct nfs_file_localio, + list) != nfl, + &nfs_uuid->lock); + continue; + } + + /* Remove nfl from nfs_uuid->files list */ + list_del_init(&nfl->list); + spin_unlock(&nfs_uuid->lock); + + nfs_to_nfsd_file_put_local(&nfl->ro_file); + nfs_to_nfsd_file_put_local(&nfl->rw_file); + cond_resched(); + + spin_lock(&nfs_uuid->lock); + /* Now we can allow racing nfs_close_local_fh() to + * skip the locking. + */ + store_release_wake_up(&nfl->nfs_uuid, RCU_INITIALIZER(NULL)); + } + + /* Remove client from nn->local_clients */ + if (nfs_uuid->list_lock) { + spin_lock(nfs_uuid->list_lock); + BUG_ON(list_empty(&nfs_uuid->list)); + list_del_init(&nfs_uuid->list); + spin_unlock(nfs_uuid->list_lock); + nfs_uuid->list_lock = NULL; + } + + module_put(nfsd_mod); + spin_unlock(&nfs_uuid->lock); + + return true; +} + +void nfs_localio_disable_client(struct nfs_client *clp) +{ + if (nfs_uuid_put(&clp->cl_uuid)) + trace_nfs_localio_disable_client(clp); +} +EXPORT_SYMBOL_GPL(nfs_localio_disable_client); + +void nfs_localio_invalidate_clients(struct list_head *nn_local_clients, + spinlock_t *nn_local_clients_lock) +{ + LIST_HEAD(local_clients); + nfs_uuid_t *nfs_uuid, *tmp; + struct nfs_client *clp; + + spin_lock(nn_local_clients_lock); + list_splice_init(nn_local_clients, &local_clients); + spin_unlock(nn_local_clients_lock); + list_for_each_entry_safe(nfs_uuid, tmp, &local_clients, list) { + if (WARN_ON(nfs_uuid->list_lock != nn_local_clients_lock)) + break; + clp = container_of(nfs_uuid, struct nfs_client, cl_uuid); + nfs_localio_disable_client(clp); + } +} +EXPORT_SYMBOL_GPL(nfs_localio_invalidate_clients); + +static int nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl) +{ + int ret = 0; + + /* Add nfl to nfs_uuid->files if it isn't already */ + spin_lock(&nfs_uuid->lock); + if (rcu_access_pointer(nfs_uuid->net) == NULL) { + ret = -ENXIO; + } else if (list_empty(&nfl->list)) { + rcu_assign_pointer(nfl->nfs_uuid, nfs_uuid); + list_add_tail(&nfl->list, &nfs_uuid->files); + } + spin_unlock(&nfs_uuid->lock); + return ret; +} + +/* + * Caller is responsible for calling nfsd_net_put and + * nfsd_file_put (via nfs_to_nfsd_file_put_local). + */ +struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid, + struct rpc_clnt *rpc_clnt, const struct cred *cred, + const struct nfs_fh *nfs_fh, struct nfs_file_localio *nfl, + struct nfsd_file __rcu **pnf, + const fmode_t fmode) +{ + struct net *net; + struct nfsd_file *localio; + + /* + * Not running in nfsd context, so must safely get reference on nfsd_serv. + * But the server may already be shutting down, if so disallow new localio. + * uuid->net is NOT a counted reference, but rcu_read_lock() ensures that + * if uuid->net is not NULL, then calling nfsd_net_try_get() is safe + * and if it succeeds we will have an implied reference to the net. + * + * Otherwise NFS may not have ref on NFSD and therefore cannot safely + * make 'nfs_to' calls. + */ + rcu_read_lock(); + net = rcu_dereference(uuid->net); + if (!net || !nfs_to->nfsd_net_try_get(net)) { + rcu_read_unlock(); + return ERR_PTR(-ENXIO); + } + rcu_read_unlock(); + /* We have an implied reference to net thanks to nfsd_net_try_get */ + localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt, cred, + nfs_fh, pnf, fmode); + if (!IS_ERR(localio) && nfs_uuid_add_file(uuid, nfl) < 0) { + /* Delete the cached file when racing with nfs_uuid_put() */ + nfs_to_nfsd_file_put_local(pnf); + } + nfs_to_nfsd_net_put(net); + + return localio; +} +EXPORT_SYMBOL_GPL(nfs_open_local_fh); + +void nfs_close_local_fh(struct nfs_file_localio *nfl) +{ + nfs_uuid_t *nfs_uuid; + + rcu_read_lock(); + nfs_uuid = rcu_dereference(nfl->nfs_uuid); + if (!nfs_uuid) { + /* regular (non-LOCALIO) NFS will hammer this */ + rcu_read_unlock(); + return; + } + + spin_lock(&nfs_uuid->lock); + if (!rcu_access_pointer(nfl->nfs_uuid)) { + /* nfs_uuid_put has finished here */ + spin_unlock(&nfs_uuid->lock); + rcu_read_unlock(); + return; + } + if (list_empty(&nfl->list)) { + /* nfs_uuid_put() has started closing files, wait for it + * to finished + */ + spin_unlock(&nfs_uuid->lock); + rcu_read_unlock(); + wait_var_event(&nfl->nfs_uuid, + rcu_access_pointer(nfl->nfs_uuid) == NULL); + return; + } + /* tell nfs_uuid_put() to wait for us */ + RCU_INIT_POINTER(nfl->nfs_uuid, NULL); + spin_unlock(&nfs_uuid->lock); + rcu_read_unlock(); + + nfs_to_nfsd_file_put_local(&nfl->ro_file); + nfs_to_nfsd_file_put_local(&nfl->rw_file); + + /* Remove nfl from nfs_uuid->files list and signal nfs_uuid_put() + * that we are done. The moment we drop the spinlock the + * nfs_uuid could be freed. + */ + spin_lock(&nfs_uuid->lock); + list_del_init(&nfl->list); + wake_up_var_locked(nfs_uuid, &nfs_uuid->lock); + spin_unlock(&nfs_uuid->lock); +} +EXPORT_SYMBOL_GPL(nfs_close_local_fh); + +/* + * The NFS LOCALIO code needs to call into NFSD using various symbols, + * but cannot be statically linked, because that will make the NFS + * module always depend on the NFSD module. + * + * 'nfs_to' provides NFS access to NFSD functions needed for LOCALIO, + * its lifetime is tightly coupled to the NFSD module and will always + * be available to NFS LOCALIO because any successful client<->server + * LOCALIO handshake results in a reference on the NFSD module (above), + * so NFS implicitly holds a reference to the NFSD module and its + * functions in the 'nfs_to' nfsd_localio_operations cannot disappear. + * + * If the last NFS client using LOCALIO disconnects (and its reference + * on NFSD dropped) then NFSD could be unloaded, resulting in 'nfs_to' + * functions being invalid pointers. But if NFSD isn't loaded then NFS + * will not be able to handshake with NFSD and will have no cause to + * try to call 'nfs_to' function pointers. If/when NFSD is reloaded it + * will reinitialize the 'nfs_to' function pointers and make LOCALIO + * possible. + */ +const struct nfsd_localio_operations *nfs_to; +EXPORT_SYMBOL_GPL(nfs_to); |
