Diffstat (limited to 'include/linux/sunrpc')
37 files changed, 3127 insertions, 1264 deletions
diff --git a/include/linux/sunrpc/addr.h b/include/linux/sunrpc/addr.h index 07d8e53bedfc..07d454873b6d 100644 --- a/include/linux/sunrpc/addr.h +++ b/include/linux/sunrpc/addr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/addr.h * @@ -46,8 +47,8 @@ static inline void rpc_set_port(struct sockaddr *sap, #define IPV6_SCOPE_DELIMITER '%' #define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") -static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, - const struct sockaddr *sap2) +static inline bool rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) { const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; @@ -67,8 +68,8 @@ static inline bool __rpc_copy_addr4(struct sockaddr *dst, } #if IS_ENABLED(CONFIG_IPV6) -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) +static inline bool rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) { const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; @@ -93,7 +94,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst, return true; } #else /* !(IS_ENABLED(CONFIG_IPV6) */ -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, +static inline bool rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) { return false; @@ -122,15 +123,28 @@ static inline bool rpc_cmp_addr(const struct sockaddr *sap1, if (sap1->sa_family == sap2->sa_family) { switch (sap1->sa_family) { case AF_INET: - return __rpc_cmp_addr4(sap1, sap2); + return rpc_cmp_addr4(sap1, sap2); case AF_INET6: - return __rpc_cmp_addr6(sap1, sap2); + return rpc_cmp_addr6(sap1, sap2); } } return false; } /** + * rpc_cmp_addr_port - compare the address and port number of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + */ +static inline bool rpc_cmp_addr_port(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (!rpc_cmp_addr(sap1, sap2)) + return false; + return rpc_get_port(sap1) == rpc_get_port(sap2); +} + +/** * rpc_copy_addr - copy the address portion of one sockaddr to another * @dst: destination sockaddr * @src: source sockaddr diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 0dd00f4f6810..61e58327b1aa 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/auth.h * @@ -9,8 +10,6 @@ #ifndef _LINUX_SUNRPC_AUTH_H #define _LINUX_SUNRPC_AUTH_H -#ifdef __KERNEL__ - #include <linux/sunrpc/sched.h> #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/xdr.h> @@ -18,19 +17,27 @@ #include <linux/atomic.h> #include <linux/rcupdate.h> #include <linux/uidgid.h> +#include <linux/utsname.h> + +/* + * Maximum size of AUTH_NONE authentication information, in XDR words. + */ +#define NUL_CALLSLACK (4) +#define NUL_REPLYSLACK (2) -/* size of the nodename buffer */ -#define UNX_MAXNODENAME 32 +/* + * Size of the nodename buffer. RFC1831 specifies a hard limit of 255 bytes, + * but Linux hostnames are actually limited to __NEW_UTS_LEN bytes. 
+ */ +#define UNX_MAXNODENAME __NEW_UTS_LEN +#define UNX_CALLSLACK (21 + XDR_QUADLEN(UNX_MAXNODENAME)) +#define UNX_NGROUPS 16 struct rpcsec_gss_info; -/* Work around the lack of a VFS credential */ struct auth_cred { - kuid_t uid; - kgid_t gid; - struct group_info *group_info; - const char *principal; - unsigned char machine_cred : 1; + const struct cred *cred; + const char *principal; /* If present, this is a machine credential */ }; /* @@ -44,14 +51,10 @@ struct rpc_cred { struct rcu_head cr_rcu; struct rpc_auth * cr_auth; const struct rpc_credops *cr_ops; -#ifdef RPC_DEBUG - unsigned long cr_magic; /* 0x0f4aa4f0 */ -#endif unsigned long cr_expire; /* when to gc */ unsigned long cr_flags; /* various flags */ - atomic_t cr_count; /* ref count */ - - kuid_t cr_uid; + refcount_t cr_count; /* ref count */ + const struct cred *cr_cred; /* per-flavor data */ }; @@ -60,7 +63,7 @@ struct rpc_cred { #define RPCAUTH_CRED_HASHED 2 #define RPCAUTH_CRED_NEGATIVE 3 -#define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 +const struct cred *rpc_machine_cred(void); /* * Client authentication handle @@ -69,26 +72,34 @@ struct rpc_cred_cache; struct rpc_authops; struct rpc_auth { unsigned int au_cslack; /* call cred size estimate */ - /* guess at number of u32's auth adds before - * reply data; normally the verifier size: */ - unsigned int au_rslack; - /* for gss, used to calculate au_rslack: */ - unsigned int au_verfsize; - - unsigned int au_flags; /* various flags */ - const struct rpc_authops *au_ops; /* operations */ + unsigned int au_rslack; /* reply cred size estimate */ + unsigned int au_verfsize; /* size of reply verifier */ + unsigned int au_ralign; /* words before UL header */ + + unsigned long au_flags; + const struct rpc_authops *au_ops; rpc_authflavor_t au_flavor; /* pseudoflavor (note may * differ from the flavor in * au_ops->au_flavor in gss * case) */ - atomic_t au_count; /* Reference counter */ + refcount_t au_count; /* Reference counter */ struct rpc_cred_cache * au_credcache; /* per-flavor data */ }; +/* rpc_auth au_flags */ +#define RPCAUTH_AUTH_DATATOUCH (1) +#define RPCAUTH_AUTH_UPDATE_SLACK (2) + +struct rpc_auth_create_args { + rpc_authflavor_t pseudoflavor; + const char *target_name; +}; + /* Flags for rpcauth_lookupcred() */ #define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ +#define RPCAUTH_LOOKUP_ASYNC 0x02 /* Don't block waiting for memory */ /* * Client authentication ops @@ -97,17 +108,19 @@ struct rpc_authops { struct module *owner; rpc_authflavor_t au_flavor; /* flavor (RPC_AUTH_*) */ char * au_name; - struct rpc_auth * (*create)(struct rpc_clnt *, rpc_authflavor_t); + struct rpc_auth * (*create)(const struct rpc_auth_create_args *, + struct rpc_clnt *); void (*destroy)(struct rpc_auth *); + int (*hash_cred)(struct auth_cred *, unsigned int); struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); - struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int); - int (*pipes_create)(struct rpc_auth *); - void (*pipes_destroy)(struct rpc_auth *); - int (*list_pseudoflavors)(rpc_authflavor_t *, int); + struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t); rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); int (*flavor2info)(rpc_authflavor_t, struct rpcsec_gss_info *); + int (*key_timeout)(struct rpc_auth *, + struct rpc_cred *); + int (*ping)(struct rpc_clnt *clnt); }; struct rpc_credops { @@ -116,59 +129,69 @@ struct rpc_credops { void (*crdestroy)(struct rpc_cred *); int (*crmatch)(struct auth_cred *, 
struct rpc_cred *, int); - struct rpc_cred * (*crbind)(struct rpc_task *, struct rpc_cred *, int); - __be32 * (*crmarshal)(struct rpc_task *, __be32 *); + int (*crmarshal)(struct rpc_task *task, + struct xdr_stream *xdr); int (*crrefresh)(struct rpc_task *); - __be32 * (*crvalidate)(struct rpc_task *, __be32 *); - int (*crwrap_req)(struct rpc_task *, kxdreproc_t, - void *, __be32 *, void *); - int (*crunwrap_resp)(struct rpc_task *, kxdrdproc_t, - void *, __be32 *, void *); + int (*crvalidate)(struct rpc_task *task, + struct xdr_stream *xdr); + int (*crwrap_req)(struct rpc_task *task, + struct xdr_stream *xdr); + int (*crunwrap_resp)(struct rpc_task *task, + struct xdr_stream *xdr); + int (*crkey_timeout)(struct rpc_cred *); + char * (*crstringify_acceptor)(struct rpc_cred *); + bool (*crneed_reencode)(struct rpc_task *); }; extern const struct rpc_authops authunix_ops; extern const struct rpc_authops authnull_ops; +extern const struct rpc_authops authtls_ops; int __init rpc_init_authunix(void); -int __init rpc_init_generic_auth(void); int __init rpcauth_init_module(void); void rpcauth_remove_module(void); -void rpc_destroy_generic_auth(void); void rpc_destroy_authunix(void); -struct rpc_cred * rpc_lookup_cred(void); -struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); -struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); +struct rpc_auth * rpcauth_create(const struct rpc_auth_create_args *, + struct rpc_clnt *); void rpcauth_release(struct rpc_auth *); rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, struct rpcsec_gss_info *); int rpcauth_get_gssinfo(rpc_authflavor_t, struct rpcsec_gss_info *); -int rpcauth_list_flavors(rpc_authflavor_t *, int); -struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); +struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int, gfp_t); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); -struct rpc_cred * rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *, int); void put_rpccred(struct rpc_cred *); -__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); -__be32 * rpcauth_checkverf(struct rpc_task *, __be32 *); -int rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj); -int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj); +int rpcauth_marshcred(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_checkverf(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_wrap_req_encode(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_wrap_req(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_unwrap_resp_decode(struct rpc_task *task, + struct xdr_stream *xdr); +int rpcauth_unwrap_resp(struct rpc_task *task, + struct xdr_stream *xdr); +bool rpcauth_xmit_need_reencode(struct rpc_task *task); int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); int rpcauth_init_credcache(struct rpc_auth *); void rpcauth_destroy_credcache(struct rpc_auth *); void rpcauth_clear_credcache(struct rpc_cred_cache *); +char * rpcauth_stringify_acceptor(struct rpc_cred *); static inline -struct rpc_cred * get_rpccred(struct rpc_cred 
*cred) +struct rpc_cred *get_rpccred(struct rpc_cred *cred) { - atomic_inc(&cred->cr_count); - return cred; + if (cred != NULL && refcount_inc_not_zero(&cred->cr_count)) + return cred; + return NULL; } -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_H */ diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index f1cfd4c85cd0..43e481aa347a 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/auth_gss.h * @@ -12,7 +13,7 @@ #ifndef _LINUX_SUNRPC_AUTH_GSS_H #define _LINUX_SUNRPC_AUTH_GSS_H -#ifdef __KERNEL__ +#include <linux/refcount.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/svc.h> #include <linux/sunrpc/gss_api.h> @@ -65,12 +66,14 @@ struct rpc_gss_init_res { * the wire when communicating with a server. */ struct gss_cl_ctx { - atomic_t count; + refcount_t count; enum rpc_gss_proc gc_proc; u32 gc_seq; + u32 gc_seq_xmit; spinlock_t gc_seq_lock; - struct gss_ctx __rcu *gc_gss_ctx; + struct gss_ctx *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; + struct xdr_netobj gc_acceptor; u32 gc_win; unsigned long gc_expiry; struct rcu_head gc_rcu; @@ -86,6 +89,5 @@ struct gss_cred { unsigned long gc_upcall_timestamp; }; -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_AUTH_GSS_H */ diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 969c0a671dbf..f22bf915dcf6 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -1,22 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /****************************************************************************** (c) 2008 NetApp. All Rights Reserved. -NetApp provides this source code under the GPL v2 License. -The GPL v2 license is available at -http://opensource.org/licenses/gpl-license.php. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ @@ -32,20 +18,31 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include <linux/sunrpc/sched.h> #ifdef CONFIG_SUNRPC_BACKCHANNEL -struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt); +struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid); +void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); +void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task, + const struct rpc_timeout *to); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); -int bc_send(struct rpc_rqst *req); + +/* Socket backchannel transport methods */ +int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); +void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs); +void xprt_free_bc_rqst(struct rpc_rqst *req); +unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt); /* * Determine if a shared backchannel is in use */ -static inline int svc_is_backchannel(const struct svc_rqst *rqstp) +static inline bool svc_is_backchannel(const struct svc_rqst *rqstp) { - if (rqstp->rq_server->sv_bc_xprt) - return 1; - return 0; + return rqstp->rq_server->sv_bc_enabled; +} + +static inline void set_bc_enabled(struct svc_serv *serv) +{ + serv->sv_bc_enabled = true; } #else /* CONFIG_SUNRPC_BACKCHANNEL */ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, @@ -54,9 +51,18 @@ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, return 0; } -static inline int svc_is_backchannel(const struct svc_rqst *rqstp) +static inline void xprt_destroy_backchannel(struct rpc_xprt *xprt, + unsigned int max_reqs) +{ +} + +static inline bool svc_is_backchannel(const struct svc_rqst *rqstp) +{ + return false; +} + +static inline void set_bc_enabled(struct svc_serv *serv) { - return 0; } static inline void xprt_free_bc_request(struct rpc_rqst *req) @@ -64,4 +70,3 @@ static inline void xprt_free_bc_request(struct rpc_rqst *req) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ #endif /* _LINUX_SUNRPC_BC_XPRT_H */ - diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 6ce690de447f..e783132e481f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * include/linux/sunrpc/cache.h * @@ -5,9 +6,6 @@ * used by sunrpc clients and servers. * * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au> - * - * Released under terms in GPL version 2. See COPYING. - * */ #ifndef _LINUX_SUNRPC_CACHE_H_ @@ -16,6 +14,7 @@ #include <linux/kref.h> #include <linux/slab.h> #include <linux/atomic.h> +#include <linux/kstrtox.h> #include <linux/proc_fs.h> /* @@ -46,37 +45,33 @@ * */ struct cache_head { - struct cache_head * next; - time_t expiry_time; /* After time time, don't use the data */ - time_t last_refresh; /* If CACHE_PENDING, this is when upcall - * was sent, else this is when update was received + struct hlist_node cache_list; + time64_t expiry_time; /* After time expiry_time, don't use + * the data */ + time64_t last_refresh; /* If CACHE_PENDING, this is when upcall was + * sent, else this is when update was + * received, though it is alway set to + * be *after* ->flush_time. 
*/ struct kref ref; unsigned long flags; }; -#define CACHE_VALID 0 /* Entry contains valid data */ -#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */ -#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/ -#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */ -#define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ - -struct cache_detail_procfs { - struct proc_dir_entry *proc_ent; - struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; +/* cache_head.flags */ +enum { + CACHE_VALID, /* Entry contains valid data */ + CACHE_NEGATIVE, /* Negative entry - there is no match for the key */ + CACHE_PENDING, /* An upcall has been sent but no reply received yet*/ + CACHE_CLEANED, /* Entry has been cleaned from cache */ }; -struct cache_detail_pipefs { - struct dentry *dir; -}; +#define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ struct cache_detail { struct module * owner; int hash_size; - struct cache_head ** hash_table; - rwlock_t hash_lock; - - atomic_t inuse; /* active user-space update or lookup */ + struct hlist_head * hash_table; + spinlock_t hash_lock; char *name; void (*cache_put)(struct kref *); @@ -98,6 +93,7 @@ struct cache_detail { int has_died); struct cache_head * (*alloc)(void); + void (*flush)(void); int (*match)(struct cache_head *orig, struct cache_head *new); void (*init)(struct cache_head *orig, struct cache_head *new); void (*update)(struct cache_head *orig, struct cache_head *new); @@ -105,37 +101,41 @@ struct cache_detail { /* fields below this comment are for internal use * and should not be touched by cache owners */ - time_t flush_time; /* flush all cache items with last_refresh - * earlier than this */ + time64_t flush_time; /* flush all cache items with + * last_refresh at or earlier + * than this. last_refresh + * is never set at or earlier + * than this. + */ struct list_head others; - time_t nextcheck; + time64_t nextcheck; int entries; /* fields for communication over channel */ struct list_head queue; - atomic_t readers; /* how many time is /chennel open */ - time_t last_close; /* if no readers, when did last close */ - time_t last_warn; /* when we last warned about no readers */ + atomic_t writers; /* how many time is /channel open */ + time64_t last_close; /* if no writers, when did last close */ + time64_t last_warn; /* when we last warned about no writers */ union { - struct cache_detail_procfs procfs; - struct cache_detail_pipefs pipefs; - } u; + struct proc_dir_entry *procfs; + struct dentry *pipefs; + }; struct net *net; }; - /* this must be embedded in any request structure that * identifies an object that will want a callback on * a cache fill */ struct cache_req { struct cache_deferred_req *(*defer)(struct cache_req *req); - int thread_wait; /* How long (jiffies) we can block the - * current thread to wait for updates. - */ + unsigned long thread_wait; /* How long (jiffies) we can block the + * current thread to wait for updates. + */ }; + /* this must be embedded in a deferred_request that is being * delayed awaiting cache-fill */ @@ -153,18 +153,22 @@ struct cache_deferred_req { * timestamps kept in the cache are expressed in seconds * since boot. This is the best for measuring differences in * real time. + * This reimplemnts ktime_get_boottime_seconds() in a slightly + * faster but less accurate way. 
When we end up converting + * back to wallclock (CLOCK_REALTIME), that error often + * cancels out during the reverse operation. */ -static inline time_t seconds_since_boot(void) +static inline time64_t seconds_since_boot(void) { - struct timespec boot; - getboottime(&boot); - return get_seconds() - boot.tv_sec; + struct timespec64 boot; + getboottime64(&boot); + return ktime_get_real_seconds() - boot.tv_sec; } -static inline time_t convert_to_wallclock(time_t sinceboot) +static inline time64_t convert_to_wallclock(time64_t sinceboot) { - struct timespec boot; - getboottime(&boot); + struct timespec64 boot; + getboottime64(&boot); return boot.tv_sec + sinceboot; } @@ -173,14 +177,17 @@ extern const struct file_operations content_file_operations_pipefs; extern const struct file_operations cache_flush_operations_pipefs; extern struct cache_head * -sunrpc_cache_lookup(struct cache_detail *detail, - struct cache_head *key, int hash); +sunrpc_cache_lookup_rcu(struct cache_detail *detail, + struct cache_head *key, int hash); extern struct cache_head * sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); extern int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); +extern int +sunrpc_cache_pipe_upcall_timeout(struct cache_detail *detail, + struct cache_head *h); extern void cache_clean_deferred(void *owner); @@ -191,21 +198,32 @@ static inline struct cache_head *cache_get(struct cache_head *h) return h; } +static inline struct cache_head *cache_get_rcu(struct cache_head *h) +{ + if (kref_get_unless_zero(&h->ref)) + return h; + return NULL; +} static inline void cache_put(struct cache_head *h, struct cache_detail *cd) { - if (atomic_read(&h->ref.refcount) <= 2 && + if (kref_read(&h->ref) <= 2 && h->expiry_time < cd->nextcheck) cd->nextcheck = h->expiry_time; kref_put(&h->ref, cd->cache_put); } -static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) +static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h) { - return (h->expiry_time < seconds_since_boot()) || - (detail->flush_time > h->last_refresh); + if (h->expiry_time < seconds_since_boot()) + return true; + if (!test_bit(CACHE_VALID, &h->flags)) + return false; + return detail->flush_time >= h->last_refresh; } +extern int cache_check_rcu(struct cache_detail *detail, + struct cache_head *h, struct cache_req *rqstp); extern int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp); extern void cache_flush(void); @@ -215,7 +233,7 @@ extern void __init cache_initialize(void); extern int cache_register_net(struct cache_detail *cd, struct net *net); extern void cache_unregister_net(struct cache_detail *cd, struct net *net); -extern struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net); +extern struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net); extern void cache_destroy_net(struct cache_detail *cd, struct net *net); extern void sunrpc_init_cache_detail(struct cache_detail *cd); @@ -223,6 +241,12 @@ extern void sunrpc_destroy_cache_detail(struct cache_detail *cd); extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, umode_t, struct cache_detail *); extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); +extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *); + +/* Must store cache_detail in seq_file->private if using next three functions */ +extern void 
*cache_seq_start_rcu(struct seq_file *file, loff_t *pos); +extern void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos); +extern void cache_seq_stop_rcu(struct seq_file *file, void *p); extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); @@ -264,17 +288,36 @@ static inline int get_uint(char **bpp, unsigned int *anint) return 0; } -static inline time_t get_expiry(char **bpp) +static inline int get_time(char **bpp, time64_t *time) { - int rv; - struct timespec boot; - - if (get_int(bpp, &rv)) - return 0; - if (rv < 0) - return 0; - getboottime(&boot); - return rv - boot.tv_sec; + char buf[50]; + long long ll; + int len = qword_get(bpp, buf, sizeof(buf)); + + if (len < 0) + return -EINVAL; + if (len == 0) + return -ENOENT; + + if (kstrtoll(buf, 0, &ll)) + return -EINVAL; + + *time = ll; + return 0; +} + +static inline int get_expiry(char **bpp, time64_t *rvp) +{ + int error; + struct timespec64 boot; + + error = get_time(bpp, rvp); + if (error) + return error; + + getboottime64(&boot); + (*rvp) -= boot.tv_sec; + return 0; } #endif /* _LINUX_SUNRPC_CACHE_H_ */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index bfe11be81f6f..f8b406b0a1af 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/clnt.h * @@ -13,6 +14,7 @@ #include <linux/socket.h> #include <linux/in.h> #include <linux/in6.h> +#include <linux/refcount.h> #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/sched.h> @@ -21,47 +23,79 @@ #include <linux/sunrpc/stats.h> #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/timer.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #include <asm/signal.h> #include <linux/path.h> #include <net/ipv6.h> +#include <linux/sunrpc/xprtmultipath.h> struct rpc_inode; +struct rpc_sysfs_client { + struct kobject kobject; + struct net *net; + struct rpc_clnt *clnt; + struct rpc_xprt_switch *xprt_switch; +}; + /* * The high-level client handle */ struct rpc_clnt { - atomic_t cl_count; /* Number of references */ + refcount_t cl_count; /* Number of references */ + unsigned int cl_clid; /* client id */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ + atomic_t cl_pid; /* task PID counter */ spinlock_t cl_lock; /* spinlock */ struct rpc_xprt __rcu * cl_xprt; /* transport */ - struct rpc_procinfo * cl_procinfo; /* procedure info */ + const struct rpc_procinfo *cl_procinfo; /* procedure info */ u32 cl_prog, /* RPC program number */ cl_vers, /* RPC version number */ cl_maxproc; /* max procedure number */ - const char * cl_protname; /* protocol name */ struct rpc_auth * cl_auth; /* authenticator */ struct rpc_stat * cl_stats; /* per-program statistics */ struct rpc_iostats * cl_metrics; /* per-client statistics */ unsigned int cl_softrtry : 1,/* soft timeouts */ + cl_softerr : 1,/* Timeouts return errors */ cl_discrtry : 1,/* disconnect before retry */ + cl_noretranstimeo: 1,/* No retransmit timeouts */ cl_autobind : 1,/* use getport() */ - cl_chatty : 1;/* be verbose */ + cl_chatty : 1,/* be verbose */ + cl_shutdown : 1,/* rpc immediate -EIO */ + cl_netunreach_fatal : 1; + /* Treat ENETUNREACH errors as fatal */ + struct xprtsec_parms cl_xprtsec; /* transport security policy */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ + atomic_t cl_swapper; /* swapfile count */ int 
cl_nodelen; /* nodename length */ - char cl_nodename[UNX_MAXNODENAME]; - struct dentry * cl_dentry; + char cl_nodename[UNX_MAXNODENAME+1]; + struct rpc_pipe_dir_head cl_pipedir_objects; struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; const struct rpc_program *cl_program; - char *cl_principal; /* target to authenticate to */ + const char * cl_principal; /* use for machine cred */ +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + struct dentry *cl_debugfs; /* debugfs directory */ +#endif + struct rpc_sysfs_client *cl_sysfs; /* sysfs directory */ + /* cl_work is only needed after cl_xpi is no longer used, + * and that are of similar size + */ + union { + struct rpc_xprt_iter cl_xpi; + struct work_struct cl_work; + }; + const struct cred *cl_cred; + unsigned int cl_max_connect; /* max number of transports not to the same IP */ + struct super_block *pipefs_sb; + atomic_t cl_task_count; }; /* @@ -80,7 +114,8 @@ struct rpc_program { struct rpc_version { u32 number; /* version number */ unsigned int nrprocs; /* number of procs */ - struct rpc_procinfo * procs; /* procedure array */ + const struct rpc_procinfo *procs; /* procedure array */ + unsigned int *counts; /* call counts */ }; /* @@ -92,14 +127,11 @@ struct rpc_procinfo { kxdrdproc_t p_decode; /* XDR decode function */ unsigned int p_arglen; /* argument hdr length (u32) */ unsigned int p_replen; /* reply hdr length (u32) */ - unsigned int p_count; /* call count */ unsigned int p_timer; /* Which RTT timer to use */ u32 p_statidx; /* Which procedure to account */ const char * p_name; /* name of procedure */ }; -#ifdef __KERNEL__ - struct rpc_create_args { struct net *net; int protocol; @@ -108,13 +140,28 @@ struct rpc_create_args { struct sockaddr *saddress; const struct rpc_timeout *timeout; const char *servername; + const char *nodename; const struct rpc_program *program; + struct rpc_stat *stats; u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; + u32 nconnect; unsigned long flags; char *client_name; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ + const struct cred *cred; + unsigned int max_connect; + struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; +}; + +struct rpc_add_xprt_test { + void (*add_xprt_test)(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, + void *calldata); + void *data; }; /* Values for "flags" field */ @@ -126,17 +173,28 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_QUIET (1UL << 6) #define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7) #define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8) +#define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) +#define RPC_CLNT_CREATE_SOFTERR (1UL << 10) +#define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) +#define RPC_CLNT_CREATE_CONNECTED (1UL << 12) +#define RPC_CLNT_CREATE_NETUNREACH_FATAL (1UL << 13) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, const struct rpc_program *, u32); -void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); struct rpc_clnt *rpc_clone_client_set_auth(struct rpc_clnt *, rpc_authflavor_t); +int rpc_switch_client_transport(struct rpc_clnt *, + struct xprt_create *, + const struct rpc_timeout *); + void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); +void rpc_task_release_transport(struct rpc_task *); void 
rpc_task_release_client(struct rpc_task *); +struct rpc_xprt *rpc_task_get_xprt(struct rpc_clnt *clnt, + struct rpc_xprt *xprt); int rpcb_create_local(struct net *); void rpcb_put_local(struct net *); @@ -147,6 +205,9 @@ int rpcb_v4_register(struct net *net, const u32 program, const char *netid); void rpcb_getport_async(struct rpc_task *); +void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages, + unsigned int base, unsigned int len, + unsigned int hdrsize); void rpc_call_start(struct rpc_task *); int rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, @@ -159,14 +220,60 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int rpc_restart_call_prepare(struct rpc_task *); int rpc_restart_call(struct rpc_task *); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); -int rpc_protocol(struct rpc_clnt *); struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); -unsigned long rpc_get_timeout(struct rpc_clnt *clnt); +size_t rpc_max_bc_payload(struct rpc_clnt *); +unsigned int rpc_num_bc_slots(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); -#endif /* __KERNEL__ */ +int rpc_clnt_iterate_for_each_xprt(struct rpc_clnt *clnt, + int (*fn)(struct rpc_clnt *, struct rpc_xprt *, void *), + void *data); + +int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, + struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt, + void *dummy); +int rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *, + int (*setup)(struct rpc_clnt *, + struct rpc_xprt_switch *, + struct rpc_xprt *, + void *), + void *data); +void rpc_set_connect_timeout(struct rpc_clnt *clnt, + unsigned long connect_timeout, + unsigned long reconnect_timeout); + +int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *, + struct rpc_xprt_switch *, + struct rpc_xprt *, + void *); +void rpc_clnt_manage_trunked_xprts(struct rpc_clnt *); +void rpc_clnt_probe_trunked_xprts(struct rpc_clnt *, + struct rpc_add_xprt_test *); + +const char *rpc_proc_name(const struct rpc_task *task); + +void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); +void rpc_clnt_xprt_switch_remove_xprt(struct rpc_clnt *, struct rpc_xprt *); +bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, + const struct sockaddr *sap); +void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt); +void rpc_clnt_disconnect(struct rpc_clnt *clnt); +void rpc_cleanup_clids(void); + +static inline int rpc_reply_expected(struct rpc_task *task) +{ + return (task->tk_msg.rpc_proc != NULL) && + (task->tk_msg.rpc_proc->p_decode != NULL); +} + +static inline void rpc_task_close_connection(struct rpc_task *task) +{ + if (task->tk_xprt) + xprt_force_disconnect(task->tk_xprt); +} #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 9385bd74c860..891f6173c951 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/debug.h * @@ -10,64 +11,106 @@ #include <uapi/linux/sunrpc/debug.h> - -/* - * Enable RPC debugging/profiling. 
- */ -#ifdef CONFIG_SUNRPC_DEBUG -#define RPC_DEBUG -#endif -#ifdef CONFIG_TRACEPOINTS -#define RPC_TRACEPOINTS -#endif -/* #define RPC_PROFILE */ - /* * Debugging macros etc */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) extern unsigned int rpc_debug; extern unsigned int nfs_debug; extern unsigned int nfsd_debug; extern unsigned int nlm_debug; #endif -#define dprintk(args...) dfprintk(FACILITY, ## args) -#define dprintk_rcu(args...) dfprintk_rcu(FACILITY, ## args) +#define dprintk(fmt, ...) \ + dfprintk(FACILITY, fmt, ##__VA_ARGS__) +#define dprintk_rcu(fmt, ...) \ + dfprintk_rcu(FACILITY, fmt, ##__VA_ARGS__) #undef ifdebug -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define ifdebug(fac) if (unlikely(rpc_debug & RPCDBG_##fac)) -# define dfprintk(fac, args...) \ - do { \ - ifdebug(fac) \ - printk(KERN_DEFAULT args); \ - } while (0) - -# define dfprintk_rcu(fac, args...) \ - do { \ - ifdebug(fac) { \ - rcu_read_lock(); \ - printk(KERN_DEFAULT args); \ - rcu_read_unlock(); \ - } \ - } while (0) +# if IS_ENABLED(CONFIG_SUNRPC_DEBUG_TRACE) +# define __sunrpc_printk(fmt, ...) trace_printk(fmt, ##__VA_ARGS__) +# else +# define __sunrpc_printk(fmt, ...) printk(KERN_DEFAULT fmt, ##__VA_ARGS__) +# endif + +# define dfprintk(fac, fmt, ...) \ +do { \ + ifdebug(fac) \ + __sunrpc_printk(fmt, ##__VA_ARGS__); \ +} while (0) + +# define dfprintk_rcu(fac, fmt, ...) \ +do { \ + ifdebug(fac) { \ + rcu_read_lock(); \ + __sunrpc_printk(fmt, ##__VA_ARGS__); \ + rcu_read_unlock(); \ + } \ +} while (0) # define RPC_IFDEBUG(x) x #else # define ifdebug(fac) if (0) -# define dfprintk(fac, args...) do {} while (0) -# define dfprintk_rcu(fac, args...) do {} while (0) +# define dfprintk(fac, fmt, ...) do {} while (0) +# define dfprintk_rcu(fac, fmt, ...) do {} while (0) # define RPC_IFDEBUG(x) #endif /* * Sysctl interface for RPC debugging */ -#ifdef RPC_DEBUG + +struct rpc_clnt; +struct rpc_xprt; + +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) void rpc_register_sysctl(void); void rpc_unregister_sysctl(void); +void sunrpc_debugfs_init(void); +void sunrpc_debugfs_exit(void); +void rpc_clnt_debugfs_register(struct rpc_clnt *); +void rpc_clnt_debugfs_unregister(struct rpc_clnt *); +void rpc_xprt_debugfs_register(struct rpc_xprt *); +void rpc_xprt_debugfs_unregister(struct rpc_xprt *); +#else +static inline void +sunrpc_debugfs_init(void) +{ + return; +} + +static inline void +sunrpc_debugfs_exit(void) +{ + return; +} + +static inline void +rpc_clnt_debugfs_register(struct rpc_clnt *clnt) +{ + return; +} + +static inline void +rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) +{ + return; +} + +static inline void +rpc_xprt_debugfs_register(struct rpc_xprt *xprt) +{ + return; +} + +static inline void +rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) +{ + return; +} #endif #endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 1f911ccb2a75..bf4ac8a0268c 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/gss_api.h * @@ -12,7 +13,6 @@ #ifndef _LINUX_SUNRPC_GSS_API_H #define _LINUX_SUNRPC_GSS_API_H -#ifdef __KERNEL__ #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/msg_prot.h> #include <linux/uio.h> @@ -21,6 +21,7 @@ struct gss_ctx { struct gss_api_mech *mech_type; void *internal_ctx_id; + unsigned int slack, align; }; #define GSS_C_NO_BUFFER ((struct xdr_netobj) 0) @@ -48,7 +49,7 @@ int gss_import_sec_context( size_t bufsize, 
struct gss_api_mech *mech, struct gss_ctx **ctx_id, - time_t *endtime, + time64_t *endtime, gfp_t gfp_mask); u32 gss_get_mic( struct gss_ctx *ctx_id, @@ -66,6 +67,7 @@ u32 gss_wrap( u32 gss_unwrap( struct gss_ctx *ctx_id, int offset, + int len, struct xdr_buf *inbuf); u32 gss_delete_sec_context( struct gss_ctx **ctx_id); @@ -73,6 +75,7 @@ u32 gss_delete_sec_context( rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *, u32 qop, u32 service); u32 gss_pseudoflavor_to_service(struct gss_api_mech *, u32 pseudoflavor); +bool gss_pseudoflavor_to_datatouch(struct gss_api_mech *, u32 pseudoflavor); char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service); struct pf_desc { @@ -81,6 +84,8 @@ struct pf_desc { u32 service; char *name; char *auth_domain_name; + struct auth_domain *domain; + bool datatouch; }; /* Different mechanisms (e.g., krb5 or spkm3) may implement gss-api, and @@ -106,7 +111,7 @@ struct gss_api_ops { const void *input_token, size_t bufsize, struct gss_ctx *ctx_id, - time_t *endtime, + time64_t *endtime, gfp_t gfp_mask); u32 (*gss_get_mic)( struct gss_ctx *ctx_id, @@ -124,6 +129,7 @@ struct gss_api_ops { u32 (*gss_unwrap)( struct gss_ctx *ctx_id, int offset, + int len, struct xdr_buf *buf); void (*gss_delete_sec_context)( void *internal_ctx_id); @@ -148,15 +154,11 @@ struct gss_api_mech *gss_mech_get_by_name(const char *); /* Similar, but get by pseudoflavor. */ struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32); -/* Fill in an array with a list of supported pseudoflavors */ -int gss_mech_list_pseudoflavors(rpc_authflavor_t *, int); - struct gss_api_mech * gss_mech_get(struct gss_api_mech *); /* For every successful gss_mech_get or gss_mech_get_by_* call there must be a * corresponding call to gss_mech_put. */ void gss_mech_put(struct gss_api_mech *); -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_GSS_API_H */ diff --git a/include/linux/sunrpc/gss_asn1.h b/include/linux/sunrpc/gss_asn1.h deleted file mode 100644 index 3ccecd0ad229..000000000000 --- a/include/linux/sunrpc/gss_asn1.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * linux/include/linux/sunrpc/gss_asn1.h - * - * minimal asn1 for generic encoding/decoding of gss tokens - * - * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, - * lib/gssapi/krb5/gssapiP_krb5.h, and others - * - * Copyright (c) 2000 The Regents of the University of Michigan. - * All rights reserved. - * - * Andy Adamson <andros@umich.edu> - */ - -/* - * Copyright 1995 by the Massachusetts Institute of Technology. - * All Rights Reserved. - * - * Export of this software from the United States of America may - * require a specific license from the United States Government. - * It is the responsibility of any person or organization contemplating - * export to obtain such a license before exporting. - * - * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and - * distribute this software and its documentation for any purpose and - * without fee is hereby granted, provided that the above copyright - * notice appear in all copies and that both that copyright notice and - * this permission notice appear in supporting documentation, and that - * the name of M.I.T. not be used in advertising or publicity pertaining - * to distribution of the software without specific, written prior - * permission. Furthermore if you modify this software you must label - * your software as modified software and not distribute it in such a - * fashion that it might be confused with the original M.I.T. software. - * M.I.T. 
makes no representations about the suitability of - * this software for any purpose. It is provided "as is" without express - * or implied warranty. - * - */ - - -#include <linux/sunrpc/gss_api.h> - -#define SIZEOF_INT 4 - -/* from gssapi_err_generic.h */ -#define G_BAD_SERVICE_NAME (-2045022976L) -#define G_BAD_STRING_UID (-2045022975L) -#define G_NOUSER (-2045022974L) -#define G_VALIDATE_FAILED (-2045022973L) -#define G_BUFFER_ALLOC (-2045022972L) -#define G_BAD_MSG_CTX (-2045022971L) -#define G_WRONG_SIZE (-2045022970L) -#define G_BAD_USAGE (-2045022969L) -#define G_UNKNOWN_QOP (-2045022968L) -#define G_NO_HOSTNAME (-2045022967L) -#define G_BAD_HOSTNAME (-2045022966L) -#define G_WRONG_MECH (-2045022965L) -#define G_BAD_TOK_HEADER (-2045022964L) -#define G_BAD_DIRECTION (-2045022963L) -#define G_TOK_TRUNC (-2045022962L) -#define G_REFLECT (-2045022961L) -#define G_WRONG_TOKID (-2045022960L) - -#define g_OID_equal(o1,o2) \ - (((o1)->len == (o2)->len) && \ - (memcmp((o1)->data,(o2)->data,(int) (o1)->len) == 0)) - -u32 g_verify_token_header( - struct xdr_netobj *mech, - int *body_size, - unsigned char **buf_in, - int toksize); - -int g_token_size( - struct xdr_netobj *mech, - unsigned int body_size); - -void g_make_token_header( - struct xdr_netobj *mech, - int body_size, - unsigned char **buf); diff --git a/include/linux/sunrpc/gss_err.h b/include/linux/sunrpc/gss_err.h index a6807867bd21..b73c329c83f2 100644 --- a/include/linux/sunrpc/gss_err.h +++ b/include/linux/sunrpc/gss_err.h @@ -34,8 +34,6 @@ #ifndef _LINUX_SUNRPC_GSS_ERR_H #define _LINUX_SUNRPC_GSS_ERR_H -#ifdef __KERNEL__ - typedef unsigned int OM_uint32; /* @@ -163,5 +161,4 @@ typedef unsigned int OM_uint32; /* XXXX This is a necessary evil until the spec is fixed */ #define GSS_S_CRED_UNAVAIL GSS_S_FAILURE -#endif /* __KERNEL__ */ #endif /* __LINUX_SUNRPC_GSS_ERR_H */ diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 5af2931cf58d..43950b5237c8 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -1,6 +1,4 @@ /* - * linux/include/linux/sunrpc/gss_krb5_types.h - * * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, * lib/gssapi/krb5/gssapiP_krb5.h, and others * @@ -36,90 +34,25 @@ * */ -#include <linux/crypto.h> +#ifndef _LINUX_SUNRPC_GSS_KRB5_H +#define _LINUX_SUNRPC_GSS_KRB5_H + +#include <crypto/skcipher.h> #include <linux/sunrpc/auth_gss.h> #include <linux/sunrpc/gss_err.h> -#include <linux/sunrpc/gss_asn1.h> /* Length of constant used in key derivation */ #define GSS_KRB5_K5CLENGTH (5) -/* Maximum key length (in bytes) for the supported crypto algorithms*/ +/* Maximum key length (in bytes) for the supported crypto algorithms */ #define GSS_KRB5_MAX_KEYLEN (32) -/* Maximum checksum function output for the supported crypto algorithms */ -#define GSS_KRB5_MAX_CKSUM_LEN (20) +/* Maximum checksum function output for the supported enctypes */ +#define GSS_KRB5_MAX_CKSUM_LEN (24) /* Maximum blocksize for the supported crypto algorithms */ #define GSS_KRB5_MAX_BLOCKSIZE (16) -struct krb5_ctx; - -struct gss_krb5_enctype { - const u32 etype; /* encryption (key) type */ - const u32 ctype; /* checksum type */ - const char *name; /* "friendly" name */ - const char *encrypt_name; /* crypto encrypt name */ - const char *cksum_name; /* crypto checksum name */ - const u16 signalg; /* signing algorithm */ - const u16 sealalg; /* sealing algorithm */ - const u32 blocksize; /* encryption blocksize */ - const u32 conflen; /* confounder length - (normally the same as - the 
blocksize) */ - const u32 cksumlength; /* checksum length */ - const u32 keyed_cksum; /* is it a keyed cksum? */ - const u32 keybytes; /* raw key len, in bytes */ - const u32 keylength; /* final key len, in bytes */ - u32 (*encrypt) (struct crypto_blkcipher *tfm, - void *iv, void *in, void *out, - int length); /* encryption function */ - u32 (*decrypt) (struct crypto_blkcipher *tfm, - void *iv, void *in, void *out, - int length); /* decryption function */ - u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *in, - struct xdr_netobj *out); /* complete key generation */ - u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, int ec, - struct page **pages); /* v2 encryption function */ - u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, u32 *headskip, - u32 *tailskip); /* v2 decryption function */ -}; - -/* krb5_ctx flags definitions */ -#define KRB5_CTX_FLAG_INITIATOR 0x00000001 -#define KRB5_CTX_FLAG_CFX 0x00000002 -#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004 - -struct krb5_ctx { - int initiate; /* 1 = initiating, 0 = accepting */ - u32 enctype; - u32 flags; - const struct gss_krb5_enctype *gk5e; /* enctype-specific info */ - struct crypto_blkcipher *enc; - struct crypto_blkcipher *seq; - struct crypto_blkcipher *acceptor_enc; - struct crypto_blkcipher *initiator_enc; - struct crypto_blkcipher *acceptor_enc_aux; - struct crypto_blkcipher *initiator_enc_aux; - u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ - u8 cksum[GSS_KRB5_MAX_KEYLEN]; - s32 endtime; - u32 seq_send; - u64 seq_send64; - struct xdr_netobj mech_used; - u8 initiator_sign[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_sign[GSS_KRB5_MAX_KEYLEN]; - u8 initiator_seal[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_seal[GSS_KRB5_MAX_KEYLEN]; - u8 initiator_integ[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; -}; - -extern spinlock_t krb5_seq_lock; - /* The length of the Kerberos GSS token header */ #define GSS_KRB5_TOK_HDR_LEN (16) @@ -143,17 +76,21 @@ enum sgn_alg { SGN_ALG_MD2_5 = 0x0001, SGN_ALG_DES_MAC = 0x0002, SGN_ALG_3 = 0x0003, /* not published */ - SGN_ALG_HMAC_MD5 = 0x0011, /* microsoft w2k; no support */ SGN_ALG_HMAC_SHA1_DES3_KD = 0x0004 }; enum seal_alg { SEAL_ALG_NONE = 0xffff, SEAL_ALG_DES = 0x0000, SEAL_ALG_1 = 0x0001, /* not published */ - SEAL_ALG_MICROSOFT_RC4 = 0x0010,/* microsoft w2k; no support */ SEAL_ALG_DES3KD = 0x0002 }; +/* + * These values are assigned by IANA and published via the + * subregistry at the link below: + * + * https://www.iana.org/assignments/kerberos-parameters/kerberos-parameters.xhtml#kerberos-parameters-2 + */ #define CKSUMTYPE_CRC32 0x0001 #define CKSUMTYPE_RSA_MD4 0x0002 #define CKSUMTYPE_RSA_MD4_DES 0x0003 @@ -164,6 +101,10 @@ enum seal_alg { #define CKSUMTYPE_HMAC_SHA1_DES3 0x000c #define CKSUMTYPE_HMAC_SHA1_96_AES128 0x000f #define CKSUMTYPE_HMAC_SHA1_96_AES256 0x0010 +#define CKSUMTYPE_CMAC_CAMELLIA128 0x0011 +#define CKSUMTYPE_CMAC_CAMELLIA256 0x0012 +#define CKSUMTYPE_HMAC_SHA256_128_AES128 0x0013 +#define CKSUMTYPE_HMAC_SHA384_192_AES256 0x0014 #define CKSUMTYPE_HMAC_MD5_ARCFOUR -138 /* Microsoft md5 hmac cksumtype */ /* from gssapi_err_krb5.h */ @@ -184,6 +125,11 @@ enum seal_alg { /* per Kerberos v5 protocol spec crypto types from the wire. * these get mapped to linux kernel crypto routines. 
+ * + * These values are assigned by IANA and published via the + * subregistry at the link below: + * + * https://www.iana.org/assignments/kerberos-parameters/kerberos-parameters.xhtml#kerberos-parameters-1 */ #define ENCTYPE_NULL 0x0000 #define ENCTYPE_DES_CBC_CRC 0x0001 /* DES cbc mode with CRC-32 */ @@ -197,8 +143,12 @@ enum seal_alg { #define ENCTYPE_DES3_CBC_SHA1 0x0010 #define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011 #define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012 +#define ENCTYPE_AES128_CTS_HMAC_SHA256_128 0x0013 +#define ENCTYPE_AES256_CTS_HMAC_SHA384_192 0x0014 #define ENCTYPE_ARCFOUR_HMAC 0x0017 #define ENCTYPE_ARCFOUR_HMAC_EXP 0x0018 +#define ENCTYPE_CAMELLIA128_CTS_CMAC 0x0019 +#define ENCTYPE_CAMELLIA256_CTS_CMAC 0x001A #define ENCTYPE_UNKNOWN 0x01ff /* @@ -220,112 +170,4 @@ enum seal_alg { #define KG_USAGE_INITIATOR_SEAL (24) #define KG_USAGE_INITIATOR_SIGN (25) -/* - * This compile-time check verifies that we will not exceed the - * slack space allotted by the client and server auth_gss code - * before they call gss_wrap(). - */ -#define GSS_KRB5_MAX_SLACK_NEEDED \ - (GSS_KRB5_TOK_HDR_LEN /* gss token header */ \ - + GSS_KRB5_MAX_CKSUM_LEN /* gss token checksum */ \ - + GSS_KRB5_MAX_BLOCKSIZE /* confounder */ \ - + GSS_KRB5_MAX_BLOCKSIZE /* possible padding */ \ - + GSS_KRB5_TOK_HDR_LEN /* encrypted hdr in v2 token */\ - + GSS_KRB5_MAX_CKSUM_LEN /* encryption hmac */ \ - + 4 + 4 /* RPC verifier */ \ - + GSS_KRB5_TOK_HDR_LEN \ - + GSS_KRB5_MAX_CKSUM_LEN) - -u32 -make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, - struct xdr_buf *body, int body_offset, u8 *cksumkey, - unsigned int usage, struct xdr_netobj *cksumout); - -u32 -make_checksum_v2(struct krb5_ctx *, char *header, int hdrlen, - struct xdr_buf *body, int body_offset, u8 *key, - unsigned int usage, struct xdr_netobj *cksum); - -u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, - struct xdr_netobj *); - -u32 gss_verify_mic_kerberos(struct gss_ctx *, struct xdr_buf *, - struct xdr_netobj *); - -u32 -gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, - struct xdr_buf *outbuf, struct page **pages); - -u32 -gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, - struct xdr_buf *buf); - - -u32 -krb5_encrypt(struct crypto_blkcipher *key, - void *iv, void *in, void *out, int length); - -u32 -krb5_decrypt(struct crypto_blkcipher *key, - void *iv, void *in, void *out, int length); - -int -gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *outbuf, - int offset, struct page **pages); - -int -gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *inbuf, - int offset); - -s32 -krb5_make_seq_num(struct krb5_ctx *kctx, - struct crypto_blkcipher *key, - int direction, - u32 seqnum, unsigned char *cksum, unsigned char *buf); - -s32 -krb5_get_seq_num(struct krb5_ctx *kctx, - unsigned char *cksum, - unsigned char *buf, int *direction, u32 *seqnum); - -int -xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen); - -u32 -krb5_derive_key(const struct gss_krb5_enctype *gk5e, - const struct xdr_netobj *inkey, - struct xdr_netobj *outkey, - const struct xdr_netobj *in_constant, - gfp_t gfp_mask); - -u32 -gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *randombits, - struct xdr_netobj *key); - -u32 -gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *randombits, - struct xdr_netobj *key); - -u32 -gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, int ec, - struct page **pages); - 
-u32 -gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, u32 *plainoffset, - u32 *plainlen); - -int -krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, - struct crypto_blkcipher *cipher, - unsigned char *cksum); - -int -krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, - struct crypto_blkcipher *cipher, - s32 seqnum); -void -gss_krb5_make_confounder(char *p, u32 conflen); +#endif /* _LINUX_SUNRPC_GSS_KRB5_H */ diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h deleted file mode 100644 index ec6234eee89c..000000000000 --- a/include/linux/sunrpc/gss_krb5_enctypes.h +++ /dev/null @@ -1,4 +0,0 @@ -/* - * Dumb way to share this static piece of information with nfsd - */ -#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2" diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h index 1565bbe86d51..0ee3f7052846 100644 --- a/include/linux/sunrpc/metrics.h +++ b/include/linux/sunrpc/metrics.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/metrics.h * @@ -27,10 +28,13 @@ #include <linux/seq_file.h> #include <linux/ktime.h> +#include <linux/spinlock.h> -#define RPC_IOSTATS_VERS "1.0" +#define RPC_IOSTATS_VERS "1.1" struct rpc_iostats { + spinlock_t om_lock; + /* * These counters give an idea about how many request * transmissions are required, on average, to complete that @@ -62,6 +66,11 @@ struct rpc_iostats { ktime_t om_queue, /* queued for xmit */ om_rtt, /* RPC RTT */ om_execute; /* RPC execution */ + /* + * The count of operations that complete with tk_status < 0. + * These statuses usually indicate error conditions. + */ + unsigned long om_error_status; } ____cacheline_aligned; struct rpc_task; @@ -76,7 +85,9 @@ struct rpc_clnt; struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *); void rpc_count_iostats(const struct rpc_task *, struct rpc_iostats *); -void rpc_print_iostats(struct seq_file *, struct rpc_clnt *); +void rpc_count_iostats_metrics(const struct rpc_task *, + struct rpc_iostats *); +void rpc_clnt_show_stats(struct seq_file *, struct rpc_clnt *); void rpc_free_iostats(struct rpc_iostats *); #else /* CONFIG_PROC_FS */ @@ -84,7 +95,12 @@ void rpc_free_iostats(struct rpc_iostats *); static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; } static inline void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) {} -static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {} +static inline void rpc_count_iostats_metrics(const struct rpc_task *task, + struct rpc_iostats *stats) +{ +} + +static inline void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt) {} static inline void rpc_free_iostats(struct rpc_iostats *stats) {} #endif /* CONFIG_PROC_FS */ diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index aadc6a04e1ac..ada17b57ca44 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/msg_prot.h * @@ -7,13 +8,8 @@ #ifndef _LINUX_SUNRPC_MSGPROT_H_ #define _LINUX_SUNRPC_MSGPROT_H_ -#ifdef __KERNEL__ /* user programs should get these from the rpc header files */ - #define RPC_VERSION 2 -/* size of an XDR encoding unit in bytes, i.e. 
32bit */ -#define XDR_UNIT (4) - /* spec defines authentication flavor as an unsigned 32 bit integer */ typedef u32 rpc_authflavor_t; @@ -24,6 +20,7 @@ enum rpc_auth_flavors { RPC_AUTH_DES = 3, RPC_AUTH_KRB = 4, RPC_AUTH_GSS = 6, + RPC_AUTH_TLS = 7, RPC_AUTH_MAXFLAVOR = 8, /* pseudoflavors: */ RPC_AUTH_GSS_KRB5 = 390003, @@ -37,6 +34,11 @@ enum rpc_auth_flavors { RPC_AUTH_GSS_SPKMP = 390011, }; +/* Maximum size (in octets) of the machinename in an AUTH_UNIX + * credential (per RFC 5531 Appendix A) + */ +#define RPC_MAX_MACHINENAME (255) + /* Maximum size (in bytes) of an rpc credential or verifier */ #define RPC_MAX_AUTH_SIZE (400) @@ -67,15 +69,17 @@ enum rpc_reject_stat { }; enum rpc_auth_stat { - RPC_AUTH_OK = 0, - RPC_AUTH_BADCRED = 1, - RPC_AUTH_REJECTEDCRED = 2, - RPC_AUTH_BADVERF = 3, - RPC_AUTH_REJECTEDVERF = 4, - RPC_AUTH_TOOWEAK = 5, + RPC_AUTH_OK = 0, /* success */ + RPC_AUTH_BADCRED = 1, /* bad credential (seal broken) */ + RPC_AUTH_REJECTEDCRED = 2, /* client must begin new session */ + RPC_AUTH_BADVERF = 3, /* bad verifier (seal broken) */ + RPC_AUTH_REJECTEDVERF = 4, /* verifier expired or replayed */ + RPC_AUTH_TOOWEAK = 5, /* rejected for security reasons */ + RPC_AUTH_INVALIDRESP = 6, /* bogus response verifier */ + RPC_AUTH_FAILED = 7, /* reason unknown */ /* RPCSEC_GSS errors */ - RPCSEC_GSS_CREDPROBLEM = 13, - RPCSEC_GSS_CTXPROBLEM = 14 + RPCSEC_GSS_CREDPROBLEM = 13, /* no credentials for user */ + RPCSEC_GSS_CTXPROBLEM = 14 /* problem with context */ }; #define RPC_MAXNETNAMELEN 256 @@ -142,19 +146,25 @@ typedef __be32 rpc_fraghdr; (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4)) /* - * RFC1833/RFC3530 rpcbind (v3+) well-known netid's. + * Well-known netids. See: + * + * https://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml */ #define RPCBIND_NETID_UDP "udp" #define RPCBIND_NETID_TCP "tcp" +#define RPCBIND_NETID_RDMA "rdma" +#define RPCBIND_NETID_SCTP "sctp" #define RPCBIND_NETID_UDP6 "udp6" #define RPCBIND_NETID_TCP6 "tcp6" +#define RPCBIND_NETID_RDMA6 "rdma6" +#define RPCBIND_NETID_SCTP6 "sctp6" #define RPCBIND_NETID_LOCAL "local" /* * Note that RFC 1833 does not put any size restrictions on the - * netid string, but all currently defined netid's fit in 4 bytes. + * netid string, but all currently defined netid's fit in 5 bytes. */ -#define RPCBIND_MAXNETIDLEN (4u) +#define RPCBIND_MAXNETIDLEN (5u) /* * Universal addresses are introduced in RFC 1833 and further spelled @@ -210,5 +220,4 @@ typedef __be32 rpc_fraghdr; /* Assume INET6_ADDRSTRLEN will always be larger than INET_ADDRSTRLEN... */ #define RPCBIND_MAXUADDRLEN RPCBIND_MAXUADDR6LEN -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/include/linux/sunrpc/rdma_rn.h b/include/linux/sunrpc/rdma_rn.h new file mode 100644 index 000000000000..7d032ca057af --- /dev/null +++ b/include/linux/sunrpc/rdma_rn.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * * Copyright (c) 2024, Oracle and/or its affiliates. 
+ */ + +#ifndef _LINUX_SUNRPC_RDMA_RN_H +#define _LINUX_SUNRPC_RDMA_RN_H + +#include <rdma/ib_verbs.h> + +/** + * rpcrdma_notification - request removal notification + */ +struct rpcrdma_notification { + void (*rn_done)(struct rpcrdma_notification *rn); + u32 rn_index; +}; + +int rpcrdma_rn_register(struct ib_device *device, + struct rpcrdma_notification *rn, + void (*done)(struct rpcrdma_notification *rn)); +void rpcrdma_rn_unregister(struct ib_device *device, + struct rpcrdma_notification *rn); +int rpcrdma_ib_client_register(void); +void rpcrdma_ib_client_unregister(void); + +#endif /* _LINUX_SUNRPC_RDMA_RN_H */ diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index aa5b582cc471..2cb406f8ff4e 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -1,10 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SUNRPC_RPC_PIPE_FS_H #define _LINUX_SUNRPC_RPC_PIPE_FS_H -#ifdef __KERNEL__ - #include <linux/workqueue.h> +struct rpc_pipe_dir_head { + struct list_head pdh_entries; + struct dentry *pdh_dentry; +}; + +struct rpc_pipe_dir_object_ops; +struct rpc_pipe_dir_object { + struct list_head pdo_head; + const struct rpc_pipe_dir_object_ops *pdo_ops; + + void *pdo_data; +}; + +struct rpc_pipe_dir_object_ops { + int (*create)(struct dentry *dir, + struct rpc_pipe_dir_object *pdo); + void (*destroy)(struct dentry *dir, + struct rpc_pipe_dir_object *pdo); +}; + struct rpc_pipe_msg { struct list_head list; void *data; @@ -64,7 +83,8 @@ enum { extern struct dentry *rpc_d_lookup_sb(const struct super_block *sb, const unsigned char *dir_name); -extern void rpc_pipefs_init_net(struct net *net); +extern int rpc_pipefs_init_net(struct net *net); +extern void rpc_pipefs_exit_net(struct net *net); extern struct super_block *rpc_get_sb_net(const struct net *net); extern void rpc_put_sb_net(const struct net *net); @@ -72,9 +92,31 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, char __user *, size_t); extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); +/* returns true if the msg is in-flight, i.e., already eaten by the peer */ +static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) { + return (msg->copied != 0 && list_empty(&msg->list)); +} + struct rpc_clnt; -extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); -extern int rpc_remove_client_dir(struct dentry *); +extern int rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); +extern int rpc_remove_client_dir(struct rpc_clnt *); + +extern void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh); +extern void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo, + const struct rpc_pipe_dir_object_ops *pdo_ops, + void *pdo_data); +extern int rpc_add_pipe_dir_object(struct net *net, + struct rpc_pipe_dir_head *pdh, + struct rpc_pipe_dir_object *pdo); +extern void rpc_remove_pipe_dir_object(struct net *net, + struct rpc_pipe_dir_head *pdh, + struct rpc_pipe_dir_object *pdo); +extern struct rpc_pipe_dir_object *rpc_find_or_alloc_pipe_dir_object( + struct net *net, + struct rpc_pipe_dir_head *pdh, + int (*match)(struct rpc_pipe_dir_object *, void *), + struct rpc_pipe_dir_object *(*alloc)(void *), + void *data); struct cache_detail; extern struct dentry *rpc_create_cache_dir(struct dentry *, @@ -83,15 +125,14 @@ extern struct dentry *rpc_create_cache_dir(struct dentry *, struct cache_detail *); extern void rpc_remove_cache_dir(struct dentry *); 
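
The new rdma_rn.h interface above lets an RPC/RDMA consumer ask to be notified before an ib_device disappears. A hedged sketch of a caller follows; the "demo" names are hypothetical, and the assumption that the done callback runs at device-removal time is based on the "request removal notification" description in the header:

struct demo_transport {
	struct ib_device		*dt_device;
	struct rpcrdma_notification	dt_rn;
};

static void demo_device_removed(struct rpcrdma_notification *rn)
{
	struct demo_transport *dt =
		container_of(rn, struct demo_transport, dt_rn);

	/* The ib_device is going away: stop posting new work on it. */
	dt->dt_device = NULL;
}

static int demo_transport_setup(struct demo_transport *dt)
{
	return rpcrdma_rn_register(dt->dt_device, &dt->dt_rn,
				   demo_device_removed);
}

A matching rpcrdma_rn_unregister() call removes the registration if the consumer is torn down before the device is.
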
-extern int rpc_rmdir(struct dentry *dentry); - struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags); void rpc_destroy_pipe_data(struct rpc_pipe *pipe); -extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *, +extern int rpc_mkpipe_dentry(struct dentry *, const char *, void *, struct rpc_pipe *); -extern int rpc_unlink(struct dentry *); +extern void rpc_unlink(struct rpc_pipe *); extern int register_rpc_pipefs(void); extern void unregister_rpc_pipefs(void); -#endif +extern bool gssd_running(struct net *net); + #endif diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index b78f16b1dea3..4af31bbc8802 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -1,4 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* + * Copyright (c) 2015-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -41,72 +43,36 @@ #define _LINUX_SUNRPC_RPC_RDMA_H #include <linux/types.h> +#include <linux/bitops.h> -struct rpcrdma_segment { - __be32 rs_handle; /* Registered memory handle */ - __be32 rs_length; /* Length of the chunk in bytes */ - __be64 rs_offset; /* Chunk virtual address or offset */ -}; +#define RPCRDMA_VERSION 1 +#define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) -/* - * read chunk(s), encoded as a linked list. - */ -struct rpcrdma_read_chunk { - __be32 rc_discrim; /* 1 indicates presence */ - __be32 rc_position; /* Position in XDR stream */ - struct rpcrdma_segment rc_target; +enum { + RPCRDMA_V1_DEF_INLINE_SIZE = 1024, }; /* - * write chunk, and reply chunk. + * XDR sizes, in quads */ -struct rpcrdma_write_chunk { - struct rpcrdma_segment wc_target; +enum { + rpcrdma_fixed_maxsz = 4, + rpcrdma_segment_maxsz = 4, + rpcrdma_readseg_maxsz = 1 + rpcrdma_segment_maxsz, + rpcrdma_readchunk_maxsz = 1 + rpcrdma_readseg_maxsz, }; /* - * write chunk(s), encoded as a counted array. 
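
The pipe-directory-object interface added to rpc_pipe_fs.h above replaces open-coded pipe setup with create/destroy callbacks that appear to run as a client's pipefs directory comes and goes. A hedged sketch of a consumer, with hypothetical "demo" names (the gss auth upcall code is the kind of in-tree user this pattern targets):

static int demo_pdo_create(struct dentry *dir,
			   struct rpc_pipe_dir_object *pdo)
{
	/* create dentries (e.g. a pipe) under @dir for pdo->pdo_data */
	return 0;
}

static void demo_pdo_destroy(struct dentry *dir,
			     struct rpc_pipe_dir_object *pdo)
{
	/* remove whatever demo_pdo_create() set up */
}

static const struct rpc_pipe_dir_object_ops demo_pdo_ops = {
	.create		= demo_pdo_create,
	.destroy	= demo_pdo_destroy,
};

static int demo_attach(struct net *net, struct rpc_pipe_dir_head *pdh,
		       struct rpc_pipe_dir_object *pdo, void *priv)
{
	rpc_init_pipe_dir_object(pdo, &demo_pdo_ops, priv);
	return rpc_add_pipe_dir_object(net, pdh, pdo);
}
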
+ * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks */ -struct rpcrdma_write_array { - __be32 wc_discrim; /* 1 indicates presence */ - __be32 wc_nchunks; /* Array count */ - struct rpcrdma_write_chunk wc_array[0]; -}; - -struct rpcrdma_msg { - __be32 rm_xid; /* Mirrors the RPC header xid */ - __be32 rm_vers; /* Version of this protocol */ - __be32 rm_credit; /* Buffers requested/granted */ - __be32 rm_type; /* Type of message (enum rpcrdma_proc) */ - union { - - struct { /* no chunks */ - __be32 rm_empty[3]; /* 3 empty chunk lists */ - } rm_nochunks; - - struct { /* no chunks and padded */ - __be32 rm_align; /* Padding alignment */ - __be32 rm_thresh; /* Padding threshold */ - __be32 rm_pempty[3]; /* 3 empty chunk lists */ - } rm_padded; - - __be32 rm_chunks[0]; /* read, write and reply chunks */ - - } rm_body; -}; - -#define RPCRDMA_HDRLEN_MIN 28 +#define RPCRDMA_HDRLEN_MIN (sizeof(__be32) * 7) +#define RPCRDMA_HDRLEN_ERR (sizeof(__be32) * 5) enum rpcrdma_errcode { ERR_VERS = 1, ERR_CHUNK = 2 }; -struct rpcrdma_err_vers { - uint32_t rdma_vers_low; /* Version range supported by peer */ - uint32_t rdma_vers_high; -}; - enum rpcrdma_proc { RDMA_MSG = 0, /* An RPC call or reply msg */ RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */ @@ -115,4 +81,121 @@ enum rpcrdma_proc { RDMA_ERROR = 4 /* An RPC RDMA encoding error */ }; +#define rdma_msg cpu_to_be32(RDMA_MSG) +#define rdma_nomsg cpu_to_be32(RDMA_NOMSG) +#define rdma_msgp cpu_to_be32(RDMA_MSGP) +#define rdma_done cpu_to_be32(RDMA_DONE) +#define rdma_error cpu_to_be32(RDMA_ERROR) + +#define err_vers cpu_to_be32(ERR_VERS) +#define err_chunk cpu_to_be32(ERR_CHUNK) + +/* + * Private extension to RPC-over-RDMA Version One. + * Message passed during RDMA-CM connection set-up. + * + * Add new fields at the end, and don't permute existing + * fields. 
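
For a quick sanity check of the new length constants against the literal values they replace: every RPC-over-RDMA header begins with rm_xid, rm_vers, rm_credit and rm_type (the four rpcrdma_fixed_maxsz words), and the smallest well-formed message follows them with three "not present" chunk-list discriminators, so RPCRDMA_HDRLEN_MIN is 4 + 3 = 7 XDR words, the same 28 bytes the old define spelled out. RPCRDMA_HDRLEN_ERR (5 words) appears to cover just the fixed words plus the rdma_error code word that follows them.
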
+ */ +struct rpcrdma_connect_private { + __be32 cp_magic; + u8 cp_version; + u8 cp_flags; + u8 cp_send_size; + u8 cp_recv_size; +} __packed; + +#define rpcrdma_cmp_magic __cpu_to_be32(0xf6ab0e18) + +enum { + RPCRDMA_CMP_VERSION = 1, + RPCRDMA_CMP_F_SND_W_INV_OK = BIT(0), +}; + +static inline u8 +rpcrdma_encode_buffer_size(unsigned int size) +{ + return (size >> 10) - 1; +} + +static inline unsigned int +rpcrdma_decode_buffer_size(u8 val) +{ + return ((unsigned int)val + 1) << 10; +} + +/** + * xdr_encode_rdma_segment - Encode contents of an RDMA segment + * @p: Pointer into a send buffer + * @handle: The RDMA handle to encode + * @length: The RDMA length to encode + * @offset: The RDMA offset to encode + * + * Return value: + * Pointer to the XDR position that follows the encoded RDMA segment + */ +static inline __be32 *xdr_encode_rdma_segment(__be32 *p, u32 handle, + u32 length, u64 offset) +{ + *p++ = cpu_to_be32(handle); + *p++ = cpu_to_be32(length); + return xdr_encode_hyper(p, offset); +} + +/** + * xdr_encode_read_segment - Encode contents of a Read segment + * @p: Pointer into a send buffer + * @position: The position to encode + * @handle: The RDMA handle to encode + * @length: The RDMA length to encode + * @offset: The RDMA offset to encode + * + * Return value: + * Pointer to the XDR position that follows the encoded Read segment + */ +static inline __be32 *xdr_encode_read_segment(__be32 *p, u32 position, + u32 handle, u32 length, + u64 offset) +{ + *p++ = cpu_to_be32(position); + return xdr_encode_rdma_segment(p, handle, length, offset); +} + +/** + * xdr_decode_rdma_segment - Decode contents of an RDMA segment + * @p: Pointer to the undecoded RDMA segment + * @handle: Upon return, the RDMA handle + * @length: Upon return, the RDMA length + * @offset: Upon return, the RDMA offset + * + * Return value: + * Pointer to the XDR item that follows the RDMA segment + */ +static inline __be32 *xdr_decode_rdma_segment(__be32 *p, u32 *handle, + u32 *length, u64 *offset) +{ + *handle = be32_to_cpup(p++); + *length = be32_to_cpup(p++); + return xdr_decode_hyper(p, offset); +} + +/** + * xdr_decode_read_segment - Decode contents of a Read segment + * @p: Pointer to the undecoded Read segment + * @position: Upon return, the segment's position + * @handle: Upon return, the RDMA handle + * @length: Upon return, the RDMA length + * @offset: Upon return, the RDMA offset + * + * Return value: + * Pointer to the XDR item that follows the Read segment + */ +static inline __be32 *xdr_decode_read_segment(__be32 *p, u32 *position, + u32 *handle, u32 *length, + u64 *offset) +{ + *position = be32_to_cpup(p++); + return xdr_decode_rdma_segment(p, handle, length, offset); +} + #endif /* _LINUX_SUNRPC_RPC_RDMA_H */ diff --git a/include/linux/sunrpc/rpc_rdma_cid.h b/include/linux/sunrpc/rpc_rdma_cid.h new file mode 100644 index 000000000000..be24ab2baa6a --- /dev/null +++ b/include/linux/sunrpc/rpc_rdma_cid.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * * Copyright (c) 2020, Oracle and/or its affiliates. + */ + +#ifndef RPC_RDMA_CID_H +#define RPC_RDMA_CID_H + +/* + * The rpc_rdma_cid struct records completion ID information. A + * completion ID matches an incoming Send or Receive completion + * to a Completion Queue and to a previous ib_post_*(). The ID + * can then be displayed in an error message or recorded in a + * trace record. + * + * This struct is shared between the server and client RPC/RDMA + * transport implementations. 
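
Since the segment helpers above are self-contained, a small round trip shows how the pieces line up. The handle/length/offset values here are arbitrary, and the sketch assumes it is compiled in a context that already includes this header:

static void example_read_segment_roundtrip(void)
{
	__be32 buf[rpcrdma_readseg_maxsz], *p;
	u32 position, handle, length;
	u64 offset;

	p = xdr_encode_read_segment(buf, 2, 0xdeadbeef, 4096, 0x10000);
	/* p - buf == 5: the position word plus a 4-word RDMA segment */

	xdr_decode_read_segment(buf, &position, &handle, &length, &offset);
	/* position == 2, handle == 0xdeadbeef, length == 4096, offset == 0x10000 */
}

The connect-private buffer sizes use the same 1 KB granularity: rpcrdma_encode_buffer_size(4096) yields 3, and rpcrdma_decode_buffer_size(3) gives back 4096.
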
+ */ +struct rpc_rdma_cid { + u32 ci_queue_id; + int ci_completion_id; +}; + +#endif /* RPC_RDMA_CID_H */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 6d870353674a..ccba79ebf893 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/sched.h * @@ -13,7 +14,7 @@ #include <linux/ktime.h> #include <linux/sunrpc/types.h> #include <linux/spinlock.h> -#include <linux/wait.h> +#include <linux/wait_bit.h> #include <linux/workqueue.h> #include <linux/sunrpc/xdr.h> @@ -22,10 +23,10 @@ */ struct rpc_procinfo; struct rpc_message { - struct rpc_procinfo * rpc_proc; /* Procedure information */ + const struct rpc_procinfo *rpc_proc; /* Procedure information */ void * rpc_argp; /* Arguments */ void * rpc_resp; /* Result */ - struct rpc_cred * rpc_cred; /* Credentials */ + const struct cred * rpc_cred; /* Credentials */ }; struct rpc_call_ops; @@ -34,7 +35,17 @@ struct rpc_wait { struct list_head list; /* wait queue links */ struct list_head links; /* Links to related tasks */ struct list_head timer_list; /* Timer list */ - unsigned long expires; +}; + +/* + * This describes a timeout strategy + */ +struct rpc_timeout { + unsigned long to_initval, /* initial timeout */ + to_maxval, /* max timeout */ + to_increment; /* if !exponential */ + unsigned int to_retries; /* max # of retries */ + unsigned char to_exponential; }; /* @@ -42,50 +53,55 @@ struct rpc_wait { */ struct rpc_task { atomic_t tk_count; /* Reference count */ + int tk_status; /* result of last operation */ struct list_head tk_task; /* global list of tasks */ - struct rpc_clnt * tk_client; /* RPC client */ - struct rpc_rqst * tk_rqstp; /* RPC request */ - - /* - * RPC call state - */ - struct rpc_message tk_msg; /* RPC call info */ /* * callback to be executed after waking up * action next procedure for async tasks - * tk_ops caller callbacks */ void (*tk_callback)(struct rpc_task *); void (*tk_action)(struct rpc_task *); - const struct rpc_call_ops *tk_ops; - void * tk_calldata; unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned long tk_runstate; /* Task run status */ - struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could - * be any workqueue - */ + struct rpc_wait_queue *tk_waitqueue; /* RPC wait queue we're on */ union { struct work_struct tk_work; /* Async task work queue */ struct rpc_wait tk_wait; /* RPC wait */ } u; + /* + * RPC call state + */ + struct rpc_message tk_msg; /* RPC call info */ + void * tk_calldata; /* Caller private data */ + const struct rpc_call_ops *tk_ops; /* Caller callbacks */ + + struct rpc_clnt * tk_client; /* RPC client */ + struct rpc_xprt * tk_xprt; /* Transport */ + struct rpc_cred * tk_op_cred; /* cred being operated on */ + + struct rpc_rqst * tk_rqstp; /* RPC request */ + + struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could + * be any workqueue + */ ktime_t tk_start; /* RPC task init timestamp */ pid_t tk_owner; /* Process id for batching tasks */ - int tk_status; /* result of last operation */ + + int tk_rpc_status; /* Result of last RPC operation */ unsigned short tk_flags; /* misc flags */ unsigned short tk_timeouts; /* maj timeouts */ -#ifdef RPC_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) unsigned short tk_pid; /* debugging aid */ #endif unsigned char tk_priority : 2,/* Task priority */ tk_garb_retry : 2, - tk_cred_retry : 2, - tk_rebind_retry : 2; + tk_cred_retry : 2; }; typedef 
void (*rpc_action)(struct rpc_task *); @@ -100,6 +116,8 @@ struct rpc_call_ops { struct rpc_task_setup { struct rpc_task *task; struct rpc_clnt *rpc_client; + struct rpc_xprt *rpc_xprt; + struct rpc_cred *rpc_op_cred; /* credential being operated on */ const struct rpc_message *rpc_message; const struct rpc_call_ops *callback_ops; void *callback_data; @@ -113,49 +131,48 @@ struct rpc_task_setup { */ #define RPC_TASK_ASYNC 0x0001 /* is an async task */ #define RPC_TASK_SWAPPER 0x0002 /* is swapping in/out */ +#define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */ +#define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ -#define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ +#define RPC_TASK_NETUNREACH_FATAL 0x0040 /* ENETUNREACH is fatal */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ -#define RPC_TASK_KILLED 0x0100 /* task was killed */ +#define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ #define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ +#define RPC_TASK_NOCONNECT 0x2000 /* return ENOTCONN if not connected */ +#define RPC_TASK_NO_RETRANS_TIMEOUT 0x4000 /* wait forever for a reply */ +#define RPC_TASK_CRED_NOREF 0x8000 /* No refcount on the credential */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) -#define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) -#define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) #define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT)) #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) #define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) +#define RPC_IS_MOVEABLE(t) ((t)->tk_flags & RPC_TASK_MOVEABLE) -#define RPC_TASK_RUNNING 0 -#define RPC_TASK_QUEUED 1 -#define RPC_TASK_ACTIVE 2 +enum { + RPC_TASK_RUNNING, + RPC_TASK_QUEUED, + RPC_TASK_ACTIVE, + RPC_TASK_NEED_XMIT, + RPC_TASK_NEED_RECV, + RPC_TASK_MSG_PIN_WAIT, +}; -#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) -#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_test_and_set_running(t) \ test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) -#define rpc_clear_running(t) \ - do { \ - smp_mb__before_clear_bit(); \ - clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \ - smp_mb__after_clear_bit(); \ - } while (0) +#define rpc_clear_running(t) clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define RPC_IS_QUEUED(t) test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) #define rpc_set_queued(t) set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) -#define rpc_clear_queued(t) \ - do { \ - smp_mb__before_clear_bit(); \ - clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \ - smp_mb__after_clear_bit(); \ - } while (0) +#define rpc_clear_queued(t) clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate) #define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) +#define RPC_SIGNALLED(t) (READ_ONCE(task->tk_rpc_status) == -ERESTARTSYS) + /* * Task priorities. 
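
Putting the task-setup structure and flag bits above together, here is a hedged sketch of dispatching an asynchronous call. rpc_run_task() and rpc_put_task() are declared just below, the rpc_call_done member of rpc_call_ops is not visible in this hunk, and the "demo" names are hypothetical:

static void demo_call_done(struct rpc_task *task, void *calldata)
{
	if (task->tk_status < 0)
		pr_warn("demo call failed: %d\n", task->tk_status);
}

static const struct rpc_call_ops demo_call_ops = {
	.rpc_call_done	= demo_call_done,
};

static int demo_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg)
{
	struct rpc_task_setup setup = {
		.rpc_client	= clnt,
		.rpc_message	= msg,
		.callback_ops	= &demo_call_ops,
		.flags		= RPC_TASK_ASYNC | RPC_TASK_SOFT,
	};
	struct rpc_task *task;

	task = rpc_run_task(&setup);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}
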
* Note: if you change these, you must also change @@ -168,9 +185,9 @@ struct rpc_task_setup { #define RPC_NR_PRIORITY (1 + RPC_PRIORITY_PRIVILEGED - RPC_PRIORITY_LOW) struct rpc_timer { - struct timer_list timer; struct list_head list; unsigned long expires; + struct delayed_work dwork; }; /* @@ -179,13 +196,12 @@ struct rpc_timer { struct rpc_wait_queue { spinlock_t lock; struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ - pid_t owner; /* process id of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ - unsigned short qlen; /* total # tasks waiting in queue */ + unsigned int qlen; /* total # tasks waiting in queue */ struct rpc_timer timer_list; -#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) const char * name; #endif }; @@ -195,7 +211,6 @@ struct rpc_wait_queue { * from a single cookie. The aim is to improve * performance of NFS operations such as read/write. */ -#define RPC_BATCH_COUNT 16 #define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0) /* @@ -204,52 +219,71 @@ struct rpc_wait_queue { struct rpc_task *rpc_new_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_task(const struct rpc_task_setup *); struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, - const struct rpc_call_ops *ops); + struct rpc_timeout *timeout); void rpc_put_task(struct rpc_task *); void rpc_put_task_async(struct rpc_task *); +bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status); +void rpc_task_try_cancel(struct rpc_task *task, int error); +void rpc_signal_task(struct rpc_task *); void rpc_exit_task(struct rpc_task *); void rpc_exit(struct rpc_task *, int); void rpc_release_calldata(const struct rpc_call_ops *, void *); void rpc_killall_tasks(struct rpc_clnt *); +unsigned long rpc_cancel_tasks(struct rpc_clnt *clnt, int error, + bool (*fnmatch)(const struct rpc_task *, + const void *), + const void *data); void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_destroy_wait_queue(struct rpc_wait_queue *); +unsigned long rpc_task_timeout(const struct rpc_task *task); +void rpc_sleep_on_timeout(struct rpc_wait_queue *queue, + struct rpc_task *task, + rpc_action action, + unsigned long timeout); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action); +void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *queue, + struct rpc_task *task, + unsigned long timeout, + int priority); void rpc_sleep_on_priority(struct rpc_wait_queue *, struct rpc_task *, - rpc_action action, int priority); void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); +void rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *, + struct rpc_task *, + int); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); +struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq, + struct rpc_wait_queue *, + bool (*)(struct rpc_task *, void *), + void *); struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *, bool (*)(struct rpc_task *, void *), void *); void rpc_wake_up_status(struct rpc_wait_queue *, int); void rpc_delay(struct rpc_task *, unsigned long); -void * rpc_malloc(struct rpc_task *, size_t); -void rpc_free(void *); +int 
rpc_malloc(struct rpc_task *); +void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); -int __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *)); -#ifdef RPC_DEBUG +int rpc_wait_for_completion_task(struct rpc_task *task); +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct net; void rpc_show_tasks(struct net *); #endif int rpc_init_mempool(void); void rpc_destroy_mempool(void); extern struct workqueue_struct *rpciod_workqueue; +extern struct workqueue_struct *xprtiod_workqueue; void rpc_prepare_task(struct rpc_task *task); +gfp_t rpc_task_gfp_mask(void); -static inline int rpc_wait_for_completion_task(struct rpc_task *task) -{ - return __rpc_wait_for_completion_task(task, NULL); -} - -#if defined(RPC_DEBUG) || defined (RPC_TRACEPOINTS) +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) static inline const char * rpc_qname(const struct rpc_wait_queue *q) { return ((q && q->name) ? q->name : "unknown"); @@ -267,4 +301,20 @@ static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, } #endif +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int rpc_clnt_swap_activate(struct rpc_clnt *clnt); +void rpc_clnt_swap_deactivate(struct rpc_clnt *clnt); +#else +static inline int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + return -EINVAL; +} + +static inline void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ +} +#endif /* CONFIG_SUNRPC_SWAP */ + #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h index edc64219f92b..3ce1550d1beb 100644 --- a/include/linux/sunrpc/stats.h +++ b/include/linux/sunrpc/stats.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/stats.h * @@ -42,39 +43,30 @@ struct net; #ifdef CONFIG_PROC_FS int rpc_proc_init(struct net *); void rpc_proc_exit(struct net *); -#else -static inline int rpc_proc_init(struct net *net) -{ - return 0; -} - -static inline void rpc_proc_exit(struct net *net) -{ -} -#endif - -#ifdef MODULE -void rpc_modcount(struct inode *, int); -#endif - -#ifdef CONFIG_PROC_FS struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *); void rpc_proc_unregister(struct net *,const char *); void rpc_proc_zero(const struct rpc_program *); struct proc_dir_entry * svc_proc_register(struct net *, struct svc_stat *, - const struct file_operations *); + const struct proc_ops *); void svc_proc_unregister(struct net *, const char *); void svc_seq_show(struct seq_file *, const struct svc_stat *); #else +static inline int rpc_proc_init(struct net *net) +{ + return 0; +} +static inline void rpc_proc_exit(struct net *net) +{ +} static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; } static inline void rpc_proc_unregister(struct net *net, const char *p) {} static inline void rpc_proc_zero(const struct rpc_program *p) {} static inline struct proc_dir_entry *svc_proc_register(struct net *net, struct svc_stat *s, - const struct file_operations *f) { return NULL; } + const struct proc_ops *proc_ops) { return NULL; } static inline void svc_proc_unregister(struct net *net, const char *p) {} static inline void svc_seq_show(struct seq_file *seq, diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1f0216b9a6c9..5506d20857c3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/svc.h * @@ -16,21 +17,11 @@ #include <linux/sunrpc/xdr.h> 
#include <linux/sunrpc/auth.h> #include <linux/sunrpc/svcauth.h> +#include <linux/lwq.h> #include <linux/wait.h> #include <linux/mm.h> - -/* - * This is the RPC server thread function prototype - */ -typedef int (*svc_thread_fn)(void *); - -/* statistics for svc_pool structures */ -struct svc_pool_stats { - unsigned long packets; - unsigned long sockets_queued; - unsigned long threads_woken; - unsigned long threads_timedout; -}; +#include <linux/pagevec.h> +#include <linux/kthread.h> /* * @@ -43,16 +34,28 @@ struct svc_pool_stats { * node traffic on multi-node NUMA NFS servers. */ struct svc_pool { - unsigned int sp_id; /* pool id; also node id on NUMA */ - spinlock_t sp_lock; /* protects all fields */ - struct list_head sp_threads; /* idle server threads */ - struct list_head sp_sockets; /* pending sockets */ + unsigned int sp_id; /* pool id; also node id on NUMA */ + struct lwq sp_xprts; /* pending transports */ unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ - struct svc_pool_stats sp_stats; /* statistics on pool operation */ - int sp_task_pending;/* has pending task */ + struct llist_head sp_idle_threads; /* idle server threads */ + + /* statistics on pool operation */ + struct percpu_counter sp_messages_arrived; + struct percpu_counter sp_sockets_queued; + struct percpu_counter sp_threads_woken; + + unsigned long sp_flags; } ____cacheline_aligned_in_smp; +/* bits for sp_flags */ +enum { + SP_TASK_PENDING, /* still work to do even if no xprt is queued */ + SP_NEED_VICTIM, /* One thread needs to agree to exit */ + SP_VICTIM_REMAINS, /* One thread needs to actually exit */ +}; + + /* * RPC service. * @@ -64,57 +67,41 @@ struct svc_pool { * We currently do not support more than one RPC program per daemon. */ struct svc_serv { - struct svc_program * sv_program; /* RPC program */ + struct svc_program * sv_programs; /* RPC programs */ struct svc_stat * sv_stats; /* RPC statistics */ spinlock_t sv_lock; + unsigned int sv_nprogs; /* Number of sv_programs */ unsigned int sv_nrthreads; /* # of server threads */ - unsigned int sv_maxconn; /* max connections allowed or - * '0' causing max to be based - * on number of threads. */ - unsigned int sv_max_payload; /* datagram payload size */ unsigned int sv_max_mesg; /* max_payload + 1 page for overheads */ unsigned int sv_xdrsize; /* XDR buffer size */ struct list_head sv_permsocks; /* all permanent sockets */ struct list_head sv_tempsocks; /* all temporary sockets */ - int sv_tmpcnt; /* count of temporary sockets */ + int sv_tmpcnt; /* count of temporary "valid" sockets */ struct timer_list sv_temptimer; /* timer for aging temporary sockets */ char * sv_name; /* service name */ unsigned int sv_nrpools; /* number of thread pools */ + bool sv_is_pooled; /* is this a pooled service? */ struct svc_pool * sv_pools; /* array of thread pools */ + int (*sv_threadfn)(void *data); - void (*sv_shutdown)(struct svc_serv *serv, - struct net *net); - /* Callback to use when last thread - * exits. 
- */ - - struct module * sv_module; /* optional module to count when - * adding threads */ - svc_thread_fn sv_function; /* main function for threads */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) - struct list_head sv_cb_list; /* queue for callback requests + struct lwq sv_cb_list; /* queue for callback requests * that arrive over the same * connection */ - spinlock_t sv_cb_lock; /* protects the svc_cb_list */ - wait_queue_head_t sv_cb_waitq; /* sleep here if there are no - * entries in the svc_cb_list */ - struct svc_xprt *sv_bc_xprt; /* callback on fore channel */ + bool sv_bc_enabled; /* service uses backchannel */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; -/* - * We use sv_nrthreads as a reference count. svc_destroy() drops - * this refcount, so we need to bump it up around operations that - * change the number of threads. Horrible, but there it is. - * Should be called with the BKL held. - */ -static inline void svc_get(struct svc_serv *serv) -{ - serv->sv_nrthreads++; -} +/* This is used by pool_stats to find and lock an svc */ +struct svc_info { + struct svc_serv *serv; + struct mutex *mutex; +}; + +void svc_destroy(struct svc_serv **svcp); /* * Maximum payload size supported by a kernel RPC server. @@ -123,7 +110,7 @@ static inline void svc_get(struct svc_serv *serv) * * These happen to all be powers of 2, which is not strictly * necessary but helps enforce the real limitation, which is - * that they should be multiples of PAGE_CACHE_SIZE. + * that they should be multiples of PAGE_SIZE. * * For UDP transports, a block plus NFS,RPC, and UDP headers * has to fit into the IP datagram limit of 64K. The largest @@ -132,28 +119,27 @@ static inline void svc_get(struct svc_serv *serv) * Linux limit; someone who cares more about NFS/UDP performance * can test a larger number. * - * For TCP transports we have more freedom. A size of 1MB is - * chosen to match the client limit. Other OSes are known to - * have larger limits, but those numbers are probably beyond - * the point of diminishing returns. + * For non-UDP transports we have more freedom. A size of 4MB is + * chosen to accommodate clients that support larger I/O sizes. */ -#define RPCSVC_MAXPAYLOAD (1*1024*1024u) -#define RPCSVC_MAXPAYLOAD_TCP RPCSVC_MAXPAYLOAD -#define RPCSVC_MAXPAYLOAD_UDP (32*1024u) +enum { + RPCSVC_MAXPAYLOAD = 4 * 1024 * 1024, + RPCSVC_MAXPAYLOAD_TCP = RPCSVC_MAXPAYLOAD, + RPCSVC_MAXPAYLOAD_UDP = 32 * 1024, +}; extern u32 svc_max_payload(const struct svc_rqst *rqstp); /* - * RPC Requsts and replies are stored in one or more pages. + * RPC Requests and replies are stored in one or more pages. * We maintain an array of pages for each server thread. * Requests are copied into these pages as they arrive. Remaining * pages are available to write the reply into. * - * Pages are sent using ->sendpage so each server thread needs to - * allocate more to replace those used in sending. To help keep track - * of these pages we have a receive list where all pages initialy live, - * and a send list where pages are moved to when there are to be part - * of a reply. + * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each server thread + * needs to allocate more to replace those used in sending. To help keep track + * of these pages we have a receive list where all pages initialy live, and a + * send list where pages are moved to when there are to be part of a reply. 
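
With sv_threadfn replacing the old module/function pair and svc_destroy() now taking a double pointer, the service life cycle takes roughly the shape below. This is a hedged sketch only: svc_create_pooled() and svc_set_num_threads() are declared further down in this header, the bufsize value is arbitrary, and the "demo" program, stats and thread function are hypothetical.

static struct svc_program demo_program;	/* hypothetical program table */
static struct svc_stat demo_stats;
static struct svc_serv *demo_serv;

static int demo_thread(void *data);	/* e.g. a loop like the one sketched later */

static int demo_start(int nrthreads)
{
	demo_serv = svc_create_pooled(&demo_program, 1, &demo_stats,
				      64 * 1024, demo_thread);
	if (!demo_serv)
		return -ENOMEM;
	return svc_set_num_threads(demo_serv, NULL, nrthreads);
}

static void demo_stop(void)
{
	svc_set_num_threads(demo_serv, NULL, 0);
	svc_destroy(&demo_serv);	/* releases the serv and clears the caller's pointer */
}
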
* * We use xdr_buf for holding responses as it fits well with NFS * read responses (that have a header, and some data pages, and possibly @@ -164,54 +150,23 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); * list. xdr_buf.tail points to the end of the first page. * This assumes that the non-page part of an rpc reply will fit * in a page - NFSd ensures this. lockd also has no trouble. - * - * Each request/reply pair can have at most one "payload", plus two pages, - * one for the request, and one for the reply. - * We using ->sendfile to return read data, we might need one extra page - * if the request is not page-aligned. So add another '1'. */ -#define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \ - + 2 + 1) - -static inline u32 svc_getnl(struct kvec *iov) -{ - __be32 val, *vp; - vp = iov->iov_base; - val = *vp++; - iov->iov_base = (void*)vp; - iov->iov_len -= sizeof(__be32); - return ntohl(val); -} - -static inline void svc_putnl(struct kvec *iov, u32 val) -{ - __be32 *vp = iov->iov_base + iov->iov_len; - *vp = htonl(val); - iov->iov_len += sizeof(__be32); -} -static inline __be32 svc_getu32(struct kvec *iov) -{ - __be32 val, *vp; - vp = iov->iov_base; - val = *vp++; - iov->iov_base = (void*)vp; - iov->iov_len -= sizeof(__be32); - return val; -} - -static inline void svc_ungetu32(struct kvec *iov) -{ - __be32 *vp = (__be32 *)iov->iov_base; - iov->iov_base = (void *)(vp - 1); - iov->iov_len += sizeof(*vp); -} - -static inline void svc_putu32(struct kvec *iov, __be32 val) +/** + * svc_serv_maxpages - maximum count of pages needed for one RPC message + * @serv: RPC service context + * + * Returns a count of pages or vectors that can hold the maximum + * size RPC message for @serv. + * + * Each request/reply pair can have at most one "payload", plus two + * pages, one for the request, and one for the reply. + * nfsd_splice_actor() might need an extra page when a READ payload + * is not page-aligned. + */ +static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv) { - __be32 *vp = iov->iov_base + iov->iov_len; - *vp = val; - iov->iov_len += sizeof(__be32); + return DIV_ROUND_UP(serv->sv_max_mesg, PAGE_SIZE) + 2 + 1; } /* @@ -219,8 +174,9 @@ static inline void svc_putu32(struct kvec *iov, __be32 val) * processed. 
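
As a worked example of svc_serv_maxpages(): for a TCP service created at the 4 MB RPCSVC_MAXPAYLOAD limit, sv_max_mesg is the payload plus one page of overhead, so on a 4 KB-page system DIV_ROUND_UP(4 MB + 4 KB, 4 KB) = 1025, and the helper returns 1025 + 2 + 1 = 1028 pages per request/reply pair.
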
*/ struct svc_rqst { - struct list_head rq_list; /* idle list */ struct list_head rq_all; /* all threads list */ + struct llist_node rq_idle; /* On the idle list */ + struct rcu_head rq_rcu_head; /* for RCU deferred kfree */ struct svc_xprt * rq_xprt; /* transport ptr */ struct sockaddr_storage rq_addr; /* peer address */ @@ -231,56 +187,81 @@ struct svc_rqst { struct svc_serv * rq_server; /* RPC service definition */ struct svc_pool * rq_pool; /* thread pool */ - struct svc_procedure * rq_procinfo; /* procedure info */ + const struct svc_procedure *rq_procinfo;/* procedure info */ struct auth_ops * rq_authop; /* authentication flavour */ struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ - int rq_usedeferral; /* use deferral */ - size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; + struct xdr_stream rq_arg_stream; + struct xdr_stream rq_res_stream; + struct folio *rq_scratch_folio; struct xdr_buf rq_res; - struct page * rq_pages[RPCSVC_MAXPAGES]; + unsigned long rq_maxpages; /* num of entries in rq_pages */ + struct page * *rq_pages; struct page * *rq_respages; /* points into rq_pages */ - int rq_resused; /* number of pages used for result */ struct page * *rq_next_page; /* next reply page to use */ + struct page * *rq_page_end; /* one past the last page */ - struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ + struct folio_batch rq_fbatch; + struct bio_vec *rq_bvec; __be32 rq_xid; /* transmission id */ u32 rq_prog; /* program number */ u32 rq_vers; /* program version */ u32 rq_proc; /* procedure number */ u32 rq_prot; /* IP protocol */ - unsigned short - rq_secure : 1; /* secure port */ + int rq_cachetype; /* catering to nfsd */ + unsigned long rq_flags; /* flags field */ + ktime_t rq_qtime; /* enqueue time */ void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ + __be32 *rq_accept_statp; void * rq_auth_data; /* flavor-specific data */ - + __be32 rq_auth_stat; /* authentication status */ + int rq_auth_slack; /* extra space xdr code + * should leave in head + * for krb5i, krb5p. + */ int rq_reserved; /* space on socket outq * reserved for this request */ + ktime_t rq_stime; /* start time */ struct cache_req rq_chandle; /* handle passed to caches for * request delaying */ - bool rq_dropme; /* Catering to nfsd */ struct auth_domain * rq_client; /* RPC peer info */ struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ - int rq_cachetype; - struct svc_cacherep * rq_cacherep; /* cache info */ - int rq_splice_ok; /* turned off in gss privacy - * to prevent encrypting page - * cache pages */ - wait_queue_head_t rq_wait; /* synchronization */ struct task_struct *rq_task; /* service thread */ + struct net *rq_bc_net; /* pointer to backchannel's + * net namespace + */ + + int rq_err; /* Thread sets this to inidicate + * initialisation success. + */ + + unsigned long bc_to_initval; + unsigned int bc_to_retries; + unsigned int rq_status_counter; /* RPC processing counter */ + void **rq_lease_breaker; /* The v4 client breaking a lease */ }; -#define SVC_NET(svc_rqst) (svc_rqst->rq_xprt->xpt_net) +/* bits for rq_flags */ +enum { + RQ_SECURE, /* secure port */ + RQ_LOCAL, /* local request */ + RQ_USEDEFERRAL, /* use deferral */ + RQ_DROPME, /* drop current reply */ + RQ_VICTIM, /* Have agreed to shut down */ + RQ_DATA, /* request has data */ +}; + +#define SVC_NET(rqst) (rqst->rq_xprt ? 
rqst->rq_xprt->xpt_net : rqst->rq_bc_net) /* * Rigorous type checking on sockaddr type conversions @@ -315,38 +296,46 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst) return (struct sockaddr *) &rqst->rq_daddr; } -/* - * Check buffer bounds after decoding arguments +/** + * svc_thread_should_stop - check if this thread should stop + * @rqstp: the thread that might need to stop + * + * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS + * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming + * the victim using this function. It should then promptly call + * svc_exit_thread() to complete the process, clearing SP_VICTIM_REMAINS + * so the task waiting for a thread to exit can wake and continue. + * + * Return values: + * %true: caller should invoke svc_exit_thread() + * %false: caller should do nothing */ -static inline int -xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p) -{ - char *cp = (char *)p; - struct kvec *vec = &rqstp->rq_arg.head[0]; - return cp >= (char*)vec->iov_base - && cp <= (char*)vec->iov_base + vec->iov_len; -} - -static inline int -xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p) +static inline bool svc_thread_should_stop(struct svc_rqst *rqstp) { - struct kvec *vec = &rqstp->rq_res.head[0]; - char *cp = (char*)p; - - vec->iov_len = cp - (char*)vec->iov_base; + if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags)) + set_bit(RQ_VICTIM, &rqstp->rq_flags); - return vec->iov_len <= PAGE_SIZE; + return test_bit(RQ_VICTIM, &rqstp->rq_flags); } -static inline void svc_free_res_pages(struct svc_rqst *rqstp) +/** + * svc_thread_init_status - report whether thread has initialised successfully + * @rqstp: the thread in question + * @err: errno code + * + * After performing any initialisation that could fail, and before starting + * normal work, each sunrpc svc_thread must call svc_thread_init_status() + * with an appropriate error, or zero. + * + * If zero is passed, the thread is ready and must continue until + * svc_thread_should_stop() returns true. If a non-zero error is passed + * the call will not return - the thread will exit. 
+ */ +static inline void svc_thread_init_status(struct svc_rqst *rqstp, int err) { - while (rqstp->rq_next_page != rqstp->rq_respages) { - struct page **pp = --rqstp->rq_next_page; - if (*pp) { - put_page(*pp); - *pp = NULL; - } - } + store_release_wake_up(&rqstp->rq_err, err); + if (err) + kthread_exit(1); } struct svc_deferred_req { @@ -356,26 +345,42 @@ struct svc_deferred_req { size_t addrlen; struct sockaddr_storage daddr; /* where reply must come from */ size_t daddrlen; + void *xprt_ctxt; struct cache_deferred_req handle; - size_t xprt_hlen; int argslen; - __be32 args[0]; + __be32 args[]; +}; + +struct svc_process_info { + union { + int (*dispatch)(struct svc_rqst *rqstp); + struct { + unsigned int lovers; + unsigned int hivers; + } mismatch; + }; }; /* - * List of RPC programs on the same transport endpoint + * RPC program - an array of these can use the same transport endpoint */ struct svc_program { - struct svc_program * pg_next; /* other programs (same xprt) */ u32 pg_prog; /* program number */ unsigned int pg_lovers; /* lowest version */ - unsigned int pg_hivers; /* lowest version */ + unsigned int pg_hivers; /* highest version */ unsigned int pg_nvers; /* number of versions */ - struct svc_version ** pg_vers; /* version array */ + const struct svc_version **pg_vers; /* version array */ char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ - struct svc_stat * pg_stats; /* rpc statistics */ - int (*pg_authenticate)(struct svc_rqst *); + enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp); + __be32 (*pg_init_request)(struct svc_rqst *, + const struct svc_program *, + struct svc_process_info *); + int (*pg_rpcbind_set)(struct net *net, + const struct svc_program *, + u32 version, int family, + unsigned short proto, + unsigned short port); }; /* @@ -384,63 +389,90 @@ struct svc_program { struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ - struct svc_procedure * vs_proc; /* per-procedure info */ + const struct svc_procedure *vs_proc; /* per-procedure info */ + unsigned long __percpu *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ - unsigned int vs_hidden : 1; /* Don't register with portmapper. - * Only used for nfsacl so far. */ + /* Don't register with rpcbind */ + bool vs_hidden; - /* Override dispatch function (e.g. when caching replies). - * A return value of 0 means drop the request. - * vs_dispatch == NULL means use default dispatcher. 
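
The two helpers above describe a start-up handshake and a shutdown handshake; a service thread built on them looks roughly like this. Hedged sketch: svc_recv() is the receive/dispatch entry point declared in svc_xprt.h rather than in this header, and the thread function name is hypothetical.

static int demo_svc_thread(void *data)
{
	struct svc_rqst *rqstp = data;

	/* Report successful start-up before entering the main loop. */
	svc_thread_init_status(rqstp, 0);

	while (!svc_thread_should_stop(rqstp))
		svc_recv(rqstp);

	svc_exit_thread(rqstp);
	return 0;
}
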
- */ - int (*vs_dispatch)(struct svc_rqst *, __be32 *); + /* Don't care if the rpcbind registration fails */ + bool vs_rpcb_optnl; + + /* Need xprt with congestion control */ + bool vs_need_cong_ctrl; + + /* Dispatch function */ + int (*vs_dispatch)(struct svc_rqst *rqstp); }; /* * RPC procedure info */ -typedef __be32 (*svc_procfunc)(struct svc_rqst *, void *argp, void *resp); struct svc_procedure { - svc_procfunc pc_func; /* process the request */ - kxdrproc_t pc_decode; /* XDR decode args */ - kxdrproc_t pc_encode; /* XDR encode result */ - kxdrproc_t pc_release; /* XDR free result */ + /* process the request: */ + __be32 (*pc_func)(struct svc_rqst *); + /* XDR decode args: */ + bool (*pc_decode)(struct svc_rqst *rqstp, + struct xdr_stream *xdr); + /* XDR encode result: */ + bool (*pc_encode)(struct svc_rqst *rqstp, + struct xdr_stream *xdr); + /* XDR free result: */ + void (*pc_release)(struct svc_rqst *); unsigned int pc_argsize; /* argument struct size */ + unsigned int pc_argzero; /* how much of argument to clear */ unsigned int pc_ressize; /* result struct size */ - unsigned int pc_count; /* call count */ unsigned int pc_cachetype; /* cache info (NFS) */ unsigned int pc_xdrressize; /* maximum size of XDR reply */ + const char * pc_name; /* for display */ }; /* * Function prototypes. */ -int svc_rpcb_setup(struct svc_serv *serv, struct net *net); +int sunrpc_set_pool_mode(const char *val); +int sunrpc_get_pool_mode(char *val, size_t size); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *, struct net *net)); -struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, - struct svc_pool *pool, int node); + int (*threadfn)(void *data)); +bool svc_rqst_replace_page(struct svc_rqst *rqstp, + struct page *page); +void svc_rqst_release_pages(struct svc_rqst *rqstp); void svc_exit_thread(struct svc_rqst *); -struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *, struct net *net), - svc_thread_fn, struct module *); +struct svc_serv * svc_create_pooled(struct svc_program *prog, + unsigned int nprog, + struct svc_stat *stats, + unsigned int bufsize, + int (*threadfn)(void *data)); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); -int svc_pool_stats_open(struct svc_serv *serv, struct file *file); -void svc_destroy(struct svc_serv *); -void svc_shutdown_net(struct svc_serv *, struct net *); -int svc_process(struct svc_rqst *); -int bc_svc_process(struct svc_serv *, struct rpc_rqst *, - struct svc_rqst *); +int svc_pool_stats_open(struct svc_info *si, struct file *file); +void svc_process(struct svc_rqst *rqstp); +void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp); int svc_register(const struct svc_serv *, struct net *, const int, const unsigned short, const unsigned short); void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); -struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); +void svc_pool_wake_idle_thread(struct svc_pool *pool); +struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv); char * svc_print_addr(struct svc_rqst *, char *, size_t); +const char * svc_proc_name(const struct svc_rqst *rqstp); +int svc_encode_result_payload(struct svc_rqst *rqstp, + unsigned int offset, + unsigned int length); +char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, + struct kvec *first, void *p, 
+ size_t total); +__be32 svc_generic_init_request(struct svc_rqst *rqstp, + const struct svc_program *progp, + struct svc_process_info *procinfo); +int svc_generic_rpcbind_set(struct net *net, + const struct svc_program *progp, + u32 version, int family, + unsigned short proto, + unsigned short port); #define RPC_MAX_ADDRBUFLEN (63U) @@ -453,11 +485,111 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t); */ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) { - int added_space = 0; + svc_reserve(rqstp, space + rqstp->rq_auth_slack); +} + +/** + * svcxdr_init_decode - Prepare an xdr_stream for Call decoding + * @rqstp: controlling server RPC transaction context + * + */ +static inline void svcxdr_init_decode(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_arg_stream; + struct xdr_buf *buf = &rqstp->rq_arg; + struct kvec *argv = buf->head; + + WARN_ON(buf->len != buf->head->iov_len + buf->page_len + buf->tail->iov_len); + buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; + + xdr_init_decode(xdr, buf, argv->iov_base, NULL); + xdr_set_scratch_folio(xdr, rqstp->rq_scratch_folio); +} + +/** + * svcxdr_init_encode - Prepare an xdr_stream for svc Reply encoding + * @rqstp: controlling server RPC transaction context + * + */ +static inline void svcxdr_init_encode(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *resv = buf->head; + + xdr_reset_scratch_buffer(xdr); + + xdr->buf = buf; + xdr->iov = resv; + xdr->p = resv->iov_base + resv->iov_len; + xdr->end = resv->iov_base + PAGE_SIZE; + buf->len = resv->iov_len; + xdr->page_ptr = buf->pages - 1; + buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages); + xdr->rqst = NULL; +} + +/** + * svcxdr_encode_opaque_pages - Insert pages into an xdr_stream + * @xdr: xdr_stream to be updated + * @pages: array of pages to insert + * @base: starting offset of first data byte in @pages + * @len: number of data bytes in @pages to insert + * + * After the @pages are added, the tail iovec is instantiated pointing + * to end of the head buffer, and the stream is set up to encode + * subsequent items into the tail. + */ +static inline void svcxdr_encode_opaque_pages(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct page **pages, + unsigned int base, + unsigned int len) +{ + xdr_write_pages(xdr, pages, base, len); + xdr->page_ptr = rqstp->rq_next_page - 1; +} + +/** + * svcxdr_set_auth_slack - + * @rqstp: RPC transaction + * @slack: buffer space to reserve for the transaction's security flavor + * + * Set the request's slack space requirement, and set aside that much + * space in the rqstp's rq_res.head for use when the auth wraps the Reply. 
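
With argument decoding and result encoding now driven through rq_arg_stream and rq_res_stream, a procedure's pc_decode/pc_encode pair reduces to xdr_stream calls. A hedged sketch with hypothetical "demo" argument/result structures; xdr_stream_decode_u32() and xdr_stream_encode_u32() are the generic helpers from linux/sunrpc/xdr.h:

struct demo_args { u32 cookie; };
struct demo_res  { u32 status; };

static bool demo_decode_args(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
	struct demo_args *argp = rqstp->rq_argp;

	return xdr_stream_decode_u32(xdr, &argp->cookie) == 0;
}

static bool demo_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
{
	struct demo_res *resp = rqstp->rq_resp;

	return xdr_stream_encode_u32(xdr, resp->status) > 0;
}

These would be wired up through the pc_decode and pc_encode members of svc_procedure shown earlier.
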
+ */ +static inline void svcxdr_set_auth_slack(struct svc_rqst *rqstp, int slack) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *resv = buf->head; + + rqstp->rq_auth_slack = slack; + + xdr->end -= XDR_QUADLEN(slack); + buf->buflen -= rqstp->rq_auth_slack; + + WARN_ON(xdr->iov != resv); + WARN_ON(xdr->p > xdr->end); +} + +/** + * svcxdr_set_accept_stat - Reserve space for the accept_stat field + * @rqstp: RPC transaction context + * + * Return values: + * %true: Success + * %false: No response buffer space was available + */ +static inline bool svcxdr_set_accept_stat(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; - if (rqstp->rq_authop->flavour) - added_space = RPC_MAX_AUTH_SIZE; - svc_reserve(rqstp, space + added_space); + rqstp->rq_accept_statp = xdr_reserve_space(xdr, XDR_UNIT); + if (unlikely(!rqstp->rq_accept_statp)) + return false; + *rqstp->rq_accept_statp = rpc_success; + return true; } #endif /* SUNRPC_SVC_H */ diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 0b8e3e6bdacf..57f4fd94166a 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. * @@ -41,270 +42,282 @@ #ifndef SVC_RDMA_H #define SVC_RDMA_H +#include <linux/llist.h> #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/rpc_rdma.h> +#include <linux/sunrpc/rpc_rdma_cid.h> +#include <linux/sunrpc/svc_rdma_pcl.h> +#include <linux/sunrpc/rdma_rn.h> + +#include <linux/percpu_counter.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> -#define SVCRDMA_DEBUG + +/* Default and maximum inline threshold sizes */ +enum { + RPCRDMA_PULLUP_THRESH = RPCRDMA_V1_DEF_INLINE_SIZE >> 1, + RPCRDMA_DEF_INLINE_THRESH = 4096, + RPCRDMA_MAX_INLINE_THRESH = 65536 +}; /* RPC/RDMA parameters and stats */ extern unsigned int svcrdma_ord; extern unsigned int svcrdma_max_requests; +extern unsigned int svcrdma_max_bc_requests; extern unsigned int svcrdma_max_req_size; +extern struct workqueue_struct *svcrdma_wq; -extern atomic_t rdma_stat_recv; -extern atomic_t rdma_stat_read; -extern atomic_t rdma_stat_write; -extern atomic_t rdma_stat_sq_starve; -extern atomic_t rdma_stat_rq_starve; -extern atomic_t rdma_stat_rq_poll; -extern atomic_t rdma_stat_rq_prod; -extern atomic_t rdma_stat_sq_poll; -extern atomic_t rdma_stat_sq_prod; - -#define RPCRDMA_VERSION 1 - -/* - * Contexts are built when an RDMA request is created and are a - * record of the resources that can be recovered when the request - * completes. - */ -struct svc_rdma_op_ctxt { - struct svc_rdma_op_ctxt *read_hdr; - struct svc_rdma_fastreg_mr *frmr; - int hdr_count; - struct xdr_buf arg; - struct list_head dto_q; - enum ib_wr_opcode wr_op; - enum ib_wc_status wc_status; - u32 byte_len; - struct svcxprt_rdma *xprt; - unsigned long flags; - enum dma_data_direction direction; - int count; - struct ib_sge sge[RPCSVC_MAXPAGES]; - struct page *pages[RPCSVC_MAXPAGES]; -}; - -/* - * NFS_ requests are mapped on the client side by the chunk lists in - * the RPCRDMA header. During the fetching of the RPC from the client - * and the writing of the reply to the client, the memory in the - * client and the memory in the server must be mapped as contiguous - * vaddr/len for access by the hardware. These data strucures keep - * these mappings. 
- * - * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. For RDMA_READ, the - * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the - * 'ch' field maps the read-list of the RPCRDMA header to the 'sge' - * mapping of the reply. - */ -struct svc_rdma_chunk_sge { - int start; /* sge no for this chunk */ - int count; /* sge count for this chunk */ -}; -struct svc_rdma_fastreg_mr { - struct ib_mr *mr; - void *kva; - struct ib_fast_reg_page_list *page_list; - int page_list_len; - unsigned long access_flags; - unsigned long map_len; - enum dma_data_direction direction; - struct list_head frmr_list; -}; -struct svc_rdma_req_map { - struct svc_rdma_fastreg_mr *frmr; - unsigned long count; - union { - struct kvec sge[RPCSVC_MAXPAGES]; - struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; - }; -}; -#define RDMACTXT_F_FAST_UNREG 1 -#define RDMACTXT_F_LAST_CTXT 2 - -#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ -#define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */ +extern struct percpu_counter svcrdma_stat_read; +extern struct percpu_counter svcrdma_stat_recv; +extern struct percpu_counter svcrdma_stat_sq_starve; +extern struct percpu_counter svcrdma_stat_write; struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ struct list_head sc_accept_q; /* Conn. waiting accept */ + struct rpcrdma_notification sc_rn; /* removal notification */ int sc_ord; /* RDMA read limit */ - int sc_max_sge; - - int sc_sq_depth; /* Depth of SQ */ - atomic_t sc_sq_count; /* Number of SQ WR on queue */ - - int sc_max_requests; /* Depth of RQ */ + int sc_max_send_sges; + bool sc_snd_w_inv; /* OK to use Send With Invalidate */ + + atomic_t sc_sq_avail; /* SQEs ready to be consumed */ + unsigned int sc_sq_depth; /* Depth of SQ */ + __be32 sc_fc_credits; /* Forward credits */ + u32 sc_max_requests; /* Max requests */ + u32 sc_max_bc_requests;/* Backward credits */ int sc_max_req_size; /* Size of each RQ WR buf */ + u8 sc_port_num; struct ib_pd *sc_pd; - atomic_t sc_dma_used; - atomic_t sc_ctxt_used; + spinlock_t sc_send_lock; + struct llist_head sc_send_ctxts; + spinlock_t sc_rw_ctxt_lock; + struct llist_head sc_rw_ctxts; + + u32 sc_pending_recvs; + u32 sc_recv_batch; struct list_head sc_rq_dto_q; + struct list_head sc_read_complete_q; spinlock_t sc_rq_dto_lock; struct ib_qp *sc_qp; struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; - struct ib_mr *sc_phys_mr; /* MR for server memory */ - u32 sc_dev_caps; /* distilled device caps */ - u32 sc_dma_lkey; /* local dma key */ - unsigned int sc_frmr_pg_list_len; - struct list_head sc_frmr_q; - spinlock_t sc_frmr_q_lock; spinlock_t sc_lock; /* transport lock */ wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ unsigned long sc_flags; - struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */ - struct list_head sc_read_complete_q; struct work_struct sc_work; + + struct llist_head sc_recv_ctxts; + + atomic_t sc_completion_ids; }; /* sc_flags */ -#define RDMAXPRT_RQ_PENDING 1 -#define RDMAXPRT_SQ_PENDING 2 #define RDMAXPRT_CONN_PENDING 3 -#define RPCRDMA_LISTEN_BACKLOG 10 -/* The default ORD value is based on two outstanding full-size writes with a - * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. 
*/ -#define RPCRDMA_ORD (64/4) -#define RPCRDMA_SQ_DEPTH_MULT 8 -#define RPCRDMA_MAX_THREADS 16 -#define RPCRDMA_MAX_REQUESTS 16 -#define RPCRDMA_MAX_REQ_SIZE 4096 - -/* svc_rdma_marshal.c */ -extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *, - int *, int *); -extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); -extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); -extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, - struct rpcrdma_msg *, - enum rpcrdma_errcode, u32 *); -extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); -extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); -extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, - __be32, __be64, u32); -extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, - struct rpcrdma_msg *, - struct rpcrdma_msg *, - enum rpcrdma_proc); -extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); +static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp) +{ + struct svc_xprt *xprt = rqstp->rq_xprt; -/* svc_rdma_recvfrom.c */ -extern int svc_rdma_recvfrom(struct svc_rqst *); + return container_of(xprt, struct svcxprt_rdma, sc_xprt); +} -/* svc_rdma_sendto.c */ -extern int svc_rdma_sendto(struct svc_rqst *); +/* + * Default connection parameters + */ +enum { + RPCRDMA_LISTEN_BACKLOG = 10, + RPCRDMA_MAX_REQUESTS = 128, + RPCRDMA_MAX_BC_REQUESTS = 2, +}; -/* svc_rdma_transport.c */ -extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); -extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, - enum rpcrdma_errcode); -struct page *svc_rdma_get_page(void); -extern int svc_rdma_post_recv(struct svcxprt_rdma *); -extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); -extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); -extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); -extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); -extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); -extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); -extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); -extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); -extern void svc_rdma_put_frmr(struct svcxprt_rdma *, - struct svc_rdma_fastreg_mr *); -extern void svc_sq_reap(struct svcxprt_rdma *); -extern void svc_rq_reap(struct svcxprt_rdma *); -extern struct svc_xprt_class svc_rdma_class; -extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); +#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD -/* svc_rdma.c */ -extern int svc_rdma_init(void); -extern void svc_rdma_cleanup(void); - -/* - * Returns the address of the first read chunk or <nul> if no read chunk is - * present +/** + * svc_rdma_send_cid_init - Initialize a Receive Queue completion ID + * @rdma: controlling transport + * @cid: completion ID to initialize */ -static inline struct rpcrdma_read_chunk * -svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) +static inline void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) { - struct rpcrdma_read_chunk *ch = - (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - - if (ch->rc_discrim == 0) - return NULL; + cid->ci_queue_id = rdma->sc_rq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); +} - return ch; +/** + * svc_rdma_send_cid_init - Initialize a Send Queue 
completion ID + * @rdma: controlling transport + * @cid: completion ID to initialize + */ +static inline void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma, + struct rpc_rdma_cid *cid) +{ + cid->ci_queue_id = rdma->sc_sq_cq->res.id; + cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids); } /* - * Returns the address of the first read write array element or <nul> if no - * write array list is present + * A chunk context tracks all I/O for moving one Read or Write + * chunk. This is a set of rdma_rw's that handle data movement + * for all segments of one chunk. */ -static inline struct rpcrdma_write_array * -svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) -{ - if (rmsgp->rm_body.rm_chunks[0] != 0 - || rmsgp->rm_body.rm_chunks[1] == 0) - return NULL; +struct svc_rdma_chunk_ctxt { + struct rpc_rdma_cid cc_cid; + struct ib_cqe cc_cqe; + struct list_head cc_rwctxts; + ktime_t cc_posttime; + int cc_sqecount; +}; - return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; -} +struct svc_rdma_recv_ctxt { + struct llist_node rc_node; + struct list_head rc_list; + struct ib_recv_wr rc_recv_wr; + struct ib_cqe rc_cqe; + struct rpc_rdma_cid rc_cid; + struct ib_sge rc_recv_sge; + void *rc_recv_buf; + struct xdr_stream rc_stream; + u32 rc_byte_len; + u32 rc_inv_rkey; + __be32 rc_msgtype; + + /* State for pulling a Read chunk */ + unsigned int rc_pageoff; + unsigned int rc_curpage; + unsigned int rc_readbytes; + struct xdr_buf rc_saved_arg; + struct svc_rdma_chunk_ctxt rc_cc; + + struct svc_rdma_pcl rc_call_pcl; + + struct svc_rdma_pcl rc_read_pcl; + struct svc_rdma_chunk *rc_cur_result_payload; + struct svc_rdma_pcl rc_write_pcl; + struct svc_rdma_pcl rc_reply_pcl; + + unsigned int rc_page_count; + unsigned long rc_maxpages; + struct page *rc_pages[] __counted_by(rc_maxpages); +}; /* - * Returns the address of the first reply array element or <nul> if no - * reply array is present + * State for sending a Write chunk. + * - Tracks progress of writing one chunk over all its segments + * - Stores arguments for the SGL constructor functions */ -static inline struct rpcrdma_write_array * -svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) -{ - struct rpcrdma_read_chunk *rch; - struct rpcrdma_write_array *wr_ary; - struct rpcrdma_write_array *rp_ary; - - /* XXX: Need to fix when reply list may occur with read-list and/or - * write list */ - if (rmsgp->rm_body.rm_chunks[0] != 0 || - rmsgp->rm_body.rm_chunks[1] != 0) - return NULL; - - rch = svc_rdma_get_read_chunk(rmsgp); - if (rch) { - while (rch->rc_discrim) - rch++; - - /* The reply list follows an empty write array located - * at 'rc_position' here. The reply array is at rc_target. 
- */ - rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; - - goto found_it; - } - - wr_ary = svc_rdma_get_write_array(rmsgp); - if (wr_ary) { - rp_ary = (struct rpcrdma_write_array *) - &wr_ary-> - wc_array[ntohl(wr_ary->wc_nchunks)].wc_target.rs_length; - - goto found_it; - } - - /* No read list, no write list */ - rp_ary = (struct rpcrdma_write_array *) - &rmsgp->rm_body.rm_chunks[2]; - - found_it: - if (rp_ary->wc_discrim == 0) - return NULL; - - return rp_ary; -} +struct svc_rdma_write_info { + struct svcxprt_rdma *wi_rdma; + + const struct svc_rdma_chunk *wi_chunk; + + /* write state of this chunk */ + unsigned int wi_seg_off; + unsigned int wi_seg_no; + + /* SGL constructor arguments */ + const struct xdr_buf *wi_xdr; + unsigned char *wi_base; + unsigned int wi_next_off; + + struct svc_rdma_chunk_ctxt wi_cc; + struct work_struct wi_work; +}; + +struct svc_rdma_send_ctxt { + struct llist_node sc_node; + struct rpc_rdma_cid sc_cid; + struct work_struct sc_work; + + struct svcxprt_rdma *sc_rdma; + struct ib_send_wr sc_send_wr; + struct ib_send_wr *sc_wr_chain; + int sc_sqecount; + struct ib_cqe sc_cqe; + struct xdr_buf sc_hdrbuf; + struct xdr_stream sc_stream; + struct svc_rdma_write_info sc_reply_info; + void *sc_xprt_buf; + int sc_page_count; + int sc_cur_sge_no; + unsigned long sc_maxpages; + struct page **sc_pages; + struct ib_sge sc_sges[]; +}; + +/* svc_rdma_backchannel.c */ +extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *rctxt); + +/* svc_rdma_recvfrom.c */ +extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); +extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma); +extern struct svc_rdma_recv_ctxt * + svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma); +extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma, + struct svc_rdma_recv_ctxt *ctxt); +extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma); +extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt); +extern int svc_rdma_recvfrom(struct svc_rqst *); + +/* svc_rdma_rw.c */ +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); +extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); +extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc); +extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, + struct svc_rdma_chunk_ctxt *cc, + enum dma_data_direction dir); +extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + const struct xdr_buf *xdr); +extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, + const struct svc_rdma_pcl *write_pcl, + const struct svc_rdma_pcl *reply_pcl, + struct svc_rdma_send_ctxt *sctxt, + const struct xdr_buf *xdr); +extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, + struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *head); + +/* svc_rdma_sendto.c */ +extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); +extern struct svc_rdma_send_ctxt * + svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); +extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_post_send(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); +extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *sctxt, + const struct svc_rdma_pcl *write_pcl, + const 
struct svc_rdma_pcl *reply_pcl, + const struct xdr_buf *xdr); +extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *sctxt, + struct svc_rdma_recv_ctxt *rctxt, + int status); +extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail); +extern int svc_rdma_sendto(struct svc_rqst *); +extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, + unsigned int length); + +/* svc_rdma_transport.c */ +extern struct svc_xprt_class svc_rdma_class; +#ifdef CONFIG_SUNRPC_BACKCHANNEL +extern struct svc_xprt_class svc_rdma_bc_class; +#endif + +/* svc_rdma.c */ +extern int svc_rdma_init(void); +extern void svc_rdma_cleanup(void); + #endif diff --git a/include/linux/sunrpc/svc_rdma_pcl.h b/include/linux/sunrpc/svc_rdma_pcl.h new file mode 100644 index 000000000000..7516ad0fae80 --- /dev/null +++ b/include/linux/sunrpc/svc_rdma_pcl.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020, Oracle and/or its affiliates + */ + +#ifndef SVC_RDMA_PCL_H +#define SVC_RDMA_PCL_H + +#include <linux/list.h> + +struct svc_rdma_segment { + u32 rs_handle; + u32 rs_length; + u64 rs_offset; +}; + +struct svc_rdma_chunk { + struct list_head ch_list; + + u32 ch_position; + u32 ch_length; + u32 ch_payload_length; + + u32 ch_segcount; + struct svc_rdma_segment ch_segments[]; +}; + +struct svc_rdma_pcl { + unsigned int cl_count; + struct list_head cl_chunks; +}; + +/** + * pcl_init - Initialize a parsed chunk list + * @pcl: parsed chunk list to initialize + * + */ +static inline void pcl_init(struct svc_rdma_pcl *pcl) +{ + INIT_LIST_HEAD(&pcl->cl_chunks); +} + +/** + * pcl_is_empty - Return true if parsed chunk list is empty + * @pcl: parsed chunk list + * + */ +static inline bool pcl_is_empty(const struct svc_rdma_pcl *pcl) +{ + return list_empty(&pcl->cl_chunks); +} + +/** + * pcl_first_chunk - Return first chunk in a parsed chunk list + * @pcl: parsed chunk list + * + * Returns the first chunk in the list, or NULL if the list is empty. + */ +static inline struct svc_rdma_chunk * +pcl_first_chunk(const struct svc_rdma_pcl *pcl) +{ + if (pcl_is_empty(pcl)) + return NULL; + return list_first_entry(&pcl->cl_chunks, struct svc_rdma_chunk, + ch_list); +} + +/** + * pcl_next_chunk - Return next chunk in a parsed chunk list + * @pcl: a parsed chunk list + * @chunk: chunk in @pcl + * + * Returns the next chunk in the list, or NULL if @chunk is already last. 
+ */ +static inline struct svc_rdma_chunk * +pcl_next_chunk(const struct svc_rdma_pcl *pcl, struct svc_rdma_chunk *chunk) +{ + if (list_is_last(&chunk->ch_list, &pcl->cl_chunks)) + return NULL; + return list_next_entry(chunk, ch_list); +} + +/** + * pcl_for_each_chunk - Iterate over chunks in a parsed chunk list + * @pos: the loop cursor + * @pcl: a parsed chunk list + */ +#define pcl_for_each_chunk(pos, pcl) \ + for (pos = list_first_entry(&(pcl)->cl_chunks, struct svc_rdma_chunk, ch_list); \ + &pos->ch_list != &(pcl)->cl_chunks; \ + pos = list_next_entry(pos, ch_list)) + +/** + * pcl_for_each_segment - Iterate over segments in a parsed chunk + * @pos: the loop cursor + * @chunk: a parsed chunk + */ +#define pcl_for_each_segment(pos, chunk) \ + for (pos = &(chunk)->ch_segments[0]; \ + pos <= &(chunk)->ch_segments[(chunk)->ch_segcount - 1]; \ + pos++) + +/** + * pcl_chunk_end_offset - Return offset of byte range following @chunk + * @chunk: chunk in @pcl + * + * Returns starting offset of the region just after @chunk + */ +static inline unsigned int +pcl_chunk_end_offset(const struct svc_rdma_chunk *chunk) +{ + return xdr_align_size(chunk->ch_position + chunk->ch_payload_length); +} + +struct svc_rdma_recv_ctxt; + +extern void pcl_free(struct svc_rdma_pcl *pcl); +extern bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p); +extern bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p); +extern bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_pcl *pcl, __be32 *p); +extern int pcl_process_nonpayloads(const struct svc_rdma_pcl *pcl, + const struct xdr_buf *xdr, + int (*actor)(const struct xdr_buf *, + void *), + void *data); + +#endif /* SVC_RDMA_PCL_H */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index b05963f09ebf..da2a2531e110 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/svc_xprt.h * @@ -19,19 +20,23 @@ struct svc_xprt_ops { struct svc_xprt *(*xpo_accept)(struct svc_xprt *); int (*xpo_has_wspace)(struct svc_xprt *); int (*xpo_recvfrom)(struct svc_rqst *); - void (*xpo_prep_reply_hdr)(struct svc_rqst *); int (*xpo_sendto)(struct svc_rqst *); - void (*xpo_release_rqst)(struct svc_rqst *); + int (*xpo_result_payload)(struct svc_rqst *, unsigned int, + unsigned int); + void (*xpo_release_ctxt)(struct svc_xprt *xprt, void *ctxt); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); + void (*xpo_kill_temp_xprt)(struct svc_xprt *); + void (*xpo_handshake)(struct svc_xprt *xprt); }; struct svc_xprt_class { const char *xcl_name; struct module *xcl_owner; - struct svc_xprt_ops *xcl_ops; + const struct svc_xprt_ops *xcl_ops; struct list_head xcl_list; u32 xcl_max_payload; + int xcl_ident; }; /* @@ -46,26 +51,16 @@ struct svc_xpt_user { struct svc_xprt { struct svc_xprt_class *xpt_class; - struct svc_xprt_ops *xpt_ops; + const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; + ktime_t xpt_qtime; struct list_head xpt_list; - struct list_head xpt_ready; + struct lwq_node xpt_ready; unsigned long xpt_flags; -#define XPT_BUSY 0 /* enqueued/receiving */ -#define XPT_CONN 1 /* conn pending */ -#define XPT_CLOSE 2 /* dead or dying */ -#define XPT_DATA 3 /* data pending */ -#define XPT_TEMP 4 /* connected transport */ -#define XPT_DEAD 6 /* transport closed */ -#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */ -#define XPT_DEFERRED 8 /* deferred request pending */ 
-#define XPT_OLD 9 /* used for xprt aging mark+sweep */ -#define XPT_DETACHED 10 /* detached from tempsocks list */ -#define XPT_LISTENER 11 /* listening endpoint */ -#define XPT_CACHE_AUTH 12 /* cache auth info */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ + atomic_t xpt_nr_rqsts; /* Number of requests */ struct mutex xpt_mutex; /* to serialize sending data */ spinlock_t xpt_lock; /* protects sk_deferred * and xpt_auth_cache */ @@ -76,13 +71,62 @@ struct svc_xprt { size_t xpt_locallen; /* length of address */ struct sockaddr_storage xpt_remote; /* remote peer's address */ size_t xpt_remotelen; /* length of address */ - struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */ + char xpt_remotebuf[INET6_ADDRSTRLEN + 10]; struct list_head xpt_users; /* callbacks on free */ struct net *xpt_net; + netns_tracker ns_tracker; + const struct cred *xpt_cred; struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */ + struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */ }; +/* flag bits for xpt_flags */ +enum { + XPT_BUSY, /* enqueued/receiving */ + XPT_CONN, /* conn pending */ + XPT_CLOSE, /* dead or dying */ + XPT_DATA, /* data pending */ + XPT_TEMP, /* connected transport */ + XPT_DEAD, /* transport closed */ + XPT_CHNGBUF, /* need to change snd/rcv buf sizes */ + XPT_DEFERRED, /* deferred request pending */ + XPT_OLD, /* used for xprt aging mark+sweep */ + XPT_LISTENER, /* listening endpoint */ + XPT_CACHE_AUTH, /* cache auth info */ + XPT_LOCAL, /* connection from loopback interface */ + XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */ + XPT_CONG_CTRL, /* has congestion control */ + XPT_HANDSHAKE, /* xprt requests a handshake */ + XPT_TLS_SESSION, /* transport-layer security established */ + XPT_PEER_AUTH, /* peer has been authenticated */ + XPT_PEER_VALID, /* peer has presented a filehandle that + * it has access to. It is NOT counted + * in ->sv_tmpcnt. + */ + XPT_RPCB_UNREG, /* transport that needs unregistering + * with rpcbind (TCP, UDP) on destroy + */ +}; + +/* + * Maximum number of "tmp" connections - those without XPT_PEER_VALID - + * permitted on any service. 
+ */ +#define XPT_MAX_TMP_CONN 64 + +static inline void svc_xprt_set_valid(struct svc_xprt *xpt) +{ + if (test_bit(XPT_TEMP, &xpt->xpt_flags) && + !test_and_set_bit(XPT_PEER_VALID, &xpt->xpt_flags)) { + struct svc_serv *serv = xpt->xpt_server; + + spin_lock(&serv->sv_lock); + serv->sv_tmpcnt -= 1; + spin_unlock(&serv->sv_lock); + } +} + static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) { spin_lock(&xpt->xpt_lock); @@ -107,23 +151,41 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u return 0; } +static inline bool svc_xprt_is_dead(const struct svc_xprt *xprt) +{ + return (test_bit(XPT_DEAD, &xprt->xpt_flags) != 0) || + (test_bit(XPT_CLOSE, &xprt->xpt_flags) != 0); +} + int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); -int svc_create_xprt(struct svc_serv *, const char *, struct net *, - const int, const unsigned short, int); +int svc_xprt_create_from_sa(struct svc_serv *serv, const char *xprt_name, + struct net *net, struct sockaddr *sap, + int flags, const struct cred *cred); +int svc_xprt_create(struct svc_serv *serv, const char *xprt_name, + struct net *net, const int family, + const unsigned short port, int flags, + const struct cred *cred); +void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net, + bool unregister); +void svc_xprt_received(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); -void svc_close_xprt(struct svc_xprt *xprt); +void svc_xprt_close(struct svc_xprt *xprt); int svc_port_is_privileged(struct sockaddr *sin); int svc_print_xprts(char *buf, int maxlen); +struct svc_xprt *svc_find_listener(struct svc_serv *serv, const char *xcl_name, + struct net *net, const struct sockaddr *sa); struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, struct net *net, const sa_family_t af, const unsigned short port); int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen); void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *xprt); +void svc_age_temp_xprts_now(struct svc_serv *, struct sockaddr *); +void svc_xprt_deferred_close(struct svc_xprt *xprt); static inline void svc_xprt_get(struct svc_xprt *xprt) { @@ -142,7 +204,10 @@ static inline void svc_xprt_set_remote(struct svc_xprt *xprt, { memcpy(&xprt->xpt_remote, sa, salen); xprt->xpt_remotelen = salen; + snprintf(xprt->xpt_remotebuf, sizeof(xprt->xpt_remotebuf) - 1, + "%pISpc", sa); } + static inline unsigned short svc_addr_port(const struct sockaddr *sa) { const struct sockaddr_in *sin = (const struct sockaddr_in *)sa; diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h index 8d71d6577459..4b92fec23a49 100644 --- a/include/linux/sunrpc/svcauth.h +++ b/include/linux/sunrpc/svcauth.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/svcauth.h * @@ -9,13 +10,13 @@ #ifndef _LINUX_SUNRPC_SVCAUTH_H_ #define _LINUX_SUNRPC_SVCAUTH_H_ -#ifdef __KERNEL__ - #include <linux/string.h> #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/gss_api.h> +#include <linux/sunrpc/clnt.h> #include <linux/hash.h> +#include <linux/stringhash.h> #include <linux/cred.h> struct svc_cred { @@ -23,14 +24,22 @@ struct svc_cred { kgid_t cr_gid; struct group_info 
*cr_group_info; u32 cr_flavor; /* pseudoflavor */ - char *cr_principal; /* for gss */ + /* name of form servicetype/hostname@REALM, passed down by + * gss-proxy: */ + char *cr_raw_principal; + /* name of form servicetype@hostname, passed down by + * rpc.svcgssd, or computed from the above: */ + char *cr_principal; + char *cr_targ_princ; struct gss_api_mech *cr_gss_mech; }; static inline void init_svc_cred(struct svc_cred *cred) { cred->cr_group_info = NULL; + cred->cr_raw_principal = NULL; cred->cr_principal = NULL; + cred->cr_targ_princ = NULL; cred->cr_gss_mech = NULL; } @@ -38,7 +47,9 @@ static inline void free_svc_cred(struct svc_cred *cred) { if (cred->cr_group_info) put_group_info(cred->cr_group_info); + kfree(cred->cr_raw_principal); kfree(cred->cr_principal); + kfree(cred->cr_targ_princ); gss_mech_put(cred->cr_gss_mech); init_svc_cred(cred); } @@ -70,6 +81,19 @@ struct auth_domain { struct hlist_node hash; char *name; struct auth_ops *flavour; + struct rcu_head rcu_head; +}; + +enum svc_auth_status { + SVC_GARBAGE = 1, + SVC_VALID, + SVC_NEGATIVE, + SVC_OK, + SVC_DROP, + SVC_CLOSE, + SVC_DENIED, + SVC_PENDING, + SVC_COMPLETE, }; /* @@ -87,6 +111,8 @@ struct auth_domain { * is (probably) already in place. Certainly space is * reserved for it. * DROP - simply drop the request. It may have been deferred + * CLOSE - like SVC_DROP, but request is definitely lost. + * If there is a tcp connection, it should be closed. * GARBAGE - rpc garbage_args error * SYSERR - rpc system_err error * DENIED - authp holds reason for denial. @@ -100,101 +126,64 @@ struct auth_domain { * * release() is given a request after the procedure has been run. * It should sign/encrypt the results if needed - * It should return: - * OK - the resbuf is ready to be sent - * DROP - the reply should be quitely dropped - * DENIED - authp holds a reason for MSG_DENIED - * SYSERR - rpc system_err * * domain_release() * This call releases a domain. + * * set_client() - * Givens a pending request (struct svc_rqst), finds and assigns + * Given a pending request (struct svc_rqst), finds and assigns * an appropriate 'auth_domain' as the client. + * + * pseudoflavor() + * Returns RPC_AUTH pseudoflavor in use by @rqstp. 
*/ struct auth_ops { char * name; struct module *owner; int flavour; - int (*accept)(struct svc_rqst *rq, __be32 *authp); - int (*release)(struct svc_rqst *rq); - void (*domain_release)(struct auth_domain *); - int (*set_client)(struct svc_rqst *rq); -}; -#define SVC_GARBAGE 1 -#define SVC_SYSERR 2 -#define SVC_VALID 3 -#define SVC_NEGATIVE 4 -#define SVC_OK 5 -#define SVC_DROP 6 -#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely - * lost so if there is a tcp connection, it - * should be closed - */ -#define SVC_DENIED 8 -#define SVC_PENDING 9 -#define SVC_COMPLETE 10 + enum svc_auth_status (*accept)(struct svc_rqst *rqstp); + int (*release)(struct svc_rqst *rqstp); + void (*domain_release)(struct auth_domain *dom); + enum svc_auth_status (*set_client)(struct svc_rqst *rqstp); + rpc_authflavor_t (*pseudoflavor)(struct svc_rqst *rqstp); +}; struct svc_xprt; -extern int svc_authenticate(struct svc_rqst *rqstp, __be32 *authp); +extern rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp); extern int svc_authorise(struct svc_rqst *rqstp); -extern int svc_set_client(struct svc_rqst *rqstp); +extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp); extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops); extern void svc_auth_unregister(rpc_authflavor_t flavor); +extern void svcauth_map_clnt_to_svc_cred_local(struct rpc_clnt *clnt, + const struct cred *, + struct svc_cred *); + extern struct auth_domain *unix_domain_find(char *name); extern void auth_domain_put(struct auth_domain *item); -extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom); extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new); extern struct auth_domain *auth_domain_find(char *name); -extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr); -extern int auth_unix_forget_old(struct auth_domain *dom); extern void svcauth_unix_purge(struct net *net); extern void svcauth_unix_info_release(struct svc_xprt *xpt); -extern int svcauth_unix_set_client(struct svc_rqst *rqstp); +extern enum svc_auth_status svcauth_unix_set_client(struct svc_rqst *rqstp); extern int unix_gid_cache_create(struct net *net); extern void unix_gid_cache_destroy(struct net *net); -static inline unsigned long hash_str(char *name, int bits) +/* + * The <stringhash.h> functions are good enough that we don't need to + * use hash_32() on them; just extracting the high bits is enough. 
+ */ +static inline unsigned long hash_str(char const *name, int bits) { - unsigned long hash = 0; - unsigned long l = 0; - int len = 0; - unsigned char c; - do { - if (unlikely(!(c = *name++))) { - c = (char)len; len = -1; - } - l = (l << 8) | c; - len++; - if ((len & (BITS_PER_LONG/8-1))==0) - hash = hash_long(hash^l, BITS_PER_LONG); - } while (len); - return hash >> (BITS_PER_LONG - bits); + return hashlen_hash(hashlen_string(NULL, name)) >> (32 - bits); } -static inline unsigned long hash_mem(char *buf, int length, int bits) +static inline unsigned long hash_mem(char const *buf, int length, int bits) { - unsigned long hash = 0; - unsigned long l = 0; - int len = 0; - unsigned char c; - do { - if (len == length) { - c = (char)len; len = -1; - } else - c = *buf++; - l = (l << 8) | c; - len++; - if ((len & (BITS_PER_LONG/8-1))==0) - hash = hash_long(hash^l, BITS_PER_LONG); - } while (len); - return hash >> (BITS_PER_LONG - bits); + return full_name_hash(NULL, buf, length) >> (32 - bits); } -#endif /* __KERNEL__ */ - #endif /* _LINUX_SUNRPC_SVCAUTH_H_ */ diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index 726aff1a5201..f09c82b0a7ae 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/svcauth_gss.h * @@ -8,7 +9,6 @@ #ifndef _LINUX_SUNRPC_SVCAUTH_GSS_H #define _LINUX_SUNRPC_SVCAUTH_GSS_H -#ifdef __KERNEL__ #include <linux/sched.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/xdr.h> @@ -20,8 +20,8 @@ int gss_svc_init(void); void gss_svc_shutdown(void); int gss_svc_init_net(struct net *net); void gss_svc_shutdown_net(struct net *net); -int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); +struct auth_domain *svcauth_gss_register_pseudoflavor(u32 pseudoflavor, + char *name); u32 svcauth_gss_flavor(struct auth_domain *dom); -#endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 62fd1b756e99..de37069aba90 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/svcsock.h * @@ -22,12 +23,15 @@ struct svc_sock { /* We keep the old state_change and data_ready CB's here */ void (*sk_ostate)(struct sock *); - void (*sk_odata)(struct sock *, int bytes); + void (*sk_odata)(struct sock *); void (*sk_owspace)(struct sock *); + /* For sends (protected by xpt_mutex) */ + struct bio_vec *sk_bvec; + /* private TCP part */ /* On-the-wire fragment header: */ - __be32 sk_reclen; + __be32 sk_marker; /* As we receive a record, this includes the length received so * far (including the fragment header): */ u32 sk_tcplen; @@ -35,33 +39,35 @@ struct svc_sock { * received so far in the fragments making up this rpc: */ u32 sk_datalen; - struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ + struct page_frag_cache sk_frag_cache; + + struct completion sk_handshake_done; + + /* received data */ + unsigned long sk_maxpages; + struct page * sk_pages[] __counted_by(sk_maxpages); }; static inline u32 svc_sock_reclen(struct svc_sock *svsk) { - return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK; + return be32_to_cpu(svsk->sk_marker) & RPC_FRAGMENT_SIZE_MASK; } static inline u32 svc_sock_final_rec(struct svc_sock *svsk) { - return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT; + return be32_to_cpu(svsk->sk_marker) & 
RPC_LAST_STREAM_FRAGMENT; } /* * Function prototypes. */ -void svc_close_net(struct svc_serv *, struct net *); -int svc_recv(struct svc_rqst *, long); -int svc_send(struct svc_rqst *); -void svc_drop(struct svc_rqst *); -void svc_sock_update_bufs(struct svc_serv *serv); -int svc_addsock(struct svc_serv *serv, const int fd, - char *name_return, const size_t len); +void svc_recv(struct svc_rqst *rqstp); +void svc_send(struct svc_rqst *rqstp); +int svc_addsock(struct svc_serv *serv, struct net *net, + const int fd, char *name_return, const size_t len, + const struct cred *cred); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); -struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot); -void svc_sock_destroy(struct svc_xprt *); /* * svc_makesock socket characteristics diff --git a/include/linux/sunrpc/timer.h b/include/linux/sunrpc/timer.h index 697d6e69d61f..242dbe00b5ff 100644 --- a/include/linux/sunrpc/timer.h +++ b/include/linux/sunrpc/timer.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/timer.h * diff --git a/include/linux/sunrpc/types.h b/include/linux/sunrpc/types.h index d222f47550af..bd3c8e05632b 100644 --- a/include/linux/sunrpc/types.h +++ b/include/linux/sunrpc/types.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/types.h * @@ -10,6 +11,7 @@ #define _LINUX_SUNRPC_TYPES_H_ #include <linux/timer.h> +#include <linux/sched/signal.h> #include <linux/workqueue.h> #include <linux/sunrpc/debug.h> #include <linux/list.h> diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 15f9204ee70b..152597750f55 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * XDR standard data types and function declarations * @@ -10,21 +11,28 @@ #ifndef _SUNRPC_XDR_H_ #define _SUNRPC_XDR_H_ -#ifdef __KERNEL__ - #include <linux/uio.h> #include <asm/byteorder.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/scatterlist.h> +struct bio_vec; +struct rpc_rqst; + +/* + * Size of an XDR encoding unit in bytes, i.e. 32 bits, + * as defined in Section 3 of RFC 4506. All encoded + * XDR data items are aligned on a boundary of 32 bits. + */ +#define XDR_UNIT sizeof(__be32) + /* * Buffer adjustment */ #define XDR_QUADLEN(l) (((l) + 3) >> 2) /* - * Generic opaque `network object.' At the kernel level, this type - * is used only by lockd. + * Generic opaque `network object.' */ #define XDR_MAX_NETOBJ 1024 struct xdr_netobj { @@ -33,13 +41,6 @@ struct xdr_netobj { }; /* - * This is the legacy generic XDR function. rqstp is either a rpc_rqst - * (client side) or svc_rqst pointer (server side). - * Encode functions always assume there's enough room in the buffer. - */ -typedef int (*kxdrproc_t)(void *rqstp, __be32 *data, void *obj); - -/* * Basic structure for transmission/reception of a client XDR message. 
* Features a header (for a linear buffer containing RPC headers * and the data payload for short messages), and then an array of @@ -56,17 +57,32 @@ struct xdr_buf { struct kvec head[1], /* RPC header + non-page data */ tail[1]; /* Appended after page data */ + struct bio_vec *bvec; struct page ** pages; /* Array of pages */ unsigned int page_base, /* Start of page data */ page_len, /* Length of page data */ flags; /* Flags for data disposition */ #define XDRBUF_READ 0x01 /* target of file read */ #define XDRBUF_WRITE 0x02 /* source of file write */ +#define XDRBUF_SPARSE_PAGES 0x04 /* Page array is sparse */ unsigned int buflen, /* Total length of storage buffer */ len; /* Length of XDR encoded message */ }; +static inline void +xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) +{ + buf->head[0].iov_base = start; + buf->head[0].iov_len = len; + buf->tail[0].iov_len = 0; + buf->pages = NULL; + buf->page_len = 0; + buf->flags = 0; + buf->len = 0; + buf->buflen = len; +} + /* * pre-xdr'ed macros. */ @@ -75,6 +91,17 @@ struct xdr_buf { #define xdr_one cpu_to_be32(1) #define xdr_two cpu_to_be32(2) +#define rpc_auth_null cpu_to_be32(RPC_AUTH_NULL) +#define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX) +#define rpc_auth_short cpu_to_be32(RPC_AUTH_SHORT) +#define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS) +#define rpc_auth_tls cpu_to_be32(RPC_AUTH_TLS) + +#define rpc_call cpu_to_be32(RPC_CALL) +#define rpc_reply cpu_to_be32(RPC_REPLY) + +#define rpc_msg_accepted cpu_to_be32(RPC_MSG_ACCEPTED) + #define rpc_success cpu_to_be32(RPC_SUCCESS) #define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL) #define rpc_prog_mismatch cpu_to_be32(RPC_PROG_MISMATCH) @@ -83,15 +110,19 @@ struct xdr_buf { #define rpc_system_err cpu_to_be32(RPC_SYSTEM_ERR) #define rpc_drop_reply cpu_to_be32(RPC_DROP_REPLY) +#define rpc_mismatch cpu_to_be32(RPC_MISMATCH) +#define rpc_auth_error cpu_to_be32(RPC_AUTH_ERROR) + #define rpc_auth_ok cpu_to_be32(RPC_AUTH_OK) #define rpc_autherr_badcred cpu_to_be32(RPC_AUTH_BADCRED) #define rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED) #define rpc_autherr_badverf cpu_to_be32(RPC_AUTH_BADVERF) #define rpc_autherr_rejectedverf cpu_to_be32(RPC_AUTH_REJECTEDVERF) #define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK) +#define rpc_autherr_invalidresp cpu_to_be32(RPC_AUTH_INVALIDRESP) +#define rpc_autherr_failed cpu_to_be32(RPC_AUTH_FAILED) #define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM) #define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM) -#define rpc_autherr_oldseqnum cpu_to_be32(101) /* * Miscellaneous XDR helper functions @@ -99,14 +130,16 @@ struct xdr_buf { __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); __be32 *xdr_encode_string(__be32 *p, const char *s); -__be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, - unsigned int maxlen); __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); -__be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); void xdr_inline_pages(struct xdr_buf *, unsigned int, struct page **, unsigned int, unsigned int); -void xdr_terminate_string(struct xdr_buf *, const u32); +void xdr_terminate_string(const struct xdr_buf *, const u32); +size_t xdr_buf_pagecount(const struct xdr_buf *buf); +int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); +void xdr_free_bvec(struct xdr_buf *buf); +unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size, + const struct xdr_buf 
*xdr); static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { @@ -137,6 +170,13 @@ xdr_decode_opaque_fixed(__be32 *p, void *ptr, unsigned int len) return p + XDR_QUADLEN(len); } +static inline void xdr_netobj_dup(struct xdr_netobj *dst, + struct xdr_netobj *src, gfp_t gfp_mask) +{ + dst->data = kmemdup(src->data, src->len, gfp_mask); + dst->len = src->len; +} + /* * Adjust kvec to reflect end of xdr'ed data (RPC client XDR) */ @@ -149,33 +189,14 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) /* * XDR buffer helper functions */ -extern void xdr_shift_buf(struct xdr_buf *, size_t); -extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); -extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_from_iov(const struct kvec *, struct xdr_buf *); +extern int xdr_buf_subsegment(const struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); extern void xdr_buf_trim(struct xdr_buf *, unsigned int); -extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int); -extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); -extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); - -/* - * Helper structure for copying from an sk_buff. - */ -struct xdr_skb_reader { - struct sk_buff *skb; - unsigned int offset; - size_t count; - __wsum csum; -}; +extern int read_bytes_from_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int); +extern int write_bytes_to_xdr_buf(const struct xdr_buf *, unsigned int, void *, unsigned int); -typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len); - -size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len); -extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *); -extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, - struct xdr_skb_reader *, xdr_skb_read_actor); - -extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32); -extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *); +extern int xdr_encode_word(const struct xdr_buf *, unsigned int, u32); +extern int xdr_decode_word(const struct xdr_buf *, unsigned int, u32 *); struct xdr_array2_desc; typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); @@ -186,9 +207,9 @@ struct xdr_array2_desc { xdr_xcode_elem_t xcode; }; -extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base, +extern int xdr_decode_array2(const struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); -extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base, +extern int xdr_encode_array2(const struct xdr_buf *buf, unsigned int base, struct xdr_array2_desc *desc); extern void _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len); @@ -204,29 +225,590 @@ struct xdr_stream { struct kvec *iov; /* pointer to the current kvec */ struct kvec scratch; /* Scratch buffer */ struct page **page_ptr; /* pointer to the current page */ + void *page_kaddr; /* kmapped address of the current page */ unsigned int nwords; /* Remaining decode buffer length */ + + struct rpc_rqst *rqst; /* For debugging */ }; /* * These are the xdr_stream style generic XDR encode and decode functions. 
*/ -typedef void (*kxdreproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); -typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj); - -extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + const void *obj); +typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + void *obj); + +extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, + __be32 *p, struct rpc_rqst *rqst); +void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf); extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); +extern int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes); +extern void __xdr_commit_encode(struct xdr_stream *xdr); +extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); +extern void xdr_truncate_decode(struct xdr_stream *xdr, size_t len); +extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr); -extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern unsigned int xdr_page_pos(const struct xdr_stream *xdr); +extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, + __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); -extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); +extern void xdr_finish_decode(struct xdr_stream *xdr); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); -extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); +extern void xdr_set_pagelen(struct xdr_stream *, unsigned int len); +extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, + unsigned int len); +extern unsigned int xdr_stream_move_subsegment(struct xdr_stream *xdr, unsigned int offset, + unsigned int target, unsigned int length); +extern unsigned int xdr_stream_zero(struct xdr_stream *xdr, unsigned int offset, + unsigned int length); + +/** + * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to an empty buffer + * @buflen: size of 'buf' + * + * The scratch buffer is used when decoding from an array of pages. + * If an xdr_inline_decode() call spans across page boundaries, then + * we copy the data into the scratch buffer in order to allow linear + * access. + */ +static inline void +xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) +{ + xdr->scratch.iov_base = buf; + xdr->scratch.iov_len = buflen; +} + +/** + * xdr_set_scratch_folio - Attach a scratch buffer for decoding data + * @xdr: pointer to xdr_stream struct + * @page: an anonymous folio + * + * See xdr_set_scratch_buffer(). 
+ */ +static inline void +xdr_set_scratch_folio(struct xdr_stream *xdr, struct folio *folio) +{ + xdr_set_scratch_buffer(xdr, folio_address(folio), folio_size(folio)); +} + +/** + * xdr_reset_scratch_buffer - Clear scratch buffer information + * @xdr: pointer to xdr_stream struct + * + * See xdr_set_scratch_buffer(). + */ +static inline void +xdr_reset_scratch_buffer(struct xdr_stream *xdr) +{ + xdr_set_scratch_buffer(xdr, NULL, 0); +} + +/** + * xdr_commit_encode - Ensure all data is written to xdr->buf + * @xdr: pointer to xdr_stream + * + * Handle encoding across page boundaries by giving the caller a + * temporary location to write to, then later copying the data into + * place. __xdr_commit_encode() does that copying. + */ +static inline void xdr_commit_encode(struct xdr_stream *xdr) +{ + if (unlikely(xdr->scratch.iov_len)) + __xdr_commit_encode(xdr); +} + +/** + * xdr_stream_remaining - Return the number of bytes remaining in the stream + * @xdr: pointer to struct xdr_stream + * + * Return value: + * Number of bytes remaining in @xdr before xdr->end + */ +static inline size_t +xdr_stream_remaining(const struct xdr_stream *xdr) +{ + return xdr->nwords << 2; +} + +ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, + size_t maxlen, gfp_t gfp_flags); +ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor, + void **body, unsigned int *body_len); +ssize_t xdr_stream_encode_opaque_auth(struct xdr_stream *xdr, u32 flavor, + void *body, unsigned int body_len); + +/** + * xdr_align_size - Calculate padded size of an object + * @n: Size of an object being XDR encoded (in bytes) + * + * Return value: + * Size (in bytes) of the object including xdr padding + */ +static inline size_t +xdr_align_size(size_t n) +{ + const size_t mask = XDR_UNIT - 1; + + return (n + mask) & ~mask; +} + +/** + * xdr_pad_size - Calculate size of an object's pad + * @n: Size of an object being XDR encoded (in bytes) + * + * This implementation avoids the need for conditional + * branches or modulo division. + * + * Return value: + * Size (in bytes) of the needed XDR pad + */ +static inline size_t xdr_pad_size(size_t n) +{ + return xdr_align_size(n) - n; +} + +/** + * xdr_stream_encode_item_present - Encode a "present" list item + * @xdr: pointer to xdr_stream + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr) +{ + const size_t len = XDR_UNIT; + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = xdr_one; + return len; +} + +/** + * xdr_stream_encode_item_absent - Encode a "not present" list item + * @xdr: pointer to xdr_stream + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) +{ + const size_t len = XDR_UNIT; + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = xdr_zero; + return len; +} + +/** + * xdr_encode_bool - Encode a boolean item + * @p: address in a buffer into which to encode + * @n: boolean value to encode + * + * Return value: + * Address of item following the encoded boolean + */ +static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) +{ + *p++ = n ? 
xdr_one : xdr_zero; + return p; +} + +/** + * xdr_stream_encode_bool - Encode a boolean item + * @xdr: pointer to xdr_stream + * @n: boolean value to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n) +{ + const size_t len = XDR_UNIT; + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_bool(p, n); + return len; +} + +/** + * xdr_stream_encode_u32 - Encode a 32-bit integer + * @xdr: pointer to xdr_stream + * @n: integer to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n) +{ + const size_t len = sizeof(n); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = cpu_to_be32(n); + return len; +} + +/** + * xdr_stream_encode_be32 - Encode a big-endian 32-bit integer + * @xdr: pointer to xdr_stream + * @n: integer to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_be32(struct xdr_stream *xdr, __be32 n) +{ + const size_t len = sizeof(n); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = n; + return len; +} + +/** + * xdr_stream_encode_u64 - Encode a 64-bit integer + * @xdr: pointer to xdr_stream + * @n: 64-bit integer to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n) +{ + const size_t len = sizeof(n); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_hyper(p, n); + return len; +} + +/** + * xdr_stream_encode_opaque_inline - Encode opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: pointer to void pointer + * @len: size of object + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t len) +{ + size_t count = sizeof(__u32) + xdr_align_size(len); + __be32 *p = xdr_reserve_space(xdr, count); + + if (unlikely(!p)) { + *ptr = NULL; + return -EMSGSIZE; + } + xdr_encode_opaque(p, NULL, len); + *ptr = ++p; + return count; +} + +/** + * xdr_stream_encode_opaque_fixed - Encode fixed length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: pointer to opaque data object + * @len: size of object pointed to by @ptr + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_opaque_fixed(struct xdr_stream *xdr, const void *ptr, size_t len) +{ + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_opaque_fixed(p, ptr, len); + return xdr_align_size(len); +} + +/** + * xdr_stream_encode_opaque - Encode variable length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: pointer to opaque data object + * @len: size of object pointed to by @ptr + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t 
+xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len) +{ + size_t count = sizeof(__u32) + xdr_align_size(len); + __be32 *p = xdr_reserve_space(xdr, count); + + if (unlikely(!p)) + return -EMSGSIZE; + xdr_encode_opaque(p, ptr, len); + return count; +} + +/** + * xdr_stream_encode_uint32_array - Encode variable length array of integers + * @xdr: pointer to xdr_stream + * @array: array of integers + * @array_size: number of elements in @array + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_uint32_array(struct xdr_stream *xdr, + const __u32 *array, size_t array_size) +{ + ssize_t ret = (array_size+1) * sizeof(__u32); + __be32 *p = xdr_reserve_space(xdr, ret); + + if (unlikely(!p)) + return -EMSGSIZE; + *p++ = cpu_to_be32(array_size); + for (; array_size > 0; p++, array++, array_size--) + *p = cpu_to_be32p(array); + return ret; +} + +/** + * xdr_item_is_absent - symbolically handle XDR discriminators + * @p: pointer to undecoded discriminator + * + * Return values: + * %true if the following XDR item is absent + * %false if the following XDR item is present + */ +static inline bool xdr_item_is_absent(const __be32 *p) +{ + return *p == xdr_zero; +} + +/** + * xdr_item_is_present - symbolically handle XDR discriminators + * @p: pointer to undecoded discriminator + * + * Return values: + * %true if the following XDR item is present + * %false if the following XDR item is absent + */ +static inline bool xdr_item_is_present(const __be32 *p) +{ + return *p != xdr_zero; +} + +/** + * xdr_stream_decode_bool - Decode a boolean + * @xdr: pointer to xdr_stream + * @ptr: pointer to a u32 in which to store the result + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = (*p != xdr_zero); + return 0; +} + +/** + * xdr_stream_decode_u32 - Decode a 32-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = be32_to_cpup(p); + return 0; +} + +/** + * xdr_stream_decode_be32 - Decode a big-endian 32-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_be32(struct xdr_stream *xdr, __be32 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + *ptr = *p; + return 0; +} -#endif /* __KERNEL__ */ +/** + * xdr_stream_decode_u64 - Decode a 64-bit integer + * @xdr: pointer to xdr_stream + * @ptr: location to store 64-bit integer + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) +{ + const size_t count = sizeof(*ptr); + __be32 *p = xdr_inline_decode(xdr, count); + + if (unlikely(!p)) + return -EBADMSG; + xdr_decode_hyper(p, ptr); + return 0; +} + +/** + * 
xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: location to store data + * @len: size of buffer pointed to by @ptr + * + * Return values: + * %0 on success + * %-EBADMSG on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len) +{ + __be32 *p = xdr_inline_decode(xdr, len); + + if (unlikely(!p)) + return -EBADMSG; + xdr_decode_opaque_fixed(p, ptr, len); + return 0; +} + +/** + * xdr_stream_decode_opaque_inline - Decode variable length opaque xdr data + * @xdr: pointer to xdr_stream + * @ptr: location to store pointer to opaque data + * @maxlen: maximum acceptable object size + * + * Note: the pointer stored in @ptr cannot be assumed valid after the XDR + * buffer has been destroyed, or even after calling xdr_inline_decode() + * on @xdr. It is therefore expected that the object it points to should + * be processed immediately. + * + * Return values: + * On success, returns size of object stored in *@ptr + * %-EBADMSG on XDR buffer overflow + * %-EMSGSIZE if the size of the object would exceed @maxlen + */ +static inline ssize_t +xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxlen) +{ + __be32 *p; + __u32 len; + + *ptr = NULL; + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return -EBADMSG; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return -EBADMSG; + if (unlikely(len > maxlen)) + return -EMSGSIZE; + *ptr = p; + } + return len; +} + +/** + * xdr_stream_decode_uint32_array - Decode variable length array of integers + * @xdr: pointer to xdr_stream + * @array: location to store the integer array or NULL + * @array_size: number of elements to store + * + * Return values: + * On success, returns number of elements stored in @array + * %-EBADMSG on XDR buffer overflow + * %-EMSGSIZE if the size of the array exceeds @array_size + */ +static inline ssize_t +xdr_stream_decode_uint32_array(struct xdr_stream *xdr, + __u32 *array, size_t array_size) +{ + __be32 *p; + __u32 len; + ssize_t retval; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return -EBADMSG; + if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p)) + return -EBADMSG; + p = xdr_inline_decode(xdr, len * sizeof(*p)); + if (unlikely(!p)) + return -EBADMSG; + if (array == NULL) + return len; + if (len <= array_size) { + if (len < array_size) + memset(array+len, 0, (array_size-len)*sizeof(*array)); + array_size = len; + retval = len; + } else + retval = -EMSGSIZE; + for (; array_size > 0; p++, array++, array_size--) + *array = be32_to_cpup(p); + return retval; +} #endif /* _SUNRPC_XDR_H_ */ diff --git a/include/linux/sunrpc/xdrgen/_builtins.h b/include/linux/sunrpc/xdrgen/_builtins.h new file mode 100644 index 000000000000..66ca3ece951a --- /dev/null +++ b/include/linux/sunrpc/xdrgen/_builtins.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Oracle and/or its affiliates. + * + * This header defines XDR data type primitives specified in + * Section 4 of RFC 4506, used by RPC programs implemented + * in the Linux kernel. 
+ */ + +#ifndef _SUNRPC_XDRGEN__BUILTINS_H_ +#define _SUNRPC_XDRGEN__BUILTINS_H_ + +#include <linux/sunrpc/xdr.h> + +static inline bool +xdrgen_decode_void(struct xdr_stream *xdr) +{ + return true; +} + +static inline bool +xdrgen_encode_void(struct xdr_stream *xdr) +{ + return true; +} + +static inline bool +xdrgen_decode_bool(struct xdr_stream *xdr, bool *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = (*p != xdr_zero); + return true; +} + +static inline bool +xdrgen_encode_bool(struct xdr_stream *xdr, bool val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = val ? xdr_one : xdr_zero; + return true; +} + +static inline bool +xdrgen_decode_int(struct xdr_stream *xdr, s32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_int(struct xdr_stream *xdr, s32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_unsigned_int(struct xdr_stream *xdr, u32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_int(struct xdr_stream *xdr, u32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_long(struct xdr_stream *xdr, s32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_long(struct xdr_stream *xdr, s32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_unsigned_long(struct xdr_stream *xdr, u32 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *ptr = be32_to_cpup(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_long(struct xdr_stream *xdr, u32 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT); + + if (unlikely(!p)) + return false; + *p = cpu_to_be32(val); + return true; +} + +static inline bool +xdrgen_decode_hyper(struct xdr_stream *xdr, s64 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + *ptr = get_unaligned_be64(p); + return true; +} + +static inline bool +xdrgen_encode_hyper(struct xdr_stream *xdr, s64 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + put_unaligned_be64(val, p); + return true; +} + +static inline bool +xdrgen_decode_unsigned_hyper(struct xdr_stream *xdr, u64 *ptr) +{ + __be32 *p = xdr_inline_decode(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + *ptr = get_unaligned_be64(p); + return true; +} + +static inline bool +xdrgen_encode_unsigned_hyper(struct xdr_stream *xdr, u64 val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT * 2); + + if (unlikely(!p)) + return false; + put_unaligned_be64(val, p); + return true; +} + +static inline bool +xdrgen_decode_string(struct xdr_stream *xdr, string *ptr, u32 maxlen) +{ + __be32 *p; + u32 len; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return false; + if (unlikely(maxlen && len > maxlen)) + return false; + 
if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return false; + ptr->data = (unsigned char *)p; + } + ptr->len = len; + return true; +} + +static inline bool +xdrgen_encode_string(struct xdr_stream *xdr, string val, u32 maxlen) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len)); + + if (unlikely(!p)) + return false; + xdr_encode_opaque(p, val.data, val.len); + return true; +} + +static inline bool +xdrgen_decode_opaque(struct xdr_stream *xdr, opaque *ptr, u32 maxlen) +{ + __be32 *p; + u32 len; + + if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) + return false; + if (unlikely(maxlen && len > maxlen)) + return false; + if (len != 0) { + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return false; + ptr->data = (u8 *)p; + } + ptr->len = len; + return true; +} + +static inline bool +xdrgen_encode_opaque(struct xdr_stream *xdr, opaque val) +{ + __be32 *p = xdr_reserve_space(xdr, XDR_UNIT + xdr_align_size(val.len)); + + if (unlikely(!p)) + return false; + xdr_encode_opaque(p, val.data, val.len); + return true; +} + +#endif /* _SUNRPC_XDRGEN__BUILTINS_H_ */ diff --git a/include/linux/sunrpc/xdrgen/_defs.h b/include/linux/sunrpc/xdrgen/_defs.h new file mode 100644 index 000000000000..20c7270aa64d --- /dev/null +++ b/include/linux/sunrpc/xdrgen/_defs.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024 Oracle and/or its affiliates. + * + * This header defines XDR data type primitives specified in + * Section 4 of RFC 4506, used by RPC programs implemented + * in the Linux kernel. + */ + +#ifndef _SUNRPC_XDRGEN__DEFS_H_ +#define _SUNRPC_XDRGEN__DEFS_H_ + +#define TRUE (true) +#define FALSE (false) + +typedef struct { + u32 len; + unsigned char *data; +} string; + +typedef struct { + u32 len; + u8 *data; +} opaque; + +#define XDR_void (0) +#define XDR_bool (1) +#define XDR_int (1) +#define XDR_unsigned_int (1) +#define XDR_long (1) +#define XDR_unsigned_long (1) +#define XDR_hyper (2) +#define XDR_unsigned_hyper (2) + +#endif /* _SUNRPC_XDRGEN__DEFS_H_ */ diff --git a/include/linux/sunrpc/xdrgen/nfs4_1.h b/include/linux/sunrpc/xdrgen/nfs4_1.h new file mode 100644 index 000000000000..cf21a14aa885 --- /dev/null +++ b/include/linux/sunrpc/xdrgen/nfs4_1.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Generated by xdrgen. Manual edits will be lost. 
*/ +/* XDR specification file: ../../Documentation/sunrpc/xdr/nfs4_1.x */ +/* XDR specification modification time: Mon Oct 14 09:10:13 2024 */ + +#ifndef _LINUX_XDRGEN_NFS4_1_DEF_H +#define _LINUX_XDRGEN_NFS4_1_DEF_H + +#include <linux/types.h> +#include <linux/sunrpc/xdrgen/_defs.h> + +typedef s64 int64_t; + +typedef u32 uint32_t; + +typedef struct { + u32 count; + uint32_t *element; +} bitmap4; + +struct nfstime4 { + int64_t seconds; + uint32_t nseconds; +}; + +typedef bool fattr4_offline; + +enum { FATTR4_OFFLINE = 83 }; + +struct open_arguments4 { + bitmap4 oa_share_access; + bitmap4 oa_share_deny; + bitmap4 oa_share_access_want; + bitmap4 oa_open_claim; + bitmap4 oa_create_mode; +}; + +enum open_args_share_access4 { + OPEN_ARGS_SHARE_ACCESS_READ = 1, + OPEN_ARGS_SHARE_ACCESS_WRITE = 2, + OPEN_ARGS_SHARE_ACCESS_BOTH = 3, +}; +typedef enum open_args_share_access4 open_args_share_access4; + +enum open_args_share_deny4 { + OPEN_ARGS_SHARE_DENY_NONE = 0, + OPEN_ARGS_SHARE_DENY_READ = 1, + OPEN_ARGS_SHARE_DENY_WRITE = 2, + OPEN_ARGS_SHARE_DENY_BOTH = 3, +}; +typedef enum open_args_share_deny4 open_args_share_deny4; + +enum open_args_share_access_want4 { + OPEN_ARGS_SHARE_ACCESS_WANT_ANY_DELEG = 3, + OPEN_ARGS_SHARE_ACCESS_WANT_NO_DELEG = 4, + OPEN_ARGS_SHARE_ACCESS_WANT_CANCEL = 5, + OPEN_ARGS_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL = 17, + OPEN_ARGS_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED = 18, + OPEN_ARGS_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 20, + OPEN_ARGS_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 21, +}; +typedef enum open_args_share_access_want4 open_args_share_access_want4; + +enum open_args_open_claim4 { + OPEN_ARGS_OPEN_CLAIM_NULL = 0, + OPEN_ARGS_OPEN_CLAIM_PREVIOUS = 1, + OPEN_ARGS_OPEN_CLAIM_DELEGATE_CUR = 2, + OPEN_ARGS_OPEN_CLAIM_DELEGATE_PREV = 3, + OPEN_ARGS_OPEN_CLAIM_FH = 4, + OPEN_ARGS_OPEN_CLAIM_DELEG_CUR_FH = 5, + OPEN_ARGS_OPEN_CLAIM_DELEG_PREV_FH = 6, +}; +typedef enum open_args_open_claim4 open_args_open_claim4; + +enum open_args_createmode4 { + OPEN_ARGS_CREATEMODE_UNCHECKED4 = 0, + OPEN_ARGS_CREATE_MODE_GUARDED = 1, + OPEN_ARGS_CREATEMODE_EXCLUSIVE4 = 2, + OPEN_ARGS_CREATE_MODE_EXCLUSIVE4_1 = 3, +}; +typedef enum open_args_createmode4 open_args_createmode4; + +typedef struct open_arguments4 fattr4_open_arguments; + +enum { FATTR4_OPEN_ARGUMENTS = 86 }; + +enum { OPEN4_RESULT_NO_OPEN_STATEID = 0x00000010 }; + +typedef struct nfstime4 fattr4_time_deleg_access; + +typedef struct nfstime4 fattr4_time_deleg_modify; + +enum { FATTR4_TIME_DELEG_ACCESS = 84 }; + +enum { FATTR4_TIME_DELEG_MODIFY = 85 }; + +enum { OPEN4_SHARE_ACCESS_WANT_DELEG_MASK = 0xFF00 }; + +enum { OPEN4_SHARE_ACCESS_WANT_NO_PREFERENCE = 0x0000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_READ_DELEG = 0x0100 }; + +enum { OPEN4_SHARE_ACCESS_WANT_WRITE_DELEG = 0x0200 }; + +enum { OPEN4_SHARE_ACCESS_WANT_ANY_DELEG = 0x0300 }; + +enum { OPEN4_SHARE_ACCESS_WANT_NO_DELEG = 0x0400 }; + +enum { OPEN4_SHARE_ACCESS_WANT_CANCEL = 0x0500 }; + +enum { OPEN4_SHARE_ACCESS_WANT_SIGNAL_DELEG_WHEN_RESRC_AVAIL = 0x10000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_PUSH_DELEG_WHEN_UNCONTENDED = 0x20000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS = 0x100000 }; + +enum { OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION = 0x200000 }; + +enum open_delegation_type4 { + OPEN_DELEGATE_NONE = 0, + OPEN_DELEGATE_READ = 1, + OPEN_DELEGATE_WRITE = 2, + OPEN_DELEGATE_NONE_EXT = 3, + OPEN_DELEGATE_READ_ATTRS_DELEG = 4, + OPEN_DELEGATE_WRITE_ATTRS_DELEG = 5, +}; +typedef enum open_delegation_type4 open_delegation_type4; + +#define 
NFS4_int64_t_sz \ + (XDR_hyper) +#define NFS4_uint32_t_sz \ + (XDR_unsigned_int) +#define NFS4_bitmap4_sz (XDR_unsigned_int) +#define NFS4_nfstime4_sz \ + (NFS4_int64_t_sz + NFS4_uint32_t_sz) +#define NFS4_fattr4_offline_sz \ + (XDR_bool) +#define NFS4_open_arguments4_sz \ + (NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz + NFS4_bitmap4_sz) +#define NFS4_open_args_share_access4_sz (XDR_int) +#define NFS4_open_args_share_deny4_sz (XDR_int) +#define NFS4_open_args_share_access_want4_sz (XDR_int) +#define NFS4_open_args_open_claim4_sz (XDR_int) +#define NFS4_open_args_createmode4_sz (XDR_int) +#define NFS4_fattr4_open_arguments_sz \ + (NFS4_open_arguments4_sz) +#define NFS4_fattr4_time_deleg_access_sz \ + (NFS4_nfstime4_sz) +#define NFS4_fattr4_time_deleg_modify_sz \ + (NFS4_nfstime4_sz) +#define NFS4_open_delegation_type4_sz (XDR_int) + +#endif /* _LINUX_XDRGEN_NFS4_1_DEF_H */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index cec7b9b5e1bf..f46d1fb8f71a 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/xprt.h * @@ -13,27 +14,23 @@ #include <linux/socket.h> #include <linux/in.h> #include <linux/ktime.h> +#include <linux/kref.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/msg_prot.h> -#ifdef __KERNEL__ - #define RPC_MIN_SLOT_TABLE (2U) #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) #define RPC_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE_LIMIT -/* - * This describes a timeout strategy - */ -struct rpc_timeout { - unsigned long to_initval, /* initial timeout */ - to_maxval, /* max timeout */ - to_increment; /* if !exponential */ - unsigned int to_retries; /* max # of retries */ - unsigned char to_exponential; -}; +#define RPC_CWNDSHIFT (8U) +#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) +#define RPC_INITCWND RPC_CWNDSCALE +#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) +#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) + +#define RPC_GSS_SEQNO_ARRAY_SIZE 3U enum rpc_display_format_t { RPC_DISPLAY_ADDR = 0, @@ -47,7 +44,11 @@ enum rpc_display_format_t { struct rpc_task; struct rpc_xprt; +struct xprt_class; struct seq_file; +struct svc_serv; +struct net; +#include <linux/lwq.h> /* * This describes a complete RPC request @@ -67,16 +68,25 @@ struct rpc_rqst { struct rpc_cred * rq_cred; /* Bound cred */ __be32 rq_xid; /* request XID */ int rq_cong; /* has incremented xprt->cong */ - u32 rq_seqno; /* gss seq no. used on req. */ + u32 rq_seqnos[RPC_GSS_SEQNO_ARRAY_SIZE]; /* past gss req seq nos. */ + unsigned int rq_seqno_count; /* number of entries in rq_seqnos */ int rq_enc_pages_num; struct page **rq_enc_pages; /* scratch pages for use by gss privacy code */ void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ - struct list_head rq_list; - __u32 * rq_buffer; /* XDR encode buffer */ - size_t rq_callsize, - rq_rcvsize; + union { + struct list_head rq_list; /* Slot allocation list */ + struct rb_node rq_recv; /* Receive queue */ + }; + + struct list_head rq_xmit; /* Send queue */ + struct list_head rq_xmit2; /* Send queue */ + + void *rq_buffer; /* Call XDR encode buffer */ + size_t rq_callsize; + void *rq_rbuffer; /* Reply XDR decode buffer */ + size_t rq_rcvsize; size_t rq_xmit_bytes_sent; /* total bytes sent */ size_t rq_reply_bytes_recvd; /* total reply bytes */ /* received */ @@ -85,6 +95,7 @@ struct rpc_rqst { * used in the softirq. 
*/ unsigned long rq_majortimeo; /* major timeout alarm */ + unsigned long rq_minortimeo; /* minor timeout alarm */ unsigned long rq_timeout; /* Current timeout value */ ktime_t rq_rtt; /* round-trip time */ unsigned int rq_retries; /* # of retries */ @@ -92,6 +103,7 @@ struct rpc_rqst { /* A cookie used to track the state of the transport connection */ + atomic_t rq_pin; /* * Partial send handling @@ -102,7 +114,7 @@ struct rpc_rqst { int rq_ntrans; #if defined(CONFIG_SUNRPC_BACKCHANNEL) - struct list_head rq_bc_list; /* Callback service list */ + struct lwq_node rq_bc_list; /* Callback service list */ unsigned long rq_bc_pa_state; /* Backchannel prealloc state */ struct list_head rq_bc_pa_list; /* Backchannel prealloc list */ #endif /* CONFIG_SUNRPC_BACKCHANEL */ @@ -110,23 +122,71 @@ struct rpc_rqst { #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len +static inline int xprt_rqst_add_seqno(struct rpc_rqst *req, u32 seqno) +{ + if (likely(req->rq_seqno_count < RPC_GSS_SEQNO_ARRAY_SIZE)) + req->rq_seqno_count++; + + /* Shift array to make room for the newest element at the beginning */ + memmove(&req->rq_seqnos[1], &req->rq_seqnos[0], + (RPC_GSS_SEQNO_ARRAY_SIZE - 1) * sizeof(req->rq_seqnos[0])); + req->rq_seqnos[0] = seqno; + return 0; +} + +/* RPC transport layer security policies */ +enum xprtsec_policies { + RPC_XPRTSEC_NONE = 0, + RPC_XPRTSEC_TLS_ANON, + RPC_XPRTSEC_TLS_X509, +}; + +struct xprtsec_parms { + enum xprtsec_policies policy; + + /* authentication material */ + key_serial_t cert_serial; + key_serial_t privkey_serial; +}; + struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task); + void (*free_slot)(struct rpc_xprt *xprt, + struct rpc_rqst *req); void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); - void * (*buf_alloc)(struct rpc_task *task, size_t size); - void (*buf_free)(void *buffer); - int (*send_request)(struct rpc_task *task); - void (*set_retrans_timeout)(struct rpc_task *task); + int (*get_srcaddr)(struct rpc_xprt *xprt, char *buf, + size_t buflen); + unsigned short (*get_srcport)(struct rpc_xprt *xprt); + int (*buf_alloc)(struct rpc_task *task); + void (*buf_free)(struct rpc_task *task); + int (*prepare_request)(struct rpc_rqst *req, + struct xdr_buf *buf); + int (*send_request)(struct rpc_rqst *req); + void (*abort_send_request)(struct rpc_rqst *req); + void (*wait_for_reply_request)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_request)(struct rpc_task *task); void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); + void (*set_connect_timeout)(struct rpc_xprt *xprt, + unsigned long connect_timeout, + unsigned long reconnect_timeout); void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); + int (*enable_swap)(struct rpc_xprt *xprt); + void (*disable_swap)(struct rpc_xprt *xprt); + void (*inject_disconnect)(struct rpc_xprt *xprt); + int (*bc_setup)(struct rpc_xprt *xprt, + unsigned int min_reqs); + size_t (*bc_maxpayload)(struct rpc_xprt *xprt); + unsigned int (*bc_num_slots)(struct rpc_xprt *xprt); + void (*bc_free_rqst)(struct rpc_rqst *rqst); + void (*bc_destroy)(struct rpc_xprt *xprt, + unsigned int max_reqs); 
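Stepping back to xprt_rqst_add_seqno() above: it keeps a short sliding window of the GSS sequence numbers most recently used for this request, newest at index 0, so that a reply verified against an earlier transmission can still be matched. A minimal sketch of how a caller might scan that window; the helper name xprt_rqst_seqno_seen is illustrative and not part of this patch:

static inline bool
xprt_rqst_seqno_seen(const struct rpc_rqst *req, u32 seqno)
{
	unsigned int i;

	/* Only the first rq_seqno_count entries hold valid history. */
	for (i = 0; i < req->rq_seqno_count; i++)
		if (req->rq_seqnos[i] == seqno)
			return true;
	return false;
}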
}; /* @@ -144,12 +204,16 @@ enum xprt_transports { XPRT_TRANSPORT_TCP = IPPROTO_TCP, XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_BC_RDMA = XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_LOCAL = 257, + XPRT_TRANSPORT_TCP_TLS = 258, }; +struct rpc_sysfs_xprt; struct rpc_xprt { - atomic_t count; /* Reference count */ - struct rpc_xprt_ops * ops; /* transport methods */ + struct kref kref; /* Reference count */ + const struct rpc_xprt_ops *ops; /* transport methods */ + unsigned int id; /* transport id */ const struct rpc_timeout *timeout; /* timeout parms */ struct sockaddr_storage addr; /* server address */ @@ -161,8 +225,6 @@ struct rpc_xprt { size_t max_payload; /* largest RPC payload size, in bytes */ - unsigned int tsh_size; /* size of transport specific - header */ struct rpc_wait_queue binding; /* requests waiting on rpcbind */ struct rpc_wait_queue sending; /* requests waiting to send */ @@ -171,18 +233,25 @@ struct rpc_xprt { struct list_head free; /* free slots */ unsigned int max_reqs; /* max number of slots */ unsigned int min_reqs; /* min number of slots */ - atomic_t num_reqs; /* total slots */ + unsigned int num_reqs; /* total slots */ unsigned long state; /* transport state */ - unsigned char resvport : 1; /* use a reserved port */ - unsigned int swapper; /* we're swapping over this + unsigned char resvport : 1, /* use a reserved port */ + reuseport : 1; /* reuse port on reconnect */ + atomic_t swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ /* + * Multipath + */ + struct list_head xprt_switch; + + /* * Connection of transports */ unsigned long bind_timeout, reestablish_timeout; + struct xprtsec_parms xprtsec; unsigned int connect_cookie; /* A cookie that gets bumped every time the transport is reconnected */ @@ -193,26 +262,37 @@ struct rpc_xprt { struct work_struct task_cleanup; struct timer_list timer; unsigned long last_used, - idle_timeout; + idle_timeout, + connect_timeout, + max_reconnect_timeout; /* * Send stuff */ + atomic_long_t queuelen; spinlock_t transport_lock; /* lock transport info */ spinlock_t reserve_lock; /* lock slot table */ + spinlock_t queue_lock; /* send/receive queue lock */ u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ + + struct list_head xmit_queue; /* Send queue */ + atomic_long_t xmit_queuelen; + struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ + unsigned int bc_alloc_max; unsigned int bc_alloc_count; /* Total number of preallocs */ + atomic_t bc_slot_count; /* Number of allocated slots */ spinlock_t bc_pa_lock; /* Protects the preallocated * items */ struct list_head bc_pa_list; /* List of preallocated * backchannel rpc_rqst's */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ - struct list_head recv; + + struct rb_root recv_queue; /* Receive queue */ struct { unsigned long bind_count, /* total number of binds */ @@ -231,8 +311,16 @@ struct rpc_xprt { } stat; struct net *xprt_net; + netns_tracker ns_tracker; const char *servername; const char *address_strings[RPC_DISPLAY_MAX]; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + struct dentry *debugfs; /* debugfs directory */ +#endif + struct rcu_head rcu; + const struct xprt_class *xprt_class; + struct rpc_sysfs_xprt *xprt_sysfs; + bool main; /*mark if this is the 1st transport */ }; #if defined(CONFIG_SUNRPC_BACKCHANNEL) @@ 
-266,7 +354,11 @@ struct xprt_create { size_t addrlen; const char *servername; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ + struct rpc_xprt_switch *bc_xps; unsigned int flags; + struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; struct xprt_class { @@ -275,6 +367,7 @@ struct xprt_class { struct rpc_xprt * (*setup)(struct xprt_create *); struct module *owner; char name[32]; + const char * netid[]; }; /* @@ -282,13 +375,22 @@ struct xprt_class { */ struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); +unsigned long xprt_reconnect_delay(const struct rpc_xprt *xprt); +void xprt_reconnect_backoff(struct rpc_xprt *xprt, + unsigned long init_to); void xprt_reserve(struct rpc_task *task); void xprt_retry_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); -void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task); -int xprt_prepare_transmit(struct rpc_task *task); +void xprt_free_slot(struct rpc_xprt *xprt, + struct rpc_rqst *req); +bool xprt_prepare_transmit(struct rpc_task *task); +void xprt_request_enqueue_transmit(struct rpc_task *task); +int xprt_request_enqueue_receive(struct rpc_task *task); +void xprt_request_wait_receive(struct rpc_task *task); +void xprt_request_dequeue_xprt(struct rpc_task *task); +bool xprt_request_need_retransmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); int xprt_adjust_timeout(struct rpc_rqst *req); @@ -301,10 +403,20 @@ struct rpc_xprt * xprt_alloc(struct net *net, size_t size, unsigned int num_prealloc, unsigned int max_req); void xprt_free(struct rpc_xprt *); +void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task); +bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req); +void xprt_cleanup_ids(void); -static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) +static inline int +xprt_enable_swap(struct rpc_xprt *xprt) { - return p + xprt->tsh_size; + return xprt->ops->enable_swap(xprt); +} + +static inline void +xprt_disable_swap(struct rpc_xprt *xprt) +{ + xprt->ops->disable_swap(xprt); } /* @@ -312,20 +424,27 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * */ int xprt_register_transport(struct xprt_class *type); int xprt_unregister_transport(struct xprt_class *type); -int xprt_load_transport(const char *); -void xprt_set_retrans_timeout_def(struct rpc_task *task); -void xprt_set_retrans_timeout_rtt(struct rpc_task *task); +int xprt_find_transport_ident(const char *); +void xprt_wait_for_reply_request_def(struct rpc_task *task); +void xprt_wait_for_reply_request_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); -void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action); -void xprt_write_space(struct rpc_xprt *xprt); +void xprt_wait_for_buffer_space(struct rpc_xprt *xprt); +bool xprt_write_space(struct rpc_xprt *xprt); void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result); struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid); +void xprt_update_rtt(struct rpc_task *task); void xprt_complete_rqst(struct rpc_task *task, int copied); +void xprt_pin_rqst(struct rpc_rqst *req); +void 
xprt_unpin_rqst(struct rpc_rqst *req); void xprt_release_rqst_cong(struct rpc_task *task); +bool xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper(struct rpc_xprt *xprt, int enable); + +bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); +void xprt_unlock_connect(struct rpc_xprt *, void *); +void xprt_release_write(struct rpc_xprt *, struct rpc_task *); /* * Reserved bit positions in xprt->state @@ -337,9 +456,12 @@ int xs_swapper(struct rpc_xprt *xprt, int enable); #define XPRT_BOUND (4) #define XPRT_BINDING (5) #define XPRT_CLOSING (6) -#define XPRT_CONNECTION_ABORT (7) -#define XPRT_CONNECTION_CLOSE (8) +#define XPRT_OFFLINE (7) +#define XPRT_REMOVE (8) #define XPRT_CONGESTED (9) +#define XPRT_CWND_WAIT (10) +#define XPRT_WRITE_SPACE (11) +#define XPRT_SND_IS_COOKIE (12) static inline void xprt_set_connected(struct rpc_xprt *xprt) { @@ -368,9 +490,9 @@ static inline int xprt_test_and_clear_connected(struct rpc_xprt *xprt) static inline void xprt_clear_connecting(struct rpc_xprt *xprt) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(XPRT_CONNECTING, &xprt->state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); } static inline int xprt_connecting(struct rpc_xprt *xprt) @@ -400,9 +522,9 @@ static inline void xprt_clear_bound(struct rpc_xprt *xprt) static inline void xprt_clear_binding(struct rpc_xprt *xprt) { - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(XPRT_BINDING, &xprt->state); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); } static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) @@ -410,6 +532,7 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_BINDING, &xprt->state); } -#endif /* __KERNEL__*/ - +void xprt_set_offline_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps); +void xprt_set_online_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps); +void xprt_delete_locked(struct rpc_xprt *xprt, struct rpc_xprt_switch *xps); #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h new file mode 100644 index 000000000000..e4db5022fe92 --- /dev/null +++ b/include/linux/sunrpc/xprtmultipath.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * RPC client multipathing definitions + * + * Copyright (c) 2015, 2016, Primary Data, Inc. All rights reserved. 
+ * + * Trond Myklebust <trond.myklebust@primarydata.com> + */ +#ifndef _NET_SUNRPC_XPRTMULTIPATH_H +#define _NET_SUNRPC_XPRTMULTIPATH_H + +struct rpc_xprt_iter_ops; +struct rpc_sysfs_xprt_switch; +struct rpc_xprt_switch { + spinlock_t xps_lock; + struct kref xps_kref; + + unsigned int xps_id; + unsigned int xps_nxprts; + unsigned int xps_nactive; + unsigned int xps_nunique_destaddr_xprts; + atomic_long_t xps_queuelen; + struct list_head xps_xprt_list; + + struct net * xps_net; + + const struct rpc_xprt_iter_ops *xps_iter_ops; + + struct rpc_sysfs_xprt_switch *xps_sysfs; + struct rcu_head xps_rcu; +}; + +struct rpc_xprt_iter { + struct rpc_xprt_switch __rcu *xpi_xpswitch; + struct rpc_xprt * xpi_cursor; + + const struct rpc_xprt_iter_ops *xpi_ops; +}; + + +struct rpc_xprt_iter_ops { + void (*xpi_rewind)(struct rpc_xprt_iter *); + struct rpc_xprt *(*xpi_xprt)(struct rpc_xprt_iter *); + struct rpc_xprt *(*xpi_next)(struct rpc_xprt_iter *); +}; + +extern struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt, + gfp_t gfp_flags); + +extern struct rpc_xprt_switch *xprt_switch_get(struct rpc_xprt_switch *xps); +extern void xprt_switch_put(struct rpc_xprt_switch *xps); + +extern void rpc_xprt_switch_set_roundrobin(struct rpc_xprt_switch *xps); + +extern void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt); +extern void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps, + struct rpc_xprt *xprt, bool offline); +extern struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps); + +extern void xprt_iter_init(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *xps); + +extern void xprt_iter_init_listall(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *xps); + +extern void xprt_iter_init_listoffline(struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *xps); + +extern void xprt_iter_destroy(struct rpc_xprt_iter *xpi); + +extern void xprt_iter_rewind(struct rpc_xprt_iter *xpi); + +extern struct rpc_xprt_switch *xprt_iter_xchg_switch( + struct rpc_xprt_iter *xpi, + struct rpc_xprt_switch *newswitch); + +extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi); +extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi); + +extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap); + +extern void xprt_multipath_cleanup_ids(void); + +#endif diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index c2f04e1ae159..16c239e0d6dd 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * @@ -41,11 +42,6 @@ #define _LINUX_SUNRPC_XPRTRDMA_H /* - * rpcbind (v3+) RDMA netid. - */ -#define RPCBIND_NETID_RDMA "rdma" - -/* * Constants. Max RPC/NFS header is big enough to account for * additional marshaling buffers passed down by Linux client. * @@ -53,24 +49,23 @@ * fully-chunked NFS message (read chunks are the largest). Note only * a single chunk type per message is supported currently. 
*/ -#define RPCRDMA_MIN_SLOT_TABLE (2U) -#define RPCRDMA_DEF_SLOT_TABLE (32U) -#define RPCRDMA_MAX_SLOT_TABLE (256U) - -#define RPCRDMA_DEF_INLINE (1024) /* default inline max */ - -#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ +#define RPCRDMA_MIN_SLOT_TABLE (4U) +#define RPCRDMA_DEF_SLOT_TABLE (128U) +#define RPCRDMA_MAX_SLOT_TABLE (16384U) -/* memory registration strategies */ -#define RPCRDMA_PERSISTENT_REGISTRATION (1) +#define RPCRDMA_MIN_INLINE (1024) /* min inline thresh */ +#define RPCRDMA_DEF_INLINE (4096) /* default inline thresh */ +#define RPCRDMA_MAX_INLINE (65536) /* max inline thresh */ +/* Memory registration strategies, by number. + * This is part of a kernel / user space API. Do not remove. */ enum rpcrdma_memreg { RPCRDMA_BOUNCEBUFFERS = 0, RPCRDMA_REGISTER, RPCRDMA_MEMWINDOWS, RPCRDMA_MEMWINDOWS_ASYNC, RPCRDMA_MTHCAFMR, - RPCRDMA_FRMR, + RPCRDMA_FRWR, RPCRDMA_ALLPHYSICAL, RPCRDMA_LAST }; diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 1ad36cc25b2e..700a1e6c047c 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/include/linux/sunrpc/xprtsock.h * @@ -7,8 +8,6 @@ #ifndef _LINUX_SUNRPC_XPRTSOCK_H #define _LINUX_SUNRPC_XPRTSOCK_H -#ifdef __KERNEL__ - int init_socket_xprt(void); void cleanup_socket_xprt(void); @@ -17,6 +16,82 @@ void cleanup_socket_xprt(void); #define RPC_DEF_MIN_RESVPORT (665U) #define RPC_DEF_MAX_RESVPORT (1023U) -#endif /* __KERNEL__ */ +struct sock_xprt { + struct rpc_xprt xprt; + + /* + * Network layer + */ + struct socket * sock; + struct sock * inet; + struct file * file; + + /* + * State of TCP reply receive + */ + struct { + struct { + __be32 fraghdr, + xid, + calldir; + } __attribute__((packed)); + + u32 offset, + len; + + unsigned long copied; + } recv; + + /* + * State of TCP transmit queue + */ + struct { + u32 offset; + } xmit; + + /* + * Connection of transports + */ + unsigned long sock_state; + struct delayed_work connect_worker; + struct work_struct error_worker; + struct work_struct recv_worker; + struct mutex recv_mutex; + struct completion handshake_done; + struct sockaddr_storage srcaddr; + unsigned short srcport; + int xprt_err; + struct rpc_clnt *clnt; + + /* + * UDP socket buffer size parameters + */ + size_t rcvsize, + sndsize; + + struct rpc_timeout tcp_timeout; + + /* + * Saved socket callback addresses + */ + void (*old_data_ready)(struct sock *); + void (*old_state_change)(struct sock *); + void (*old_write_space)(struct sock *); + void (*old_error_report)(struct sock *); +}; + +/* + * TCP RPC flags + */ +#define XPRT_SOCK_CONNECTING 1U +#define XPRT_SOCK_DATA_READY (2) +#define XPRT_SOCK_UPD_TIMEOUT (3) +#define XPRT_SOCK_WAKE_ERROR (4) +#define XPRT_SOCK_WAKE_WRITE (5) +#define XPRT_SOCK_WAKE_PENDING (6) +#define XPRT_SOCK_WAKE_DISCONNECT (7) +#define XPRT_SOCK_CONNECT_SENT (8) +#define XPRT_SOCK_NOSPACE (9) +#define XPRT_SOCK_IGNORE_RECV (10) #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ |
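struct sock_xprt embeds struct rpc_xprt as its first member, so the socket transport can recover its private state from a generic xprt pointer with container_of() and then test the XPRT_SOCK_* bits in sock_state. A minimal sketch, assuming only the declarations above plus linux/bitops.h; the helper names are illustrative, not the in-tree ones:

static inline struct sock_xprt *
example_sock_xprt(struct rpc_xprt *xprt)
{
	return container_of(xprt, struct sock_xprt, xprt);
}

static inline bool
example_connect_in_progress(struct rpc_xprt *xprt)
{
	struct sock_xprt *transport = example_sock_xprt(xprt);

	/* sock_state carries the XPRT_SOCK_* bits defined above. */
	return test_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
}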

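Returning to the xprtmultipath.h API above: a caller walks the transports attached to an rpc_xprt_switch by initializing an iterator, taking one rpc_xprt at a time, and tearing the iterator down when finished. A minimal usage sketch; example_count_connected is an illustrative name, and this assumes xprt_iter_get_next() returns a referenced transport that is released with xprt_put() (declared in xprt.h):

static unsigned int example_count_connected(struct rpc_xprt_switch *xps)
{
	struct rpc_xprt_iter xpi;
	struct rpc_xprt *xprt;
	unsigned int count = 0;

	xprt_iter_init(&xpi, xps);
	while ((xprt = xprt_iter_get_next(&xpi)) != NULL) {
		if (xprt_connected(xprt))
			count++;
		xprt_put(xprt);
	}
	xprt_iter_destroy(&xpi);
	return count;
}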