summaryrefslogtreecommitdiff
path: root/net/sunrpc
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/.kunitconfig29
-rw-r--r--net/sunrpc/Kconfig77
-rw-r--r--net/sunrpc/Makefile2
-rw-r--r--net/sunrpc/addr.c4
-rw-r--r--net/sunrpc/auth.c38
-rw-r--r--net/sunrpc/auth_gss/Makefile6
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c94
-rw-r--r--net/sunrpc/auth_gss/auth_gss_internal.h6
-rw-r--r--net/sunrpc/auth_gss/gss_generic_token.c231
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c833
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_internal.h195
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c416
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c811
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c125
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c104
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_test.c1859
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c130
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c383
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c1
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c35
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c1125
-rw-r--r--net/sunrpc/auth_tls.c175
-rw-r--r--net/sunrpc/backchannel_rqst.c13
-rw-r--r--net/sunrpc/cache.c94
-rw-r--r--net/sunrpc/clnt.c273
-rw-r--r--net/sunrpc/debugfs.c15
-rw-r--r--net/sunrpc/netns.h1
-rw-r--r--net/sunrpc/rpc_pipe.c546
-rw-r--r--net/sunrpc/rpcb_clnt.c48
-rw-r--r--net/sunrpc/sched.c14
-rw-r--r--net/sunrpc/socklib.c166
-rw-r--r--net/sunrpc/stats.c13
-rw-r--r--net/sunrpc/sunrpc.h4
-rw-r--r--net/sunrpc/sunrpc_syms.c1
-rw-r--r--net/sunrpc/svc.c863
-rw-r--r--net/sunrpc/svc_xprt.c688
-rw-r--r--net/sunrpc/svcauth.c93
-rw-r--r--net/sunrpc/svcauth_unix.c230
-rw-r--r--net/sunrpc/svcsock.c530
-rw-r--r--net/sunrpc/sysctl.c47
-rw-r--r--net/sunrpc/sysfs.c213
-rw-r--r--net/sunrpc/sysfs.h7
-rw-r--r--net/sunrpc/xdr.c307
-rw-r--r--net/sunrpc/xprt.c53
-rw-r--r--net/sunrpc/xprtmultipath.c57
-rw-r--r--net/sunrpc/xprtrdma/Makefile2
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c6
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c9
-rw-r--r--net/sunrpc/xprtrdma/ib_client.c184
-rw-r--r--net/sunrpc/xprtrdma/module.c18
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c5
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c75
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c19
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c270
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c637
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c310
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c200
-rw-r--r--net/sunrpc/xprtrdma/transport.c12
-rw-r--r--net/sunrpc/xprtrdma/verbs.c107
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h6
-rw-r--r--net/sunrpc/xprtsock.c608
61 files changed, 8387 insertions, 5036 deletions
diff --git a/net/sunrpc/.kunitconfig b/net/sunrpc/.kunitconfig
new file mode 100644
index 000000000000..eb02b906c295
--- /dev/null
+++ b/net/sunrpc/.kunitconfig
@@ -0,0 +1,29 @@
+CONFIG_KUNIT=y
+CONFIG_UBSAN=y
+CONFIG_STACKTRACE=y
+CONFIG_NET=y
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_INET=y
+CONFIG_FILE_LOCKING=y
+CONFIG_MULTIUSER=y
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_CTS=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_CMAC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_DES=y
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_CAMELLIA=y
+CONFIG_NFS_FS=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+CONFIG_RPCSEC_GSS_KRB5=y
+CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1=y
+CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA=y
+CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2=y
+CONFIG_RPCSEC_GSS_KRB5_KUNIT_TEST=y
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index bbbb5af0af13..a570e7adf270 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -19,10 +19,10 @@ config SUNRPC_SWAP
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism"
depends on SUNRPC && CRYPTO
- depends on CRYPTO_MD5 && CRYPTO_DES && CRYPTO_CBC && CRYPTO_CTS
- depends on CRYPTO_ECB && CRYPTO_HMAC && CRYPTO_SHA1 && CRYPTO_AES
default y
select SUNRPC_GSS
+ select CRYPTO_SKCIPHER
+ select CRYPTO_HASH
help
Choose Y here to enable Secure RPC using the Kerberos version 5
GSS-API mechanism (RFC 1964).
@@ -34,21 +34,58 @@ config RPCSEC_GSS_KRB5
If unsure, say Y.
-config SUNRPC_DISABLE_INSECURE_ENCTYPES
- bool "Secure RPC: Disable insecure Kerberos encryption types"
+config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1
+ bool "Enable Kerberos enctypes based on AES and SHA-1"
depends on RPCSEC_GSS_KRB5
+ depends on CRYPTO_CBC && CRYPTO_CTS
+ depends on CRYPTO_HMAC && CRYPTO_SHA1
+ depends on CRYPTO_AES
+ default y
+ help
+ Choose Y to enable the use of Kerberos 5 encryption types
+ that utilize Advanced Encryption Standard (AES) ciphers and
+ SHA-1 digests. These include aes128-cts-hmac-sha1-96 and
+ aes256-cts-hmac-sha1-96.
+
+config RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA
+ bool "Enable Kerberos encryption types based on Camellia and CMAC"
+ depends on RPCSEC_GSS_KRB5
+ depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_CAMELLIA
+ depends on CRYPTO_CMAC
default n
help
- Choose Y here to disable the use of deprecated encryption types
- with the Kerberos version 5 GSS-API mechanism (RFC 1964). The
- deprecated encryption types include DES-CBC-MD5, DES-CBC-CRC,
- and DES-CBC-MD4. These types were deprecated by RFC 6649 because
- they were found to be insecure.
+ Choose Y to enable the use of Kerberos 5 encryption types
+ that utilize Camellia ciphers (RFC 3713) and CMAC digests
+ (NIST Special Publication 800-38B). These include
+ camellia128-cts-cmac and camellia256-cts-cmac.
- N is the default because many sites have deployed KDCs and
- keytabs that contain only these deprecated encryption types.
- Choosing Y prevents the use of known-insecure encryption types
- but might result in compatibility problems.
+config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2
+ bool "Enable Kerberos enctypes based on AES and SHA-2"
+ depends on RPCSEC_GSS_KRB5
+ depends on CRYPTO_CBC && CRYPTO_CTS
+ depends on CRYPTO_HMAC && CRYPTO_SHA256 && CRYPTO_SHA512
+ depends on CRYPTO_AES
+ default n
+ help
+ Choose Y to enable the use of Kerberos 5 encryption types
+ that utilize Advanced Encryption Standard (AES) ciphers and
+ SHA-2 digests. These include aes128-cts-hmac-sha256-128 and
+ aes256-cts-hmac-sha384-192.
+
+config RPCSEC_GSS_KRB5_KUNIT_TEST
+ tristate "KUnit tests for RPCSEC GSS Kerberos" if !KUNIT_ALL_TESTS
+ depends on RPCSEC_GSS_KRB5 && KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ This builds the KUnit tests for RPCSEC GSS Kerberos 5.
+
+ KUnit tests run during boot and output the results to the debug
+ log in TAP format (https://testanything.org/). Only useful for
+ kernel devs running KUnit test harness and are not for inclusion
+ into a production build.
+
+ For more information on KUnit and unit tests in general, refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
config SUNRPC_DEBUG
bool "RPC: Enable dprintk debugging"
@@ -64,6 +101,20 @@ config SUNRPC_DEBUG
If unsure, say Y.
+config SUNRPC_DEBUG_TRACE
+ bool "RPC: Send dfprintk() output to the trace buffer"
+ depends on SUNRPC_DEBUG && TRACING
+ default n
+ help
+ dprintk() output can be voluminous, which can overwhelm the
+ kernel's logging facility as it must be sent to the console.
+ This option causes dprintk() output to go to the trace buffer
+ instead of the kernel log.
+
+ This will cause warnings about trace_printk() being used to be
+ logged at boot time, so say N unless you are debugging a problem
+ with sunrpc-based clients or services.
+
config SUNRPC_XPRT_RDMA
tristate "RPC-over-RDMA transport"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 1c8de397d6ad..f89c10fe7e6a 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
- auth.o auth_null.o auth_unix.o \
+ auth.o auth_null.o auth_tls.o auth_unix.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
addr.o rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o sysfs.o \
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index d435bffc6199..97ff11973c49 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -284,10 +284,10 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags)
}
if (snprintf(portbuf, sizeof(portbuf),
- ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf))
+ ".%u.%u", port >> 8, port & 0xff) >= (int)sizeof(portbuf))
return NULL;
- if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf))
+ if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) >= sizeof(addrbuf))
return NULL;
return kstrdup(addrbuf, gfp_flags);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index fb75a883503f..5a827afd8e3b 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -32,7 +32,7 @@ static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
[RPC_AUTH_NULL] = (const struct rpc_authops __force __rcu *)&authnull_ops,
[RPC_AUTH_UNIX] = (const struct rpc_authops __force __rcu *)&authunix_ops,
- NULL, /* others can be loadable modules */
+ [RPC_AUTH_TLS] = (const struct rpc_authops __force __rcu *)&authtls_ops,
};
static LIST_HEAD(cred_unused);
@@ -40,9 +40,6 @@ static unsigned long number_cred_unused;
static struct cred machine_cred = {
.usage = ATOMIC_INIT(1),
-#ifdef CONFIG_DEBUG_CREDENTIALS
- .magic = CRED_MAGIC,
-#endif
};
/*
@@ -492,7 +489,7 @@ static unsigned long
rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
- return number_cred_unused * sysctl_vfs_cache_pressure / 100;
+ return number_cred_unused;
}
static void
@@ -769,9 +766,14 @@ int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
* @task: controlling RPC task
* @xdr: xdr_stream containing RPC Reply header
*
- * On success, @xdr is updated to point past the verifier and
- * zero is returned. Otherwise, @xdr is in an undefined state
- * and a negative errno is returned.
+ * Return values:
+ * %0: Verifier is valid. @xdr now points past the verifier.
+ * %-EIO: Verifier is corrupted or message ended early.
+ * %-EACCES: Verifier is intact but not valid.
+ * %-EPROTONOSUPPORT: Server does not support the requested auth type.
+ *
+ * When a negative errno is returned, @xdr is left in an undefined
+ * state.
*/
int
rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr)
@@ -861,11 +863,7 @@ rpcauth_uptodatecred(struct rpc_task *task)
test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0;
}
-static struct shrinker rpc_cred_shrinker = {
- .count_objects = rpcauth_cache_shrink_count,
- .scan_objects = rpcauth_cache_shrink_scan,
- .seeks = DEFAULT_SEEKS,
-};
+static struct shrinker *rpc_cred_shrinker;
int __init rpcauth_init_module(void)
{
@@ -874,9 +872,17 @@ int __init rpcauth_init_module(void)
err = rpc_init_authunix();
if (err < 0)
goto out1;
- err = register_shrinker(&rpc_cred_shrinker, "sunrpc_cred");
- if (err < 0)
+ rpc_cred_shrinker = shrinker_alloc(0, "sunrpc_cred");
+ if (!rpc_cred_shrinker) {
+ err = -ENOMEM;
goto out2;
+ }
+
+ rpc_cred_shrinker->count_objects = rpcauth_cache_shrink_count;
+ rpc_cred_shrinker->scan_objects = rpcauth_cache_shrink_scan;
+
+ shrinker_register(rpc_cred_shrinker);
+
return 0;
out2:
rpc_destroy_authunix();
@@ -887,5 +893,5 @@ out1:
void rpcauth_remove_module(void)
{
rpc_destroy_authunix();
- unregister_shrinker(&rpc_cred_shrinker);
+ shrinker_free(rpc_cred_shrinker);
}
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 4a29f4c5dac4..452f67deebc6 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -5,11 +5,13 @@
obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
-auth_rpcgss-y := auth_gss.o gss_generic_token.o \
+auth_rpcgss-y := auth_gss.o \
gss_mech_switch.o svcauth_gss.o \
gss_rpc_upcall.o gss_rpc_xdr.o trace.o
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
- gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
+ gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
+
+obj-$(CONFIG_RPCSEC_GSS_KRB5_KUNIT_TEST) += gss_krb5_test.o
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 2d7b1e03110a..5c095cb8cb20 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -49,6 +49,22 @@ static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO;
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
+/*
+ * This compile-time check verifies that we will not exceed the
+ * slack space allotted by the client and server auth_gss code
+ * before they call gss_wrap().
+ */
+#define GSS_KRB5_MAX_SLACK_NEEDED \
+ (GSS_KRB5_TOK_HDR_LEN /* gss token header */ \
+ + GSS_KRB5_MAX_CKSUM_LEN /* gss token checksum */ \
+ + GSS_KRB5_MAX_BLOCKSIZE /* confounder */ \
+ + GSS_KRB5_MAX_BLOCKSIZE /* possible padding */ \
+ + GSS_KRB5_TOK_HDR_LEN /* encrypted hdr in v2 token */ \
+ + GSS_KRB5_MAX_CKSUM_LEN /* encryption hmac */ \
+ + XDR_UNIT * 2 /* RPC verifier */ \
+ + GSS_KRB5_TOK_HDR_LEN \
+ + GSS_KRB5_MAX_CKSUM_LEN)
+
#define GSS_CRED_SLACK (RPC_MAX_AUTH_SIZE * 2)
/* length of a krb5 verifier (48), plus data added before arguments when
* using integrity (two 4-byte integers): */
@@ -871,25 +887,16 @@ static void gss_pipe_dentry_destroy(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct gss_pipe *gss_pipe = pdo->pdo_data;
- struct rpc_pipe *pipe = gss_pipe->pipe;
- if (pipe->dentry != NULL) {
- rpc_unlink(pipe->dentry);
- pipe->dentry = NULL;
- }
+ rpc_unlink(gss_pipe->pipe);
}
static int gss_pipe_dentry_create(struct dentry *dir,
struct rpc_pipe_dir_object *pdo)
{
struct gss_pipe *p = pdo->pdo_data;
- struct dentry *dentry;
- dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- p->pipe->dentry = dentry;
- return 0;
+ return rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe);
}
static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = {
@@ -1042,6 +1049,7 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
goto err_put_mech;
auth = &gss_auth->rpc_auth;
auth->au_cslack = GSS_CRED_SLACK >> 2;
+ BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE);
auth->au_rslack = GSS_KRB5_MAX_SLACK_NEEDED >> 2;
auth->au_verfsize = GSS_VERF_SLACK >> 2;
auth->au_ralign = GSS_VERF_SLACK >> 2;
@@ -1528,6 +1536,7 @@ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
struct kvec iov;
struct xdr_buf verf_buf;
int status;
+ u32 seqno;
/* Credential */
@@ -1539,15 +1548,16 @@ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
cred_len = p++;
spin_lock(&ctx->gc_seq_lock);
- req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ;
+ seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ;
+ xprt_rqst_add_seqno(req, seqno);
spin_unlock(&ctx->gc_seq_lock);
- if (req->rq_seqno == MAXSEQ)
+ if (*req->rq_seqnos == MAXSEQ)
goto expired;
trace_rpcgss_seqno(task);
*p++ = cpu_to_be32(RPC_GSS_VERSION);
*p++ = cpu_to_be32(ctx->gc_proc);
- *p++ = cpu_to_be32(req->rq_seqno);
+ *p++ = cpu_to_be32(*req->rq_seqnos);
*p++ = cpu_to_be32(gss_cred->gc_service);
p = xdr_encode_netobj(p, &ctx->gc_wire_ctx);
*cred_len = cpu_to_be32((p - (cred_len + 1)) << 2);
@@ -1661,17 +1671,31 @@ gss_refresh_null(struct rpc_task *task)
return 0;
}
+static u32
+gss_validate_seqno_mic(struct gss_cl_ctx *ctx, u32 seqno, __be32 *seq, __be32 *p, u32 len)
+{
+ struct kvec iov;
+ struct xdr_buf verf_buf;
+ struct xdr_netobj mic;
+
+ *seq = cpu_to_be32(seqno);
+ iov.iov_base = seq;
+ iov.iov_len = 4;
+ xdr_buf_from_iov(&iov, &verf_buf);
+ mic.data = (u8 *)p;
+ mic.len = len;
+ return gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
+}
+
static int
gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
__be32 *p, *seq = NULL;
- struct kvec iov;
- struct xdr_buf verf_buf;
- struct xdr_netobj mic;
u32 len, maj_stat;
int status;
+ int i = 1; /* don't recheck the first item */
p = xdr_inline_decode(xdr, 2 * sizeof(*p));
if (!p)
@@ -1688,13 +1712,10 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
seq = kmalloc(4, GFP_KERNEL);
if (!seq)
goto validate_failed;
- *seq = cpu_to_be32(task->tk_rqstp->rq_seqno);
- iov.iov_base = seq;
- iov.iov_len = 4;
- xdr_buf_from_iov(&iov, &verf_buf);
- mic.data = (u8 *)p;
- mic.len = len;
- maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
+ maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[0], seq, p, len);
+ /* RFC 2203 5.3.3.1 - compute the checksum of each sequence number in the cache */
+ while (unlikely(maj_stat == GSS_S_BAD_SIG && i < task->tk_rqstp->rq_seqno_count))
+ maj_stat = gss_validate_seqno_mic(ctx, task->tk_rqstp->rq_seqnos[i++], seq, p, len);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat)
@@ -1733,7 +1754,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
if (!p)
goto wrap_failed;
integ_len = p++;
- *p = cpu_to_be32(rqstp->rq_seqno);
+ *p = cpu_to_be32(*rqstp->rq_seqnos);
if (rpcauth_wrap_req_encode(task, xdr))
goto wrap_failed;
@@ -1830,7 +1851,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
if (!p)
goto wrap_failed;
opaque_len = p++;
- *p = cpu_to_be32(rqstp->rq_seqno);
+ *p = cpu_to_be32(*rqstp->rq_seqnos);
if (rpcauth_wrap_req_encode(task, xdr))
goto wrap_failed;
@@ -1858,8 +1879,10 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
/* slack space should prevent this ever happening: */
- if (unlikely(snd_buf->len > snd_buf->buflen))
+ if (unlikely(snd_buf->len > snd_buf->buflen)) {
+ status = -EIO;
goto wrap_failed;
+ }
/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
* done anyway, so it's safe to put the request on the wire: */
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
@@ -1982,7 +2005,7 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
offset = rcv_buf->len - xdr_stream_remaining(xdr);
if (xdr_stream_decode_u32(xdr, &seqno))
goto unwrap_failed;
- if (seqno != rqstp->rq_seqno)
+ if (seqno != *rqstp->rq_seqnos)
goto bad_seqno;
if (xdr_buf_subsegment(rcv_buf, &gss_data, offset, len))
goto unwrap_failed;
@@ -2026,7 +2049,7 @@ unwrap_failed:
trace_rpcgss_unwrap_failed(task);
goto out;
bad_seqno:
- trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, seqno);
+ trace_rpcgss_bad_seqno(task, *rqstp->rq_seqnos, seqno);
goto out;
bad_mic:
trace_rpcgss_verify_mic(task, maj_stat);
@@ -2058,7 +2081,7 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
if (maj_stat != GSS_S_COMPLETE)
goto bad_unwrap;
/* gss_unwrap decrypted the sequence number */
- if (be32_to_cpup(p++) != rqstp->rq_seqno)
+ if (be32_to_cpup(p++) != *rqstp->rq_seqnos)
goto bad_seqno;
/* gss_unwrap redacts the opaque blob from the head iovec.
@@ -2074,7 +2097,7 @@ unwrap_failed:
trace_rpcgss_unwrap_failed(task);
return -EIO;
bad_seqno:
- trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(--p));
+ trace_rpcgss_bad_seqno(task, *rqstp->rq_seqnos, be32_to_cpup(--p));
return -EIO;
bad_unwrap:
trace_rpcgss_unwrap(task, maj_stat);
@@ -2099,14 +2122,14 @@ gss_xmit_need_reencode(struct rpc_task *task)
if (!ctx)
goto out;
- if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq)))
+ if (gss_seq_is_newer(*req->rq_seqnos, READ_ONCE(ctx->gc_seq)))
goto out_ctx;
seq_xmit = READ_ONCE(ctx->gc_seq_xmit);
- while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) {
+ while (gss_seq_is_newer(*req->rq_seqnos, seq_xmit)) {
u32 tmp = seq_xmit;
- seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno);
+ seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, *req->rq_seqnos);
if (seq_xmit == tmp) {
ret = false;
goto out_ctx;
@@ -2115,7 +2138,7 @@ gss_xmit_need_reencode(struct rpc_task *task)
win = ctx->gc_win;
if (win > 0)
- ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win);
+ ret = !gss_seq_is_newer(*req->rq_seqnos, seq_xmit - win);
out_ctx:
gss_put_ctx(ctx);
@@ -2263,6 +2286,7 @@ static void __exit exit_rpcsec_gss(void)
}
MODULE_ALIAS("rpc-auth-6");
+MODULE_DESCRIPTION("Sun RPC Kerberos RPCSEC_GSS client authentication");
MODULE_LICENSE("GPL");
module_param_named(expired_cred_retry_delay,
gss_expired_cred_retry_delay,
diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h
index c53b329092d4..4ebc1b7043d9 100644
--- a/net/sunrpc/auth_gss/auth_gss_internal.h
+++ b/net/sunrpc/auth_gss/auth_gss_internal.h
@@ -23,7 +23,7 @@ simple_get_bytes(const void *p, const void *end, void *res, size_t len)
}
static inline const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
+simple_get_netobj_noprof(const void *p, const void *end, struct xdr_netobj *dest)
{
const void *q;
unsigned int len;
@@ -35,7 +35,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
if (unlikely(q > end || q < p))
return ERR_PTR(-EFAULT);
if (len) {
- dest->data = kmemdup(p, len, GFP_KERNEL);
+ dest->data = kmemdup_noprof(p, len, GFP_KERNEL);
if (unlikely(dest->data == NULL))
return ERR_PTR(-ENOMEM);
} else
@@ -43,3 +43,5 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
dest->len = len;
return q;
}
+
+#define simple_get_netobj(...) alloc_hooks(simple_get_netobj_noprof(__VA_ARGS__))
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
deleted file mode 100644
index 4a4082bb22ad..000000000000
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * linux/net/sunrpc/gss_generic_token.c
- *
- * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/generic/util_token.c
- *
- * Copyright (c) 2000 The Regents of the University of Michigan.
- * All rights reserved.
- *
- * Andy Adamson <andros@umich.edu>
- */
-
-/*
- * Copyright 1993 by OpenVision Technologies, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose is hereby granted without fee,
- * provided that the above copyright notice appears in all copies and
- * that both that copyright notice and this permission notice appear in
- * supporting documentation, and that the name of OpenVision not be used
- * in advertising or publicity pertaining to distribution of the software
- * without specific, written prior permission. OpenVision makes no
- * representations about the suitability of this software for any
- * purpose. It is provided "as is" without express or implied warranty.
- *
- * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
- * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/sunrpc/sched.h>
-#include <linux/sunrpc/gss_asn1.h>
-
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-
-/* TWRITE_STR from gssapiP_generic.h */
-#define TWRITE_STR(ptr, str, len) \
- memcpy((ptr), (char *) (str), (len)); \
- (ptr) += (len);
-
-/* XXXX this code currently makes the assumption that a mech oid will
- never be longer than 127 bytes. This assumption is not inherent in
- the interfaces, so the code can be fixed if the OSI namespace
- balloons unexpectedly. */
-
-/* Each token looks like this:
-
-0x60 tag for APPLICATION 0, SEQUENCE
- (constructed, definite-length)
- <length> possible multiple bytes, need to parse/generate
- 0x06 tag for OBJECT IDENTIFIER
- <moid_length> compile-time constant string (assume 1 byte)
- <moid_bytes> compile-time constant string
- <inner_bytes> the ANY containing the application token
- bytes 0,1 are the token type
- bytes 2,n are the token data
-
-For the purposes of this abstraction, the token "header" consists of
-the sequence tag and length octets, the mech OID DER encoding, and the
-first two inner bytes, which indicate the token type. The token
-"body" consists of everything else.
-
-*/
-
-static int
-der_length_size( int length)
-{
- if (length < (1<<7))
- return 1;
- else if (length < (1<<8))
- return 2;
-#if (SIZEOF_INT == 2)
- else
- return 3;
-#else
- else if (length < (1<<16))
- return 3;
- else if (length < (1<<24))
- return 4;
- else
- return 5;
-#endif
-}
-
-static void
-der_write_length(unsigned char **buf, int length)
-{
- if (length < (1<<7)) {
- *(*buf)++ = (unsigned char) length;
- } else {
- *(*buf)++ = (unsigned char) (der_length_size(length)+127);
-#if (SIZEOF_INT > 2)
- if (length >= (1<<24))
- *(*buf)++ = (unsigned char) (length>>24);
- if (length >= (1<<16))
- *(*buf)++ = (unsigned char) ((length>>16)&0xff);
-#endif
- if (length >= (1<<8))
- *(*buf)++ = (unsigned char) ((length>>8)&0xff);
- *(*buf)++ = (unsigned char) (length&0xff);
- }
-}
-
-/* returns decoded length, or < 0 on failure. Advances buf and
- decrements bufsize */
-
-static int
-der_read_length(unsigned char **buf, int *bufsize)
-{
- unsigned char sf;
- int ret;
-
- if (*bufsize < 1)
- return -1;
- sf = *(*buf)++;
- (*bufsize)--;
- if (sf & 0x80) {
- if ((sf &= 0x7f) > ((*bufsize)-1))
- return -1;
- if (sf > SIZEOF_INT)
- return -1;
- ret = 0;
- for (; sf; sf--) {
- ret = (ret<<8) + (*(*buf)++);
- (*bufsize)--;
- }
- } else {
- ret = sf;
- }
-
- return ret;
-}
-
-/* returns the length of a token, given the mech oid and the body size */
-
-int
-g_token_size(struct xdr_netobj *mech, unsigned int body_size)
-{
- /* set body_size to sequence contents size */
- body_size += 2 + (int) mech->len; /* NEED overflow check */
- return 1 + der_length_size(body_size) + body_size;
-}
-
-EXPORT_SYMBOL_GPL(g_token_size);
-
-/* fills in a buffer with the token header. The buffer is assumed to
- be the right size. buf is advanced past the token header */
-
-void
-g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf)
-{
- *(*buf)++ = 0x60;
- der_write_length(buf, 2 + mech->len + body_size);
- *(*buf)++ = 0x06;
- *(*buf)++ = (unsigned char) mech->len;
- TWRITE_STR(*buf, mech->data, ((int) mech->len));
-}
-
-EXPORT_SYMBOL_GPL(g_make_token_header);
-
-/*
- * Given a buffer containing a token, reads and verifies the token,
- * leaving buf advanced past the token header, and setting body_size
- * to the number of remaining bytes. Returns 0 on success,
- * G_BAD_TOK_HEADER for a variety of errors, and G_WRONG_MECH if the
- * mechanism in the token does not match the mech argument. buf and
- * *body_size are left unmodified on error.
- */
-u32
-g_verify_token_header(struct xdr_netobj *mech, int *body_size,
- unsigned char **buf_in, int toksize)
-{
- unsigned char *buf = *buf_in;
- int seqsize;
- struct xdr_netobj toid;
- int ret = 0;
-
- if ((toksize-=1) < 0)
- return G_BAD_TOK_HEADER;
- if (*buf++ != 0x60)
- return G_BAD_TOK_HEADER;
-
- if ((seqsize = der_read_length(&buf, &toksize)) < 0)
- return G_BAD_TOK_HEADER;
-
- if (seqsize != toksize)
- return G_BAD_TOK_HEADER;
-
- if ((toksize-=1) < 0)
- return G_BAD_TOK_HEADER;
- if (*buf++ != 0x06)
- return G_BAD_TOK_HEADER;
-
- if ((toksize-=1) < 0)
- return G_BAD_TOK_HEADER;
- toid.len = *buf++;
-
- if ((toksize-=toid.len) < 0)
- return G_BAD_TOK_HEADER;
- toid.data = buf;
- buf+=toid.len;
-
- if (! g_OID_equal(&toid, mech))
- ret = G_WRONG_MECH;
-
- /* G_WRONG_MECH is not returned immediately because it's more important
- to return G_BAD_TOK_HEADER if the token header is in fact bad */
-
- if ((toksize-=2) < 0)
- return G_BAD_TOK_HEADER;
-
- if (ret)
- return ret;
-
- *buf_in = buf;
- *body_size = toksize;
-
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(g_verify_token_header);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 3ea58175e159..16dcf115de1e 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -34,9 +34,9 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
-#include <crypto/algapi.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
+#include <crypto/utils.h>
#include <linux/err.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -46,11 +46,59 @@
#include <linux/random.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
+#include <kunit/visibility.h>
+
+#include "gss_krb5_internal.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
+/**
+ * krb5_make_confounder - Generate a confounder string
+ * @p: memory location into which to write the string
+ * @conflen: string length to write, in octets
+ *
+ * RFCs 1964 and 3961 mention only "a random confounder" without going
+ * into detail about its function or cryptographic requirements. The
+ * assumed purpose is to prevent repeated encryption of a plaintext with
+ * the same key from generating the same ciphertext. It is also used to
+ * pad minimum plaintext length to at least a single cipher block.
+ *
+ * However, in situations like the GSS Kerberos 5 mechanism, where the
+ * encryption IV is always all zeroes, the confounder also effectively
+ * functions like an IV. Thus, not only must it be unique from message
+ * to message, but it must also be difficult to predict. Otherwise an
+ * attacker can correlate the confounder to previous or future values,
+ * making the encryption easier to break.
+ *
+ * Given that the primary consumer of this encryption mechanism is a
+ * network storage protocol, a type of traffic that often carries
+ * predictable payloads (eg, all zeroes when reading unallocated blocks
+ * from a file), our confounder generation has to be cryptographically
+ * strong.
+ */
+void krb5_make_confounder(u8 *p, int conflen)
+{
+ get_random_bytes(p, conflen);
+}
+
+/**
+ * krb5_encrypt - simple encryption of an RPCSEC GSS payload
+ * @tfm: initialized cipher transform
+ * @iv: pointer to an IV
+ * @in: plaintext to encrypt
+ * @out: OUT: ciphertext
+ * @length: length of input and output buffers, in bytes
+ *
+ * @iv may be NULL to force the use of an all-zero IV.
+ * The buffer containing the IV must be as large as the
+ * cipher's ivsize.
+ *
+ * Return values:
+ * %0: @in successfully encrypted into @out
+ * negative errno: @in not encrypted
+ */
u32
krb5_encrypt(
struct crypto_sync_skcipher *tfm,
@@ -90,44 +138,6 @@ out:
return ret;
}
-u32
-krb5_decrypt(
- struct crypto_sync_skcipher *tfm,
- void * iv,
- void * in,
- void * out,
- int length)
-{
- u32 ret = -EINVAL;
- struct scatterlist sg[1];
- u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
-
- if (length % crypto_sync_skcipher_blocksize(tfm) != 0)
- goto out;
-
- if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
- dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n",
- crypto_sync_skcipher_ivsize(tfm));
- goto out;
- }
- if (iv)
- memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm));
-
- memcpy(out, in, length);
- sg_init_one(sg, out, length);
-
- skcipher_request_set_sync_tfm(req, tfm);
- skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg, sg, length, local_iv);
-
- ret = crypto_skcipher_decrypt(req);
- skcipher_request_zero(req);
-out:
- dprintk("RPC: gss_k5decrypt returns %d\n",ret);
- return ret;
-}
-
static int
checksummer(struct scatterlist *sg, void *data)
{
@@ -138,182 +148,78 @@ checksummer(struct scatterlist *sg, void *data)
return crypto_ahash_update(req);
}
-/*
- * checksum the plaintext data and hdrlen bytes of the token header
- * The checksum is performed over the first 8 bytes of the
- * gss token header and then over the data body
+/**
+ * gss_krb5_checksum - Compute the MAC for a GSS Wrap or MIC token
+ * @tfm: an initialized hash transform
+ * @header: pointer to a buffer containing the token header, or NULL
+ * @hdrlen: number of octets in @header
+ * @body: xdr_buf containing an RPC message (body.len is the message length)
+ * @body_offset: byte offset into @body to start checksumming
+ * @cksumout: OUT: a buffer to be filled in with the computed HMAC
+ *
+ * Usually expressed as H = HMAC(K, message)[1..h] .
+ *
+ * Caller provides the truncation length of the output token (h) in
+ * cksumout.len.
+ *
+ * Return values:
+ * %GSS_S_COMPLETE: Digest computed, @cksumout filled in
+ * %GSS_S_FAILURE: Call failed
*/
u32
-make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
- struct xdr_buf *body, int body_offset, u8 *cksumkey,
- unsigned int usage, struct xdr_netobj *cksumout)
+gss_krb5_checksum(struct crypto_ahash *tfm, char *header, int hdrlen,
+ const struct xdr_buf *body, int body_offset,
+ struct xdr_netobj *cksumout)
{
- struct crypto_ahash *tfm;
struct ahash_request *req;
- struct scatterlist sg[1];
- int err = -1;
+ int err = -ENOMEM;
u8 *checksumdata;
- unsigned int checksumlen;
-
- if (cksumout->len < kctx->gk5e->cksumlength) {
- dprintk("%s: checksum buffer length, %u, too small for %s\n",
- __func__, cksumout->len, kctx->gk5e->name);
- return GSS_S_FAILURE;
- }
- checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL);
- if (checksumdata == NULL)
+ checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL);
+ if (!checksumdata)
return GSS_S_FAILURE;
- tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
- goto out_free_cksum;
-
req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
- goto out_free_ahash;
-
+ goto out_free_cksum;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
-
- checksumlen = crypto_ahash_digestsize(tfm);
-
- if (cksumkey != NULL) {
- err = crypto_ahash_setkey(tfm, cksumkey,
- kctx->gk5e->keylength);
- if (err)
- goto out;
- }
-
err = crypto_ahash_init(req);
if (err)
- goto out;
- sg_init_one(sg, header, hdrlen);
- ahash_request_set_crypt(req, sg, NULL, hdrlen);
- err = crypto_ahash_update(req);
- if (err)
- goto out;
- err = xdr_process_buf(body, body_offset, body->len - body_offset,
- checksummer, req);
- if (err)
- goto out;
- ahash_request_set_crypt(req, NULL, checksumdata, 0);
- err = crypto_ahash_final(req);
- if (err)
- goto out;
-
- switch (kctx->gk5e->ctype) {
- case CKSUMTYPE_RSA_MD5:
- err = kctx->gk5e->encrypt(kctx->seq, NULL, checksumdata,
- checksumdata, checksumlen);
- if (err)
- goto out;
- memcpy(cksumout->data,
- checksumdata + checksumlen - kctx->gk5e->cksumlength,
- kctx->gk5e->cksumlength);
- break;
- case CKSUMTYPE_HMAC_SHA1_DES3:
- memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
- break;
- default:
- BUG();
- break;
- }
- cksumout->len = kctx->gk5e->cksumlength;
-out:
- ahash_request_free(req);
-out_free_ahash:
- crypto_free_ahash(tfm);
-out_free_cksum:
- kfree(checksumdata);
- return err ? GSS_S_FAILURE : 0;
-}
-
-/*
- * checksum the plaintext data and hdrlen bytes of the token header
- * Per rfc4121, sec. 4.2.4, the checksum is performed over the data
- * body then over the first 16 octets of the MIC token
- * Inclusion of the header data in the calculation of the
- * checksum is optional.
- */
-u32
-make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
- struct xdr_buf *body, int body_offset, u8 *cksumkey,
- unsigned int usage, struct xdr_netobj *cksumout)
-{
- struct crypto_ahash *tfm;
- struct ahash_request *req;
- struct scatterlist sg[1];
- int err = -1;
- u8 *checksumdata;
-
- if (kctx->gk5e->keyed_cksum == 0) {
- dprintk("%s: expected keyed hash for %s\n",
- __func__, kctx->gk5e->name);
- return GSS_S_FAILURE;
- }
- if (cksumkey == NULL) {
- dprintk("%s: no key supplied for %s\n",
- __func__, kctx->gk5e->name);
- return GSS_S_FAILURE;
- }
-
- checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL);
- if (!checksumdata)
- return GSS_S_FAILURE;
-
- tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
- goto out_free_cksum;
-
- req = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!req)
goto out_free_ahash;
- ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
-
- err = crypto_ahash_setkey(tfm, cksumkey, kctx->gk5e->keylength);
- if (err)
- goto out;
-
- err = crypto_ahash_init(req);
- if (err)
- goto out;
+ /*
+ * Per RFC 4121 Section 4.2.4, the checksum is performed over the
+ * data body first, then over the octets in "header".
+ */
err = xdr_process_buf(body, body_offset, body->len - body_offset,
checksummer, req);
if (err)
- goto out;
- if (header != NULL) {
+ goto out_free_ahash;
+ if (header) {
+ struct scatterlist sg[1];
+
sg_init_one(sg, header, hdrlen);
ahash_request_set_crypt(req, sg, NULL, hdrlen);
err = crypto_ahash_update(req);
if (err)
- goto out;
+ goto out_free_ahash;
}
+
ahash_request_set_crypt(req, NULL, checksumdata, 0);
err = crypto_ahash_final(req);
if (err)
- goto out;
+ goto out_free_ahash;
+
+ memcpy(cksumout->data, checksumdata,
+ min_t(int, cksumout->len, crypto_ahash_digestsize(tfm)));
- cksumout->len = kctx->gk5e->cksumlength;
-
- switch (kctx->gk5e->ctype) {
- case CKSUMTYPE_HMAC_SHA1_96_AES128:
- case CKSUMTYPE_HMAC_SHA1_96_AES256:
- /* note that this truncates the hash */
- memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
- break;
- default:
- BUG();
- break;
- }
-out:
- ahash_request_free(req);
out_free_ahash:
- crypto_free_ahash(tfm);
+ ahash_request_free(req);
out_free_cksum:
- kfree(checksumdata);
- return err ? GSS_S_FAILURE : 0;
+ kfree_sensitive(checksumdata);
+ return err ? GSS_S_FAILURE : GSS_S_COMPLETE;
}
+EXPORT_SYMBOL_IF_KUNIT(gss_krb5_checksum);
struct encryptor_desc {
u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
@@ -392,35 +298,6 @@ encryptor(struct scatterlist *sg, void *data)
return 0;
}
-int
-gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
- int offset, struct page **pages)
-{
- int ret;
- struct encryptor_desc desc;
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
-
- BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
-
- skcipher_request_set_sync_tfm(req, tfm);
- skcipher_request_set_callback(req, 0, NULL, NULL);
-
- memset(desc.iv, 0, sizeof(desc.iv));
- desc.req = req;
- desc.pos = offset;
- desc.outbuf = buf;
- desc.pages = pages;
- desc.fragno = 0;
- desc.fraglen = 0;
-
- sg_init_table(desc.infrags, 4);
- sg_init_table(desc.outfrags, 4);
-
- ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc);
- skcipher_request_zero(req);
- return ret;
-}
-
struct decryptor_desc {
u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
struct skcipher_request *req;
@@ -475,32 +352,6 @@ decryptor(struct scatterlist *sg, void *data)
return 0;
}
-int
-gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
- int offset)
-{
- int ret;
- struct decryptor_desc desc;
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
-
- /* XXXJBF: */
- BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
-
- skcipher_request_set_sync_tfm(req, tfm);
- skcipher_request_set_callback(req, 0, NULL, NULL);
-
- memset(desc.iv, 0, sizeof(desc.iv));
- desc.req = req;
- desc.fragno = 0;
- desc.fraglen = 0;
-
- sg_init_table(desc.frags, 4);
-
- ret = xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
- skcipher_request_zero(req);
- return ret;
-}
-
/*
* This function makes the assumption that it was ultimately called
* from gss_wrap().
@@ -526,7 +377,6 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
if (shiftlen == 0)
return 0;
- BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE);
BUG_ON(shiftlen > RPC_MAX_AUTH_SIZE);
p = buf->head[0].iov_base + base;
@@ -590,45 +440,172 @@ gss_krb5_cts_crypt(struct crypto_sync_skcipher *cipher, struct xdr_buf *buf,
ret = write_bytes_to_xdr_buf(buf, offset, data, len);
+#if IS_ENABLED(CONFIG_KUNIT)
+ /*
+ * CBC-CTS does not define an output IV but RFC 3962 defines it as the
+ * penultimate block of ciphertext, so copy that into the IV buffer
+ * before returning.
+ */
+ if (encrypt)
+ memcpy(iv, data, crypto_sync_skcipher_ivsize(cipher));
+#endif
+
out:
kfree(data);
return ret;
}
+/**
+ * krb5_cbc_cts_encrypt - encrypt in CBC mode with CTS
+ * @cts_tfm: CBC cipher with CTS
+ * @cbc_tfm: base CBC cipher
+ * @offset: starting byte offset for plaintext
+ * @buf: OUT: output buffer
+ * @pages: plaintext
+ * @iv: output CBC initialization vector, or NULL
+ * @ivsize: size of @iv, in octets
+ *
+ * To provide confidentiality, encrypt using cipher block chaining
+ * with ciphertext stealing. Message integrity is handled separately.
+ *
+ * Return values:
+ * %0: encryption successful
+ * negative errno: encryption could not be completed
+ */
+VISIBLE_IF_KUNIT
+int krb5_cbc_cts_encrypt(struct crypto_sync_skcipher *cts_tfm,
+ struct crypto_sync_skcipher *cbc_tfm,
+ u32 offset, struct xdr_buf *buf, struct page **pages,
+ u8 *iv, unsigned int ivsize)
+{
+ u32 blocksize, nbytes, nblocks, cbcbytes;
+ struct encryptor_desc desc;
+ int err;
+
+ blocksize = crypto_sync_skcipher_blocksize(cts_tfm);
+ nbytes = buf->len - offset;
+ nblocks = (nbytes + blocksize - 1) / blocksize;
+ cbcbytes = 0;
+ if (nblocks > 2)
+ cbcbytes = (nblocks - 2) * blocksize;
+
+ memset(desc.iv, 0, sizeof(desc.iv));
+
+ /* Handle block-sized chunks of plaintext with CBC. */
+ if (cbcbytes) {
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, cbc_tfm);
+
+ desc.pos = offset;
+ desc.fragno = 0;
+ desc.fraglen = 0;
+ desc.pages = pages;
+ desc.outbuf = buf;
+ desc.req = req;
+
+ skcipher_request_set_sync_tfm(req, cbc_tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+
+ sg_init_table(desc.infrags, 4);
+ sg_init_table(desc.outfrags, 4);
+
+ err = xdr_process_buf(buf, offset, cbcbytes, encryptor, &desc);
+ skcipher_request_zero(req);
+ if (err)
+ return err;
+ }
+
+ /* Remaining plaintext is handled with CBC-CTS. */
+ err = gss_krb5_cts_crypt(cts_tfm, buf, offset + cbcbytes,
+ desc.iv, pages, 1);
+ if (err)
+ return err;
+
+ if (unlikely(iv))
+ memcpy(iv, desc.iv, ivsize);
+ return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(krb5_cbc_cts_encrypt);
+
+/**
+ * krb5_cbc_cts_decrypt - decrypt in CBC mode with CTS
+ * @cts_tfm: CBC cipher with CTS
+ * @cbc_tfm: base CBC cipher
+ * @offset: starting byte offset for plaintext
+ * @buf: OUT: output buffer
+ *
+ * Return values:
+ * %0: decryption successful
+ * negative errno: decryption could not be completed
+ */
+VISIBLE_IF_KUNIT
+int krb5_cbc_cts_decrypt(struct crypto_sync_skcipher *cts_tfm,
+ struct crypto_sync_skcipher *cbc_tfm,
+ u32 offset, struct xdr_buf *buf)
+{
+ u32 blocksize, nblocks, cbcbytes;
+ struct decryptor_desc desc;
+ int err;
+
+ blocksize = crypto_sync_skcipher_blocksize(cts_tfm);
+ nblocks = (buf->len + blocksize - 1) / blocksize;
+ cbcbytes = 0;
+ if (nblocks > 2)
+ cbcbytes = (nblocks - 2) * blocksize;
+
+ memset(desc.iv, 0, sizeof(desc.iv));
+
+ /* Handle block-sized chunks of plaintext with CBC. */
+ if (cbcbytes) {
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, cbc_tfm);
+
+ desc.fragno = 0;
+ desc.fraglen = 0;
+ desc.req = req;
+
+ skcipher_request_set_sync_tfm(req, cbc_tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+
+ sg_init_table(desc.frags, 4);
+
+ err = xdr_process_buf(buf, 0, cbcbytes, decryptor, &desc);
+ skcipher_request_zero(req);
+ if (err)
+ return err;
+ }
+
+ /* Remaining plaintext is handled with CBC-CTS. */
+ return gss_krb5_cts_crypt(cts_tfm, buf, cbcbytes, desc.iv, NULL, 0);
+}
+EXPORT_SYMBOL_IF_KUNIT(krb5_cbc_cts_decrypt);
+
u32
gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, struct page **pages)
{
u32 err;
struct xdr_netobj hmac;
- u8 *cksumkey;
u8 *ecptr;
struct crypto_sync_skcipher *cipher, *aux_cipher;
- int blocksize;
+ struct crypto_ahash *ahash;
struct page **save_pages;
- int nblocks, nbytes;
- struct encryptor_desc desc;
- u32 cbcbytes;
- unsigned int usage;
+ unsigned int conflen;
if (kctx->initiate) {
cipher = kctx->initiator_enc;
aux_cipher = kctx->initiator_enc_aux;
- cksumkey = kctx->initiator_integ;
- usage = KG_USAGE_INITIATOR_SEAL;
+ ahash = kctx->initiator_integ;
} else {
cipher = kctx->acceptor_enc;
aux_cipher = kctx->acceptor_enc_aux;
- cksumkey = kctx->acceptor_integ;
- usage = KG_USAGE_ACCEPTOR_SEAL;
+ ahash = kctx->acceptor_integ;
}
- blocksize = crypto_sync_skcipher_blocksize(cipher);
+ conflen = crypto_sync_skcipher_blocksize(cipher);
/* hide the gss token header and insert the confounder */
offset += GSS_KRB5_TOK_HDR_LEN;
- if (xdr_extend_head(buf, offset, kctx->gk5e->conflen))
+ if (xdr_extend_head(buf, offset, conflen))
return GSS_S_FAILURE;
- gss_krb5_make_confounder(buf->head[0].iov_base + offset, kctx->gk5e->conflen);
+ krb5_make_confounder(buf->head[0].iov_base + offset, conflen);
offset -= GSS_KRB5_TOK_HDR_LEN;
if (buf->tail[0].iov_base != NULL) {
@@ -645,8 +622,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
buf->len += GSS_KRB5_TOK_HDR_LEN;
- /* Do the HMAC */
- hmac.len = GSS_KRB5_MAX_CKSUM_LEN;
+ hmac.len = kctx->gk5e->cksumlength;
hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len;
/*
@@ -659,152 +635,321 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
save_pages = buf->pages;
buf->pages = pages;
- err = make_checksum_v2(kctx, NULL, 0, buf,
- offset + GSS_KRB5_TOK_HDR_LEN,
- cksumkey, usage, &hmac);
+ err = gss_krb5_checksum(ahash, NULL, 0, buf,
+ offset + GSS_KRB5_TOK_HDR_LEN, &hmac);
buf->pages = save_pages;
if (err)
return GSS_S_FAILURE;
- nbytes = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
- nblocks = (nbytes + blocksize - 1) / blocksize;
- cbcbytes = 0;
- if (nblocks > 2)
- cbcbytes = (nblocks - 2) * blocksize;
-
- memset(desc.iv, 0, sizeof(desc.iv));
-
- if (cbcbytes) {
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
-
- desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
- desc.fragno = 0;
- desc.fraglen = 0;
- desc.pages = pages;
- desc.outbuf = buf;
- desc.req = req;
-
- skcipher_request_set_sync_tfm(req, aux_cipher);
- skcipher_request_set_callback(req, 0, NULL, NULL);
-
- sg_init_table(desc.infrags, 4);
- sg_init_table(desc.outfrags, 4);
-
- err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
- cbcbytes, encryptor, &desc);
- skcipher_request_zero(req);
- if (err)
- goto out_err;
- }
-
- /* Make sure IV carries forward from any CBC results. */
- err = gss_krb5_cts_crypt(cipher, buf,
- offset + GSS_KRB5_TOK_HDR_LEN + cbcbytes,
- desc.iv, pages, 1);
- if (err) {
- err = GSS_S_FAILURE;
- goto out_err;
- }
+ err = krb5_cbc_cts_encrypt(cipher, aux_cipher,
+ offset + GSS_KRB5_TOK_HDR_LEN,
+ buf, pages, NULL, 0);
+ if (err)
+ return GSS_S_FAILURE;
/* Now update buf to account for HMAC */
buf->tail[0].iov_len += kctx->gk5e->cksumlength;
buf->len += kctx->gk5e->cksumlength;
-out_err:
- if (err)
- err = GSS_S_FAILURE;
- return err;
+ return GSS_S_COMPLETE;
}
u32
gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
struct xdr_buf *buf, u32 *headskip, u32 *tailskip)
{
- struct xdr_buf subbuf;
- u32 ret = 0;
- u8 *cksum_key;
struct crypto_sync_skcipher *cipher, *aux_cipher;
+ struct crypto_ahash *ahash;
struct xdr_netobj our_hmac_obj;
u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
- int nblocks, blocksize, cbcbytes;
- struct decryptor_desc desc;
- unsigned int usage;
+ struct xdr_buf subbuf;
+ u32 ret = 0;
if (kctx->initiate) {
cipher = kctx->acceptor_enc;
aux_cipher = kctx->acceptor_enc_aux;
- cksum_key = kctx->acceptor_integ;
- usage = KG_USAGE_ACCEPTOR_SEAL;
+ ahash = kctx->acceptor_integ;
} else {
cipher = kctx->initiator_enc;
aux_cipher = kctx->initiator_enc_aux;
- cksum_key = kctx->initiator_integ;
- usage = KG_USAGE_INITIATOR_SEAL;
+ ahash = kctx->initiator_integ;
}
- blocksize = crypto_sync_skcipher_blocksize(cipher);
-
/* create a segment skipping the header and leaving out the checksum */
xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
(len - offset - GSS_KRB5_TOK_HDR_LEN -
kctx->gk5e->cksumlength));
- nblocks = (subbuf.len + blocksize - 1) / blocksize;
+ ret = krb5_cbc_cts_decrypt(cipher, aux_cipher, 0, &subbuf);
+ if (ret)
+ goto out_err;
- cbcbytes = 0;
- if (nblocks > 2)
- cbcbytes = (nblocks - 2) * blocksize;
+ our_hmac_obj.len = kctx->gk5e->cksumlength;
+ our_hmac_obj.data = our_hmac;
+ ret = gss_krb5_checksum(ahash, NULL, 0, &subbuf, 0, &our_hmac_obj);
+ if (ret)
+ goto out_err;
- memset(desc.iv, 0, sizeof(desc.iv));
+ /* Get the packet's hmac value */
+ ret = read_bytes_from_xdr_buf(buf, len - kctx->gk5e->cksumlength,
+ pkt_hmac, kctx->gk5e->cksumlength);
+ if (ret)
+ goto out_err;
- if (cbcbytes) {
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+ if (crypto_memneq(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
+ ret = GSS_S_BAD_SIG;
+ goto out_err;
+ }
+ *headskip = crypto_sync_skcipher_blocksize(cipher);
+ *tailskip = kctx->gk5e->cksumlength;
+out_err:
+ if (ret && ret != GSS_S_BAD_SIG)
+ ret = GSS_S_FAILURE;
+ return ret;
+}
- desc.fragno = 0;
- desc.fraglen = 0;
- desc.req = req;
+/**
+ * krb5_etm_checksum - Compute a MAC for a GSS Wrap token
+ * @cipher: an initialized cipher transform
+ * @tfm: an initialized hash transform
+ * @body: xdr_buf containing an RPC message (body.len is the message length)
+ * @body_offset: byte offset into @body to start checksumming
+ * @cksumout: OUT: a buffer to be filled in with the computed HMAC
+ *
+ * Usually expressed as H = HMAC(K, IV | ciphertext)[1..h] .
+ *
+ * Caller provides the truncation length of the output token (h) in
+ * cksumout.len.
+ *
+ * Return values:
+ * %GSS_S_COMPLETE: Digest computed, @cksumout filled in
+ * %GSS_S_FAILURE: Call failed
+ */
+VISIBLE_IF_KUNIT
+u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
+ struct crypto_ahash *tfm, const struct xdr_buf *body,
+ int body_offset, struct xdr_netobj *cksumout)
+{
+ unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher);
+ struct ahash_request *req;
+ struct scatterlist sg[1];
+ u8 *iv, *checksumdata;
+ int err = -ENOMEM;
- skcipher_request_set_sync_tfm(req, aux_cipher);
- skcipher_request_set_callback(req, 0, NULL, NULL);
+ checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL);
+ if (!checksumdata)
+ return GSS_S_FAILURE;
+ /* For RPCSEC, the "initial cipher state" is always all zeroes. */
+ iv = kzalloc(ivsize, GFP_KERNEL);
+ if (!iv)
+ goto out_free_mem;
- sg_init_table(desc.frags, 4);
+ req = ahash_request_alloc(tfm, GFP_KERNEL);
+ if (!req)
+ goto out_free_mem;
+ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+ err = crypto_ahash_init(req);
+ if (err)
+ goto out_free_ahash;
- ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
- skcipher_request_zero(req);
- if (ret)
- goto out_err;
+ sg_init_one(sg, iv, ivsize);
+ ahash_request_set_crypt(req, sg, NULL, ivsize);
+ err = crypto_ahash_update(req);
+ if (err)
+ goto out_free_ahash;
+ err = xdr_process_buf(body, body_offset, body->len - body_offset,
+ checksummer, req);
+ if (err)
+ goto out_free_ahash;
+
+ ahash_request_set_crypt(req, NULL, checksumdata, 0);
+ err = crypto_ahash_final(req);
+ if (err)
+ goto out_free_ahash;
+ memcpy(cksumout->data, checksumdata, cksumout->len);
+
+out_free_ahash:
+ ahash_request_free(req);
+out_free_mem:
+ kfree(iv);
+ kfree_sensitive(checksumdata);
+ return err ? GSS_S_FAILURE : GSS_S_COMPLETE;
+}
+EXPORT_SYMBOL_IF_KUNIT(krb5_etm_checksum);
+
+/**
+ * krb5_etm_encrypt - Encrypt using the RFC 8009 rules
+ * @kctx: Kerberos context
+ * @offset: starting offset of the payload, in bytes
+ * @buf: OUT: send buffer to contain the encrypted payload
+ * @pages: plaintext payload
+ *
+ * The main difference with aes_encrypt is that "The HMAC is
+ * calculated over the cipher state concatenated with the AES
+ * output, instead of being calculated over the confounder and
+ * plaintext. This allows the message receiver to verify the
+ * integrity of the message before decrypting the message."
+ *
+ * RFC 8009 Section 5:
+ *
+ * encryption function: as follows, where E() is AES encryption in
+ * CBC-CS3 mode, and h is the size of truncated HMAC (128 bits or
+ * 192 bits as described above).
+ *
+ * N = random value of length 128 bits (the AES block size)
+ * IV = cipher state
+ * C = E(Ke, N | plaintext, IV)
+ * H = HMAC(Ki, IV | C)
+ * ciphertext = C | H[1..h]
+ *
+ * This encryption formula provides AEAD EtM with key separation.
+ *
+ * Return values:
+ * %GSS_S_COMPLETE: Encryption successful
+ * %GSS_S_FAILURE: Encryption failed
+ */
+u32
+krb5_etm_encrypt(struct krb5_ctx *kctx, u32 offset,
+ struct xdr_buf *buf, struct page **pages)
+{
+ struct crypto_sync_skcipher *cipher, *aux_cipher;
+ struct crypto_ahash *ahash;
+ struct xdr_netobj hmac;
+ unsigned int conflen;
+ u8 *ecptr;
+ u32 err;
+
+ if (kctx->initiate) {
+ cipher = kctx->initiator_enc;
+ aux_cipher = kctx->initiator_enc_aux;
+ ahash = kctx->initiator_integ;
+ } else {
+ cipher = kctx->acceptor_enc;
+ aux_cipher = kctx->acceptor_enc_aux;
+ ahash = kctx->acceptor_integ;
}
+ conflen = crypto_sync_skcipher_blocksize(cipher);
- /* Make sure IV carries forward from any CBC results. */
- ret = gss_krb5_cts_crypt(cipher, &subbuf, cbcbytes, desc.iv, NULL, 0);
- if (ret)
+ offset += GSS_KRB5_TOK_HDR_LEN;
+ if (xdr_extend_head(buf, offset, conflen))
+ return GSS_S_FAILURE;
+ krb5_make_confounder(buf->head[0].iov_base + offset, conflen);
+ offset -= GSS_KRB5_TOK_HDR_LEN;
+
+ if (buf->tail[0].iov_base) {
+ ecptr = buf->tail[0].iov_base + buf->tail[0].iov_len;
+ } else {
+ buf->tail[0].iov_base = buf->head[0].iov_base
+ + buf->head[0].iov_len;
+ buf->tail[0].iov_len = 0;
+ ecptr = buf->tail[0].iov_base;
+ }
+
+ memcpy(ecptr, buf->head[0].iov_base + offset, GSS_KRB5_TOK_HDR_LEN);
+ buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
+ buf->len += GSS_KRB5_TOK_HDR_LEN;
+
+ err = krb5_cbc_cts_encrypt(cipher, aux_cipher,
+ offset + GSS_KRB5_TOK_HDR_LEN,
+ buf, pages, NULL, 0);
+ if (err)
+ return GSS_S_FAILURE;
+
+ hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len;
+ hmac.len = kctx->gk5e->cksumlength;
+ err = krb5_etm_checksum(cipher, ahash,
+ buf, offset + GSS_KRB5_TOK_HDR_LEN, &hmac);
+ if (err)
goto out_err;
+ buf->tail[0].iov_len += kctx->gk5e->cksumlength;
+ buf->len += kctx->gk5e->cksumlength;
+ return GSS_S_COMPLETE;
- /* Calculate our hmac over the plaintext data */
- our_hmac_obj.len = sizeof(our_hmac);
- our_hmac_obj.data = our_hmac;
+out_err:
+ return GSS_S_FAILURE;
+}
+
+/**
+ * krb5_etm_decrypt - Decrypt using the RFC 8009 rules
+ * @kctx: Kerberos context
+ * @offset: starting offset of the ciphertext, in bytes
+ * @len: size of ciphertext to unwrap
+ * @buf: ciphertext to unwrap
+ * @headskip: OUT: the enctype's confounder length, in octets
+ * @tailskip: OUT: the enctype's HMAC length, in octets
+ *
+ * RFC 8009 Section 5:
+ *
+ * decryption function: as follows, where D() is AES decryption in
+ * CBC-CS3 mode, and h is the size of truncated HMAC.
+ *
+ * (C, H) = ciphertext
+ * (Note: H is the last h bits of the ciphertext.)
+ * IV = cipher state
+ * if H != HMAC(Ki, IV | C)[1..h]
+ * stop, report error
+ * (N, P) = D(Ke, C, IV)
+ *
+ * Return values:
+ * %GSS_S_COMPLETE: Decryption successful
+ * %GSS_S_BAD_SIG: computed HMAC != received HMAC
+ * %GSS_S_FAILURE: Decryption failed
+ */
+u32
+krb5_etm_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
+ struct xdr_buf *buf, u32 *headskip, u32 *tailskip)
+{
+ struct crypto_sync_skcipher *cipher, *aux_cipher;
+ u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
+ u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
+ struct xdr_netobj our_hmac_obj;
+ struct crypto_ahash *ahash;
+ struct xdr_buf subbuf;
+ u32 ret = 0;
- ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0,
- cksum_key, usage, &our_hmac_obj);
+ if (kctx->initiate) {
+ cipher = kctx->acceptor_enc;
+ aux_cipher = kctx->acceptor_enc_aux;
+ ahash = kctx->acceptor_integ;
+ } else {
+ cipher = kctx->initiator_enc;
+ aux_cipher = kctx->initiator_enc_aux;
+ ahash = kctx->initiator_integ;
+ }
+
+ /* Extract the ciphertext into @subbuf. */
+ xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
+ (len - offset - GSS_KRB5_TOK_HDR_LEN -
+ kctx->gk5e->cksumlength));
+
+ our_hmac_obj.data = our_hmac;
+ our_hmac_obj.len = kctx->gk5e->cksumlength;
+ ret = krb5_etm_checksum(cipher, ahash, &subbuf, 0, &our_hmac_obj);
if (ret)
goto out_err;
-
- /* Get the packet's hmac value */
ret = read_bytes_from_xdr_buf(buf, len - kctx->gk5e->cksumlength,
pkt_hmac, kctx->gk5e->cksumlength);
if (ret)
goto out_err;
-
if (crypto_memneq(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
ret = GSS_S_BAD_SIG;
goto out_err;
}
- *headskip = kctx->gk5e->conflen;
+
+ ret = krb5_cbc_cts_decrypt(cipher, aux_cipher, 0, &subbuf);
+ if (ret) {
+ ret = GSS_S_FAILURE;
+ goto out_err;
+ }
+
+ *headskip = crypto_sync_skcipher_blocksize(cipher);
*tailskip = kctx->gk5e->cksumlength;
+ return GSS_S_COMPLETE;
+
out_err:
- if (ret && ret != GSS_S_BAD_SIG)
+ if (ret != GSS_S_BAD_SIG)
ret = GSS_S_FAILURE;
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_internal.h b/net/sunrpc/auth_gss/gss_krb5_internal.h
new file mode 100644
index 000000000000..8769e9e705bf
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_internal.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/*
+ * SunRPC GSS Kerberos 5 mechanism internal definitions
+ *
+ * Copyright (c) 2022 Oracle and/or its affiliates.
+ */
+
+#ifndef _NET_SUNRPC_AUTH_GSS_KRB5_INTERNAL_H
+#define _NET_SUNRPC_AUTH_GSS_KRB5_INTERNAL_H
+
+/*
+ * The RFCs often specify payload lengths in bits. This helper
+ * converts a specified bit-length to the number of octets/bytes.
+ */
+#define BITS2OCTETS(x) ((x) / 8)
+
+struct krb5_ctx;
+
+struct gss_krb5_enctype {
+ const u32 etype; /* encryption (key) type */
+ const u32 ctype; /* checksum type */
+ const char *name; /* "friendly" name */
+ const char *encrypt_name; /* crypto encrypt name */
+ const char *aux_cipher; /* aux encrypt cipher name */
+ const char *cksum_name; /* crypto checksum name */
+ const u16 signalg; /* signing algorithm */
+ const u16 sealalg; /* sealing algorithm */
+ const u32 cksumlength; /* checksum length */
+ const u32 keyed_cksum; /* is it a keyed cksum? */
+ const u32 keybytes; /* raw key len, in bytes */
+ const u32 keylength; /* protocol key length, in octets */
+ const u32 Kc_length; /* checksum subkey length, in octets */
+ const u32 Ke_length; /* encryption subkey length, in octets */
+ const u32 Ki_length; /* integrity subkey length, in octets */
+
+ int (*derive_key)(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *in,
+ struct xdr_netobj *out,
+ const struct xdr_netobj *label,
+ gfp_t gfp_mask);
+ u32 (*encrypt)(struct krb5_ctx *kctx, u32 offset,
+ struct xdr_buf *buf, struct page **pages);
+ u32 (*decrypt)(struct krb5_ctx *kctx, u32 offset, u32 len,
+ struct xdr_buf *buf, u32 *headskip, u32 *tailskip);
+ u32 (*get_mic)(struct krb5_ctx *kctx, struct xdr_buf *text,
+ struct xdr_netobj *token);
+ u32 (*verify_mic)(struct krb5_ctx *kctx, struct xdr_buf *message_buffer,
+ struct xdr_netobj *read_token);
+ u32 (*wrap)(struct krb5_ctx *kctx, int offset,
+ struct xdr_buf *buf, struct page **pages);
+ u32 (*unwrap)(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align);
+};
+
+/* krb5_ctx flags definitions */
+#define KRB5_CTX_FLAG_INITIATOR 0x00000001
+#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004
+
+struct krb5_ctx {
+ int initiate; /* 1 = initiating, 0 = accepting */
+ u32 enctype;
+ u32 flags;
+ const struct gss_krb5_enctype *gk5e; /* enctype-specific info */
+ struct crypto_sync_skcipher *enc;
+ struct crypto_sync_skcipher *seq;
+ struct crypto_sync_skcipher *acceptor_enc;
+ struct crypto_sync_skcipher *initiator_enc;
+ struct crypto_sync_skcipher *acceptor_enc_aux;
+ struct crypto_sync_skcipher *initiator_enc_aux;
+ struct crypto_ahash *acceptor_sign;
+ struct crypto_ahash *initiator_sign;
+ struct crypto_ahash *initiator_integ;
+ struct crypto_ahash *acceptor_integ;
+ u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */
+ u8 cksum[GSS_KRB5_MAX_KEYLEN];
+ atomic_t seq_send;
+ atomic64_t seq_send64;
+ time64_t endtime;
+ struct xdr_netobj mech_used;
+};
+
+/*
+ * GSS Kerberos 5 mechanism Per-Message calls.
+ */
+
+u32 gss_krb5_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
+ struct xdr_netobj *token);
+
+u32 gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
+ struct xdr_netobj *read_token);
+
+u32 gss_krb5_wrap_v2(struct krb5_ctx *kctx, int offset,
+ struct xdr_buf *buf, struct page **pages);
+
+u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align);
+
+/*
+ * Implementation internal functions
+ */
+
+/* Key Derivation Functions */
+
+int krb5_derive_key_v2(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *label,
+ gfp_t gfp_mask);
+
+int krb5_kdf_hmac_sha2(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *in_constant,
+ gfp_t gfp_mask);
+
+int krb5_kdf_feedback_cmac(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *in_constant,
+ gfp_t gfp_mask);
+
+/**
+ * krb5_derive_key - Derive a subkey from a protocol key
+ * @kctx: Kerberos 5 context
+ * @inkey: base protocol key
+ * @outkey: OUT: derived key
+ * @usage: key usage value
+ * @seed: key usage seed (one octet)
+ * @gfp_mask: memory allocation control flags
+ *
+ * Caller sets @outkey->len to the desired length of the derived key.
+ *
+ * On success, returns 0 and fills in @outkey. A negative errno value
+ * is returned on failure.
+ */
+static inline int krb5_derive_key(struct krb5_ctx *kctx,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ u32 usage, u8 seed, gfp_t gfp_mask)
+{
+ const struct gss_krb5_enctype *gk5e = kctx->gk5e;
+ u8 label_data[GSS_KRB5_K5CLENGTH];
+ struct xdr_netobj label = {
+ .len = sizeof(label_data),
+ .data = label_data,
+ };
+ __be32 *p = (__be32 *)label_data;
+
+ *p = cpu_to_be32(usage);
+ label_data[4] = seed;
+ return gk5e->derive_key(gk5e, inkey, outkey, &label, gfp_mask);
+}
+
+void krb5_make_confounder(u8 *p, int conflen);
+
+u32 gss_krb5_checksum(struct crypto_ahash *tfm, char *header, int hdrlen,
+ const struct xdr_buf *body, int body_offset,
+ struct xdr_netobj *cksumout);
+
+u32 krb5_encrypt(struct crypto_sync_skcipher *key, void *iv, void *in,
+ void *out, int length);
+
+int xdr_extend_head(struct xdr_buf *buf, unsigned int base,
+ unsigned int shiftlen);
+
+u32 gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
+ struct xdr_buf *buf, struct page **pages);
+
+u32 gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
+ struct xdr_buf *buf, u32 *plainoffset, u32 *plainlen);
+
+u32 krb5_etm_encrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
+ struct page **pages);
+
+u32 krb5_etm_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
+ struct xdr_buf *buf, u32 *headskip, u32 *tailskip);
+
+#if IS_ENABLED(CONFIG_KUNIT)
+void krb5_nfold(u32 inbits, const u8 *in, u32 outbits, u8 *out);
+const struct gss_krb5_enctype *gss_krb5_lookup_enctype(u32 etype);
+int krb5_cbc_cts_encrypt(struct crypto_sync_skcipher *cts_tfm,
+ struct crypto_sync_skcipher *cbc_tfm, u32 offset,
+ struct xdr_buf *buf, struct page **pages,
+ u8 *iv, unsigned int ivsize);
+int krb5_cbc_cts_decrypt(struct crypto_sync_skcipher *cts_tfm,
+ struct crypto_sync_skcipher *cbc_tfm,
+ u32 offset, struct xdr_buf *buf);
+u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
+ struct crypto_ahash *tfm, const struct xdr_buf *body,
+ int body_offset, struct xdr_netobj *cksumout);
+#endif
+
+#endif /* _NET_SUNRPC_AUTH_GSS_KRB5_INTERNAL_H */
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 726c076950c0..4eb19c3a54c7 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -60,18 +60,27 @@
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
#include <linux/lcm.h>
+#include <crypto/hash.h>
+#include <kunit/visibility.h>
+
+#include "gss_krb5_internal.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-/*
+/**
+ * krb5_nfold - n-fold function
+ * @inbits: number of bits in @in
+ * @in: buffer containing input to fold
+ * @outbits: number of bits in the output buffer
+ * @out: buffer to hold the result
+ *
* This is the n-fold function as described in rfc3961, sec 5.1
* Taken from MIT Kerberos and modified.
*/
-
-static void krb5_nfold(u32 inbits, const u8 *in,
- u32 outbits, u8 *out)
+VISIBLE_IF_KUNIT
+void krb5_nfold(u32 inbits, const u8 *in, u32 outbits, u8 *out)
{
unsigned long ulcm;
int byte, i, msbit;
@@ -132,40 +141,36 @@ static void krb5_nfold(u32 inbits, const u8 *in,
}
}
}
+EXPORT_SYMBOL_IF_KUNIT(krb5_nfold);
/*
* This is the DK (derive_key) function as described in rfc3961, sec 5.1
* Taken from MIT Kerberos and modified.
*/
-
-u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
- const struct xdr_netobj *inkey,
- struct xdr_netobj *outkey,
- const struct xdr_netobj *in_constant,
- gfp_t gfp_mask)
+static int krb5_DK(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey, u8 *rawkey,
+ const struct xdr_netobj *in_constant, gfp_t gfp_mask)
{
size_t blocksize, keybytes, keylength, n;
- unsigned char *inblockdata, *outblockdata, *rawkey;
+ unsigned char *inblockdata, *outblockdata;
struct xdr_netobj inblock, outblock;
struct crypto_sync_skcipher *cipher;
- u32 ret = EINVAL;
+ int ret = -EINVAL;
- blocksize = gk5e->blocksize;
keybytes = gk5e->keybytes;
keylength = gk5e->keylength;
- if ((inkey->len != keylength) || (outkey->len != keylength))
+ if (inkey->len != keylength)
goto err_return;
cipher = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
if (IS_ERR(cipher))
goto err_return;
+ blocksize = crypto_sync_skcipher_blocksize(cipher);
if (crypto_sync_skcipher_setkey(cipher, inkey->data, inkey->len))
- goto err_return;
-
- /* allocate and set up buffers */
+ goto err_free_cipher;
- ret = ENOMEM;
+ ret = -ENOMEM;
inblockdata = kmalloc(blocksize, gfp_mask);
if (inblockdata == NULL)
goto err_free_cipher;
@@ -174,10 +179,6 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
if (outblockdata == NULL)
goto err_free_in;
- rawkey = kmalloc(keybytes, gfp_mask);
- if (rawkey == NULL)
- goto err_free_out;
-
inblock.data = (char *) inblockdata;
inblock.len = blocksize;
@@ -197,8 +198,8 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
n = 0;
while (n < keybytes) {
- (*(gk5e->encrypt))(cipher, NULL, inblock.data,
- outblock.data, inblock.len);
+ krb5_encrypt(cipher, NULL, inblock.data, outblock.data,
+ inblock.len);
if ((keybytes - n) <= outblock.len) {
memcpy(rawkey + n, outblock.data, (keybytes - n));
@@ -210,26 +211,8 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
n += outblock.len;
}
- /* postprocess the key */
-
- inblock.data = (char *) rawkey;
- inblock.len = keybytes;
-
- BUG_ON(gk5e->mk_key == NULL);
- ret = (*(gk5e->mk_key))(gk5e, &inblock, outkey);
- if (ret) {
- dprintk("%s: got %d from mk_key function for '%s'\n",
- __func__, ret, gk5e->encrypt_name);
- goto err_free_raw;
- }
-
- /* clean memory, free resources and exit */
-
ret = 0;
-err_free_raw:
- kfree_sensitive(rawkey);
-err_free_out:
kfree_sensitive(outblockdata);
err_free_in:
kfree_sensitive(inblockdata);
@@ -239,84 +222,325 @@ err_return:
return ret;
}
-#define smask(step) ((1<<step)-1)
-#define pstep(x, step) (((x)&smask(step))^(((x)>>step)&smask(step)))
-#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1)
-
-static void mit_des_fixup_key_parity(u8 key[8])
-{
- int i;
- for (i = 0; i < 8; i++) {
- key[i] &= 0xfe;
- key[i] |= 1^parity_char(key[i]);
- }
-}
-
/*
- * This is the des3 key derivation postprocess function
+ * This is the identity function, with some sanity checking.
*/
-u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
- struct xdr_netobj *randombits,
- struct xdr_netobj *key)
+static int krb5_random_to_key_v2(const struct gss_krb5_enctype *gk5e,
+ struct xdr_netobj *randombits,
+ struct xdr_netobj *key)
{
- int i;
- u32 ret = EINVAL;
+ int ret = -EINVAL;
- if (key->len != 24) {
+ if (key->len != 16 && key->len != 32) {
dprintk("%s: key->len is %d\n", __func__, key->len);
goto err_out;
}
- if (randombits->len != 21) {
+ if (randombits->len != 16 && randombits->len != 32) {
dprintk("%s: randombits->len is %d\n",
__func__, randombits->len);
goto err_out;
}
+ if (randombits->len != key->len) {
+ dprintk("%s: randombits->len is %d, key->len is %d\n",
+ __func__, randombits->len, key->len);
+ goto err_out;
+ }
+ memcpy(key->data, randombits->data, key->len);
+ ret = 0;
+err_out:
+ return ret;
+}
+
+/**
+ * krb5_derive_key_v2 - Derive a subkey for an RFC 3962 enctype
+ * @gk5e: Kerberos 5 enctype profile
+ * @inkey: base protocol key
+ * @outkey: OUT: derived key
+ * @label: subkey usage label
+ * @gfp_mask: memory allocation control flags
+ *
+ * Caller sets @outkey->len to the desired length of the derived key.
+ *
+ * On success, returns 0 and fills in @outkey. A negative errno value
+ * is returned on failure.
+ */
+int krb5_derive_key_v2(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *label,
+ gfp_t gfp_mask)
+{
+ struct xdr_netobj inblock;
+ int ret;
+
+ inblock.len = gk5e->keybytes;
+ inblock.data = kmalloc(inblock.len, gfp_mask);
+ if (!inblock.data)
+ return -ENOMEM;
- /* take the seven bytes, move them around into the top 7 bits of the
- 8 key bytes, then compute the parity bits. Do this three times. */
+ ret = krb5_DK(gk5e, inkey, inblock.data, label, gfp_mask);
+ if (!ret)
+ ret = krb5_random_to_key_v2(gk5e, &inblock, outkey);
+
+ kfree_sensitive(inblock.data);
+ return ret;
+}
- for (i = 0; i < 3; i++) {
- memcpy(key->data + i*8, randombits->data + i*7, 7);
- key->data[i*8+7] = (((key->data[i*8]&1)<<1) |
- ((key->data[i*8+1]&1)<<2) |
- ((key->data[i*8+2]&1)<<3) |
- ((key->data[i*8+3]&1)<<4) |
- ((key->data[i*8+4]&1)<<5) |
- ((key->data[i*8+5]&1)<<6) |
- ((key->data[i*8+6]&1)<<7));
+/*
+ * K(i) = CMAC(key, K(i-1) | i | constant | 0x00 | k)
+ *
+ * i: A block counter is used with a length of 4 bytes, represented
+ * in big-endian order.
+ *
+ * constant: The label input to the KDF is the usage constant supplied
+ * to the key derivation function
+ *
+ * k: The length of the output key in bits, represented as a 4-byte
+ * string in big-endian order.
+ *
+ * Caller fills in K(i-1) in @step, and receives the result K(i)
+ * in the same buffer.
+ */
+static int
+krb5_cmac_Ki(struct crypto_shash *tfm, const struct xdr_netobj *constant,
+ u32 outlen, u32 count, struct xdr_netobj *step)
+{
+ __be32 k = cpu_to_be32(outlen * 8);
+ SHASH_DESC_ON_STACK(desc, tfm);
+ __be32 i = cpu_to_be32(count);
+ u8 zero = 0;
+ int ret;
+
+ desc->tfm = tfm;
+ ret = crypto_shash_init(desc);
+ if (ret)
+ goto out_err;
+
+ ret = crypto_shash_update(desc, step->data, step->len);
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, (u8 *)&i, sizeof(i));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, constant->data, constant->len);
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, &zero, sizeof(zero));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, (u8 *)&k, sizeof(k));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_final(desc, step->data);
+ if (ret)
+ goto out_err;
+
+out_err:
+ shash_desc_zero(desc);
+ return ret;
+}
- mit_des_fixup_key_parity(key->data + i*8);
+/**
+ * krb5_kdf_feedback_cmac - Derive a subkey for a Camellia/CMAC-based enctype
+ * @gk5e: Kerberos 5 enctype parameters
+ * @inkey: base protocol key
+ * @outkey: OUT: derived key
+ * @constant: subkey usage label
+ * @gfp_mask: memory allocation control flags
+ *
+ * RFC 6803 Section 3:
+ *
+ * "We use a key derivation function from the family specified in
+ * [SP800-108], Section 5.2, 'KDF in Feedback Mode'."
+ *
+ * n = ceiling(k / 128)
+ * K(0) = zeros
+ * K(i) = CMAC(key, K(i-1) | i | constant | 0x00 | k)
+ * DR(key, constant) = k-truncate(K(1) | K(2) | ... | K(n))
+ * KDF-FEEDBACK-CMAC(key, constant) = random-to-key(DR(key, constant))
+ *
+ * Caller sets @outkey->len to the desired length of the derived key (k).
+ *
+ * On success, returns 0 and fills in @outkey. A negative errno value
+ * is returned on failure.
+ */
+int
+krb5_kdf_feedback_cmac(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *constant,
+ gfp_t gfp_mask)
+{
+ struct xdr_netobj step = { .data = NULL };
+ struct xdr_netobj DR = { .data = NULL };
+ unsigned int blocksize, offset;
+ struct crypto_shash *tfm;
+ int n, count, ret;
+
+ /*
+ * This implementation assumes the CMAC used for an enctype's
+ * key derivation is the same as the CMAC used for its
+ * checksumming. This happens to be true for enctypes that
+ * are currently supported by this implementation.
+ */
+ tfm = crypto_alloc_shash(gk5e->cksum_name, 0, 0);
+ if (IS_ERR(tfm)) {
+ ret = PTR_ERR(tfm);
+ goto out;
+ }
+ ret = crypto_shash_setkey(tfm, inkey->data, inkey->len);
+ if (ret)
+ goto out_free_tfm;
+
+ blocksize = crypto_shash_digestsize(tfm);
+ n = (outkey->len + blocksize - 1) / blocksize;
+
+ /* K(0) is all zeroes */
+ ret = -ENOMEM;
+ step.len = blocksize;
+ step.data = kzalloc(step.len, gfp_mask);
+ if (!step.data)
+ goto out_free_tfm;
+
+ DR.len = blocksize * n;
+ DR.data = kmalloc(DR.len, gfp_mask);
+ if (!DR.data)
+ goto out_free_tfm;
+
+ /* XXX: Does not handle partial-block key sizes */
+ for (offset = 0, count = 1; count <= n; count++) {
+ ret = krb5_cmac_Ki(tfm, constant, outkey->len, count, &step);
+ if (ret)
+ goto out_free_tfm;
+
+ memcpy(DR.data + offset, step.data, blocksize);
+ offset += blocksize;
}
+
+ /* k-truncate and random-to-key */
+ memcpy(outkey->data, DR.data, outkey->len);
ret = 0;
-err_out:
+
+out_free_tfm:
+ crypto_free_shash(tfm);
+out:
+ kfree_sensitive(step.data);
+ kfree_sensitive(DR.data);
return ret;
}
/*
- * This is the aes key derivation postprocess function
+ * K1 = HMAC-SHA(key, 0x00000001 | label | 0x00 | k)
+ *
+ * key: The source of entropy from which subsequent keys are derived.
+ *
+ * label: An octet string describing the intended usage of the
+ * derived key.
+ *
+ * k: Length in bits of the key to be outputted, expressed in
+ * big-endian binary representation in 4 bytes.
*/
-u32 gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
- struct xdr_netobj *randombits,
- struct xdr_netobj *key)
+static int
+krb5_hmac_K1(struct crypto_shash *tfm, const struct xdr_netobj *label,
+ u32 outlen, struct xdr_netobj *K1)
{
- u32 ret = EINVAL;
+ __be32 k = cpu_to_be32(outlen * 8);
+ SHASH_DESC_ON_STACK(desc, tfm);
+ __be32 one = cpu_to_be32(1);
+ u8 zero = 0;
+ int ret;
+
+ desc->tfm = tfm;
+ ret = crypto_shash_init(desc);
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, (u8 *)&one, sizeof(one));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, label->data, label->len);
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, &zero, sizeof(zero));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, (u8 *)&k, sizeof(k));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_final(desc, K1->data);
+ if (ret)
+ goto out_err;
+
+out_err:
+ shash_desc_zero(desc);
+ return ret;
+}
- if (key->len != 16 && key->len != 32) {
- dprintk("%s: key->len is %d\n", __func__, key->len);
- goto err_out;
- }
- if (randombits->len != 16 && randombits->len != 32) {
- dprintk("%s: randombits->len is %d\n",
- __func__, randombits->len);
- goto err_out;
+/**
+ * krb5_kdf_hmac_sha2 - Derive a subkey for an AES/SHA2-based enctype
+ * @gk5e: Kerberos 5 enctype policy parameters
+ * @inkey: base protocol key
+ * @outkey: OUT: derived key
+ * @label: subkey usage label
+ * @gfp_mask: memory allocation control flags
+ *
+ * RFC 8009 Section 3:
+ *
+ * "We use a key derivation function from Section 5.1 of [SP800-108],
+ * which uses the HMAC algorithm as the PRF."
+ *
+ * function KDF-HMAC-SHA2(key, label, [context,] k):
+ * k-truncate(K1)
+ *
+ * Caller sets @outkey->len to the desired length of the derived key.
+ *
+ * On success, returns 0 and fills in @outkey. A negative errno value
+ * is returned on failure.
+ */
+int
+krb5_kdf_hmac_sha2(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *label,
+ gfp_t gfp_mask)
+{
+ struct crypto_shash *tfm;
+ struct xdr_netobj K1 = {
+ .data = NULL,
+ };
+ int ret;
+
+ /*
+ * This implementation assumes the HMAC used for an enctype's
+ * key derivation is the same as the HMAC used for its
+ * checksumming. This happens to be true for enctypes that
+ * are currently supported by this implementation.
+ */
+ tfm = crypto_alloc_shash(gk5e->cksum_name, 0, 0);
+ if (IS_ERR(tfm)) {
+ ret = PTR_ERR(tfm);
+ goto out;
}
- if (randombits->len != key->len) {
- dprintk("%s: randombits->len is %d, key->len is %d\n",
- __func__, randombits->len, key->len);
- goto err_out;
+ ret = crypto_shash_setkey(tfm, inkey->data, inkey->len);
+ if (ret)
+ goto out_free_tfm;
+
+ K1.len = crypto_shash_digestsize(tfm);
+ K1.data = kmalloc(K1.len, gfp_mask);
+ if (!K1.data) {
+ ret = -ENOMEM;
+ goto out_free_tfm;
}
- memcpy(key->data, randombits->data, key->len);
- ret = 0;
-err_out:
+
+ ret = krb5_hmac_K1(tfm, label, outkey->len, &K1);
+ if (ret)
+ goto out_free_tfm;
+
+ /* k-truncate and random-to-key */
+ memcpy(outkey->data, K1.data, outkey->len);
+
+out_free_tfm:
+ kfree_sensitive(K1.data);
+ crypto_free_shash(tfm);
+out:
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 1c092b05c2bb..3366505bc669 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -19,449 +19,376 @@
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/gss_krb5_enctypes.h>
+#include <kunit/visibility.h>
#include "auth_gss_internal.h"
+#include "gss_krb5_internal.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-static struct gss_api_mech gss_kerberos_mech; /* forward declaration */
+static struct gss_api_mech gss_kerberos_mech;
static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
-#ifndef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1)
/*
- * DES (All DES enctypes are mapped to the same gss functionality)
- */
- {
- .etype = ENCTYPE_DES_CBC_RAW,
- .ctype = CKSUMTYPE_RSA_MD5,
- .name = "des-cbc-crc",
- .encrypt_name = "cbc(des)",
- .cksum_name = "md5",
- .encrypt = krb5_encrypt,
- .decrypt = krb5_decrypt,
- .mk_key = NULL,
- .signalg = SGN_ALG_DES_MAC_MD5,
- .sealalg = SEAL_ALG_DES,
- .keybytes = 7,
- .keylength = 8,
- .blocksize = 8,
- .conflen = 8,
- .cksumlength = 8,
- .keyed_cksum = 0,
- },
-#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
- /*
- * 3DES
- */
- {
- .etype = ENCTYPE_DES3_CBC_RAW,
- .ctype = CKSUMTYPE_HMAC_SHA1_DES3,
- .name = "des3-hmac-sha1",
- .encrypt_name = "cbc(des3_ede)",
- .cksum_name = "hmac(sha1)",
- .encrypt = krb5_encrypt,
- .decrypt = krb5_decrypt,
- .mk_key = gss_krb5_des3_make_key,
- .signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
- .sealalg = SEAL_ALG_DES3KD,
- .keybytes = 21,
- .keylength = 24,
- .blocksize = 8,
- .conflen = 8,
- .cksumlength = 20,
- .keyed_cksum = 1,
- },
- /*
- * AES128
+ * AES-128 with SHA-1 (RFC 3962)
*/
{
.etype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
.ctype = CKSUMTYPE_HMAC_SHA1_96_AES128,
.name = "aes128-cts",
.encrypt_name = "cts(cbc(aes))",
+ .aux_cipher = "cbc(aes)",
.cksum_name = "hmac(sha1)",
- .encrypt = krb5_encrypt,
- .decrypt = krb5_decrypt,
- .mk_key = gss_krb5_aes_make_key,
- .encrypt_v2 = gss_krb5_aes_encrypt,
- .decrypt_v2 = gss_krb5_aes_decrypt,
+ .derive_key = krb5_derive_key_v2,
+ .encrypt = gss_krb5_aes_encrypt,
+ .decrypt = gss_krb5_aes_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+
.signalg = -1,
.sealalg = -1,
.keybytes = 16,
- .keylength = 16,
- .blocksize = 16,
- .conflen = 16,
- .cksumlength = 12,
+ .keylength = BITS2OCTETS(128),
+ .Kc_length = BITS2OCTETS(128),
+ .Ke_length = BITS2OCTETS(128),
+ .Ki_length = BITS2OCTETS(128),
+ .cksumlength = BITS2OCTETS(96),
.keyed_cksum = 1,
},
/*
- * AES256
+ * AES-256 with SHA-1 (RFC 3962)
*/
{
.etype = ENCTYPE_AES256_CTS_HMAC_SHA1_96,
.ctype = CKSUMTYPE_HMAC_SHA1_96_AES256,
.name = "aes256-cts",
.encrypt_name = "cts(cbc(aes))",
+ .aux_cipher = "cbc(aes)",
.cksum_name = "hmac(sha1)",
- .encrypt = krb5_encrypt,
- .decrypt = krb5_decrypt,
- .mk_key = gss_krb5_aes_make_key,
- .encrypt_v2 = gss_krb5_aes_encrypt,
- .decrypt_v2 = gss_krb5_aes_decrypt,
+ .derive_key = krb5_derive_key_v2,
+ .encrypt = gss_krb5_aes_encrypt,
+ .decrypt = gss_krb5_aes_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+
.signalg = -1,
.sealalg = -1,
.keybytes = 32,
- .keylength = 32,
- .blocksize = 16,
- .conflen = 16,
- .cksumlength = 12,
+ .keylength = BITS2OCTETS(256),
+ .Kc_length = BITS2OCTETS(256),
+ .Ke_length = BITS2OCTETS(256),
+ .Ki_length = BITS2OCTETS(256),
+ .cksumlength = BITS2OCTETS(96),
.keyed_cksum = 1,
},
-};
+#endif
-static const int num_supported_enctypes =
- ARRAY_SIZE(supported_gss_krb5_enctypes);
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA)
+ /*
+ * Camellia-128 with CMAC (RFC 6803)
+ */
+ {
+ .etype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .ctype = CKSUMTYPE_CMAC_CAMELLIA128,
+ .name = "camellia128-cts-cmac",
+ .encrypt_name = "cts(cbc(camellia))",
+ .aux_cipher = "cbc(camellia)",
+ .cksum_name = "cmac(camellia)",
+ .cksumlength = BITS2OCTETS(128),
+ .keyed_cksum = 1,
+ .keylength = BITS2OCTETS(128),
+ .Kc_length = BITS2OCTETS(128),
+ .Ke_length = BITS2OCTETS(128),
+ .Ki_length = BITS2OCTETS(128),
+
+ .derive_key = krb5_kdf_feedback_cmac,
+ .encrypt = gss_krb5_aes_encrypt,
+ .decrypt = gss_krb5_aes_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+ },
+ /*
+ * Camellia-256 with CMAC (RFC 6803)
+ */
+ {
+ .etype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .ctype = CKSUMTYPE_CMAC_CAMELLIA256,
+ .name = "camellia256-cts-cmac",
+ .encrypt_name = "cts(cbc(camellia))",
+ .aux_cipher = "cbc(camellia)",
+ .cksum_name = "cmac(camellia)",
+ .cksumlength = BITS2OCTETS(128),
+ .keyed_cksum = 1,
+ .keylength = BITS2OCTETS(256),
+ .Kc_length = BITS2OCTETS(256),
+ .Ke_length = BITS2OCTETS(256),
+ .Ki_length = BITS2OCTETS(256),
+
+ .derive_key = krb5_kdf_feedback_cmac,
+ .encrypt = gss_krb5_aes_encrypt,
+ .decrypt = gss_krb5_aes_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+ },
+#endif
-static int
-supported_gss_krb5_enctype(int etype)
-{
- int i;
- for (i = 0; i < num_supported_enctypes; i++)
- if (supported_gss_krb5_enctypes[i].etype == etype)
- return 1;
- return 0;
-}
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2)
+ /*
+ * AES-128 with SHA-256 (RFC 8009)
+ */
+ {
+ .etype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .ctype = CKSUMTYPE_HMAC_SHA256_128_AES128,
+ .name = "aes128-cts-hmac-sha256-128",
+ .encrypt_name = "cts(cbc(aes))",
+ .aux_cipher = "cbc(aes)",
+ .cksum_name = "hmac(sha256)",
+ .cksumlength = BITS2OCTETS(128),
+ .keyed_cksum = 1,
+ .keylength = BITS2OCTETS(128),
+ .Kc_length = BITS2OCTETS(128),
+ .Ke_length = BITS2OCTETS(128),
+ .Ki_length = BITS2OCTETS(128),
+
+ .derive_key = krb5_kdf_hmac_sha2,
+ .encrypt = krb5_etm_encrypt,
+ .decrypt = krb5_etm_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+ },
+ /*
+ * AES-256 with SHA-384 (RFC 8009)
+ */
+ {
+ .etype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .ctype = CKSUMTYPE_HMAC_SHA384_192_AES256,
+ .name = "aes256-cts-hmac-sha384-192",
+ .encrypt_name = "cts(cbc(aes))",
+ .aux_cipher = "cbc(aes)",
+ .cksum_name = "hmac(sha384)",
+ .cksumlength = BITS2OCTETS(192),
+ .keyed_cksum = 1,
+ .keylength = BITS2OCTETS(256),
+ .Kc_length = BITS2OCTETS(192),
+ .Ke_length = BITS2OCTETS(256),
+ .Ki_length = BITS2OCTETS(192),
+
+ .derive_key = krb5_kdf_hmac_sha2,
+ .encrypt = krb5_etm_encrypt,
+ .decrypt = krb5_etm_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+ },
+#endif
+};
-static const struct gss_krb5_enctype *
-get_gss_krb5_enctype(int etype)
-{
- int i;
- for (i = 0; i < num_supported_enctypes; i++)
- if (supported_gss_krb5_enctypes[i].etype == etype)
- return &supported_gss_krb5_enctypes[i];
- return NULL;
-}
+/*
+ * The list of advertised enctypes is specified in order of most
+ * preferred to least.
+ */
+static char gss_krb5_enctype_priority_list[64];
-static inline const void *
-get_key(const void *p, const void *end,
- struct krb5_ctx *ctx, struct crypto_sync_skcipher **res)
+static void gss_krb5_prepare_enctype_priority_list(void)
{
- struct xdr_netobj key;
- int alg;
-
- p = simple_get_bytes(p, end, &alg, sizeof(alg));
- if (IS_ERR(p))
- goto out_err;
-
- switch (alg) {
- case ENCTYPE_DES_CBC_CRC:
- case ENCTYPE_DES_CBC_MD4:
- case ENCTYPE_DES_CBC_MD5:
- /* Map all these key types to ENCTYPE_DES_CBC_RAW */
- alg = ENCTYPE_DES_CBC_RAW;
- break;
- }
-
- if (!supported_gss_krb5_enctype(alg)) {
- printk(KERN_WARNING "gss_kerberos_mech: unsupported "
- "encryption key algorithm %d\n", alg);
- p = ERR_PTR(-EINVAL);
- goto out_err;
- }
- p = simple_get_netobj(p, end, &key);
- if (IS_ERR(p))
- goto out_err;
-
- *res = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
- if (IS_ERR(*res)) {
- printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
- "crypto algorithm %s\n", ctx->gk5e->encrypt_name);
- *res = NULL;
- goto out_err_free_key;
- }
- if (crypto_sync_skcipher_setkey(*res, key.data, key.len)) {
- printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
- "crypto algorithm %s\n", ctx->gk5e->encrypt_name);
- goto out_err_free_tfm;
+ static const u32 gss_krb5_enctypes[] = {
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2)
+ ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+#endif
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA)
+ ENCTYPE_CAMELLIA256_CTS_CMAC,
+ ENCTYPE_CAMELLIA128_CTS_CMAC,
+#endif
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1)
+ ENCTYPE_AES256_CTS_HMAC_SHA1_96,
+ ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+#endif
+ };
+ size_t total, i;
+ char buf[16];
+ char *sep;
+ int n;
+
+ sep = "";
+ gss_krb5_enctype_priority_list[0] = '\0';
+ for (total = 0, i = 0; i < ARRAY_SIZE(gss_krb5_enctypes); i++) {
+ n = sprintf(buf, "%s%u", sep, gss_krb5_enctypes[i]);
+ if (n < 0)
+ break;
+ if (total + n >= sizeof(gss_krb5_enctype_priority_list))
+ break;
+ strcat(gss_krb5_enctype_priority_list, buf);
+ sep = ",";
+ total += n;
}
-
- kfree(key.data);
- return p;
-
-out_err_free_tfm:
- crypto_free_sync_skcipher(*res);
-out_err_free_key:
- kfree(key.data);
- p = ERR_PTR(-EINVAL);
-out_err:
- return p;
}
-static int
-gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
+/**
+ * gss_krb5_lookup_enctype - Retrieve profile information for a given enctype
+ * @etype: ENCTYPE value
+ *
+ * Returns a pointer to a gss_krb5_enctype structure, or NULL if no
+ * matching etype is found.
+ */
+VISIBLE_IF_KUNIT
+const struct gss_krb5_enctype *gss_krb5_lookup_enctype(u32 etype)
{
- u32 seq_send;
- int tmp;
- u32 time32;
-
- p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
- if (IS_ERR(p))
- goto out_err;
-
- /* Old format supports only DES! Any other enctype uses new format */
- ctx->enctype = ENCTYPE_DES_CBC_RAW;
-
- ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
- if (ctx->gk5e == NULL) {
- p = ERR_PTR(-EINVAL);
- goto out_err;
- }
-
- /* The downcall format was designed before we completely understood
- * the uses of the context fields; so it includes some stuff we
- * just give some minimal sanity-checking, and some we ignore
- * completely (like the next twenty bytes): */
- if (unlikely(p + 20 > end || p + 20 < p)) {
- p = ERR_PTR(-EFAULT);
- goto out_err;
- }
- p += 20;
- p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
- if (IS_ERR(p))
- goto out_err;
- if (tmp != SGN_ALG_DES_MAC_MD5) {
- p = ERR_PTR(-ENOSYS);
- goto out_err;
- }
- p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
- if (IS_ERR(p))
- goto out_err;
- if (tmp != SEAL_ALG_DES) {
- p = ERR_PTR(-ENOSYS);
- goto out_err;
- }
- p = simple_get_bytes(p, end, &time32, sizeof(time32));
- if (IS_ERR(p))
- goto out_err;
- /* unsigned 32-bit time overflows in year 2106 */
- ctx->endtime = (time64_t)time32;
- p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send));
- if (IS_ERR(p))
- goto out_err;
- atomic_set(&ctx->seq_send, seq_send);
- p = simple_get_netobj(p, end, &ctx->mech_used);
- if (IS_ERR(p))
- goto out_err;
- p = get_key(p, end, ctx, &ctx->enc);
- if (IS_ERR(p))
- goto out_err_free_mech;
- p = get_key(p, end, ctx, &ctx->seq);
- if (IS_ERR(p))
- goto out_err_free_key1;
- if (p != end) {
- p = ERR_PTR(-EFAULT);
- goto out_err_free_key2;
- }
+ size_t i;
- return 0;
-
-out_err_free_key2:
- crypto_free_sync_skcipher(ctx->seq);
-out_err_free_key1:
- crypto_free_sync_skcipher(ctx->enc);
-out_err_free_mech:
- kfree(ctx->mech_used.data);
-out_err:
- return PTR_ERR(p);
+ for (i = 0; i < ARRAY_SIZE(supported_gss_krb5_enctypes); i++)
+ if (supported_gss_krb5_enctypes[i].etype == etype)
+ return &supported_gss_krb5_enctypes[i];
+ return NULL;
}
+EXPORT_SYMBOL_IF_KUNIT(gss_krb5_lookup_enctype);
static struct crypto_sync_skcipher *
-context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
+gss_krb5_alloc_cipher_v2(const char *cname, const struct xdr_netobj *key)
{
- struct crypto_sync_skcipher *cp;
+ struct crypto_sync_skcipher *tfm;
- cp = crypto_alloc_sync_skcipher(cname, 0, 0);
- if (IS_ERR(cp)) {
- dprintk("gss_kerberos_mech: unable to initialize "
- "crypto algorithm %s\n", cname);
+ tfm = crypto_alloc_sync_skcipher(cname, 0, 0);
+ if (IS_ERR(tfm))
return NULL;
- }
- if (crypto_sync_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
- dprintk("gss_kerberos_mech: error setting key for "
- "crypto algorithm %s\n", cname);
- crypto_free_sync_skcipher(cp);
+ if (crypto_sync_skcipher_setkey(tfm, key->data, key->len)) {
+ crypto_free_sync_skcipher(tfm);
return NULL;
}
- return cp;
-}
-
-static inline void
-set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
-{
- cdata[0] = (usage>>24)&0xff;
- cdata[1] = (usage>>16)&0xff;
- cdata[2] = (usage>>8)&0xff;
- cdata[3] = usage&0xff;
- cdata[4] = seed;
+ return tfm;
}
-static int
-context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
+static struct crypto_ahash *
+gss_krb5_alloc_hash_v2(struct krb5_ctx *kctx, const struct xdr_netobj *key)
{
- struct xdr_netobj c, keyin, keyout;
- u8 cdata[GSS_KRB5_K5CLENGTH];
- u32 err;
-
- c.len = GSS_KRB5_K5CLENGTH;
- c.data = cdata;
-
- keyin.data = ctx->Ksess;
- keyin.len = ctx->gk5e->keylength;
- keyout.len = ctx->gk5e->keylength;
+ struct crypto_ahash *tfm;
- /* seq uses the raw key */
- ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
- ctx->Ksess);
- if (ctx->seq == NULL)
- goto out_err;
-
- ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
- ctx->Ksess);
- if (ctx->enc == NULL)
- goto out_free_seq;
-
- /* derive cksum */
- set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM);
- keyout.data = ctx->cksum;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving cksum key\n",
- __func__, err);
- goto out_free_enc;
+ tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+ return NULL;
+ if (crypto_ahash_setkey(tfm, key->data, key->len)) {
+ crypto_free_ahash(tfm);
+ return NULL;
}
-
- return 0;
-
-out_free_enc:
- crypto_free_sync_skcipher(ctx->enc);
-out_free_seq:
- crypto_free_sync_skcipher(ctx->seq);
-out_err:
- return -EINVAL;
+ return tfm;
}
static int
-context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
+gss_krb5_import_ctx_v2(struct krb5_ctx *ctx, gfp_t gfp_mask)
{
- struct xdr_netobj c, keyin, keyout;
- u8 cdata[GSS_KRB5_K5CLENGTH];
- u32 err;
-
- c.len = GSS_KRB5_K5CLENGTH;
- c.data = cdata;
-
- keyin.data = ctx->Ksess;
- keyin.len = ctx->gk5e->keylength;
- keyout.len = ctx->gk5e->keylength;
+ struct xdr_netobj keyin = {
+ .len = ctx->gk5e->keylength,
+ .data = ctx->Ksess,
+ };
+ struct xdr_netobj keyout;
+ int ret = -EINVAL;
+
+ keyout.data = kmalloc(GSS_KRB5_MAX_KEYLEN, gfp_mask);
+ if (!keyout.data)
+ return -ENOMEM;
/* initiator seal encryption */
- set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
- keyout.data = ctx->initiator_seal;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving initiator_seal key\n",
- __func__, err);
- goto out_err;
- }
- ctx->initiator_enc = context_v2_alloc_cipher(ctx,
- ctx->gk5e->encrypt_name,
- ctx->initiator_seal);
+ keyout.len = ctx->gk5e->Ke_length;
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_INITIATOR_SEAL,
+ KEY_USAGE_SEED_ENCRYPTION, gfp_mask))
+ goto out;
+ ctx->initiator_enc = gss_krb5_alloc_cipher_v2(ctx->gk5e->encrypt_name,
+ &keyout);
if (ctx->initiator_enc == NULL)
- goto out_err;
+ goto out;
+ if (ctx->gk5e->aux_cipher) {
+ ctx->initiator_enc_aux =
+ gss_krb5_alloc_cipher_v2(ctx->gk5e->aux_cipher,
+ &keyout);
+ if (ctx->initiator_enc_aux == NULL)
+ goto out_free;
+ }
/* acceptor seal encryption */
- set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
- keyout.data = ctx->acceptor_seal;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving acceptor_seal key\n",
- __func__, err);
- goto out_free_initiator_enc;
- }
- ctx->acceptor_enc = context_v2_alloc_cipher(ctx,
- ctx->gk5e->encrypt_name,
- ctx->acceptor_seal);
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_ACCEPTOR_SEAL,
+ KEY_USAGE_SEED_ENCRYPTION, gfp_mask))
+ goto out_free;
+ ctx->acceptor_enc = gss_krb5_alloc_cipher_v2(ctx->gk5e->encrypt_name,
+ &keyout);
if (ctx->acceptor_enc == NULL)
- goto out_free_initiator_enc;
+ goto out_free;
+ if (ctx->gk5e->aux_cipher) {
+ ctx->acceptor_enc_aux =
+ gss_krb5_alloc_cipher_v2(ctx->gk5e->aux_cipher,
+ &keyout);
+ if (ctx->acceptor_enc_aux == NULL)
+ goto out_free;
+ }
/* initiator sign checksum */
- set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
- keyout.data = ctx->initiator_sign;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving initiator_sign key\n",
- __func__, err);
- goto out_free_acceptor_enc;
- }
+ keyout.len = ctx->gk5e->Kc_length;
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_INITIATOR_SIGN,
+ KEY_USAGE_SEED_CHECKSUM, gfp_mask))
+ goto out_free;
+ ctx->initiator_sign = gss_krb5_alloc_hash_v2(ctx, &keyout);
+ if (ctx->initiator_sign == NULL)
+ goto out_free;
/* acceptor sign checksum */
- set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
- keyout.data = ctx->acceptor_sign;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving acceptor_sign key\n",
- __func__, err);
- goto out_free_acceptor_enc;
- }
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_ACCEPTOR_SIGN,
+ KEY_USAGE_SEED_CHECKSUM, gfp_mask))
+ goto out_free;
+ ctx->acceptor_sign = gss_krb5_alloc_hash_v2(ctx, &keyout);
+ if (ctx->acceptor_sign == NULL)
+ goto out_free;
/* initiator seal integrity */
- set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
- keyout.data = ctx->initiator_integ;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving initiator_integ key\n",
- __func__, err);
- goto out_free_acceptor_enc;
- }
+ keyout.len = ctx->gk5e->Ki_length;
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_INITIATOR_SEAL,
+ KEY_USAGE_SEED_INTEGRITY, gfp_mask))
+ goto out_free;
+ ctx->initiator_integ = gss_krb5_alloc_hash_v2(ctx, &keyout);
+ if (ctx->initiator_integ == NULL)
+ goto out_free;
/* acceptor seal integrity */
- set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
- keyout.data = ctx->acceptor_integ;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving acceptor_integ key\n",
- __func__, err);
- goto out_free_acceptor_enc;
- }
-
- switch (ctx->enctype) {
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- ctx->initiator_enc_aux =
- context_v2_alloc_cipher(ctx, "cbc(aes)",
- ctx->initiator_seal);
- if (ctx->initiator_enc_aux == NULL)
- goto out_free_acceptor_enc;
- ctx->acceptor_enc_aux =
- context_v2_alloc_cipher(ctx, "cbc(aes)",
- ctx->acceptor_seal);
- if (ctx->acceptor_enc_aux == NULL) {
- crypto_free_sync_skcipher(ctx->initiator_enc_aux);
- goto out_free_acceptor_enc;
- }
- }
-
- return 0;
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_ACCEPTOR_SEAL,
+ KEY_USAGE_SEED_INTEGRITY, gfp_mask))
+ goto out_free;
+ ctx->acceptor_integ = gss_krb5_alloc_hash_v2(ctx, &keyout);
+ if (ctx->acceptor_integ == NULL)
+ goto out_free;
+
+ ret = 0;
+out:
+ kfree_sensitive(keyout.data);
+ return ret;
-out_free_acceptor_enc:
+out_free:
+ crypto_free_ahash(ctx->acceptor_integ);
+ crypto_free_ahash(ctx->initiator_integ);
+ crypto_free_ahash(ctx->acceptor_sign);
+ crypto_free_ahash(ctx->initiator_sign);
+ crypto_free_sync_skcipher(ctx->acceptor_enc_aux);
crypto_free_sync_skcipher(ctx->acceptor_enc);
-out_free_initiator_enc:
+ crypto_free_sync_skcipher(ctx->initiator_enc_aux);
crypto_free_sync_skcipher(ctx->initiator_enc);
-out_err:
- return -EINVAL;
+ goto out;
}
static int
@@ -471,6 +398,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
u64 seq_send64;
int keylen;
u32 time32;
+ int ret;
p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
if (IS_ERR(p))
@@ -497,10 +425,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
if (IS_ERR(p))
goto out_err;
- /* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */
- if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)
- ctx->enctype = ENCTYPE_DES3_CBC_RAW;
- ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
+ ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype);
if (ctx->gk5e == NULL) {
dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
ctx->enctype);
@@ -526,25 +451,23 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
}
ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
- switch (ctx->enctype) {
- case ENCTYPE_DES3_CBC_RAW:
- return context_derive_keys_des3(ctx, gfp_mask);
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return context_derive_keys_new(ctx, gfp_mask);
- default:
- return -EINVAL;
+ ret = gss_krb5_import_ctx_v2(ctx, gfp_mask);
+ if (ret) {
+ p = ERR_PTR(ret);
+ goto out_free;
}
+ return 0;
+
+out_free:
+ kfree(ctx->mech_used.data);
out_err:
return PTR_ERR(p);
}
static int
-gss_import_sec_context_kerberos(const void *p, size_t len,
- struct gss_ctx *ctx_id,
- time64_t *endtime,
- gfp_t gfp_mask)
+gss_krb5_import_sec_context(const void *p, size_t len, struct gss_ctx *ctx_id,
+ time64_t *endtime, gfp_t gfp_mask)
{
const void *end = (const void *)((const char *)p + len);
struct krb5_ctx *ctx;
@@ -554,24 +477,22 @@ gss_import_sec_context_kerberos(const void *p, size_t len,
if (ctx == NULL)
return -ENOMEM;
- if (len == 85)
- ret = gss_import_v1_context(p, end, ctx);
- else
- ret = gss_import_v2_context(p, end, ctx, gfp_mask);
-
- if (ret == 0) {
- ctx_id->internal_ctx_id = ctx;
- if (endtime)
- *endtime = ctx->endtime;
- } else
+ ret = gss_import_v2_context(p, end, ctx, gfp_mask);
+ memzero_explicit(&ctx->Ksess, sizeof(ctx->Ksess));
+ if (ret) {
kfree(ctx);
+ return ret;
+ }
- dprintk("RPC: %s: returning %d\n", __func__, ret);
- return ret;
+ ctx_id->internal_ctx_id = ctx;
+ if (endtime)
+ *endtime = ctx->endtime;
+ return 0;
}
static void
-gss_delete_sec_context_kerberos(void *internal_ctx) {
+gss_krb5_delete_sec_context(void *internal_ctx)
+{
struct krb5_ctx *kctx = internal_ctx;
crypto_free_sync_skcipher(kctx->seq);
@@ -580,17 +501,105 @@ gss_delete_sec_context_kerberos(void *internal_ctx) {
crypto_free_sync_skcipher(kctx->initiator_enc);
crypto_free_sync_skcipher(kctx->acceptor_enc_aux);
crypto_free_sync_skcipher(kctx->initiator_enc_aux);
+ crypto_free_ahash(kctx->acceptor_sign);
+ crypto_free_ahash(kctx->initiator_sign);
+ crypto_free_ahash(kctx->acceptor_integ);
+ crypto_free_ahash(kctx->initiator_integ);
kfree(kctx->mech_used.data);
kfree(kctx);
}
+/**
+ * gss_krb5_get_mic - get_mic for the Kerberos GSS mechanism
+ * @gctx: GSS context
+ * @text: plaintext to checksum
+ * @token: buffer into which to write the computed checksum
+ *
+ * Return values:
+ * %GSS_S_COMPLETE - success, and @token is filled in
+ * %GSS_S_FAILURE - checksum could not be generated
+ * %GSS_S_CONTEXT_EXPIRED - Kerberos context is no longer valid
+ */
+static u32 gss_krb5_get_mic(struct gss_ctx *gctx, struct xdr_buf *text,
+ struct xdr_netobj *token)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+
+ return kctx->gk5e->get_mic(kctx, text, token);
+}
+
+/**
+ * gss_krb5_verify_mic - verify_mic for the Kerberos GSS mechanism
+ * @gctx: GSS context
+ * @message_buffer: plaintext to check
+ * @read_token: received checksum to check
+ *
+ * Return values:
+ * %GSS_S_COMPLETE - computed and received checksums match
+ * %GSS_S_DEFECTIVE_TOKEN - received checksum is not valid
+ * %GSS_S_BAD_SIG - computed and received checksums do not match
+ * %GSS_S_FAILURE - received checksum could not be checked
+ * %GSS_S_CONTEXT_EXPIRED - Kerberos context is no longer valid
+ */
+static u32 gss_krb5_verify_mic(struct gss_ctx *gctx,
+ struct xdr_buf *message_buffer,
+ struct xdr_netobj *read_token)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+
+ return kctx->gk5e->verify_mic(kctx, message_buffer, read_token);
+}
+
+/**
+ * gss_krb5_wrap - gss_wrap for the Kerberos GSS mechanism
+ * @gctx: initialized GSS context
+ * @offset: byte offset in @buf to start writing the cipher text
+ * @buf: OUT: send buffer
+ * @pages: plaintext to wrap
+ *
+ * Return values:
+ * %GSS_S_COMPLETE - success, @buf has been updated
+ * %GSS_S_FAILURE - @buf could not be wrapped
+ * %GSS_S_CONTEXT_EXPIRED - Kerberos context is no longer valid
+ */
+static u32 gss_krb5_wrap(struct gss_ctx *gctx, int offset,
+ struct xdr_buf *buf, struct page **pages)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+
+ return kctx->gk5e->wrap(kctx, offset, buf, pages);
+}
+
+/**
+ * gss_krb5_unwrap - gss_unwrap for the Kerberos GSS mechanism
+ * @gctx: initialized GSS context
+ * @offset: starting byte offset into @buf
+ * @len: size of ciphertext to unwrap
+ * @buf: ciphertext to unwrap
+ *
+ * Return values:
+ * %GSS_S_COMPLETE - success, @buf has been updated
+ * %GSS_S_DEFECTIVE_TOKEN - received blob is not valid
+ * %GSS_S_BAD_SIG - computed and received checksums do not match
+ * %GSS_S_FAILURE - @buf could not be unwrapped
+ * %GSS_S_CONTEXT_EXPIRED - Kerberos context is no longer valid
+ */
+static u32 gss_krb5_unwrap(struct gss_ctx *gctx, int offset,
+ int len, struct xdr_buf *buf)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+
+ return kctx->gk5e->unwrap(kctx, offset, len, buf,
+ &gctx->slack, &gctx->align);
+}
+
static const struct gss_api_ops gss_kerberos_ops = {
- .gss_import_sec_context = gss_import_sec_context_kerberos,
- .gss_get_mic = gss_get_mic_kerberos,
- .gss_verify_mic = gss_verify_mic_kerberos,
- .gss_wrap = gss_wrap_kerberos,
- .gss_unwrap = gss_unwrap_kerberos,
- .gss_delete_sec_context = gss_delete_sec_context_kerberos,
+ .gss_import_sec_context = gss_krb5_import_sec_context,
+ .gss_get_mic = gss_krb5_get_mic,
+ .gss_verify_mic = gss_krb5_verify_mic,
+ .gss_wrap = gss_krb5_wrap,
+ .gss_unwrap = gss_krb5_unwrap,
+ .gss_delete_sec_context = gss_krb5_delete_sec_context,
};
static struct pf_desc gss_kerberos_pfs[] = {
@@ -631,13 +640,14 @@ static struct gss_api_mech gss_kerberos_mech = {
.gm_ops = &gss_kerberos_ops,
.gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
.gm_pfs = gss_kerberos_pfs,
- .gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES,
+ .gm_upcall_enctypes = gss_krb5_enctype_priority_list,
};
static int __init init_kerberos_module(void)
{
int status;
+ gss_krb5_prepare_enctype_priority_list();
status = gss_mech_register(&gss_kerberos_mech);
if (status)
printk("Failed to register kerberos gss mechanism!\n");
@@ -649,6 +659,7 @@ static void __exit cleanup_kerberos_module(void)
gss_mech_unregister(&gss_kerberos_mech);
}
+MODULE_DESCRIPTION("Sun RPC Kerberos 5 module");
MODULE_LICENSE("GPL");
module_init(init_kerberos_module);
module_exit(cleanup_kerberos_module);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 33061417ec97..ce540df9bce4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -65,37 +65,13 @@
#include <linux/crypto.h>
#include <linux/atomic.h>
+#include "gss_krb5_internal.h"
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
static void *
-setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
-{
- u16 *ptr;
- void *krb5_hdr;
- int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
-
- token->len = g_token_size(&ctx->mech_used, body_size);
-
- ptr = (u16 *)token->data;
- g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
-
- /* ptr now at start of header described in rfc 1964, section 1.2.1: */
- krb5_hdr = ptr;
- *ptr++ = KG_TOK_MIC_MSG;
- /*
- * signalg is stored as if it were converted from LE to host endian, even
- * though it's an opaque pair of bytes according to the RFC.
- */
- *ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg);
- *ptr++ = SEAL_ALG_NONE;
- *ptr = 0xffff;
-
- return krb5_hdr;
-}
-
-static void *
setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
u16 *ptr;
@@ -108,8 +84,10 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
flags |= 0x04;
/* Per rfc 4121, sec 4.2.6.1, there is no header,
- * just start the token */
- krb5_hdr = ptr = (u16 *)token->data;
+ * just start the token.
+ */
+ krb5_hdr = (u16 *)token->data;
+ ptr = krb5_hdr;
*ptr++ = KG2_TOK_MIC;
p = (u8 *)ptr;
@@ -123,57 +101,18 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
return krb5_hdr;
}
-static u32
-gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
- struct xdr_netobj *token)
-{
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- void *ptr;
- time64_t now;
- u32 seq_send;
- u8 *cksumkey;
-
- dprintk("RPC: %s\n", __func__);
- BUG_ON(ctx == NULL);
-
- now = ktime_get_real_seconds();
-
- ptr = setup_token(ctx, token);
-
- if (ctx->gk5e->keyed_cksum)
- cksumkey = ctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(ctx, ptr, 8, text, 0, cksumkey,
- KG_USAGE_SIGN, &md5cksum))
- return GSS_S_FAILURE;
-
- memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
-
- seq_send = atomic_fetch_inc(&ctx->seq_send);
-
- if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
- seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
- return GSS_S_FAILURE;
-
- return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
-}
-
-static u32
-gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
- struct xdr_netobj *token)
+u32
+gss_krb5_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
+ struct xdr_netobj *token)
{
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj cksumobj = { .len = sizeof(cksumdata),
- .data = cksumdata};
+ struct crypto_ahash *tfm = ctx->initiate ?
+ ctx->initiator_sign : ctx->acceptor_sign;
+ struct xdr_netobj cksumobj = {
+ .len = ctx->gk5e->cksumlength,
+ };
+ __be64 seq_send_be64;
void *krb5_hdr;
time64_t now;
- u8 *cksumkey;
- unsigned int cksum_usage;
- __be64 seq_send_be64;
dprintk("RPC: %s\n", __func__);
@@ -184,39 +123,11 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
seq_send_be64 = cpu_to_be64(atomic64_fetch_inc(&ctx->seq_send64));
memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
- if (ctx->initiate) {
- cksumkey = ctx->initiator_sign;
- cksum_usage = KG_USAGE_INITIATOR_SIGN;
- } else {
- cksumkey = ctx->acceptor_sign;
- cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
- }
-
- if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
- text, 0, cksumkey, cksum_usage, &cksumobj))
+ cksumobj.data = krb5_hdr + GSS_KRB5_TOK_HDR_LEN;
+ if (gss_krb5_checksum(tfm, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
+ text, 0, &cksumobj))
return GSS_S_FAILURE;
- memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len);
-
now = ktime_get_real_seconds();
-
return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
-
-u32
-gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
- struct xdr_netobj *token)
-{
- struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
-
- switch (ctx->enctype) {
- default:
- BUG();
- case ENCTYPE_DES_CBC_RAW:
- case ENCTYPE_DES3_CBC_RAW:
- return gss_get_mic_v1(ctx, text, token);
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return gss_get_mic_v2(ctx, text, token);
- }
-}
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
deleted file mode 100644
index 3200b971a814..000000000000
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * linux/net/sunrpc/gss_krb5_seqnum.c
- *
- * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c
- *
- * Copyright (c) 2000 The Regents of the University of Michigan.
- * All rights reserved.
- *
- * Andy Adamson <andros@umich.edu>
- */
-
-/*
- * Copyright 1993 by OpenVision Technologies, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose is hereby granted without fee,
- * provided that the above copyright notice appears in all copies and
- * that both that copyright notice and this permission notice appear in
- * supporting documentation, and that the name of OpenVision not be used
- * in advertising or publicity pertaining to distribution of the software
- * without specific, written prior permission. OpenVision makes no
- * representations about the suitability of this software for any
- * purpose. It is provided "as is" without express or implied warranty.
- *
- * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
- * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <crypto/skcipher.h>
-#include <linux/types.h>
-#include <linux/sunrpc/gss_krb5.h>
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-s32
-krb5_make_seq_num(struct krb5_ctx *kctx,
- struct crypto_sync_skcipher *key,
- int direction,
- u32 seqnum,
- unsigned char *cksum, unsigned char *buf)
-{
- unsigned char *plain;
- s32 code;
-
- plain = kmalloc(8, GFP_KERNEL);
- if (!plain)
- return -ENOMEM;
-
- plain[0] = (unsigned char) (seqnum & 0xff);
- plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
- plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
- plain[3] = (unsigned char) ((seqnum >> 24) & 0xff);
-
- plain[4] = direction;
- plain[5] = direction;
- plain[6] = direction;
- plain[7] = direction;
-
- code = krb5_encrypt(key, cksum, plain, buf, 8);
- kfree(plain);
- return code;
-}
-
-s32
-krb5_get_seq_num(struct krb5_ctx *kctx,
- unsigned char *cksum,
- unsigned char *buf,
- int *direction, u32 *seqnum)
-{
- s32 code;
- unsigned char *plain;
- struct crypto_sync_skcipher *key = kctx->seq;
-
- dprintk("RPC: krb5_get_seq_num:\n");
-
- plain = kmalloc(8, GFP_KERNEL);
- if (!plain)
- return -ENOMEM;
-
- if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
- goto out;
-
- if ((plain[4] != plain[5]) || (plain[4] != plain[6]) ||
- (plain[4] != plain[7])) {
- code = (s32)KG_BAD_SEQ;
- goto out;
- }
-
- *direction = plain[4];
-
- *seqnum = ((plain[0]) |
- (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
-
-out:
- kfree(plain);
- return code;
-}
diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c
new file mode 100644
index 000000000000..a5bff02cd7ba
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_test.c
@@ -0,0 +1,1859 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Oracle and/or its affiliates.
+ *
+ * KUnit test of SunRPC's GSS Kerberos mechanism. Subsystem
+ * name is "rpcsec_gss_krb5".
+ */
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include <linux/kernel.h>
+#include <crypto/hash.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/gss_krb5.h>
+
+#include "gss_krb5_internal.h"
+
+MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING");
+
+struct gss_krb5_test_param {
+ const char *desc;
+ u32 enctype;
+ u32 nfold;
+ u32 constant;
+ const struct xdr_netobj *base_key;
+ const struct xdr_netobj *Ke;
+ const struct xdr_netobj *usage;
+ const struct xdr_netobj *plaintext;
+ const struct xdr_netobj *confounder;
+ const struct xdr_netobj *expected_result;
+ const struct xdr_netobj *expected_hmac;
+ const struct xdr_netobj *next_iv;
+};
+
+static inline void gss_krb5_get_desc(const struct gss_krb5_test_param *param,
+ char *desc)
+{
+ strscpy(desc, param->desc, KUNIT_PARAM_DESC_SIZE);
+}
+
+static void kdf_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_netobj derivedkey;
+ int err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ if (!gk5e)
+ kunit_skip(test, "Encryption type is not available");
+
+ derivedkey.data = kunit_kzalloc(test, param->expected_result->len,
+ GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, derivedkey.data);
+ derivedkey.len = param->expected_result->len;
+
+ /* Act */
+ err = gk5e->derive_key(gk5e, param->base_key, &derivedkey,
+ param->usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ derivedkey.data, derivedkey.len), 0,
+ "key mismatch");
+}
+
+static void checksum_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ struct xdr_buf buf = {
+ .head[0].iov_len = param->plaintext->len,
+ .len = param->plaintext->len,
+ };
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_netobj Kc, checksum;
+ struct crypto_ahash *tfm;
+ int err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ if (!gk5e)
+ kunit_skip(test, "Encryption type is not available");
+
+ Kc.len = gk5e->Kc_length;
+ Kc.data = kunit_kzalloc(test, Kc.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, Kc.data);
+ err = gk5e->derive_key(gk5e, param->base_key, &Kc,
+ param->usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ tfm = crypto_alloc_ahash(gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tfm);
+ err = crypto_ahash_setkey(tfm, Kc.data, Kc.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ buf.head[0].iov_base = kunit_kzalloc(test, buf.head[0].iov_len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf.head[0].iov_base);
+ memcpy(buf.head[0].iov_base, param->plaintext->data, buf.head[0].iov_len);
+
+ checksum.len = gk5e->cksumlength;
+ checksum.data = kunit_kzalloc(test, checksum.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, checksum.data);
+
+ /* Act */
+ err = gss_krb5_checksum(tfm, NULL, 0, &buf, 0, &checksum);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ checksum.data, checksum.len), 0,
+ "checksum mismatch");
+
+ crypto_free_ahash(tfm);
+}
+
+#define DEFINE_HEX_XDR_NETOBJ(name, hex_array...) \
+ static const u8 name ## _data[] = { hex_array }; \
+ static const struct xdr_netobj name = { \
+ .data = (u8 *)name##_data, \
+ .len = sizeof(name##_data), \
+ }
+
+#define DEFINE_STR_XDR_NETOBJ(name, string) \
+ static const u8 name ## _str[] = string; \
+ static const struct xdr_netobj name = { \
+ .data = (u8 *)name##_str, \
+ .len = sizeof(name##_str) - 1, \
+ }
+
+/*
+ * RFC 3961 Appendix A.1. n-fold
+ *
+ * The n-fold function is defined in section 5.1 of RFC 3961.
+ *
+ * This test material is copyright (C) The Internet Society (2005).
+ */
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test1_plaintext,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test1_expected_result,
+ 0xbe, 0x07, 0x26, 0x31, 0x27, 0x6b, 0x19, 0x55
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test2_plaintext,
+ 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test2_expected_result,
+ 0x78, 0xa0, 0x7b, 0x6c, 0xaf, 0x85, 0xfa
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test3_plaintext,
+ 0x52, 0x6f, 0x75, 0x67, 0x68, 0x20, 0x43, 0x6f,
+ 0x6e, 0x73, 0x65, 0x6e, 0x73, 0x75, 0x73, 0x2c,
+ 0x20, 0x61, 0x6e, 0x64, 0x20, 0x52, 0x75, 0x6e,
+ 0x6e, 0x69, 0x6e, 0x67, 0x20, 0x43, 0x6f, 0x64,
+ 0x65
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test3_expected_result,
+ 0xbb, 0x6e, 0xd3, 0x08, 0x70, 0xb7, 0xf0, 0xe0
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test4_plaintext,
+ 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test4_expected_result,
+ 0x59, 0xe4, 0xa8, 0xca, 0x7c, 0x03, 0x85, 0xc3,
+ 0xc3, 0x7b, 0x3f, 0x6d, 0x20, 0x00, 0x24, 0x7c,
+ 0xb6, 0xe6, 0xbd, 0x5b, 0x3e
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test5_plaintext,
+ 0x4d, 0x41, 0x53, 0x53, 0x41, 0x43, 0x48, 0x56,
+ 0x53, 0x45, 0x54, 0x54, 0x53, 0x20, 0x49, 0x4e,
+ 0x53, 0x54, 0x49, 0x54, 0x56, 0x54, 0x45, 0x20,
+ 0x4f, 0x46, 0x20, 0x54, 0x45, 0x43, 0x48, 0x4e,
+ 0x4f, 0x4c, 0x4f, 0x47, 0x59
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test5_expected_result,
+ 0xdb, 0x3b, 0x0d, 0x8f, 0x0b, 0x06, 0x1e, 0x60,
+ 0x32, 0x82, 0xb3, 0x08, 0xa5, 0x08, 0x41, 0x22,
+ 0x9a, 0xd7, 0x98, 0xfa, 0xb9, 0x54, 0x0c, 0x1b
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test6_plaintext,
+ 0x51
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test6_expected_result,
+ 0x51, 0x8a, 0x54, 0xa2, 0x15, 0xa8, 0x45, 0x2a,
+ 0x51, 0x8a, 0x54, 0xa2, 0x15, 0xa8, 0x45, 0x2a,
+ 0x51, 0x8a, 0x54, 0xa2, 0x15
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test7_plaintext,
+ 0x62, 0x61
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test7_expected_result,
+ 0xfb, 0x25, 0xd5, 0x31, 0xae, 0x89, 0x74, 0x49,
+ 0x9f, 0x52, 0xfd, 0x92, 0xea, 0x98, 0x57, 0xc4,
+ 0xba, 0x24, 0xcf, 0x29, 0x7e
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test_kerberos,
+ 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test8_expected_result,
+ 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test9_expected_result,
+ 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73,
+ 0x7b, 0x9b, 0x5b, 0x2b, 0x93, 0x13, 0x2b, 0x93
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test10_expected_result,
+ 0x83, 0x72, 0xc2, 0x36, 0x34, 0x4e, 0x5f, 0x15,
+ 0x50, 0xcd, 0x07, 0x47, 0xe1, 0x5d, 0x62, 0xca,
+ 0x7a, 0x5a, 0x3b, 0xce, 0xa4
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test11_expected_result,
+ 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73,
+ 0x7b, 0x9b, 0x5b, 0x2b, 0x93, 0x13, 0x2b, 0x93,
+ 0x5c, 0x9b, 0xdc, 0xda, 0xd9, 0x5c, 0x98, 0x99,
+ 0xc4, 0xca, 0xe4, 0xde, 0xe6, 0xd6, 0xca, 0xe4
+);
+
+static const struct gss_krb5_test_param rfc3961_nfold_test_params[] = {
+ {
+ .desc = "64-fold(\"012345\")",
+ .nfold = 64,
+ .plaintext = &nfold_test1_plaintext,
+ .expected_result = &nfold_test1_expected_result,
+ },
+ {
+ .desc = "56-fold(\"password\")",
+ .nfold = 56,
+ .plaintext = &nfold_test2_plaintext,
+ .expected_result = &nfold_test2_expected_result,
+ },
+ {
+ .desc = "64-fold(\"Rough Consensus, and Running Code\")",
+ .nfold = 64,
+ .plaintext = &nfold_test3_plaintext,
+ .expected_result = &nfold_test3_expected_result,
+ },
+ {
+ .desc = "168-fold(\"password\")",
+ .nfold = 168,
+ .plaintext = &nfold_test4_plaintext,
+ .expected_result = &nfold_test4_expected_result,
+ },
+ {
+ .desc = "192-fold(\"MASSACHVSETTS INSTITVTE OF TECHNOLOGY\")",
+ .nfold = 192,
+ .plaintext = &nfold_test5_plaintext,
+ .expected_result = &nfold_test5_expected_result,
+ },
+ {
+ .desc = "168-fold(\"Q\")",
+ .nfold = 168,
+ .plaintext = &nfold_test6_plaintext,
+ .expected_result = &nfold_test6_expected_result,
+ },
+ {
+ .desc = "168-fold(\"ba\")",
+ .nfold = 168,
+ .plaintext = &nfold_test7_plaintext,
+ .expected_result = &nfold_test7_expected_result,
+ },
+ {
+ .desc = "64-fold(\"kerberos\")",
+ .nfold = 64,
+ .plaintext = &nfold_test_kerberos,
+ .expected_result = &nfold_test8_expected_result,
+ },
+ {
+ .desc = "128-fold(\"kerberos\")",
+ .nfold = 128,
+ .plaintext = &nfold_test_kerberos,
+ .expected_result = &nfold_test9_expected_result,
+ },
+ {
+ .desc = "168-fold(\"kerberos\")",
+ .nfold = 168,
+ .plaintext = &nfold_test_kerberos,
+ .expected_result = &nfold_test10_expected_result,
+ },
+ {
+ .desc = "256-fold(\"kerberos\")",
+ .nfold = 256,
+ .plaintext = &nfold_test_kerberos,
+ .expected_result = &nfold_test11_expected_result,
+ },
+};
+
+/* Creates the function rfc3961_nfold_gen_params */
+KUNIT_ARRAY_PARAM(rfc3961_nfold, rfc3961_nfold_test_params, gss_krb5_get_desc);
+
+static void rfc3961_nfold_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ u8 *result;
+
+ /* Arrange */
+ result = kunit_kzalloc(test, 4096, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, result);
+
+ /* Act */
+ krb5_nfold(param->plaintext->len * 8, param->plaintext->data,
+ param->expected_result->len * 8, result);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ result, param->expected_result->len), 0,
+ "result mismatch");
+}
+
+static struct kunit_case rfc3961_test_cases[] = {
+ {
+ .name = "RFC 3961 n-fold",
+ .run_case = rfc3961_nfold_case,
+ .generate_params = rfc3961_nfold_gen_params,
+ },
+ {}
+};
+
+static struct kunit_suite rfc3961_suite = {
+ .name = "RFC 3961 tests",
+ .test_cases = rfc3961_test_cases,
+};
+
+/*
+ * From RFC 3962 Appendix B: Sample Test Vectors
+ *
+ * Some test vectors for CBC with ciphertext stealing, using an
+ * initial vector of all-zero.
+ *
+ * This test material is copyright (C) The Internet Society (2005).
+ */
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_encryption_key,
+ 0x63, 0x68, 0x69, 0x63, 0x6b, 0x65, 0x6e, 0x20,
+ 0x74, 0x65, 0x72, 0x69, 0x79, 0x61, 0x6b, 0x69
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test1_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test1_expected_result,
+ 0xc6, 0x35, 0x35, 0x68, 0xf2, 0xbf, 0x8c, 0xb4,
+ 0xd8, 0xa5, 0x80, 0x36, 0x2d, 0xa7, 0xff, 0x7f,
+ 0x97
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test1_next_iv,
+ 0xc6, 0x35, 0x35, 0x68, 0xf2, 0xbf, 0x8c, 0xb4,
+ 0xd8, 0xa5, 0x80, 0x36, 0x2d, 0xa7, 0xff, 0x7f
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test2_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c,
+ 0x20, 0x47, 0x61, 0x75, 0x27, 0x73, 0x20
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test2_expected_result,
+ 0xfc, 0x00, 0x78, 0x3e, 0x0e, 0xfd, 0xb2, 0xc1,
+ 0xd4, 0x45, 0xd4, 0xc8, 0xef, 0xf7, 0xed, 0x22,
+ 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
+ 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test2_next_iv,
+ 0xfc, 0x00, 0x78, 0x3e, 0x0e, 0xfd, 0xb2, 0xc1,
+ 0xd4, 0x45, 0xd4, 0xc8, 0xef, 0xf7, 0xed, 0x22
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test3_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c,
+ 0x20, 0x47, 0x61, 0x75, 0x27, 0x73, 0x20, 0x43
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test3_expected_result,
+ 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
+ 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8,
+ 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
+ 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test3_next_iv,
+ 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
+ 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test4_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c,
+ 0x20, 0x47, 0x61, 0x75, 0x27, 0x73, 0x20, 0x43,
+ 0x68, 0x69, 0x63, 0x6b, 0x65, 0x6e, 0x2c, 0x20,
+ 0x70, 0x6c, 0x65, 0x61, 0x73, 0x65, 0x2c
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test4_expected_result,
+ 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
+ 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
+ 0xb3, 0xff, 0xfd, 0x94, 0x0c, 0x16, 0xa1, 0x8c,
+ 0x1b, 0x55, 0x49, 0xd2, 0xf8, 0x38, 0x02, 0x9e,
+ 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
+ 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test4_next_iv,
+ 0xb3, 0xff, 0xfd, 0x94, 0x0c, 0x16, 0xa1, 0x8c,
+ 0x1b, 0x55, 0x49, 0xd2, 0xf8, 0x38, 0x02, 0x9e
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test5_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c,
+ 0x20, 0x47, 0x61, 0x75, 0x27, 0x73, 0x20, 0x43,
+ 0x68, 0x69, 0x63, 0x6b, 0x65, 0x6e, 0x2c, 0x20,
+ 0x70, 0x6c, 0x65, 0x61, 0x73, 0x65, 0x2c, 0x20
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test5_expected_result,
+ 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
+ 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
+ 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0,
+ 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8,
+ 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
+ 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test5_next_iv,
+ 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0,
+ 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test6_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c,
+ 0x20, 0x47, 0x61, 0x75, 0x27, 0x73, 0x20, 0x43,
+ 0x68, 0x69, 0x63, 0x6b, 0x65, 0x6e, 0x2c, 0x20,
+ 0x70, 0x6c, 0x65, 0x61, 0x73, 0x65, 0x2c, 0x20,
+ 0x61, 0x6e, 0x64, 0x20, 0x77, 0x6f, 0x6e, 0x74,
+ 0x6f, 0x6e, 0x20, 0x73, 0x6f, 0x75, 0x70, 0x2e
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test6_expected_result,
+ 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
+ 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
+ 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
+ 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8,
+ 0x48, 0x07, 0xef, 0xe8, 0x36, 0xee, 0x89, 0xa5,
+ 0x26, 0x73, 0x0d, 0xbc, 0x2f, 0x7b, 0xc8, 0x40,
+ 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0,
+ 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test6_next_iv,
+ 0x48, 0x07, 0xef, 0xe8, 0x36, 0xee, 0x89, 0xa5,
+ 0x26, 0x73, 0x0d, 0xbc, 0x2f, 0x7b, 0xc8, 0x40
+);
+
+static const struct gss_krb5_test_param rfc3962_encrypt_test_params[] = {
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 1",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test1_plaintext,
+ .expected_result = &rfc3962_enc_test1_expected_result,
+ .next_iv = &rfc3962_enc_test1_next_iv,
+ },
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 2",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test2_plaintext,
+ .expected_result = &rfc3962_enc_test2_expected_result,
+ .next_iv = &rfc3962_enc_test2_next_iv,
+ },
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 3",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test3_plaintext,
+ .expected_result = &rfc3962_enc_test3_expected_result,
+ .next_iv = &rfc3962_enc_test3_next_iv,
+ },
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 4",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test4_plaintext,
+ .expected_result = &rfc3962_enc_test4_expected_result,
+ .next_iv = &rfc3962_enc_test4_next_iv,
+ },
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 5",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test5_plaintext,
+ .expected_result = &rfc3962_enc_test5_expected_result,
+ .next_iv = &rfc3962_enc_test5_next_iv,
+ },
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 6",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test6_plaintext,
+ .expected_result = &rfc3962_enc_test6_expected_result,
+ .next_iv = &rfc3962_enc_test6_next_iv,
+ },
+};
+
+/* Creates the function rfc3962_encrypt_gen_params */
+KUNIT_ARRAY_PARAM(rfc3962_encrypt, rfc3962_encrypt_test_params,
+ gss_krb5_get_desc);
+
+/*
+ * This tests the implementation of the encryption part of the mechanism.
+ * It does not apply a confounder or test the result of HMAC over the
+ * plaintext.
+ */
+static void rfc3962_encrypt_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ struct crypto_sync_skcipher *cts_tfm, *cbc_tfm;
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_buf buf;
+ void *iv, *text;
+ u32 err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ if (!gk5e)
+ kunit_skip(test, "Encryption type is not available");
+
+ cbc_tfm = crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm);
+ err = crypto_sync_skcipher_setkey(cbc_tfm, param->Ke->data, param->Ke->len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cts_tfm = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cts_tfm);
+ err = crypto_sync_skcipher_setkey(cts_tfm, param->Ke->data, param->Ke->len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ iv = kunit_kzalloc(test, crypto_sync_skcipher_ivsize(cts_tfm), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, iv);
+
+ text = kunit_kzalloc(test, param->plaintext->len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, text);
+
+ memcpy(text, param->plaintext->data, param->plaintext->len);
+ memset(&buf, 0, sizeof(buf));
+ buf.head[0].iov_base = text;
+ buf.head[0].iov_len = param->plaintext->len;
+ buf.len = buf.head[0].iov_len;
+
+ /* Act */
+ err = krb5_cbc_cts_encrypt(cts_tfm, cbc_tfm, 0, &buf, NULL,
+ iv, crypto_sync_skcipher_ivsize(cts_tfm));
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ param->expected_result->len, buf.len,
+ "ciphertext length mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ text, param->expected_result->len), 0,
+ "ciphertext mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->next_iv->data, iv,
+ param->next_iv->len), 0,
+ "IV mismatch");
+
+ crypto_free_sync_skcipher(cts_tfm);
+ crypto_free_sync_skcipher(cbc_tfm);
+}
+
+static struct kunit_case rfc3962_test_cases[] = {
+ {
+ .name = "RFC 3962 encryption",
+ .run_case = rfc3962_encrypt_case,
+ .generate_params = rfc3962_encrypt_gen_params,
+ },
+ {}
+};
+
+static struct kunit_suite rfc3962_suite = {
+ .name = "RFC 3962 suite",
+ .test_cases = rfc3962_test_cases,
+};
+
+/*
+ * From RFC 6803 Section 10. Test vectors
+ *
+ * Sample results for key derivation
+ *
+ * Copyright (c) 2012 IETF Trust and the persons identified as the
+ * document authors. All rights reserved.
+ */
+
+DEFINE_HEX_XDR_NETOBJ(camellia128_cts_cmac_basekey,
+ 0x57, 0xd0, 0x29, 0x72, 0x98, 0xff, 0xd9, 0xd3,
+ 0x5d, 0xe5, 0xa4, 0x7f, 0xb4, 0xbd, 0xe2, 0x4b
+);
+DEFINE_HEX_XDR_NETOBJ(camellia128_cts_cmac_Kc,
+ 0xd1, 0x55, 0x77, 0x5a, 0x20, 0x9d, 0x05, 0xf0,
+ 0x2b, 0x38, 0xd4, 0x2a, 0x38, 0x9e, 0x5a, 0x56
+);
+DEFINE_HEX_XDR_NETOBJ(camellia128_cts_cmac_Ke,
+ 0x64, 0xdf, 0x83, 0xf8, 0x5a, 0x53, 0x2f, 0x17,
+ 0x57, 0x7d, 0x8c, 0x37, 0x03, 0x57, 0x96, 0xab
+);
+DEFINE_HEX_XDR_NETOBJ(camellia128_cts_cmac_Ki,
+ 0x3e, 0x4f, 0xbd, 0xf3, 0x0f, 0xb8, 0x25, 0x9c,
+ 0x42, 0x5c, 0xb6, 0xc9, 0x6f, 0x1f, 0x46, 0x35
+);
+
+DEFINE_HEX_XDR_NETOBJ(camellia256_cts_cmac_basekey,
+ 0xb9, 0xd6, 0x82, 0x8b, 0x20, 0x56, 0xb7, 0xbe,
+ 0x65, 0x6d, 0x88, 0xa1, 0x23, 0xb1, 0xfa, 0xc6,
+ 0x82, 0x14, 0xac, 0x2b, 0x72, 0x7e, 0xcf, 0x5f,
+ 0x69, 0xaf, 0xe0, 0xc4, 0xdf, 0x2a, 0x6d, 0x2c
+);
+DEFINE_HEX_XDR_NETOBJ(camellia256_cts_cmac_Kc,
+ 0xe4, 0x67, 0xf9, 0xa9, 0x55, 0x2b, 0xc7, 0xd3,
+ 0x15, 0x5a, 0x62, 0x20, 0xaf, 0x9c, 0x19, 0x22,
+ 0x0e, 0xee, 0xd4, 0xff, 0x78, 0xb0, 0xd1, 0xe6,
+ 0xa1, 0x54, 0x49, 0x91, 0x46, 0x1a, 0x9e, 0x50
+);
+DEFINE_HEX_XDR_NETOBJ(camellia256_cts_cmac_Ke,
+ 0x41, 0x2a, 0xef, 0xc3, 0x62, 0xa7, 0x28, 0x5f,
+ 0xc3, 0x96, 0x6c, 0x6a, 0x51, 0x81, 0xe7, 0x60,
+ 0x5a, 0xe6, 0x75, 0x23, 0x5b, 0x6d, 0x54, 0x9f,
+ 0xbf, 0xc9, 0xab, 0x66, 0x30, 0xa4, 0xc6, 0x04
+);
+DEFINE_HEX_XDR_NETOBJ(camellia256_cts_cmac_Ki,
+ 0xfa, 0x62, 0x4f, 0xa0, 0xe5, 0x23, 0x99, 0x3f,
+ 0xa3, 0x88, 0xae, 0xfd, 0xc6, 0x7e, 0x67, 0xeb,
+ 0xcd, 0x8c, 0x08, 0xe8, 0xa0, 0x24, 0x6b, 0x1d,
+ 0x73, 0xb0, 0xd1, 0xdd, 0x9f, 0xc5, 0x82, 0xb0
+);
+
+DEFINE_HEX_XDR_NETOBJ(usage_checksum,
+ 0x00, 0x00, 0x00, 0x02, KEY_USAGE_SEED_CHECKSUM
+);
+DEFINE_HEX_XDR_NETOBJ(usage_encryption,
+ 0x00, 0x00, 0x00, 0x02, KEY_USAGE_SEED_ENCRYPTION
+);
+DEFINE_HEX_XDR_NETOBJ(usage_integrity,
+ 0x00, 0x00, 0x00, 0x02, KEY_USAGE_SEED_INTEGRITY
+);
+
+static const struct gss_krb5_test_param rfc6803_kdf_test_params[] = {
+ {
+ .desc = "Derive Kc subkey for camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .base_key = &camellia128_cts_cmac_basekey,
+ .usage = &usage_checksum,
+ .expected_result = &camellia128_cts_cmac_Kc,
+ },
+ {
+ .desc = "Derive Ke subkey for camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .base_key = &camellia128_cts_cmac_basekey,
+ .usage = &usage_encryption,
+ .expected_result = &camellia128_cts_cmac_Ke,
+ },
+ {
+ .desc = "Derive Ki subkey for camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .base_key = &camellia128_cts_cmac_basekey,
+ .usage = &usage_integrity,
+ .expected_result = &camellia128_cts_cmac_Ki,
+ },
+ {
+ .desc = "Derive Kc subkey for camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .base_key = &camellia256_cts_cmac_basekey,
+ .usage = &usage_checksum,
+ .expected_result = &camellia256_cts_cmac_Kc,
+ },
+ {
+ .desc = "Derive Ke subkey for camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .base_key = &camellia256_cts_cmac_basekey,
+ .usage = &usage_encryption,
+ .expected_result = &camellia256_cts_cmac_Ke,
+ },
+ {
+ .desc = "Derive Ki subkey for camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .base_key = &camellia256_cts_cmac_basekey,
+ .usage = &usage_integrity,
+ .expected_result = &camellia256_cts_cmac_Ki,
+ },
+};
+
+/* Creates the function rfc6803_kdf_gen_params */
+KUNIT_ARRAY_PARAM(rfc6803_kdf, rfc6803_kdf_test_params, gss_krb5_get_desc);
+
+/*
+ * From RFC 6803 Section 10. Test vectors
+ *
+ * Sample checksums.
+ *
+ * Copyright (c) 2012 IETF Trust and the persons identified as the
+ * document authors. All rights reserved.
+ *
+ * XXX: These tests are likely to fail on EBCDIC or Unicode platforms.
+ */
+DEFINE_STR_XDR_NETOBJ(rfc6803_checksum_test1_plaintext,
+ "abcdefghijk");
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test1_basekey,
+ 0x1d, 0xc4, 0x6a, 0x8d, 0x76, 0x3f, 0x4f, 0x93,
+ 0x74, 0x2b, 0xcb, 0xa3, 0x38, 0x75, 0x76, 0xc3
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test1_usage,
+ 0x00, 0x00, 0x00, 0x07, KEY_USAGE_SEED_CHECKSUM
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test1_expected_result,
+ 0x11, 0x78, 0xe6, 0xc5, 0xc4, 0x7a, 0x8c, 0x1a,
+ 0xe0, 0xc4, 0xb9, 0xc7, 0xd4, 0xeb, 0x7b, 0x6b
+);
+
+DEFINE_STR_XDR_NETOBJ(rfc6803_checksum_test2_plaintext,
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test2_basekey,
+ 0x50, 0x27, 0xbc, 0x23, 0x1d, 0x0f, 0x3a, 0x9d,
+ 0x23, 0x33, 0x3f, 0x1c, 0xa6, 0xfd, 0xbe, 0x7c
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test2_usage,
+ 0x00, 0x00, 0x00, 0x08, KEY_USAGE_SEED_CHECKSUM
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test2_expected_result,
+ 0xd1, 0xb3, 0x4f, 0x70, 0x04, 0xa7, 0x31, 0xf2,
+ 0x3a, 0x0c, 0x00, 0xbf, 0x6c, 0x3f, 0x75, 0x3a
+);
+
+DEFINE_STR_XDR_NETOBJ(rfc6803_checksum_test3_plaintext,
+ "123456789");
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test3_basekey,
+ 0xb6, 0x1c, 0x86, 0xcc, 0x4e, 0x5d, 0x27, 0x57,
+ 0x54, 0x5a, 0xd4, 0x23, 0x39, 0x9f, 0xb7, 0x03,
+ 0x1e, 0xca, 0xb9, 0x13, 0xcb, 0xb9, 0x00, 0xbd,
+ 0x7a, 0x3c, 0x6d, 0xd8, 0xbf, 0x92, 0x01, 0x5b
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test3_usage,
+ 0x00, 0x00, 0x00, 0x09, KEY_USAGE_SEED_CHECKSUM
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test3_expected_result,
+ 0x87, 0xa1, 0x2c, 0xfd, 0x2b, 0x96, 0x21, 0x48,
+ 0x10, 0xf0, 0x1c, 0x82, 0x6e, 0x77, 0x44, 0xb1
+);
+
+DEFINE_STR_XDR_NETOBJ(rfc6803_checksum_test4_plaintext,
+ "!@#$%^&*()!@#$%^&*()!@#$%^&*()");
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test4_basekey,
+ 0x32, 0x16, 0x4c, 0x5b, 0x43, 0x4d, 0x1d, 0x15,
+ 0x38, 0xe4, 0xcf, 0xd9, 0xbe, 0x80, 0x40, 0xfe,
+ 0x8c, 0x4a, 0xc7, 0xac, 0xc4, 0xb9, 0x3d, 0x33,
+ 0x14, 0xd2, 0x13, 0x36, 0x68, 0x14, 0x7a, 0x05
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test4_usage,
+ 0x00, 0x00, 0x00, 0x0a, KEY_USAGE_SEED_CHECKSUM
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test4_expected_result,
+ 0x3f, 0xa0, 0xb4, 0x23, 0x55, 0xe5, 0x2b, 0x18,
+ 0x91, 0x87, 0x29, 0x4a, 0xa2, 0x52, 0xab, 0x64
+);
+
+static const struct gss_krb5_test_param rfc6803_checksum_test_params[] = {
+ {
+ .desc = "camellia128-cts-cmac checksum test 1",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .base_key = &rfc6803_checksum_test1_basekey,
+ .usage = &rfc6803_checksum_test1_usage,
+ .plaintext = &rfc6803_checksum_test1_plaintext,
+ .expected_result = &rfc6803_checksum_test1_expected_result,
+ },
+ {
+ .desc = "camellia128-cts-cmac checksum test 2",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .base_key = &rfc6803_checksum_test2_basekey,
+ .usage = &rfc6803_checksum_test2_usage,
+ .plaintext = &rfc6803_checksum_test2_plaintext,
+ .expected_result = &rfc6803_checksum_test2_expected_result,
+ },
+ {
+ .desc = "camellia256-cts-cmac checksum test 3",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .base_key = &rfc6803_checksum_test3_basekey,
+ .usage = &rfc6803_checksum_test3_usage,
+ .plaintext = &rfc6803_checksum_test3_plaintext,
+ .expected_result = &rfc6803_checksum_test3_expected_result,
+ },
+ {
+ .desc = "camellia256-cts-cmac checksum test 4",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .base_key = &rfc6803_checksum_test4_basekey,
+ .usage = &rfc6803_checksum_test4_usage,
+ .plaintext = &rfc6803_checksum_test4_plaintext,
+ .expected_result = &rfc6803_checksum_test4_expected_result,
+ },
+};
+
+/* Creates the function rfc6803_checksum_gen_params */
+KUNIT_ARRAY_PARAM(rfc6803_checksum, rfc6803_checksum_test_params,
+ gss_krb5_get_desc);
+
+/*
+ * From RFC 6803 Section 10. Test vectors
+ *
+ * Sample encryptions (all using the default cipher state)
+ *
+ * Copyright (c) 2012 IETF Trust and the persons identified as the
+ * document authors. All rights reserved.
+ *
+ * Key usage values are from errata 4326 against RFC 6803.
+ */
+
+static const struct xdr_netobj rfc6803_enc_empty_plaintext = {
+ .len = 0,
+};
+
+DEFINE_STR_XDR_NETOBJ(rfc6803_enc_1byte_plaintext, "1");
+DEFINE_STR_XDR_NETOBJ(rfc6803_enc_9byte_plaintext, "9 bytesss");
+DEFINE_STR_XDR_NETOBJ(rfc6803_enc_13byte_plaintext, "13 bytes byte");
+DEFINE_STR_XDR_NETOBJ(rfc6803_enc_30byte_plaintext,
+ "30 bytes bytes bytes bytes byt"
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test1_confounder,
+ 0xb6, 0x98, 0x22, 0xa1, 0x9a, 0x6b, 0x09, 0xc0,
+ 0xeb, 0xc8, 0x55, 0x7d, 0x1f, 0x1b, 0x6c, 0x0a
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test1_basekey,
+ 0x1d, 0xc4, 0x6a, 0x8d, 0x76, 0x3f, 0x4f, 0x93,
+ 0x74, 0x2b, 0xcb, 0xa3, 0x38, 0x75, 0x76, 0xc3
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test1_expected_result,
+ 0xc4, 0x66, 0xf1, 0x87, 0x10, 0x69, 0x92, 0x1e,
+ 0xdb, 0x7c, 0x6f, 0xde, 0x24, 0x4a, 0x52, 0xdb,
+ 0x0b, 0xa1, 0x0e, 0xdc, 0x19, 0x7b, 0xdb, 0x80,
+ 0x06, 0x65, 0x8c, 0xa3, 0xcc, 0xce, 0x6e, 0xb8
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test2_confounder,
+ 0x6f, 0x2f, 0xc3, 0xc2, 0xa1, 0x66, 0xfd, 0x88,
+ 0x98, 0x96, 0x7a, 0x83, 0xde, 0x95, 0x96, 0xd9
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test2_basekey,
+ 0x50, 0x27, 0xbc, 0x23, 0x1d, 0x0f, 0x3a, 0x9d,
+ 0x23, 0x33, 0x3f, 0x1c, 0xa6, 0xfd, 0xbe, 0x7c
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test2_expected_result,
+ 0x84, 0x2d, 0x21, 0xfd, 0x95, 0x03, 0x11, 0xc0,
+ 0xdd, 0x46, 0x4a, 0x3f, 0x4b, 0xe8, 0xd6, 0xda,
+ 0x88, 0xa5, 0x6d, 0x55, 0x9c, 0x9b, 0x47, 0xd3,
+ 0xf9, 0xa8, 0x50, 0x67, 0xaf, 0x66, 0x15, 0x59,
+ 0xb8
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test3_confounder,
+ 0xa5, 0xb4, 0xa7, 0x1e, 0x07, 0x7a, 0xee, 0xf9,
+ 0x3c, 0x87, 0x63, 0xc1, 0x8f, 0xdb, 0x1f, 0x10
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test3_basekey,
+ 0xa1, 0xbb, 0x61, 0xe8, 0x05, 0xf9, 0xba, 0x6d,
+ 0xde, 0x8f, 0xdb, 0xdd, 0xc0, 0x5c, 0xde, 0xa0
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test3_expected_result,
+ 0x61, 0x9f, 0xf0, 0x72, 0xe3, 0x62, 0x86, 0xff,
+ 0x0a, 0x28, 0xde, 0xb3, 0xa3, 0x52, 0xec, 0x0d,
+ 0x0e, 0xdf, 0x5c, 0x51, 0x60, 0xd6, 0x63, 0xc9,
+ 0x01, 0x75, 0x8c, 0xcf, 0x9d, 0x1e, 0xd3, 0x3d,
+ 0x71, 0xdb, 0x8f, 0x23, 0xaa, 0xbf, 0x83, 0x48,
+ 0xa0
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test4_confounder,
+ 0x19, 0xfe, 0xe4, 0x0d, 0x81, 0x0c, 0x52, 0x4b,
+ 0x5b, 0x22, 0xf0, 0x18, 0x74, 0xc6, 0x93, 0xda
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test4_basekey,
+ 0x2c, 0xa2, 0x7a, 0x5f, 0xaf, 0x55, 0x32, 0x24,
+ 0x45, 0x06, 0x43, 0x4e, 0x1c, 0xef, 0x66, 0x76
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test4_expected_result,
+ 0xb8, 0xec, 0xa3, 0x16, 0x7a, 0xe6, 0x31, 0x55,
+ 0x12, 0xe5, 0x9f, 0x98, 0xa7, 0xc5, 0x00, 0x20,
+ 0x5e, 0x5f, 0x63, 0xff, 0x3b, 0xb3, 0x89, 0xaf,
+ 0x1c, 0x41, 0xa2, 0x1d, 0x64, 0x0d, 0x86, 0x15,
+ 0xc9, 0xed, 0x3f, 0xbe, 0xb0, 0x5a, 0xb6, 0xac,
+ 0xb6, 0x76, 0x89, 0xb5, 0xea
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test5_confounder,
+ 0xca, 0x7a, 0x7a, 0xb4, 0xbe, 0x19, 0x2d, 0xab,
+ 0xd6, 0x03, 0x50, 0x6d, 0xb1, 0x9c, 0x39, 0xe2
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test5_basekey,
+ 0x78, 0x24, 0xf8, 0xc1, 0x6f, 0x83, 0xff, 0x35,
+ 0x4c, 0x6b, 0xf7, 0x51, 0x5b, 0x97, 0x3f, 0x43
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test5_expected_result,
+ 0xa2, 0x6a, 0x39, 0x05, 0xa4, 0xff, 0xd5, 0x81,
+ 0x6b, 0x7b, 0x1e, 0x27, 0x38, 0x0d, 0x08, 0x09,
+ 0x0c, 0x8e, 0xc1, 0xf3, 0x04, 0x49, 0x6e, 0x1a,
+ 0xbd, 0xcd, 0x2b, 0xdc, 0xd1, 0xdf, 0xfc, 0x66,
+ 0x09, 0x89, 0xe1, 0x17, 0xa7, 0x13, 0xdd, 0xbb,
+ 0x57, 0xa4, 0x14, 0x6c, 0x15, 0x87, 0xcb, 0xa4,
+ 0x35, 0x66, 0x65, 0x59, 0x1d, 0x22, 0x40, 0x28,
+ 0x2f, 0x58, 0x42, 0xb1, 0x05, 0xa5
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test6_confounder,
+ 0x3c, 0xbb, 0xd2, 0xb4, 0x59, 0x17, 0x94, 0x10,
+ 0x67, 0xf9, 0x65, 0x99, 0xbb, 0x98, 0x92, 0x6c
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test6_basekey,
+ 0xb6, 0x1c, 0x86, 0xcc, 0x4e, 0x5d, 0x27, 0x57,
+ 0x54, 0x5a, 0xd4, 0x23, 0x39, 0x9f, 0xb7, 0x03,
+ 0x1e, 0xca, 0xb9, 0x13, 0xcb, 0xb9, 0x00, 0xbd,
+ 0x7a, 0x3c, 0x6d, 0xd8, 0xbf, 0x92, 0x01, 0x5b
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test6_expected_result,
+ 0x03, 0x88, 0x6d, 0x03, 0x31, 0x0b, 0x47, 0xa6,
+ 0xd8, 0xf0, 0x6d, 0x7b, 0x94, 0xd1, 0xdd, 0x83,
+ 0x7e, 0xcc, 0xe3, 0x15, 0xef, 0x65, 0x2a, 0xff,
+ 0x62, 0x08, 0x59, 0xd9, 0x4a, 0x25, 0x92, 0x66
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test7_confounder,
+ 0xde, 0xf4, 0x87, 0xfc, 0xeb, 0xe6, 0xde, 0x63,
+ 0x46, 0xd4, 0xda, 0x45, 0x21, 0xbb, 0xa2, 0xd2
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test7_basekey,
+ 0x1b, 0x97, 0xfe, 0x0a, 0x19, 0x0e, 0x20, 0x21,
+ 0xeb, 0x30, 0x75, 0x3e, 0x1b, 0x6e, 0x1e, 0x77,
+ 0xb0, 0x75, 0x4b, 0x1d, 0x68, 0x46, 0x10, 0x35,
+ 0x58, 0x64, 0x10, 0x49, 0x63, 0x46, 0x38, 0x33
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test7_expected_result,
+ 0x2c, 0x9c, 0x15, 0x70, 0x13, 0x3c, 0x99, 0xbf,
+ 0x6a, 0x34, 0xbc, 0x1b, 0x02, 0x12, 0x00, 0x2f,
+ 0xd1, 0x94, 0x33, 0x87, 0x49, 0xdb, 0x41, 0x35,
+ 0x49, 0x7a, 0x34, 0x7c, 0xfc, 0xd9, 0xd1, 0x8a,
+ 0x12
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test8_confounder,
+ 0xad, 0x4f, 0xf9, 0x04, 0xd3, 0x4e, 0x55, 0x53,
+ 0x84, 0xb1, 0x41, 0x00, 0xfc, 0x46, 0x5f, 0x88
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test8_basekey,
+ 0x32, 0x16, 0x4c, 0x5b, 0x43, 0x4d, 0x1d, 0x15,
+ 0x38, 0xe4, 0xcf, 0xd9, 0xbe, 0x80, 0x40, 0xfe,
+ 0x8c, 0x4a, 0xc7, 0xac, 0xc4, 0xb9, 0x3d, 0x33,
+ 0x14, 0xd2, 0x13, 0x36, 0x68, 0x14, 0x7a, 0x05
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test8_expected_result,
+ 0x9c, 0x6d, 0xe7, 0x5f, 0x81, 0x2d, 0xe7, 0xed,
+ 0x0d, 0x28, 0xb2, 0x96, 0x35, 0x57, 0xa1, 0x15,
+ 0x64, 0x09, 0x98, 0x27, 0x5b, 0x0a, 0xf5, 0x15,
+ 0x27, 0x09, 0x91, 0x3f, 0xf5, 0x2a, 0x2a, 0x9c,
+ 0x8e, 0x63, 0xb8, 0x72, 0xf9, 0x2e, 0x64, 0xc8,
+ 0x39
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test9_confounder,
+ 0xcf, 0x9b, 0xca, 0x6d, 0xf1, 0x14, 0x4e, 0x0c,
+ 0x0a, 0xf9, 0xb8, 0xf3, 0x4c, 0x90, 0xd5, 0x14
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test9_basekey,
+ 0xb0, 0x38, 0xb1, 0x32, 0xcd, 0x8e, 0x06, 0x61,
+ 0x22, 0x67, 0xfa, 0xb7, 0x17, 0x00, 0x66, 0xd8,
+ 0x8a, 0xec, 0xcb, 0xa0, 0xb7, 0x44, 0xbf, 0xc6,
+ 0x0d, 0xc8, 0x9b, 0xca, 0x18, 0x2d, 0x07, 0x15
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test9_expected_result,
+ 0xee, 0xec, 0x85, 0xa9, 0x81, 0x3c, 0xdc, 0x53,
+ 0x67, 0x72, 0xab, 0x9b, 0x42, 0xde, 0xfc, 0x57,
+ 0x06, 0xf7, 0x26, 0xe9, 0x75, 0xdd, 0xe0, 0x5a,
+ 0x87, 0xeb, 0x54, 0x06, 0xea, 0x32, 0x4c, 0xa1,
+ 0x85, 0xc9, 0x98, 0x6b, 0x42, 0xaa, 0xbe, 0x79,
+ 0x4b, 0x84, 0x82, 0x1b, 0xee
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test10_confounder,
+ 0x64, 0x4d, 0xef, 0x38, 0xda, 0x35, 0x00, 0x72,
+ 0x75, 0x87, 0x8d, 0x21, 0x68, 0x55, 0xe2, 0x28
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test10_basekey,
+ 0xcc, 0xfc, 0xd3, 0x49, 0xbf, 0x4c, 0x66, 0x77,
+ 0xe8, 0x6e, 0x4b, 0x02, 0xb8, 0xea, 0xb9, 0x24,
+ 0xa5, 0x46, 0xac, 0x73, 0x1c, 0xf9, 0xbf, 0x69,
+ 0x89, 0xb9, 0x96, 0xe7, 0xd6, 0xbf, 0xbb, 0xa7
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test10_expected_result,
+ 0x0e, 0x44, 0x68, 0x09, 0x85, 0x85, 0x5f, 0x2d,
+ 0x1f, 0x18, 0x12, 0x52, 0x9c, 0xa8, 0x3b, 0xfd,
+ 0x8e, 0x34, 0x9d, 0xe6, 0xfd, 0x9a, 0xda, 0x0b,
+ 0xaa, 0xa0, 0x48, 0xd6, 0x8e, 0x26, 0x5f, 0xeb,
+ 0xf3, 0x4a, 0xd1, 0x25, 0x5a, 0x34, 0x49, 0x99,
+ 0xad, 0x37, 0x14, 0x68, 0x87, 0xa6, 0xc6, 0x84,
+ 0x57, 0x31, 0xac, 0x7f, 0x46, 0x37, 0x6a, 0x05,
+ 0x04, 0xcd, 0x06, 0x57, 0x14, 0x74
+);
+
+static const struct gss_krb5_test_param rfc6803_encrypt_test_params[] = {
+ {
+ .desc = "Encrypt empty plaintext with camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .constant = 0,
+ .base_key = &rfc6803_enc_test1_basekey,
+ .plaintext = &rfc6803_enc_empty_plaintext,
+ .confounder = &rfc6803_enc_test1_confounder,
+ .expected_result = &rfc6803_enc_test1_expected_result,
+ },
+ {
+ .desc = "Encrypt 1 byte with camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .constant = 1,
+ .base_key = &rfc6803_enc_test2_basekey,
+ .plaintext = &rfc6803_enc_1byte_plaintext,
+ .confounder = &rfc6803_enc_test2_confounder,
+ .expected_result = &rfc6803_enc_test2_expected_result,
+ },
+ {
+ .desc = "Encrypt 9 bytes with camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .constant = 2,
+ .base_key = &rfc6803_enc_test3_basekey,
+ .plaintext = &rfc6803_enc_9byte_plaintext,
+ .confounder = &rfc6803_enc_test3_confounder,
+ .expected_result = &rfc6803_enc_test3_expected_result,
+ },
+ {
+ .desc = "Encrypt 13 bytes with camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .constant = 3,
+ .base_key = &rfc6803_enc_test4_basekey,
+ .plaintext = &rfc6803_enc_13byte_plaintext,
+ .confounder = &rfc6803_enc_test4_confounder,
+ .expected_result = &rfc6803_enc_test4_expected_result,
+ },
+ {
+ .desc = "Encrypt 30 bytes with camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .constant = 4,
+ .base_key = &rfc6803_enc_test5_basekey,
+ .plaintext = &rfc6803_enc_30byte_plaintext,
+ .confounder = &rfc6803_enc_test5_confounder,
+ .expected_result = &rfc6803_enc_test5_expected_result,
+ },
+ {
+ .desc = "Encrypt empty plaintext with camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .constant = 0,
+ .base_key = &rfc6803_enc_test6_basekey,
+ .plaintext = &rfc6803_enc_empty_plaintext,
+ .confounder = &rfc6803_enc_test6_confounder,
+ .expected_result = &rfc6803_enc_test6_expected_result,
+ },
+ {
+ .desc = "Encrypt 1 byte with camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .constant = 1,
+ .base_key = &rfc6803_enc_test7_basekey,
+ .plaintext = &rfc6803_enc_1byte_plaintext,
+ .confounder = &rfc6803_enc_test7_confounder,
+ .expected_result = &rfc6803_enc_test7_expected_result,
+ },
+ {
+ .desc = "Encrypt 9 bytes with camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .constant = 2,
+ .base_key = &rfc6803_enc_test8_basekey,
+ .plaintext = &rfc6803_enc_9byte_plaintext,
+ .confounder = &rfc6803_enc_test8_confounder,
+ .expected_result = &rfc6803_enc_test8_expected_result,
+ },
+ {
+ .desc = "Encrypt 13 bytes with camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .constant = 3,
+ .base_key = &rfc6803_enc_test9_basekey,
+ .plaintext = &rfc6803_enc_13byte_plaintext,
+ .confounder = &rfc6803_enc_test9_confounder,
+ .expected_result = &rfc6803_enc_test9_expected_result,
+ },
+ {
+ .desc = "Encrypt 30 bytes with camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .constant = 4,
+ .base_key = &rfc6803_enc_test10_basekey,
+ .plaintext = &rfc6803_enc_30byte_plaintext,
+ .confounder = &rfc6803_enc_test10_confounder,
+ .expected_result = &rfc6803_enc_test10_expected_result,
+ },
+};
+
+/* Creates the function rfc6803_encrypt_gen_params */
+KUNIT_ARRAY_PARAM(rfc6803_encrypt, rfc6803_encrypt_test_params,
+ gss_krb5_get_desc);
+
+static void rfc6803_encrypt_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ struct crypto_sync_skcipher *cts_tfm, *cbc_tfm;
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_netobj Ke, Ki, checksum;
+ u8 usage_data[GSS_KRB5_K5CLENGTH];
+ struct xdr_netobj usage = {
+ .data = usage_data,
+ .len = sizeof(usage_data),
+ };
+ struct crypto_ahash *ahash_tfm;
+ unsigned int blocksize;
+ struct xdr_buf buf;
+ void *text;
+ size_t len;
+ u32 err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ if (!gk5e)
+ kunit_skip(test, "Encryption type is not available");
+
+ memset(usage_data, 0, sizeof(usage_data));
+ usage.data[3] = param->constant;
+
+ Ke.len = gk5e->Ke_length;
+ Ke.data = kunit_kzalloc(test, Ke.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, Ke.data);
+ usage.data[4] = KEY_USAGE_SEED_ENCRYPTION;
+ err = gk5e->derive_key(gk5e, param->base_key, &Ke, &usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cbc_tfm = crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm);
+ err = crypto_sync_skcipher_setkey(cbc_tfm, Ke.data, Ke.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cts_tfm = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cts_tfm);
+ err = crypto_sync_skcipher_setkey(cts_tfm, Ke.data, Ke.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ blocksize = crypto_sync_skcipher_blocksize(cts_tfm);
+
+ len = param->confounder->len + param->plaintext->len + blocksize;
+ text = kunit_kzalloc(test, len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, text);
+ memcpy(text, param->confounder->data, param->confounder->len);
+ memcpy(text + param->confounder->len, param->plaintext->data,
+ param->plaintext->len);
+
+ memset(&buf, 0, sizeof(buf));
+ buf.head[0].iov_base = text;
+ buf.head[0].iov_len = param->confounder->len + param->plaintext->len;
+ buf.len = buf.head[0].iov_len;
+
+ checksum.len = gk5e->cksumlength;
+ checksum.data = kunit_kzalloc(test, checksum.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, checksum.data);
+
+ Ki.len = gk5e->Ki_length;
+ Ki.data = kunit_kzalloc(test, Ki.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, Ki.data);
+ usage.data[4] = KEY_USAGE_SEED_INTEGRITY;
+ err = gk5e->derive_key(gk5e, param->base_key, &Ki,
+ &usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ ahash_tfm = crypto_alloc_ahash(gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ahash_tfm);
+ err = crypto_ahash_setkey(ahash_tfm, Ki.data, Ki.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Act */
+ err = gss_krb5_checksum(ahash_tfm, NULL, 0, &buf, 0, &checksum);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ err = krb5_cbc_cts_encrypt(cts_tfm, cbc_tfm, 0, &buf, NULL, NULL, 0);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test, param->expected_result->len,
+ buf.len + checksum.len,
+ "ciphertext length mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ buf.head[0].iov_base, buf.len), 0,
+ "encrypted result mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data +
+ (param->expected_result->len - checksum.len),
+ checksum.data, checksum.len), 0,
+ "HMAC mismatch");
+
+ crypto_free_ahash(ahash_tfm);
+ crypto_free_sync_skcipher(cts_tfm);
+ crypto_free_sync_skcipher(cbc_tfm);
+}
+
+static struct kunit_case rfc6803_test_cases[] = {
+ {
+ .name = "RFC 6803 key derivation",
+ .run_case = kdf_case,
+ .generate_params = rfc6803_kdf_gen_params,
+ },
+ {
+ .name = "RFC 6803 checksum",
+ .run_case = checksum_case,
+ .generate_params = rfc6803_checksum_gen_params,
+ },
+ {
+ .name = "RFC 6803 encryption",
+ .run_case = rfc6803_encrypt_case,
+ .generate_params = rfc6803_encrypt_gen_params,
+ },
+ {}
+};
+
+static struct kunit_suite rfc6803_suite = {
+ .name = "RFC 6803 suite",
+ .test_cases = rfc6803_test_cases,
+};
+
+/*
+ * From RFC 8009 Appendix A. Test Vectors
+ *
+ * Sample results for SHA-2 enctype key derivation
+ *
+ * This test material is copyright (c) 2016 IETF Trust and the
+ * persons identified as the document authors. All rights reserved.
+ */
+
+DEFINE_HEX_XDR_NETOBJ(aes128_cts_hmac_sha256_128_basekey,
+ 0x37, 0x05, 0xd9, 0x60, 0x80, 0xc1, 0x77, 0x28,
+ 0xa0, 0xe8, 0x00, 0xea, 0xb6, 0xe0, 0xd2, 0x3c
+);
+DEFINE_HEX_XDR_NETOBJ(aes128_cts_hmac_sha256_128_Kc,
+ 0xb3, 0x1a, 0x01, 0x8a, 0x48, 0xf5, 0x47, 0x76,
+ 0xf4, 0x03, 0xe9, 0xa3, 0x96, 0x32, 0x5d, 0xc3
+);
+DEFINE_HEX_XDR_NETOBJ(aes128_cts_hmac_sha256_128_Ke,
+ 0x9b, 0x19, 0x7d, 0xd1, 0xe8, 0xc5, 0x60, 0x9d,
+ 0x6e, 0x67, 0xc3, 0xe3, 0x7c, 0x62, 0xc7, 0x2e
+);
+DEFINE_HEX_XDR_NETOBJ(aes128_cts_hmac_sha256_128_Ki,
+ 0x9f, 0xda, 0x0e, 0x56, 0xab, 0x2d, 0x85, 0xe1,
+ 0x56, 0x9a, 0x68, 0x86, 0x96, 0xc2, 0x6a, 0x6c
+);
+
+DEFINE_HEX_XDR_NETOBJ(aes256_cts_hmac_sha384_192_basekey,
+ 0x6d, 0x40, 0x4d, 0x37, 0xfa, 0xf7, 0x9f, 0x9d,
+ 0xf0, 0xd3, 0x35, 0x68, 0xd3, 0x20, 0x66, 0x98,
+ 0x00, 0xeb, 0x48, 0x36, 0x47, 0x2e, 0xa8, 0xa0,
+ 0x26, 0xd1, 0x6b, 0x71, 0x82, 0x46, 0x0c, 0x52
+);
+DEFINE_HEX_XDR_NETOBJ(aes256_cts_hmac_sha384_192_Kc,
+ 0xef, 0x57, 0x18, 0xbe, 0x86, 0xcc, 0x84, 0x96,
+ 0x3d, 0x8b, 0xbb, 0x50, 0x31, 0xe9, 0xf5, 0xc4,
+ 0xba, 0x41, 0xf2, 0x8f, 0xaf, 0x69, 0xe7, 0x3d
+);
+DEFINE_HEX_XDR_NETOBJ(aes256_cts_hmac_sha384_192_Ke,
+ 0x56, 0xab, 0x22, 0xbe, 0xe6, 0x3d, 0x82, 0xd7,
+ 0xbc, 0x52, 0x27, 0xf6, 0x77, 0x3f, 0x8e, 0xa7,
+ 0xa5, 0xeb, 0x1c, 0x82, 0x51, 0x60, 0xc3, 0x83,
+ 0x12, 0x98, 0x0c, 0x44, 0x2e, 0x5c, 0x7e, 0x49
+);
+DEFINE_HEX_XDR_NETOBJ(aes256_cts_hmac_sha384_192_Ki,
+ 0x69, 0xb1, 0x65, 0x14, 0xe3, 0xcd, 0x8e, 0x56,
+ 0xb8, 0x20, 0x10, 0xd5, 0xc7, 0x30, 0x12, 0xb6,
+ 0x22, 0xc4, 0xd0, 0x0f, 0xfc, 0x23, 0xed, 0x1f
+);
+
+static const struct gss_krb5_test_param rfc8009_kdf_test_params[] = {
+ {
+ .desc = "Derive Kc subkey for aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .usage = &usage_checksum,
+ .expected_result = &aes128_cts_hmac_sha256_128_Kc,
+ },
+ {
+ .desc = "Derive Ke subkey for aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .usage = &usage_encryption,
+ .expected_result = &aes128_cts_hmac_sha256_128_Ke,
+ },
+ {
+ .desc = "Derive Ki subkey for aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .usage = &usage_integrity,
+ .expected_result = &aes128_cts_hmac_sha256_128_Ki,
+ },
+ {
+ .desc = "Derive Kc subkey for aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .usage = &usage_checksum,
+ .expected_result = &aes256_cts_hmac_sha384_192_Kc,
+ },
+ {
+ .desc = "Derive Ke subkey for aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .usage = &usage_encryption,
+ .expected_result = &aes256_cts_hmac_sha384_192_Ke,
+ },
+ {
+ .desc = "Derive Ki subkey for aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .usage = &usage_integrity,
+ .expected_result = &aes256_cts_hmac_sha384_192_Ki,
+ },
+};
+
+/* Creates the function rfc8009_kdf_gen_params */
+KUNIT_ARRAY_PARAM(rfc8009_kdf, rfc8009_kdf_test_params, gss_krb5_get_desc);
+
+/*
+ * From RFC 8009 Appendix A. Test Vectors
+ *
+ * These sample checksums use the above sample key derivation results,
+ * including use of the same base-key and key usage values.
+ *
+ * This test material is copyright (c) 2016 IETF Trust and the
+ * persons identified as the document authors. All rights reserved.
+ */
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_checksum_plaintext,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_checksum_test1_expected_result,
+ 0xd7, 0x83, 0x67, 0x18, 0x66, 0x43, 0xd6, 0x7b,
+ 0x41, 0x1c, 0xba, 0x91, 0x39, 0xfc, 0x1d, 0xee
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_checksum_test2_expected_result,
+ 0x45, 0xee, 0x79, 0x15, 0x67, 0xee, 0xfc, 0xa3,
+ 0x7f, 0x4a, 0xc1, 0xe0, 0x22, 0x2d, 0xe8, 0x0d,
+ 0x43, 0xc3, 0xbf, 0xa0, 0x66, 0x99, 0x67, 0x2a
+);
+
+static const struct gss_krb5_test_param rfc8009_checksum_test_params[] = {
+ {
+ .desc = "Checksum with aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .usage = &usage_checksum,
+ .plaintext = &rfc8009_checksum_plaintext,
+ .expected_result = &rfc8009_checksum_test1_expected_result,
+ },
+ {
+ .desc = "Checksum with aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .usage = &usage_checksum,
+ .plaintext = &rfc8009_checksum_plaintext,
+ .expected_result = &rfc8009_checksum_test2_expected_result,
+ },
+};
+
+/* Creates the function rfc8009_checksum_gen_params */
+KUNIT_ARRAY_PARAM(rfc8009_checksum, rfc8009_checksum_test_params,
+ gss_krb5_get_desc);
+
+/*
+ * From RFC 8009 Appendix A. Test Vectors
+ *
+ * Sample encryptions (all using the default cipher state):
+ * --------------------------------------------------------
+ *
+ * These sample encryptions use the above sample key derivation results,
+ * including use of the same base-key and key usage values.
+ *
+ * This test material is copyright (c) 2016 IETF Trust and the
+ * persons identified as the document authors. All rights reserved.
+ */
+
+static const struct xdr_netobj rfc8009_enc_empty_plaintext = {
+ .len = 0,
+};
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_short_plaintext,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_block_plaintext,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_long_plaintext,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test1_confounder,
+ 0x7e, 0x58, 0x95, 0xea, 0xf2, 0x67, 0x24, 0x35,
+ 0xba, 0xd8, 0x17, 0xf5, 0x45, 0xa3, 0x71, 0x48
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test1_expected_result,
+ 0xef, 0x85, 0xfb, 0x89, 0x0b, 0xb8, 0x47, 0x2f,
+ 0x4d, 0xab, 0x20, 0x39, 0x4d, 0xca, 0x78, 0x1d
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test1_expected_hmac,
+ 0xad, 0x87, 0x7e, 0xda, 0x39, 0xd5, 0x0c, 0x87,
+ 0x0c, 0x0d, 0x5a, 0x0a, 0x8e, 0x48, 0xc7, 0x18
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test2_confounder,
+ 0x7b, 0xca, 0x28, 0x5e, 0x2f, 0xd4, 0x13, 0x0f,
+ 0xb5, 0x5b, 0x1a, 0x5c, 0x83, 0xbc, 0x5b, 0x24
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test2_expected_result,
+ 0x84, 0xd7, 0xf3, 0x07, 0x54, 0xed, 0x98, 0x7b,
+ 0xab, 0x0b, 0xf3, 0x50, 0x6b, 0xeb, 0x09, 0xcf,
+ 0xb5, 0x54, 0x02, 0xce, 0xf7, 0xe6
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test2_expected_hmac,
+ 0x87, 0x7c, 0xe9, 0x9e, 0x24, 0x7e, 0x52, 0xd1,
+ 0x6e, 0xd4, 0x42, 0x1d, 0xfd, 0xf8, 0x97, 0x6c
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test3_confounder,
+ 0x56, 0xab, 0x21, 0x71, 0x3f, 0xf6, 0x2c, 0x0a,
+ 0x14, 0x57, 0x20, 0x0f, 0x6f, 0xa9, 0x94, 0x8f
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test3_expected_result,
+ 0x35, 0x17, 0xd6, 0x40, 0xf5, 0x0d, 0xdc, 0x8a,
+ 0xd3, 0x62, 0x87, 0x22, 0xb3, 0x56, 0x9d, 0x2a,
+ 0xe0, 0x74, 0x93, 0xfa, 0x82, 0x63, 0x25, 0x40,
+ 0x80, 0xea, 0x65, 0xc1, 0x00, 0x8e, 0x8f, 0xc2
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test3_expected_hmac,
+ 0x95, 0xfb, 0x48, 0x52, 0xe7, 0xd8, 0x3e, 0x1e,
+ 0x7c, 0x48, 0xc3, 0x7e, 0xeb, 0xe6, 0xb0, 0xd3
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test4_confounder,
+ 0xa7, 0xa4, 0xe2, 0x9a, 0x47, 0x28, 0xce, 0x10,
+ 0x66, 0x4f, 0xb6, 0x4e, 0x49, 0xad, 0x3f, 0xac
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test4_expected_result,
+ 0x72, 0x0f, 0x73, 0xb1, 0x8d, 0x98, 0x59, 0xcd,
+ 0x6c, 0xcb, 0x43, 0x46, 0x11, 0x5c, 0xd3, 0x36,
+ 0xc7, 0x0f, 0x58, 0xed, 0xc0, 0xc4, 0x43, 0x7c,
+ 0x55, 0x73, 0x54, 0x4c, 0x31, 0xc8, 0x13, 0xbc,
+ 0xe1, 0xe6, 0xd0, 0x72, 0xc1
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test4_expected_hmac,
+ 0x86, 0xb3, 0x9a, 0x41, 0x3c, 0x2f, 0x92, 0xca,
+ 0x9b, 0x83, 0x34, 0xa2, 0x87, 0xff, 0xcb, 0xfc
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test5_confounder,
+ 0xf7, 0x64, 0xe9, 0xfa, 0x15, 0xc2, 0x76, 0x47,
+ 0x8b, 0x2c, 0x7d, 0x0c, 0x4e, 0x5f, 0x58, 0xe4
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test5_expected_result,
+ 0x41, 0xf5, 0x3f, 0xa5, 0xbf, 0xe7, 0x02, 0x6d,
+ 0x91, 0xfa, 0xf9, 0xbe, 0x95, 0x91, 0x95, 0xa0
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test5_expected_hmac,
+ 0x58, 0x70, 0x72, 0x73, 0xa9, 0x6a, 0x40, 0xf0,
+ 0xa0, 0x19, 0x60, 0x62, 0x1a, 0xc6, 0x12, 0x74,
+ 0x8b, 0x9b, 0xbf, 0xbe, 0x7e, 0xb4, 0xce, 0x3c
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test6_confounder,
+ 0xb8, 0x0d, 0x32, 0x51, 0xc1, 0xf6, 0x47, 0x14,
+ 0x94, 0x25, 0x6f, 0xfe, 0x71, 0x2d, 0x0b, 0x9a
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test6_expected_result,
+ 0x4e, 0xd7, 0xb3, 0x7c, 0x2b, 0xca, 0xc8, 0xf7,
+ 0x4f, 0x23, 0xc1, 0xcf, 0x07, 0xe6, 0x2b, 0xc7,
+ 0xb7, 0x5f, 0xb3, 0xf6, 0x37, 0xb9
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test6_expected_hmac,
+ 0xf5, 0x59, 0xc7, 0xf6, 0x64, 0xf6, 0x9e, 0xab,
+ 0x7b, 0x60, 0x92, 0x23, 0x75, 0x26, 0xea, 0x0d,
+ 0x1f, 0x61, 0xcb, 0x20, 0xd6, 0x9d, 0x10, 0xf2
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test7_confounder,
+ 0x53, 0xbf, 0x8a, 0x0d, 0x10, 0x52, 0x65, 0xd4,
+ 0xe2, 0x76, 0x42, 0x86, 0x24, 0xce, 0x5e, 0x63
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test7_expected_result,
+ 0xbc, 0x47, 0xff, 0xec, 0x79, 0x98, 0xeb, 0x91,
+ 0xe8, 0x11, 0x5c, 0xf8, 0xd1, 0x9d, 0xac, 0x4b,
+ 0xbb, 0xe2, 0xe1, 0x63, 0xe8, 0x7d, 0xd3, 0x7f,
+ 0x49, 0xbe, 0xca, 0x92, 0x02, 0x77, 0x64, 0xf6
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test7_expected_hmac,
+ 0x8c, 0xf5, 0x1f, 0x14, 0xd7, 0x98, 0xc2, 0x27,
+ 0x3f, 0x35, 0xdf, 0x57, 0x4d, 0x1f, 0x93, 0x2e,
+ 0x40, 0xc4, 0xff, 0x25, 0x5b, 0x36, 0xa2, 0x66
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test8_confounder,
+ 0x76, 0x3e, 0x65, 0x36, 0x7e, 0x86, 0x4f, 0x02,
+ 0xf5, 0x51, 0x53, 0xc7, 0xe3, 0xb5, 0x8a, 0xf1
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test8_expected_result,
+ 0x40, 0x01, 0x3e, 0x2d, 0xf5, 0x8e, 0x87, 0x51,
+ 0x95, 0x7d, 0x28, 0x78, 0xbc, 0xd2, 0xd6, 0xfe,
+ 0x10, 0x1c, 0xcf, 0xd5, 0x56, 0xcb, 0x1e, 0xae,
+ 0x79, 0xdb, 0x3c, 0x3e, 0xe8, 0x64, 0x29, 0xf2,
+ 0xb2, 0xa6, 0x02, 0xac, 0x86
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test8_expected_hmac,
+ 0xfe, 0xf6, 0xec, 0xb6, 0x47, 0xd6, 0x29, 0x5f,
+ 0xae, 0x07, 0x7a, 0x1f, 0xeb, 0x51, 0x75, 0x08,
+ 0xd2, 0xc1, 0x6b, 0x41, 0x92, 0xe0, 0x1f, 0x62
+);
+
+static const struct gss_krb5_test_param rfc8009_encrypt_test_params[] = {
+ {
+ .desc = "Encrypt empty plaintext with aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .plaintext = &rfc8009_enc_empty_plaintext,
+ .confounder = &rfc8009_enc_test1_confounder,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .expected_result = &rfc8009_enc_test1_expected_result,
+ .expected_hmac = &rfc8009_enc_test1_expected_hmac,
+ },
+ {
+ .desc = "Encrypt short plaintext with aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .plaintext = &rfc8009_enc_short_plaintext,
+ .confounder = &rfc8009_enc_test2_confounder,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .expected_result = &rfc8009_enc_test2_expected_result,
+ .expected_hmac = &rfc8009_enc_test2_expected_hmac,
+ },
+ {
+ .desc = "Encrypt block plaintext with aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .plaintext = &rfc8009_enc_block_plaintext,
+ .confounder = &rfc8009_enc_test3_confounder,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .expected_result = &rfc8009_enc_test3_expected_result,
+ .expected_hmac = &rfc8009_enc_test3_expected_hmac,
+ },
+ {
+ .desc = "Encrypt long plaintext with aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .plaintext = &rfc8009_enc_long_plaintext,
+ .confounder = &rfc8009_enc_test4_confounder,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .expected_result = &rfc8009_enc_test4_expected_result,
+ .expected_hmac = &rfc8009_enc_test4_expected_hmac,
+ },
+ {
+ .desc = "Encrypt empty plaintext with aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .plaintext = &rfc8009_enc_empty_plaintext,
+ .confounder = &rfc8009_enc_test5_confounder,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .expected_result = &rfc8009_enc_test5_expected_result,
+ .expected_hmac = &rfc8009_enc_test5_expected_hmac,
+ },
+ {
+ .desc = "Encrypt short plaintext with aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .plaintext = &rfc8009_enc_short_plaintext,
+ .confounder = &rfc8009_enc_test6_confounder,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .expected_result = &rfc8009_enc_test6_expected_result,
+ .expected_hmac = &rfc8009_enc_test6_expected_hmac,
+ },
+ {
+ .desc = "Encrypt block plaintext with aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .plaintext = &rfc8009_enc_block_plaintext,
+ .confounder = &rfc8009_enc_test7_confounder,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .expected_result = &rfc8009_enc_test7_expected_result,
+ .expected_hmac = &rfc8009_enc_test7_expected_hmac,
+ },
+ {
+ .desc = "Encrypt long plaintext with aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .plaintext = &rfc8009_enc_long_plaintext,
+ .confounder = &rfc8009_enc_test8_confounder,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .expected_result = &rfc8009_enc_test8_expected_result,
+ .expected_hmac = &rfc8009_enc_test8_expected_hmac,
+ },
+};
+
+/* Creates the function rfc8009_encrypt_gen_params */
+KUNIT_ARRAY_PARAM(rfc8009_encrypt, rfc8009_encrypt_test_params,
+ gss_krb5_get_desc);
+
+static void rfc8009_encrypt_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ struct crypto_sync_skcipher *cts_tfm, *cbc_tfm;
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_netobj Ke, Ki, checksum;
+ u8 usage_data[GSS_KRB5_K5CLENGTH];
+ struct xdr_netobj usage = {
+ .data = usage_data,
+ .len = sizeof(usage_data),
+ };
+ struct crypto_ahash *ahash_tfm;
+ struct xdr_buf buf;
+ void *text;
+ size_t len;
+ u32 err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ if (!gk5e)
+ kunit_skip(test, "Encryption type is not available");
+
+ *(__be32 *)usage.data = cpu_to_be32(2);
+
+ Ke.len = gk5e->Ke_length;
+ Ke.data = kunit_kzalloc(test, Ke.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, Ke.data);
+ usage.data[4] = KEY_USAGE_SEED_ENCRYPTION;
+ err = gk5e->derive_key(gk5e, param->base_key, &Ke,
+ &usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cbc_tfm = crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm);
+ err = crypto_sync_skcipher_setkey(cbc_tfm, Ke.data, Ke.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cts_tfm = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cts_tfm);
+ err = crypto_sync_skcipher_setkey(cts_tfm, Ke.data, Ke.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ len = param->confounder->len + param->plaintext->len;
+ text = kunit_kzalloc(test, len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, text);
+ memcpy(text, param->confounder->data, param->confounder->len);
+ memcpy(text + param->confounder->len, param->plaintext->data,
+ param->plaintext->len);
+
+ memset(&buf, 0, sizeof(buf));
+ buf.head[0].iov_base = text;
+ buf.head[0].iov_len = param->confounder->len + param->plaintext->len;
+ buf.len = buf.head[0].iov_len;
+
+ checksum.len = gk5e->cksumlength;
+ checksum.data = kunit_kzalloc(test, checksum.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, checksum.data);
+
+ Ki.len = gk5e->Ki_length;
+ Ki.data = kunit_kzalloc(test, Ki.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, Ki.data);
+ usage.data[4] = KEY_USAGE_SEED_INTEGRITY;
+ err = gk5e->derive_key(gk5e, param->base_key, &Ki,
+ &usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ ahash_tfm = crypto_alloc_ahash(gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ahash_tfm);
+ err = crypto_ahash_setkey(ahash_tfm, Ki.data, Ki.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Act */
+ err = krb5_cbc_cts_encrypt(cts_tfm, cbc_tfm, 0, &buf, NULL, NULL, 0);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ err = krb5_etm_checksum(cts_tfm, ahash_tfm, &buf, 0, &checksum);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ param->expected_result->len, buf.len,
+ "ciphertext length mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ buf.head[0].iov_base,
+ param->expected_result->len), 0,
+ "ciphertext mismatch");
+ KUNIT_EXPECT_EQ_MSG(test, memcmp(param->expected_hmac->data,
+ checksum.data,
+ checksum.len), 0,
+ "HMAC mismatch");
+
+ crypto_free_ahash(ahash_tfm);
+ crypto_free_sync_skcipher(cts_tfm);
+ crypto_free_sync_skcipher(cbc_tfm);
+}
+
+static struct kunit_case rfc8009_test_cases[] = {
+ {
+ .name = "RFC 8009 key derivation",
+ .run_case = kdf_case,
+ .generate_params = rfc8009_kdf_gen_params,
+ },
+ {
+ .name = "RFC 8009 checksum",
+ .run_case = checksum_case,
+ .generate_params = rfc8009_checksum_gen_params,
+ },
+ {
+ .name = "RFC 8009 encryption",
+ .run_case = rfc8009_encrypt_case,
+ .generate_params = rfc8009_encrypt_gen_params,
+ },
+ {}
+};
+
+static struct kunit_suite rfc8009_suite = {
+ .name = "RFC 8009 suite",
+ .test_cases = rfc8009_test_cases,
+};
+
+/*
+ * Encryption self-tests
+ */
+
+DEFINE_STR_XDR_NETOBJ(encrypt_selftest_plaintext,
+ "This is the plaintext for the encryption self-test.");
+
+static const struct gss_krb5_test_param encrypt_selftest_params[] = {
+ {
+ .desc = "aes128-cts-hmac-sha1-96 encryption self-test",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+ {
+ .desc = "aes256-cts-hmac-sha1-96 encryption self-test",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+ {
+ .desc = "camellia128-cts-cmac encryption self-test",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .Ke = &camellia128_cts_cmac_Ke,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+ {
+ .desc = "camellia256-cts-cmac encryption self-test",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .Ke = &camellia256_cts_cmac_Ke,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+ {
+ .desc = "aes128-cts-hmac-sha256-128 encryption self-test",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .Ke = &aes128_cts_hmac_sha256_128_Ke,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+ {
+ .desc = "aes256-cts-hmac-sha384-192 encryption self-test",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .Ke = &aes256_cts_hmac_sha384_192_Ke,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+};
+
+/* Creates the function encrypt_selftest_gen_params */
+KUNIT_ARRAY_PARAM(encrypt_selftest, encrypt_selftest_params,
+ gss_krb5_get_desc);
+
+/*
+ * Encrypt and decrypt plaintext, and ensure the input plaintext
+ * matches the output plaintext. A confounder is not added in this
+ * case.
+ */
+static void encrypt_selftest_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ struct crypto_sync_skcipher *cts_tfm, *cbc_tfm;
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_buf buf;
+ void *text;
+ int err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ if (!gk5e)
+ kunit_skip(test, "Encryption type is not available");
+
+ cbc_tfm = crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm);
+ err = crypto_sync_skcipher_setkey(cbc_tfm, param->Ke->data, param->Ke->len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cts_tfm = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cts_tfm);
+ err = crypto_sync_skcipher_setkey(cts_tfm, param->Ke->data, param->Ke->len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ text = kunit_kzalloc(test, roundup(param->plaintext->len,
+ crypto_sync_skcipher_blocksize(cbc_tfm)),
+ GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, text);
+
+ memcpy(text, param->plaintext->data, param->plaintext->len);
+ memset(&buf, 0, sizeof(buf));
+ buf.head[0].iov_base = text;
+ buf.head[0].iov_len = param->plaintext->len;
+ buf.len = buf.head[0].iov_len;
+
+ /* Act */
+ err = krb5_cbc_cts_encrypt(cts_tfm, cbc_tfm, 0, &buf, NULL, NULL, 0);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ err = krb5_cbc_cts_decrypt(cts_tfm, cbc_tfm, 0, &buf);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ param->plaintext->len, buf.len,
+ "length mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->plaintext->data,
+ buf.head[0].iov_base, buf.len), 0,
+ "plaintext mismatch");
+
+ crypto_free_sync_skcipher(cts_tfm);
+ crypto_free_sync_skcipher(cbc_tfm);
+}
+
+static struct kunit_case encryption_test_cases[] = {
+ {
+ .name = "Encryption self-tests",
+ .run_case = encrypt_selftest_case,
+ .generate_params = encrypt_selftest_gen_params,
+ },
+ {}
+};
+
+static struct kunit_suite encryption_test_suite = {
+ .name = "Encryption test suite",
+ .test_cases = encryption_test_cases,
+};
+
+kunit_test_suites(&rfc3961_suite,
+ &rfc3962_suite,
+ &rfc6803_suite,
+ &rfc8009_suite,
+ &encryption_test_suite);
+
+MODULE_DESCRIPTION("Test RPCSEC GSS Kerberos 5 functions");
+MODULE_LICENSE("GPL");
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index ba04e3ec970a..ef0e6af9fc95 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -60,102 +60,29 @@
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
-#include <linux/crypto.h>
+
+#include "gss_krb5_internal.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-
-/* read_token is a mic token, and message_buffer is the data that the mic was
- * supposedly taken over. */
-
-static u32
-gss_verify_mic_v1(struct krb5_ctx *ctx,
- struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
-{
- int signalg;
- int sealalg;
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- s32 now;
- int direction;
- u32 seqnum;
- unsigned char *ptr = (unsigned char *)read_token->data;
- int bodysize;
- u8 *cksumkey;
-
- dprintk("RPC: krb5_read_token\n");
-
- if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr,
- read_token->len))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) ||
- (ptr[1] != (KG_TOK_MIC_MSG & 0xff)))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* XXX sanity-check bodysize?? */
-
- signalg = ptr[2] + (ptr[3] << 8);
- if (signalg != ctx->gk5e->signalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- sealalg = ptr[4] + (ptr[5] << 8);
- if (sealalg != SEAL_ALG_NONE)
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if (ctx->gk5e->keyed_cksum)
- cksumkey = ctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(ctx, ptr, 8, message_buffer, 0,
- cksumkey, KG_USAGE_SIGN, &md5cksum))
- return GSS_S_FAILURE;
-
- if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
- ctx->gk5e->cksumlength))
- return GSS_S_BAD_SIG;
-
- /* it got through unscathed. Make sure the context is unexpired */
-
- now = ktime_get_real_seconds();
-
- if (now > ctx->endtime)
- return GSS_S_CONTEXT_EXPIRED;
-
- /* do sequencing checks */
-
- if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
- &direction, &seqnum))
- return GSS_S_FAILURE;
-
- if ((ctx->initiate && direction != 0xff) ||
- (!ctx->initiate && direction != 0))
- return GSS_S_BAD_SIG;
-
- return GSS_S_COMPLETE;
-}
-
-static u32
-gss_verify_mic_v2(struct krb5_ctx *ctx,
- struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
+u32
+gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
+ struct xdr_netobj *read_token)
{
+ struct crypto_ahash *tfm = ctx->initiate ?
+ ctx->acceptor_sign : ctx->initiator_sign;
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj cksumobj = {.len = sizeof(cksumdata),
- .data = cksumdata};
- time64_t now;
+ struct xdr_netobj cksumobj = {
+ .len = ctx->gk5e->cksumlength,
+ .data = cksumdata,
+ };
u8 *ptr = read_token->data;
- u8 *cksumkey;
+ __be16 be16_ptr;
+ time64_t now;
u8 flags;
int i;
- unsigned int cksum_usage;
- __be16 be16_ptr;
dprintk("RPC: %s\n", __func__);
@@ -177,16 +104,8 @@ gss_verify_mic_v2(struct krb5_ctx *ctx,
if (ptr[i] != 0xff)
return GSS_S_DEFECTIVE_TOKEN;
- if (ctx->initiate) {
- cksumkey = ctx->acceptor_sign;
- cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
- } else {
- cksumkey = ctx->initiator_sign;
- cksum_usage = KG_USAGE_INITIATOR_SIGN;
- }
-
- if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0,
- cksumkey, cksum_usage, &cksumobj))
+ if (gss_krb5_checksum(tfm, ptr, GSS_KRB5_TOK_HDR_LEN,
+ message_buffer, 0, &cksumobj))
return GSS_S_FAILURE;
if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN,
@@ -205,22 +124,3 @@ gss_verify_mic_v2(struct krb5_ctx *ctx,
return GSS_S_COMPLETE;
}
-
-u32
-gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
- struct xdr_buf *message_buffer,
- struct xdr_netobj *read_token)
-{
- struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
-
- switch (ctx->enctype) {
- default:
- BUG();
- case ENCTYPE_DES_CBC_RAW:
- case ENCTYPE_DES3_CBC_RAW:
- return gss_verify_mic_v1(ctx, message_buffer, read_token);
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return gss_verify_mic_v2(ctx, message_buffer, read_token);
- }
-}
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 48337687848c..b3e1738ff6bf 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -32,329 +32,14 @@
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
-#include <linux/random.h>
#include <linux/pagemap.h>
+#include "gss_krb5_internal.h"
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-static inline int
-gss_krb5_padding(int blocksize, int length)
-{
- return blocksize - (length % blocksize);
-}
-
-static inline void
-gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize)
-{
- int padding = gss_krb5_padding(blocksize, buf->len - offset);
- char *p;
- struct kvec *iov;
-
- if (buf->page_len || buf->tail[0].iov_len)
- iov = &buf->tail[0];
- else
- iov = &buf->head[0];
- p = iov->iov_base + iov->iov_len;
- iov->iov_len += padding;
- buf->len += padding;
- memset(p, padding, padding);
-}
-
-static inline int
-gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
-{
- u8 *ptr;
- u8 pad;
- size_t len = buf->len;
-
- if (len <= buf->head[0].iov_len) {
- pad = *(u8 *)(buf->head[0].iov_base + len - 1);
- if (pad > buf->head[0].iov_len)
- return -EINVAL;
- buf->head[0].iov_len -= pad;
- goto out;
- } else
- len -= buf->head[0].iov_len;
- if (len <= buf->page_len) {
- unsigned int last = (buf->page_base + len - 1)
- >>PAGE_SHIFT;
- unsigned int offset = (buf->page_base + len - 1)
- & (PAGE_SIZE - 1);
- ptr = kmap_atomic(buf->pages[last]);
- pad = *(ptr + offset);
- kunmap_atomic(ptr);
- goto out;
- } else
- len -= buf->page_len;
- BUG_ON(len > buf->tail[0].iov_len);
- pad = *(u8 *)(buf->tail[0].iov_base + len - 1);
-out:
- /* XXX: NOTE: we do not adjust the page lengths--they represent
- * a range of data in the real filesystem page cache, and we need
- * to know that range so the xdr code can properly place read data.
- * However adjusting the head length, as we do above, is harmless.
- * In the case of a request that fits into a single page, the server
- * also uses length and head length together to determine the original
- * start of the request to copy the request for deferal; so it's
- * easier on the server if we adjust head and tail length in tandem.
- * It's not really a problem that we don't fool with the page and
- * tail lengths, though--at worst badly formed xdr might lead the
- * server to attempt to parse the padding.
- * XXX: Document all these weird requirements for gss mechanism
- * wrap/unwrap functions. */
- if (pad > blocksize)
- return -EINVAL;
- if (buf->len > pad)
- buf->len -= pad;
- else
- return -EINVAL;
- return 0;
-}
-
-void
-gss_krb5_make_confounder(char *p, u32 conflen)
-{
- static u64 i = 0;
- u64 *q = (u64 *)p;
-
- /* rfc1964 claims this should be "random". But all that's really
- * necessary is that it be unique. And not even that is necessary in
- * our case since our "gssapi" implementation exists only to support
- * rpcsec_gss, so we know that the only buffers we will ever encrypt
- * already begin with a unique sequence number. Just to hedge my bets
- * I'll make a half-hearted attempt at something unique, but ensuring
- * uniqueness would mean worrying about atomicity and rollover, and I
- * don't care enough. */
-
- /* initialize to random value */
- if (i == 0) {
- i = get_random_u32();
- i = (i << 32) | get_random_u32();
- }
-
- switch (conflen) {
- case 16:
- *q++ = i++;
- fallthrough;
- case 8:
- *q++ = i++;
- break;
- default:
- BUG();
- }
-}
-
-/* Assumptions: the head and tail of inbuf are ours to play with.
- * The pages, however, may be real pages in the page cache and we replace
- * them with scratch pages from **pages before writing to them. */
-/* XXX: obviously the above should be documentation of wrap interface,
- * and shouldn't be in this kerberos-specific file. */
-
-/* XXX factor out common code with seal/unseal. */
-
-static u32
-gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
- struct xdr_buf *buf, struct page **pages)
-{
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- int blocksize = 0, plainlen;
- unsigned char *ptr, *msg_start;
- time64_t now;
- int headlen;
- struct page **tmp_pages;
- u32 seq_send;
- u8 *cksumkey;
- u32 conflen = kctx->gk5e->conflen;
-
- dprintk("RPC: %s\n", __func__);
-
- now = ktime_get_real_seconds();
-
- blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
- gss_krb5_add_padding(buf, offset, blocksize);
- BUG_ON((buf->len - offset) % blocksize);
- plainlen = conflen + buf->len - offset;
-
- headlen = g_token_size(&kctx->mech_used,
- GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
- (buf->len - offset);
-
- ptr = buf->head[0].iov_base + offset;
- /* shift data to make room for header. */
- xdr_extend_head(buf, offset, headlen);
-
- /* XXX Would be cleverer to encrypt while copying. */
- BUG_ON((buf->len - offset - headlen) % blocksize);
-
- g_make_token_header(&kctx->mech_used,
- GSS_KRB5_TOK_HDR_LEN +
- kctx->gk5e->cksumlength + plainlen, &ptr);
-
-
- /* ptr now at header described in rfc 1964, section 1.2.1: */
- ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
- ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
-
- msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
-
- /*
- * signalg and sealalg are stored as if they were converted from LE
- * to host endian, even though they're opaque pairs of bytes according
- * to the RFC.
- */
- *(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
- *(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
- ptr[6] = 0xff;
- ptr[7] = 0xff;
-
- gss_krb5_make_confounder(msg_start, conflen);
-
- if (kctx->gk5e->keyed_cksum)
- cksumkey = kctx->cksum;
- else
- cksumkey = NULL;
-
- /* XXXJBF: UGH!: */
- tmp_pages = buf->pages;
- buf->pages = pages;
- if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen,
- cksumkey, KG_USAGE_SEAL, &md5cksum))
- return GSS_S_FAILURE;
- buf->pages = tmp_pages;
-
- memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
-
- seq_send = atomic_fetch_inc(&kctx->seq_send);
-
- /* XXX would probably be more efficient to compute checksum
- * and encrypt at the same time: */
- if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff,
- seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
- return GSS_S_FAILURE;
-
- if (gss_encrypt_xdr_buf(kctx->enc, buf,
- offset + headlen - conflen, pages))
- return GSS_S_FAILURE;
-
- return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
-}
-
-static u32
-gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len,
- struct xdr_buf *buf, unsigned int *slack,
- unsigned int *align)
-{
- int signalg;
- int sealalg;
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- time64_t now;
- int direction;
- s32 seqnum;
- unsigned char *ptr;
- int bodysize;
- void *data_start, *orig_start;
- int data_len;
- int blocksize;
- u32 conflen = kctx->gk5e->conflen;
- int crypt_offset;
- u8 *cksumkey;
- unsigned int saved_len = buf->len;
-
- dprintk("RPC: gss_unwrap_kerberos\n");
-
- ptr = (u8 *)buf->head[0].iov_base + offset;
- if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
- len - offset))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
- (ptr[1] != (KG_TOK_WRAP_MSG & 0xff)))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* XXX sanity-check bodysize?? */
-
- /* get the sign and seal algorithms */
-
- signalg = ptr[2] + (ptr[3] << 8);
- if (signalg != kctx->gk5e->signalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- sealalg = ptr[4] + (ptr[5] << 8);
- if (sealalg != kctx->gk5e->sealalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /*
- * Data starts after token header and checksum. ptr points
- * to the beginning of the token header
- */
- crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
- (unsigned char *)buf->head[0].iov_base;
-
- buf->len = len;
- if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if (kctx->gk5e->keyed_cksum)
- cksumkey = kctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
- cksumkey, KG_USAGE_SEAL, &md5cksum))
- return GSS_S_FAILURE;
-
- if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
- kctx->gk5e->cksumlength))
- return GSS_S_BAD_SIG;
-
- /* it got through unscathed. Make sure the context is unexpired */
-
- now = ktime_get_real_seconds();
-
- if (now > kctx->endtime)
- return GSS_S_CONTEXT_EXPIRED;
-
- /* do sequencing checks */
-
- if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
- ptr + 8, &direction, &seqnum))
- return GSS_S_BAD_SIG;
-
- if ((kctx->initiate && direction != 0xff) ||
- (!kctx->initiate && direction != 0))
- return GSS_S_BAD_SIG;
-
- /* Copy the data back to the right position. XXX: Would probably be
- * better to copy and encrypt at the same time. */
-
- blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
- data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
- conflen;
- orig_start = buf->head[0].iov_base + offset;
- data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
- memmove(orig_start, data_start, data_len);
- buf->head[0].iov_len -= (data_start - orig_start);
- buf->len = len - (data_start - orig_start);
-
- if (gss_krb5_remove_padding(buf, blocksize))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* slack must include room for krb5 padding */
- *slack = XDR_QUADLEN(saved_len - buf->len);
- /* The GSS blob always precedes the RPC message payload */
- *align = *slack;
- return GSS_S_COMPLETE;
-}
-
/*
* We can shift data by up to LOCAL_BUF_LEN bytes in a pass. If we need
* to do more than that, we shift repeatedly. Kevin Coffman reports
@@ -405,9 +90,9 @@ static void rotate_left(u32 base, struct xdr_buf *buf, unsigned int shift)
_rotate_left(&subbuf, shift);
}
-static u32
-gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
- struct xdr_buf *buf, struct page **pages)
+u32
+gss_krb5_wrap_v2(struct krb5_ctx *kctx, int offset,
+ struct xdr_buf *buf, struct page **pages)
{
u8 *ptr;
time64_t now;
@@ -418,9 +103,6 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
dprintk("RPC: %s\n", __func__);
- if (kctx->gk5e->encrypt_v2 == NULL)
- return GSS_S_FAILURE;
-
/* make room for gss token header */
if (xdr_extend_head(buf, offset, GSS_KRB5_TOK_HDR_LEN))
return GSS_S_FAILURE;
@@ -448,7 +130,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
be64ptr = (__be64 *)be16ptr;
*be64ptr = cpu_to_be64(atomic64_fetch_inc(&kctx->seq_send64));
- err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
+ err = (*kctx->gk5e->encrypt)(kctx, offset, buf, pages);
if (err)
return err;
@@ -456,10 +138,10 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
-static u32
-gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
- struct xdr_buf *buf, unsigned int *slack,
- unsigned int *align)
+u32
+gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align)
{
time64_t now;
u8 *ptr;
@@ -473,9 +155,6 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
dprintk("RPC: %s\n", __func__);
- if (kctx->gk5e->decrypt_v2 == NULL)
- return GSS_S_FAILURE;
-
ptr = buf->head[0].iov_base + offset;
if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_WRAP)
@@ -505,8 +184,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
if (rrc != 0)
rotate_left(offset + 16, buf, rrc);
- err = (*kctx->gk5e->decrypt_v2)(kctx, offset, len, buf,
- &headskip, &tailskip);
+ err = (*kctx->gk5e->decrypt)(kctx, offset, len, buf,
+ &headskip, &tailskip);
if (err)
return GSS_S_FAILURE;
@@ -556,41 +235,3 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
*slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
return GSS_S_COMPLETE;
}
-
-u32
-gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
- struct xdr_buf *buf, struct page **pages)
-{
- struct krb5_ctx *kctx = gctx->internal_ctx_id;
-
- switch (kctx->enctype) {
- default:
- BUG();
- case ENCTYPE_DES_CBC_RAW:
- case ENCTYPE_DES3_CBC_RAW:
- return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return gss_wrap_kerberos_v2(kctx, offset, buf, pages);
- }
-}
-
-u32
-gss_unwrap_kerberos(struct gss_ctx *gctx, int offset,
- int len, struct xdr_buf *buf)
-{
- struct krb5_ctx *kctx = gctx->internal_ctx_id;
-
- switch (kctx->enctype) {
- default:
- BUG();
- case ENCTYPE_DES_CBC_RAW:
- case ENCTYPE_DES3_CBC_RAW:
- return gss_unwrap_kerberos_v1(kctx, offset, len, buf,
- &gctx->slack, &gctx->align);
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return gss_unwrap_kerberos_v2(kctx, offset, len, buf,
- &gctx->slack, &gctx->align);
- }
-}
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index fae632da1058..c84d0cf61980 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -13,7 +13,6 @@
#include <linux/module.h>
#include <linux/oid_registry.h>
#include <linux/sunrpc/msg_prot.h>
-#include <linux/sunrpc/gss_asn1.h>
#include <linux/sunrpc/auth_gss.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/gss_err.h>
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index d79f12c2550a..7d2cdc2bd374 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -250,8 +250,8 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL);
if (!creds) {
- kfree(oa->data);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto free_oa;
}
oa->data[0].option.data = CREDS_VALUE;
@@ -265,29 +265,40 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
/* option buffer */
p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- return -ENOSPC;
+ if (unlikely(p == NULL)) {
+ err = -ENOSPC;
+ goto free_creds;
+ }
length = be32_to_cpup(p);
p = xdr_inline_decode(xdr, length);
- if (unlikely(p == NULL))
- return -ENOSPC;
+ if (unlikely(p == NULL)) {
+ err = -ENOSPC;
+ goto free_creds;
+ }
if (length == sizeof(CREDS_VALUE) &&
memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
/* We have creds here. parse them */
err = gssx_dec_linux_creds(xdr, creds);
if (err)
- return err;
+ goto free_creds;
oa->data[0].value.len = 1; /* presence */
} else {
/* consume uninteresting buffer */
err = gssx_dec_buffer(xdr, &dummy);
if (err)
- return err;
+ goto free_creds;
}
}
return 0;
+
+free_creds:
+ kfree(creds);
+free_oa:
+ kfree(oa->data);
+ oa->data = NULL;
+ return err;
}
static int gssx_dec_status(struct xdr_stream *xdr,
@@ -783,12 +794,12 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
struct gssx_res_accept_sec_context *res = data;
u32 value_follows;
int err;
- struct page *scratch;
+ struct folio *scratch;
- scratch = alloc_page(GFP_KERNEL);
+ scratch = folio_alloc(GFP_KERNEL, 0);
if (!scratch)
return -ENOMEM;
- xdr_set_scratch_page(xdr, scratch);
+ xdr_set_scratch_folio(xdr, scratch);
/* res->status */
err = gssx_dec_status(xdr, &res->status);
@@ -833,6 +844,6 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
err = gssx_dec_option_array(xdr, &res->options);
out_free:
- __free_page(scratch);
+ folio_put(scratch);
return err;
}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index acb822b23af1..a8ec30759a18 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -71,12 +71,11 @@
struct gss_svc_data {
/* decoded gss client cred: */
struct rpc_gss_wire_cred clcred;
- /* save a pointer to the beginning of the encoded verifier,
- * for use in encryption/checksumming in svcauth_gss_release: */
- __be32 *verf_start;
+ u32 gsd_databody_offset;
struct rsc *rsci;
/* for temporary results */
+ __be32 gsd_seq_num;
u8 gsd_scratch[GSS_SCRATCH_SIZE];
};
@@ -258,11 +257,11 @@ static int rsi_parse(struct cache_detail *cd,
rsii.h.flags = 0;
/* expiry */
- expiry = get_expiry(&mesg);
- status = -EINVAL;
- if (expiry == 0)
+ status = get_expiry(&mesg, &expiry);
+ if (status)
goto out;
+ status = -EINVAL;
/* major/minor */
len = qword_get(&mesg, buf, mlen);
if (len <= 0)
@@ -484,11 +483,11 @@ static int rsc_parse(struct cache_detail *cd,
rsci.h.flags = 0;
/* expiry */
- expiry = get_expiry(&mesg);
- status = -EINVAL;
- if (expiry == 0)
+ status = get_expiry(&mesg, &expiry);
+ if (status)
goto out;
+ status = -EINVAL;
rscp = rsc_lookup(cd, &rsci);
if (!rscp)
goto out;
@@ -692,78 +691,49 @@ alreadyseen:
goto out;
}
-static inline u32 round_up_to_quad(u32 i)
-{
- return (i + 3 ) & ~3;
-}
-
-static inline int
-svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o)
-{
- int l;
-
- if (argv->iov_len < 4)
- return -1;
- o->len = svc_getnl(argv);
- l = round_up_to_quad(o->len);
- if (argv->iov_len < l)
- return -1;
- o->data = argv->iov_base;
- argv->iov_base += l;
- argv->iov_len -= l;
- return 0;
-}
-
-static inline int
-svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
-{
- u8 *p;
-
- if (resv->iov_len + 4 > PAGE_SIZE)
- return -1;
- svc_putnl(resv, o->len);
- p = resv->iov_base + resv->iov_len;
- resv->iov_len += round_up_to_quad(o->len);
- if (resv->iov_len > PAGE_SIZE)
- return -1;
- memcpy(p, o->data, o->len);
- memset(p + o->len, 0, round_up_to_quad(o->len) - o->len);
- return 0;
-}
-
/*
- * Verify the checksum on the header and return SVC_OK on success.
- * Otherwise, return SVC_DROP (in the case of a bad sequence number)
- * or return SVC_DENIED and indicate error in rqstp->rq_auth_stat.
+ * Decode and verify a Call's verifier field. For RPC_AUTH_GSS Calls,
+ * the body of this field contains a variable length checksum.
+ *
+ * GSS-specific auth_stat values are mandated by RFC 2203 Section
+ * 5.3.3.3.
*/
static int
-gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
- __be32 *rpcstart, struct rpc_gss_wire_cred *gc)
+svcauth_gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
+ __be32 *rpcstart, struct rpc_gss_wire_cred *gc)
{
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct gss_ctx *ctx_id = rsci->mechctx;
+ u32 flavor, maj_stat;
struct xdr_buf rpchdr;
struct xdr_netobj checksum;
- u32 flavor = 0;
- struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec iov;
- /* data to compute the checksum over: */
+ /*
+ * Compute the checksum of the incoming Call from the
+ * XID field to credential field:
+ */
iov.iov_base = rpcstart;
- iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart;
+ iov.iov_len = (u8 *)xdr->p - (u8 *)rpcstart;
xdr_buf_from_iov(&iov, &rpchdr);
- rqstp->rq_auth_stat = rpc_autherr_badverf;
- if (argv->iov_len < 4)
- return SVC_DENIED;
- flavor = svc_getnl(argv);
- if (flavor != RPC_AUTH_GSS)
+ /* Call's verf field: */
+ if (xdr_stream_decode_opaque_auth(xdr, &flavor,
+ (void **)&checksum.data,
+ &checksum.len) < 0) {
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
- if (svc_safe_getnetobj(argv, &checksum))
+ }
+ if (flavor != RPC_AUTH_GSS || checksum.len < XDR_UNIT) {
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
+ }
- if (rqstp->rq_deferred) /* skip verification of revisited request */
+ if (rqstp->rq_deferred)
return SVC_OK;
- if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) {
+ maj_stat = gss_verify_mic(ctx_id, &rpchdr, &checksum);
+ if (maj_stat != GSS_S_COMPLETE) {
+ trace_rpcgss_svc_mic(rqstp, maj_stat);
rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
return SVC_DENIED;
}
@@ -778,54 +748,36 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
return SVC_OK;
}
-static int
-gss_write_null_verf(struct svc_rqst *rqstp)
-{
- __be32 *p;
-
- svc_putnl(rqstp->rq_res.head, RPC_AUTH_NULL);
- p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
- /* don't really need to check if head->iov_len > PAGE_SIZE ... */
- *p++ = 0;
- if (!xdr_ressize_check(rqstp, p))
- return -1;
- return 0;
-}
-
-static int
-gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
+/*
+ * Construct and encode a Reply's verifier field. The verifier's body
+ * field contains a variable-length checksum of the GSS sequence
+ * number.
+ */
+static bool
+svcauth_gss_encode_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
{
- __be32 *xdr_seq;
+ struct gss_svc_data *gsd = rqstp->rq_auth_data;
u32 maj_stat;
struct xdr_buf verf_data;
- struct xdr_netobj mic;
- __be32 *p;
+ struct xdr_netobj checksum;
struct kvec iov;
- int err = -1;
- svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS);
- xdr_seq = kmalloc(4, GFP_KERNEL);
- if (!xdr_seq)
- return -ENOMEM;
- *xdr_seq = htonl(seq);
-
- iov.iov_base = xdr_seq;
- iov.iov_len = 4;
+ gsd->gsd_seq_num = cpu_to_be32(seq);
+ iov.iov_base = &gsd->gsd_seq_num;
+ iov.iov_len = XDR_UNIT;
xdr_buf_from_iov(&iov, &verf_data);
- p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
- mic.data = (u8 *)(p + 1);
- maj_stat = gss_get_mic(ctx_id, &verf_data, &mic);
+
+ checksum.data = gsd->gsd_scratch;
+ maj_stat = gss_get_mic(ctx_id, &verf_data, &checksum);
if (maj_stat != GSS_S_COMPLETE)
- goto out;
- *p++ = htonl(mic.len);
- memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len);
- p += XDR_QUADLEN(mic.len);
- if (!xdr_ressize_check(rqstp, p))
- goto out;
- err = 0;
-out:
- kfree(xdr_seq);
- return err;
+ goto bad_mic;
+
+ return xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, RPC_AUTH_GSS,
+ checksum.data, checksum.len) > 0;
+
+bad_mic:
+ trace_rpcgss_svc_get_mic(rqstp, maj_stat);
+ return false;
}
struct gss_domain {
@@ -891,142 +843,125 @@ out:
}
EXPORT_SYMBOL_GPL(svcauth_gss_register_pseudoflavor);
-static inline int
-read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
-{
- __be32 raw;
- int status;
-
- status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
- if (status)
- return status;
- *obj = ntohl(raw);
- return 0;
-}
-
-/* It would be nice if this bit of code could be shared with the client.
- * Obstacles:
- * The client shouldn't malloc(), would have to pass in own memory.
- * The server uses base of head iovec as read pointer, while the
- * client uses separate pointer. */
-static int
-unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
+/*
+ * RFC 2203, Section 5.3.2.2
+ *
+ * struct rpc_gss_integ_data {
+ * opaque databody_integ<>;
+ * opaque checksum<>;
+ * };
+ *
+ * struct rpc_gss_data_t {
+ * unsigned int seq_num;
+ * proc_req_arg_t arg;
+ * };
+ */
+static noinline_for_stack int
+svcauth_gss_unwrap_integ(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx)
{
struct gss_svc_data *gsd = rqstp->rq_auth_data;
- u32 integ_len, rseqno, maj_stat;
- struct xdr_netobj mic;
- struct xdr_buf integ_buf;
-
- /* NFS READ normally uses splice to send data in-place. However
- * the data in cache can change after the reply's MIC is computed
- * but before the RPC reply is sent. To prevent the client from
- * rejecting the server-computed MIC in this somewhat rare case,
- * do not use splice with the GSS integrity service.
- */
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ u32 len, offset, seq_num, maj_stat;
+ struct xdr_buf *buf = xdr->buf;
+ struct xdr_buf databody_integ;
+ struct xdr_netobj checksum;
/* Did we already verify the signature on the original pass through? */
if (rqstp->rq_deferred)
return 0;
- integ_len = svc_getnl(&buf->head[0]);
- if (integ_len & 3)
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
goto unwrap_failed;
- if (integ_len > buf->len)
+ if (len & 3)
goto unwrap_failed;
- if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
+ offset = xdr_stream_pos(xdr);
+ if (xdr_buf_subsegment(buf, &databody_integ, offset, len))
goto unwrap_failed;
- /* copy out mic... */
- if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
+ /*
+ * The xdr_stream now points to the @seq_num field. The next
+ * XDR data item is the @arg field, which contains the clear
+ * text RPC program payload. The checksum, which follows the
+ * @arg field, is located and decoded without updating the
+ * xdr_stream.
+ */
+
+ offset += len;
+ if (xdr_decode_word(buf, offset, &checksum.len))
goto unwrap_failed;
- if (mic.len > sizeof(gsd->gsd_scratch))
+ if (checksum.len > sizeof(gsd->gsd_scratch))
goto unwrap_failed;
- mic.data = gsd->gsd_scratch;
- if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len))
+ checksum.data = gsd->gsd_scratch;
+ if (read_bytes_from_xdr_buf(buf, offset + XDR_UNIT, checksum.data,
+ checksum.len))
goto unwrap_failed;
- maj_stat = gss_verify_mic(ctx, &integ_buf, &mic);
+
+ maj_stat = gss_verify_mic(ctx, &databody_integ, &checksum);
if (maj_stat != GSS_S_COMPLETE)
goto bad_mic;
- rseqno = svc_getnl(&buf->head[0]);
- if (rseqno != seq)
+
+ /* The received seqno is protected by the checksum. */
+ if (xdr_stream_decode_u32(xdr, &seq_num) < 0)
+ goto unwrap_failed;
+ if (seq_num != seq)
goto bad_seqno;
- /* trim off the mic and padding at the end before returning */
- xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
+
+ xdr_truncate_decode(xdr, XDR_UNIT + checksum.len);
return 0;
unwrap_failed:
trace_rpcgss_svc_unwrap_failed(rqstp);
return -EINVAL;
bad_seqno:
- trace_rpcgss_svc_seqno_bad(rqstp, seq, rseqno);
+ trace_rpcgss_svc_seqno_bad(rqstp, seq, seq_num);
return -EINVAL;
bad_mic:
trace_rpcgss_svc_mic(rqstp, maj_stat);
return -EINVAL;
}
-static inline int
-total_buf_len(struct xdr_buf *buf)
-{
- return buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len;
-}
-
-static void
-fix_priv_head(struct xdr_buf *buf, int pad)
-{
- if (buf->page_len == 0) {
- /* We need to adjust head and buf->len in tandem in this
- * case to make svc_defer() work--it finds the original
- * buffer start using buf->len - buf->head[0].iov_len. */
- buf->head[0].iov_len -= pad;
- }
-}
-
-static int
-unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
+/*
+ * RFC 2203, Section 5.3.2.3
+ *
+ * struct rpc_gss_priv_data {
+ * opaque databody_priv<>
+ * };
+ *
+ * struct rpc_gss_data_t {
+ * unsigned int seq_num;
+ * proc_req_arg_t arg;
+ * };
+ */
+static noinline_for_stack int
+svcauth_gss_unwrap_priv(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx)
{
- u32 priv_len, maj_stat;
- int pad, remaining_len, offset;
- u32 rseqno;
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ u32 len, maj_stat, seq_num, offset;
+ struct xdr_buf *buf = xdr->buf;
+ unsigned int saved_len;
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
-
- priv_len = svc_getnl(&buf->head[0]);
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
+ goto unwrap_failed;
if (rqstp->rq_deferred) {
/* Already decrypted last time through! The sequence number
* check at out_seq is unnecessary but harmless: */
goto out_seq;
}
- /* buf->len is the number of bytes from the original start of the
- * request to the end, where head[0].iov_len is just the bytes
- * not yet read from the head, so these two values are different: */
- remaining_len = total_buf_len(buf);
- if (priv_len > remaining_len)
+ if (len > xdr_stream_remaining(xdr))
goto unwrap_failed;
- pad = remaining_len - priv_len;
- buf->len -= pad;
- fix_priv_head(buf, pad);
-
- maj_stat = gss_unwrap(ctx, 0, priv_len, buf);
- pad = priv_len - buf->len;
- /* The upper layers assume the buffer is aligned on 4-byte boundaries.
- * In the krb5p case, at least, the data ends up offset, so we need to
- * move it around. */
- /* XXX: This is very inefficient. It would be better to either do
- * this while we encrypt, or maybe in the receive code, if we can peak
- * ahead and work out the service and mechanism there. */
- offset = xdr_pad_size(buf->head[0].iov_len);
- if (offset) {
- buf->buflen = RPCSVC_MAXPAYLOAD;
- xdr_shift_buf(buf, offset);
- fix_priv_head(buf, pad);
- }
+ offset = xdr_stream_pos(xdr);
+
+ saved_len = buf->len;
+ maj_stat = gss_unwrap(ctx, offset, offset + len, buf);
if (maj_stat != GSS_S_COMPLETE)
goto bad_unwrap;
+ xdr->nwords -= XDR_QUADLEN(saved_len - buf->len);
+
out_seq:
- rseqno = svc_getnl(&buf->head[0]);
- if (rseqno != seq)
+ /* gss_unwrap() decrypted the sequence number. */
+ if (xdr_stream_decode_u32(xdr, &seq_num) < 0)
+ goto unwrap_failed;
+ if (seq_num != seq)
goto bad_seqno;
return 0;
@@ -1034,14 +969,14 @@ unwrap_failed:
trace_rpcgss_svc_unwrap_failed(rqstp);
return -EINVAL;
bad_seqno:
- trace_rpcgss_svc_seqno_bad(rqstp, seq, rseqno);
+ trace_rpcgss_svc_seqno_bad(rqstp, seq, seq_num);
return -EINVAL;
bad_unwrap:
trace_rpcgss_svc_unwrap(rqstp, maj_stat);
return -EINVAL;
}
-static int
+static enum svc_auth_status
svcauth_gss_set_client(struct svc_rqst *rqstp)
{
struct gss_svc_data *svcdata = rqstp->rq_auth_data;
@@ -1071,87 +1006,38 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
return SVC_OK;
}
-static inline int
-gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp,
- struct xdr_netobj *out_handle, int *major_status)
+static bool
+svcauth_gss_proc_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp,
+ struct xdr_netobj *out_handle, int *major_status,
+ u32 seq_num)
{
+ struct xdr_stream *xdr = &rqstp->rq_res_stream;
struct rsc *rsci;
- int rc;
+ bool rc;
if (*major_status != GSS_S_COMPLETE)
- return gss_write_null_verf(rqstp);
+ goto null_verifier;
rsci = gss_svc_searchbyctx(cd, out_handle);
if (rsci == NULL) {
*major_status = GSS_S_NO_CONTEXT;
- return gss_write_null_verf(rqstp);
+ goto null_verifier;
}
- rc = gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN);
+
+ rc = svcauth_gss_encode_verf(rqstp, rsci->mechctx, seq_num);
cache_put(&rsci->h, cd);
return rc;
-}
-
-static inline int
-gss_read_common_verf(struct rpc_gss_wire_cred *gc,
- struct kvec *argv, __be32 *authp,
- struct xdr_netobj *in_handle)
-{
- /* Read the verifier; should be NULL: */
- *authp = rpc_autherr_badverf;
- if (argv->iov_len < 2 * 4)
- return SVC_DENIED;
- if (svc_getnl(argv) != RPC_AUTH_NULL)
- return SVC_DENIED;
- if (svc_getnl(argv) != 0)
- return SVC_DENIED;
- /* Martial context handle and token for upcall: */
- *authp = rpc_autherr_badcred;
- if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
- return SVC_DENIED;
- if (dup_netobj(in_handle, &gc->gc_ctx))
- return SVC_CLOSE;
- *authp = rpc_autherr_badverf;
-
- return 0;
-}
-
-static inline int
-gss_read_verf(struct rpc_gss_wire_cred *gc,
- struct kvec *argv, __be32 *authp,
- struct xdr_netobj *in_handle,
- struct xdr_netobj *in_token)
-{
- struct xdr_netobj tmpobj;
- int res;
-
- res = gss_read_common_verf(gc, argv, authp, in_handle);
- if (res)
- return res;
-
- if (svc_safe_getnetobj(argv, &tmpobj)) {
- kfree(in_handle->data);
- return SVC_DENIED;
- }
- if (dup_netobj(in_token, &tmpobj)) {
- kfree(in_handle->data);
- return SVC_CLOSE;
- }
- return 0;
+null_verifier:
+ return xdr_stream_encode_opaque_auth(xdr, RPC_AUTH_NULL, NULL, 0) > 0;
}
static void gss_free_in_token_pages(struct gssp_in_token *in_token)
{
- u32 inlen;
int i;
i = 0;
- inlen = in_token->page_len;
- while (inlen) {
- if (in_token->pages[i])
- put_page(in_token->pages[i]);
- inlen -= inlen > PAGE_SIZE ? PAGE_SIZE : inlen;
- }
-
+ while (in_token->pages[i])
+ put_page(in_token->pages[i++]);
kfree(in_token->pages);
in_token->pages = NULL;
}
@@ -1161,40 +1047,43 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
struct xdr_netobj *in_handle,
struct gssp_in_token *in_token)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
unsigned int length, pgto_offs, pgfrom_offs;
- int pages, i, res, pgto, pgfrom;
- size_t inlen, to_offs, from_offs;
+ int pages, i, pgto, pgfrom;
+ size_t to_offs, from_offs;
+ u32 inlen;
- res = gss_read_common_verf(gc, argv, &rqstp->rq_auth_stat, in_handle);
- if (res)
- return res;
+ if (dup_netobj(in_handle, &gc->gc_ctx))
+ return SVC_CLOSE;
- inlen = svc_getnl(argv);
- if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) {
- kfree(in_handle->data);
- return SVC_DENIED;
- }
+ /*
+ * RFC 2203 Section 5.2.2
+ *
+ * struct rpc_gss_init_arg {
+ * opaque gss_token<>;
+ * };
+ */
+ if (xdr_stream_decode_u32(xdr, &inlen) < 0)
+ goto out_denied_free;
+ if (inlen > xdr_stream_remaining(xdr))
+ goto out_denied_free;
pages = DIV_ROUND_UP(inlen, PAGE_SIZE);
- in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL);
- if (!in_token->pages) {
- kfree(in_handle->data);
- return SVC_DENIED;
- }
+ in_token->pages = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
+ if (!in_token->pages)
+ goto out_denied_free;
in_token->page_base = 0;
in_token->page_len = inlen;
for (i = 0; i < pages; i++) {
in_token->pages[i] = alloc_page(GFP_KERNEL);
if (!in_token->pages[i]) {
- kfree(in_handle->data);
gss_free_in_token_pages(in_token);
- return SVC_DENIED;
+ goto out_denied_free;
}
}
- length = min_t(unsigned int, inlen, argv->iov_len);
- memcpy(page_address(in_token->pages[0]), argv->iov_base, length);
+ length = min_t(unsigned int, inlen, (char *)xdr->end - (char *)xdr->p);
+ memcpy(page_address(in_token->pages[0]), xdr->p, length);
inlen -= length;
to_offs = length;
@@ -1217,26 +1106,41 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
inlen -= length;
}
return 0;
+
+out_denied_free:
+ kfree(in_handle->data);
+ return SVC_DENIED;
}
-static inline int
-gss_write_resv(struct kvec *resv, size_t size_limit,
- struct xdr_netobj *out_handle, struct xdr_netobj *out_token,
- int major_status, int minor_status)
-{
- if (resv->iov_len + 4 > size_limit)
- return -1;
- svc_putnl(resv, RPC_SUCCESS);
- if (svc_safe_putnetobj(resv, out_handle))
- return -1;
- if (resv->iov_len + 3 * 4 > size_limit)
- return -1;
- svc_putnl(resv, major_status);
- svc_putnl(resv, minor_status);
- svc_putnl(resv, GSS_SEQ_WIN);
- if (svc_safe_putnetobj(resv, out_token))
- return -1;
- return 0;
+/*
+ * RFC 2203, Section 5.2.3.1.
+ *
+ * struct rpc_gss_init_res {
+ * opaque handle<>;
+ * unsigned int gss_major;
+ * unsigned int gss_minor;
+ * unsigned int seq_window;
+ * opaque gss_token<>;
+ * };
+ */
+static bool
+svcxdr_encode_gss_init_res(struct xdr_stream *xdr,
+ struct xdr_netobj *handle,
+ struct xdr_netobj *gss_token,
+ unsigned int major_status,
+ unsigned int minor_status, u32 seq_num)
+{
+ if (xdr_stream_encode_opaque(xdr, handle->data, handle->len) < 0)
+ return false;
+ if (xdr_stream_encode_u32(xdr, major_status) < 0)
+ return false;
+ if (xdr_stream_encode_u32(xdr, minor_status) < 0)
+ return false;
+ if (xdr_stream_encode_u32(xdr, seq_num) < 0)
+ return false;
+ if (xdr_stream_encode_opaque(xdr, gss_token->data, gss_token->len) < 0)
+ return false;
+ return true;
}
/*
@@ -1246,20 +1150,44 @@ gss_write_resv(struct kvec *resv, size_t size_limit,
* the upcall results are available, write the verifier and result.
* Otherwise, drop the request pending an answer to the upcall.
*/
-static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
- struct rpc_gss_wire_cred *gc)
+static int
+svcauth_gss_legacy_init(struct svc_rqst *rqstp,
+ struct rpc_gss_wire_cred *gc)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
- struct kvec *resv = &rqstp->rq_res.head[0];
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct rsi *rsip, rsikey;
+ __be32 *p;
+ u32 len;
int ret;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
memset(&rsikey, 0, sizeof(rsikey));
- ret = gss_read_verf(gc, argv, &rqstp->rq_auth_stat,
- &rsikey.in_handle, &rsikey.in_token);
- if (ret)
- return ret;
+ if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
+ return SVC_CLOSE;
+
+ /*
+ * RFC 2203 Section 5.2.2
+ *
+ * struct rpc_gss_init_arg {
+ * opaque gss_token<>;
+ * };
+ */
+ if (xdr_stream_decode_u32(xdr, &len) < 0) {
+ kfree(rsikey.in_handle.data);
+ return SVC_DENIED;
+ }
+ p = xdr_inline_decode(xdr, len);
+ if (!p) {
+ kfree(rsikey.in_handle.data);
+ return SVC_DENIED;
+ }
+ rsikey.in_token.data = kmalloc(len, GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(rsikey.in_token.data)) {
+ kfree(rsikey.in_handle.data);
+ return SVC_CLOSE;
+ }
+ memcpy(rsikey.in_token.data, p, len);
+ rsikey.in_token.len = len;
/* Perform upcall, or find upcall result: */
rsip = rsi_lookup(sn->rsi_cache, &rsikey);
@@ -1271,13 +1199,14 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
return SVC_CLOSE;
ret = SVC_CLOSE;
- /* Got an answer to the upcall; use it: */
- if (gss_write_init_verf(sn->rsc_cache, rqstp,
- &rsip->out_handle, &rsip->major_status))
+ if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &rsip->out_handle,
+ &rsip->major_status, GSS_SEQ_WIN))
+ goto out;
+ if (!svcxdr_set_accept_stat(rqstp))
goto out;
- if (gss_write_resv(resv, PAGE_SIZE,
- &rsip->out_handle, &rsip->out_token,
- rsip->major_status, rsip->minor_status))
+ if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &rsip->out_handle,
+ &rsip->out_token, rsip->major_status,
+ rsip->minor_status, GSS_SEQ_WIN))
goto out;
ret = SVC_COMPLETE;
@@ -1361,7 +1290,6 @@ out:
static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
struct rpc_gss_wire_cred *gc)
{
- struct kvec *resv = &rqstp->rq_res.head[0];
struct xdr_netobj cli_handle;
struct gssp_upcall_data ud;
uint64_t handle;
@@ -1399,13 +1327,14 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
goto out;
}
- /* Got an answer to the upcall; use it: */
- if (gss_write_init_verf(sn->rsc_cache, rqstp,
- &cli_handle, &ud.major_status))
+ if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &cli_handle,
+ &ud.major_status, GSS_SEQ_WIN))
goto out;
- if (gss_write_resv(resv, PAGE_SIZE,
- &cli_handle, &ud.out_token,
- ud.major_status, ud.minor_status))
+ if (!svcxdr_set_accept_stat(rqstp))
+ goto out;
+ if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &cli_handle,
+ &ud.out_token, ud.major_status,
+ ud.minor_status, GSS_SEQ_WIN))
goto out;
ret = SVC_COMPLETE;
@@ -1442,6 +1371,31 @@ static bool use_gss_proxy(struct net *net)
return sn->use_gss_proxy;
}
+static noinline_for_stack int
+svcauth_gss_proc_init(struct svc_rqst *rqstp, struct rpc_gss_wire_cred *gc)
+{
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ u32 flavor, len;
+ void *body;
+
+ /* Call's verf field: */
+ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0)
+ return SVC_GARBAGE;
+ if (flavor != RPC_AUTH_NULL || len != 0) {
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
+ return SVC_DENIED;
+ }
+
+ if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) {
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ return SVC_DENIED;
+ }
+
+ if (!use_gss_proxy(SVC_NET(rqstp)))
+ return svcauth_gss_legacy_init(rqstp, gc);
+ return svcauth_gss_proxy_init(rqstp, gc);
+}
+
#ifdef CONFIG_PROC_FS
static ssize_t write_gssp(struct file *file, const char __user *buf,
@@ -1524,6 +1478,56 @@ static void destroy_use_gss_proxy_proc_entry(struct net *net)
clear_gssp_clnt(sn);
}
}
+
+static ssize_t read_gss_krb5_enctypes(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct rpcsec_gss_oid oid = {
+ .len = 9,
+ .data = "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02",
+ };
+ struct gss_api_mech *mech;
+ ssize_t ret;
+
+ mech = gss_mech_get_by_OID(&oid);
+ if (!mech)
+ return 0;
+ if (!mech->gm_upcall_enctypes) {
+ gss_mech_put(mech);
+ return 0;
+ }
+
+ ret = simple_read_from_buffer(buf, count, ppos,
+ mech->gm_upcall_enctypes,
+ strlen(mech->gm_upcall_enctypes));
+ gss_mech_put(mech);
+ return ret;
+}
+
+static const struct proc_ops gss_krb5_enctypes_proc_ops = {
+ .proc_open = nonseekable_open,
+ .proc_read = read_gss_krb5_enctypes,
+};
+
+static int create_krb5_enctypes_proc_entry(struct net *net)
+{
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ sn->gss_krb5_enctypes =
+ proc_create_data("gss_krb5_enctypes", S_IFREG | 0444,
+ sn->proc_net_rpc, &gss_krb5_enctypes_proc_ops,
+ net);
+ return sn->gss_krb5_enctypes ? 0 : -ENOMEM;
+}
+
+static void destroy_krb5_enctypes_proc_entry(struct net *net)
+{
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ if (sn->gss_krb5_enctypes)
+ remove_proc_entry("gss_krb5_enctypes", sn->proc_net_rpc);
+}
+
#else /* CONFIG_PROC_FS */
static int create_use_gss_proxy_proc_entry(struct net *net)
@@ -1533,83 +1537,129 @@ static int create_use_gss_proxy_proc_entry(struct net *net)
static void destroy_use_gss_proxy_proc_entry(struct net *net) {}
+static int create_krb5_enctypes_proc_entry(struct net *net)
+{
+ return 0;
+}
+
+static void destroy_krb5_enctypes_proc_entry(struct net *net) {}
+
#endif /* CONFIG_PROC_FS */
/*
- * Accept an rpcsec packet.
- * If context establishment, punt to user space
- * If data exchange, verify/decrypt
- * If context destruction, handle here
- * In the context establishment and destruction case we encode
- * response here and return SVC_COMPLETE.
+ * The Call's credential body should contain a struct rpc_gss_cred_t.
+ *
+ * RFC 2203 Section 5
+ *
+ * struct rpc_gss_cred_t {
+ * union switch (unsigned int version) {
+ * case RPCSEC_GSS_VERS_1:
+ * struct {
+ * rpc_gss_proc_t gss_proc;
+ * unsigned int seq_num;
+ * rpc_gss_service_t service;
+ * opaque handle<>;
+ * } rpc_gss_cred_vers_1_t;
+ * }
+ * };
*/
-static int
+static bool
+svcauth_gss_decode_credbody(struct xdr_stream *xdr,
+ struct rpc_gss_wire_cred *gc,
+ __be32 **rpcstart)
+{
+ ssize_t handle_len;
+ u32 body_len;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, XDR_UNIT);
+ if (!p)
+ return false;
+ /*
+ * start of rpc packet is 7 u32's back from here:
+ * xid direction rpcversion prog vers proc flavour
+ */
+ *rpcstart = p - 7;
+ body_len = be32_to_cpup(p);
+ if (body_len > RPC_MAX_AUTH_SIZE)
+ return false;
+
+ /* struct rpc_gss_cred_t */
+ if (xdr_stream_decode_u32(xdr, &gc->gc_v) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &gc->gc_proc) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &gc->gc_seq) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &gc->gc_svc) < 0)
+ return false;
+ handle_len = xdr_stream_decode_opaque_inline(xdr,
+ (void **)&gc->gc_ctx.data,
+ body_len);
+ if (handle_len < 0)
+ return false;
+ if (body_len != XDR_UNIT * 5 + xdr_align_size(handle_len))
+ return false;
+
+ gc->gc_ctx.len = handle_len;
+ return true;
+}
+
+/**
+ * svcauth_gss_accept - Decode and validate incoming RPC_AUTH_GSS credential
+ * @rqstp: RPC transaction
+ *
+ * Return values:
+ * %SVC_OK: Success
+ * %SVC_COMPLETE: GSS context lifetime event
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_CLOSE: Temporary failure
+ *
+ * The rqstp->rq_auth_stat field is also set (see RFCs 2203 and 5531).
+ */
+static enum svc_auth_status
svcauth_gss_accept(struct svc_rqst *rqstp)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
- struct kvec *resv = &rqstp->rq_res.head[0];
- u32 crlen;
struct gss_svc_data *svcdata = rqstp->rq_auth_data;
+ __be32 *rpcstart;
struct rpc_gss_wire_cred *gc;
struct rsc *rsci = NULL;
- __be32 *rpcstart;
- __be32 *reject_stat = resv->iov_base + resv->iov_len;
int ret;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
- rqstp->rq_auth_stat = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_failed;
if (!svcdata)
svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL);
if (!svcdata)
goto auth_err;
rqstp->rq_auth_data = svcdata;
- svcdata->verf_start = NULL;
+ svcdata->gsd_databody_offset = 0;
svcdata->rsci = NULL;
gc = &svcdata->clcred;
- /* start of rpc packet is 7 u32's back from here:
- * xid direction rpcversion prog vers proc flavour
- */
- rpcstart = argv->iov_base;
- rpcstart -= 7;
-
- /* credential is:
- * version(==1), proc(0,1,2,3), seq, service (1,2,3), handle
- * at least 5 u32s, and is preceded by length, so that makes 6.
- */
-
- if (argv->iov_len < 5 * 4)
- goto auth_err;
- crlen = svc_getnl(argv);
- if (svc_getnl(argv) != RPC_GSS_VERSION)
- goto auth_err;
- gc->gc_proc = svc_getnl(argv);
- gc->gc_seq = svc_getnl(argv);
- gc->gc_svc = svc_getnl(argv);
- if (svc_safe_getnetobj(argv, &gc->gc_ctx))
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ if (!svcauth_gss_decode_credbody(&rqstp->rq_arg_stream, gc, &rpcstart))
goto auth_err;
- if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4)
+ if (gc->gc_v != RPC_GSS_VERSION)
goto auth_err;
- if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0))
- goto auth_err;
-
- rqstp->rq_auth_stat = rpc_autherr_badverf;
switch (gc->gc_proc) {
case RPC_GSS_PROC_INIT:
case RPC_GSS_PROC_CONTINUE_INIT:
- if (use_gss_proxy(SVC_NET(rqstp)))
- return svcauth_gss_proxy_init(rqstp, gc);
- else
- return svcauth_gss_legacy_init(rqstp, gc);
- case RPC_GSS_PROC_DATA:
+ if (rqstp->rq_proc != 0)
+ goto auth_err;
+ return svcauth_gss_proc_init(rqstp, gc);
case RPC_GSS_PROC_DESTROY:
- /* Look up the context, and check the verifier: */
+ if (rqstp->rq_proc != 0)
+ goto auth_err;
+ fallthrough;
+ case RPC_GSS_PROC_DATA:
rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx);
if (!rsci)
goto auth_err;
- switch (gss_verify_header(rqstp, rsci, rpcstart, gc)) {
+ switch (svcauth_gss_verify_header(rqstp, rsci, rpcstart, gc)) {
case SVC_OK:
break;
case SVC_DENIED:
@@ -1619,6 +1669,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
}
break;
default:
+ if (rqstp->rq_proc != 0)
+ goto auth_err;
rqstp->rq_auth_stat = rpc_autherr_rejectedcred;
goto auth_err;
}
@@ -1626,19 +1678,20 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
/* now act upon the command: */
switch (gc->gc_proc) {
case RPC_GSS_PROC_DESTROY:
- if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
+ if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq))
+ goto auth_err;
+ if (!svcxdr_set_accept_stat(rqstp))
goto auth_err;
/* Delete the entry from the cache_list and call cache_put */
sunrpc_cache_unhash(sn->rsc_cache, &rsci->h);
- if (resv->iov_len + 4 > PAGE_SIZE)
- goto drop;
- svc_putnl(resv, RPC_SUCCESS);
goto complete;
case RPC_GSS_PROC_DATA:
rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
- svcdata->verf_start = resv->iov_base + resv->iov_len;
- if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
+ if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq))
+ goto auth_err;
+ if (!svcxdr_set_accept_stat(rqstp))
goto auth_err;
+ svcdata->gsd_databody_offset = xdr_stream_pos(&rqstp->rq_res_stream);
rqstp->rq_cred = rsci->cred;
get_group_info(rsci->cred.cr_group_info);
rqstp->rq_auth_stat = rpc_autherr_badcred;
@@ -1646,22 +1699,20 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
case RPC_GSS_SVC_NONE:
break;
case RPC_GSS_SVC_INTEGRITY:
- /* placeholders for length and seq. number: */
- svc_putnl(resv, 0);
- svc_putnl(resv, 0);
- if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
- gc->gc_seq, rsci->mechctx))
+ /* placeholders for body length and seq. number: */
+ xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2);
+ if (svcauth_gss_unwrap_integ(rqstp, gc->gc_seq,
+ rsci->mechctx))
goto garbage_args;
- rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE;
+ svcxdr_set_auth_slack(rqstp, RPC_MAX_AUTH_SIZE);
break;
case RPC_GSS_SVC_PRIVACY:
- /* placeholders for length and seq. number: */
- svc_putnl(resv, 0);
- svc_putnl(resv, 0);
- if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
- gc->gc_seq, rsci->mechctx))
+ /* placeholders for body length and seq. number: */
+ xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2);
+ if (svcauth_gss_unwrap_priv(rqstp, gc->gc_seq,
+ rsci->mechctx))
goto garbage_args;
- rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2;
+ svcxdr_set_auth_slack(rqstp, RPC_MAX_AUTH_SIZE * 2);
break;
default:
goto auth_err;
@@ -1680,8 +1731,7 @@ garbage_args:
ret = SVC_GARBAGE;
goto out;
auth_err:
- /* Restore write pointer to its original value: */
- xdr_ressize_check(rqstp, reject_stat);
+ xdr_truncate_encode(&rqstp->rq_res_stream, XDR_UNIT * 2);
ret = SVC_DENIED;
goto out;
complete:
@@ -1695,104 +1745,125 @@ out:
return ret;
}
-static __be32 *
-svcauth_gss_prepare_to_wrap(struct xdr_buf *resbuf, struct gss_svc_data *gsd)
+static u32
+svcauth_gss_prepare_to_wrap(struct svc_rqst *rqstp, struct gss_svc_data *gsd)
{
- __be32 *p;
- u32 verf_len;
+ u32 offset;
+
+ /* Release can be called twice, but we only wrap once. */
+ offset = gsd->gsd_databody_offset;
+ gsd->gsd_databody_offset = 0;
- p = gsd->verf_start;
- gsd->verf_start = NULL;
+ /* AUTH_ERROR replies are not wrapped. */
+ if (rqstp->rq_auth_stat != rpc_auth_ok)
+ return 0;
- /* If the reply stat is nonzero, don't wrap: */
- if (*(p-1) != rpc_success)
- return NULL;
- /* Skip the verifier: */
- p += 1;
- verf_len = ntohl(*p++);
- p += XDR_QUADLEN(verf_len);
- /* move accept_stat to right place: */
- memcpy(p, p + 2, 4);
- /* Also don't wrap if the accept stat is nonzero: */
- if (*p != rpc_success) {
- resbuf->head[0].iov_len -= 2 * 4;
- return NULL;
- }
- p++;
- return p;
+ /* Also don't wrap if the accept_stat is nonzero: */
+ if (*rqstp->rq_accept_statp != rpc_success)
+ return 0;
+
+ return offset;
}
-static inline int
-svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
+/*
+ * RFC 2203, Section 5.3.2.2
+ *
+ * struct rpc_gss_integ_data {
+ * opaque databody_integ<>;
+ * opaque checksum<>;
+ * };
+ *
+ * struct rpc_gss_data_t {
+ * unsigned int seq_num;
+ * proc_req_arg_t arg;
+ * };
+ *
+ * The RPC Reply message has already been XDR-encoded. rq_res_stream
+ * is now positioned so that the checksum can be written just past
+ * the RPC Reply message.
+ */
+static int svcauth_gss_wrap_integ(struct svc_rqst *rqstp)
{
- struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
+ struct gss_svc_data *gsd = rqstp->rq_auth_data;
+ struct xdr_stream *xdr = &rqstp->rq_res_stream;
struct rpc_gss_wire_cred *gc = &gsd->clcred;
- struct xdr_buf *resbuf = &rqstp->rq_res;
- struct xdr_buf integ_buf;
- struct xdr_netobj mic;
- struct kvec *resv;
- __be32 *p;
- int integ_offset, integ_len;
- int stat = -EINVAL;
+ struct xdr_buf *buf = xdr->buf;
+ struct xdr_buf databody_integ;
+ struct xdr_netobj checksum;
+ u32 offset, maj_stat;
- p = svcauth_gss_prepare_to_wrap(resbuf, gsd);
- if (p == NULL)
- goto out;
- integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
- integ_len = resbuf->len - integ_offset;
- if (integ_len & 3)
+ offset = svcauth_gss_prepare_to_wrap(rqstp, gsd);
+ if (!offset)
goto out;
- *p++ = htonl(integ_len);
- *p++ = htonl(gc->gc_seq);
- if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
- WARN_ON_ONCE(1);
- goto out_err;
- }
- if (resbuf->tail[0].iov_base == NULL) {
- if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
- goto out_err;
- resbuf->tail[0].iov_base = resbuf->head[0].iov_base
- + resbuf->head[0].iov_len;
- resbuf->tail[0].iov_len = 0;
- }
- resv = &resbuf->tail[0];
- mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
- if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic))
- goto out_err;
- svc_putnl(resv, mic.len);
- memset(mic.data + mic.len, 0,
- round_up_to_quad(mic.len) - mic.len);
- resv->iov_len += XDR_QUADLEN(mic.len) << 2;
- /* not strictly required: */
- resbuf->len += XDR_QUADLEN(mic.len) << 2;
- if (resv->iov_len > PAGE_SIZE)
- goto out_err;
+
+ if (xdr_buf_subsegment(buf, &databody_integ, offset + XDR_UNIT,
+ buf->len - offset - XDR_UNIT))
+ goto wrap_failed;
+ /* Buffer space for these has already been reserved in
+ * svcauth_gss_accept(). */
+ if (xdr_encode_word(buf, offset, databody_integ.len))
+ goto wrap_failed;
+ if (xdr_encode_word(buf, offset + XDR_UNIT, gc->gc_seq))
+ goto wrap_failed;
+
+ checksum.data = gsd->gsd_scratch;
+ maj_stat = gss_get_mic(gsd->rsci->mechctx, &databody_integ, &checksum);
+ if (maj_stat != GSS_S_COMPLETE)
+ goto bad_mic;
+
+ if (xdr_stream_encode_opaque(xdr, checksum.data, checksum.len) < 0)
+ goto wrap_failed;
+ xdr_commit_encode(xdr);
+
out:
- stat = 0;
-out_err:
- return stat;
+ return 0;
+
+bad_mic:
+ trace_rpcgss_svc_get_mic(rqstp, maj_stat);
+ return -EINVAL;
+wrap_failed:
+ trace_rpcgss_svc_wrap_failed(rqstp);
+ return -EINVAL;
}
-static inline int
-svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
+/*
+ * RFC 2203, Section 5.3.2.3
+ *
+ * struct rpc_gss_priv_data {
+ * opaque databody_priv<>
+ * };
+ *
+ * struct rpc_gss_data_t {
+ * unsigned int seq_num;
+ * proc_req_arg_t arg;
+ * };
+ *
+ * gss_wrap() expands the size of the RPC message payload in the
+ * response buffer. The main purpose of svcauth_gss_wrap_priv()
+ * is to ensure there is adequate space in the response buffer to
+ * avoid overflow during the wrap.
+ */
+static int svcauth_gss_wrap_priv(struct svc_rqst *rqstp)
{
- struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
+ struct gss_svc_data *gsd = rqstp->rq_auth_data;
struct rpc_gss_wire_cred *gc = &gsd->clcred;
- struct xdr_buf *resbuf = &rqstp->rq_res;
- struct page **inpages = NULL;
- __be32 *p, *len;
- int offset;
- int pad;
-
- p = svcauth_gss_prepare_to_wrap(resbuf, gsd);
- if (p == NULL)
+ struct xdr_buf *buf = &rqstp->rq_res;
+ struct kvec *head = buf->head;
+ struct kvec *tail = buf->tail;
+ u32 offset, pad, maj_stat;
+ __be32 *p;
+
+ offset = svcauth_gss_prepare_to_wrap(rqstp, gsd);
+ if (!offset)
return 0;
- len = p++;
- offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base;
- *p++ = htonl(gc->gc_seq);
- inpages = resbuf->pages;
- /* XXX: Would be better to write some xdr helper functions for
- * nfs{2,3,4}xdr.c that place the data right, instead of copying: */
+
+ /*
+ * Buffer space for this field has already been reserved
+ * in svcauth_gss_accept(). Note that the GSS sequence
+ * number is encrypted along with the RPC reply payload.
+ */
+ if (xdr_encode_word(buf, offset + XDR_UNIT, gc->gc_seq))
+ goto wrap_failed;
/*
* If there is currently tail data, make sure there is
@@ -1801,19 +1872,17 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
* there is RPC_MAX_AUTH_SIZE slack space available in
* both the head and tail.
*/
- if (resbuf->tail[0].iov_base) {
- if (resbuf->tail[0].iov_base >=
- resbuf->head[0].iov_base + PAGE_SIZE)
- return -EINVAL;
- if (resbuf->tail[0].iov_base < resbuf->head[0].iov_base)
- return -EINVAL;
- if (resbuf->tail[0].iov_len + resbuf->head[0].iov_len
+ if (tail->iov_base) {
+ if (tail->iov_base >= head->iov_base + PAGE_SIZE)
+ goto wrap_failed;
+ if (tail->iov_base < head->iov_base)
+ goto wrap_failed;
+ if (tail->iov_len + head->iov_len
+ 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE)
- return -ENOMEM;
- memmove(resbuf->tail[0].iov_base + RPC_MAX_AUTH_SIZE,
- resbuf->tail[0].iov_base,
- resbuf->tail[0].iov_len);
- resbuf->tail[0].iov_base += RPC_MAX_AUTH_SIZE;
+ goto wrap_failed;
+ memmove(tail->iov_base + RPC_MAX_AUTH_SIZE, tail->iov_base,
+ tail->iov_len);
+ tail->iov_base += RPC_MAX_AUTH_SIZE;
}
/*
* If there is no current tail data, make sure there is
@@ -1822,55 +1891,70 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
* is RPC_MAX_AUTH_SIZE slack space available in both the
* head and tail.
*/
- if (resbuf->tail[0].iov_base == NULL) {
- if (resbuf->head[0].iov_len + 2*RPC_MAX_AUTH_SIZE > PAGE_SIZE)
- return -ENOMEM;
- resbuf->tail[0].iov_base = resbuf->head[0].iov_base
- + resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE;
- resbuf->tail[0].iov_len = 0;
+ if (!tail->iov_base) {
+ if (head->iov_len + 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE)
+ goto wrap_failed;
+ tail->iov_base = head->iov_base
+ + head->iov_len + RPC_MAX_AUTH_SIZE;
+ tail->iov_len = 0;
}
- if (gss_wrap(gsd->rsci->mechctx, offset, resbuf, inpages))
- return -ENOMEM;
- *len = htonl(resbuf->len - offset);
- pad = 3 - ((resbuf->len - offset - 1)&3);
- p = (__be32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len);
+
+ maj_stat = gss_wrap(gsd->rsci->mechctx, offset + XDR_UNIT, buf,
+ buf->pages);
+ if (maj_stat != GSS_S_COMPLETE)
+ goto bad_wrap;
+
+ /* Wrapping can change the size of databody_priv. */
+ if (xdr_encode_word(buf, offset, buf->len - offset - XDR_UNIT))
+ goto wrap_failed;
+ pad = xdr_pad_size(buf->len - offset - XDR_UNIT);
+ p = (__be32 *)(tail->iov_base + tail->iov_len);
memset(p, 0, pad);
- resbuf->tail[0].iov_len += pad;
- resbuf->len += pad;
+ tail->iov_len += pad;
+ buf->len += pad;
+
return 0;
+wrap_failed:
+ trace_rpcgss_svc_wrap_failed(rqstp);
+ return -EINVAL;
+bad_wrap:
+ trace_rpcgss_svc_wrap(rqstp, maj_stat);
+ return -ENOMEM;
}
+/**
+ * svcauth_gss_release - Wrap payload and release resources
+ * @rqstp: RPC transaction context
+ *
+ * Return values:
+ * %0: the Reply is ready to be sent
+ * %-ENOMEM: failed to allocate memory
+ * %-EINVAL: encoding error
+ */
static int
svcauth_gss_release(struct svc_rqst *rqstp)
{
- struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
- struct rpc_gss_wire_cred *gc;
- struct xdr_buf *resbuf = &rqstp->rq_res;
- int stat = -EINVAL;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
+ struct gss_svc_data *gsd = rqstp->rq_auth_data;
+ struct rpc_gss_wire_cred *gc;
+ int stat;
if (!gsd)
goto out;
gc = &gsd->clcred;
if (gc->gc_proc != RPC_GSS_PROC_DATA)
goto out;
- /* Release can be called twice, but we only wrap once. */
- if (gsd->verf_start == NULL)
- goto out;
- /* normally not set till svc_send, but we need it here: */
- /* XXX: what for? Do we mess it up the moment we call svc_putu32
- * or whatever? */
- resbuf->len = total_buf_len(resbuf);
+
switch (gc->gc_svc) {
case RPC_GSS_SVC_NONE:
break;
case RPC_GSS_SVC_INTEGRITY:
- stat = svcauth_gss_wrap_resp_integ(rqstp);
+ stat = svcauth_gss_wrap_integ(rqstp);
if (stat)
goto out_err;
break;
case RPC_GSS_SVC_PRIVACY:
- stat = svcauth_gss_wrap_resp_priv(rqstp);
+ stat = svcauth_gss_wrap_priv(rqstp);
if (stat)
goto out_err;
break;
@@ -1915,6 +1999,11 @@ svcauth_gss_domain_release(struct auth_domain *dom)
call_rcu(&dom->rcu_head, svcauth_gss_domain_release_rcu);
}
+static rpc_authflavor_t svcauth_gss_pseudoflavor(struct svc_rqst *rqstp)
+{
+ return svcauth_gss_flavor(rqstp->rq_gssclient);
+}
+
static struct auth_ops svcauthops_gss = {
.name = "rpcsec_gss",
.owner = THIS_MODULE,
@@ -1923,6 +2012,7 @@ static struct auth_ops svcauthops_gss = {
.release = svcauth_gss_release,
.domain_release = svcauth_gss_domain_release,
.set_client = svcauth_gss_set_client,
+ .pseudoflavor = svcauth_gss_pseudoflavor,
};
static int rsi_cache_create_net(struct net *net)
@@ -1997,7 +2087,15 @@ gss_svc_init_net(struct net *net)
rv = create_use_gss_proxy_proc_entry(net);
if (rv)
goto out2;
+
+ rv = create_krb5_enctypes_proc_entry(net);
+ if (rv)
+ goto out3;
+
return 0;
+
+out3:
+ destroy_use_gss_proxy_proc_entry(net);
out2:
rsi_cache_destroy_net(net);
out1:
@@ -2008,6 +2106,7 @@ out1:
void
gss_svc_shutdown_net(struct net *net)
{
+ destroy_krb5_enctypes_proc_entry(net);
destroy_use_gss_proxy_proc_entry(net);
rsi_cache_destroy_net(net);
rsc_cache_destroy_net(net);
diff --git a/net/sunrpc/auth_tls.c b/net/sunrpc/auth_tls.c
new file mode 100644
index 000000000000..87f570fd3b00
--- /dev/null
+++ b/net/sunrpc/auth_tls.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, 2022 Oracle. All rights reserved.
+ *
+ * The AUTH_TLS credential is used only to probe a remote peer
+ * for RPC-over-TLS support.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/sunrpc/clnt.h>
+
+static const char *starttls_token = "STARTTLS";
+static const size_t starttls_len = 8;
+
+static struct rpc_auth tls_auth;
+static struct rpc_cred tls_cred;
+
+static void tls_encode_probe(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ const void *obj)
+{
+}
+
+static int tls_decode_probe(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
+ void *obj)
+{
+ return 0;
+}
+
+static const struct rpc_procinfo rpcproc_tls_probe = {
+ .p_encode = tls_encode_probe,
+ .p_decode = tls_decode_probe,
+};
+
+static void rpc_tls_probe_call_prepare(struct rpc_task *task, void *data)
+{
+ task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
+ rpc_call_start(task);
+}
+
+static void rpc_tls_probe_call_done(struct rpc_task *task, void *data)
+{
+}
+
+static const struct rpc_call_ops rpc_tls_probe_ops = {
+ .rpc_call_prepare = rpc_tls_probe_call_prepare,
+ .rpc_call_done = rpc_tls_probe_call_done,
+};
+
+static int tls_probe(struct rpc_clnt *clnt)
+{
+ struct rpc_message msg = {
+ .rpc_proc = &rpcproc_tls_probe,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = &msg,
+ .rpc_op_cred = &tls_cred,
+ .callback_ops = &rpc_tls_probe_ops,
+ .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+ };
+ struct rpc_task *task;
+ int status;
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = task->tk_status;
+ rpc_put_task(task);
+ return status;
+}
+
+static struct rpc_auth *tls_create(const struct rpc_auth_create_args *args,
+ struct rpc_clnt *clnt)
+{
+ refcount_inc(&tls_auth.au_count);
+ return &tls_auth;
+}
+
+static void tls_destroy(struct rpc_auth *auth)
+{
+}
+
+static struct rpc_cred *tls_lookup_cred(struct rpc_auth *auth,
+ struct auth_cred *acred, int flags)
+{
+ return get_rpccred(&tls_cred);
+}
+
+static void tls_destroy_cred(struct rpc_cred *cred)
+{
+}
+
+static int tls_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
+{
+ return 1;
+}
+
+static int tls_marshal(struct rpc_task *task, struct xdr_stream *xdr)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4 * XDR_UNIT);
+ if (!p)
+ return -EMSGSIZE;
+ /* Credential */
+ *p++ = rpc_auth_tls;
+ *p++ = xdr_zero;
+ /* Verifier */
+ *p++ = rpc_auth_null;
+ *p = xdr_zero;
+ return 0;
+}
+
+static int tls_refresh(struct rpc_task *task)
+{
+ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
+ return 0;
+}
+
+static int tls_validate(struct rpc_task *task, struct xdr_stream *xdr)
+{
+ __be32 *p;
+ void *str;
+
+ p = xdr_inline_decode(xdr, XDR_UNIT);
+ if (!p)
+ return -EIO;
+ if (*p != rpc_auth_null)
+ return -EIO;
+ if (xdr_stream_decode_opaque_inline(xdr, &str, starttls_len) != starttls_len)
+ return -EPROTONOSUPPORT;
+ if (memcmp(str, starttls_token, starttls_len))
+ return -EPROTONOSUPPORT;
+ return 0;
+}
+
+const struct rpc_authops authtls_ops = {
+ .owner = THIS_MODULE,
+ .au_flavor = RPC_AUTH_TLS,
+ .au_name = "NULL",
+ .create = tls_create,
+ .destroy = tls_destroy,
+ .lookup_cred = tls_lookup_cred,
+ .ping = tls_probe,
+};
+
+static struct rpc_auth tls_auth = {
+ .au_cslack = NUL_CALLSLACK,
+ .au_rslack = NUL_REPLYSLACK,
+ .au_verfsize = NUL_REPLYSLACK,
+ .au_ralign = NUL_REPLYSLACK,
+ .au_ops = &authtls_ops,
+ .au_flavor = RPC_AUTH_TLS,
+ .au_count = REFCOUNT_INIT(1),
+};
+
+static const struct rpc_credops tls_credops = {
+ .cr_name = "AUTH_TLS",
+ .crdestroy = tls_destroy_cred,
+ .crmatch = tls_match,
+ .crmarshal = tls_marshal,
+ .crwrap_req = rpcauth_wrap_req_encode,
+ .crrefresh = tls_refresh,
+ .crvalidate = tls_validate,
+ .crunwrap_resp = rpcauth_unwrap_resp_decode,
+};
+
+static struct rpc_cred tls_cred = {
+ .cr_lru = LIST_HEAD_INIT(tls_cred.cr_lru),
+ .cr_auth = &tls_auth,
+ .cr_ops = &tls_credops,
+ .cr_count = REFCOUNT_INIT(2),
+ .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
+};
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 65a6c6429a53..caa94cf57123 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -83,7 +83,6 @@ static struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt)
return NULL;
req->rq_xprt = xprt;
- INIT_LIST_HEAD(&req->rq_bc_list);
/* Preallocate one XDR receive buffer */
if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) {
@@ -349,10 +348,8 @@ found:
}
/*
- * Add callback request to callback list. The callback
- * service sleeps on the sv_cb_waitq waiting for new
- * requests. Wake it up after adding enqueing the
- * request.
+ * Add callback request to callback list. Wake a thread
+ * on the first pool (usually the only pool) to handle it.
*/
void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
{
@@ -369,8 +366,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
dprintk("RPC: add callback request to list\n");
xprt_get(xprt);
- spin_lock(&bc_serv->sv_cb_lock);
- list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
- wake_up(&bc_serv->sv_cb_waitq);
- spin_unlock(&bc_serv->sv_cb_lock);
+ lwq_enqueue(&req->rq_bc_list, &bc_serv->sv_cb_list);
+ svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]);
}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 95ff74706104..131090f31e6a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -135,6 +135,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
hlist_add_head_rcu(&new->cache_list, head);
detail->entries++;
+ if (detail->nextcheck > new->expiry_time)
+ detail->nextcheck = new->expiry_time + 1;
cache_get(new);
spin_unlock(&detail->hash_lock);
@@ -281,21 +283,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
return rv;
}
-/*
- * This is the generic cache management routine for all
- * the authentication caches.
- * It checks the currency of a cache item and will (later)
- * initiate an upcall to fill it if needed.
- *
- *
- * Returns 0 if the cache_head can be used, or cache_puts it and returns
- * -EAGAIN if upcall is pending and request has been queued
- * -ETIMEDOUT if upcall failed or request could not be queue or
- * upcall completed but item is still invalid (implying that
- * the cache item has been replaced with a newer one).
- * -ENOENT if cache entry was negative
- */
-int cache_check(struct cache_detail *detail,
+int cache_check_rcu(struct cache_detail *detail,
struct cache_head *h, struct cache_req *rqstp)
{
int rv;
@@ -336,6 +324,31 @@ int cache_check(struct cache_detail *detail,
rv = -ETIMEDOUT;
}
}
+
+ return rv;
+}
+EXPORT_SYMBOL_GPL(cache_check_rcu);
+
+/*
+ * This is the generic cache management routine for all
+ * the authentication caches.
+ * It checks the currency of a cache item and will (later)
+ * initiate an upcall to fill it if needed.
+ *
+ *
+ * Returns 0 if the cache_head can be used, or cache_puts it and returns
+ * -EAGAIN if upcall is pending and request has been queued
+ * -ETIMEDOUT if upcall failed or request could not be queue or
+ * upcall completed but item is still invalid (implying that
+ * the cache item has been replaced with a newer one).
+ * -ENOENT if cache entry was negative
+ */
+int cache_check(struct cache_detail *detail,
+ struct cache_head *h, struct cache_req *rqstp)
+{
+ int rv;
+
+ rv = cache_check_rcu(detail, h, rqstp);
if (rv)
cache_put(h, detail);
return rv;
@@ -451,24 +464,21 @@ static int cache_clean(void)
}
}
+ spin_lock(&current_detail->hash_lock);
+
/* find a non-empty bucket in the table */
- while (current_detail &&
- current_index < current_detail->hash_size &&
+ while (current_index < current_detail->hash_size &&
hlist_empty(&current_detail->hash_table[current_index]))
current_index++;
/* find a cleanable entry in the bucket and clean it, or set to next bucket */
-
- if (current_detail && current_index < current_detail->hash_size) {
+ if (current_index < current_detail->hash_size) {
struct cache_head *ch = NULL;
struct cache_detail *d;
struct hlist_head *head;
struct hlist_node *tmp;
- spin_lock(&current_detail->hash_lock);
-
/* Ok, now to clean this strand */
-
head = &current_detail->hash_table[current_index];
hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
if (current_detail->nextcheck > ch->expiry_time)
@@ -489,8 +499,10 @@ static int cache_clean(void)
spin_unlock(&cache_list_lock);
if (ch)
sunrpc_end_cache_remove_entry(ch, d);
- } else
+ } else {
+ spin_unlock(&current_detail->hash_lock);
spin_unlock(&cache_list_lock);
+ }
return rv;
}
@@ -731,11 +743,10 @@ static bool cache_defer_req(struct cache_req *req, struct cache_head *item)
static void cache_revisit_request(struct cache_head *item)
{
struct cache_deferred_req *dreq;
- struct list_head pending;
struct hlist_node *tmp;
int hash = DFR_HASH(item);
+ LIST_HEAD(pending);
- INIT_LIST_HEAD(&pending);
spin_lock(&cache_defer_lock);
hlist_for_each_entry_safe(dreq, tmp, &cache_defer_hash[hash], hash)
@@ -756,10 +767,8 @@ static void cache_revisit_request(struct cache_head *item)
void cache_clean_deferred(void *owner)
{
struct cache_deferred_req *dreq, *tmp;
- struct list_head pending;
+ LIST_HEAD(pending);
-
- INIT_LIST_HEAD(&pending);
spin_lock(&cache_defer_lock);
list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
@@ -1085,9 +1094,8 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
{
struct cache_queue *cq, *tmp;
struct cache_request *cr;
- struct list_head dequeued;
+ LIST_HEAD(dequeued);
- INIT_LIST_HEAD(&dequeued);
spin_lock(&queue_lock);
list_for_each_entry_safe(cq, tmp, &detail->queue, list)
if (!cq->reader) {
@@ -1431,15 +1439,11 @@ static int c_show(struct seq_file *m, void *p)
seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n",
convert_to_wallclock(cp->expiry_time),
kref_read(&cp->ref), cp->flags);
- cache_get(cp);
- if (cache_check(cd, cp, NULL))
- /* cache_check does a cache_put on failure */
+
+ if (cache_check_rcu(cd, cp, NULL))
+ seq_puts(m, "# ");
+ else if (cache_is_expired(cd, cp))
seq_puts(m, "# ");
- else {
- if (cache_is_expired(cd, cp))
- seq_puts(m, "# ");
- cache_put(cp, cd);
- }
return cd->cache_show(m, cd, cp);
}
@@ -1596,7 +1600,6 @@ static int cache_release_procfs(struct inode *inode, struct file *filp)
}
static const struct proc_ops cache_channel_proc_ops = {
- .proc_lseek = no_llseek,
.proc_read = cache_read_procfs,
.proc_write = cache_write_procfs,
.proc_poll = cache_poll_procfs,
@@ -1662,7 +1665,6 @@ static const struct proc_ops cache_flush_proc_ops = {
.proc_read = read_flush_procfs,
.proc_write = write_flush_procfs,
.proc_release = release_flush_procfs,
- .proc_lseek = no_llseek,
};
static void remove_cache_proc_entries(struct cache_detail *cd)
@@ -1673,12 +1675,14 @@ static void remove_cache_proc_entries(struct cache_detail *cd)
}
}
-#ifdef CONFIG_PROC_FS
static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
{
struct proc_dir_entry *p;
struct sunrpc_net *sn;
+ if (!IS_ENABLED(CONFIG_PROC_FS))
+ return 0;
+
sn = net_generic(net, sunrpc_net_id);
cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc);
if (cd->procfs == NULL)
@@ -1706,12 +1710,6 @@ out_nomem:
remove_cache_proc_entries(cd);
return -ENOMEM;
}
-#else /* CONFIG_PROC_FS */
-static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
-{
- return 0;
-}
-#endif
void __init cache_initialize(void)
{
@@ -1815,7 +1813,6 @@ static int cache_release_pipefs(struct inode *inode, struct file *filp)
const struct file_operations cache_file_operations_pipefs = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = cache_read_pipefs,
.write = cache_write_pipefs,
.poll = cache_poll_pipefs,
@@ -1881,7 +1878,6 @@ const struct file_operations cache_flush_operations_pipefs = {
.read = read_flush_pipefs,
.write = write_flush_pipefs,
.release = release_flush_pipefs,
- .llseek = no_llseek,
};
int sunrpc_cache_register_pipefs(struct dentry *parent,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0b0b9f1eed46..58442ae1c2da 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -48,13 +48,8 @@
# define RPCDBG_FACILITY RPCDBG_CALL
#endif
-/*
- * All RPC clients are linked into this list
- */
-
static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
-
static void call_start(struct rpc_task *task);
static void call_reserve(struct rpc_task *task);
static void call_reserveresult(struct rpc_task *task);
@@ -111,50 +106,52 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
- __rpc_clnt_remove_pipedir(clnt);
+ if (pipefs_sb == clnt->pipefs_sb)
+ __rpc_clnt_remove_pipedir(clnt);
rpc_put_sb_net(net);
}
}
-static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
+static int rpc_setup_pipedir_sb(struct super_block *sb,
struct rpc_clnt *clnt)
{
static uint32_t clntid;
const char *dir_name = clnt->cl_program->pipe_dir_name;
char name[15];
- struct dentry *dir, *dentry;
+ struct dentry *dir;
+ int err;
dir = rpc_d_lookup_sb(sb, dir_name);
if (dir == NULL) {
pr_info("RPC: pipefs directory doesn't exist: %s\n", dir_name);
- return dir;
+ return -ENOENT;
}
for (;;) {
snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++);
name[sizeof(name) - 1] = '\0';
- dentry = rpc_create_client_dir(dir, name, clnt);
- if (!IS_ERR(dentry))
+ err = rpc_create_client_dir(dir, name, clnt);
+ if (!err)
break;
- if (dentry == ERR_PTR(-EEXIST))
+ if (err == -EEXIST)
continue;
printk(KERN_INFO "RPC: Couldn't create pipefs entry"
- " %s/%s, error %ld\n",
- dir_name, name, PTR_ERR(dentry));
+ " %s/%s, error %d\n",
+ dir_name, name, err);
break;
}
dput(dir);
- return dentry;
+ return err;
}
static int
rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
{
- struct dentry *dentry;
+ clnt->pipefs_sb = pipefs_sb;
if (clnt->cl_program->pipe_dir_name != NULL) {
- dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
+ int err = rpc_setup_pipedir_sb(pipefs_sb, clnt);
+ if (err && err != -ENOENT)
+ return err;
}
return 0;
}
@@ -182,16 +179,9 @@ static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
struct super_block *sb)
{
- struct dentry *dentry;
-
switch (event) {
case RPC_PIPEFS_MOUNT:
- dentry = rpc_setup_pipedir_sb(sb, clnt);
- if (!dentry)
- return -ENOENT;
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- break;
+ return rpc_setup_pipedir_sb(sb, clnt);
case RPC_PIPEFS_UMOUNT:
__rpc_clnt_remove_pipedir(clnt);
break;
@@ -272,9 +262,6 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt,
old = rcu_dereference_protected(clnt->cl_xprt,
lockdep_is_held(&clnt->cl_lock));
- if (!xprt_bound(xprt))
- clnt->cl_autobind = 1;
-
clnt->cl_timeout = timeout;
rcu_assign_pointer(clnt->cl_xprt, xprt);
spin_unlock(&clnt->cl_lock);
@@ -284,8 +271,14 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt,
static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
{
- clnt->cl_nodelen = strlcpy(clnt->cl_nodename,
- nodename, sizeof(clnt->cl_nodename));
+ ssize_t copied;
+
+ copied = strscpy(clnt->cl_nodename,
+ nodename, sizeof(clnt->cl_nodename));
+
+ clnt->cl_nodelen = copied < 0
+ ? sizeof(clnt->cl_nodename) - 1
+ : copied;
}
static int rpc_client_register(struct rpc_clnt *clnt,
@@ -385,6 +378,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
if (!clnt)
goto out_err;
clnt->cl_parent = parent ? : clnt;
+ clnt->cl_xprtsec = args->xprtsec;
err = rpc_alloc_clid(clnt);
if (err)
@@ -395,7 +389,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
clnt->cl_maxproc = version->nrprocs;
clnt->cl_prog = args->prognumber ? : program->number;
clnt->cl_vers = version->number;
- clnt->cl_stats = program->stats;
+ clnt->cl_stats = args->stats ? : program->stats;
clnt->cl_metrics = rpc_alloc_iostats(clnt);
rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects);
err = -ENOMEM;
@@ -434,7 +428,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
if (parent)
refcount_inc(&parent->cl_count);
- trace_rpc_clnt_new(clnt, xprt, program->name, args->servername);
+ trace_rpc_clnt_new(clnt, xprt, args);
return clnt;
out_no_path:
@@ -507,6 +501,8 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
clnt->cl_discrtry = 1;
if (!(args->flags & RPC_CLNT_CREATE_QUIET))
clnt->cl_chatty = 1;
+ if (args->flags & RPC_CLNT_CREATE_NETUNREACH_FATAL)
+ clnt->cl_netunreach_fatal = 1;
return clnt;
}
@@ -532,8 +528,11 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
.addrlen = args->addrsize,
.servername = args->servername,
.bc_xprt = args->bc_xprt,
+ .xprtsec = args->xprtsec,
+ .connect_timeout = args->connect_timeout,
+ .reconnect_timeout = args->reconnect_timeout,
};
- char servername[48];
+ char servername[RPC_MAXNETNAMELEN];
struct rpc_clnt *clnt;
int i;
@@ -565,8 +564,12 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
servername[0] = '\0';
switch (args->address->sa_family) {
case AF_LOCAL:
- snprintf(servername, sizeof(servername), "%s",
- sun->sun_path);
+ if (sun->sun_path[0])
+ snprintf(servername, sizeof(servername), "%s",
+ sun->sun_path);
+ else
+ snprintf(servername, sizeof(servername), "@%s",
+ sun->sun_path+1);
break;
case AF_INET:
snprintf(servername, sizeof(servername), "%pI4",
@@ -650,6 +653,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
new->cl_noretranstimeo = clnt->cl_noretranstimeo;
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
+ new->cl_netunreach_fatal = clnt->cl_netunreach_fatal;
new->cl_principal = clnt->cl_principal;
new->cl_max_connect = clnt->cl_max_connect;
return new;
@@ -674,6 +678,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
.version = clnt->cl_vers,
.authflavor = clnt->cl_auth->au_flavor,
.cred = clnt->cl_cred,
+ .stats = clnt->cl_stats,
};
return __rpc_clone_client(&args, clnt);
}
@@ -696,6 +701,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
.version = clnt->cl_vers,
.authflavor = flavor,
.cred = clnt->cl_cred,
+ .stats = clnt->cl_stats,
};
return __rpc_clone_client(&args, clnt);
}
@@ -727,6 +733,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
struct rpc_clnt *parent;
int err;
+ args->xprtsec = clnt->cl_xprtsec;
xprt = xprt_create_transport(args);
if (IS_ERR(xprt))
return PTR_ERR(xprt);
@@ -785,15 +792,24 @@ out_revert:
}
EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
-static
-int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi,
- void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps))
+static struct rpc_xprt_switch *rpc_clnt_xprt_switch_get(struct rpc_clnt *clnt)
{
struct rpc_xprt_switch *xps;
rcu_read_lock();
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
rcu_read_unlock();
+
+ return xps;
+}
+
+static
+int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi,
+ void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps))
+{
+ struct rpc_xprt_switch *xps;
+
+ xps = rpc_clnt_xprt_switch_get(clnt);
if (xps == NULL)
return -EAGAIN;
func(xpi, xps);
@@ -934,12 +950,17 @@ void rpc_shutdown_client(struct rpc_clnt *clnt)
trace_rpc_clnt_shutdown(clnt);
+ clnt->cl_shutdown = 1;
while (!list_empty(&clnt->cl_tasks)) {
rpc_killall_tasks(clnt);
wait_event_timeout(destroy_wait,
list_empty(&clnt->cl_tasks), 1*HZ);
}
+ /* wait for tasks still in workqueue or waitqueue */
+ wait_event_timeout(destroy_wait,
+ atomic_read(&clnt->cl_task_count) == 0, 1 * HZ);
+
rpc_release_client(clnt);
}
EXPORT_SYMBOL_GPL(rpc_shutdown_client);
@@ -1041,6 +1062,8 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
.version = vers,
.authflavor = old->cl_auth->au_flavor,
.cred = old->cl_cred,
+ .stats = old->cl_stats,
+ .timeout = old->cl_timeout,
};
struct rpc_clnt *clnt;
int err;
@@ -1113,6 +1136,7 @@ void rpc_task_release_client(struct rpc_task *task)
list_del(&task->tk_task);
spin_unlock(&clnt->cl_lock);
task->tk_client = NULL;
+ atomic_dec(&clnt->cl_task_count);
rpc_release_client(clnt);
}
@@ -1163,10 +1187,9 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
task->tk_flags |= RPC_TASK_TIMEOUT;
if (clnt->cl_noretranstimeo)
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
- /* Add to the client's list of all tasks */
- spin_lock(&clnt->cl_lock);
- list_add_tail(&task->tk_task, &clnt->cl_tasks);
- spin_unlock(&clnt->cl_lock);
+ if (clnt->cl_netunreach_fatal)
+ task->tk_flags |= RPC_TASK_NETUNREACH_FATAL;
+ atomic_inc(&clnt->cl_task_count);
}
static void
@@ -1290,8 +1313,10 @@ static void call_bc_encode(struct rpc_task *task);
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
* @req: RPC request
+ * @timeout: timeout values to use for this task
*/
-struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
+ struct rpc_timeout *timeout)
{
struct rpc_task *task;
struct rpc_task_setup task_setup_data = {
@@ -1310,7 +1335,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
return task;
}
- xprt_init_bc_request(req, task);
+ xprt_init_bc_request(req, task, timeout);
task->tk_action = call_bc_encode;
atomic_inc(&task->tk_count);
@@ -1432,12 +1457,12 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
switch (sap->sa_family) {
case AF_INET:
err = kernel_bind(sock,
- (struct sockaddr *)&rpc_inaddr_loopback,
+ (struct sockaddr_unsized *)&rpc_inaddr_loopback,
sizeof(rpc_inaddr_loopback));
break;
case AF_INET6:
err = kernel_bind(sock,
- (struct sockaddr *)&rpc_in6addr_loopback,
+ (struct sockaddr_unsized *)&rpc_in6addr_loopback,
sizeof(rpc_in6addr_loopback));
break;
default:
@@ -1449,7 +1474,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
goto out_release;
}
- err = kernel_connect(sock, sap, salen, 0);
+ err = kernel_connect(sock, (struct sockaddr_unsized *)sap, salen, 0);
if (err < 0) {
dprintk("RPC: can't connect UDP socket (%d)\n", err);
goto out_release;
@@ -1717,6 +1742,11 @@ call_start(struct rpc_task *task)
trace_rpc_request(task);
+ if (task->tk_client->cl_shutdown) {
+ rpc_call_rpcerror(task, -EIO);
+ return;
+ }
+
/* Increment call count (version might not be valid for ping) */
if (clnt->cl_program->version[clnt->cl_vers])
clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
@@ -1754,9 +1784,14 @@ call_reserveresult(struct rpc_task *task)
if (status >= 0) {
if (task->tk_rqstp) {
task->tk_action = call_refresh;
+
+ /* Add to the client's list of all tasks */
+ spin_lock(&task->tk_client->cl_lock);
+ if (list_empty(&task->tk_task))
+ list_add_tail(&task->tk_task, &task->tk_client->cl_tasks);
+ spin_unlock(&task->tk_client->cl_lock);
return;
}
-
rpc_call_rpcerror(task, -EIO);
return;
}
@@ -1821,13 +1856,13 @@ call_refreshresult(struct rpc_task *task)
fallthrough;
case -EAGAIN:
status = -EACCES;
- fallthrough;
- case -EKEYEXPIRED:
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
trace_rpc_retry_refresh_status(task);
return;
+ case -EKEYEXPIRED:
+ break;
case -ENOMEM:
rpc_delay(task, HZ >> 4);
return;
@@ -1855,12 +1890,6 @@ call_allocate(struct rpc_task *task)
if (req->rq_buffer)
return;
- if (proc->p_proc != 0) {
- BUG_ON(proc->p_arglen == 0);
- if (proc->p_decode != NULL)
- BUG_ON(proc->p_replen == 0);
- }
-
/*
* Calculate the size (in quads) of the RPC call
* and reply headers, and convert both values
@@ -2050,9 +2079,6 @@ call_bind_status(struct rpc_task *task)
status = -EOPNOTSUPP;
break;
}
- if (task->tk_rebind_retry == 0)
- break;
- task->tk_rebind_retry--;
rpc_delay(task, 3*HZ);
goto retry_timeout;
case -ENOBUFS:
@@ -2070,14 +2096,17 @@ call_bind_status(struct rpc_task *task)
case -EPROTONOSUPPORT:
trace_rpcb_bind_version_err(task);
goto retry_timeout;
+ case -ENETDOWN:
+ case -ENETUNREACH:
+ if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL)
+ break;
+ fallthrough;
case -ECONNREFUSED: /* connection problems */
case -ECONNRESET:
case -ECONNABORTED:
case -ENOTCONN:
case -EHOSTDOWN:
- case -ENETDOWN:
case -EHOSTUNREACH:
- case -ENETUNREACH:
case -EPIPE:
trace_rpcb_unreachable_err(task);
if (!RPC_IS_SOFTCONN(task)) {
@@ -2159,19 +2188,22 @@ call_connect_status(struct rpc_task *task)
task->tk_status = 0;
switch (status) {
+ case -ENETDOWN:
+ case -ENETUNREACH:
+ if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL)
+ break;
+ fallthrough;
case -ECONNREFUSED:
+ case -ECONNRESET:
/* A positive refusal suggests a rebind is needed. */
- if (RPC_IS_SOFTCONN(task))
- break;
if (clnt->cl_autobind) {
rpc_force_rebind(clnt);
+ if (RPC_IS_SOFTCONN(task))
+ break;
goto out_retry;
}
fallthrough;
- case -ECONNRESET:
case -ECONNABORTED:
- case -ENETDOWN:
- case -ENETUNREACH:
case -EHOSTUNREACH:
case -EPIPE:
case -EPROTO:
@@ -2192,9 +2224,7 @@ call_connect_status(struct rpc_task *task)
struct rpc_xprt *saved = task->tk_xprt;
struct rpc_xprt_switch *xps;
- rcu_read_lock();
- xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
- rcu_read_unlock();
+ xps = rpc_clnt_xprt_switch_get(clnt);
if (xps->xps_nxprts > 1) {
long value;
@@ -2209,7 +2239,7 @@ call_connect_status(struct rpc_task *task)
}
xprt_switch_put(xps);
if (!task->tk_xprt)
- return;
+ goto out;
}
goto out_retry;
case -ENOBUFS:
@@ -2224,6 +2254,7 @@ out_next:
out_retry:
/* Check for timeouts before looping back to call_bind */
task->tk_action = call_bind;
+out:
rpc_check_timeout(task);
}
@@ -2292,12 +2323,13 @@ call_transmit_status(struct rpc_task *task)
task->tk_action = call_transmit;
task->tk_status = 0;
break;
- case -ECONNREFUSED:
case -EHOSTDOWN:
case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPERM:
+ break;
+ case -ECONNREFUSED:
if (RPC_IS_SOFTCONN(task)) {
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt,
@@ -2423,10 +2455,13 @@ call_status(struct rpc_task *task)
trace_rpc_call_status(task);
task->tk_status = 0;
switch(status) {
- case -EHOSTDOWN:
case -ENETDOWN:
- case -EHOSTUNREACH:
case -ENETUNREACH:
+ if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL)
+ goto out_exit;
+ fallthrough;
+ case -EHOSTDOWN:
+ case -EHOSTUNREACH:
case -EPERM:
if (RPC_IS_SOFTCONN(task))
goto out_exit;
@@ -2465,8 +2500,7 @@ call_status(struct rpc_task *task)
goto out_exit;
}
task->tk_action = call_encode;
- if (status != -ECONNRESET && status != -ECONNABORTED)
- rpc_check_timeout(task);
+ rpc_check_timeout(task);
return;
out_exit:
rpc_call_rpcerror(task, status);
@@ -2593,6 +2627,7 @@ out:
case 0:
task->tk_action = rpc_exit_task;
task->tk_status = rpcauth_unwrap_resp(task, &xdr);
+ xdr_finish_decode(&xdr);
return;
case -EAGAIN:
task->tk_status = 0;
@@ -2665,8 +2700,19 @@ rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
goto out_msg_denied;
error = rpcauth_checkverf(task, xdr);
- if (error)
+ if (error) {
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+
+ if (!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
+ rpcauth_invalcred(task);
+ if (!task->tk_cred_retry)
+ goto out_err;
+ task->tk_cred_retry--;
+ trace_rpc__stale_creds(task);
+ return -EKEYREJECTED;
+ }
goto out_verifier;
+ }
p = xdr_inline_decode(xdr, sizeof(*p));
if (!p)
@@ -2713,7 +2759,20 @@ out_unparsable:
out_verifier:
trace_rpc_bad_verifier(task);
- goto out_err;
+ switch (error) {
+ case -EPROTONOSUPPORT:
+ goto out_err;
+ case -EACCES:
+ /* possible RPCSEC_GSS out-of-sequence event (RFC2203),
+ * reset recv state and keep waiting, don't retransmit
+ */
+ task->tk_rqstp->rq_reply_bytes_recvd = 0;
+ task->tk_status = xprt_request_enqueue_receive(task);
+ task->tk_action = call_transmit_status;
+ return -EBADMSG;
+ default:
+ goto out_garbage;
+ }
out_msg_denied:
error = -EACCES;
@@ -2739,6 +2798,7 @@ out_msg_denied:
case rpc_autherr_rejectedverf:
case rpcsec_gsserr_credproblem:
case rpcsec_gsserr_ctxproblem:
+ rpcauth_invalcred(task);
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
@@ -2829,6 +2889,9 @@ static int rpc_ping(struct rpc_clnt *clnt)
struct rpc_task *task;
int status;
+ if (clnt->cl_auth->au_ops->ping)
+ return clnt->cl_auth->au_ops->ping(clnt);
+
task = rpc_call_null_helper(clnt, NULL, NULL, 0, NULL, NULL);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -2892,19 +2955,22 @@ static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
* @clnt: pointer to struct rpc_clnt
* @xps: pointer to struct rpc_xprt_switch,
* @xprt: pointer struct rpc_xprt
- * @dummy: unused
+ * @in_max_connect: pointer to the max_connect value for the passed in xprt transport
*/
int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
struct rpc_xprt_switch *xps, struct rpc_xprt *xprt,
- void *dummy)
+ void *in_max_connect)
{
struct rpc_cb_add_xprt_calldata *data;
struct rpc_task *task;
+ int max_connect = clnt->cl_max_connect;
- if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) {
+ if (in_max_connect)
+ max_connect = *(int *)in_max_connect;
+ if (xps->xps_nunique_destaddr_xprts + 1 > max_connect) {
rcu_read_lock();
pr_warn("SUNRPC: reached max allowed number (%d) did not add "
- "transport to server: %s\n", clnt->cl_max_connect,
+ "transport to server: %s\n", max_connect,
rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
rcu_read_unlock();
return -EINVAL;
@@ -3049,6 +3115,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
if (!xprtargs->ident)
xprtargs->ident = ident;
+ xprtargs->xprtsec = clnt->cl_xprtsec;
xprt = xprt_create_transport(xprtargs);
if (IS_ERR(xprt)) {
ret = PTR_ERR(xprt);
@@ -3056,6 +3123,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
}
xprt->resvport = resvport;
xprt->reuseport = reuseport;
+
+ if (xprtargs->connect_timeout)
+ connect_timeout = xprtargs->connect_timeout;
+ if (xprtargs->reconnect_timeout)
+ reconnect_timeout = xprtargs->reconnect_timeout;
if (xprt->ops->set_connect_timeout != NULL)
xprt->ops->set_connect_timeout(xprt,
connect_timeout,
@@ -3080,7 +3152,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
struct rpc_add_xprt_test *data)
{
- struct rpc_xprt_switch *xps;
struct rpc_xprt *main_xprt;
int status = 0;
@@ -3088,7 +3159,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
rcu_read_lock();
main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
- xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
status = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr,
(struct sockaddr *)&main_xprt->addr);
rcu_read_unlock();
@@ -3099,7 +3169,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
status = rpc_clnt_add_xprt_helper(clnt, xprt, data);
out:
xprt_put(xprt);
- xprt_switch_put(xps);
return status;
}
@@ -3214,34 +3283,27 @@ rpc_set_connect_timeout(struct rpc_clnt *clnt,
}
EXPORT_SYMBOL_GPL(rpc_set_connect_timeout);
-void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
-{
- rcu_read_lock();
- xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put);
-
void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
struct rpc_xprt_switch *xps;
- rcu_read_lock();
- xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
- rcu_read_unlock();
+ xps = rpc_clnt_xprt_switch_get(clnt);
xprt_set_online_locked(xprt, xps);
+ xprt_switch_put(xps);
}
void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
+ struct rpc_xprt_switch *xps;
+
if (rpc_clnt_xprt_switch_has_addr(clnt,
(const struct sockaddr *)&xprt->addr)) {
return rpc_clnt_xprt_set_online(clnt, xprt);
}
- rcu_read_lock();
- rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
- xprt);
- rcu_read_unlock();
+
+ xps = rpc_clnt_xprt_switch_get(clnt);
+ rpc_xprt_switch_add_xprt(xps, xprt);
+ xprt_switch_put(xps);
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);
@@ -3273,8 +3335,11 @@ bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_has_addr);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-static void rpc_show_header(void)
+static void rpc_show_header(struct rpc_clnt *clnt)
{
+ printk(KERN_INFO "clnt[%pISpc] RPC tasks[%d]\n",
+ (struct sockaddr *)&clnt->cl_xprt->addr,
+ atomic_read(&clnt->cl_task_count));
printk(KERN_INFO "-pid- flgs status -client- --rqstp- "
"-timeout ---ops--\n");
}
@@ -3306,7 +3371,7 @@ void rpc_show_tasks(struct net *net)
spin_lock(&clnt->cl_lock);
list_for_each_entry(task, &clnt->cl_tasks, tk_task) {
if (!header) {
- rpc_show_header();
+ rpc_show_header(clnt);
header++;
}
rpc_show_task(clnt, task);
@@ -3350,6 +3415,8 @@ rpc_clnt_swap_deactivate_callback(struct rpc_clnt *clnt,
void
rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
{
+ while (clnt != clnt->cl_parent)
+ clnt = clnt->cl_parent;
if (atomic_dec_if_positive(&clnt->cl_swapper) == 0)
rpc_clnt_iterate_for_each_xprt(clnt,
rpc_clnt_swap_deactivate_callback, NULL);
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index a176d5a0b0ee..32417db340de 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -74,6 +74,9 @@ tasks_stop(struct seq_file *f, void *v)
{
struct rpc_clnt *clnt = f->private;
spin_unlock(&clnt->cl_lock);
+ seq_printf(f, "clnt[%pISpc] RPC tasks[%d]\n",
+ (struct sockaddr *)&clnt->cl_xprt->addr,
+ atomic_read(&clnt->cl_task_count));
}
static const struct seq_operations tasks_seq_operations = {
@@ -179,6 +182,18 @@ xprt_info_show(struct seq_file *f, void *v)
seq_printf(f, "addr: %s\n", xprt->address_strings[RPC_DISPLAY_ADDR]);
seq_printf(f, "port: %s\n", xprt->address_strings[RPC_DISPLAY_PORT]);
seq_printf(f, "state: 0x%lx\n", xprt->state);
+ seq_printf(f, "netns: %u\n", xprt->xprt_net->ns.inum);
+
+ if (xprt->ops->get_srcaddr) {
+ int ret, buflen;
+ char buf[INET6_ADDRSTRLEN];
+
+ buflen = ARRAY_SIZE(buf);
+ ret = xprt->ops->get_srcaddr(xprt, buf, buflen);
+ if (ret < 0)
+ ret = sprintf(buf, "<closed>");
+ seq_printf(f, "saddr: %.*s\n", ret, buf);
+ }
return 0;
}
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 7ec10b92bea1..4efb5f28d881 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -33,6 +33,7 @@ struct sunrpc_net {
int pipe_version;
atomic_t pipe_users;
struct proc_dir_entry *use_gssp_proc;
+ struct proc_dir_entry *gss_krb5_enctypes;
};
extern unsigned int sunrpc_net_id;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0b6034fab9ab..379daefc4847 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -168,8 +168,9 @@ rpc_inode_setowner(struct inode *inode, void *private)
}
static void
-rpc_close_pipes(struct inode *inode)
+rpc_close_pipes(struct dentry *dentry)
{
+ struct inode *inode = dentry->d_inode;
struct rpc_pipe *pipe = RPC_I(inode)->pipe;
int need_release;
LIST_HEAD(free_list);
@@ -385,7 +386,6 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static const struct file_operations rpc_pipe_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.read = rpc_pipe_read,
.write = rpc_pipe_write,
.poll = rpc_pipe_poll,
@@ -472,7 +472,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
return NULL;
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ simple_inode_init_ts(inode);
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
@@ -485,60 +485,6 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
return inode;
}
-static int __rpc_create_common(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private)
-{
- struct inode *inode;
-
- d_drop(dentry);
- inode = rpc_get_inode(dir->i_sb, mode);
- if (!inode)
- goto out_err;
- inode->i_ino = iunique(dir->i_sb, 100);
- if (i_fop)
- inode->i_fop = i_fop;
- if (private)
- rpc_inode_setowner(inode, private);
- d_add(dentry, inode);
- return 0;
-out_err:
- printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %pd\n",
- __FILE__, __func__, dentry);
- dput(dentry);
- return -ENOMEM;
-}
-
-static int __rpc_create(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private)
-{
- int err;
-
- err = __rpc_create_common(dir, dentry, S_IFREG | mode, i_fop, private);
- if (err)
- return err;
- fsnotify_create(dir, dentry);
- return 0;
-}
-
-static int __rpc_mkdir(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private)
-{
- int err;
-
- err = __rpc_create_common(dir, dentry, S_IFDIR | mode, i_fop, private);
- if (err)
- return err;
- inc_nlink(dir);
- fsnotify_mkdir(dir, dentry);
- return 0;
-}
-
static void
init_pipe(struct rpc_pipe *pipe)
{
@@ -575,119 +521,58 @@ struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags)
}
EXPORT_SYMBOL_GPL(rpc_mkpipe_data);
-static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry,
- umode_t mode,
- const struct file_operations *i_fop,
- void *private,
- struct rpc_pipe *pipe)
+static int rpc_new_file(struct dentry *parent,
+ const char *name,
+ umode_t mode,
+ const struct file_operations *i_fop,
+ void *private)
{
- struct rpc_inode *rpci;
- int err;
+ struct dentry *dentry = simple_start_creating(parent, name);
+ struct inode *dir = parent->d_inode;
+ struct inode *inode;
- err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private);
- if (err)
- return err;
- rpci = RPC_I(d_inode(dentry));
- rpci->private = private;
- rpci->pipe = pipe;
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+
+ inode = rpc_get_inode(dir->i_sb, S_IFREG | mode);
+ if (unlikely(!inode)) {
+ simple_done_creating(dentry);
+ return -ENOMEM;
+ }
+ inode->i_ino = iunique(dir->i_sb, 100);
+ if (i_fop)
+ inode->i_fop = i_fop;
+ rpc_inode_setowner(inode, private);
+ d_make_persistent(dentry, inode);
fsnotify_create(dir, dentry);
+ simple_done_creating(dentry);
return 0;
}
-static int __rpc_rmdir(struct inode *dir, struct dentry *dentry)
+static struct dentry *rpc_new_dir(struct dentry *parent,
+ const char *name,
+ umode_t mode)
{
- int ret;
-
- dget(dentry);
- ret = simple_rmdir(dir, dentry);
- d_drop(dentry);
- if (!ret)
- fsnotify_rmdir(dir, dentry);
- dput(dentry);
- return ret;
-}
-
-static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
-{
- int ret;
-
- dget(dentry);
- ret = simple_unlink(dir, dentry);
- d_drop(dentry);
- if (!ret)
- fsnotify_unlink(dir, dentry);
- dput(dentry);
- return ret;
-}
-
-static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry)
-{
- struct inode *inode = d_inode(dentry);
-
- rpc_close_pipes(inode);
- return __rpc_unlink(dir, dentry);
-}
+ struct dentry *dentry = simple_start_creating(parent, name);
+ struct inode *dir = parent->d_inode;
+ struct inode *inode;
-static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
- const char *name)
-{
- struct qstr q = QSTR_INIT(name, strlen(name));
- struct dentry *dentry = d_hash_and_lookup(parent, &q);
- if (!dentry) {
- dentry = d_alloc(parent, &q);
- if (!dentry)
- return ERR_PTR(-ENOMEM);
- }
- if (d_really_is_negative(dentry))
+ if (IS_ERR(dentry))
return dentry;
- dput(dentry);
- return ERR_PTR(-EEXIST);
-}
-
-/*
- * FIXME: This probably has races.
- */
-static void __rpc_depopulate(struct dentry *parent,
- const struct rpc_filelist *files,
- int start, int eof)
-{
- struct inode *dir = d_inode(parent);
- struct dentry *dentry;
- struct qstr name;
- int i;
-
- for (i = start; i < eof; i++) {
- name.name = files[i].name;
- name.len = strlen(files[i].name);
- dentry = d_hash_and_lookup(parent, &name);
- if (dentry == NULL)
- continue;
- if (d_really_is_negative(dentry))
- goto next;
- switch (d_inode(dentry)->i_mode & S_IFMT) {
- default:
- BUG();
- case S_IFREG:
- __rpc_unlink(dir, dentry);
- break;
- case S_IFDIR:
- __rpc_rmdir(dir, dentry);
- }
-next:
- dput(dentry);
+ inode = rpc_get_inode(dir->i_sb, S_IFDIR | mode);
+ if (unlikely(!inode)) {
+ simple_done_creating(dentry);
+ return ERR_PTR(-ENOMEM);
}
-}
-static void rpc_depopulate(struct dentry *parent,
- const struct rpc_filelist *files,
- int start, int eof)
-{
- struct inode *dir = d_inode(parent);
+ inode->i_ino = iunique(dir->i_sb, 100);
+ inc_nlink(dir);
+ d_make_persistent(dentry, inode);
+ fsnotify_mkdir(dir, dentry);
+ simple_done_creating(dentry);
- inode_lock_nested(dir, I_MUTEX_CHILD);
- __rpc_depopulate(parent, files, start, eof);
- inode_unlock(dir);
+ return dentry; // borrowed
}
static int rpc_populate(struct dentry *parent,
@@ -695,92 +580,39 @@ static int rpc_populate(struct dentry *parent,
int start, int eof,
void *private)
{
- struct inode *dir = d_inode(parent);
struct dentry *dentry;
int i, err;
- inode_lock(dir);
for (i = start; i < eof; i++) {
- dentry = __rpc_lookup_create_exclusive(parent, files[i].name);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto out_bad;
switch (files[i].mode & S_IFMT) {
default:
BUG();
case S_IFREG:
- err = __rpc_create(dir, dentry,
+ err = rpc_new_file(parent,
+ files[i].name,
files[i].mode,
files[i].i_fop,
private);
+ if (err)
+ goto out_bad;
break;
case S_IFDIR:
- err = __rpc_mkdir(dir, dentry,
- files[i].mode,
- NULL,
- private);
+ dentry = rpc_new_dir(parent,
+ files[i].name,
+ files[i].mode);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto out_bad;
+ }
}
- if (err != 0)
- goto out_bad;
}
- inode_unlock(dir);
return 0;
out_bad:
- __rpc_depopulate(parent, files, start, eof);
- inode_unlock(dir);
printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
__FILE__, __func__, parent);
return err;
}
-static struct dentry *rpc_mkdir_populate(struct dentry *parent,
- const char *name, umode_t mode, void *private,
- int (*populate)(struct dentry *, void *), void *args_populate)
-{
- struct dentry *dentry;
- struct inode *dir = d_inode(parent);
- int error;
-
- inode_lock_nested(dir, I_MUTEX_PARENT);
- dentry = __rpc_lookup_create_exclusive(parent, name);
- if (IS_ERR(dentry))
- goto out;
- error = __rpc_mkdir(dir, dentry, mode, NULL, private);
- if (error != 0)
- goto out_err;
- if (populate != NULL) {
- error = populate(dentry, args_populate);
- if (error)
- goto err_rmdir;
- }
-out:
- inode_unlock(dir);
- return dentry;
-err_rmdir:
- __rpc_rmdir(dir, dentry);
-out_err:
- dentry = ERR_PTR(error);
- goto out;
-}
-
-static int rpc_rmdir_depopulate(struct dentry *dentry,
- void (*depopulate)(struct dentry *))
-{
- struct dentry *parent;
- struct inode *dir;
- int error;
-
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- inode_lock_nested(dir, I_MUTEX_PARENT);
- if (depopulate != NULL)
- depopulate(dentry);
- error = __rpc_rmdir(dir, dentry);
- inode_unlock(dir);
- dput(parent);
- return error;
-}
-
/**
* rpc_mkpipe_dentry - make an rpc_pipefs file for kernel<->userspace
* communication
@@ -800,11 +632,13 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
* The @private argument passed here will be available to all these methods
* from the file pointer, via RPC_I(file_inode(file))->private.
*/
-struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
+int rpc_mkpipe_dentry(struct dentry *parent, const char *name,
void *private, struct rpc_pipe *pipe)
{
- struct dentry *dentry;
struct inode *dir = d_inode(parent);
+ struct dentry *dentry;
+ struct inode *inode;
+ struct rpc_inode *rpci;
umode_t umode = S_IFIFO | 0600;
int err;
@@ -813,48 +647,52 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
if (pipe->ops->downcall == NULL)
umode &= ~0222;
- inode_lock_nested(dir, I_MUTEX_PARENT);
- dentry = __rpc_lookup_create_exclusive(parent, name);
- if (IS_ERR(dentry))
- goto out;
- err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops,
- private, pipe);
- if (err)
- goto out_err;
-out:
- inode_unlock(dir);
- return dentry;
-out_err:
- dentry = ERR_PTR(err);
- printk(KERN_WARNING "%s: %s() failed to create pipe %pd/%s (errno = %d)\n",
- __FILE__, __func__, parent, name,
- err);
- goto out;
+ dentry = simple_start_creating(parent, name);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
+ goto failed;
+ }
+
+ inode = rpc_get_inode(dir->i_sb, umode);
+ if (unlikely(!inode)) {
+ simple_done_creating(dentry);
+ err = -ENOMEM;
+ goto failed;
+ }
+ inode->i_ino = iunique(dir->i_sb, 100);
+ inode->i_fop = &rpc_pipe_fops;
+ rpci = RPC_I(inode);
+ rpci->private = private;
+ rpci->pipe = pipe;
+ rpc_inode_setowner(inode, private);
+ pipe->dentry = dentry; // borrowed
+ d_make_persistent(dentry, inode);
+ fsnotify_create(dir, dentry);
+ simple_done_creating(dentry);
+ return 0;
+
+failed:
+ pr_warn("%s() failed to create pipe %pd/%s (errno = %d)\n",
+ __func__, parent, name, err);
+ return err;
}
EXPORT_SYMBOL_GPL(rpc_mkpipe_dentry);
/**
* rpc_unlink - remove a pipe
- * @dentry: dentry for the pipe, as returned from rpc_mkpipe
+ * @pipe: the pipe to be removed
*
* After this call, lookups will no longer find the pipe, and any
* attempts to read or write using preexisting opens of the pipe will
* return -EPIPE.
*/
-int
-rpc_unlink(struct dentry *dentry)
+void
+rpc_unlink(struct rpc_pipe *pipe)
{
- struct dentry *parent;
- struct inode *dir;
- int error = 0;
-
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- inode_lock_nested(dir, I_MUTEX_PARENT);
- error = __rpc_rmpipe(dir, dentry);
- inode_unlock(dir);
- dput(parent);
- return error;
+ if (pipe->dentry) {
+ simple_recursive_removal(pipe->dentry, rpc_close_pipes);
+ pipe->dentry = NULL;
+ }
}
EXPORT_SYMBOL_GPL(rpc_unlink);
@@ -1011,31 +849,6 @@ rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
pdo->pdo_ops->destroy(dir, pdo);
}
-enum {
- RPCAUTH_info,
- RPCAUTH_EOF
-};
-
-static const struct rpc_filelist authfiles[] = {
- [RPCAUTH_info] = {
- .name = "info",
- .i_fop = &rpc_info_operations,
- .mode = S_IFREG | 0400,
- },
-};
-
-static int rpc_clntdir_populate(struct dentry *dentry, void *private)
-{
- return rpc_populate(dentry,
- authfiles, RPCAUTH_info, RPCAUTH_EOF,
- private);
-}
-
-static void rpc_clntdir_depopulate(struct dentry *dentry)
-{
- rpc_depopulate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF);
-}
-
/**
* rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs
* @dentry: the parent of new directory
@@ -1047,19 +860,27 @@ static void rpc_clntdir_depopulate(struct dentry *dentry)
* information about the client, together with any "pipes" that may
* later be created using rpc_mkpipe().
*/
-struct dentry *rpc_create_client_dir(struct dentry *dentry,
- const char *name,
- struct rpc_clnt *rpc_client)
+int rpc_create_client_dir(struct dentry *dentry,
+ const char *name,
+ struct rpc_clnt *rpc_client)
{
struct dentry *ret;
+ int err;
- ret = rpc_mkdir_populate(dentry, name, 0555, NULL,
- rpc_clntdir_populate, rpc_client);
- if (!IS_ERR(ret)) {
- rpc_client->cl_pipedir_objects.pdh_dentry = ret;
- rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+ ret = rpc_new_dir(dentry, name, 0555);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+ err = rpc_new_file(ret, "info", S_IFREG | 0400,
+ &rpc_info_operations, rpc_client);
+ if (err) {
+ pr_warn("%s failed to populate directory %pd\n",
+ __func__, ret);
+ simple_recursive_removal(ret, NULL);
+ return err;
}
- return ret;
+ rpc_client->cl_pipedir_objects.pdh_dentry = ret;
+ rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+ return 0;
}
/**
@@ -1074,7 +895,8 @@ int rpc_remove_client_dir(struct rpc_clnt *rpc_client)
return 0;
rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
rpc_client->cl_pipedir_objects.pdh_dentry = NULL;
- return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate);
+ simple_recursive_removal(dentry, NULL);
+ return 0;
}
static const struct rpc_filelist cache_pipefs_files[3] = {
@@ -1095,28 +917,25 @@ static const struct rpc_filelist cache_pipefs_files[3] = {
},
};
-static int rpc_cachedir_populate(struct dentry *dentry, void *private)
-{
- return rpc_populate(dentry,
- cache_pipefs_files, 0, 3,
- private);
-}
-
-static void rpc_cachedir_depopulate(struct dentry *dentry)
-{
- rpc_depopulate(dentry, cache_pipefs_files, 0, 3);
-}
-
struct dentry *rpc_create_cache_dir(struct dentry *parent, const char *name,
umode_t umode, struct cache_detail *cd)
{
- return rpc_mkdir_populate(parent, name, umode, NULL,
- rpc_cachedir_populate, cd);
+ struct dentry *dentry;
+
+ dentry = rpc_new_dir(parent, name, umode);
+ if (!IS_ERR(dentry)) {
+ int error = rpc_populate(dentry, cache_pipefs_files, 0, 3, cd);
+ if (error) {
+ simple_recursive_removal(dentry, NULL);
+ return ERR_PTR(error);
+ }
+ }
+ return dentry;
}
void rpc_remove_cache_dir(struct dentry *dentry)
{
- rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate);
+ simple_recursive_removal(dentry, NULL);
}
/*
@@ -1142,7 +961,6 @@ enum {
RPCAUTH_nfsd4_cb,
RPCAUTH_cache,
RPCAUTH_nfsd,
- RPCAUTH_gssd,
RPCAUTH_RootEOF
};
@@ -1179,10 +997,6 @@ static const struct rpc_filelist files[] = {
.name = "nfsd",
.mode = S_IFDIR | 0555,
},
- [RPCAUTH_gssd] = {
- .name = "gssd",
- .mode = S_IFDIR | 0555,
- },
};
/*
@@ -1191,8 +1005,7 @@ static const struct rpc_filelist files[] = {
struct dentry *rpc_d_lookup_sb(const struct super_block *sb,
const unsigned char *dir_name)
{
- struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name));
- return d_hash_and_lookup(sb->s_root, &dir);
+ return try_lookup_noperm(&QSTR(dir_name), sb->s_root);
}
EXPORT_SYMBOL_GPL(rpc_d_lookup_sb);
@@ -1243,13 +1056,6 @@ void rpc_put_sb_net(const struct net *net)
}
EXPORT_SYMBOL_GPL(rpc_put_sb_net);
-static const struct rpc_filelist gssd_dummy_clnt_dir[] = {
- [0] = {
- .name = "clntXX",
- .mode = S_IFDIR | 0555,
- },
-};
-
static ssize_t
dummy_downcall(struct file *filp, const char __user *src, size_t len)
{
@@ -1278,14 +1084,6 @@ rpc_dummy_info_show(struct seq_file *m, void *v)
}
DEFINE_SHOW_ATTRIBUTE(rpc_dummy_info);
-static const struct rpc_filelist gssd_dummy_info_file[] = {
- [0] = {
- .name = "info",
- .i_fop = &rpc_dummy_info_fops,
- .mode = S_IFREG | 0400,
- },
-};
-
/**
* rpc_gssd_dummy_populate - create a dummy gssd pipe
* @root: root of the rpc_pipefs filesystem
@@ -1294,72 +1092,32 @@ static const struct rpc_filelist gssd_dummy_info_file[] = {
* Create a dummy set of directories and a pipe that gssd can hold open to
* indicate that it is up and running.
*/
-static struct dentry *
+static int
rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
{
- int ret = 0;
- struct dentry *gssd_dentry;
- struct dentry *clnt_dentry = NULL;
- struct dentry *pipe_dentry = NULL;
- struct qstr q = QSTR_INIT(files[RPCAUTH_gssd].name,
- strlen(files[RPCAUTH_gssd].name));
-
- /* We should never get this far if "gssd" doesn't exist */
- gssd_dentry = d_hash_and_lookup(root, &q);
- if (!gssd_dentry)
- return ERR_PTR(-ENOENT);
-
- ret = rpc_populate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1, NULL);
- if (ret) {
- pipe_dentry = ERR_PTR(ret);
- goto out;
- }
-
- q.name = gssd_dummy_clnt_dir[0].name;
- q.len = strlen(gssd_dummy_clnt_dir[0].name);
- clnt_dentry = d_hash_and_lookup(gssd_dentry, &q);
- if (!clnt_dentry) {
- __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
- pipe_dentry = ERR_PTR(-ENOENT);
- goto out;
- }
+ struct dentry *gssd_dentry, *clnt_dentry;
+ int err;
- ret = rpc_populate(clnt_dentry, gssd_dummy_info_file, 0, 1, NULL);
- if (ret) {
- __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
- pipe_dentry = ERR_PTR(ret);
- goto out;
- }
+ gssd_dentry = rpc_new_dir(root, "gssd", 0555);
+ if (IS_ERR(gssd_dentry))
+ return -ENOENT;
- pipe_dentry = rpc_mkpipe_dentry(clnt_dentry, "gssd", NULL, pipe_data);
- if (IS_ERR(pipe_dentry)) {
- __rpc_depopulate(clnt_dentry, gssd_dummy_info_file, 0, 1);
- __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
- }
-out:
- dput(clnt_dentry);
- dput(gssd_dentry);
- return pipe_dentry;
-}
+ clnt_dentry = rpc_new_dir(gssd_dentry, "clntXX", 0555);
+ if (IS_ERR(clnt_dentry))
+ return -ENOENT;
-static void
-rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry)
-{
- struct dentry *clnt_dir = pipe_dentry->d_parent;
- struct dentry *gssd_dir = clnt_dir->d_parent;
-
- dget(pipe_dentry);
- __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry);
- __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1);
- __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1);
- dput(pipe_dentry);
+ err = rpc_new_file(clnt_dentry, "info", 0400,
+ &rpc_dummy_info_fops, NULL);
+ if (!err)
+ err = rpc_mkpipe_dentry(clnt_dentry, "gssd", NULL, pipe_data);
+ return err;
}
static int
rpc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
- struct dentry *root, *gssd_dentry;
+ struct dentry *root;
struct net *net = sb->s_fs_info;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
int err;
@@ -1368,7 +1126,7 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = RPCAUTH_GSSMAGIC;
sb->s_op = &s_ops;
- sb->s_d_op = &simple_dentry_operations;
+ sb->s_d_flags = DCACHE_DONTCACHE;
sb->s_time_gran = 1;
inode = rpc_get_inode(sb, S_IFDIR | 0555);
@@ -1378,11 +1136,9 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc)
if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
return -ENOMEM;
- gssd_dentry = rpc_gssd_dummy_populate(root, sn->gssd_dummy);
- if (IS_ERR(gssd_dentry)) {
- __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
- return PTR_ERR(gssd_dentry);
- }
+ err = rpc_gssd_dummy_populate(root, sn->gssd_dummy);
+ if (err)
+ return err;
dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n",
net->ns.inum, NET_NAME(net));
@@ -1391,18 +1147,6 @@ rpc_fill_super(struct super_block *sb, struct fs_context *fc)
err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_MOUNT,
sb);
- if (err)
- goto err_depopulate;
- mutex_unlock(&sn->pipefs_sb_lock);
- return 0;
-
-err_depopulate:
- rpc_gssd_dummy_depopulate(gssd_dentry);
- blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
- RPC_PIPEFS_UMOUNT,
- sb);
- sn->pipefs_sb = NULL;
- __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF);
mutex_unlock(&sn->pipefs_sb_lock);
return err;
}
@@ -1459,7 +1203,7 @@ static void rpc_kill_sb(struct super_block *sb)
sb);
mutex_unlock(&sn->pipefs_sb_lock);
out:
- kill_litter_super(sb);
+ kill_anon_super(sb);
put_net(net);
}
@@ -1490,7 +1234,7 @@ int register_rpc_pipefs(void)
rpc_inode_cachep = kmem_cache_create("rpc_inode_cache",
sizeof(struct rpc_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
init_once);
if (!rpc_inode_cachep)
return -ENOMEM;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 5a8e6d46809a..53bcca365fb1 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -36,6 +36,7 @@
#include "netns.h"
#define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock"
+#define RPCBIND_SOCK_ABSTRACT_NAME "\0/run/rpcbind.sock"
#define RPCBIND_PROGRAM (100000u)
#define RPCBIND_PORT (111u)
@@ -216,21 +217,22 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
sn->rpcb_users = 1;
}
+/* Evaluate to actual length of the `sockaddr_un' structure. */
+# define SUN_LEN(ptr) (offsetof(struct sockaddr_un, sun_path) \
+ + 1 + strlen((ptr)->sun_path + 1))
+
/*
* Returns zero on success, otherwise a negative errno value
* is returned.
*/
-static int rpcb_create_local_unix(struct net *net)
+static int rpcb_create_af_local(struct net *net,
+ const struct sockaddr_un *addr)
{
- static const struct sockaddr_un rpcb_localaddr_rpcbind = {
- .sun_family = AF_LOCAL,
- .sun_path = RPCBIND_SOCK_PATHNAME,
- };
struct rpc_create_args args = {
.net = net,
.protocol = XPRT_TRANSPORT_LOCAL,
- .address = (struct sockaddr *)&rpcb_localaddr_rpcbind,
- .addrsize = sizeof(rpcb_localaddr_rpcbind),
+ .address = (struct sockaddr *)addr,
+ .addrsize = SUN_LEN(addr),
.servername = "localhost",
.program = &rpcb_program,
.version = RPCBVERS_2,
@@ -269,6 +271,26 @@ out:
return result;
}
+static int rpcb_create_local_abstract(struct net *net)
+{
+ static const struct sockaddr_un rpcb_localaddr_abstract = {
+ .sun_family = AF_LOCAL,
+ .sun_path = RPCBIND_SOCK_ABSTRACT_NAME,
+ };
+
+ return rpcb_create_af_local(net, &rpcb_localaddr_abstract);
+}
+
+static int rpcb_create_local_unix(struct net *net)
+{
+ static const struct sockaddr_un rpcb_localaddr_unix = {
+ .sun_family = AF_LOCAL,
+ .sun_path = RPCBIND_SOCK_PATHNAME,
+ };
+
+ return rpcb_create_af_local(net, &rpcb_localaddr_unix);
+}
+
/*
* Returns zero on success, otherwise a negative errno value
* is returned.
@@ -332,7 +354,8 @@ int rpcb_create_local(struct net *net)
if (rpcb_get_local(net))
goto out;
- if (rpcb_create_local_unix(net) != 0)
+ if (rpcb_create_local_abstract(net) != 0 &&
+ rpcb_create_local_unix(net) != 0)
result = rpcb_create_local_net(net);
out:
@@ -746,6 +769,10 @@ void rpcb_getport_async(struct rpc_task *task)
child = rpcb_call_async(rpcb_clnt, map, proc);
rpc_release_client(rpcb_clnt);
+ if (IS_ERR(child)) {
+ /* rpcb_map_release() has freed the arguments */
+ return;
+ }
xprt->stat.bind_count++;
rpc_put_task(child);
@@ -793,9 +820,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
}
trace_rpcb_setport(child, map->r_status, map->r_port);
- xprt->ops->set_port(xprt, map->r_port);
- if (map->r_port)
+ if (map->r_port) {
+ xprt->ops->set_port(xprt, map->r_port);
xprt_set_bound(xprt);
+ }
}
/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index be587a308e05..016f16ca5779 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -369,8 +369,10 @@ static void rpc_make_runnable(struct workqueue_struct *wq,
if (RPC_IS_ASYNC(task)) {
INIT_WORK(&task->u.tk_work, rpc_async_schedule);
queue_work(wq, &task->u.tk_work);
- } else
+ } else {
+ smp_mb__after_atomic();
wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
+ }
}
/*
@@ -817,7 +819,6 @@ rpc_init_task_statistics(struct rpc_task *task)
/* Initialize retry counters */
task->tk_garb_retry = 2;
task->tk_cred_retry = 2;
- task->tk_rebind_retry = 2;
/* starting timestamp */
task->tk_start = ktime_get();
@@ -863,8 +864,6 @@ void rpc_signal_task(struct rpc_task *task)
if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
return;
trace_rpc_task_signalled(task, task->tk_action);
- set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
- smp_mb__after_atomic();
queue = READ_ONCE(task->tk_waitqueue);
if (queue)
rpc_wake_up_queued_task(queue, task);
@@ -928,11 +927,10 @@ static void __rpc_execute(struct rpc_task *task)
*/
do_action = task->tk_action;
/* Tasks with an RPC error status should exit */
- if (do_action != rpc_exit_task &&
+ if (do_action && do_action != rpc_exit_task &&
(status = READ_ONCE(task->tk_rpc_status)) != 0) {
task->tk_status = status;
- if (do_action != NULL)
- do_action = rpc_exit_task;
+ do_action = rpc_exit_task;
}
/* Callbacks override all actions */
if (task->tk_callback) {
@@ -1076,7 +1074,6 @@ int rpc_malloc(struct rpc_task *task)
rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize;
return 0;
}
-EXPORT_SYMBOL_GPL(rpc_malloc);
/**
* rpc_free - free RPC buffer resources allocated via rpc_malloc
@@ -1097,7 +1094,6 @@ void rpc_free(struct rpc_task *task)
else
kfree(buf);
}
-EXPORT_SYMBOL_GPL(rpc_free);
/*
* Creation and deletion of RPC task structures
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 1b2b84feeec6..d8d8842c7de5 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -27,135 +27,91 @@
struct xdr_skb_reader {
struct sk_buff *skb;
unsigned int offset;
+ bool need_checksum;
size_t count;
__wsum csum;
};
-typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to,
- size_t len);
-
/**
* xdr_skb_read_bits - copy some data bits from skb to internal buffer
* @desc: sk_buff copy helper
* @to: copy destination
* @len: number of bytes to copy
*
- * Possibly called several times to iterate over an sk_buff and copy
- * data out of it.
+ * Possibly called several times to iterate over an sk_buff and copy data out of
+ * it.
*/
static size_t
xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
- if (len > desc->count)
- len = desc->count;
- if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
- return 0;
- desc->count -= len;
- desc->offset += len;
- return len;
-}
+ len = min(len, desc->count);
+
+ if (desc->need_checksum) {
+ __wsum csum;
+
+ csum = skb_copy_and_csum_bits(desc->skb, desc->offset, to, len);
+ desc->csum = csum_block_add(desc->csum, csum, desc->offset);
+ } else {
+ if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
+ return 0;
+ }
-/**
- * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
- * @desc: sk_buff copy helper
- * @to: copy destination
- * @len: number of bytes to copy
- *
- * Same as skb_read_bits, but calculate a checksum at the same time.
- */
-static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len)
-{
- unsigned int pos;
- __wsum csum2;
-
- if (len > desc->count)
- len = desc->count;
- pos = desc->offset;
- csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len);
- desc->csum = csum_block_add(desc->csum, csum2, pos);
desc->count -= len;
desc->offset += len;
return len;
}
-/**
- * xdr_partial_copy_from_skb - copy data out of an skb
- * @xdr: target XDR buffer
- * @base: starting offset
- * @desc: sk_buff copy helper
- * @copy_actor: virtual method for copying data
- *
- */
static ssize_t
-xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
+xdr_partial_copy_from_skb(struct xdr_buf *xdr, struct xdr_skb_reader *desc)
{
- struct page **ppage = xdr->pages;
- unsigned int len, pglen = xdr->page_len;
- ssize_t copied = 0;
- size_t ret;
-
- len = xdr->head[0].iov_len;
- if (base < len) {
- len -= base;
- ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len);
- copied += ret;
- if (ret != len || !desc->count)
- goto out;
- base = 0;
- } else
- base -= len;
-
- if (unlikely(pglen == 0))
- goto copy_tail;
- if (unlikely(base >= pglen)) {
- base -= pglen;
- goto copy_tail;
- }
- if (base || xdr->page_base) {
- pglen -= base;
- base += xdr->page_base;
- ppage += base >> PAGE_SHIFT;
- base &= ~PAGE_MASK;
- }
- do {
+ struct page **ppage = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ unsigned int poff = xdr->page_base & ~PAGE_MASK;
+ unsigned int pglen = xdr->page_len;
+ ssize_t copied = 0;
+ size_t ret;
+
+ if (xdr->head[0].iov_len == 0)
+ return 0;
+
+ ret = xdr_skb_read_bits(desc, xdr->head[0].iov_base,
+ xdr->head[0].iov_len);
+ if (ret != xdr->head[0].iov_len || !desc->count)
+ return ret;
+ copied += ret;
+
+ while (pglen) {
+ unsigned int len = min(PAGE_SIZE - poff, pglen);
char *kaddr;
/* ACL likes to be lazy in allocating pages - ACLs
* are small by default but can get huge. */
if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
- *ppage = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
+ *ppage = alloc_page(GFP_NOWAIT);
if (unlikely(*ppage == NULL)) {
if (copied == 0)
- copied = -ENOMEM;
- goto out;
+ return -ENOMEM;
+ return copied;
}
}
- len = PAGE_SIZE;
kaddr = kmap_atomic(*ppage);
- if (base) {
- len -= base;
- if (pglen < len)
- len = pglen;
- ret = copy_actor(desc, kaddr + base, len);
- base = 0;
- } else {
- if (pglen < len)
- len = pglen;
- ret = copy_actor(desc, kaddr, len);
- }
+ ret = xdr_skb_read_bits(desc, kaddr + poff, len);
flush_dcache_page(*ppage);
kunmap_atomic(kaddr);
+
copied += ret;
if (ret != len || !desc->count)
- goto out;
+ return copied;
ppage++;
- } while ((pglen -= len) != 0);
-copy_tail:
- len = xdr->tail[0].iov_len;
- if (base < len)
- copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base);
-out:
+ pglen -= len;
+ poff = 0;
+ }
+
+ if (xdr->tail[0].iov_len) {
+ copied += xdr_skb_read_bits(desc, xdr->tail[0].iov_base,
+ xdr->tail[0].iov_len);
+ }
+
return copied;
}
@@ -169,17 +125,22 @@ out:
*/
int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{
- struct xdr_skb_reader desc;
-
- desc.skb = skb;
- desc.offset = 0;
- desc.count = skb->len - desc.offset;
+ struct xdr_skb_reader desc = {
+ .skb = skb,
+ .count = skb->len - desc.offset,
+ };
- if (skb_csum_unnecessary(skb))
- goto no_checksum;
+ if (skb_csum_unnecessary(skb)) {
+ if (xdr_partial_copy_from_skb(xdr, &desc) < 0)
+ return -1;
+ if (desc.count)
+ return -1;
+ return 0;
+ }
+ desc.need_checksum = true;
desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0)
+ if (xdr_partial_copy_from_skb(xdr, &desc) < 0)
return -1;
if (desc.offset != skb->len) {
__wsum csum2;
@@ -194,14 +155,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
!skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev, skb);
return 0;
-no_checksum:
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
- return -1;
- if (desc.count)
- return -1;
- return 0;
}
-EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
static inline int xprt_sendmsg(struct socket *sock, struct msghdr *msg,
size_t seek)
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 52908f9e6eab..383860cb1d5b 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -83,7 +83,8 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp)
{
const struct svc_program *prog = statp->program;
const struct svc_version *vers;
- unsigned int i, j;
+ unsigned int i, j, k;
+ unsigned long count;
seq_printf(seq,
"net %u %u %u %u\n",
@@ -104,8 +105,12 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp)
if (!vers)
continue;
seq_printf(seq, "proc%d %u", i, vers->vs_nproc);
- for (j = 0; j < vers->vs_nproc; j++)
- seq_printf(seq, " %u", vers->vs_count[j]);
+ for (j = 0; j < vers->vs_nproc; j++) {
+ count = 0;
+ for_each_possible_cpu(k)
+ count += per_cpu(vers->vs_count[j], k);
+ seq_printf(seq, " %lu", count);
+ }
seq_putc(seq, '\n');
}
}
@@ -309,7 +314,7 @@ EXPORT_SYMBOL_GPL(rpc_proc_unregister);
struct proc_dir_entry *
svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops)
{
- return do_register(net, statp->program->pg_name, statp, proc_ops);
+ return do_register(net, statp->program->pg_name, net, proc_ops);
}
EXPORT_SYMBOL_GPL(svc_proc_register);
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index d4a362c9e4b3..e3c6e3b63f0b 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -36,7 +36,11 @@ static inline int sock_is_loopback(struct sock *sk)
return loopback;
}
+struct svc_serv;
+struct svc_rqst;
int rpc_clients_notifier_register(void);
void rpc_clients_notifier_unregister(void);
void auth_domain_cleanup(void);
+void svc_sock_update_bufs(struct svc_serv *serv);
+enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp);
#endif /* _NET_SUNRPC_SUNRPC_H */
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 691c0000e9ea..bab6cab29405 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -148,6 +148,7 @@ cleanup_sunrpc(void)
#endif
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
+MODULE_DESCRIPTION("Sun RPC core");
MODULE_LICENSE("GPL");
fs_initcall(init_sunrpc); /* Ensure we're initialised before nfs */
module_exit(cleanup_sunrpc);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index f06622814a95..4704dce7284e 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -32,6 +32,7 @@
#include <trace/events/sunrpc.h>
#include "fail.h"
+#include "sunrpc.h"
#define RPCDBG_FACILITY RPCDBG_SVCDSP
@@ -72,57 +73,100 @@ static struct svc_pool_map svc_pool_map = {
static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
static int
-param_set_pool_mode(const char *val, const struct kernel_param *kp)
+__param_set_pool_mode(const char *val, struct svc_pool_map *m)
{
- int *ip = (int *)kp->arg;
- struct svc_pool_map *m = &svc_pool_map;
- int err;
+ int err, mode;
mutex_lock(&svc_pool_map_mutex);
- err = -EBUSY;
- if (m->count)
- goto out;
-
err = 0;
if (!strncmp(val, "auto", 4))
- *ip = SVC_POOL_AUTO;
+ mode = SVC_POOL_AUTO;
else if (!strncmp(val, "global", 6))
- *ip = SVC_POOL_GLOBAL;
+ mode = SVC_POOL_GLOBAL;
else if (!strncmp(val, "percpu", 6))
- *ip = SVC_POOL_PERCPU;
+ mode = SVC_POOL_PERCPU;
else if (!strncmp(val, "pernode", 7))
- *ip = SVC_POOL_PERNODE;
+ mode = SVC_POOL_PERNODE;
else
err = -EINVAL;
+ if (err)
+ goto out;
+
+ if (m->count == 0)
+ m->mode = mode;
+ else if (mode != m->mode)
+ err = -EBUSY;
out:
mutex_unlock(&svc_pool_map_mutex);
return err;
}
static int
-param_get_pool_mode(char *buf, const struct kernel_param *kp)
+param_set_pool_mode(const char *val, const struct kernel_param *kp)
+{
+ struct svc_pool_map *m = kp->arg;
+
+ return __param_set_pool_mode(val, m);
+}
+
+int sunrpc_set_pool_mode(const char *val)
+{
+ return __param_set_pool_mode(val, &svc_pool_map);
+}
+EXPORT_SYMBOL(sunrpc_set_pool_mode);
+
+/**
+ * sunrpc_get_pool_mode - get the current pool_mode for the host
+ * @buf: where to write the current pool_mode
+ * @size: size of @buf
+ *
+ * Grab the current pool_mode from the svc_pool_map and write
+ * the resulting string to @buf. Returns the number of characters
+ * written to @buf (a'la snprintf()).
+ */
+int
+sunrpc_get_pool_mode(char *buf, size_t size)
{
- int *ip = (int *)kp->arg;
+ struct svc_pool_map *m = &svc_pool_map;
- switch (*ip)
+ switch (m->mode)
{
case SVC_POOL_AUTO:
- return strlcpy(buf, "auto\n", 20);
+ return snprintf(buf, size, "auto");
case SVC_POOL_GLOBAL:
- return strlcpy(buf, "global\n", 20);
+ return snprintf(buf, size, "global");
case SVC_POOL_PERCPU:
- return strlcpy(buf, "percpu\n", 20);
+ return snprintf(buf, size, "percpu");
case SVC_POOL_PERNODE:
- return strlcpy(buf, "pernode\n", 20);
+ return snprintf(buf, size, "pernode");
default:
- return sprintf(buf, "%d\n", *ip);
+ return snprintf(buf, size, "%d", m->mode);
}
}
+EXPORT_SYMBOL(sunrpc_get_pool_mode);
+
+static int
+param_get_pool_mode(char *buf, const struct kernel_param *kp)
+{
+ char str[16];
+ int len;
+
+ len = sunrpc_get_pool_mode(str, ARRAY_SIZE(str));
+
+ /* Ensure we have room for newline and NUL */
+ len = min_t(int, len, ARRAY_SIZE(str) - 2);
+
+ /* tack on the newline */
+ str[len] = '\n';
+ str[len + 1] = '\0';
+
+ return sysfs_emit(buf, "%s", str);
+}
module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
- &svc_pool_map.mode, 0644);
+ &svc_pool_map, 0644);
/*
* Detect best pool mapping mode heuristically,
@@ -250,10 +294,8 @@ svc_pool_map_get(void)
int npools = -1;
mutex_lock(&svc_pool_map_mutex);
-
if (m->count++) {
mutex_unlock(&svc_pool_map_mutex);
- WARN_ON_ONCE(m->npools <= 1);
return m->npools;
}
@@ -275,32 +317,21 @@ svc_pool_map_get(void)
m->mode = SVC_POOL_GLOBAL;
}
m->npools = npools;
-
- if (npools == 1)
- /* service is unpooled, so doesn't hold a reference */
- m->count--;
-
mutex_unlock(&svc_pool_map_mutex);
return npools;
}
/*
- * Drop a reference to the global map of cpus to pools, if
- * pools were in use, i.e. if npools > 1.
+ * Drop a reference to the global map of cpus to pools.
* When the last reference is dropped, the map data is
- * freed; this allows the sysadmin to change the pool
- * mode using the pool_mode module option without
- * rebooting or re-loading sunrpc.ko.
+ * freed; this allows the sysadmin to change the pool.
*/
static void
-svc_pool_map_put(int npools)
+svc_pool_map_put(void)
{
struct svc_pool_map *m = &svc_pool_map;
- if (npools <= 1)
- return;
mutex_lock(&svc_pool_map_mutex);
-
if (!--m->count) {
kfree(m->to_pool);
m->to_pool = NULL;
@@ -308,7 +339,6 @@ svc_pool_map_put(int npools)
m->pool_to = NULL;
m->npools = 0;
}
-
mutex_unlock(&svc_pool_map_mutex);
}
@@ -322,7 +352,7 @@ static int svc_pool_map_get_node(unsigned int pidx)
if (m->mode == SVC_POOL_PERNODE)
return m->pool_to[pidx];
}
- return NUMA_NO_NODE;
+ return numa_mem_id();
}
/*
* Set the given thread's cpus_allowed mask so that it
@@ -388,7 +418,7 @@ struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv)
return &serv->sv_pools[pidx % serv->sv_nrpools];
}
-int svc_rpcb_setup(struct svc_serv *serv, struct net *net)
+static int svc_rpcb_setup(struct svc_serv *serv, struct net *net)
{
int err;
@@ -400,21 +430,20 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net)
svc_unregister(serv, net);
return 0;
}
-EXPORT_SYMBOL_GPL(svc_rpcb_setup);
void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net)
{
svc_unregister(serv, net);
rpcb_put_local(net);
}
-EXPORT_SYMBOL_GPL(svc_rpcb_cleanup);
static int svc_uses_rpcbind(struct svc_serv *serv)
{
- struct svc_program *progp;
- unsigned int i;
+ unsigned int p, i;
+
+ for (p = 0; p < serv->sv_nprogs; p++) {
+ struct svc_program *progp = &serv->sv_programs[p];
- for (progp = serv->sv_program; progp; progp = progp->pg_next) {
for (i = 0; i < progp->pg_nvers; i++) {
if (progp->pg_vers[i] == NULL)
continue;
@@ -438,9 +467,7 @@ EXPORT_SYMBOL_GPL(svc_bind);
static void
__svc_init_bc(struct svc_serv *serv)
{
- INIT_LIST_HEAD(&serv->sv_cb_list);
- spin_lock_init(&serv->sv_cb_lock);
- init_waitqueue_head(&serv->sv_cb_waitq);
+ lwq_init(&serv->sv_cb_list);
}
#else
static void
@@ -453,8 +480,8 @@ __svc_init_bc(struct svc_serv *serv)
* Create an RPC service
*/
static struct svc_serv *
-__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- int (*threadfn)(void *data))
+__svc_create(struct svc_program *prog, int nprogs, struct svc_stat *stats,
+ unsigned int bufsize, int npools, int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int vers;
@@ -464,26 +491,27 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
serv->sv_name = prog->pg_name;
- serv->sv_program = prog;
- kref_init(&serv->sv_refcnt);
- serv->sv_stats = prog->pg_stats;
+ serv->sv_programs = prog;
+ serv->sv_nprogs = nprogs;
+ serv->sv_stats = stats;
if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD;
serv->sv_max_payload = bufsize? bufsize : 4096;
serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
serv->sv_threadfn = threadfn;
xdrsize = 0;
- while (prog) {
- prog->pg_lovers = prog->pg_nvers-1;
- for (vers=0; vers<prog->pg_nvers ; vers++)
- if (prog->pg_vers[vers]) {
- prog->pg_hivers = vers;
- if (prog->pg_lovers > vers)
- prog->pg_lovers = vers;
- if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
- xdrsize = prog->pg_vers[vers]->vs_xdrsize;
+ for (i = 0; i < nprogs; i++) {
+ struct svc_program *progp = &prog[i];
+
+ progp->pg_lovers = progp->pg_nvers-1;
+ for (vers = 0; vers < progp->pg_nvers ; vers++)
+ if (progp->pg_vers[vers]) {
+ progp->pg_hivers = vers;
+ if (progp->pg_lovers > vers)
+ progp->pg_lovers = vers;
+ if (progp->pg_vers[vers]->vs_xdrsize > xdrsize)
+ xdrsize = progp->pg_vers[vers]->vs_xdrsize;
}
- prog = prog->pg_next;
}
serv->sv_xdrsize = xdrsize;
INIT_LIST_HEAD(&serv->sv_tempsocks);
@@ -509,9 +537,13 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
i, serv->sv_name);
pool->sp_id = i;
- INIT_LIST_HEAD(&pool->sp_sockets);
+ lwq_init(&pool->sp_xprts);
INIT_LIST_HEAD(&pool->sp_all_threads);
- spin_lock_init(&pool->sp_lock);
+ init_llist_head(&pool->sp_idle_threads);
+
+ percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL);
+ percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
+ percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL);
}
return serv;
@@ -528,31 +560,36 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
int (*threadfn)(void *data))
{
- return __svc_create(prog, bufsize, 1, threadfn);
+ return __svc_create(prog, 1, NULL, bufsize, 1, threadfn);
}
EXPORT_SYMBOL_GPL(svc_create);
/**
* svc_create_pooled - Create an RPC service with pooled threads
- * @prog: the RPC program the new service will handle
+ * @prog: Array of RPC programs the new service will handle
+ * @nprogs: Number of programs in the array
+ * @stats: the stats struct if desired
* @bufsize: maximum message size for @prog
* @threadfn: a function to service RPC requests for @prog
*
* Returns an instantiated struct svc_serv object or NULL.
*/
struct svc_serv *svc_create_pooled(struct svc_program *prog,
+ unsigned int nprogs,
+ struct svc_stat *stats,
unsigned int bufsize,
int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, threadfn);
+ serv = __svc_create(prog, nprogs, stats, bufsize, npools, threadfn);
if (!serv)
goto out_err;
+ serv->sv_is_pooled = true;
return serv;
out_err:
- svc_pool_map_put(npools);
+ svc_pool_map_put();
return NULL;
}
EXPORT_SYMBOL_GPL(svc_create_pooled);
@@ -562,57 +599,54 @@ EXPORT_SYMBOL_GPL(svc_create_pooled);
* protect sv_permsocks and sv_tempsocks.
*/
void
-svc_destroy(struct kref *ref)
+svc_destroy(struct svc_serv **servp)
{
- struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt);
+ struct svc_serv *serv = *servp;
+ unsigned int i;
- dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
+ *servp = NULL;
+
+ dprintk("svc: svc_destroy(%s)\n", serv->sv_programs->pg_name);
timer_shutdown_sync(&serv->sv_temptimer);
/*
- * The last user is gone and thus all sockets have to be destroyed to
- * the point. Check this.
+ * Remaining transports at this point are not expected.
*/
- BUG_ON(!list_empty(&serv->sv_permsocks));
- BUG_ON(!list_empty(&serv->sv_tempsocks));
+ WARN_ONCE(!list_empty(&serv->sv_permsocks),
+ "SVC: permsocks remain for %s\n", serv->sv_programs->pg_name);
+ WARN_ONCE(!list_empty(&serv->sv_tempsocks),
+ "SVC: tempsocks remain for %s\n", serv->sv_programs->pg_name);
cache_clean_deferred(serv);
- svc_pool_map_put(serv->sv_nrpools);
+ if (serv->sv_is_pooled)
+ svc_pool_map_put();
+
+ for (i = 0; i < serv->sv_nrpools; i++) {
+ struct svc_pool *pool = &serv->sv_pools[i];
+ percpu_counter_destroy(&pool->sp_messages_arrived);
+ percpu_counter_destroy(&pool->sp_sockets_queued);
+ percpu_counter_destroy(&pool->sp_threads_woken);
+ }
kfree(serv->sv_pools);
kfree(serv);
}
EXPORT_SYMBOL_GPL(svc_destroy);
-/*
- * Allocate an RPC server's buffer space.
- * We allocate pages and place them in rq_pages.
- */
-static int
-svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
+static bool
+svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node)
{
- unsigned int pages, arghi;
-
- /* bc_xprt uses fore channel allocated buffers */
- if (svc_is_backchannel(rqstp))
- return 1;
-
- pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply.
- * We assume one is at most one page
- */
- arghi = 0;
- WARN_ON_ONCE(pages > RPCSVC_MAXPAGES);
- if (pages > RPCSVC_MAXPAGES)
- pages = RPCSVC_MAXPAGES;
- while (pages) {
- struct page *p = alloc_pages_node(node, GFP_KERNEL, 0);
- if (!p)
- break;
- rqstp->rq_pages[arghi++] = p;
- pages--;
- }
- return pages == 0;
+ rqstp->rq_maxpages = svc_serv_maxpages(serv);
+
+ /* rq_pages' last entry is NULL for historical reasons. */
+ rqstp->rq_pages = kcalloc_node(rqstp->rq_maxpages + 1,
+ sizeof(struct page *),
+ GFP_KERNEL, node);
+ if (!rqstp->rq_pages)
+ return false;
+
+ return true;
}
/*
@@ -621,15 +655,30 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node)
static void
svc_release_buffer(struct svc_rqst *rqstp)
{
- unsigned int i;
+ unsigned long i;
- for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++)
+ for (i = 0; i < rqstp->rq_maxpages; i++)
if (rqstp->rq_pages[i])
put_page(rqstp->rq_pages[i]);
+ kfree(rqstp->rq_pages);
}
-struct svc_rqst *
-svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
+static void
+svc_rqst_free(struct svc_rqst *rqstp)
+{
+ folio_batch_release(&rqstp->rq_fbatch);
+ kfree(rqstp->rq_bvec);
+ svc_release_buffer(rqstp);
+ if (rqstp->rq_scratch_folio)
+ folio_put(rqstp->rq_scratch_folio);
+ kfree(rqstp->rq_resp);
+ kfree(rqstp->rq_argp);
+ kfree(rqstp->rq_auth_data);
+ kfree_rcu(rqstp, rq_rcu_head);
+}
+
+static struct svc_rqst *
+svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
{
struct svc_rqst *rqstp;
@@ -637,12 +686,13 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp)
return rqstp;
- __set_bit(RQ_BUSY, &rqstp->rq_flags);
+ folio_batch_init(&rqstp->rq_fbatch);
+
rqstp->rq_server = serv;
rqstp->rq_pool = pool;
- rqstp->rq_scratch_page = alloc_pages_node(node, GFP_KERNEL, 0);
- if (!rqstp->rq_scratch_page)
+ rqstp->rq_scratch_folio = __folio_alloc_node(GFP_KERNEL, 0, node);
+ if (!rqstp->rq_scratch_folio)
goto out_enomem;
rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
@@ -653,91 +703,97 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp->rq_resp)
goto out_enomem;
- if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node))
+ if (!svc_init_buffer(rqstp, serv, node))
goto out_enomem;
- return rqstp;
-out_enomem:
- svc_rqst_free(rqstp);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(svc_rqst_alloc);
-
-static struct svc_rqst *
-svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
-{
- struct svc_rqst *rqstp;
+ rqstp->rq_bvec = kcalloc_node(rqstp->rq_maxpages,
+ sizeof(struct bio_vec),
+ GFP_KERNEL, node);
+ if (!rqstp->rq_bvec)
+ goto out_enomem;
- rqstp = svc_rqst_alloc(serv, pool, node);
- if (!rqstp)
- return ERR_PTR(-ENOMEM);
+ rqstp->rq_err = -EAGAIN; /* No error yet */
- svc_get(serv);
- spin_lock_bh(&serv->sv_lock);
serv->sv_nrthreads += 1;
- spin_unlock_bh(&serv->sv_lock);
+ pool->sp_nrthreads += 1;
- spin_lock_bh(&pool->sp_lock);
- pool->sp_nrthreads++;
+ /* Protected by whatever lock the service uses when calling
+ * svc_set_num_threads()
+ */
list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
- spin_unlock_bh(&pool->sp_lock);
+
return rqstp;
+
+out_enomem:
+ svc_rqst_free(rqstp);
+ return NULL;
}
-/*
- * Choose a pool in which to create a new thread, for svc_set_num_threads
+/**
+ * svc_pool_wake_idle_thread - Awaken an idle thread in @pool
+ * @pool: service thread pool
+ *
+ * Can be called from soft IRQ or process context. Finding an idle
+ * service thread and marking it BUSY is atomic with respect to
+ * other calls to svc_pool_wake_idle_thread().
+ *
*/
-static inline struct svc_pool *
-choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+void svc_pool_wake_idle_thread(struct svc_pool *pool)
{
- if (pool != NULL)
- return pool;
+ struct svc_rqst *rqstp;
+ struct llist_node *ln;
+
+ rcu_read_lock();
+ ln = READ_ONCE(pool->sp_idle_threads.first);
+ if (ln) {
+ rqstp = llist_entry(ln, struct svc_rqst, rq_idle);
+ WRITE_ONCE(rqstp->rq_qtime, ktime_get());
+ if (!task_is_running(rqstp->rq_task)) {
+ wake_up_process(rqstp->rq_task);
+ trace_svc_pool_thread_wake(pool, rqstp->rq_task->pid);
+ percpu_counter_inc(&pool->sp_threads_woken);
+ } else {
+ trace_svc_pool_thread_running(pool, rqstp->rq_task->pid);
+ }
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+ trace_svc_pool_thread_noidle(pool, 0);
+}
+EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread);
- return &serv->sv_pools[(*state)++ % serv->sv_nrpools];
+static struct svc_pool *
+svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+{
+ return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
}
-/*
- * Choose a thread to kill, for svc_set_num_threads
- */
-static inline struct task_struct *
-choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+static struct svc_pool *
+svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
+ unsigned int *state)
{
+ struct svc_pool *pool;
unsigned int i;
- struct task_struct *task = NULL;
- if (pool != NULL) {
- spin_lock_bh(&pool->sp_lock);
- } else {
- /* choose a pool in round-robin fashion */
+ pool = target_pool;
+
+ if (!pool) {
for (i = 0; i < serv->sv_nrpools; i++) {
pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
- spin_lock_bh(&pool->sp_lock);
- if (!list_empty(&pool->sp_all_threads))
- goto found_pool;
- spin_unlock_bh(&pool->sp_lock);
+ if (pool->sp_nrthreads)
+ break;
}
- return NULL;
}
-found_pool:
- if (!list_empty(&pool->sp_all_threads)) {
- struct svc_rqst *rqstp;
-
- /*
- * Remove from the pool->sp_all_threads list
- * so we don't try to kill it again.
- */
- rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
- set_bit(RQ_VICTIM, &rqstp->rq_flags);
- list_del_rcu(&rqstp->rq_all);
- task = rqstp->rq_task;
+ if (pool && pool->sp_nrthreads) {
+ set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
+ set_bit(SP_NEED_VICTIM, &pool->sp_flags);
+ return pool;
}
- spin_unlock_bh(&pool->sp_lock);
-
- return task;
+ return NULL;
}
-/* create new threads */
static int
svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
@@ -746,16 +802,16 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
struct svc_pool *chosen_pool;
unsigned int state = serv->sv_nrthreads-1;
int node;
+ int err;
do {
nrservs--;
- chosen_pool = choose_pool(serv, pool, &state);
-
+ chosen_pool = svc_pool_next(serv, pool, &state);
node = svc_pool_map_get_node(chosen_pool->sp_id);
- rqstp = svc_prepare_thread(serv, chosen_pool, node);
- if (IS_ERR(rqstp))
- return PTR_ERR(rqstp);
+ rqstp = svc_prepare_thread(serv, chosen_pool, node);
+ if (!rqstp)
+ return -ENOMEM;
task = kthread_create_on_node(serv->sv_threadfn, rqstp,
node, "%s", serv->sv_name);
if (IS_ERR(task)) {
@@ -769,47 +825,60 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
svc_sock_update_bufs(serv);
wake_up_process(task);
+
+ wait_var_event(&rqstp->rq_err, rqstp->rq_err != -EAGAIN);
+ err = rqstp->rq_err;
+ if (err) {
+ svc_exit_thread(rqstp);
+ return err;
+ }
} while (nrservs > 0);
return 0;
}
-/*
- * Create or destroy enough new threads to make the number
- * of threads the given number. If `pool' is non-NULL, applies
- * only to threads in that pool, otherwise round-robins between
- * all pools. Caller must ensure that mutual exclusion between this and
- * server startup or shutdown.
- */
-
-/* destroy old threads */
static int
svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
- struct task_struct *task;
unsigned int state = serv->sv_nrthreads-1;
+ struct svc_pool *victim;
- /* destroy old threads */
do {
- task = choose_victim(serv, pool, &state);
- if (task == NULL)
+ victim = svc_pool_victim(serv, pool, &state);
+ if (!victim)
break;
- kthread_stop(task);
+ svc_pool_wake_idle_thread(victim);
+ wait_on_bit(&victim->sp_flags, SP_VICTIM_REMAINS,
+ TASK_IDLE);
nrservs++;
} while (nrservs < 0);
return 0;
}
+/**
+ * svc_set_num_threads - adjust number of threads per RPC service
+ * @serv: RPC service to adjust
+ * @pool: Specific pool from which to choose threads, or NULL
+ * @nrservs: New number of threads for @serv (0 or less means kill all threads)
+ *
+ * Create or destroy threads to make the number of threads for @serv the
+ * given number. If @pool is non-NULL, change only threads in that pool;
+ * otherwise, round-robin between all pools for @serv. @serv's
+ * sv_nrthreads is adjusted for each thread created or destroyed.
+ *
+ * Caller must ensure mutual exclusion between this and server startup or
+ * shutdown.
+ *
+ * Returns zero on success or a negative errno if an error occurred while
+ * starting a thread.
+ */
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
- if (pool == NULL) {
+ if (!pool)
nrservs -= serv->sv_nrthreads;
- } else {
- spin_lock_bh(&pool->sp_lock);
+ else
nrservs -= pool->sp_nrthreads;
- spin_unlock_bh(&pool->sp_lock);
- }
if (nrservs > 0)
return svc_start_kthreads(serv, pool, nrservs);
@@ -826,57 +895,80 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads);
*
* When replacing a page in rq_pages, batch the release of the
* replaced pages to avoid hammering the page allocator.
+ *
+ * Return values:
+ * %true: page replaced
+ * %false: array bounds checking failed
*/
-void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
+bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
{
+ struct page **begin = rqstp->rq_pages;
+ struct page **end = &rqstp->rq_pages[rqstp->rq_maxpages];
+
+ if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) {
+ trace_svc_replace_page_err(rqstp);
+ return false;
+ }
+
if (*rqstp->rq_next_page) {
- if (!pagevec_space(&rqstp->rq_pvec))
- __pagevec_release(&rqstp->rq_pvec);
- pagevec_add(&rqstp->rq_pvec, *rqstp->rq_next_page);
+ if (!folio_batch_add(&rqstp->rq_fbatch,
+ page_folio(*rqstp->rq_next_page)))
+ __folio_batch_release(&rqstp->rq_fbatch);
}
get_page(page);
*(rqstp->rq_next_page++) = page;
+ return true;
}
EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
-/*
- * Called from a server thread as it's exiting. Caller must hold the "service
- * mutex" for the service.
+/**
+ * svc_rqst_release_pages - Release Reply buffer pages
+ * @rqstp: RPC transaction context
+ *
+ * Release response pages that might still be in flight after
+ * svc_send, and any spliced filesystem-owned pages.
*/
-void
-svc_rqst_free(struct svc_rqst *rqstp)
+void svc_rqst_release_pages(struct svc_rqst *rqstp)
{
- svc_release_buffer(rqstp);
- if (rqstp->rq_scratch_page)
- put_page(rqstp->rq_scratch_page);
- kfree(rqstp->rq_resp);
- kfree(rqstp->rq_argp);
- kfree(rqstp->rq_auth_data);
- kfree_rcu(rqstp, rq_rcu_head);
+ int i, count = rqstp->rq_next_page - rqstp->rq_respages;
+
+ if (count) {
+ release_pages(rqstp->rq_respages, count);
+ for (i = 0; i < count; i++)
+ rqstp->rq_respages[i] = NULL;
+ }
}
-EXPORT_SYMBOL_GPL(svc_rqst_free);
+/**
+ * svc_exit_thread - finalise the termination of a sunrpc server thread
+ * @rqstp: the svc_rqst which represents the thread.
+ *
+ * When a thread started with svc_new_thread() exits it must call
+ * svc_exit_thread() as its last act. This must be done with the
+ * service mutex held. Normally this is held by a DIFFERENT thread, the
+ * one that is calling svc_set_num_threads() and which will wait for
+ * SP_VICTIM_REMAINS to be cleared before dropping the mutex. If the
+ * thread exits for any reason other than svc_thread_should_stop()
+ * returning %true (which indicated that svc_set_num_threads() is
+ * waiting for it to exit), then it must take the service mutex itself,
+ * which can only safely be done using mutex_try_lock().
+ */
void
svc_exit_thread(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
struct svc_pool *pool = rqstp->rq_pool;
- spin_lock_bh(&pool->sp_lock);
- pool->sp_nrthreads--;
- if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
- list_del_rcu(&rqstp->rq_all);
- spin_unlock_bh(&pool->sp_lock);
+ list_del_rcu(&rqstp->rq_all);
- spin_lock_bh(&serv->sv_lock);
+ pool->sp_nrthreads -= 1;
serv->sv_nrthreads -= 1;
- spin_unlock_bh(&serv->sv_lock);
svc_sock_update_bufs(serv);
svc_rqst_free(rqstp);
- svc_put(serv);
+ clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
}
EXPORT_SYMBOL_GPL(svc_exit_thread);
@@ -1002,10 +1094,11 @@ static int __svc_register(struct net *net, const char *progname,
#endif
}
- trace_svc_register(progname, version, protocol, port, family, error);
+ trace_svc_register(progname, version, family, protocol, port, error);
return error;
}
+static
int svc_rpcbind_set_version(struct net *net,
const struct svc_program *progp,
u32 version, int family,
@@ -1016,7 +1109,6 @@ int svc_rpcbind_set_version(struct net *net,
version, family, proto, port);
}
-EXPORT_SYMBOL_GPL(svc_rpcbind_set_version);
int svc_generic_rpcbind_set(struct net *net,
const struct svc_program *progp,
@@ -1064,15 +1156,16 @@ int svc_register(const struct svc_serv *serv, struct net *net,
const int family, const unsigned short proto,
const unsigned short port)
{
- struct svc_program *progp;
- unsigned int i;
+ unsigned int p, i;
int error = 0;
WARN_ON_ONCE(proto == 0 && port == 0);
if (proto == 0 && port == 0)
return -EINVAL;
- for (progp = serv->sv_program; progp; progp = progp->pg_next) {
+ for (p = 0; p < serv->sv_nprogs; p++) {
+ struct svc_program *progp = &serv->sv_programs[p];
+
for (i = 0; i < progp->pg_nvers; i++) {
error = progp->pg_rpcbind_set(net, progp, i,
@@ -1123,13 +1216,15 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi
*/
static void svc_unregister(const struct svc_serv *serv, struct net *net)
{
- struct svc_program *progp;
+ struct sighand_struct *sighand;
unsigned long flags;
- unsigned int i;
+ unsigned int p, i;
clear_thread_flag(TIF_SIGPENDING);
- for (progp = serv->sv_program; progp; progp = progp->pg_next) {
+ for (p = 0; p < serv->sv_nprogs; p++) {
+ struct svc_program *progp = &serv->sv_programs[p];
+
for (i = 0; i < progp->pg_nvers; i++) {
if (progp->pg_vers[i] == NULL)
continue;
@@ -1139,9 +1234,12 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net)
}
}
- spin_lock_irqsave(&current->sighand->siglock, flags);
+ rcu_read_lock();
+ sighand = rcu_dereference(current->sighand);
+ spin_lock_irqsave(&sighand->siglock, flags);
recalc_sigpending();
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
+ spin_unlock_irqrestore(&sighand->siglock, flags);
+ rcu_read_unlock();
}
/*
@@ -1200,15 +1298,13 @@ svc_generic_init_request(struct svc_rqst *rqstp,
if (rqstp->rq_proc >= versp->vs_nproc)
goto err_bad_proc;
rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc];
- if (!procp)
- goto err_bad_proc;
/* Initialize storage for argp and resp */
memset(rqstp->rq_argp, 0, procp->pc_argzero);
memset(rqstp->rq_resp, 0, procp->pc_ressize);
/* Bump per-procedure stats counter */
- versp->vs_count[rqstp->rq_proc]++;
+ this_cpu_inc(versp->vs_count[rqstp->rq_proc]);
ret->dispatch = versp->vs_dispatch;
return rpc_success;
@@ -1225,51 +1321,45 @@ EXPORT_SYMBOL_GPL(svc_generic_init_request);
* Common routine for processing the RPC request.
*/
static int
-svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
+svc_process_common(struct svc_rqst *rqstp)
{
- struct svc_program *progp;
+ struct xdr_stream *xdr = &rqstp->rq_res_stream;
+ struct svc_program *progp = NULL;
const struct svc_procedure *procp = NULL;
struct svc_serv *serv = rqstp->rq_server;
struct svc_process_info process;
- __be32 *statp;
- u32 prog, vers;
- __be32 rpc_stat;
- int auth_res, rc;
- __be32 *reply_statp;
+ enum svc_auth_status auth_res;
+ unsigned int aoffset;
+ int pr, rc;
+ __be32 *p;
- rpc_stat = rpc_success;
+ /* Reset the accept_stat for the RPC */
+ rqstp->rq_accept_statp = NULL;
- if (argv->iov_len < 6*4)
- goto err_short_len;
-
- /* Will be turned off by GSS integrity and privacy services */
- set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Will be turned off only when NFSv4 Sessions are used */
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
clear_bit(RQ_DROPME, &rqstp->rq_flags);
- svc_putu32(resv, rqstp->rq_xid);
-
- vers = svc_getnl(argv);
+ /* Construct the first words of the reply: */
+ svcxdr_init_encode(rqstp);
+ xdr_stream_encode_be32(xdr, rqstp->rq_xid);
+ xdr_stream_encode_be32(xdr, rpc_reply);
- /* First words of reply: */
- svc_putnl(resv, 1); /* REPLY */
-
- if (vers != 2) /* RPC version number */
+ p = xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 4);
+ if (unlikely(!p))
+ goto err_short_len;
+ if (*p++ != cpu_to_be32(RPC_VERSION))
goto err_bad_rpc;
- /* Save position in case we later decide to reject: */
- reply_statp = resv->iov_base + resv->iov_len;
+ xdr_stream_encode_be32(xdr, rpc_msg_accepted);
- svc_putnl(resv, 0); /* ACCEPT */
+ rqstp->rq_prog = be32_to_cpup(p++);
+ rqstp->rq_vers = be32_to_cpup(p++);
+ rqstp->rq_proc = be32_to_cpup(p);
- rqstp->rq_prog = prog = svc_getnl(argv); /* program number */
- rqstp->rq_vers = svc_getnl(argv); /* version number */
- rqstp->rq_proc = svc_getnl(argv); /* procedure number */
-
- for (progp = serv->sv_program; progp; progp = progp->pg_next)
- if (prog == progp->pg_prog)
- break;
+ for (pr = 0; pr < serv->sv_nprogs; pr++)
+ if (rqstp->rq_prog == serv->sv_programs[pr].pg_prog)
+ progp = &serv->sv_programs[pr];
/*
* Decode auth data, and add verifier to reply buffer.
@@ -1285,10 +1375,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
case SVC_OK:
break;
case SVC_GARBAGE:
- goto err_garbage;
- case SVC_SYSERR:
- rpc_stat = rpc_system_err;
- goto err_bad;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ goto err_bad_auth;
case SVC_DENIED:
goto err_bad_auth;
case SVC_CLOSE:
@@ -1297,13 +1385,16 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto dropit;
case SVC_COMPLETE:
goto sendit;
+ default:
+ pr_warn_once("Unexpected svc_auth_status (%d)\n", auth_res);
+ rqstp->rq_auth_stat = rpc_autherr_failed;
+ goto err_bad_auth;
}
if (progp == NULL)
goto err_bad_prog;
- rpc_stat = progp->pg_init_request(rqstp, progp, &process);
- switch (rpc_stat) {
+ switch (progp->pg_init_request(rqstp, progp, &process)) {
case rpc_success:
break;
case rpc_prog_unavail:
@@ -1320,12 +1411,11 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto err_bad_proc;
/* Syntactic check complete */
- serv->sv_stats->rpccnt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpccnt++;
trace_svc_process(rqstp, progp->pg_name);
- /* Build the reply header. */
- statp = resv->iov_base +resv->iov_len;
- svc_putnl(resv, RPC_SUCCESS);
+ aoffset = xdr_stream_pos(xdr);
/* un-reserve some of the out-queue now that we have a
* better idea of reply size
@@ -1334,17 +1424,16 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
svc_reserve_auth(rqstp, procp->pc_xdrressize<<2);
/* Call the function that processes the request. */
- rc = process.dispatch(rqstp, statp);
- if (procp->pc_release)
- procp->pc_release(rqstp);
+ rc = process.dispatch(rqstp);
+ xdr_finish_decode(xdr);
+
if (!rc)
goto dropit;
if (rqstp->rq_auth_stat != rpc_auth_ok)
goto err_bad_auth;
- /* Check RPC status result */
- if (*statp != rpc_success)
- resv->iov_len = ((void*)statp) - resv->iov_base + 4;
+ if (*rqstp->rq_accept_statp != rpc_success)
+ xdr_truncate_encode(xdr, aoffset);
if (procp->pc_encode == NULL)
goto dropit;
@@ -1368,71 +1457,89 @@ close_xprt:
return 0;
err_short_len:
- svc_printk(rqstp, "short len %zd, dropping request\n",
- argv->iov_len);
+ svc_printk(rqstp, "short len %u, dropping request\n",
+ rqstp->rq_arg.len);
goto close_xprt;
err_bad_rpc:
- serv->sv_stats->rpcbadfmt++;
- svc_putnl(resv, 1); /* REJECT */
- svc_putnl(resv, 0); /* RPC_MISMATCH */
- svc_putnl(resv, 2); /* Only RPCv2 supported */
- svc_putnl(resv, 2);
- goto sendit;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
+ xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
+ xdr_stream_encode_u32(xdr, RPC_MISMATCH);
+ /* Only RPCv2 supported */
+ xdr_stream_encode_u32(xdr, RPC_VERSION);
+ xdr_stream_encode_u32(xdr, RPC_VERSION);
+ return 1; /* don't wrap */
err_bad_auth:
dprintk("svc: authentication failed (%d)\n",
be32_to_cpu(rqstp->rq_auth_stat));
- serv->sv_stats->rpcbadauth++;
- /* Restore write pointer to location of accept status: */
- xdr_ressize_check(rqstp, reply_statp);
- svc_putnl(resv, 1); /* REJECT */
- svc_putnl(resv, 1); /* AUTH_ERROR */
- svc_putu32(resv, rqstp->rq_auth_stat); /* status */
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadauth++;
+ /* Restore write pointer to location of reply status: */
+ xdr_truncate_encode(xdr, XDR_UNIT * 2);
+ xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
+ xdr_stream_encode_u32(xdr, RPC_AUTH_ERROR);
+ xdr_stream_encode_be32(xdr, rqstp->rq_auth_stat);
goto sendit;
err_bad_prog:
- dprintk("svc: unknown program %d\n", prog);
- serv->sv_stats->rpcbadfmt++;
- svc_putnl(resv, RPC_PROG_UNAVAIL);
+ dprintk("svc: unknown program %d\n", rqstp->rq_prog);
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
+ *rqstp->rq_accept_statp = rpc_prog_unavail;
goto sendit;
err_bad_vers:
svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
- serv->sv_stats->rpcbadfmt++;
- svc_putnl(resv, RPC_PROG_MISMATCH);
- svc_putnl(resv, process.mismatch.lovers);
- svc_putnl(resv, process.mismatch.hivers);
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
+ *rqstp->rq_accept_statp = rpc_prog_mismatch;
+
+ /*
+ * svc_authenticate() has already added the verifier and
+ * advanced the stream just past rq_accept_statp.
+ */
+ xdr_stream_encode_u32(xdr, process.mismatch.lovers);
+ xdr_stream_encode_u32(xdr, process.mismatch.hivers);
goto sendit;
err_bad_proc:
svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
- serv->sv_stats->rpcbadfmt++;
- svc_putnl(resv, RPC_PROC_UNAVAIL);
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
+ *rqstp->rq_accept_statp = rpc_proc_unavail;
goto sendit;
+}
+
+/*
+ * Drop request
+ */
+static void svc_drop(struct svc_rqst *rqstp)
+{
+ trace_svc_drop(rqstp);
+}
-err_garbage:
- svc_printk(rqstp, "failed to decode args\n");
+static void svc_release_rqst(struct svc_rqst *rqstp)
+{
+ const struct svc_procedure *procp = rqstp->rq_procinfo;
- rpc_stat = rpc_garbage_args;
-err_bad:
- serv->sv_stats->rpcbadfmt++;
- svc_putnl(resv, ntohl(rpc_stat));
- goto sendit;
+ if (procp && procp->pc_release)
+ procp->pc_release(rqstp);
}
-/*
- * Process the RPC request.
+/**
+ * svc_process - Execute one RPC transaction
+ * @rqstp: RPC transaction context
+ *
*/
-int
-svc_process(struct svc_rqst *rqstp)
+void svc_process(struct svc_rqst *rqstp)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
- __be32 dir;
+ __be32 *p;
#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
if (!fail_sunrpc.ignore_server_disconnect &&
@@ -1455,44 +1562,49 @@ svc_process(struct svc_rqst *rqstp)
rqstp->rq_res.tail[0].iov_base = NULL;
rqstp->rq_res.tail[0].iov_len = 0;
- dir = svc_getu32(argv);
- if (dir != rpc_call)
+ svcxdr_init_decode(rqstp);
+ p = xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2);
+ if (unlikely(!p))
+ goto out_drop;
+ rqstp->rq_xid = *p++;
+ if (unlikely(*p != rpc_call))
goto out_baddir;
- if (!svc_process_common(rqstp, argv, resv))
+
+ if (!svc_process_common(rqstp)) {
+ svc_release_rqst(rqstp);
goto out_drop;
- return svc_send(rqstp);
+ }
+ svc_send(rqstp);
+ svc_release_rqst(rqstp);
+ return;
out_baddir:
svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
- be32_to_cpu(dir));
- rqstp->rq_server->sv_stats->rpcbadfmt++;
+ be32_to_cpu(*p));
+ if (rqstp->rq_server->sv_stats)
+ rqstp->rq_server->sv_stats->rpcbadfmt++;
out_drop:
svc_drop(rqstp);
- return 0;
}
-EXPORT_SYMBOL_GPL(svc_process);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-/*
- * Process a backchannel RPC request that arrived over an existing
- * outbound connection
+/**
+ * svc_process_bc - process a reverse-direction RPC request
+ * @req: RPC request to be used for client-side processing
+ * @rqstp: server-side execution context
+ *
*/
-int
-bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
- struct svc_rqst *rqstp)
+void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
- struct kvec *resv = &rqstp->rq_res.head[0];
+ struct rpc_timeout timeout = {
+ .to_increment = 0,
+ };
struct rpc_task *task;
int proc_error;
- int error;
-
- dprintk("svc: %s(%p)\n", __func__, req);
/* Build the svc_rqst used by the common processing routine */
rqstp->rq_xid = req->rq_xid;
rqstp->rq_prot = req->rq_xprt->prot;
- rqstp->rq_server = serv;
rqstp->rq_bc_net = req->rq_xprt->xprt_net;
rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
@@ -1513,43 +1625,46 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
rqstp->rq_arg.len = rqstp->rq_arg.head[0].iov_len +
rqstp->rq_arg.page_len;
- /* reset result send buffer "put" position */
- resv->iov_len = 0;
+ /* Reset the response buffer */
+ rqstp->rq_res.head[0].iov_len = 0;
/*
- * Skip the next two words because they've already been
- * processed in the transport
+ * Skip the XID and calldir fields because they've already
+ * been processed by the caller.
*/
- svc_getu32(argv); /* XID */
- svc_getnl(argv); /* CALLDIR */
+ svcxdr_init_decode(rqstp);
+ if (!xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2))
+ return;
/* Parse and execute the bc call */
- proc_error = svc_process_common(rqstp, argv, resv);
+ proc_error = svc_process_common(rqstp);
atomic_dec(&req->rq_xprt->bc_slot_count);
if (!proc_error) {
/* Processing error: drop the request */
xprt_free_bc_request(req);
- error = -EINVAL;
- goto out;
+ svc_release_rqst(rqstp);
+ return;
}
/* Finally, send the reply synchronously */
- memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
- task = rpc_run_bc_task(req);
- if (IS_ERR(task)) {
- error = PTR_ERR(task);
- goto out;
+ if (rqstp->bc_to_initval > 0) {
+ timeout.to_initval = rqstp->bc_to_initval;
+ timeout.to_retries = rqstp->bc_to_retries;
+ } else {
+ timeout.to_initval = req->rq_xprt->timeout->to_initval;
+ timeout.to_retries = req->rq_xprt->timeout->to_retries;
}
+ timeout.to_maxval = timeout.to_initval;
+ memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
+ task = rpc_run_bc_task(req, &timeout);
+ svc_release_rqst(rqstp);
+
+ if (IS_ERR(task))
+ return;
WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
- error = task->tk_status;
rpc_put_task(task);
-
-out:
- dprintk("svc: %s(), error=%d\n", __func__, error);
- return error;
}
-EXPORT_SYMBOL_GPL(bc_svc_process);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/**
@@ -1602,46 +1717,6 @@ int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset,
EXPORT_SYMBOL_GPL(svc_encode_result_payload);
/**
- * svc_fill_write_vector - Construct data argument for VFS write call
- * @rqstp: svc_rqst to operate on
- * @payload: xdr_buf containing only the write data payload
- *
- * Fills in rqstp::rq_vec, and returns the number of elements.
- */
-unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
- struct xdr_buf *payload)
-{
- struct page **pages = payload->pages;
- struct kvec *first = payload->head;
- struct kvec *vec = rqstp->rq_vec;
- size_t total = payload->len;
- unsigned int i;
-
- /* Some types of transport can present the write payload
- * entirely in rq_arg.pages. In this case, @first is empty.
- */
- i = 0;
- if (first->iov_len) {
- vec[i].iov_base = first->iov_base;
- vec[i].iov_len = min_t(size_t, total, first->iov_len);
- total -= vec[i].iov_len;
- ++i;
- }
-
- while (total) {
- vec[i].iov_base = page_address(*pages);
- vec[i].iov_len = min_t(size_t, total, PAGE_SIZE);
- total -= vec[i].iov_len;
- ++i;
- ++pages;
- }
-
- WARN_ON_ONCE(i > ARRAY_SIZE(rqstp->rq_vec));
- return i;
-}
-EXPORT_SYMBOL_GPL(svc_fill_write_vector);
-
-/**
* svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call
* @rqstp: svc_rqst to operate on
* @first: buffer containing first section of pathname
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c2ce12538008..6973184ff667 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -9,7 +9,6 @@
#include <linux/sched/mm.h>
#include <linux/errno.h>
#include <linux/freezer.h>
-#include <linux/kthread.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/sunrpc/addr.h>
@@ -17,6 +16,7 @@
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/bc_xprt.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <trace/events/sunrpc.h>
@@ -46,7 +46,6 @@ static LIST_HEAD(svc_xprt_class_list);
/* SMP locking strategy:
*
- * svc_pool->sp_lock protects most of the fields of that pool.
* svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
* when both need to be taken (rare), svc_serv->sv_lock is first.
* The "service mutex" protects svc_serv->sv_nrthread.
@@ -74,13 +73,18 @@ static LIST_HEAD(svc_xprt_class_list);
* that no other thread will be using the transport or will
* try to set XPT_DEAD.
*/
+
+/**
+ * svc_reg_xprt_class - Register a server-side RPC transport class
+ * @xcl: New transport class to be registered
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
int svc_reg_xprt_class(struct svc_xprt_class *xcl)
{
struct svc_xprt_class *cl;
int res = -EEXIST;
- dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name);
-
INIT_LIST_HEAD(&xcl->xcl_list);
spin_lock(&svc_xprt_class_lock);
/* Make sure there isn't already a class with the same name */
@@ -96,9 +100,13 @@ out:
}
EXPORT_SYMBOL_GPL(svc_reg_xprt_class);
+/**
+ * svc_unreg_xprt_class - Unregister a server-side RPC transport class
+ * @xcl: Transport class to be unregistered
+ *
+ */
void svc_unreg_xprt_class(struct svc_xprt_class *xcl)
{
- dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name);
spin_lock(&svc_xprt_class_lock);
list_del_init(&xcl->xcl_list);
spin_unlock(&svc_xprt_class_lock);
@@ -149,6 +157,7 @@ int svc_print_xprts(char *buf, int maxlen)
*/
void svc_xprt_deferred_close(struct svc_xprt *xprt)
{
+ trace_svc_xprt_close(xprt);
if (!test_and_set_bit(XPT_CLOSE, &xprt->xpt_flags))
svc_xprt_enqueue(xprt);
}
@@ -192,7 +201,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
kref_init(&xprt->xpt_ref);
xprt->xpt_server = serv;
INIT_LIST_HEAD(&xprt->xpt_list);
- INIT_LIST_HEAD(&xprt->xpt_ready);
INIT_LIST_HEAD(&xprt->xpt_deferred);
INIT_LIST_HEAD(&xprt->xpt_users);
mutex_init(&xprt->xpt_mutex);
@@ -203,51 +211,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
}
EXPORT_SYMBOL_GPL(svc_xprt_init);
-static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
- struct svc_serv *serv,
- struct net *net,
- const int family,
- const unsigned short port,
- int flags)
-{
- struct sockaddr_in sin = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_ANY),
- .sin_port = htons(port),
- };
-#if IS_ENABLED(CONFIG_IPV6)
- struct sockaddr_in6 sin6 = {
- .sin6_family = AF_INET6,
- .sin6_addr = IN6ADDR_ANY_INIT,
- .sin6_port = htons(port),
- };
-#endif
- struct svc_xprt *xprt;
- struct sockaddr *sap;
- size_t len;
-
- switch (family) {
- case PF_INET:
- sap = (struct sockaddr *)&sin;
- len = sizeof(sin);
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case PF_INET6:
- sap = (struct sockaddr *)&sin6;
- len = sizeof(sin6);
- break;
-#endif
- default:
- return ERR_PTR(-EAFNOSUPPORT);
- }
-
- xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
- if (IS_ERR(xprt))
- trace_svc_xprt_create_err(serv->sv_program->pg_name,
- xcl->xcl_name, sap, len, xprt);
- return xprt;
-}
-
/**
* svc_xprt_received - start next receiver thread
* @xprt: controlling transport
@@ -286,9 +249,8 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
}
static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
- struct net *net, const int family,
- const unsigned short port, int flags,
- const struct cred *cred)
+ struct net *net, struct sockaddr *sap,
+ size_t len, int flags, const struct cred *cred)
{
struct svc_xprt_class *xcl;
@@ -304,8 +266,11 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
goto err;
spin_unlock(&svc_xprt_class_lock);
- newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
+ newxprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
if (IS_ERR(newxprt)) {
+ trace_svc_xprt_create_err(serv->sv_programs->pg_name,
+ xcl->xcl_name, sap, len,
+ newxprt);
module_put(xcl->xcl_owner);
return PTR_ERR(newxprt);
}
@@ -322,6 +287,48 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
}
/**
+ * svc_xprt_create_from_sa - Add a new listener to @serv from socket address
+ * @serv: target RPC service
+ * @xprt_name: transport class name
+ * @net: network namespace
+ * @sap: socket address pointer
+ * @flags: SVC_SOCK flags
+ * @cred: credential to bind to this transport
+ *
+ * Return local xprt port on success or %-EPROTONOSUPPORT on failure
+ */
+int svc_xprt_create_from_sa(struct svc_serv *serv, const char *xprt_name,
+ struct net *net, struct sockaddr *sap,
+ int flags, const struct cred *cred)
+{
+ size_t len;
+ int err;
+
+ switch (sap->sa_family) {
+ case AF_INET:
+ len = sizeof(struct sockaddr_in);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ len = sizeof(struct sockaddr_in6);
+ break;
+#endif
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ err = _svc_xprt_create(serv, xprt_name, net, sap, len, flags, cred);
+ if (err == -EPROTONOSUPPORT) {
+ request_module("svc%s", xprt_name);
+ err = _svc_xprt_create(serv, xprt_name, net, sap, len, flags,
+ cred);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(svc_xprt_create_from_sa);
+
+/**
* svc_xprt_create - Add a new listener to @serv
* @serv: target RPC service
* @xprt_name: transport class name
@@ -331,23 +338,41 @@ static int _svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
* @flags: SVC_SOCK flags
* @cred: credential to bind to this transport
*
- * Return values:
- * %0: New listener added successfully
- * %-EPROTONOSUPPORT: Requested transport type not supported
+ * Return local xprt port on success or %-EPROTONOSUPPORT on failure
*/
int svc_xprt_create(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags,
const struct cred *cred)
{
- int err;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ .sin_port = htons(port),
+ };
+#if IS_ENABLED(CONFIG_IPV6)
+ struct sockaddr_in6 sin6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_ANY_INIT,
+ .sin6_port = htons(port),
+ };
+#endif
+ struct sockaddr *sap;
- err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
- if (err == -EPROTONOSUPPORT) {
- request_module("svc%s", xprt_name);
- err = _svc_xprt_create(serv, xprt_name, net, family, port, flags, cred);
+ switch (family) {
+ case PF_INET:
+ sap = (struct sockaddr *)&sin;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case PF_INET6:
+ sap = (struct sockaddr *)&sin6;
+ break;
+#endif
+ default:
+ return -EAFNOSUPPORT;
}
- return err;
+
+ return svc_xprt_create_from_sa(serv, xprt_name, net, sap, flags, cred);
}
EXPORT_SYMBOL_GPL(svc_xprt_create);
@@ -425,9 +450,10 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
smp_rmb();
xpt_flags = READ_ONCE(xprt->xpt_flags);
+ trace_svc_xprt_enqueue(xprt, xpt_flags);
if (xpt_flags & BIT(XPT_BUSY))
return false;
- if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
+ if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
return true;
if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
@@ -447,7 +473,6 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
void svc_xprt_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
- struct svc_rqst *rqstp = NULL;
if (!svc_xprt_ready(xprt))
return;
@@ -462,28 +487,11 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
pool = svc_pool_for_cpu(xprt->xpt_server);
- atomic_long_inc(&pool->sp_stats.packets);
-
- spin_lock_bh(&pool->sp_lock);
- list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
- pool->sp_stats.sockets_queued++;
- spin_unlock_bh(&pool->sp_lock);
+ percpu_counter_inc(&pool->sp_sockets_queued);
+ xprt->xpt_qtime = ktime_get();
+ lwq_enqueue(&xprt->xpt_ready, &pool->sp_xprts);
- /* find a thread for this xprt */
- rcu_read_lock();
- list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
- continue;
- atomic_long_inc(&pool->sp_stats.threads_woken);
- rqstp->rq_qtime = ktime_get();
- wake_up_process(rqstp->rq_task);
- goto out_unlock;
- }
- set_bit(SP_CONGESTED, &pool->sp_flags);
- rqstp = NULL;
-out_unlock:
- rcu_read_unlock();
- trace_svc_xprt_enqueue(xprt, rqstp);
+ svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
@@ -494,18 +502,9 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
{
struct svc_xprt *xprt = NULL;
- if (list_empty(&pool->sp_sockets))
- goto out;
-
- spin_lock_bh(&pool->sp_lock);
- if (likely(!list_empty(&pool->sp_sockets))) {
- xprt = list_first_entry(&pool->sp_sockets,
- struct svc_xprt, xpt_ready);
- list_del_init(&xprt->xpt_ready);
+ xprt = lwq_dequeue(&pool->sp_xprts, struct svc_xprt, xpt_ready);
+ if (xprt)
svc_xprt_get(xprt);
- }
- spin_unlock_bh(&pool->sp_lock);
-out:
return xprt;
}
@@ -534,17 +533,26 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
}
EXPORT_SYMBOL_GPL(svc_reserve);
+static void free_deferred(struct svc_xprt *xprt, struct svc_deferred_req *dr)
+{
+ if (!dr)
+ return;
+
+ xprt->xpt_ops->xpo_release_ctxt(xprt, dr->xprt_ctxt);
+ kfree(dr);
+}
+
static void svc_xprt_release(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
- xprt->xpt_ops->xpo_release_rqst(rqstp);
+ xprt->xpt_ops->xpo_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
+ rqstp->rq_xprt_ctxt = NULL;
- kfree(rqstp->rq_deferred);
+ free_deferred(xprt, rqstp->rq_deferred);
rqstp->rq_deferred = NULL;
- pagevec_release(&rqstp->rq_pvec);
- svc_free_res_pages(rqstp);
+ svc_rqst_release_pages(rqstp);
rqstp->rq_res.page_len = 0;
rqstp->rq_res.page_base = 0;
@@ -565,7 +573,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
svc_xprt_put(xprt);
}
-/*
+/**
+ * svc_wake_up - Wake up a service thread for non-transport work
+ * @serv: RPC service
+ *
* Some svc_serv's will have occasional work to do, even when a xprt is not
* waiting to be serviced. This function is there to "kick" a task in one of
* those services so that it can wake up and do that work. Note that we only
@@ -574,27 +585,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
*/
void svc_wake_up(struct svc_serv *serv)
{
- struct svc_rqst *rqstp;
- struct svc_pool *pool;
-
- pool = &serv->sv_pools[0];
-
- rcu_read_lock();
- list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- /* skip any that aren't queued */
- if (test_bit(RQ_BUSY, &rqstp->rq_flags))
- continue;
- rcu_read_unlock();
- wake_up_process(rqstp->rq_task);
- trace_svc_wake_up(rqstp->rq_task->pid);
- return;
- }
- rcu_read_unlock();
+ struct svc_pool *pool = &serv->sv_pools[0];
- /* No free entries available */
set_bit(SP_TASK_PENDING, &pool->sp_flags);
- smp_wmb();
- trace_svc_wake_up(0);
+ svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_wake_up);
@@ -613,7 +607,8 @@ int svc_port_is_privileged(struct sockaddr *sin)
}
/*
- * Make sure that we don't have too many active connections. If we have,
+ * Make sure that we don't have too many connections that have not yet
+ * demonstrated that they have access to the NFS server. If we have,
* something must be dropped. It's not clear what will happen if we allow
* "too many" connections, but when dealing with network-facing software,
* we have to code defensively. Here we do that by imposing hard limits.
@@ -625,34 +620,26 @@ int svc_port_is_privileged(struct sockaddr *sin)
* The only somewhat efficient mechanism would be if drop old
* connections from the same IP first. But right now we don't even
* record the client IP in svc_sock.
- *
- * single-threaded services that expect a lot of clients will probably
- * need to set sv_maxconn to override the default value which is based
- * on the number of threads
*/
static void svc_check_conn_limits(struct svc_serv *serv)
{
- unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn :
- (serv->sv_nrthreads+3) * 20;
-
- if (serv->sv_tmpcnt > limit) {
- struct svc_xprt *xprt = NULL;
+ if (serv->sv_tmpcnt > XPT_MAX_TMP_CONN) {
+ struct svc_xprt *xprt = NULL, *xprti;
spin_lock_bh(&serv->sv_lock);
if (!list_empty(&serv->sv_tempsocks)) {
- /* Try to help the admin */
- net_notice_ratelimited("%s: too many open connections, consider increasing the %s\n",
- serv->sv_name, serv->sv_maxconn ?
- "max number of connections" :
- "number of threads");
/*
* Always select the oldest connection. It's not fair,
- * but so is life
+ * but nor is life.
*/
- xprt = list_entry(serv->sv_tempsocks.prev,
- struct svc_xprt,
- xpt_list);
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
- svc_xprt_get(xprt);
+ list_for_each_entry_reverse(xprti, &serv->sv_tempsocks,
+ xpt_list) {
+ if (!test_bit(XPT_PEER_VALID, &xprti->xpt_flags)) {
+ xprt = xprti;
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ svc_xprt_get(xprt);
+ break;
+ }
+ }
}
spin_unlock_bh(&serv->sv_lock);
@@ -663,33 +650,22 @@ static void svc_check_conn_limits(struct svc_serv *serv)
}
}
-static int svc_alloc_arg(struct svc_rqst *rqstp)
+static bool svc_alloc_arg(struct svc_rqst *rqstp)
{
- struct svc_serv *serv = rqstp->rq_server;
struct xdr_buf *arg = &rqstp->rq_arg;
unsigned long pages, filled, ret;
- pagevec_init(&rqstp->rq_pvec);
-
- pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
- if (pages > RPCSVC_MAXPAGES) {
- pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n",
- pages, RPCSVC_MAXPAGES);
- /* use as many pages as possible */
- pages = RPCSVC_MAXPAGES;
- }
-
+ pages = rqstp->rq_maxpages;
for (filled = 0; filled < pages; filled = ret) {
- ret = alloc_pages_bulk_array(GFP_KERNEL, pages,
- rqstp->rq_pages);
+ ret = alloc_pages_bulk(GFP_KERNEL, pages, rqstp->rq_pages);
if (ret > filled)
/* Made progress, don't sleep yet */
continue;
- set_current_state(TASK_INTERRUPTIBLE);
- if (signalled() || kthread_should_stop()) {
+ set_current_state(TASK_IDLE);
+ if (svc_thread_should_stop(rqstp)) {
set_current_state(TASK_RUNNING);
- return -EINTR;
+ return false;
}
trace_svc_alloc_arg_err(pages, ret);
memalloc_retry_wait(GFP_KERNEL);
@@ -706,84 +682,66 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
arg->page_len = (pages-2)*PAGE_SIZE;
arg->len = (pages-1)*PAGE_SIZE;
arg->tail[0].iov_len = 0;
- return 0;
+
+ rqstp->rq_xid = xdr_zero;
+ return true;
}
static bool
-rqst_should_sleep(struct svc_rqst *rqstp)
+svc_thread_should_sleep(struct svc_rqst *rqstp)
{
struct svc_pool *pool = rqstp->rq_pool;
/* did someone call svc_wake_up? */
- if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags))
+ if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
return false;
/* was a socket queued? */
- if (!list_empty(&pool->sp_sockets))
+ if (!lwq_empty(&pool->sp_xprts))
return false;
/* are we shutting down? */
- if (signalled() || kthread_should_stop())
+ if (svc_thread_should_stop(rqstp))
return false;
- /* are we freezing? */
- if (freezing(current))
- return false;
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (svc_is_backchannel(rqstp)) {
+ if (!lwq_empty(&rqstp->rq_server->sv_cb_list))
+ return false;
+ }
+#endif
return true;
}
-static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+static void svc_thread_wait_for_work(struct svc_rqst *rqstp)
{
- struct svc_pool *pool = rqstp->rq_pool;
- long time_left = 0;
-
- /* rq_xprt should be clear on entry */
- WARN_ON_ONCE(rqstp->rq_xprt);
-
- rqstp->rq_xprt = svc_xprt_dequeue(pool);
- if (rqstp->rq_xprt)
- goto out_found;
-
- /*
- * We have to be able to interrupt this wait
- * to bring down the daemons ...
- */
- set_current_state(TASK_INTERRUPTIBLE);
- smp_mb__before_atomic();
- clear_bit(SP_CONGESTED, &pool->sp_flags);
- clear_bit(RQ_BUSY, &rqstp->rq_flags);
- smp_mb__after_atomic();
-
- if (likely(rqst_should_sleep(rqstp)))
- time_left = schedule_timeout(timeout);
- else
+ struct svc_pool *pool = rqstp->rq_pool;
+
+ if (svc_thread_should_sleep(rqstp)) {
+ set_current_state(TASK_IDLE | TASK_FREEZABLE);
+ llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
+ if (likely(svc_thread_should_sleep(rqstp)))
+ schedule();
+
+ while (!llist_del_first_this(&pool->sp_idle_threads,
+ &rqstp->rq_idle)) {
+ /* Work just became available. This thread can only
+ * handle it after removing rqstp from the idle
+ * list. If that attempt failed, some other thread
+ * must have queued itself after finding no
+ * work to do, so that thread has taken responsibly
+ * for this new work. This thread can safely sleep
+ * until woken again.
+ */
+ schedule();
+ set_current_state(TASK_IDLE | TASK_FREEZABLE);
+ }
__set_current_state(TASK_RUNNING);
-
+ } else {
+ cond_resched();
+ }
try_to_freeze();
-
- set_bit(RQ_BUSY, &rqstp->rq_flags);
- smp_mb__after_atomic();
- rqstp->rq_xprt = svc_xprt_dequeue(pool);
- if (rqstp->rq_xprt)
- goto out_found;
-
- if (!time_left)
- atomic_long_inc(&pool->sp_stats.threads_timedout);
-
- if (signalled() || kthread_should_stop())
- return ERR_PTR(-EINTR);
- return ERR_PTR(-EAGAIN);
-out_found:
- /* Normally we will wait up to 5 seconds for any required
- * cache information to be provided.
- */
- if (!test_bit(SP_CONGESTED, &pool->sp_flags))
- rqstp->rq_chandle.thread_wait = 5*HZ;
- else
- rqstp->rq_chandle.thread_wait = 1*HZ;
- trace_svc_xprt_dequeue(rqstp);
- return rqstp->rq_xprt;
}
static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
@@ -802,7 +760,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt
svc_xprt_received(newxpt);
}
-static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
+static void svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
{
struct svc_serv *serv = rqstp->rq_server;
int len = 0;
@@ -831,100 +789,119 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
module_put(xprt->xpt_class->xcl_owner);
}
svc_xprt_received(xprt);
+ } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
+ xprt->xpt_ops->xpo_handshake(xprt);
+ svc_xprt_received(xprt);
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
- dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
- rqstp, rqstp->rq_pool->sp_id, xprt,
- kref_read(&xprt->xpt_ref));
rqstp->rq_deferred = svc_deferred_dequeue(xprt);
if (rqstp->rq_deferred)
len = svc_deferred_recv(rqstp);
else
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
- rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+ if (len <= 0)
+ goto out;
+
+ trace_svc_xdr_recvfrom(&rqstp->rq_arg);
+
+ clear_bit(XPT_OLD, &xprt->xpt_flags);
+
+ rqstp->rq_chandle.defer = svc_defer;
+
+ if (serv->sv_stats)
+ serv->sv_stats->netcnt++;
+ percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
+ rqstp->rq_stime = ktime_get();
+ svc_process(rqstp);
} else
svc_xprt_received(xprt);
out:
- return len;
+ rqstp->rq_res.len = 0;
+ svc_xprt_release(rqstp);
}
-/*
- * Receive the next request on any transport. This code is carefully
- * organised not to touch any cachelines in the shared svc_serv
- * structure, only cachelines in the local svc_pool.
+static void svc_thread_wake_next(struct svc_rqst *rqstp)
+{
+ if (!svc_thread_should_sleep(rqstp))
+ /* More work pending after I dequeued some,
+ * wake another worker
+ */
+ svc_pool_wake_idle_thread(rqstp->rq_pool);
+}
+
+/**
+ * svc_recv - Receive and process the next request on any transport
+ * @rqstp: an idle RPC service thread
+ *
+ * This code is carefully organised not to touch any cachelines in
+ * the shared svc_serv structure, only cachelines in the local
+ * svc_pool.
*/
-int svc_recv(struct svc_rqst *rqstp, long timeout)
+void svc_recv(struct svc_rqst *rqstp)
{
- struct svc_xprt *xprt = NULL;
- struct svc_serv *serv = rqstp->rq_server;
- int len, err;
+ struct svc_pool *pool = rqstp->rq_pool;
- err = svc_alloc_arg(rqstp);
- if (err)
- goto out;
+ if (!svc_alloc_arg(rqstp))
+ return;
- try_to_freeze();
- cond_resched();
- err = -EINTR;
- if (signalled() || kthread_should_stop())
- goto out;
+ svc_thread_wait_for_work(rqstp);
- xprt = svc_get_next_xprt(rqstp, timeout);
- if (IS_ERR(xprt)) {
- err = PTR_ERR(xprt);
- goto out;
+ clear_bit(SP_TASK_PENDING, &pool->sp_flags);
+
+ if (svc_thread_should_stop(rqstp)) {
+ svc_thread_wake_next(rqstp);
+ return;
}
- len = svc_handle_xprt(rqstp, xprt);
+ rqstp->rq_xprt = svc_xprt_dequeue(pool);
+ if (rqstp->rq_xprt) {
+ struct svc_xprt *xprt = rqstp->rq_xprt;
- /* No data, incomplete (TCP) read, or accept() */
- err = -EAGAIN;
- if (len <= 0)
- goto out_release;
- trace_svc_xdr_recvfrom(&rqstp->rq_arg);
+ svc_thread_wake_next(rqstp);
+ /* Normally we will wait up to 5 seconds for any required
+ * cache information to be provided. When there are no
+ * idle threads, we reduce the wait time.
+ */
+ if (pool->sp_idle_threads.first)
+ rqstp->rq_chandle.thread_wait = 5 * HZ;
+ else
+ rqstp->rq_chandle.thread_wait = 1 * HZ;
- clear_bit(XPT_OLD, &xprt->xpt_flags);
+ trace_svc_xprt_dequeue(rqstp);
+ svc_handle_xprt(rqstp, xprt);
+ }
- xprt->xpt_ops->xpo_secure_port(rqstp);
- rqstp->rq_chandle.defer = svc_defer;
- rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]);
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (svc_is_backchannel(rqstp)) {
+ struct svc_serv *serv = rqstp->rq_server;
+ struct rpc_rqst *req;
- if (serv->sv_stats)
- serv->sv_stats->netcnt++;
- return len;
-out_release:
- rqstp->rq_res.len = 0;
- svc_xprt_release(rqstp);
-out:
- return err;
+ req = lwq_dequeue(&serv->sv_cb_list,
+ struct rpc_rqst, rq_bc_list);
+ if (req) {
+ svc_thread_wake_next(rqstp);
+ svc_process_bc(req, rqstp);
+ }
+ }
+#endif
}
EXPORT_SYMBOL_GPL(svc_recv);
-/*
- * Drop request
- */
-void svc_drop(struct svc_rqst *rqstp)
-{
- trace_svc_drop(rqstp);
- svc_xprt_release(rqstp);
-}
-EXPORT_SYMBOL_GPL(svc_drop);
-
-/*
- * Return reply to client.
+/**
+ * svc_send - Return reply to client
+ * @rqstp: RPC transaction context
+ *
*/
-int svc_send(struct svc_rqst *rqstp)
+void svc_send(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt;
- int len = -EFAULT;
struct xdr_buf *xb;
+ int status;
xprt = rqstp->rq_xprt;
- if (!xprt)
- goto out;
/* calculate over-all length */
xb = &rqstp->rq_res;
@@ -934,15 +911,9 @@ int svc_send(struct svc_rqst *rqstp)
trace_svc_xdr_sendto(rqstp->rq_xid, xb);
trace_svc_stats_latency(rqstp);
- len = xprt->xpt_ops->xpo_sendto(rqstp);
+ status = xprt->xpt_ops->xpo_sendto(rqstp);
- trace_svc_send(rqstp, len);
- svc_xprt_release(rqstp);
-
- if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
- len = 0;
-out:
- return len;
+ trace_svc_send(rqstp, status);
}
/*
@@ -951,7 +922,7 @@ out:
*/
static void svc_age_temp_xprts(struct timer_list *t)
{
- struct svc_serv *serv = from_timer(serv, t, sv_temptimer);
+ struct svc_serv *serv = timer_container_of(serv, t, sv_temptimer);
struct svc_xprt *xprt;
struct list_head *le, *next;
@@ -1043,6 +1014,19 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
struct svc_serv *serv = xprt->xpt_server;
struct svc_deferred_req *dr;
+ /* unregister with rpcbind for when transport type is TCP or UDP.
+ */
+ if (test_bit(XPT_RPCB_UNREG, &xprt->xpt_flags)) {
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock,
+ sk_xprt);
+ struct socket *sock = svsk->sk_sock;
+
+ if (svc_register(serv, xprt->xpt_net, sock->sk->sk_family,
+ sock->sk->sk_protocol, 0) < 0)
+ pr_warn("failed to unregister %s with rpcbind\n",
+ xprt->xpt_class->xcl_name);
+ }
+
if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
return;
@@ -1053,13 +1037,13 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
spin_lock_bh(&serv->sv_lock);
list_del_init(&xprt->xpt_list);
- WARN_ON_ONCE(!list_empty(&xprt->xpt_ready));
- if (test_bit(XPT_TEMP, &xprt->xpt_flags))
+ if (test_bit(XPT_TEMP, &xprt->xpt_flags) &&
+ !test_bit(XPT_PEER_VALID, &xprt->xpt_flags))
serv->sv_tmpcnt--;
spin_unlock_bh(&serv->sv_lock);
while ((dr = svc_deferred_dequeue(xprt)) != NULL)
- kfree(dr);
+ free_deferred(xprt, dr);
call_xpt_users(xprt);
svc_xprt_put(xprt);
@@ -1104,36 +1088,26 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st
return ret;
}
-static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net)
+static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
{
- struct svc_pool *pool;
struct svc_xprt *xprt;
- struct svc_xprt *tmp;
int i;
for (i = 0; i < serv->sv_nrpools; i++) {
- pool = &serv->sv_pools[i];
-
- spin_lock_bh(&pool->sp_lock);
- list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) {
- if (xprt->xpt_net != net)
- continue;
- list_del_init(&xprt->xpt_ready);
- spin_unlock_bh(&pool->sp_lock);
- return xprt;
+ struct svc_pool *pool = &serv->sv_pools[i];
+ struct llist_node *q, **t1, *t2;
+
+ q = lwq_dequeue_all(&pool->sp_xprts);
+ lwq_for_each_safe(xprt, t1, t2, &q, xpt_ready) {
+ if (xprt->xpt_net == net) {
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ svc_delete_xprt(xprt);
+ xprt = NULL;
+ }
}
- spin_unlock_bh(&pool->sp_lock);
- }
- return NULL;
-}
-
-static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
-{
- struct svc_xprt *xprt;
- while ((xprt = svc_dequeue_net(serv, net))) {
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
- svc_delete_xprt(xprt);
+ if (q)
+ lwq_enqueue_batch(q, &pool->sp_xprts);
}
}
@@ -1141,6 +1115,7 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
* svc_xprt_destroy_all - Destroy transports associated with @serv
* @serv: RPC service to be shut down
* @net: target network namespace
+ * @unregister: true if it is OK to unregister the destroyed xprts
*
* Server threads may still be running (especially in the case where the
* service is still running in other network namespaces).
@@ -1153,7 +1128,8 @@ static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
* threads, we may need to wait a little while and then check again to
* see if they're done.
*/
-void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net)
+void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net,
+ bool unregister)
{
int delay = 0;
@@ -1163,6 +1139,9 @@ void svc_xprt_destroy_all(struct svc_serv *serv, struct net *net)
svc_clean_up_xprts(serv, net);
msleep(delay++);
}
+
+ if (unregister)
+ svc_rpcb_cleanup(serv, net);
}
EXPORT_SYMBOL_GPL(svc_xprt_destroy_all);
@@ -1181,8 +1160,8 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
spin_unlock(&xprt->xpt_lock);
trace_svc_defer_drop(dr);
+ free_deferred(xprt, dr);
svc_xprt_put(xprt);
- kfree(dr);
return;
}
dr->xprt = NULL;
@@ -1227,14 +1206,14 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
dr->addrlen = rqstp->rq_addrlen;
dr->daddr = rqstp->rq_daddr;
dr->argslen = rqstp->rq_arg.len >> 2;
- dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
- rqstp->rq_xprt_ctxt = NULL;
/* back up head to the start of the buffer and copy */
skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
dr->argslen << 2);
}
+ dr->xprt_ctxt = rqstp->rq_xprt_ctxt;
+ rqstp->rq_xprt_ctxt = NULL;
trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
@@ -1267,6 +1246,8 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
rqstp->rq_daddr = dr->daddr;
rqstp->rq_respages = rqstp->rq_pages;
rqstp->rq_xprt_ctxt = dr->xprt_ctxt;
+
+ dr->xprt_ctxt = NULL;
svc_xprt_received(rqstp->rq_xprt);
return dr->argslen << 2;
}
@@ -1291,6 +1272,40 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
}
/**
+ * svc_find_listener - find an RPC transport instance
+ * @serv: pointer to svc_serv to search
+ * @xcl_name: C string containing transport's class name
+ * @net: owner net pointer
+ * @sa: sockaddr containing address
+ *
+ * Return the transport instance pointer for the endpoint accepting
+ * connections/peer traffic from the specified transport class,
+ * and matching sockaddr.
+ */
+struct svc_xprt *svc_find_listener(struct svc_serv *serv, const char *xcl_name,
+ struct net *net, const struct sockaddr *sa)
+{
+ struct svc_xprt *xprt;
+ struct svc_xprt *found = NULL;
+
+ spin_lock_bh(&serv->sv_lock);
+ list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
+ if (xprt->xpt_net != net)
+ continue;
+ if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
+ continue;
+ if (!rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local))
+ continue;
+ found = xprt;
+ svc_xprt_get(xprt);
+ break;
+ }
+ spin_unlock_bh(&serv->sv_lock);
+ return found;
+}
+EXPORT_SYMBOL_GPL(svc_find_listener);
+
+/**
* svc_find_xprt - find an RPC transport instance
* @serv: pointer to svc_serv to search
* @xcl_name: C string containing transport's class name
@@ -1393,29 +1408,36 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen)
}
EXPORT_SYMBOL_GPL(svc_xprt_names);
-
/*----------------------------------------------------------------------------*/
static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
{
unsigned int pidx = (unsigned int)*pos;
- struct svc_serv *serv = m->private;
+ struct svc_info *si = m->private;
dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
+ mutex_lock(si->mutex);
+
if (!pidx)
return SEQ_START_TOKEN;
- return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
+ if (!si->serv)
+ return NULL;
+ return pidx > si->serv->sv_nrpools ? NULL
+ : &si->serv->sv_pools[pidx - 1];
}
static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
{
struct svc_pool *pool = p;
- struct svc_serv *serv = m->private;
+ struct svc_info *si = m->private;
+ struct svc_serv *serv = si->serv;
dprintk("svc_pool_stats_next, *pos=%llu\n", *pos);
- if (p == SEQ_START_TOKEN) {
+ if (!serv) {
+ pool = NULL;
+ } else if (p == SEQ_START_TOKEN) {
pool = &serv->sv_pools[0];
} else {
unsigned int pidx = (pool - &serv->sv_pools[0]);
@@ -1430,6 +1452,9 @@ static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
static void svc_pool_stats_stop(struct seq_file *m, void *p)
{
+ struct svc_info *si = m->private;
+
+ mutex_unlock(si->mutex);
}
static int svc_pool_stats_show(struct seq_file *m, void *p)
@@ -1441,12 +1466,11 @@ static int svc_pool_stats_show(struct seq_file *m, void *p)
return 0;
}
- seq_printf(m, "%u %lu %lu %lu %lu\n",
- pool->sp_id,
- (unsigned long)atomic_long_read(&pool->sp_stats.packets),
- pool->sp_stats.sockets_queued,
- (unsigned long)atomic_long_read(&pool->sp_stats.threads_woken),
- (unsigned long)atomic_long_read(&pool->sp_stats.threads_timedout));
+ seq_printf(m, "%u %llu %llu %llu 0\n",
+ pool->sp_id,
+ percpu_counter_sum_positive(&pool->sp_messages_arrived),
+ percpu_counter_sum_positive(&pool->sp_sockets_queued),
+ percpu_counter_sum_positive(&pool->sp_threads_woken));
return 0;
}
@@ -1458,14 +1482,18 @@ static const struct seq_operations svc_pool_stats_seq_ops = {
.show = svc_pool_stats_show,
};
-int svc_pool_stats_open(struct svc_serv *serv, struct file *file)
+int svc_pool_stats_open(struct svc_info *info, struct file *file)
{
+ struct seq_file *seq;
int err;
err = seq_open(file, &svc_pool_stats_seq_ops);
- if (!err)
- ((struct seq_file *) file->private_data)->private = serv;
- return err;
+ if (err)
+ return err;
+ seq = file->private_data;
+ seq->private = info;
+
+ return 0;
}
EXPORT_SYMBOL(svc_pool_stats_open);
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index e72ba2f13f6c..55b4d2874188 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -18,6 +18,7 @@
#include <linux/sunrpc/svcauth.h>
#include <linux/err.h>
#include <linux/hash.h>
+#include <linux/user_namespace.h>
#include <trace/events/sunrpc.h>
@@ -60,17 +61,31 @@ svc_put_auth_ops(struct auth_ops *aops)
module_put(aops->owner);
}
-int
-svc_authenticate(struct svc_rqst *rqstp)
+/**
+ * svc_authenticate - Initialize an outgoing credential
+ * @rqstp: RPC execution context
+ *
+ * Return values:
+ * %SVC_OK: XDR encoding of the result can begin
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_COMPLETE: GSS context lifetime event; no further action
+ * %SVC_DROP: Drop this request; no further action
+ * %SVC_CLOSE: Like drop, but also close transport connection
+ */
+enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp)
{
- rpc_authflavor_t flavor;
- struct auth_ops *aops;
+ struct auth_ops *aops;
+ u32 flavor;
rqstp->rq_auth_stat = rpc_auth_ok;
- flavor = svc_getnl(&rqstp->rq_arg.head[0]);
-
- dprintk("svc: svc_authenticate (%d)\n", flavor);
+ /*
+ * Decode the Call credential's flavor field. The credential's
+ * body field is decoded in the chosen ->accept method below.
+ */
+ if (xdr_stream_decode_u32(&rqstp->rq_arg_stream, &flavor) < 0)
+ return SVC_GARBAGE;
aops = svc_get_auth_ops(flavor);
if (aops == NULL) {
@@ -84,18 +99,29 @@ svc_authenticate(struct svc_rqst *rqstp)
rqstp->rq_authop = aops;
return aops->accept(rqstp);
}
-EXPORT_SYMBOL_GPL(svc_authenticate);
-int svc_set_client(struct svc_rqst *rqstp)
+/**
+ * svc_set_client - Assign an appropriate 'auth_domain' as the client
+ * @rqstp: RPC execution context
+ *
+ * Return values:
+ * %SVC_OK: Client was found and assigned
+ * %SVC_DENY: Client was explicitly denied
+ * %SVC_DROP: Ignore this request
+ * %SVC_CLOSE: Ignore this request and close the connection
+ */
+enum svc_auth_status svc_set_client(struct svc_rqst *rqstp)
{
rqstp->rq_client = NULL;
return rqstp->rq_authop->set_client(rqstp);
}
EXPORT_SYMBOL_GPL(svc_set_client);
-/* A request, which was authenticated, has now executed.
- * Time to finalise the credentials and verifier
- * and release and resources
+/**
+ * svc_authorise - Finalize credentials/verifier and release resources
+ * @rqstp: RPC execution context
+ *
+ * Returns zero on success, or a negative errno.
*/
int svc_authorise(struct svc_rqst *rqstp)
{
@@ -134,6 +160,49 @@ svc_auth_unregister(rpc_authflavor_t flavor)
}
EXPORT_SYMBOL_GPL(svc_auth_unregister);
+/**
+ * svc_auth_flavor - return RPC transaction's RPC_AUTH flavor
+ * @rqstp: RPC transaction context
+ *
+ * Returns an RPC flavor or GSS pseudoflavor.
+ */
+rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp)
+{
+ struct auth_ops *aops = rqstp->rq_authop;
+
+ if (!aops->pseudoflavor)
+ return aops->flavour;
+ return aops->pseudoflavor(rqstp);
+}
+EXPORT_SYMBOL_GPL(svc_auth_flavor);
+
+/**
+ * svcauth_map_clnt_to_svc_cred_local - maps a generic cred
+ * to a svc_cred suitable for use in nfsd.
+ * @clnt: rpc_clnt associated with nfs client
+ * @cred: generic cred associated with nfs client
+ * @svc: returned svc_cred that is suitable for use in nfsd
+ */
+void svcauth_map_clnt_to_svc_cred_local(struct rpc_clnt *clnt,
+ const struct cred *cred,
+ struct svc_cred *svc)
+{
+ struct user_namespace *userns = clnt->cl_cred ?
+ clnt->cl_cred->user_ns : &init_user_ns;
+
+ memset(svc, 0, sizeof(struct svc_cred));
+
+ svc->cr_uid = KUIDT_INIT(from_kuid_munged(userns, cred->fsuid));
+ svc->cr_gid = KGIDT_INIT(from_kgid_munged(userns, cred->fsgid));
+ svc->cr_flavor = clnt->cl_auth->au_flavor;
+ if (cred->group_info)
+ svc->cr_group_info = get_group_info(cred->group_info);
+ /* These aren't relevant for local (network is bypassed) */
+ svc->cr_principal = NULL;
+ svc->cr_gss_mech = NULL;
+}
+EXPORT_SYMBOL_GPL(svcauth_map_clnt_to_svc_cred_local);
+
/**************************************************
* 'auth_domains' are stored in a hash table indexed by name.
* When the last reference to an 'auth_domain' is dropped,
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index b1efc34db6ed..8ca98b146ec8 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -17,8 +17,9 @@
#include <net/ipv6.h>
#include <linux/kernel.h>
#include <linux/user_namespace.h>
-#define RPCDBG_FACILITY RPCDBG_AUTH
+#include <trace/events/sunrpc.h>
+#define RPCDBG_FACILITY RPCDBG_AUTH
#include "netns.h"
@@ -225,9 +226,9 @@ static int ip_map_parse(struct cache_detail *cd,
return -EINVAL;
}
- expiry = get_expiry(&mesg);
- if (expiry ==0)
- return -EINVAL;
+ err = get_expiry(&mesg, &expiry);
+ if (err)
+ return err;
/* domainname, or empty for NEGATIVE */
len = qword_get(&mesg, buf, mlen);
@@ -416,14 +417,23 @@ static int unix_gid_hash(kuid_t uid)
return hash_long(from_kuid(&init_user_ns, uid), GID_HASHBITS);
}
-static void unix_gid_put(struct kref *kref)
+static void unix_gid_free(struct rcu_head *rcu)
{
- struct cache_head *item = container_of(kref, struct cache_head, ref);
- struct unix_gid *ug = container_of(item, struct unix_gid, h);
+ struct unix_gid *ug = container_of(rcu, struct unix_gid, rcu);
+ struct cache_head *item = &ug->h;
+
if (test_bit(CACHE_VALID, &item->flags) &&
!test_bit(CACHE_NEGATIVE, &item->flags))
put_group_info(ug->gi);
- kfree_rcu(ug, rcu);
+ kfree(ug);
+}
+
+static void unix_gid_put(struct kref *kref)
+{
+ struct cache_head *item = container_of(kref, struct cache_head, ref);
+ struct unix_gid *ug = container_of(item, struct unix_gid, h);
+
+ call_rcu(&ug->rcu, unix_gid_free);
}
static int unix_gid_match(struct cache_head *corig, struct cache_head *cnew)
@@ -497,9 +507,9 @@ static int unix_gid_parse(struct cache_detail *cd,
uid = make_kuid(current_user_ns(), id);
ug.uid = uid;
- expiry = get_expiry(&mesg);
- if (expiry == 0)
- return -EINVAL;
+ err = get_expiry(&mesg, &expiry);
+ if (err)
+ return err;
rv = get_int(&mesg, &gids);
if (rv || gids < 0 || gids > 8192)
@@ -655,7 +665,7 @@ static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp)
}
}
-int
+enum svc_auth_status
svcauth_unix_set_client(struct svc_rqst *rqstp)
{
struct sockaddr_in *sin;
@@ -687,7 +697,8 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
rqstp->rq_auth_stat = rpc_autherr_badcred;
ipm = ip_map_cached_get(xprt);
if (ipm == NULL)
- ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
+ ipm = __ip_map_lookup(sn->ip_map_cache,
+ rqstp->rq_server->sv_programs->pg_class,
&sin6->sin6_addr);
if (ipm == NULL)
@@ -726,26 +737,40 @@ out:
rqstp->rq_auth_stat = rpc_auth_ok;
return SVC_OK;
}
-
EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
-static int
+/**
+ * svcauth_null_accept - Decode and validate incoming RPC_AUTH_NULL credential
+ * @rqstp: RPC transaction
+ *
+ * Return values:
+ * %SVC_OK: Both credential and verifier are valid
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_CLOSE: Temporary failure
+ *
+ * rqstp->rq_auth_stat is set as mandated by RFC 5531.
+ */
+static enum svc_auth_status
svcauth_null_accept(struct svc_rqst *rqstp)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
- struct kvec *resv = &rqstp->rq_res.head[0];
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct svc_cred *cred = &rqstp->rq_cred;
+ u32 flavor, len;
+ void *body;
- if (argv->iov_len < 3*4)
+ /* Length of Call's credential body field: */
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
return SVC_GARBAGE;
-
- if (svc_getu32(argv) != 0) {
- dprintk("svc: bad null cred\n");
+ if (len != 0) {
rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
- if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
- dprintk("svc: bad null verf\n");
+
+ /* Call's verf field: */
+ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0)
+ return SVC_GARBAGE;
+ if (flavor != RPC_AUTH_NULL || len != 0) {
rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
@@ -757,9 +782,11 @@ svcauth_null_accept(struct svc_rqst *rqstp)
if (cred->cr_group_info == NULL)
return SVC_CLOSE; /* kmalloc failure - client must retry */
- /* Put NULL verifier */
- svc_putnl(resv, RPC_AUTH_NULL);
- svc_putnl(resv, 0);
+ if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
+ RPC_AUTH_NULL, NULL, 0) < 0)
+ return SVC_CLOSE;
+ if (!svcxdr_set_accept_stat(rqstp))
+ return SVC_CLOSE;
rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL;
return SVC_OK;
@@ -783,31 +810,46 @@ struct auth_ops svcauth_null = {
.name = "null",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_NULL,
- .accept = svcauth_null_accept,
+ .accept = svcauth_null_accept,
.release = svcauth_null_release,
.set_client = svcauth_unix_set_client,
};
-static int
+/**
+ * svcauth_tls_accept - Decode and validate incoming RPC_AUTH_TLS credential
+ * @rqstp: RPC transaction
+ *
+ * Return values:
+ * %SVC_OK: Both credential and verifier are valid
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_CLOSE: Temporary failure
+ *
+ * rqstp->rq_auth_stat is set as mandated by RFC 5531.
+ */
+static enum svc_auth_status
svcauth_tls_accept(struct svc_rqst *rqstp)
{
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct svc_cred *cred = &rqstp->rq_cred;
- struct kvec *argv = rqstp->rq_arg.head;
- struct kvec *resv = rqstp->rq_res.head;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+ u32 flavor, len;
+ void *body;
+ __be32 *p;
- if (argv->iov_len < XDR_UNIT * 3)
+ /* Length of Call's credential body field: */
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
return SVC_GARBAGE;
-
- /* Call's cred length */
- if (svc_getu32(argv) != xdr_zero) {
+ if (len != 0) {
rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
- /* Call's verifier flavor and its length */
- if (svc_getu32(argv) != rpc_auth_null ||
- svc_getu32(argv) != xdr_zero) {
+ /* Call's verf field: */
+ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0)
+ return SVC_GARBAGE;
+ if (flavor != RPC_AUTH_NULL || len != 0) {
rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
@@ -818,21 +860,32 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
return SVC_DENIED;
}
- /* Mapping to nobody uid/gid is required */
+ /* Signal that mapping to nobody uid/gid is required */
cred->cr_uid = INVALID_UID;
cred->cr_gid = INVALID_GID;
cred->cr_group_info = groups_alloc(0);
if (cred->cr_group_info == NULL)
- return SVC_CLOSE; /* kmalloc failure - client must retry */
+ return SVC_CLOSE;
- /* Reply's verifier */
- svc_putnl(resv, RPC_AUTH_NULL);
- if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
- svc_putnl(resv, 8);
- memcpy(resv->iov_base + resv->iov_len, "STARTTLS", 8);
- resv->iov_len += 8;
- } else
- svc_putnl(resv, 0);
+ if (xprt->xpt_ops->xpo_handshake) {
+ p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8);
+ if (!p)
+ return SVC_CLOSE;
+ trace_svc_tls_start(xprt);
+ *p++ = rpc_auth_null;
+ *p++ = cpu_to_be32(8);
+ memcpy(p, "STARTTLS", 8);
+
+ set_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
+ } else {
+ trace_svc_tls_unavailable(xprt);
+ if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
+ RPC_AUTH_NULL, NULL, 0) < 0)
+ return SVC_CLOSE;
+ }
+ if (!svcxdr_set_accept_stat(rqstp))
+ return SVC_CLOSE;
rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS;
return SVC_OK;
@@ -842,32 +895,48 @@ struct auth_ops svcauth_tls = {
.name = "tls",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_TLS,
- .accept = svcauth_tls_accept,
+ .accept = svcauth_tls_accept,
.release = svcauth_null_release,
.set_client = svcauth_unix_set_client,
};
-static int
+/**
+ * svcauth_unix_accept - Decode and validate incoming RPC_AUTH_SYS credential
+ * @rqstp: RPC transaction
+ *
+ * Return values:
+ * %SVC_OK: Both credential and verifier are valid
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_CLOSE: Temporary failure
+ *
+ * rqstp->rq_auth_stat is set as mandated by RFC 5531.
+ */
+static enum svc_auth_status
svcauth_unix_accept(struct svc_rqst *rqstp)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
- struct kvec *resv = &rqstp->rq_res.head[0];
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct svc_cred *cred = &rqstp->rq_cred;
struct user_namespace *userns;
- u32 slen, i;
- int len = argv->iov_len;
+ u32 flavor, len, i;
+ void *body;
+ __be32 *p;
- if ((len -= 3*4) < 0)
+ /*
+ * This implementation ignores the length of the Call's
+ * credential body field and the timestamp and machinename
+ * fields.
+ */
+ p = xdr_inline_decode(xdr, XDR_UNIT * 3);
+ if (!p)
+ return SVC_GARBAGE;
+ len = be32_to_cpup(p + 2);
+ if (len > RPC_MAX_MACHINENAME)
+ return SVC_GARBAGE;
+ if (!xdr_inline_decode(xdr, len))
return SVC_GARBAGE;
- svc_getu32(argv); /* length */
- svc_getu32(argv); /* time stamp */
- slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */
- if (slen > 64 || (len -= (slen + 3)*4) < 0)
- goto badcred;
- argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */
- argv->iov_len -= slen*4;
/*
* Note: we skip uid_valid()/gid_valid() checks here for
* backwards compatibility with clients that use -1 id's.
@@ -877,27 +946,42 @@ svcauth_unix_accept(struct svc_rqst *rqstp)
*/
userns = (rqstp->rq_xprt && rqstp->rq_xprt->xpt_cred) ?
rqstp->rq_xprt->xpt_cred->user_ns : &init_user_ns;
- cred->cr_uid = make_kuid(userns, svc_getnl(argv)); /* uid */
- cred->cr_gid = make_kgid(userns, svc_getnl(argv)); /* gid */
- slen = svc_getnl(argv); /* gids length */
- if (slen > UNX_NGROUPS || (len -= (slen + 2)*4) < 0)
+ if (xdr_stream_decode_u32(xdr, &i) < 0)
+ return SVC_GARBAGE;
+ cred->cr_uid = make_kuid(userns, i);
+ if (xdr_stream_decode_u32(xdr, &i) < 0)
+ return SVC_GARBAGE;
+ cred->cr_gid = make_kgid(userns, i);
+
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
+ return SVC_GARBAGE;
+ if (len > UNX_NGROUPS)
goto badcred;
- cred->cr_group_info = groups_alloc(slen);
+ p = xdr_inline_decode(xdr, XDR_UNIT * len);
+ if (!p)
+ return SVC_GARBAGE;
+ cred->cr_group_info = groups_alloc(len);
if (cred->cr_group_info == NULL)
return SVC_CLOSE;
- for (i = 0; i < slen; i++) {
- kgid_t kgid = make_kgid(userns, svc_getnl(argv));
+ for (i = 0; i < len; i++) {
+ kgid_t kgid = make_kgid(userns, be32_to_cpup(p++));
cred->cr_group_info->gid[i] = kgid;
}
groups_sort(cred->cr_group_info);
- if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
+
+ /* Call's verf field: */
+ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0)
+ return SVC_GARBAGE;
+ if (flavor != RPC_AUTH_NULL || len != 0) {
rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
- /* Put NULL verifier */
- svc_putnl(resv, RPC_AUTH_NULL);
- svc_putnl(resv, 0);
+ if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
+ RPC_AUTH_NULL, NULL, 0) < 0)
+ return SVC_CLOSE;
+ if (!svcxdr_set_accept_stat(rqstp))
+ return SVC_CLOSE;
rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX;
return SVC_OK;
@@ -927,7 +1011,7 @@ struct auth_ops svcauth_unix = {
.name = "unix",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_UNIX,
- .accept = svcauth_unix_accept,
+ .accept = svcauth_unix_accept,
.release = svcauth_unix_release,
.domain_release = svcauth_unix_domain_release,
.set_client = svcauth_unix_set_client,
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 815baf308236..d61cd9b40491 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -36,6 +36,8 @@
#include <linux/skbuff.h>
#include <linux/file.h>
#include <linux/freezer.h>
+#include <linux/bvec.h>
+
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
@@ -43,9 +45,12 @@
#include <net/udp.h>
#include <net/tcp.h>
#include <net/tcp_states.h>
+#include <net/tls_prot.h>
+#include <net/handshake.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <asm/ioctls.h>
+#include <linux/key.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/clnt.h>
@@ -55,6 +60,7 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/xprt.h>
+#include <trace/events/sock.h>
#include <trace/events/sunrpc.h>
#include "socklib.h"
@@ -62,6 +68,23 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+/*
+ * For UDP:
+ * 1 for header page
+ * enough pages for RPCSVC_MAXPAYLOAD_UDP
+ * 1 in case payload is not aligned
+ * 1 for tail page
+ */
+enum {
+ SUNRPC_MAX_UDP_SENDPAGES = 1 + RPCSVC_MAXPAYLOAD_UDP / PAGE_SIZE + 1 + 1
+};
+
+/* To-do: to avoid tying up an nfsd thread while waiting for a
+ * handshake request, the request could instead be deferred.
+ */
+enum {
+ SVC_HANDSHAKE_TO = 5U * HZ
+};
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
int flags);
@@ -111,27 +134,27 @@ static void svc_reclassify_socket(struct socket *sock)
#endif
/**
- * svc_tcp_release_rqst - Release transport-related resources
- * @rqstp: request structure with resources to be released
+ * svc_tcp_release_ctxt - Release transport-related resources
+ * @xprt: the transport which owned the context
+ * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
*
*/
-static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
+static void svc_tcp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
{
}
/**
- * svc_udp_release_rqst - Release transport-related resources
- * @rqstp: request structure with resources to be released
+ * svc_udp_release_ctxt - Release transport-related resources
+ * @xprt: the transport which owned the context
+ * @ctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
*
*/
-static void svc_udp_release_rqst(struct svc_rqst *rqstp)
+static void svc_udp_release_ctxt(struct svc_xprt *xprt, void *ctxt)
{
- struct sk_buff *skb = rqstp->rq_xprt_ctxt;
+ struct sk_buff *skb = ctxt;
- if (skb) {
- rqstp->rq_xprt_ctxt = NULL;
+ if (skb)
consume_skb(skb);
- }
}
union svc_pktinfo_u {
@@ -215,6 +238,80 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
return len;
}
+static int
+svc_tcp_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
+ struct cmsghdr *cmsg, int ret)
+{
+ u8 content_type = tls_get_record_type(sock->sk, cmsg);
+ u8 level, description;
+
+ switch (content_type) {
+ case 0:
+ break;
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ msg->msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ tls_alert_recv(sock->sk, msg, &level, &description);
+ ret = (level == TLS_ALERT_LEVEL_FATAL) ?
+ -ENOTCONN : -EAGAIN;
+ break;
+ default:
+ /* discard this record type */
+ ret = -EAGAIN;
+ }
+ return ret;
+}
+
+static int
+svc_tcp_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags)
+{
+ union {
+ struct cmsghdr cmsg;
+ u8 buf[CMSG_SPACE(sizeof(u8))];
+ } u;
+ u8 alert[2];
+ struct kvec alert_kvec = {
+ .iov_base = alert,
+ .iov_len = sizeof(alert),
+ };
+ struct msghdr msg = {
+ .msg_flags = *msg_flags,
+ .msg_control = &u,
+ .msg_controllen = sizeof(u),
+ };
+ int ret;
+
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1,
+ alert_kvec.iov_len);
+ ret = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
+ if (ret > 0 &&
+ tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT) {
+ iov_iter_revert(&msg.msg_iter, ret);
+ ret = svc_tcp_sock_process_cmsg(sock, &msg, &u.cmsg, -EAGAIN);
+ }
+ return ret;
+}
+
+static int
+svc_tcp_sock_recvmsg(struct svc_sock *svsk, struct msghdr *msg)
+{
+ int ret;
+ struct socket *sock = svsk->sk_sock;
+
+ ret = sock_recvmsg(sock, msg, MSG_DONTWAIT);
+ if (msg->msg_flags & MSG_CTRUNC) {
+ msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR);
+ if (ret == 0 || ret == -EIO)
+ ret = svc_tcp_sock_recv_cmsg(sock, &msg->msg_flags);
+ }
+ return ret;
+}
+
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek)
{
@@ -252,11 +349,8 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) {
- bvec[i].bv_page = rqstp->rq_pages[i];
- bvec[i].bv_len = PAGE_SIZE;
- bvec[i].bv_offset = 0;
- }
+ for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE)
+ bvec_set_page(&bvec[i], rqstp->rq_pages[i], PAGE_SIZE, 0);
rqstp->rq_respages = &rqstp->rq_pages[i];
rqstp->rq_next_page = rqstp->rq_respages + 1;
@@ -265,7 +359,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
iov_iter_advance(&msg.msg_iter, seek);
buflen -= seek;
}
- len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
+ len = svc_tcp_sock_recvmsg(svsk, &msg);
if (len > 0)
svc_flush_bvec(bvec, len, seek);
@@ -310,11 +404,15 @@ static void svc_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+ trace_sk_data_ready(sk);
+
if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
svsk->sk_odata(sk);
trace_svcsock_data_ready(&svsk->sk_xprt, 0);
+ if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags))
+ return;
if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
svc_xprt_enqueue(&svsk->sk_xprt);
}
@@ -352,6 +450,88 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
sock_no_linger(svsk->sk_sock->sk);
}
+/**
+ * svc_tcp_handshake_done - Handshake completion handler
+ * @data: address of xprt to wake
+ * @status: status of handshake
+ * @peerid: serial number of key containing the remote peer's identity
+ *
+ * If a security policy is specified as an export option, we don't
+ * have a specific export here to check. So we set a "TLS session
+ * is present" flag on the xprt and let an upper layer enforce local
+ * security policy.
+ */
+static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid)
+{
+ struct svc_xprt *xprt = data;
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+
+ if (!status) {
+ if (peerid != TLS_NO_PEERID)
+ set_bit(XPT_PEER_AUTH, &xprt->xpt_flags);
+ set_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
+ }
+ clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+ complete_all(&svsk->sk_handshake_done);
+}
+
+/**
+ * svc_tcp_handshake - Perform a transport-layer security handshake
+ * @xprt: connected transport endpoint
+ *
+ */
+static void svc_tcp_handshake(struct svc_xprt *xprt)
+{
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct sock *sk = svsk->sk_sock->sk;
+ struct tls_handshake_args args = {
+ .ta_sock = svsk->sk_sock,
+ .ta_done = svc_tcp_handshake_done,
+ .ta_data = xprt,
+ };
+ int ret;
+
+ trace_svc_tls_upcall(xprt);
+
+ clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
+ init_completion(&svsk->sk_handshake_done);
+
+ ret = tls_server_hello_x509(&args, GFP_KERNEL);
+ if (ret) {
+ trace_svc_tls_not_started(xprt);
+ goto out_failed;
+ }
+
+ ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done,
+ SVC_HANDSHAKE_TO);
+ if (ret <= 0) {
+ if (tls_handshake_cancel(sk)) {
+ trace_svc_tls_timed_out(xprt);
+ goto out_close;
+ }
+ }
+
+ if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) {
+ trace_svc_tls_unavailable(xprt);
+ goto out_close;
+ }
+
+ /* Mark the transport ready in case the remote sent RPC
+ * traffic before the kernel received the handshake
+ * completion downcall.
+ */
+ set_bit(XPT_DATA, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
+ return;
+
+out_close:
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+out_failed:
+ clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+ set_bit(XPT_DATA, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
+}
+
/*
* See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
*/
@@ -508,6 +688,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
if (serv->sv_stats)
serv->sv_stats->netudpcnt++;
+ svc_sock_secure_port(rqstp);
svc_xprt_received(rqstp->rq_xprt);
return len;
@@ -554,12 +735,14 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
.msg_name = &rqstp->rq_addr,
.msg_namelen = rqstp->rq_addrlen,
.msg_control = cmh,
+ .msg_flags = MSG_SPLICE_PAGES,
.msg_controllen = sizeof(buffer),
};
- unsigned int sent;
+ unsigned int count;
int err;
- svc_udp_release_rqst(rqstp);
+ svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
+ rqstp->rq_xprt_ctxt = NULL;
svc_set_cmsg_data(rqstp, cmh);
@@ -568,22 +751,22 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- err = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (err < 0)
- goto out_unlock;
+ count = xdr_buf_to_bvec(svsk->sk_bvec, SUNRPC_MAX_UDP_SENDPAGES, xdr);
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
+ count, rqstp->rq_res.len);
+ err = sock_sendmsg(svsk->sk_sock, &msg);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
+ count, rqstp->rq_res.len);
+ err = sock_sendmsg(svsk->sk_sock, &msg);
}
- xdr_free_bvec(xdr);
+
trace_svcsock_udp_send(xprt, err);
-out_unlock:
+
mutex_unlock(&xprt->xpt_mutex);
- if (err < 0)
- return err;
- return sent;
+ return err;
out_notconn:
mutex_unlock(&xprt->xpt_mutex);
@@ -631,12 +814,11 @@ static const struct svc_xprt_ops svc_udp_ops = {
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
.xpo_result_payload = svc_sock_result_payload,
- .xpo_release_rqst = svc_udp_release_rqst,
+ .xpo_release_ctxt = svc_udp_release_ctxt,
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_udp_has_wspace,
.xpo_accept = svc_udp_accept,
- .xpo_secure_port = svc_sock_secure_port,
.xpo_kill_temp_xprt = svc_udp_kill_temp_xprt,
};
@@ -665,6 +847,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
/* data might have come in before data_ready set up */
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
+ set_bit(XPT_RPCB_UNREG, &svsk->sk_xprt.xpt_flags);
/* make sure we get destination address info */
switch (svsk->sk_sk->sk_family) {
@@ -687,11 +870,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- if (svsk) {
- /* Refer to svc_setup_socket() for details. */
- rmb();
- svsk->sk_odata(sk);
- }
+ trace_sk_data_ready(sk);
/*
* This callback may called twice when a new connection
@@ -701,13 +880,18 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
* when one of child sockets become ESTABLISHED.
* 2) data_ready method of the child socket may be called
* when it receives data before the socket is accepted.
- * In case of 2, we should ignore it silently.
+ * In case of 2, we should ignore it silently and DO NOT
+ * dereference svsk.
*/
- if (sk->sk_state == TCP_LISTEN) {
- if (svsk) {
- set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
- }
+ if (sk->sk_state != TCP_LISTEN)
+ return;
+
+ if (svsk) {
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
+ svsk->sk_odata(sk);
+ set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
+ svc_xprt_enqueue(&svsk->sk_xprt);
}
}
@@ -748,15 +932,13 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
err = kernel_accept(sock, &newsock, O_NONBLOCK);
if (err < 0) {
- if (err == -ENOMEM)
- printk(KERN_WARNING "%s: no more sockets!\n",
- serv->sv_name);
- else if (err != -EAGAIN)
- net_warn_ratelimited("%s: accept failed (err %d)!\n",
- serv->sv_name, -err);
- trace_svcsock_accept_err(xprt, serv->sv_name, err);
+ if (err != -EAGAIN)
+ trace_svcsock_accept_err(xprt, serv->sv_name, err);
return NULL;
}
+ if (IS_ERR(sock_alloc_file(newsock, O_NONBLOCK, NULL)))
+ return NULL;
+
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
err = kernel_getpeername(newsock, sin);
@@ -797,7 +979,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
return &newsvsk->sk_xprt;
failed:
- sock_release(newsock);
+ sockfd_put(newsock);
return NULL;
}
@@ -875,7 +1057,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
iov.iov_len = want;
iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want);
- len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
+ len = svc_tcp_sock_recvmsg(svsk, &msg);
if (len < 0)
return len;
svsk->sk_tcplen += len;
@@ -891,9 +1073,10 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
return svc_sock_reclen(svsk);
err_too_large:
- net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
- __func__, svsk->sk_xprt.xpt_server->sv_name,
- svc_sock_reclen(svsk));
+ net_notice_ratelimited("svc: %s oversized RPC fragment (%u octets) from %pISpc\n",
+ svsk->sk_xprt.xpt_server->sv_name,
+ svc_sock_reclen(svsk),
+ (struct sockaddr *)&svsk->sk_xprt.xpt_remote);
svc_xprt_deferred_close(&svsk->sk_xprt);
err_short:
return -EAGAIN;
@@ -905,18 +1088,14 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
struct rpc_rqst *req = NULL;
struct kvec *src, *dst;
__be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
- __be32 xid;
- __be32 calldir;
-
- xid = *p++;
- calldir = *p;
+ __be32 xid = *p;
if (!bc_xprt)
return -EAGAIN;
spin_lock(&bc_xprt->queue_lock);
req = xprt_lookup_rqst(bc_xprt, xid);
if (!req)
- goto unlock_notfound;
+ goto unlock_eagain;
memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
/*
@@ -933,12 +1112,6 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
rqstp->rq_arg.len = 0;
spin_unlock(&bc_xprt->queue_lock);
return 0;
-unlock_notfound:
- printk(KERN_NOTICE
- "%s: Got unrecognized reply: "
- "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
- __func__, ntohl(calldir),
- bc_xprt, ntohl(xid));
unlock_eagain:
spin_unlock(&bc_xprt->queue_lock);
return -EAGAIN;
@@ -1028,6 +1201,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
if (serv->sv_stats)
serv->sv_stats->nettcpcnt++;
+ svc_sock_secure_port(rqstp);
svc_xprt_received(rqstp->rq_xprt);
return rqstp->rq_arg.len;
@@ -1057,87 +1231,39 @@ err_noclose:
return 0; /* record not complete */
}
-static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
- int flags)
-{
- return kernel_sendpage(sock, virt_to_page(vec->iov_base),
- offset_in_page(vec->iov_base),
- vec->iov_len, flags);
-}
-
/*
- * kernel_sendpage() is used exclusively to reduce the number of
+ * MSG_SPLICE_PAGES is used exclusively to reduce the number of
* copy operations in this path. Therefore the caller must ensure
* that the pages backing @xdr are unchanging.
- *
- * In addition, the logic assumes that * .bv_len is never larger
- * than PAGE_SIZE.
*/
-static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
- rpc_fraghdr marker, unsigned int *sentp)
+static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
+ rpc_fraghdr marker)
{
- const struct kvec *head = xdr->head;
- const struct kvec *tail = xdr->tail;
- struct kvec rm = {
- .iov_base = &marker,
- .iov_len = sizeof(marker),
- };
struct msghdr msg = {
- .msg_flags = 0,
+ .msg_flags = MSG_SPLICE_PAGES,
};
+ unsigned int count;
+ void *buf;
int ret;
- *sentp = 0;
- ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (ret < 0)
- return ret;
-
- ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != rm.iov_len)
- return -EAGAIN;
-
- ret = svc_tcp_send_kvec(sock, head, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != head->iov_len)
- goto out;
-
- if (xdr->page_len) {
- unsigned int offset, len, remaining;
- struct bio_vec *bvec;
-
- bvec = xdr->bvec + (xdr->page_base >> PAGE_SHIFT);
- offset = offset_in_page(xdr->page_base);
- remaining = xdr->page_len;
- while (remaining > 0) {
- len = min(remaining, bvec->bv_len - offset);
- ret = kernel_sendpage(sock, bvec->bv_page,
- bvec->bv_offset + offset,
- len, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != len)
- goto out;
- remaining -= len;
- offset = 0;
- bvec++;
- }
- }
-
- if (tail->iov_len) {
- ret = svc_tcp_send_kvec(sock, tail, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- }
-
-out:
- return 0;
+ /* The stream record marker is copied into a temporary page
+ * fragment buffer so that it can be included in sk_bvec.
+ */
+ buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker),
+ GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ memcpy(buf, &marker, sizeof(marker));
+ bvec_set_virt(svsk->sk_bvec, buf, sizeof(marker));
+
+ count = xdr_buf_to_bvec(svsk->sk_bvec + 1, rqstp->rq_maxpages,
+ &rqstp->rq_res);
+
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
+ 1 + count, sizeof(marker) + rqstp->rq_res.len);
+ ret = sock_sendmsg(svsk->sk_sock, &msg);
+ page_frag_free(buf);
+ return ret;
}
/**
@@ -1156,37 +1282,30 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
struct xdr_buf *xdr = &rqstp->rq_res;
rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
(u32)xdr->len);
- unsigned int sent;
- int err;
+ int sent;
- svc_tcp_release_rqst(rqstp);
+ svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
+ rqstp->rq_xprt_ctxt = NULL;
- atomic_inc(&svsk->sk_sendqlen);
mutex_lock(&xprt->xpt_mutex);
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- tcp_sock_set_cork(svsk->sk_sk, true);
- err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent);
- xdr_free_bvec(xdr);
- trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent);
- if (err < 0 || sent != (xdr->len + sizeof(marker)))
+ sent = svc_tcp_sendmsg(svsk, rqstp, marker);
+ trace_svcsock_tcp_send(xprt, sent);
+ if (sent < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
- if (atomic_dec_and_test(&svsk->sk_sendqlen))
- tcp_sock_set_cork(svsk->sk_sk, false);
mutex_unlock(&xprt->xpt_mutex);
return sent;
out_notconn:
- atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -ENOTCONN;
out_close:
- pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
+ pr_notice("rpc-srv/tcp: %s: %s %d when sending %zu bytes - shutting down socket\n",
xprt->xpt_server->sv_name,
- (err < 0) ? "got error" : "sent",
- (err < 0) ? err : sent, xdr->len);
+ (sent < 0) ? "got error" : "sent",
+ sent, xdr->len + sizeof(marker));
svc_xprt_deferred_close(xprt);
- atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
}
@@ -1204,13 +1323,13 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto,
.xpo_result_payload = svc_sock_result_payload,
- .xpo_release_rqst = svc_tcp_release_rqst,
+ .xpo_release_ctxt = svc_tcp_release_ctxt,
.xpo_detach = svc_tcp_sock_detach,
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
- .xpo_secure_port = svc_sock_secure_port,
.xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
+ .xpo_handshake = svc_tcp_handshake,
};
static struct svc_xprt_class svc_tcp_class = {
@@ -1244,6 +1363,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
if (sk->sk_state == TCP_LISTEN) {
strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
+ set_bit(XPT_RPCB_UNREG, &svsk->sk_xprt.xpt_flags);
sk->sk_data_ready = svc_tcp_listen_data_ready;
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
} else {
@@ -1254,7 +1374,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
svsk->sk_marker = xdr_zero;
svsk->sk_tcplen = 0;
svsk->sk_datalen = 0;
- memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
+ memset(&svsk->sk_pages[0], 0,
+ svsk->sk_maxpages * sizeof(struct page *));
tcp_sock_set_nodelay(sk);
@@ -1282,7 +1403,20 @@ void svc_sock_update_bufs(struct svc_serv *serv)
set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
spin_unlock_bh(&serv->sv_lock);
}
-EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
+
+static int svc_sock_sendpages(struct svc_serv *serv, struct socket *sock, int flags)
+{
+ switch (sock->type) {
+ case SOCK_STREAM:
+ /* +1 for TCP record marker */
+ if (flags & SVC_SOCK_TEMPORARY)
+ return svc_serv_maxpages(serv) + 1;
+ return 0;
+ case SOCK_DGRAM:
+ return SUNRPC_MAX_UDP_SENDPAGES;
+ }
+ return -EINVAL;
+}
/*
* Initialize socket for RPC use and create svc_sock struct
@@ -1294,23 +1428,41 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct svc_sock *svsk;
struct sock *inet;
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
- int err = 0;
+ int sendpages;
+ unsigned long pages;
+
+ sendpages = svc_sock_sendpages(serv, sock, flags);
+ if (sendpages < 0)
+ return ERR_PTR(sendpages);
- svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
+ pages = svc_serv_maxpages(serv);
+ svsk = kzalloc(struct_size(svsk, sk_pages, pages), GFP_KERNEL);
if (!svsk)
return ERR_PTR(-ENOMEM);
+ if (sendpages) {
+ svsk->sk_bvec = kcalloc(sendpages, sizeof(*svsk->sk_bvec), GFP_KERNEL);
+ if (!svsk->sk_bvec) {
+ kfree(svsk);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+
+ svsk->sk_maxpages = pages;
+
inet = sock->sk;
- /* Register socket with portmapper */
- if (pmap_register)
+ if (pmap_register) {
+ int err;
+
err = svc_register(serv, sock_net(sock->sk), inet->sk_family,
inet->sk_protocol,
ntohs(inet_sk(inet)->inet_sport));
-
- if (err < 0) {
- kfree(svsk);
- return ERR_PTR(err);
+ if (err < 0) {
+ kfree(svsk->sk_bvec);
+ kfree(svsk);
+ return ERR_PTR(err);
+ }
}
svsk->sk_sock = sock;
@@ -1320,7 +1472,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
svsk->sk_owspace = inet->sk_write_space;
/*
* This barrier is necessary in order to prevent race condition
- * with svc_data_ready(), svc_listen_data_ready() and others
+ * with svc_data_ready(), svc_tcp_listen_data_ready(), and others
* when calling callbacks above.
*/
wmb();
@@ -1332,29 +1484,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
else
svc_tcp_init(svsk, serv);
- trace_svcsock_new_socket(sock);
+ trace_svcsock_new(svsk, sock);
return svsk;
}
-bool svc_alien_sock(struct net *net, int fd)
-{
- int err;
- struct socket *sock = sockfd_lookup(fd, &err);
- bool ret = false;
-
- if (!sock)
- goto out;
- if (sock_net(sock->sk) != net)
- ret = true;
- sockfd_put(sock);
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(svc_alien_sock);
-
/**
* svc_addsock - add a listener socket to an RPC service
* @serv: pointer to RPC service to which to add a new listener
+ * @net: caller's network namespace
* @fd: file descriptor of the new listener
* @name_return: pointer to buffer to fill in with name of listener
* @len: size of the buffer
@@ -1364,8 +1501,8 @@ EXPORT_SYMBOL_GPL(svc_alien_sock);
* Name is terminated with '\n'. On error, returns a negative errno
* value.
*/
-int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
- const size_t len, const struct cred *cred)
+int svc_addsock(struct svc_serv *serv, struct net *net, const int fd,
+ char *name_return, const size_t len, const struct cred *cred)
{
int err = 0;
struct socket *so = sockfd_lookup(fd, &err);
@@ -1376,6 +1513,9 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
if (!so)
return err;
+ err = -EINVAL;
+ if (sock_net(so->sk) != net)
+ goto out;
err = -EAFNOSUPPORT;
if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6))
goto out;
@@ -1458,7 +1598,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
ip6_sock_set_v6only(sock->sk);
if (type == SOCK_STREAM)
sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */
- error = kernel_bind(sock, sin, len);
+ error = kernel_bind(sock, (struct sockaddr_unsized *)sin, len);
if (error < 0)
goto bummer;
@@ -1468,7 +1608,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
newlen = error;
if (protocol == IPPROTO_TCP) {
- if ((error = kernel_listen(sock, 64)) < 0)
+ sk_net_refcnt_upgrade(sock->sk);
+ if ((error = kernel_listen(sock, SOMAXCONN)) < 0)
goto bummer;
}
@@ -1509,6 +1650,8 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ tls_handshake_close(svsk->sk_sock);
+
svc_sock_detach(xprt);
if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
@@ -1523,10 +1666,17 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct socket *sock = svsk->sk_sock;
- if (svsk->sk_sock->file)
- sockfd_put(svsk->sk_sock);
+ trace_svcsock_free(svsk, sock);
+
+ tls_handshake_cancel(sock->sk);
+ if (sock->file)
+ sockfd_put(sock);
else
- sock_release(svsk->sk_sock);
+ sock_release(sock);
+
+ page_frag_cache_drain(&svsk->sk_frag_cache);
+ kfree(svsk->sk_bvec);
kfree(svsk);
}
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 3aad6ef18504..bdb587a72422 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -40,26 +40,7 @@ EXPORT_SYMBOL_GPL(nlm_debug);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-static struct ctl_table_header *sunrpc_table_header;
-static struct ctl_table sunrpc_table[];
-
-void
-rpc_register_sysctl(void)
-{
- if (!sunrpc_table_header)
- sunrpc_table_header = register_sysctl_table(sunrpc_table);
-}
-
-void
-rpc_unregister_sysctl(void)
-{
- if (sunrpc_table_header) {
- unregister_sysctl_table(sunrpc_table_header);
- sunrpc_table_header = NULL;
- }
-}
-
-static int proc_do_xprt(struct ctl_table *table, int write,
+static int proc_do_xprt(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[256];
@@ -81,7 +62,7 @@ static int proc_do_xprt(struct ctl_table *table, int write,
}
static int
-proc_dodebug(struct ctl_table *table, int write, void *buffer, size_t *lenp,
+proc_dodebug(const struct ctl_table *table, int write, void *buffer, size_t *lenp,
loff_t *ppos)
{
char tmpbuf[20], *s = NULL;
@@ -142,6 +123,7 @@ done:
return 0;
}
+static struct ctl_table_header *sunrpc_table_header;
static struct ctl_table debug_table[] = {
{
@@ -178,16 +160,21 @@ static struct ctl_table debug_table[] = {
.mode = 0444,
.proc_handler = proc_do_xprt,
},
- { }
};
-static struct ctl_table sunrpc_table[] = {
- {
- .procname = "sunrpc",
- .mode = 0555,
- .child = debug_table
- },
- { }
-};
+void
+rpc_register_sysctl(void)
+{
+ if (!sunrpc_table_header)
+ sunrpc_table_header = register_sysctl("sunrpc", debug_table);
+}
+void
+rpc_unregister_sysctl(void)
+{
+ if (sunrpc_table_header) {
+ unregister_sysctl_table(sunrpc_table_header);
+ sunrpc_table_header = NULL;
+ }
+}
#endif
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 1e05a2d723f4..8b01b7ae2690 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -36,7 +36,7 @@ rpc_sysfs_object_child_ns_type(const struct kobject *kobj)
return &net_ns_type_operations;
}
-static struct kobj_type rpc_sysfs_object_type = {
+static const struct kobj_type rpc_sysfs_object_type = {
.release = rpc_sysfs_object_release,
.sysfs_ops = &kobj_sysfs_ops,
.child_ns_type = rpc_sysfs_object_child_ns_type,
@@ -59,6 +59,16 @@ static struct kobject *rpc_sysfs_object_alloc(const char *name,
return NULL;
}
+static inline struct rpc_clnt *
+rpc_sysfs_client_kobj_get_clnt(struct kobject *kobj)
+{
+ struct rpc_sysfs_client *c = container_of(kobj,
+ struct rpc_sysfs_client, kobject);
+ struct rpc_clnt *ret = c->clnt;
+
+ return refcount_inc_not_zero(&ret->cl_count) ? ret : NULL;
+}
+
static inline struct rpc_xprt *
rpc_sysfs_xprt_kobj_get_xprt(struct kobject *kobj)
{
@@ -86,6 +96,51 @@ rpc_sysfs_xprt_switch_kobj_get_xprt(struct kobject *kobj)
return xprt_switch_get(x->xprt_switch);
}
+static ssize_t rpc_sysfs_clnt_version_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct rpc_clnt *clnt = rpc_sysfs_client_kobj_get_clnt(kobj);
+ ssize_t ret;
+
+ if (!clnt)
+ return sprintf(buf, "<closed>\n");
+
+ ret = sprintf(buf, "%u", clnt->cl_vers);
+ refcount_dec(&clnt->cl_count);
+ return ret;
+}
+
+static ssize_t rpc_sysfs_clnt_program_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct rpc_clnt *clnt = rpc_sysfs_client_kobj_get_clnt(kobj);
+ ssize_t ret;
+
+ if (!clnt)
+ return sprintf(buf, "<closed>\n");
+
+ ret = sprintf(buf, "%s", clnt->cl_program->name);
+ refcount_dec(&clnt->cl_count);
+ return ret;
+}
+
+static ssize_t rpc_sysfs_clnt_max_connect_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct rpc_clnt *clnt = rpc_sysfs_client_kobj_get_clnt(kobj);
+ ssize_t ret;
+
+ if (!clnt)
+ return sprintf(buf, "<closed>\n");
+
+ ret = sprintf(buf, "%u\n", clnt->cl_max_connect);
+ refcount_dec(&clnt->cl_count);
+ return ret;
+}
+
static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
@@ -129,6 +184,31 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
return ret;
}
+static const char *xprtsec_strings[] = {
+ [RPC_XPRTSEC_NONE] = "none",
+ [RPC_XPRTSEC_TLS_ANON] = "tls-anon",
+ [RPC_XPRTSEC_TLS_X509] = "tls-x509",
+};
+
+static ssize_t rpc_sysfs_xprt_xprtsec_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ ssize_t ret;
+
+ if (!xprt) {
+ ret = sprintf(buf, "<closed>\n");
+ goto out;
+ }
+
+ ret = sprintf(buf, "%s\n", xprtsec_strings[xprt->xprtsec.policy]);
+ xprt_put(xprt);
+out:
+ return ret;
+
+}
+
static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -206,6 +286,14 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
return ret;
}
+static ssize_t rpc_sysfs_xprt_del_xprt_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "# delete this xprt\n");
+}
+
+
static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
@@ -225,6 +313,55 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj,
return ret;
}
+static ssize_t rpc_sysfs_xprt_switch_add_xprt_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "# add one xprt to this xprt_switch\n");
+}
+
+static ssize_t rpc_sysfs_xprt_switch_add_xprt_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct rpc_xprt_switch *xprt_switch =
+ rpc_sysfs_xprt_switch_kobj_get_xprt(kobj);
+ struct xprt_create xprt_create_args;
+ struct rpc_xprt *xprt, *new;
+
+ if (!xprt_switch)
+ return 0;
+
+ xprt = rpc_xprt_switch_get_main_xprt(xprt_switch);
+ if (!xprt)
+ goto out;
+
+ xprt_create_args.ident = xprt->xprt_class->ident;
+ xprt_create_args.net = xprt->xprt_net;
+ xprt_create_args.dstaddr = (struct sockaddr *)&xprt->addr;
+ xprt_create_args.addrlen = xprt->addrlen;
+ xprt_create_args.servername = xprt->servername;
+ xprt_create_args.bc_xprt = xprt->bc_xprt;
+ xprt_create_args.xprtsec = xprt->xprtsec;
+ xprt_create_args.connect_timeout = xprt->connect_timeout;
+ xprt_create_args.reconnect_timeout = xprt->max_reconnect_timeout;
+
+ new = xprt_create_transport(&xprt_create_args);
+ if (IS_ERR_OR_NULL(new)) {
+ count = PTR_ERR(new);
+ goto out_put_xprt;
+ }
+
+ rpc_xprt_switch_add_xprt(xprt_switch, new);
+ xprt_put(new);
+
+out_put_xprt:
+ xprt_put(xprt);
+out:
+ xprt_switch_put(xprt_switch);
+ return count;
+}
+
static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
@@ -239,6 +376,7 @@ static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
if (!xprt)
return 0;
if (!(xprt->xprt_class->ident == XPRT_TRANSPORT_TCP ||
+ xprt->xprt_class->ident == XPRT_TRANSPORT_TCP_TLS ||
xprt->xprt_class->ident == XPRT_TRANSPORT_RDMA)) {
xprt_put(xprt);
return -EOPNOTSUPP;
@@ -251,7 +389,7 @@ static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
saddr = (struct sockaddr *)&xprt->addr;
port = rpc_get_port(saddr);
- /* buf_len is the len until the first occurence of either
+ /* buf_len is the len until the first occurrence of either
* '\n' or '\0'
*/
buf_len = strcspn(buf, "\n");
@@ -334,6 +472,40 @@ out_put:
return count;
}
+static ssize_t rpc_sysfs_xprt_del_xprt(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj);
+
+ if (!xprt || !xps) {
+ count = 0;
+ goto out;
+ }
+
+ if (xprt->main) {
+ count = -EINVAL;
+ goto release_tasks;
+ }
+
+ if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
+ count = -EINTR;
+ goto out_put;
+ }
+
+ xprt_set_offline_locked(xprt, xps);
+ xprt_delete_locked(xprt, xps);
+
+release_tasks:
+ xprt_release_write(xprt, NULL);
+out_put:
+ xprt_put(xprt);
+ xprt_switch_put(xps);
+out:
+ return count;
+}
+
int rpc_sysfs_init(void)
{
rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL, kernel_kobj);
@@ -397,23 +569,48 @@ static const void *rpc_sysfs_xprt_namespace(const struct kobject *kobj)
kobject)->xprt->xprt_net;
}
+static struct kobj_attribute rpc_sysfs_clnt_version = __ATTR(rpc_version,
+ 0444, rpc_sysfs_clnt_version_show, NULL);
+
+static struct kobj_attribute rpc_sysfs_clnt_program = __ATTR(program,
+ 0444, rpc_sysfs_clnt_program_show, NULL);
+
+static struct kobj_attribute rpc_sysfs_clnt_max_connect = __ATTR(max_connect,
+ 0444, rpc_sysfs_clnt_max_connect_show, NULL);
+
+static struct attribute *rpc_sysfs_rpc_clnt_attrs[] = {
+ &rpc_sysfs_clnt_version.attr,
+ &rpc_sysfs_clnt_program.attr,
+ &rpc_sysfs_clnt_max_connect.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(rpc_sysfs_rpc_clnt);
+
static struct kobj_attribute rpc_sysfs_xprt_dstaddr = __ATTR(dstaddr,
0644, rpc_sysfs_xprt_dstaddr_show, rpc_sysfs_xprt_dstaddr_store);
static struct kobj_attribute rpc_sysfs_xprt_srcaddr = __ATTR(srcaddr,
0644, rpc_sysfs_xprt_srcaddr_show, NULL);
+static struct kobj_attribute rpc_sysfs_xprt_xprtsec = __ATTR(xprtsec,
+ 0644, rpc_sysfs_xprt_xprtsec_show, NULL);
+
static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info,
0444, rpc_sysfs_xprt_info_show, NULL);
static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state,
0644, rpc_sysfs_xprt_state_show, rpc_sysfs_xprt_state_change);
+static struct kobj_attribute rpc_sysfs_xprt_del = __ATTR(del_xprt,
+ 0644, rpc_sysfs_xprt_del_xprt_show, rpc_sysfs_xprt_del_xprt);
+
static struct attribute *rpc_sysfs_xprt_attrs[] = {
&rpc_sysfs_xprt_dstaddr.attr,
&rpc_sysfs_xprt_srcaddr.attr,
+ &rpc_sysfs_xprt_xprtsec.attr,
&rpc_sysfs_xprt_info.attr,
&rpc_sysfs_xprt_change_state.attr,
+ &rpc_sysfs_xprt_del.attr,
NULL,
};
ATTRIBUTE_GROUPS(rpc_sysfs_xprt);
@@ -421,26 +618,32 @@ ATTRIBUTE_GROUPS(rpc_sysfs_xprt);
static struct kobj_attribute rpc_sysfs_xprt_switch_info =
__ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL);
+static struct kobj_attribute rpc_sysfs_xprt_switch_add_xprt =
+ __ATTR(add_xprt, 0644, rpc_sysfs_xprt_switch_add_xprt_show,
+ rpc_sysfs_xprt_switch_add_xprt_store);
+
static struct attribute *rpc_sysfs_xprt_switch_attrs[] = {
&rpc_sysfs_xprt_switch_info.attr,
+ &rpc_sysfs_xprt_switch_add_xprt.attr,
NULL,
};
ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch);
-static struct kobj_type rpc_sysfs_client_type = {
+static const struct kobj_type rpc_sysfs_client_type = {
.release = rpc_sysfs_client_release,
+ .default_groups = rpc_sysfs_rpc_clnt_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_client_namespace,
};
-static struct kobj_type rpc_sysfs_xprt_switch_type = {
+static const struct kobj_type rpc_sysfs_xprt_switch_type = {
.release = rpc_sysfs_xprt_switch_release,
.default_groups = rpc_sysfs_xprt_switch_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_xprt_switch_namespace,
};
-static struct kobj_type rpc_sysfs_xprt_type = {
+static const struct kobj_type rpc_sysfs_xprt_type = {
.release = rpc_sysfs_xprt_release,
.default_groups = rpc_sysfs_xprt_groups,
.sysfs_ops = &kobj_sysfs_ops,
diff --git a/net/sunrpc/sysfs.h b/net/sunrpc/sysfs.h
index 6620cebd1037..d2dd77a0a0e9 100644
--- a/net/sunrpc/sysfs.h
+++ b/net/sunrpc/sysfs.h
@@ -5,13 +5,6 @@
#ifndef __SUNRPC_SYSFS_H
#define __SUNRPC_SYSFS_H
-struct rpc_sysfs_client {
- struct kobject kobject;
- struct net *net;
- struct rpc_clnt *clnt;
- struct rpc_xprt_switch *xprt_switch;
-};
-
struct rpc_sysfs_xprt_switch {
struct kobject kobject;
struct net *net;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index f7767bf22406..70efc727a9cd 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -37,19 +37,6 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj)
}
EXPORT_SYMBOL_GPL(xdr_encode_netobj);
-__be32 *
-xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj)
-{
- unsigned int len;
-
- if ((len = be32_to_cpu(*p++)) > XDR_MAX_NETOBJ)
- return NULL;
- obj->len = len;
- obj->data = (u8 *) p;
- return p + XDR_QUADLEN(len);
-}
-EXPORT_SYMBOL_GPL(xdr_decode_netobj);
-
/**
* xdr_encode_opaque_fixed - Encode fixed length opaque data
* @p: pointer to current position in XDR buffer.
@@ -102,21 +89,6 @@ xdr_encode_string(__be32 *p, const char *string)
}
EXPORT_SYMBOL_GPL(xdr_encode_string);
-__be32 *
-xdr_decode_string_inplace(__be32 *p, char **sp,
- unsigned int *lenp, unsigned int maxlen)
-{
- u32 len;
-
- len = be32_to_cpu(*p++);
- if (len > maxlen)
- return NULL;
- *lenp = len;
- *sp = (char *) p;
- return p + XDR_QUADLEN(len);
-}
-EXPORT_SYMBOL_GPL(xdr_decode_string_inplace);
-
/**
* xdr_terminate_string - '\0'-terminate a string residing in an xdr_buf
* @buf: XDR buffer where string resides
@@ -150,9 +122,8 @@ xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp)
if (!buf->bvec)
return -ENOMEM;
for (i = 0; i < n; i++) {
- buf->bvec[i].bv_page = buf->pages[i];
- buf->bvec[i].bv_len = PAGE_SIZE;
- buf->bvec[i].bv_offset = 0;
+ bvec_set_page(&buf->bvec[i], buf->pages[i], PAGE_SIZE,
+ 0);
}
}
return 0;
@@ -166,6 +137,57 @@ xdr_free_bvec(struct xdr_buf *buf)
}
/**
+ * xdr_buf_to_bvec - Copy components of an xdr_buf into a bio_vec array
+ * @bvec: bio_vec array to populate
+ * @bvec_size: element count of @bio_vec
+ * @xdr: xdr_buf to be copied
+ *
+ * Returns the number of entries consumed in @bvec.
+ */
+unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size,
+ const struct xdr_buf *xdr)
+{
+ const struct kvec *head = xdr->head;
+ const struct kvec *tail = xdr->tail;
+ unsigned int count = 0;
+
+ if (head->iov_len) {
+ bvec_set_virt(bvec++, head->iov_base, head->iov_len);
+ ++count;
+ }
+
+ if (xdr->page_len) {
+ unsigned int offset, len, remaining;
+ struct page **pages = xdr->pages;
+
+ offset = offset_in_page(xdr->page_base);
+ remaining = xdr->page_len;
+ while (remaining > 0) {
+ len = min_t(unsigned int, remaining,
+ PAGE_SIZE - offset);
+ bvec_set_page(bvec++, *pages++, len, offset);
+ remaining -= len;
+ offset = 0;
+ if (unlikely(++count > bvec_size))
+ goto bvec_overflow;
+ }
+ }
+
+ if (tail->iov_len) {
+ bvec_set_virt(bvec, tail->iov_base, tail->iov_len);
+ if (unlikely(++count > bvec_size))
+ goto bvec_overflow;
+ }
+
+ return count;
+
+bvec_overflow:
+ pr_warn_once("%s: bio_vec array overflow\n", __func__);
+ return count - 1;
+}
+EXPORT_SYMBOL_GPL(xdr_buf_to_bvec);
+
+/**
* xdr_inline_pages - Prepare receive buffer for a large reply
* @xdr: xdr_buf into which reply will be placed
* @offset: expected offset where data payload will start, in bytes
@@ -863,13 +885,6 @@ static unsigned int xdr_shrink_pagelen(struct xdr_buf *buf, unsigned int len)
return shift;
}
-void
-xdr_shift_buf(struct xdr_buf *buf, size_t len)
-{
- xdr_shrink_bufhead(buf, buf->head->iov_len - len);
-}
-EXPORT_SYMBOL_GPL(xdr_shift_buf);
-
/**
* xdr_stream_pos - Return the current offset from the start of the xdr_stream
* @xdr: pointer to struct xdr_stream
@@ -950,21 +965,18 @@ EXPORT_SYMBOL_GPL(xdr_init_encode);
* xdr_init_encode_pages - Initialize an xdr_stream for encoding into pages
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer into which to encode data
- * @pages: list of pages to decode into
- * @rqst: pointer to controlling rpc_rqst, for debugging
*
*/
-void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
- struct page **pages, struct rpc_rqst *rqst)
+void xdr_init_encode_pages(struct xdr_stream *xdr, struct xdr_buf *buf)
{
xdr_reset_scratch_buffer(xdr);
xdr->buf = buf;
- xdr->page_ptr = pages;
+ xdr->page_ptr = buf->pages;
xdr->iov = NULL;
- xdr->p = page_address(*pages);
+ xdr->p = page_address(*xdr->page_ptr);
xdr->end = (void *)xdr->p + min_t(u32, buf->buflen, PAGE_SIZE);
- xdr->rqst = rqst;
+ xdr->rqst = NULL;
}
EXPORT_SYMBOL_GPL(xdr_init_encode_pages);
@@ -1055,6 +1067,12 @@ out_overflow:
* Checks that we have enough buffer space to encode 'nbytes' more
* bytes of data. If so, update the total xdr_buf length, and
* adjust the length of the current kvec.
+ *
+ * The returned pointer is valid only until the next call to
+ * xdr_reserve_space() or xdr_commit_encode() on @xdr. The current
+ * implementation of this API guarantees that space reserved for a
+ * four-byte data item remains valid until @xdr is destroyed, but
+ * that might not always be true in the future.
*/
__be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
{
@@ -1078,22 +1096,22 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
}
EXPORT_SYMBOL_GPL(xdr_reserve_space);
-
/**
* xdr_reserve_space_vec - Reserves a large amount of buffer space for sending
* @xdr: pointer to xdr_stream
- * @vec: pointer to a kvec array
* @nbytes: number of bytes to reserve
*
- * Reserves enough buffer space to encode 'nbytes' of data and stores the
- * pointers in 'vec'. The size argument passed to xdr_reserve_space() is
- * determined based on the number of bytes remaining in the current page to
- * avoid invalidating iov_base pointers when xdr_commit_encode() is called.
+ * The size argument passed to xdr_reserve_space() is determined based
+ * on the number of bytes remaining in the current page to avoid
+ * invalidating iov_base pointers when xdr_commit_encode() is called.
+ *
+ * Return values:
+ * %0: success
+ * %-EMSGSIZE: not enough space is available in @xdr
*/
-int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes)
+int xdr_reserve_space_vec(struct xdr_stream *xdr, size_t nbytes)
{
- int thislen;
- int v = 0;
+ size_t thislen;
__be32 *p;
/*
@@ -1105,21 +1123,19 @@ int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbyte
xdr->end = xdr->p;
}
+ /* XXX: Let's find a way to make this more efficient */
while (nbytes) {
thislen = xdr->buf->page_len % PAGE_SIZE;
thislen = min_t(size_t, nbytes, PAGE_SIZE - thislen);
p = xdr_reserve_space(xdr, thislen);
if (!p)
- return -EIO;
+ return -EMSGSIZE;
- vec[v].iov_base = p;
- vec[v].iov_len = thislen;
- v++;
nbytes -= thislen;
}
- return v;
+ return 0;
}
EXPORT_SYMBOL_GPL(xdr_reserve_space_vec);
@@ -1193,6 +1209,21 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
EXPORT_SYMBOL(xdr_truncate_encode);
/**
+ * xdr_truncate_decode - Truncate a decoding stream
+ * @xdr: pointer to struct xdr_stream
+ * @len: Number of bytes to remove
+ *
+ */
+void xdr_truncate_decode(struct xdr_stream *xdr, size_t len)
+{
+ unsigned int nbytes = xdr_align_size(len);
+
+ xdr->buf->len -= nbytes;
+ xdr->nwords -= XDR_QUADLEN(nbytes);
+}
+EXPORT_SYMBOL_GPL(xdr_truncate_decode);
+
+/**
* xdr_restrict_buflen - decrease available buffer space
* @xdr: pointer to xdr_stream
* @newbuflen: new maximum number of bytes available
@@ -1283,6 +1314,14 @@ static unsigned int xdr_set_tail_base(struct xdr_stream *xdr,
return xdr_set_iov(xdr, buf->tail, base, len);
}
+static void xdr_stream_unmap_current_page(struct xdr_stream *xdr)
+{
+ if (xdr->page_kaddr) {
+ kunmap_local(xdr->page_kaddr);
+ xdr->page_kaddr = NULL;
+ }
+}
+
static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
unsigned int base, unsigned int len)
{
@@ -1300,12 +1339,18 @@ static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
if (len > maxlen)
len = maxlen;
+ xdr_stream_unmap_current_page(xdr);
xdr_stream_page_set_pos(xdr, base);
base += xdr->buf->page_base;
pgnr = base >> PAGE_SHIFT;
xdr->page_ptr = &xdr->buf->pages[pgnr];
- kaddr = page_address(*xdr->page_ptr);
+
+ if (PageHighMem(*xdr->page_ptr)) {
+ xdr->page_kaddr = kmap_local_page(*xdr->page_ptr);
+ kaddr = xdr->page_kaddr;
+ } else
+ kaddr = page_address(*xdr->page_ptr);
pgoff = base & ~PAGE_MASK;
xdr->p = (__be32*)(kaddr + pgoff);
@@ -1359,6 +1404,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
struct rpc_rqst *rqst)
{
xdr->buf = buf;
+ xdr->page_kaddr = NULL;
xdr_reset_scratch_buffer(xdr);
xdr->nwords = XDR_QUADLEN(buf->len);
if (xdr_set_iov(xdr, buf->head, 0, buf->len) == 0 &&
@@ -1391,6 +1437,16 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
}
EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
+/**
+ * xdr_finish_decode - Clean up the xdr_stream after decoding data.
+ * @xdr: pointer to xdr_stream struct
+ */
+void xdr_finish_decode(struct xdr_stream *xdr)
+{
+ xdr_stream_unmap_current_page(xdr);
+}
+EXPORT_SYMBOL(xdr_finish_decode);
+
static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
unsigned int nwords = XDR_QUADLEN(nbytes);
@@ -2161,116 +2217,91 @@ out:
EXPORT_SYMBOL_GPL(xdr_process_buf);
/**
- * xdr_stream_decode_opaque - Decode variable length opaque
- * @xdr: pointer to xdr_stream
- * @ptr: location to store opaque data
- * @size: size of storage buffer @ptr
- *
- * Return values:
- * On success, returns size of object stored in *@ptr
- * %-EBADMSG on XDR buffer overflow
- * %-EMSGSIZE on overflow of storage buffer @ptr
- */
-ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, size_t size)
-{
- ssize_t ret;
- void *p;
-
- ret = xdr_stream_decode_opaque_inline(xdr, &p, size);
- if (ret <= 0)
- return ret;
- memcpy(ptr, p, ret);
- return ret;
-}
-EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque);
-
-/**
- * xdr_stream_decode_opaque_dup - Decode and duplicate variable length opaque
+ * xdr_stream_decode_string_dup - Decode and duplicate variable length string
* @xdr: pointer to xdr_stream
- * @ptr: location to store pointer to opaque data
- * @maxlen: maximum acceptable object size
+ * @str: location to store pointer to string
+ * @maxlen: maximum acceptable string length
* @gfp_flags: GFP mask to use
*
* Return values:
- * On success, returns size of object stored in *@ptr
+ * On success, returns length of NUL-terminated string stored in *@ptr
* %-EBADMSG on XDR buffer overflow
- * %-EMSGSIZE if the size of the object would exceed @maxlen
+ * %-EMSGSIZE if the size of the string would exceed @maxlen
* %-ENOMEM on memory allocation failure
*/
-ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr,
+ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
size_t maxlen, gfp_t gfp_flags)
{
- ssize_t ret;
void *p;
+ ssize_t ret;
ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
if (ret > 0) {
- *ptr = kmemdup(p, ret, gfp_flags);
- if (*ptr != NULL)
- return ret;
+ char *s = kmemdup_nul(p, ret, gfp_flags);
+ if (s != NULL) {
+ *str = s;
+ return strlen(s);
+ }
ret = -ENOMEM;
}
- *ptr = NULL;
+ *str = NULL;
return ret;
}
-EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_dup);
+EXPORT_SYMBOL_GPL(xdr_stream_decode_string_dup);
/**
- * xdr_stream_decode_string - Decode variable length string
+ * xdr_stream_decode_opaque_auth - Decode struct opaque_auth (RFC5531 S8.2)
* @xdr: pointer to xdr_stream
- * @str: location to store string
- * @size: size of storage buffer @str
+ * @flavor: location to store decoded flavor
+ * @body: location to store decode body
+ * @body_len: location to store length of decoded body
*
* Return values:
- * On success, returns length of NUL-terminated string stored in *@str
+ * On success, returns the number of buffer bytes consumed
* %-EBADMSG on XDR buffer overflow
- * %-EMSGSIZE on overflow of storage buffer @str
+ * %-EMSGSIZE if the decoded size of the body field exceeds 400 octets
*/
-ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size)
+ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor,
+ void **body, unsigned int *body_len)
{
- ssize_t ret;
- void *p;
+ ssize_t ret, len;
- ret = xdr_stream_decode_opaque_inline(xdr, &p, size);
- if (ret > 0) {
- memcpy(str, p, ret);
- str[ret] = '\0';
- return strlen(str);
- }
- *str = '\0';
- return ret;
+ len = xdr_stream_decode_u32(xdr, flavor);
+ if (unlikely(len < 0))
+ return len;
+ ret = xdr_stream_decode_opaque_inline(xdr, body, RPC_MAX_AUTH_SIZE);
+ if (unlikely(ret < 0))
+ return ret;
+ *body_len = ret;
+ return len + ret;
}
-EXPORT_SYMBOL_GPL(xdr_stream_decode_string);
+EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_auth);
/**
- * xdr_stream_decode_string_dup - Decode and duplicate variable length string
+ * xdr_stream_encode_opaque_auth - Encode struct opaque_auth (RFC5531 S8.2)
* @xdr: pointer to xdr_stream
- * @str: location to store pointer to string
- * @maxlen: maximum acceptable string length
- * @gfp_flags: GFP mask to use
+ * @flavor: verifier flavor to encode
+ * @body: content of body to encode
+ * @body_len: length of body to encode
*
* Return values:
- * On success, returns length of NUL-terminated string stored in *@ptr
+ * On success, returns length in bytes of XDR buffer consumed
* %-EBADMSG on XDR buffer overflow
- * %-EMSGSIZE if the size of the string would exceed @maxlen
- * %-ENOMEM on memory allocation failure
+ * %-EMSGSIZE if the size of @body exceeds 400 octets
*/
-ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
- size_t maxlen, gfp_t gfp_flags)
-{
- void *p;
- ssize_t ret;
-
- ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
- if (ret > 0) {
- char *s = kmemdup_nul(p, ret, gfp_flags);
- if (s != NULL) {
- *str = s;
- return strlen(s);
- }
- ret = -ENOMEM;
- }
- *str = NULL;
- return ret;
+ssize_t xdr_stream_encode_opaque_auth(struct xdr_stream *xdr, u32 flavor,
+ void *body, unsigned int body_len)
+{
+ ssize_t ret, len;
+
+ if (unlikely(body_len > RPC_MAX_AUTH_SIZE))
+ return -EMSGSIZE;
+ len = xdr_stream_encode_u32(xdr, flavor);
+ if (unlikely(len < 0))
+ return len;
+ ret = xdr_stream_encode_opaque(xdr, body, body_len);
+ if (unlikely(ret < 0))
+ return ret;
+ return len + ret;
}
-EXPORT_SYMBOL_GPL(xdr_stream_decode_string_dup);
+EXPORT_SYMBOL_GPL(xdr_stream_encode_opaque_auth);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ab453ede54f0..1023361845f9 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -283,7 +283,7 @@ out_unlock:
xprt_clear_locked(xprt);
out_sleep:
task->tk_status = -EAGAIN;
- if (RPC_IS_SOFT(task))
+ if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task))
rpc_sleep_on_timeout(&xprt->sending, task, NULL,
xprt_request_timeout(req));
else
@@ -349,7 +349,7 @@ out_unlock:
xprt_clear_locked(xprt);
out_sleep:
task->tk_status = -EAGAIN;
- if (RPC_IS_SOFT(task))
+ if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task))
rpc_sleep_on_timeout(&xprt->sending, task, NULL,
xprt_request_timeout(req));
else
@@ -651,9 +651,9 @@ static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime)
jiffies + nsecs_to_jiffies(-delta);
}
-static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
+static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req,
+ const struct rpc_timeout *to)
{
- const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
unsigned long majortimeo = req->rq_timeout;
if (to->to_exponential)
@@ -665,9 +665,10 @@ static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
return majortimeo;
}
-static void xprt_reset_majortimeo(struct rpc_rqst *req)
+static void xprt_reset_majortimeo(struct rpc_rqst *req,
+ const struct rpc_timeout *to)
{
- req->rq_majortimeo += xprt_calc_majortimeo(req);
+ req->rq_majortimeo += xprt_calc_majortimeo(req, to);
}
static void xprt_reset_minortimeo(struct rpc_rqst *req)
@@ -675,7 +676,8 @@ static void xprt_reset_minortimeo(struct rpc_rqst *req)
req->rq_minortimeo += req->rq_timeout;
}
-static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
+static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req,
+ const struct rpc_timeout *to)
{
unsigned long time_init;
struct rpc_xprt *xprt = req->rq_xprt;
@@ -684,8 +686,9 @@ static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
time_init = jiffies;
else
time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
- req->rq_timeout = task->tk_client->cl_timeout->to_initval;
- req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
+
+ req->rq_timeout = to->to_initval;
+ req->rq_majortimeo = time_init + xprt_calc_majortimeo(req, to);
req->rq_minortimeo = time_init + req->rq_timeout;
}
@@ -713,7 +716,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
} else {
req->rq_timeout = to->to_initval;
req->rq_retries = 0;
- xprt_reset_majortimeo(req);
+ xprt_reset_majortimeo(req, to);
/* Reset the RTT counters == "slow start" */
spin_lock(&xprt->transport_lock);
rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
@@ -851,7 +854,7 @@ xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
static void
xprt_init_autodisconnect(struct timer_list *t)
{
- struct rpc_xprt *xprt = from_timer(xprt, t, timer);
+ struct rpc_xprt *xprt = timer_container_of(xprt, t, timer);
if (!RB_EMPTY_ROOT(&xprt->recv_queue))
return;
@@ -1164,7 +1167,7 @@ xprt_request_enqueue_receive(struct rpc_task *task)
spin_unlock(&xprt->queue_lock);
/* Turn off autodisconnect */
- del_timer_sync(&xprt->timer);
+ timer_delete_sync(&xprt->timer);
return 0;
}
@@ -1362,7 +1365,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
INIT_LIST_HEAD(&req->rq_xmit2);
goto out;
}
- } else if (!req->rq_seqno) {
+ } else if (req->rq_seqno_count == 0) {
list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
if (pos->rq_task->tk_owner != task->tk_owner)
continue;
@@ -1395,6 +1398,12 @@ xprt_request_dequeue_transmit_locked(struct rpc_task *task)
if (!test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
return;
if (!list_empty(&req->rq_xmit)) {
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ if (list_is_first(&req->rq_xmit, &xprt->xmit_queue) &&
+ xprt->ops->abort_send_request)
+ xprt->ops->abort_send_request(req);
+
list_del(&req->rq_xmit);
if (!list_empty(&req->rq_xmit2)) {
struct rpc_rqst *next = list_first_entry(&req->rq_xmit2,
@@ -1538,6 +1547,9 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
int is_retrans = RPC_WAS_SENT(task);
int status;
+ if (test_bit(XPRT_CLOSE_WAIT, &xprt->state))
+ return -ENOTCONN;
+
if (!req->rq_bytes_sent) {
if (xprt_request_data_received(task)) {
status = 0;
@@ -1886,7 +1898,8 @@ xprt_request_init(struct rpc_task *task)
req->rq_snd_buf.bvec = NULL;
req->rq_rcv_buf.bvec = NULL;
req->rq_release_snd_buf = NULL;
- xprt_init_majortimeo(task, req);
+ req->rq_seqno_count = 0;
+ xprt_init_majortimeo(task, req, task->tk_client->cl_timeout);
trace_xprt_reserve(req);
}
@@ -1983,7 +1996,8 @@ void xprt_release(struct rpc_task *task)
#ifdef CONFIG_SUNRPC_BACKCHANNEL
void
-xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
+xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task,
+ const struct rpc_timeout *to)
{
struct xdr_buf *xbufp = &req->rq_snd_buf;
@@ -1996,6 +2010,13 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
*/
xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
xbufp->tail[0].iov_len;
+ /*
+ * Backchannel Replies are sent with !RPC_TASK_SOFT and
+ * RPC_TASK_NO_RETRANS_TIMEOUT. The major timeout setting
+ * affects only how long each Reply waits to be sent when
+ * a transport connection cannot be established.
+ */
+ xprt_init_majortimeo(task, req, to);
}
#endif
@@ -2118,7 +2139,7 @@ static void xprt_destroy(struct rpc_xprt *xprt)
* can only run *before* del_time_sync(), never after.
*/
spin_lock(&xprt->transport_lock);
- del_timer_sync(&xprt->timer);
+ timer_delete_sync(&xprt->timer);
spin_unlock(&xprt->transport_lock);
/*
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 701250b305db..4c5e08b0aa64 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -92,6 +92,27 @@ void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
xprt_put(xprt);
}
+/**
+ * rpc_xprt_switch_get_main_xprt - Get the 'main' xprt for an xprt switch.
+ * @xps: pointer to struct rpc_xprt_switch.
+ */
+struct rpc_xprt *rpc_xprt_switch_get_main_xprt(struct rpc_xprt_switch *xps)
+{
+ struct rpc_xprt_iter xpi;
+ struct rpc_xprt *xprt;
+
+ xprt_iter_init_listall(&xpi, xps);
+
+ xprt = xprt_iter_get_next(&xpi);
+ while (xprt && !xprt->main) {
+ xprt_put(xprt);
+ xprt = xprt_iter_get_next(&xpi);
+ }
+
+ xprt_iter_destroy(&xpi);
+ return xprt;
+}
+
static DEFINE_IDA(rpc_xprtswitch_ids);
void xprt_multipath_cleanup_ids(void)
@@ -284,7 +305,7 @@ struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head,
if (cur == pos)
found = true;
if (found && ((find_active && xprt_is_active(pos)) ||
- (!find_active && xprt_is_active(pos))))
+ (!find_active && !xprt_is_active(pos))))
return pos;
}
return NULL;
@@ -336,8 +357,9 @@ struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi)
xprt_switch_find_current_entry_offline);
}
-bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
- const struct sockaddr *sap)
+static
+bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
+ const struct sockaddr *sap)
{
struct list_head *head;
struct rpc_xprt *pos;
@@ -356,6 +378,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
return false;
}
+bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
+ const struct sockaddr *sap)
+{
+ bool res;
+
+ rcu_read_lock();
+ res = __rpc_xprt_switch_has_addr(xps, sap);
+ rcu_read_unlock();
+
+ return res;
+}
+
static
struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
const struct rpc_xprt *cur, bool check_active)
@@ -590,23 +624,6 @@ struct rpc_xprt *xprt_iter_get_helper(struct rpc_xprt_iter *xpi,
}
/**
- * xprt_iter_get_xprt - Returns the rpc_xprt pointed to by the cursor
- * @xpi: pointer to rpc_xprt_iter
- *
- * Returns a reference to the struct rpc_xprt that is currently
- * pointed to by the cursor.
- */
-struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi)
-{
- struct rpc_xprt *xprt;
-
- rcu_read_lock();
- xprt = xprt_iter_get_helper(xpi, xprt_iter_ops(xpi)->xpi_xprt);
- rcu_read_unlock();
- return xprt;
-}
-
-/**
* xprt_iter_get_next - Returns the next rpc_xprt following the cursor
* @xpi: pointer to rpc_xprt_iter
*
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 55b21bae866d..3232aa23cdb4 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
-rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
+rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o ib_client.o \
svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
svc_rdma_pcl.o module.o
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index e4d84a13c566..8c817e755262 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -263,11 +263,9 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
/* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv;
xprt_get(xprt);
- spin_lock(&bc_serv->sv_cb_lock);
- list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
- spin_unlock(&bc_serv->sv_cb_lock);
+ lwq_enqueue(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
- wake_up(&bc_serv->sv_cb_waitq);
+ svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]);
r_xprt->rx_stats.bcall_count++;
return;
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ffbf99894970..31434aeb8e29 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -54,7 +54,7 @@ static void frwr_cid_init(struct rpcrdma_ep *ep,
cid->ci_completion_id = mr->mr_ibmr->res.id;
}
-static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
+static void frwr_mr_unmap(struct rpcrdma_mr *mr)
{
if (mr->mr_device) {
trace_xprtrdma_mr_unmap(mr);
@@ -73,7 +73,7 @@ void frwr_mr_release(struct rpcrdma_mr *mr)
{
int rc;
- frwr_mr_unmap(mr->mr_xprt, mr);
+ frwr_mr_unmap(mr);
rc = ib_dereg_mr(mr->mr_ibmr);
if (rc)
@@ -84,7 +84,7 @@ void frwr_mr_release(struct rpcrdma_mr *mr)
static void frwr_mr_put(struct rpcrdma_mr *mr)
{
- frwr_mr_unmap(mr->mr_xprt, mr);
+ frwr_mr_unmap(mr);
/* The MR is returned to the req's MR free list instead
* of to the xprt's MR free list. No spinlock is needed.
@@ -92,7 +92,8 @@ static void frwr_mr_put(struct rpcrdma_mr *mr)
rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}
-/* frwr_reset - Place MRs back on the free list
+/**
+ * frwr_reset - Place MRs back on @req's free list
* @req: request to reset
*
* Used after a failed marshal. For FRWR, this means the MRs
diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c
new file mode 100644
index 000000000000..28c68b5f6823
--- /dev/null
+++ b/net/sunrpc/xprtrdma/ib_client.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2024 Oracle. All rights reserved.
+ */
+
+/* #include <linux/module.h>
+#include <linux/slab.h> */
+#include <linux/xarray.h>
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/completion.h>
+
+#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
+
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
+/* Per-ib_device private data for rpcrdma */
+struct rpcrdma_device {
+ struct kref rd_kref;
+ unsigned long rd_flags;
+ struct ib_device *rd_device;
+ struct xarray rd_xa;
+ struct completion rd_done;
+};
+
+#define RPCRDMA_RD_F_REMOVING (0)
+
+static struct ib_client rpcrdma_ib_client;
+
+/*
+ * Listeners have no associated device, so we never register them.
+ * Note that ib_get_client_data() does not check if @device is
+ * NULL for us.
+ */
+static struct rpcrdma_device *rpcrdma_get_client_data(struct ib_device *device)
+{
+ if (!device)
+ return NULL;
+ return ib_get_client_data(device, &rpcrdma_ib_client);
+}
+
+/**
+ * rpcrdma_rn_register - register to get device removal notifications
+ * @device: device to monitor
+ * @rn: notification object that wishes to be notified
+ * @done: callback to notify caller of device removal
+ *
+ * Returns zero on success. The callback in rn_done is guaranteed
+ * to be invoked when the device is removed, unless this notification
+ * is unregistered first.
+ *
+ * On failure, a negative errno is returned.
+ */
+int rpcrdma_rn_register(struct ib_device *device,
+ struct rpcrdma_notification *rn,
+ void (*done)(struct rpcrdma_notification *rn))
+{
+ struct rpcrdma_device *rd = rpcrdma_get_client_data(device);
+
+ if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags))
+ return -ENETUNREACH;
+
+ if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b, GFP_KERNEL) < 0)
+ return -ENOMEM;
+ kref_get(&rd->rd_kref);
+ rn->rn_done = done;
+ trace_rpcrdma_client_register(device, rn);
+ return 0;
+}
+
+static void rpcrdma_rn_release(struct kref *kref)
+{
+ struct rpcrdma_device *rd = container_of(kref, struct rpcrdma_device,
+ rd_kref);
+
+ trace_rpcrdma_client_completion(rd->rd_device);
+ complete(&rd->rd_done);
+}
+
+/**
+ * rpcrdma_rn_unregister - stop device removal notifications
+ * @device: monitored device
+ * @rn: notification object that no longer wishes to be notified
+ */
+void rpcrdma_rn_unregister(struct ib_device *device,
+ struct rpcrdma_notification *rn)
+{
+ struct rpcrdma_device *rd = rpcrdma_get_client_data(device);
+
+ if (!rd)
+ return;
+
+ trace_rpcrdma_client_unregister(device, rn);
+ xa_erase(&rd->rd_xa, rn->rn_index);
+ kref_put(&rd->rd_kref, rpcrdma_rn_release);
+}
+
+/**
+ * rpcrdma_add_one - ib_client device insertion callback
+ * @device: device about to be inserted
+ *
+ * Returns zero on success. xprtrdma private data has been allocated
+ * for this device. On failure, a negative errno is returned.
+ */
+static int rpcrdma_add_one(struct ib_device *device)
+{
+ struct rpcrdma_device *rd;
+
+ rd = kzalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+
+ kref_init(&rd->rd_kref);
+ xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC);
+ rd->rd_device = device;
+ init_completion(&rd->rd_done);
+ ib_set_client_data(device, &rpcrdma_ib_client, rd);
+
+ trace_rpcrdma_client_add_one(device);
+ return 0;
+}
+
+/**
+ * rpcrdma_remove_one - ib_client device removal callback
+ * @device: device about to be removed
+ * @client_data: this module's private per-device data
+ *
+ * Upon return, all transports associated with @device have divested
+ * themselves from IB hardware resources.
+ */
+static void rpcrdma_remove_one(struct ib_device *device,
+ void *client_data)
+{
+ struct rpcrdma_device *rd = client_data;
+ struct rpcrdma_notification *rn;
+ unsigned long index;
+
+ trace_rpcrdma_client_remove_one(device);
+
+ set_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags);
+ xa_for_each(&rd->rd_xa, index, rn)
+ rn->rn_done(rn);
+
+ /*
+ * Wait only if there are still outstanding notification
+ * registrants for this device.
+ */
+ if (!refcount_dec_and_test(&rd->rd_kref.refcount)) {
+ trace_rpcrdma_client_wait_on(device);
+ wait_for_completion(&rd->rd_done);
+ }
+
+ trace_rpcrdma_client_remove_one_done(device);
+ xa_destroy(&rd->rd_xa);
+ kfree(rd);
+}
+
+static struct ib_client rpcrdma_ib_client = {
+ .name = "rpcrdma",
+ .add = rpcrdma_add_one,
+ .remove = rpcrdma_remove_one,
+};
+
+/**
+ * rpcrdma_ib_client_unregister - unregister ib_client for xprtrdma
+ *
+ * cel: watch for orphaned rpcrdma_device objects on module unload
+ */
+void rpcrdma_ib_client_unregister(void)
+{
+ ib_unregister_client(&rpcrdma_ib_client);
+}
+
+/**
+ * rpcrdma_ib_client_register - register ib_client for rpcrdma
+ *
+ * Returns zero on success, or a negative errno.
+ */
+int rpcrdma_ib_client_register(void)
+{
+ return ib_register_client(&rpcrdma_ib_client);
+}
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 45c5b41ac8dc..697f571d4c01 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rdma_rn.h>
#include <asm/swab.h>
@@ -30,21 +31,32 @@ static void __exit rpc_rdma_cleanup(void)
{
xprt_rdma_cleanup();
svc_rdma_cleanup();
+ rpcrdma_ib_client_unregister();
}
static int __init rpc_rdma_init(void)
{
int rc;
+ rc = rpcrdma_ib_client_register();
+ if (rc)
+ goto out_rc;
+
rc = svc_rdma_init();
if (rc)
- goto out;
+ goto out_ib_client;
rc = xprt_rdma_init();
if (rc)
- svc_rdma_cleanup();
+ goto out_svc_rdma;
-out:
+ return 0;
+
+out_svc_rdma:
+ svc_rdma_cleanup();
+out_ib_client:
+ rpcrdma_ib_client_unregister();
+out_rc:
return rc;
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 190a4de239c8..3aac1456e23e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -190,7 +190,7 @@ rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
ppages = buf->pages + (buf->page_base >> PAGE_SHIFT);
while (len > 0) {
if (!*ppages)
- *ppages = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
+ *ppages = alloc_page(GFP_NOWAIT);
if (!*ppages)
return -ENOBUFS;
ppages++;
@@ -1471,8 +1471,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_ep->re_max_requests)
credits = r_xprt->rx_ep->re_max_requests;
- rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
- false);
+ rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1));
if (buf->rb_credits != credits)
rpcrdma_update_cwnd(r_xprt, credits);
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 5bc20e9d09cd..415c0310101f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -74,7 +74,7 @@ enum {
SVCRDMA_COUNTER_BUFSIZ = sizeof(unsigned long long),
};
-static int svcrdma_counter_handler(struct ctl_table *table, int write,
+static int svcrdma_counter_handler(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct percpu_counter *stat = (struct percpu_counter *)table->data;
@@ -209,25 +209,6 @@ static struct ctl_table svcrdma_parm_table[] = {
.extra1 = &zero,
.extra2 = &zero,
},
- { },
-};
-
-static struct ctl_table svcrdma_table[] = {
- {
- .procname = "svc_rdma",
- .mode = 0555,
- .child = svcrdma_parm_table
- },
- { },
-};
-
-static struct ctl_table svcrdma_root_table[] = {
- {
- .procname = "sunrpc",
- .mode = 0555,
- .child = svcrdma_table
- },
- { },
};
static void svc_rdma_proc_cleanup(void)
@@ -252,49 +233,75 @@ static int svc_rdma_proc_init(void)
rc = percpu_counter_init(&svcrdma_stat_read, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err;
rc = percpu_counter_init(&svcrdma_stat_recv, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err_read;
rc = percpu_counter_init(&svcrdma_stat_sq_starve, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err_recv;
rc = percpu_counter_init(&svcrdma_stat_write, 0, GFP_KERNEL);
if (rc)
- goto out_err;
+ goto err_sq;
+
+ svcrdma_table_header = register_sysctl("sunrpc/svc_rdma",
+ svcrdma_parm_table);
+ if (!svcrdma_table_header)
+ goto err_write;
- svcrdma_table_header = register_sysctl_table(svcrdma_root_table);
return 0;
-out_err:
+err_write:
+ rc = -ENOMEM;
+ percpu_counter_destroy(&svcrdma_stat_write);
+err_sq:
percpu_counter_destroy(&svcrdma_stat_sq_starve);
+err_recv:
percpu_counter_destroy(&svcrdma_stat_recv);
+err_read:
percpu_counter_destroy(&svcrdma_stat_read);
+err:
return rc;
}
+struct workqueue_struct *svcrdma_wq;
+
void svc_rdma_cleanup(void)
{
- dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
svc_unreg_xprt_class(&svc_rdma_class);
svc_rdma_proc_cleanup();
+ if (svcrdma_wq) {
+ struct workqueue_struct *wq = svcrdma_wq;
+
+ svcrdma_wq = NULL;
+ destroy_workqueue(wq);
+ }
+
+ dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
}
int svc_rdma_init(void)
{
+ struct workqueue_struct *wq;
int rc;
- dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
- dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
- dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
- dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
- dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
+ wq = alloc_workqueue("svcrdma", WQ_UNBOUND, 0);
+ if (!wq)
+ return -ENOMEM;
rc = svc_rdma_proc_init();
- if (rc)
+ if (rc) {
+ destroy_workqueue(wq);
return rc;
+ }
- /* Register RDMA with the SVC transport switch */
+ svcrdma_wq = wq;
svc_reg_xprt_class(&svc_rdma_class);
+
+ dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
+ dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
+ dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
+ dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
+ dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
return 0;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index aa2227a7e552..e5a78b761012 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -76,15 +76,12 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
struct rpc_rqst *rqst,
struct svc_rdma_send_ctxt *sctxt)
{
- struct svc_rdma_recv_ctxt *rctxt;
+ struct svc_rdma_pcl empty_pcl;
int ret;
- rctxt = svc_rdma_recv_ctxt_get(rdma);
- if (!rctxt)
- return -EIO;
-
- ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqst->rq_snd_buf);
- svc_rdma_recv_ctxt_put(rdma, rctxt);
+ pcl_init(&empty_pcl);
+ ret = svc_rdma_map_reply_msg(rdma, sctxt, &empty_pcl, &empty_pcl,
+ &rqst->rq_snd_buf);
if (ret < 0)
return -EIO;
@@ -93,13 +90,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/
get_page(virt_to_page(rqst->rq_buffer));
sctxt->sc_send_wr.opcode = IB_WR_SEND;
- ret = svc_rdma_send(rdma, sctxt);
- if (ret < 0)
- return ret;
-
- ret = wait_for_completion_killable(&sctxt->sc_done);
- svc_rdma_send_ctxt_put(rdma, sctxt);
- return ret;
+ return svc_rdma_post_send(rdma, sctxt);
}
/* Server-side transport endpoint wants a whole page for its send
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 5242ad121450..e7e4a39ca6c6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -94,7 +94,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -115,24 +115,22 @@ svc_rdma_next_recv_ctxt(struct list_head *list)
rc_list);
}
-static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
- struct rpc_rdma_cid *cid)
-{
- cid->ci_queue_id = rdma->sc_rq_cq->res.id;
- cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
-}
-
static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
+ int node = ibdev_to_node(rdma->sc_cm_id->device);
struct svc_rdma_recv_ctxt *ctxt;
+ unsigned long pages;
dma_addr_t addr;
void *buffer;
- ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
+ pages = svc_serv_maxpages(rdma->sc_xprt.xpt_server);
+ ctxt = kzalloc_node(struct_size(ctxt, rc_pages, pages),
+ GFP_KERNEL, node);
if (!ctxt)
goto fail0;
- buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
+ ctxt->rc_maxpages = pages;
+ buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
if (!buffer)
goto fail1;
addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
@@ -155,7 +153,7 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
ctxt->rc_recv_buf = buffer;
- ctxt->rc_temp = false;
+ svc_rdma_cc_init(rdma, &ctxt->rc_cc);
return ctxt;
fail2:
@@ -204,18 +202,11 @@ struct svc_rdma_recv_ctxt *svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
node = llist_del_first(&rdma->sc_recv_ctxts);
if (!node)
- goto out_empty;
- ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
+ return NULL;
-out:
+ ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
ctxt->rc_page_count = 0;
return ctxt;
-
-out_empty:
- ctxt = svc_rdma_recv_ctxt_alloc(rdma);
- if (!ctxt)
- return NULL;
- goto out;
}
/**
@@ -227,39 +218,42 @@ out_empty:
void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
{
+ svc_rdma_cc_release(rdma, &ctxt->rc_cc, DMA_FROM_DEVICE);
+
+ /* @rc_page_count is normally zero here, but error flows
+ * can leave pages in @rc_pages.
+ */
+ release_pages(ctxt->rc_pages, ctxt->rc_page_count);
+
pcl_free(&ctxt->rc_call_pcl);
pcl_free(&ctxt->rc_read_pcl);
pcl_free(&ctxt->rc_write_pcl);
pcl_free(&ctxt->rc_reply_pcl);
- if (!ctxt->rc_temp)
- llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
- else
- svc_rdma_recv_ctxt_destroy(rdma, ctxt);
+ llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
}
/**
- * svc_rdma_release_rqst - Release transport-specific per-rqst resources
- * @rqstp: svc_rqst being released
+ * svc_rdma_release_ctxt - Release transport-specific per-rqst resources
+ * @xprt: the transport which owned the context
+ * @vctxt: the context from rqstp->rq_xprt_ctxt or dr->xprt_ctxt
*
* Ensure that the recv_ctxt is released whether or not a Reply
* was sent. For example, the client could close the connection,
* or svc_process could drop an RPC, before the Reply is sent.
*/
-void svc_rdma_release_rqst(struct svc_rqst *rqstp)
+void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *vctxt)
{
- struct svc_rdma_recv_ctxt *ctxt = rqstp->rq_xprt_ctxt;
- struct svc_xprt *xprt = rqstp->rq_xprt;
+ struct svc_rdma_recv_ctxt *ctxt = vctxt;
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
- rqstp->rq_xprt_ctxt = NULL;
if (ctxt)
svc_rdma_recv_ctxt_put(rdma, ctxt);
}
static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
- unsigned int wanted, bool temp)
+ unsigned int wanted)
{
const struct ib_recv_wr *bad_wr = NULL;
struct svc_rdma_recv_ctxt *ctxt;
@@ -275,14 +269,13 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
if (!ctxt)
break;
- trace_svcrdma_post_recv(ctxt);
- ctxt->rc_temp = temp;
+ trace_svcrdma_post_recv(&ctxt->rc_cid);
ctxt->rc_recv_wr.next = recv_chain;
recv_chain = &ctxt->rc_recv_wr;
rdma->sc_pending_recvs++;
}
if (!recv_chain)
- return false;
+ return true;
ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
if (ret)
@@ -306,11 +299,28 @@ err_free:
* svc_rdma_post_recvs - Post initial set of Recv WRs
* @rdma: fresh svcxprt_rdma
*
- * Returns true if successful, otherwise false.
+ * Return values:
+ * %true: Receive Queue initialization successful
+ * %false: memory allocation or DMA error
*/
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
{
- return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
+ unsigned int total;
+
+ /* For each credit, allocate enough recv_ctxts for one
+ * posted Receive and one RPC in process.
+ */
+ total = (rdma->sc_max_requests * 2) + rdma->sc_recv_batch;
+ while (total--) {
+ struct svc_rdma_recv_ctxt *ctxt;
+
+ ctxt = svc_rdma_recv_ctxt_alloc(rdma);
+ if (!ctxt)
+ return false;
+ llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
+ }
+
+ return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests);
}
/**
@@ -344,7 +354,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
* client reconnects.
*/
if (rdma->sc_pending_recvs < rdma->sc_max_requests)
- if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch, false))
+ if (!svc_rdma_refresh_recvs(rdma, rdma->sc_recv_batch))
goto dropped;
/* All wc fields are now known to be valid */
@@ -378,6 +388,10 @@ void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
+ while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) {
+ list_del(&ctxt->rc_list);
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ }
while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) {
list_del(&ctxt->rc_list);
svc_rdma_recv_ctxt_put(rdma, ctxt);
@@ -483,7 +497,13 @@ static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt)
if (xdr_stream_decode_u32(&rctxt->rc_stream, &segcount))
return false;
- /* A bogus segcount causes this buffer overflow check to fail. */
+ /* Before trusting the segcount value enough to use it in
+ * a computation, perform a simple range check. This is an
+ * arbitrary but sensible limit (ie, not architectural).
+ */
+ if (unlikely(segcount > rctxt->rc_maxpages))
+ return false;
+
p = xdr_inline_decode(&rctxt->rc_stream,
segcount * rpcrdma_segment_maxsz * sizeof(*p));
return p != NULL;
@@ -759,6 +779,122 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
return true;
}
+/* Finish constructing the RPC Call message in rqstp::rq_arg.
+ *
+ * The incoming RPC/RDMA message is an RDMA_MSG type message
+ * with a single Read chunk (only the upper layer data payload
+ * was conveyed via RDMA Read).
+ */
+static void svc_rdma_read_complete_one(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ struct svc_rdma_chunk *chunk = pcl_first_chunk(&ctxt->rc_read_pcl);
+ struct xdr_buf *buf = &rqstp->rq_arg;
+ unsigned int length;
+
+ /* Split the Receive buffer between the head and tail
+ * buffers at Read chunk's position. XDR roundup of the
+ * chunk is not included in either the pagelist or in
+ * the tail.
+ */
+ buf->tail[0].iov_base = buf->head[0].iov_base + chunk->ch_position;
+ buf->tail[0].iov_len = buf->head[0].iov_len - chunk->ch_position;
+ buf->head[0].iov_len = chunk->ch_position;
+
+ /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
+ *
+ * If the client already rounded up the chunk length, the
+ * length does not change. Otherwise, the length of the page
+ * list is increased to include XDR round-up.
+ *
+ * Currently these chunks always start at page offset 0,
+ * thus the rounded-up length never crosses a page boundary.
+ */
+ buf->pages = &rqstp->rq_pages[0];
+ length = xdr_align_size(chunk->ch_length);
+ buf->page_len = length;
+ buf->len += length;
+ buf->buflen += length;
+}
+
+/* Finish constructing the RPC Call message in rqstp::rq_arg.
+ *
+ * The incoming RPC/RDMA message is an RDMA_MSG type message
+ * with payload in multiple Read chunks and no PZRC.
+ */
+static void svc_rdma_read_complete_multiple(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ struct xdr_buf *buf = &rqstp->rq_arg;
+
+ buf->len += ctxt->rc_readbytes;
+ buf->buflen += ctxt->rc_readbytes;
+
+ buf->head[0].iov_base = page_address(rqstp->rq_pages[0]);
+ buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, ctxt->rc_readbytes);
+ buf->pages = &rqstp->rq_pages[1];
+ buf->page_len = ctxt->rc_readbytes - buf->head[0].iov_len;
+}
+
+/* Finish constructing the RPC Call message in rqstp::rq_arg.
+ *
+ * The incoming RPC/RDMA message is an RDMA_NOMSG type message
+ * (the RPC message body was conveyed via RDMA Read).
+ */
+static void svc_rdma_read_complete_pzrc(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ struct xdr_buf *buf = &rqstp->rq_arg;
+
+ buf->len += ctxt->rc_readbytes;
+ buf->buflen += ctxt->rc_readbytes;
+
+ buf->head[0].iov_base = page_address(rqstp->rq_pages[0]);
+ buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, ctxt->rc_readbytes);
+ buf->pages = &rqstp->rq_pages[1];
+ buf->page_len = ctxt->rc_readbytes - buf->head[0].iov_len;
+}
+
+static noinline void svc_rdma_read_complete(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ unsigned int i;
+
+ /* Transfer the Read chunk pages into @rqstp.rq_pages, replacing
+ * the rq_pages that were already allocated for this rqstp.
+ */
+ release_pages(rqstp->rq_respages, ctxt->rc_page_count);
+ for (i = 0; i < ctxt->rc_page_count; i++)
+ rqstp->rq_pages[i] = ctxt->rc_pages[i];
+
+ /* Update @rqstp's result send buffer to start after the
+ * last page in the RDMA Read payload.
+ */
+ rqstp->rq_respages = &rqstp->rq_pages[ctxt->rc_page_count];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+ /* Prevent svc_rdma_recv_ctxt_put() from releasing the
+ * pages in ctxt::rc_pages a second time.
+ */
+ ctxt->rc_page_count = 0;
+
+ /* Finish constructing the RPC Call message. The exact
+ * procedure for that depends on what kind of RPC/RDMA
+ * chunks were provided by the client.
+ */
+ rqstp->rq_arg = ctxt->rc_saved_arg;
+ if (pcl_is_empty(&ctxt->rc_call_pcl)) {
+ if (ctxt->rc_read_pcl.cl_count == 1)
+ svc_rdma_read_complete_one(rqstp, ctxt);
+ else
+ svc_rdma_read_complete_multiple(rqstp, ctxt);
+ } else {
+ svc_rdma_read_complete_pzrc(rqstp, ctxt);
+ }
+
+ trace_svcrdma_read_finished(&ctxt->rc_cid);
+}
+
/**
* svc_rdma_recvfrom - Receive an RPC call
* @rqstp: request structure into which to receive an RPC Call
@@ -776,9 +912,6 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
*
* The next ctxt is removed from the "receive" lists.
*
- * - If the ctxt completes a Read, then finish assembling the Call
- * message and return the number of bytes in the message.
- *
* - If the ctxt completes a Receive, then construct the Call
* message from the contents of the Receive buffer.
*
@@ -787,7 +920,8 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
* in the message.
*
* - If there are Read chunks in this message, post Read WRs to
- * pull that payload and return 0.
+ * pull that payload. When the Read WRs complete, build the
+ * full message and return the number of bytes in it.
*/
int svc_rdma_recvfrom(struct svc_rqst *rqstp)
{
@@ -797,10 +931,23 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
struct svc_rdma_recv_ctxt *ctxt;
int ret;
+ /* Prevent svc_xprt_release() from releasing pages in rq_pages
+ * when returning 0 or an error.
+ */
+ rqstp->rq_respages = rqstp->rq_pages;
+ rqstp->rq_next_page = rqstp->rq_respages;
+
rqstp->rq_xprt_ctxt = NULL;
- ctxt = NULL;
spin_lock(&rdma_xprt->sc_rq_dto_lock);
+ ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
+ if (ctxt) {
+ list_del(&ctxt->rc_list);
+ spin_unlock(&rdma_xprt->sc_rq_dto_lock);
+ svc_xprt_received(xprt);
+ svc_rdma_read_complete(rqstp, ctxt);
+ goto complete;
+ }
ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q);
if (ctxt)
list_del(&ctxt->rc_list);
@@ -820,12 +967,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
DMA_FROM_DEVICE);
svc_rdma_build_arg_xdr(rqstp, ctxt);
- /* Prevent svc_xprt_release from releasing pages in rq_pages
- * if we return 0 or an error.
- */
- rqstp->rq_respages = rqstp->rq_pages;
- rqstp->rq_next_page = rqstp->rq_respages;
-
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
if (ret < 0)
goto out_err;
@@ -838,15 +979,14 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
if (!pcl_is_empty(&ctxt->rc_read_pcl) ||
- !pcl_is_empty(&ctxt->rc_call_pcl)) {
- ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
- if (ret < 0)
- goto out_readfail;
- }
+ !pcl_is_empty(&ctxt->rc_call_pcl))
+ goto out_readlist;
+complete:
rqstp->rq_xprt_ctxt = ctxt;
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, xprt);
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
return rqstp->rq_arg.len;
out_err:
@@ -854,11 +994,23 @@ out_err:
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return 0;
-out_readfail:
- if (ret == -EINVAL)
- svc_rdma_send_error(rdma_xprt, ctxt, ret);
- svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
- return ret;
+out_readlist:
+ /* This @rqstp is about to be recycled. Save the work
+ * already done constructing the Call message in rq_arg
+ * so it can be restored when the RDMA Reads have
+ * completed.
+ */
+ ctxt->rc_saved_arg = rqstp->rq_arg;
+
+ ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
+ if (ret < 0) {
+ if (ret == -EINVAL)
+ svc_rdma_send_error(rdma_xprt, ctxt, ret);
+ svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
+ svc_xprt_deferred_close(xprt);
+ return ret;
+ }
+ return 0;
out_backchannel:
svc_rdma_handle_bc_reply(rqstp, ctxt);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 11cf7c646644..661b3fe2779f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -39,6 +39,7 @@ struct svc_rdma_rw_ctxt {
struct list_head rw_list;
struct rdma_rw_ctx rw_ctx;
unsigned int rw_nents;
+ unsigned int rw_first_sgl_nents;
struct sg_table rw_sg_table;
struct scatterlist rw_first_sgl[];
};
@@ -53,6 +54,8 @@ svc_rdma_next_ctxt(struct list_head *list)
static struct svc_rdma_rw_ctxt *
svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
{
+ struct ib_device *dev = rdma->sc_cm_id->device;
+ unsigned int first_sgl_nents = dev->attrs.max_send_sge;
struct svc_rdma_rw_ctxt *ctxt;
struct llist_node *node;
@@ -62,40 +65,40 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
if (node) {
ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
} else {
- ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
- GFP_KERNEL);
+ ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, first_sgl_nents),
+ GFP_KERNEL, ibdev_to_node(dev));
if (!ctxt)
goto out_noctx;
INIT_LIST_HEAD(&ctxt->rw_list);
+ ctxt->rw_first_sgl_nents = first_sgl_nents;
}
ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
ctxt->rw_sg_table.sgl,
- SG_CHUNK_SIZE))
+ first_sgl_nents))
goto out_free;
return ctxt;
out_free:
kfree(ctxt);
out_noctx:
- trace_svcrdma_no_rwctx_err(rdma, sges);
+ trace_svcrdma_rwctx_empty(rdma, sges);
return NULL;
}
-static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
- struct svc_rdma_rw_ctxt *ctxt,
+static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt,
struct llist_head *list)
{
- sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&ctxt->rw_sg_table, ctxt->rw_first_sgl_nents);
llist_add(&ctxt->rw_node, list);
}
static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
struct svc_rdma_rw_ctxt *ctxt)
{
- __svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts);
+ __svc_rdma_put_rw_ctxt(ctxt, &rdma->sc_rw_ctxts);
}
/**
@@ -136,61 +139,46 @@ static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma,
ctxt->rw_sg_table.sgl, ctxt->rw_nents,
0, offset, handle, direction);
if (unlikely(ret < 0)) {
+ trace_svcrdma_dma_map_rw_err(rdma, offset, handle,
+ ctxt->rw_nents, ret);
svc_rdma_put_rw_ctxt(rdma, ctxt);
- trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret);
}
return ret;
}
-/* A chunk context tracks all I/O for moving one Read or Write
- * chunk. This is a set of rdma_rw's that handle data movement
- * for all segments of one chunk.
- *
- * These are small, acquired with a single allocator call, and
- * no more than one is needed per chunk. They are allocated on
- * demand, and not cached.
+/**
+ * svc_rdma_cc_init - Initialize an svc_rdma_chunk_ctxt
+ * @rdma: controlling transport instance
+ * @cc: svc_rdma_chunk_ctxt to be initialized
*/
-struct svc_rdma_chunk_ctxt {
- struct rpc_rdma_cid cc_cid;
- struct ib_cqe cc_cqe;
- struct svcxprt_rdma *cc_rdma;
- struct list_head cc_rwctxts;
- ktime_t cc_posttime;
- int cc_sqecount;
- enum ib_wc_status cc_status;
- struct completion cc_done;
-};
-
-static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma,
- struct rpc_rdma_cid *cid)
+void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc)
{
- cid->ci_queue_id = rdma->sc_sq_cq->res.id;
- cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
-}
+ struct rpc_rdma_cid *cid = &cc->cc_cid;
-static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
- struct svc_rdma_chunk_ctxt *cc)
-{
- svc_rdma_cc_cid_init(rdma, &cc->cc_cid);
- cc->cc_rdma = rdma;
+ if (unlikely(!cid->ci_completion_id))
+ svc_rdma_send_cid_init(rdma, cid);
INIT_LIST_HEAD(&cc->cc_rwctxts);
cc->cc_sqecount = 0;
}
-/*
- * The consumed rw_ctx's are cleaned and placed on a local llist so
- * that only one atomic llist operation is needed to put them all
- * back on the free list.
+/**
+ * svc_rdma_cc_release - Release resources held by a svc_rdma_chunk_ctxt
+ * @rdma: controlling transport instance
+ * @cc: svc_rdma_chunk_ctxt to be released
+ * @dir: DMA direction
*/
-static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
- enum dma_data_direction dir)
+void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc,
+ enum dma_data_direction dir)
{
- struct svcxprt_rdma *rdma = cc->cc_rdma;
struct llist_node *first, *last;
struct svc_rdma_rw_ctxt *ctxt;
LLIST_HEAD(free);
+ trace_svcrdma_cc_release(&cc->cc_cid, cc->cc_sqecount);
+
first = last = NULL;
while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
list_del(&ctxt->rw_list);
@@ -198,7 +186,7 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, ctxt->rw_sg_table.sgl,
ctxt->rw_nents, dir);
- __svc_rdma_put_rw_ctxt(rdma, ctxt, &free);
+ __svc_rdma_put_rw_ctxt(ctxt, &free);
ctxt->rw_node.next = first;
first = &ctxt->rw_node;
@@ -209,49 +197,82 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
llist_add_batch(first, last, &rdma->sc_rw_ctxts);
}
-/* State for sending a Write or Reply chunk.
- * - Tracks progress of writing one chunk over all its segments
- * - Stores arguments for the SGL constructor functions
- */
-struct svc_rdma_write_info {
- const struct svc_rdma_chunk *wi_chunk;
-
- /* write state of this chunk */
- unsigned int wi_seg_off;
- unsigned int wi_seg_no;
-
- /* SGL constructor arguments */
- const struct xdr_buf *wi_xdr;
- unsigned char *wi_base;
- unsigned int wi_next_off;
-
- struct svc_rdma_chunk_ctxt wi_cc;
-};
-
static struct svc_rdma_write_info *
svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
const struct svc_rdma_chunk *chunk)
{
struct svc_rdma_write_info *info;
- info = kmalloc(sizeof(*info), GFP_KERNEL);
+ info = kzalloc_node(sizeof(*info), GFP_KERNEL,
+ ibdev_to_node(rdma->sc_cm_id->device));
if (!info)
return info;
+ info->wi_rdma = rdma;
info->wi_chunk = chunk;
- info->wi_seg_off = 0;
- info->wi_seg_no = 0;
svc_rdma_cc_init(rdma, &info->wi_cc);
info->wi_cc.cc_cqe.done = svc_rdma_write_done;
return info;
}
-static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
+static void svc_rdma_write_info_free_async(struct work_struct *work)
{
- svc_rdma_cc_release(&info->wi_cc, DMA_TO_DEVICE);
+ struct svc_rdma_write_info *info;
+
+ info = container_of(work, struct svc_rdma_write_info, wi_work);
+ svc_rdma_cc_release(info->wi_rdma, &info->wi_cc, DMA_TO_DEVICE);
kfree(info);
}
+static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
+{
+ INIT_WORK(&info->wi_work, svc_rdma_write_info_free_async);
+ queue_work(svcrdma_wq, &info->wi_work);
+}
+
+/**
+ * svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
+ * @rdma: controlling transport
+ * @ctxt: Send context that is being released
+ */
+void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ struct svc_rdma_chunk_ctxt *cc = &ctxt->sc_reply_info.wi_cc;
+
+ if (!cc->cc_sqecount)
+ return;
+ svc_rdma_cc_release(rdma, cc, DMA_TO_DEVICE);
+}
+
+/**
+ * svc_rdma_reply_done - Reply chunk Write completion handler
+ * @cq: controlling Completion Queue
+ * @wc: Work Completion report
+ *
+ * Pages under I/O are released by a subsequent Send completion.
+ */
+static void svc_rdma_reply_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct svc_rdma_chunk_ctxt *cc =
+ container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+ struct svcxprt_rdma *rdma = cq->cq_context;
+
+ switch (wc->status) {
+ case IB_WC_SUCCESS:
+ trace_svcrdma_wc_reply(&cc->cc_cid);
+ return;
+ case IB_WC_WR_FLUSH_ERR:
+ trace_svcrdma_wc_reply_flush(wc, &cc->cc_cid);
+ break;
+ default:
+ trace_svcrdma_wc_reply_err(wc, &cc->cc_cid);
+ }
+
+ svc_xprt_deferred_close(&rdma->sc_xprt);
+}
+
/**
* svc_rdma_write_done - Write chunk completion
* @cq: controlling Completion Queue
@@ -261,16 +282,16 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
*/
static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct svcxprt_rdma *rdma = cq->cq_context;
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
- struct svcxprt_rdma *rdma = cc->cc_rdma;
struct svc_rdma_write_info *info =
container_of(cc, struct svc_rdma_write_info, wi_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
- trace_svcrdma_wc_write(wc, &cc->cc_cid);
+ trace_svcrdma_wc_write(&cc->cc_cid);
break;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
@@ -287,38 +308,6 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
svc_rdma_write_info_free(info);
}
-/* State for pulling a Read chunk.
- */
-struct svc_rdma_read_info {
- struct svc_rqst *ri_rqst;
- struct svc_rdma_recv_ctxt *ri_readctxt;
- unsigned int ri_pageno;
- unsigned int ri_pageoff;
- unsigned int ri_totalbytes;
-
- struct svc_rdma_chunk_ctxt ri_cc;
-};
-
-static struct svc_rdma_read_info *
-svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
-{
- struct svc_rdma_read_info *info;
-
- info = kmalloc(sizeof(*info), GFP_KERNEL);
- if (!info)
- return info;
-
- svc_rdma_cc_init(rdma, &info->ri_cc);
- info->ri_cc.cc_cqe.done = svc_rdma_wc_read_done;
- return info;
-}
-
-static void svc_rdma_read_info_free(struct svc_rdma_read_info *info)
-{
- svc_rdma_cc_release(&info->ri_cc, DMA_FROM_DEVICE);
- kfree(info);
-}
-
/**
* svc_rdma_wc_read_done - Handle completion of an RDMA Read ctx
* @cq: controlling Completion Queue
@@ -327,17 +316,27 @@ static void svc_rdma_read_info_free(struct svc_rdma_read_info *info)
*/
static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct svcxprt_rdma *rdma = cq->cq_context;
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
- struct svc_rdma_read_info *info;
+ struct svc_rdma_recv_ctxt *ctxt;
+
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
+ ctxt = container_of(cc, struct svc_rdma_recv_ctxt, rc_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
- info = container_of(cc, struct svc_rdma_read_info, ri_cc);
- trace_svcrdma_wc_read(wc, &cc->cc_cid, info->ri_totalbytes,
+ trace_svcrdma_wc_read(wc, &cc->cc_cid, ctxt->rc_readbytes,
cc->cc_posttime);
- break;
+
+ spin_lock(&rdma->sc_rq_dto_lock);
+ list_add_tail(&ctxt->rc_list, &rdma->sc_read_complete_q);
+ /* the unlock pairs with the smp_rmb in svc_xprt_ready */
+ set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
+ spin_unlock(&rdma->sc_rq_dto_lock);
+ svc_xprt_enqueue(&rdma->sc_xprt);
+ return;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_read_flush(wc, &cc->cc_cid);
break;
@@ -345,28 +344,32 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_read_err(wc, &cc->cc_cid);
}
- svc_rdma_wake_send_waiters(cc->cc_rdma, cc->cc_sqecount);
- cc->cc_status = wc->status;
- complete(&cc->cc_done);
- return;
+ /* The RDMA Read has flushed, so the incoming RPC message
+ * cannot be constructed and must be dropped. Signal the
+ * loss to the client by closing the connection.
+ */
+ svc_rdma_cc_release(rdma, cc, DMA_FROM_DEVICE);
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
}
-/* This function sleeps when the transport's Send Queue is congested.
- *
+/*
* Assumptions:
* - If ib_post_send() succeeds, only one completion is expected,
* even if one or more WRs are flushed. This is true when posting
* an rdma_rw_ctx or when posting a single signaled WR.
*/
-static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
+static int svc_rdma_post_chunk_ctxt(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc)
{
- struct svcxprt_rdma *rdma = cc->cc_rdma;
struct ib_send_wr *first_wr;
const struct ib_send_wr *bad_wr;
struct list_head *tmp;
struct ib_cqe *cqe;
int ret;
+ might_sleep();
+
if (cc->cc_sqecount > rdma->sc_sq_depth)
return -EINVAL;
@@ -392,14 +395,14 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
}
percpu_counter_inc(&svcrdma_stat_sq_starve);
- trace_svcrdma_sq_full(rdma);
+ trace_svcrdma_sq_full(rdma, &cc->cc_cid);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait,
atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
- trace_svcrdma_sq_retry(rdma);
+ trace_svcrdma_sq_retry(rdma, &cc->cc_cid);
} while (1);
- trace_svcrdma_sq_post_err(rdma, ret);
+ trace_svcrdma_sq_post_err(rdma, &cc->cc_cid, ret);
svc_xprt_deferred_close(&rdma->sc_xprt);
/* If even one was posted, there will be a completion. */
@@ -469,7 +472,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
unsigned int remaining)
{
struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
- struct svcxprt_rdma *rdma = cc->cc_rdma;
+ struct svcxprt_rdma *rdma = info->wi_rdma;
const struct svc_rdma_segment *seg;
struct svc_rdma_rw_ctxt *ctxt;
int ret;
@@ -512,7 +515,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
return 0;
out_overflow:
- trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no,
+ trace_svcrdma_small_wrch_err(&cc->cc_cid, remaining, info->wi_seg_no,
info->wi_chunk->ch_segcount);
return -E2BIG;
}
@@ -598,41 +601,33 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
return xdr->len;
}
-/**
- * svc_rdma_send_write_chunk - Write all segments in a Write chunk
- * @rdma: controlling RDMA transport
- * @chunk: Write chunk provided by the client
- * @xdr: xdr_buf containing the data payload
- *
- * Returns a non-negative number of bytes the chunk consumed, or
- * %-E2BIG if the payload was larger than the Write chunk,
- * %-EINVAL if client provided too many segments,
- * %-ENOMEM if rdma_rw context pool was exhausted,
- * %-ENOTCONN if posting failed (connection is lost),
- * %-EIO if rdma_rw initialization failed (DMA mapping, etc).
- */
-int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_chunk *chunk,
- const struct xdr_buf *xdr)
+static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_chunk *chunk,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc;
+ struct xdr_buf payload;
int ret;
+ if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
+ chunk->ch_payload_length))
+ return -EMSGSIZE;
+
info = svc_rdma_write_info_alloc(rdma, chunk);
if (!info)
return -ENOMEM;
cc = &info->wi_cc;
- ret = svc_rdma_xb_write(xdr, info);
- if (ret != xdr->len)
+ ret = svc_rdma_xb_write(&payload, info);
+ if (ret != payload.len)
goto out_err;
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(cc);
+ ret = svc_rdma_post_chunk_ctxt(rdma, cc);
if (ret < 0)
goto out_err;
- return xdr->len;
+ return 0;
out_err:
svc_rdma_write_info_free(info);
@@ -640,9 +635,37 @@ out_err:
}
/**
- * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk
+ * svc_rdma_send_write_list - Send all chunks on the Write list
+ * @rdma: controlling RDMA transport
+ * @rctxt: Write list provisioned by the client
+ * @xdr: xdr_buf containing an RPC Reply message
+ *
+ * Returns zero on success, or a negative errno if one or more
+ * Write chunks could not be sent.
+ */
+int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_recv_ctxt *rctxt,
+ const struct xdr_buf *xdr)
+{
+ struct svc_rdma_chunk *chunk;
+ int ret;
+
+ pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
+ if (!chunk->ch_payload_length)
+ break;
+ ret = svc_rdma_send_write_chunk(rdma, chunk, xdr);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+/**
+ * svc_rdma_prepare_reply_chunk - Construct WR chain for writing the Reply chunk
* @rdma: controlling RDMA transport
- * @rctxt: Write and Reply chunks from client
+ * @write_pcl: Write chunk list provided by client
+ * @reply_pcl: Reply chunk provided by client
+ * @sctxt: Send WR resources
* @xdr: xdr_buf containing an RPC Reply
*
* Returns a non-negative number of bytes the chunk consumed, or
@@ -652,44 +675,51 @@ out_err:
* %-ENOTCONN if posting failed (connection is lost),
* %-EIO if rdma_rw initialization failed (DMA mapping, etc).
*/
-int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_recv_ctxt *rctxt,
- const struct xdr_buf *xdr)
+int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ const struct svc_rdma_pcl *reply_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr)
{
- struct svc_rdma_write_info *info;
- struct svc_rdma_chunk_ctxt *cc;
- struct svc_rdma_chunk *chunk;
+ struct svc_rdma_write_info *info = &sctxt->sc_reply_info;
+ struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
+ struct ib_send_wr *first_wr;
+ struct list_head *pos;
+ struct ib_cqe *cqe;
int ret;
- if (pcl_is_empty(&rctxt->rc_reply_pcl))
- return 0;
-
- chunk = pcl_first_chunk(&rctxt->rc_reply_pcl);
- info = svc_rdma_write_info_alloc(rdma, chunk);
- if (!info)
- return -ENOMEM;
- cc = &info->wi_cc;
+ info->wi_rdma = rdma;
+ info->wi_chunk = pcl_first_chunk(reply_pcl);
+ info->wi_seg_off = 0;
+ info->wi_seg_no = 0;
+ info->wi_cc.cc_cqe.done = svc_rdma_reply_done;
- ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ ret = pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_write, info);
if (ret < 0)
- goto out_err;
+ return ret;
- trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(cc);
- if (ret < 0)
- goto out_err;
+ first_wr = sctxt->sc_wr_chain;
+ cqe = &cc->cc_cqe;
+ list_for_each(pos, &cc->cc_rwctxts) {
+ struct svc_rdma_rw_ctxt *rwc;
- return xdr->len;
+ rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
+ first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
+ rdma->sc_port_num, cqe, first_wr);
+ cqe = NULL;
+ }
+ sctxt->sc_wr_chain = first_wr;
+ sctxt->sc_sqecount += cc->cc_sqecount;
-out_err:
- svc_rdma_write_info_free(info);
- return ret;
+ trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
+ return xdr->len;
}
/**
* svc_rdma_build_read_segment - Build RDMA Read WQEs to pull one RDMA segment
- * @info: context for ongoing I/O
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @segment: co-ordinates of remote memory to be read
*
* Returns:
@@ -698,20 +728,20 @@ out_err:
* %-ENOMEM: allocating a local resources failed
* %-EIO: a DMA mapping error occurred
*/
-static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
+static int svc_rdma_build_read_segment(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
const struct svc_rdma_segment *segment)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
- struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
- struct svc_rqst *rqstp = info->ri_rqst;
+ struct svcxprt_rdma *rdma = svc_rdma_rqst_rdma(rqstp);
+ struct svc_rdma_chunk_ctxt *cc = &head->rc_cc;
unsigned int sge_no, seg_len, len;
struct svc_rdma_rw_ctxt *ctxt;
struct scatterlist *sg;
int ret;
len = segment->rs_length;
- sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT;
- ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no);
+ sge_no = PAGE_ALIGN(head->rc_pageoff + len) >> PAGE_SHIFT;
+ ctxt = svc_rdma_get_rw_ctxt(rdma, sge_no);
if (!ctxt)
return -ENOMEM;
ctxt->rw_nents = sge_no;
@@ -719,29 +749,27 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
sg = ctxt->rw_sg_table.sgl;
for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) {
seg_len = min_t(unsigned int, len,
- PAGE_SIZE - info->ri_pageoff);
+ PAGE_SIZE - head->rc_pageoff);
- if (!info->ri_pageoff)
+ if (!head->rc_pageoff)
head->rc_page_count++;
- sg_set_page(sg, rqstp->rq_pages[info->ri_pageno],
- seg_len, info->ri_pageoff);
+ sg_set_page(sg, rqstp->rq_pages[head->rc_curpage],
+ seg_len, head->rc_pageoff);
sg = sg_next(sg);
- info->ri_pageoff += seg_len;
- if (info->ri_pageoff == PAGE_SIZE) {
- info->ri_pageno++;
- info->ri_pageoff = 0;
+ head->rc_pageoff += seg_len;
+ if (head->rc_pageoff == PAGE_SIZE) {
+ head->rc_curpage++;
+ head->rc_pageoff = 0;
}
len -= seg_len;
- /* Safety check */
- if (len &&
- &rqstp->rq_pages[info->ri_pageno + 1] > rqstp->rq_page_end)
+ if (len && ((head->rc_curpage + 1) > rqstp->rq_maxpages))
goto out_overrun;
}
- ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, segment->rs_offset,
+ ret = svc_rdma_rw_ctx_init(rdma, ctxt, segment->rs_offset,
segment->rs_handle, DMA_FROM_DEVICE);
if (ret < 0)
return -EIO;
@@ -752,13 +780,14 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
return 0;
out_overrun:
- trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno);
+ trace_svcrdma_page_overrun_err(&cc->cc_cid, head->rc_curpage);
return -EINVAL;
}
/**
* svc_rdma_build_read_chunk - Build RDMA Read WQEs to pull one RDMA chunk
- * @info: context for ongoing I/O
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @chunk: Read chunk to pull
*
* Return values:
@@ -767,7 +796,8 @@ out_overrun:
* %-ENOMEM: allocating a local resources failed
* %-EIO: a DMA mapping error occurred
*/
-static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info,
+static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
const struct svc_rdma_chunk *chunk)
{
const struct svc_rdma_segment *segment;
@@ -775,56 +805,56 @@ static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info,
ret = -EINVAL;
pcl_for_each_segment(segment, chunk) {
- ret = svc_rdma_build_read_segment(info, segment);
+ ret = svc_rdma_build_read_segment(rqstp, head, segment);
if (ret < 0)
break;
- info->ri_totalbytes += segment->rs_length;
+ head->rc_readbytes += segment->rs_length;
}
return ret;
}
/**
* svc_rdma_copy_inline_range - Copy part of the inline content into pages
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @offset: offset into the Receive buffer of region to copy
* @remaining: length of region to copy
*
* Take a page at a time from rqstp->rq_pages and copy the inline
* content from the Receive buffer into that page. Update
- * info->ri_pageno and info->ri_pageoff so that the next RDMA Read
+ * head->rc_curpage and head->rc_pageoff so that the next RDMA Read
* result will land contiguously with the copied content.
*
* Return values:
* %0: Inline content was successfully copied
* %-EINVAL: offset or length was incorrect
*/
-static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
+static int svc_rdma_copy_inline_range(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
unsigned int offset,
unsigned int remaining)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
unsigned char *dst, *src = head->rc_recv_buf;
- struct svc_rqst *rqstp = info->ri_rqst;
unsigned int page_no, numpages;
- numpages = PAGE_ALIGN(info->ri_pageoff + remaining) >> PAGE_SHIFT;
+ numpages = PAGE_ALIGN(head->rc_pageoff + remaining) >> PAGE_SHIFT;
for (page_no = 0; page_no < numpages; page_no++) {
unsigned int page_len;
page_len = min_t(unsigned int, remaining,
- PAGE_SIZE - info->ri_pageoff);
+ PAGE_SIZE - head->rc_pageoff);
- if (!info->ri_pageoff)
+ if (!head->rc_pageoff)
head->rc_page_count++;
- dst = page_address(rqstp->rq_pages[info->ri_pageno]);
- memcpy(dst + info->ri_pageno, src + offset, page_len);
+ dst = page_address(rqstp->rq_pages[head->rc_curpage]);
+ memcpy(dst + head->rc_curpage, src + offset, page_len);
- info->ri_totalbytes += page_len;
- info->ri_pageoff += page_len;
- if (info->ri_pageoff == PAGE_SIZE) {
- info->ri_pageno++;
- info->ri_pageoff = 0;
+ head->rc_readbytes += page_len;
+ head->rc_pageoff += page_len;
+ if (head->rc_pageoff == PAGE_SIZE) {
+ head->rc_curpage++;
+ head->rc_pageoff = 0;
}
remaining -= page_len;
offset += page_len;
@@ -835,7 +865,8 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
/**
* svc_rdma_read_multiple_chunks - Construct RDMA Reads to pull data item Read chunks
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* The chunk data lands in rqstp->rq_arg as a series of contiguous pages,
* like an incoming TCP call.
@@ -847,11 +878,11 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *info)
+static noinline int
+svc_rdma_read_multiple_chunks(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
- struct xdr_buf *buf = &info->ri_rqst->rq_arg;
struct svc_rdma_chunk *chunk, *next;
unsigned int start, length;
int ret;
@@ -859,12 +890,12 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
start = 0;
chunk = pcl_first_chunk(pcl);
length = chunk->ch_position;
- ret = svc_rdma_copy_inline_range(info, start, length);
+ ret = svc_rdma_copy_inline_range(rqstp, head, start, length);
if (ret < 0)
return ret;
pcl_for_each_chunk(chunk, pcl) {
- ret = svc_rdma_build_read_chunk(info, chunk);
+ ret = svc_rdma_build_read_chunk(rqstp, head, chunk);
if (ret < 0)
return ret;
@@ -873,31 +904,21 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
break;
start += length;
- length = next->ch_position - info->ri_totalbytes;
- ret = svc_rdma_copy_inline_range(info, start, length);
+ length = next->ch_position - head->rc_readbytes;
+ ret = svc_rdma_copy_inline_range(rqstp, head, start, length);
if (ret < 0)
return ret;
}
start += length;
length = head->rc_byte_len - start;
- ret = svc_rdma_copy_inline_range(info, start, length);
- if (ret < 0)
- return ret;
-
- buf->len += info->ri_totalbytes;
- buf->buflen += info->ri_totalbytes;
-
- buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]);
- buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
- buf->pages = &info->ri_rqst->rq_pages[1];
- buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
- return 0;
+ return svc_rdma_copy_inline_range(rqstp, head, start, length);
}
/**
* svc_rdma_read_data_item - Construct RDMA Reads to pull data item Read chunks
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* The chunk data lands in the page list of rqstp->rq_arg.pages.
*
@@ -912,50 +933,17 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static int svc_rdma_read_data_item(struct svc_rdma_read_info *info)
+static int svc_rdma_read_data_item(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
- struct xdr_buf *buf = &info->ri_rqst->rq_arg;
- struct svc_rdma_chunk *chunk;
- unsigned int length;
- int ret;
-
- chunk = pcl_first_chunk(&head->rc_read_pcl);
- ret = svc_rdma_build_read_chunk(info, chunk);
- if (ret < 0)
- goto out;
-
- /* Split the Receive buffer between the head and tail
- * buffers at Read chunk's position. XDR roundup of the
- * chunk is not included in either the pagelist or in
- * the tail.
- */
- buf->tail[0].iov_base = buf->head[0].iov_base + chunk->ch_position;
- buf->tail[0].iov_len = buf->head[0].iov_len - chunk->ch_position;
- buf->head[0].iov_len = chunk->ch_position;
-
- /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
- *
- * If the client already rounded up the chunk length, the
- * length does not change. Otherwise, the length of the page
- * list is increased to include XDR round-up.
- *
- * Currently these chunks always start at page offset 0,
- * thus the rounded-up length never crosses a page boundary.
- */
- buf->pages = &info->ri_rqst->rq_pages[0];
- length = xdr_align_size(chunk->ch_length);
- buf->page_len = length;
- buf->len += length;
- buf->buflen += length;
-
-out:
- return ret;
+ return svc_rdma_build_read_chunk(rqstp, head,
+ pcl_first_chunk(&head->rc_read_pcl));
}
/**
- * svc_rdma_read_chunk_range - Build RDMA Read WQEs for portion of a chunk
- * @info: context for RDMA Reads
+ * svc_rdma_read_chunk_range - Build RDMA Read WRs for portion of a chunk
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @chunk: parsed Call chunk to pull
* @offset: offset of region to pull
* @length: length of region to pull
@@ -967,7 +955,8 @@ out:
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
+static int svc_rdma_read_chunk_range(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
const struct svc_rdma_chunk *chunk,
unsigned int offset, unsigned int length)
{
@@ -987,11 +976,11 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
dummy.rs_length = min_t(u32, length, segment->rs_length) - offset;
dummy.rs_offset = segment->rs_offset + offset;
- ret = svc_rdma_build_read_segment(info, &dummy);
+ ret = svc_rdma_build_read_segment(rqstp, head, &dummy);
if (ret < 0)
break;
- info->ri_totalbytes += dummy.rs_length;
+ head->rc_readbytes += dummy.rs_length;
length -= dummy.rs_length;
offset = 0;
}
@@ -1000,7 +989,8 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
/**
* svc_rdma_read_call_chunk - Build RDMA Read WQEs to pull a Long Message
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* Return values:
* %0: RDMA Read WQEs were successfully built
@@ -1009,9 +999,9 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
+static int svc_rdma_read_call_chunk(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
const struct svc_rdma_chunk *call_chunk =
pcl_first_chunk(&head->rc_call_pcl);
const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
@@ -1020,17 +1010,18 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
int ret;
if (pcl_is_empty(pcl))
- return svc_rdma_build_read_chunk(info, call_chunk);
+ return svc_rdma_build_read_chunk(rqstp, head, call_chunk);
start = 0;
chunk = pcl_first_chunk(pcl);
length = chunk->ch_position;
- ret = svc_rdma_read_chunk_range(info, call_chunk, start, length);
+ ret = svc_rdma_read_chunk_range(rqstp, head, call_chunk,
+ start, length);
if (ret < 0)
return ret;
pcl_for_each_chunk(chunk, pcl) {
- ret = svc_rdma_build_read_chunk(info, chunk);
+ ret = svc_rdma_build_read_chunk(rqstp, head, chunk);
if (ret < 0)
return ret;
@@ -1039,8 +1030,8 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
break;
start += length;
- length = next->ch_position - info->ri_totalbytes;
- ret = svc_rdma_read_chunk_range(info, call_chunk,
+ length = next->ch_position - head->rc_readbytes;
+ ret = svc_rdma_read_chunk_range(rqstp, head, call_chunk,
start, length);
if (ret < 0)
return ret;
@@ -1048,12 +1039,14 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
start += length;
length = call_chunk->ch_length - start;
- return svc_rdma_read_chunk_range(info, call_chunk, start, length);
+ return svc_rdma_read_chunk_range(rqstp, head, call_chunk,
+ start, length);
}
/**
* svc_rdma_read_special - Build RDMA Read WQEs to pull a Long Message
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* The start of the data lands in the first page just after the
* Transport header, and the rest lands in rqstp->rq_arg.pages.
@@ -1069,25 +1062,31 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info)
+static noinline int svc_rdma_read_special(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct xdr_buf *buf = &info->ri_rqst->rq_arg;
- int ret;
-
- ret = svc_rdma_read_call_chunk(info);
- if (ret < 0)
- goto out;
-
- buf->len += info->ri_totalbytes;
- buf->buflen += info->ri_totalbytes;
+ return svc_rdma_read_call_chunk(rqstp, head);
+}
- buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]);
- buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
- buf->pages = &info->ri_rqst->rq_pages[1];
- buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
+/* Pages under I/O have been copied to head->rc_pages. Ensure that
+ * svc_xprt_release() does not put them when svc_rdma_recvfrom()
+ * returns. This has to be done after all Read WRs are constructed
+ * to properly handle a page that happens to be part of I/O on behalf
+ * of two different RDMA segments.
+ *
+ * Note: if the subsequent post_send fails, these pages have already
+ * been moved to head->rc_pages and thus will be cleaned up by
+ * svc_rdma_recv_ctxt_put().
+ */
+static void svc_rdma_clear_rqst_pages(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
+{
+ unsigned int i;
-out:
- return ret;
+ for (i = 0; i < head->rc_page_count; i++) {
+ head->rc_pages[i] = rqstp->rq_pages[i];
+ rqstp->rq_pages[i] = NULL;
+ }
}
/**
@@ -1117,49 +1116,27 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_read_info *info;
- struct svc_rdma_chunk_ctxt *cc;
+ struct svc_rdma_chunk_ctxt *cc = &head->rc_cc;
int ret;
- info = svc_rdma_read_info_alloc(rdma);
- if (!info)
- return -ENOMEM;
- cc = &info->ri_cc;
- info->ri_rqst = rqstp;
- info->ri_readctxt = head;
- info->ri_pageno = 0;
- info->ri_pageoff = 0;
- info->ri_totalbytes = 0;
+ cc->cc_cqe.done = svc_rdma_wc_read_done;
+ cc->cc_sqecount = 0;
+ head->rc_pageoff = 0;
+ head->rc_curpage = 0;
+ head->rc_readbytes = 0;
if (pcl_is_empty(&head->rc_call_pcl)) {
if (head->rc_read_pcl.cl_count == 1)
- ret = svc_rdma_read_data_item(info);
+ ret = svc_rdma_read_data_item(rqstp, head);
else
- ret = svc_rdma_read_multiple_chunks(info);
+ ret = svc_rdma_read_multiple_chunks(rqstp, head);
} else
- ret = svc_rdma_read_special(info);
+ ret = svc_rdma_read_special(rqstp, head);
+ svc_rdma_clear_rqst_pages(rqstp, head);
if (ret < 0)
- goto out_err;
+ return ret;
trace_svcrdma_post_read_chunk(&cc->cc_cid, cc->cc_sqecount);
- init_completion(&cc->cc_done);
- ret = svc_rdma_post_chunk_ctxt(cc);
- if (ret < 0)
- goto out_err;
-
- ret = 1;
- wait_for_completion(&cc->cc_done);
- if (cc->cc_status != IB_WC_SUCCESS)
- ret = -EIO;
-
- /* rq_respages starts after the last arg page */
- rqstp->rq_respages = &rqstp->rq_pages[head->rc_page_count];
- rqstp->rq_next_page = rqstp->rq_respages + 1;
-
- /* Ensure svc_rdma_recv_ctxt_put() does not try to release pages */
- head->rc_page_count = 0;
-
-out_err:
- svc_rdma_read_info_free(info);
- return ret;
+ ret = svc_rdma_post_chunk_ctxt(rdma, cc);
+ return ret < 0 ? ret : 1;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 22a871e6fe4d..914cd263c2f1 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -100,7 +100,7 @@
*/
#include <linux/spinlock.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -113,42 +113,41 @@
static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
-static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
- struct rpc_rdma_cid *cid)
-{
- cid->ci_queue_id = rdma->sc_sq_cq->res.id;
- cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
-}
-
static struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
{
+ int node = ibdev_to_node(rdma->sc_cm_id->device);
struct svc_rdma_send_ctxt *ctxt;
+ unsigned long pages;
dma_addr_t addr;
void *buffer;
- size_t size;
int i;
- size = sizeof(*ctxt);
- size += rdma->sc_max_send_sges * sizeof(struct ib_sge);
- ctxt = kmalloc(size, GFP_KERNEL);
+ ctxt = kzalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges),
+ GFP_KERNEL, node);
if (!ctxt)
goto fail0;
- buffer = kmalloc(rdma->sc_max_req_size, GFP_KERNEL);
- if (!buffer)
+ pages = svc_serv_maxpages(rdma->sc_xprt.xpt_server);
+ ctxt->sc_pages = kcalloc_node(pages, sizeof(struct page *),
+ GFP_KERNEL, node);
+ if (!ctxt->sc_pages)
goto fail1;
+ ctxt->sc_maxpages = pages;
+ buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
+ if (!buffer)
+ goto fail2;
addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
rdma->sc_max_req_size, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
- goto fail2;
+ goto fail3;
svc_rdma_send_cid_init(rdma, &ctxt->sc_cid);
+ ctxt->sc_rdma = rdma;
ctxt->sc_send_wr.next = NULL;
ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
- init_completion(&ctxt->sc_done);
ctxt->sc_cqe.done = svc_rdma_wc_send;
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
@@ -159,8 +158,10 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_sges[i].lkey = rdma->sc_pd->local_dma_lkey;
return ctxt;
-fail2:
+fail3:
kfree(buffer);
+fail2:
+ kfree(ctxt->sc_pages);
fail1:
kfree(ctxt);
fail0:
@@ -184,6 +185,7 @@ void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
rdma->sc_max_req_size,
DMA_TO_DEVICE);
kfree(ctxt->sc_xprt_buf);
+ kfree(ctxt->sc_pages);
kfree(ctxt);
}
}
@@ -202,55 +204,82 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
spin_lock(&rdma->sc_send_lock);
node = llist_del_first(&rdma->sc_send_ctxts);
+ spin_unlock(&rdma->sc_send_lock);
if (!node)
goto out_empty;
+
ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
- spin_unlock(&rdma->sc_send_lock);
out:
rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0);
xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf,
ctxt->sc_xprt_buf, NULL);
+ svc_rdma_cc_init(rdma, &ctxt->sc_reply_info.wi_cc);
ctxt->sc_send_wr.num_sge = 0;
ctxt->sc_cur_sge_no = 0;
+ ctxt->sc_page_count = 0;
+ ctxt->sc_wr_chain = &ctxt->sc_send_wr;
+ ctxt->sc_sqecount = 1;
+
return ctxt;
out_empty:
- spin_unlock(&rdma->sc_send_lock);
ctxt = svc_rdma_send_ctxt_alloc(rdma);
if (!ctxt)
return NULL;
goto out;
}
-/**
- * svc_rdma_send_ctxt_put - Return send_ctxt to free list
- * @rdma: controlling svcxprt_rdma
- * @ctxt: object to return to the free list
- */
-void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt)
+static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
{
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
+ svc_rdma_reply_chunk_release(rdma, ctxt);
+
+ if (ctxt->sc_page_count)
+ release_pages(ctxt->sc_pages, ctxt->sc_page_count);
+
/* The first SGE contains the transport header, which
* remains mapped until @ctxt is destroyed.
*/
for (i = 1; i < ctxt->sc_send_wr.num_sge; i++) {
+ trace_svcrdma_dma_unmap_page(&ctxt->sc_cid,
+ ctxt->sc_sges[i].addr,
+ ctxt->sc_sges[i].length);
ib_dma_unmap_page(device,
ctxt->sc_sges[i].addr,
ctxt->sc_sges[i].length,
DMA_TO_DEVICE);
- trace_svcrdma_dma_unmap_page(rdma,
- ctxt->sc_sges[i].addr,
- ctxt->sc_sges[i].length);
}
llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts);
}
+static void svc_rdma_send_ctxt_put_async(struct work_struct *work)
+{
+ struct svc_rdma_send_ctxt *ctxt;
+
+ ctxt = container_of(work, struct svc_rdma_send_ctxt, sc_work);
+ svc_rdma_send_ctxt_release(ctxt->sc_rdma, ctxt);
+}
+
+/**
+ * svc_rdma_send_ctxt_put - Return send_ctxt to free list
+ * @rdma: controlling svcxprt_rdma
+ * @ctxt: object to return to the free list
+ *
+ * Pages left in sc_pages are DMA unmapped and released.
+ */
+void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ INIT_WORK(&ctxt->sc_work, svc_rdma_send_ctxt_put_async);
+ queue_work(svcrdma_wq, &ctxt->sc_work);
+}
+
/**
* svc_rdma_wake_send_waiters - manage Send Queue accounting
* @rdma: controlling transport
@@ -280,13 +309,13 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_send_ctxt *ctxt =
container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
- svc_rdma_wake_send_waiters(rdma, 1);
- complete(&ctxt->sc_done);
+ svc_rdma_wake_send_waiters(rdma, ctxt->sc_sqecount);
if (unlikely(wc->status != IB_WC_SUCCESS))
goto flushed;
- trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+ trace_svcrdma_wc_send(&ctxt->sc_cid);
+ svc_rdma_send_ctxt_put(rdma, ctxt);
return;
flushed:
@@ -294,55 +323,81 @@ flushed:
trace_svcrdma_wc_send_err(wc, &ctxt->sc_cid);
else
trace_svcrdma_wc_send_flush(wc, &ctxt->sc_cid);
+ svc_rdma_send_ctxt_put(rdma, ctxt);
svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
- * svc_rdma_send - Post a single Send WR
- * @rdma: transport on which to post the WR
- * @ctxt: send ctxt with a Send WR ready to post
+ * svc_rdma_post_send - Post a WR chain to the Send Queue
+ * @rdma: transport context
+ * @ctxt: WR chain to post
+ *
+ * Copy fields in @ctxt to stack variables in order to guarantee
+ * that these values remain available after the ib_post_send() call.
+ * In some error flow cases, svc_rdma_wc_send() releases @ctxt.
+ *
+ * Note there is potential for starvation when the Send Queue is
+ * full because there is no order to when waiting threads are
+ * awoken. The transport is typically provisioned with a deep
+ * enough Send Queue that SQ exhaustion should be a rare event.
*
- * Returns zero if the Send WR was posted successfully. Otherwise, a
- * negative errno is returned.
+ * Return values:
+ * %0: @ctxt's WR chain was posted successfully
+ * %-ENOTCONN: The connection was lost
*/
-int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
+int svc_rdma_post_send(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
{
- struct ib_send_wr *wr = &ctxt->sc_send_wr;
- int ret;
+ struct ib_send_wr *first_wr = ctxt->sc_wr_chain;
+ struct ib_send_wr *send_wr = &ctxt->sc_send_wr;
+ const struct ib_send_wr *bad_wr = first_wr;
+ struct rpc_rdma_cid cid = ctxt->sc_cid;
+ int ret, sqecount = ctxt->sc_sqecount;
- reinit_completion(&ctxt->sc_done);
+ might_sleep();
/* Sync the transport header buffer */
ib_dma_sync_single_for_device(rdma->sc_pd->device,
- wr->sg_list[0].addr,
- wr->sg_list[0].length,
+ send_wr->sg_list[0].addr,
+ send_wr->sg_list[0].length,
DMA_TO_DEVICE);
/* If the SQ is full, wait until an SQ entry is available */
- while (1) {
- if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
+ while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) {
+ if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
+ svc_rdma_wake_send_waiters(rdma, sqecount);
+
+ /* When the transport is torn down, assume
+ * ib_drain_sq() will trigger enough Send
+ * completions to wake us. The XPT_CLOSE test
+ * above should then cause the while loop to
+ * exit.
+ */
percpu_counter_inc(&svcrdma_stat_sq_starve);
- trace_svcrdma_sq_full(rdma);
- atomic_inc(&rdma->sc_sq_avail);
+ trace_svcrdma_sq_full(rdma, &cid);
wait_event(rdma->sc_send_wait,
- atomic_read(&rdma->sc_sq_avail) > 1);
- if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
- return -ENOTCONN;
- trace_svcrdma_sq_retry(rdma);
+ atomic_read(&rdma->sc_sq_avail) > 0);
+ trace_svcrdma_sq_retry(rdma, &cid);
continue;
}
trace_svcrdma_post_send(ctxt);
- ret = ib_post_send(rdma->sc_qp, wr, NULL);
- if (ret)
- break;
+ ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+ if (ret) {
+ trace_svcrdma_sq_post_err(rdma, &cid, ret);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
+
+ /* If even one WR was posted, there will be a
+ * Send completion that bumps sc_sq_avail.
+ */
+ if (bad_wr == first_wr) {
+ svc_rdma_wake_send_waiters(rdma, sqecount);
+ break;
+ }
+ }
return 0;
}
-
- trace_svcrdma_sq_post_err(rdma, ret);
- svc_xprt_deferred_close(&rdma->sc_xprt);
- wake_up(&rdma->sc_send_wait);
- return ret;
+ return -ENOTCONN;
}
/**
@@ -529,14 +584,14 @@ static int svc_rdma_page_dma_map(void *data, struct page *page,
if (ib_dma_mapping_error(dev, dma_addr))
goto out_maperr;
- trace_svcrdma_dma_map_page(rdma, dma_addr, len);
+ trace_svcrdma_dma_map_page(&ctxt->sc_cid, dma_addr, len);
ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr;
ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len;
ctxt->sc_send_wr.num_sge++;
return 0;
out_maperr:
- trace_svcrdma_dma_map_err(rdma, dma_addr, len);
+ trace_svcrdma_dma_map_err(&ctxt->sc_cid, dma_addr, len);
return -EIO;
}
@@ -648,7 +703,7 @@ static int svc_rdma_xb_count_sges(const struct xdr_buf *xdr,
* svc_rdma_pull_up_needed - Determine whether to use pull-up
* @rdma: controlling transport
* @sctxt: send_ctxt for the Send WR
- * @rctxt: Write and Reply chunks provided by client
+ * @write_pcl: Write chunk list provided by client
* @xdr: xdr_buf containing RPC message to transmit
*
* Returns:
@@ -657,7 +712,7 @@ static int svc_rdma_xb_count_sges(const struct xdr_buf *xdr,
*/
static bool svc_rdma_pull_up_needed(const struct svcxprt_rdma *rdma,
const struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_recv_ctxt *rctxt,
+ const struct svc_rdma_pcl *write_pcl,
const struct xdr_buf *xdr)
{
/* Resources needed for the transport header */
@@ -667,7 +722,7 @@ static bool svc_rdma_pull_up_needed(const struct svcxprt_rdma *rdma,
};
int ret;
- ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ ret = pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_count_sges, &args);
if (ret < 0)
return false;
@@ -723,7 +778,7 @@ static int svc_rdma_xb_linearize(const struct xdr_buf *xdr,
* svc_rdma_pull_up_reply_msg - Copy Reply into a single buffer
* @rdma: controlling transport
* @sctxt: send_ctxt for the Send WR; xprt hdr is already prepared
- * @rctxt: Write and Reply chunks provided by client
+ * @write_pcl: Write chunk list provided by client
* @xdr: prepared xdr_buf containing RPC message
*
* The device is not capable of sending the reply directly.
@@ -738,7 +793,7 @@ static int svc_rdma_xb_linearize(const struct xdr_buf *xdr,
*/
static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_recv_ctxt *rctxt,
+ const struct svc_rdma_pcl *write_pcl,
const struct xdr_buf *xdr)
{
struct svc_rdma_pullup_data args = {
@@ -746,7 +801,7 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
};
int ret;
- ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ ret = pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_linearize, &args);
if (ret < 0)
return ret;
@@ -759,7 +814,8 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
/* svc_rdma_map_reply_msg - DMA map the buffer holding RPC message
* @rdma: controlling transport
* @sctxt: send_ctxt for the Send WR
- * @rctxt: Write and Reply chunks provided by client
+ * @write_pcl: Write chunk list provided by client
+ * @reply_pcl: Reply chunk provided by client
* @xdr: prepared xdr_buf containing RPC message
*
* Returns:
@@ -771,7 +827,8 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
*/
int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_recv_ctxt *rctxt,
+ const struct svc_rdma_pcl *write_pcl,
+ const struct svc_rdma_pcl *reply_pcl,
const struct xdr_buf *xdr)
{
struct svc_rdma_map_data args = {
@@ -784,36 +841,49 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
/* If there is a Reply chunk, nothing follows the transport
- * header, and we're done here.
+ * header, so there is nothing to map.
*/
- if (!pcl_is_empty(&rctxt->rc_reply_pcl))
+ if (!pcl_is_empty(reply_pcl))
return 0;
/* For pull-up, svc_rdma_send() will sync the transport header.
* No additional DMA mapping is necessary.
*/
- if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr))
- return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr);
+ if (svc_rdma_pull_up_needed(rdma, sctxt, write_pcl, xdr))
+ return svc_rdma_pull_up_reply_msg(rdma, sctxt, write_pcl, xdr);
- return pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ return pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_dma_map, &args);
}
+/* The svc_rqst and all resources it owns are released as soon as
+ * svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
+ * so they are released by the Send completion handler.
+ */
+static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ int i, pages = rqstp->rq_next_page - rqstp->rq_respages;
+
+ ctxt->sc_page_count += pages;
+ for (i = 0; i < pages; i++) {
+ ctxt->sc_pages[i] = rqstp->rq_respages[i];
+ rqstp->rq_respages[i] = NULL;
+ }
+
+ /* Prevent svc_xprt_release from releasing pages in rq_pages */
+ rqstp->rq_next_page = rqstp->rq_respages;
+}
+
/* Prepare the portion of the RPC Reply that will be transmitted
* via RDMA Send. The RPC-over-RDMA transport header is prepared
* in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
*
* Depending on whether a Write list or Reply chunk is present,
- * the server may send all, a portion of, or none of the xdr_buf.
+ * the server may Send all, a portion of, or none of the xdr_buf.
* In the latter case, only the transport header (sc_sges[0]) is
* transmitted.
*
- * RDMA Send is the last step of transmitting an RPC reply. Pages
- * involved in the earlier RDMA Writes are here transferred out
- * of the rqstp and into the sctxt's page array. These pages are
- * DMA unmapped by each Write completion, but the subsequent Send
- * completion finally releases these pages.
- *
* Assumptions:
* - The Reply's transport header will never be larger than a page.
*/
@@ -822,26 +892,27 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
const struct svc_rdma_recv_ctxt *rctxt,
struct svc_rqst *rqstp)
{
+ struct ib_send_wr *send_wr = &sctxt->sc_send_wr;
int ret;
- ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqstp->rq_res);
+ ret = svc_rdma_map_reply_msg(rdma, sctxt, &rctxt->rc_write_pcl,
+ &rctxt->rc_reply_pcl, &rqstp->rq_res);
if (ret < 0)
return ret;
+ /* Transfer pages involved in RDMA Writes to the sctxt's
+ * page array. Completion handling releases these pages.
+ */
+ svc_rdma_save_io_pages(rqstp, sctxt);
+
if (rctxt->rc_inv_rkey) {
- sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
- sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
+ send_wr->opcode = IB_WR_SEND_WITH_INV;
+ send_wr->ex.invalidate_rkey = rctxt->rc_inv_rkey;
} else {
- sctxt->sc_send_wr.opcode = IB_WR_SEND;
+ send_wr->opcode = IB_WR_SEND;
}
- ret = svc_rdma_send(rdma, sctxt);
- if (ret < 0)
- return ret;
-
- ret = wait_for_completion_killable(&sctxt->sc_done);
- svc_rdma_send_ctxt_put(rdma, sctxt);
- return ret;
+ return svc_rdma_post_send(rdma, sctxt);
}
/**
@@ -905,10 +976,9 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
sctxt->sc_send_wr.num_sge = 1;
sctxt->sc_send_wr.opcode = IB_WR_SEND;
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
- if (svc_rdma_send(rdma, sctxt))
+ if (svc_rdma_post_send(rdma, sctxt))
goto put_ctxt;
-
- wait_for_completion_killable(&sctxt->sc_done);
+ return;
put_ctxt:
svc_rdma_send_ctxt_put(rdma, sctxt);
@@ -953,10 +1023,19 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!p)
goto put_ctxt;
- ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res);
+ ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res);
if (ret < 0)
- goto reply_chunk;
- rc_size = ret;
+ goto put_ctxt;
+
+ rc_size = 0;
+ if (!pcl_is_empty(&rctxt->rc_reply_pcl)) {
+ ret = svc_rdma_prepare_reply_chunk(rdma, &rctxt->rc_write_pcl,
+ &rctxt->rc_reply_pcl, sctxt,
+ &rqstp->rq_res);
+ if (ret < 0)
+ goto reply_chunk;
+ rc_size = ret;
+ }
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
@@ -976,17 +1055,16 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
if (ret < 0)
goto put_ctxt;
-
- /* Prevent svc_xprt_release() from releasing the page backing
- * rq_res.head[0].iov_base. It's no longer being accessed by
- * the I/O device. */
- rqstp->rq_respages++;
return 0;
reply_chunk:
if (ret != -E2BIG && ret != -EINVAL)
goto put_ctxt;
+ /* Send completion releases payload pages that were part
+ * of previously posted RDMA Writes.
+ */
+ svc_rdma_save_io_pages(rqstp, sctxt);
svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
return 0;
@@ -1000,45 +1078,33 @@ drop_connection:
/**
* svc_rdma_result_payload - special processing for a result payload
- * @rqstp: svc_rqst to operate on
- * @offset: payload's byte offset in @xdr
+ * @rqstp: RPC transaction context
+ * @offset: payload's byte offset in @rqstp->rq_res
* @length: size of payload, in bytes
*
+ * Assign the passed-in result payload to the current Write chunk,
+ * and advance to cur_result_payload to the next Write chunk, if
+ * there is one.
+ *
* Return values:
* %0 if successful or nothing needed to be done
- * %-EMSGSIZE on XDR buffer overflow
* %-E2BIG if the payload was larger than the Write chunk
- * %-EINVAL if client provided too many segments
- * %-ENOMEM if rdma_rw context pool was exhausted
- * %-ENOTCONN if posting failed (connection is lost)
- * %-EIO if rdma_rw initialization failed (DMA mapping, etc)
*/
int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length)
{
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
struct svc_rdma_chunk *chunk;
- struct svcxprt_rdma *rdma;
- struct xdr_buf subbuf;
- int ret;
chunk = rctxt->rc_cur_result_payload;
if (!length || !chunk)
return 0;
rctxt->rc_cur_result_payload =
pcl_next_chunk(&rctxt->rc_write_pcl, chunk);
+
if (length > chunk->ch_length)
return -E2BIG;
-
chunk->ch_position = offset;
chunk->ch_payload_length = length;
-
- if (xdr_buf_subsegment(&rqstp->rq_res, &subbuf, offset, length))
- return -EMSGSIZE;
-
- rdma = container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
- ret = svc_rdma_send_write_chunk(rdma, chunk, &subbuf);
- if (ret < 0)
- return ret;
return 0;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 94b20fb47135..b7b318ad25c4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -64,7 +64,9 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
- struct net *net);
+ struct net *net, int node);
+static int svc_rdma_listen_handler(struct rdma_cm_id *cma_id,
+ struct rdma_cm_event *event);
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
@@ -73,7 +75,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
-static void svc_rdma_secure_port(struct svc_rqst *);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
static const struct svc_xprt_ops svc_rdma_ops = {
@@ -81,12 +82,11 @@ static const struct svc_xprt_ops svc_rdma_ops = {
.xpo_recvfrom = svc_rdma_recvfrom,
.xpo_sendto = svc_rdma_sendto,
.xpo_result_payload = svc_rdma_result_payload,
- .xpo_release_rqst = svc_rdma_release_rqst,
+ .xpo_release_ctxt = svc_rdma_release_ctxt,
.xpo_detach = svc_rdma_detach,
.xpo_free = svc_rdma_free,
.xpo_has_wspace = svc_rdma_has_wspace,
.xpo_accept = svc_rdma_accept,
- .xpo_secure_port = svc_rdma_secure_port,
.xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt,
};
@@ -124,18 +124,57 @@ static void qp_event_handler(struct ib_event *event, void *context)
}
}
+static struct rdma_cm_id *
+svc_rdma_create_listen_id(struct net *net, struct sockaddr *sap,
+ void *context)
+{
+ struct rdma_cm_id *listen_id;
+ int ret;
+
+ listen_id = rdma_create_id(net, svc_rdma_listen_handler, context,
+ RDMA_PS_TCP, IB_QPT_RC);
+ if (IS_ERR(listen_id))
+ return listen_id;
+
+ /* Allow both IPv4 and IPv6 sockets to bind a single port
+ * at the same time.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ ret = rdma_set_afonly(listen_id, 1);
+ if (ret)
+ goto out_destroy;
+#endif
+ ret = rdma_bind_addr(listen_id, sap);
+ if (ret)
+ goto out_destroy;
+
+ ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
+ if (ret)
+ goto out_destroy;
+
+ return listen_id;
+
+out_destroy:
+ rdma_destroy_id(listen_id);
+ return ERR_PTR(ret);
+}
+
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
- struct net *net)
+ struct net *net, int node)
{
- struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL);
+ static struct lock_class_key svcrdma_rwctx_lock;
+ static struct lock_class_key svcrdma_sctx_lock;
+ static struct lock_class_key svcrdma_dto_lock;
+ struct svcxprt_rdma *cma_xprt;
- if (!cma_xprt) {
- dprintk("svcrdma: failed to create new transport\n");
+ cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node);
+ if (!cma_xprt)
return NULL;
- }
+
svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
+ INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
init_llist_head(&cma_xprt->sc_send_ctxts);
init_llist_head(&cma_xprt->sc_recv_ctxts);
init_llist_head(&cma_xprt->sc_rw_ctxts);
@@ -143,8 +182,11 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
+ lockdep_set_class(&cma_xprt->sc_rq_dto_lock, &svcrdma_dto_lock);
spin_lock_init(&cma_xprt->sc_send_lock);
+ lockdep_set_class(&cma_xprt->sc_send_lock, &svcrdma_sctx_lock);
spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
+ lockdep_set_class(&cma_xprt->sc_rw_ctxt_lock, &svcrdma_rwctx_lock);
/*
* Note that this implies that the underlying transport support
@@ -195,9 +237,9 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
struct svcxprt_rdma *newxprt;
struct sockaddr *sa;
- /* Create a new transport */
newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server,
- listen_xprt->sc_xprt.xpt_net);
+ listen_xprt->sc_xprt.xpt_net,
+ ibdev_to_node(new_cma_id->device));
if (!newxprt)
return;
newxprt->sc_cm_id = new_cma_id;
@@ -242,17 +284,31 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
*
* Return values:
* %0: Do not destroy @cma_id
- * %1: Destroy @cma_id (never returned here)
+ * %1: Destroy @cma_id
*
* NB: There is never a DEVICE_REMOVAL event for INADDR_ANY listeners.
*/
static int svc_rdma_listen_handler(struct rdma_cm_id *cma_id,
struct rdma_cm_event *event)
{
+ struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr;
+ struct svcxprt_rdma *cma_xprt = cma_id->context;
+ struct svc_xprt *cma_rdma = &cma_xprt->sc_xprt;
+ struct rdma_cm_id *listen_id;
+
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST:
handle_connect_req(cma_id, &event->param.conn);
break;
+ case RDMA_CM_EVENT_ADDR_CHANGE:
+ listen_id = svc_rdma_create_listen_id(cma_rdma->xpt_net,
+ sap, cma_xprt);
+ if (IS_ERR(listen_id)) {
+ pr_err("Listener dead, address change failed for device %s\n",
+ cma_id->device->name);
+ } else
+ cma_xprt->sc_cm_id = listen_id;
+ return 1;
default:
break;
}
@@ -283,7 +339,6 @@ static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
svc_xprt_enqueue(xprt);
break;
case RDMA_CM_EVENT_DISCONNECTED:
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
svc_xprt_deferred_close(xprt);
break;
default:
@@ -302,40 +357,22 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
{
struct rdma_cm_id *listen_id;
struct svcxprt_rdma *cma_xprt;
- int ret;
if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
return ERR_PTR(-EAFNOSUPPORT);
- cma_xprt = svc_rdma_create_xprt(serv, net);
+ cma_xprt = svc_rdma_create_xprt(serv, net, NUMA_NO_NODE);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
- listen_id = rdma_create_id(net, svc_rdma_listen_handler, cma_xprt,
- RDMA_PS_TCP, IB_QPT_RC);
+ listen_id = svc_rdma_create_listen_id(net, sa, cma_xprt);
if (IS_ERR(listen_id)) {
- ret = PTR_ERR(listen_id);
- goto err0;
+ kfree(cma_xprt);
+ return ERR_CAST(listen_id);
}
-
- /* Allow both IPv4 and IPv6 sockets to bind a single port
- * at the same time.
- */
-#if IS_ENABLED(CONFIG_IPV6)
- ret = rdma_set_afonly(listen_id, 1);
- if (ret)
- goto err1;
-#endif
- ret = rdma_bind_addr(listen_id, sa);
- if (ret)
- goto err1;
cma_xprt->sc_cm_id = listen_id;
- ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
- if (ret)
- goto err1;
-
/*
* We need to use the address from the cm_id in case the
* caller specified 0 for the port number.
@@ -344,12 +381,16 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);
return &cma_xprt->sc_xprt;
+}
- err1:
- rdma_destroy_id(listen_id);
- err0:
- kfree(cma_xprt);
- return ERR_PTR(ret);
+static void svc_rdma_xprt_done(struct rpcrdma_notification *rn)
+{
+ struct svcxprt_rdma *rdma = container_of(rn, struct svcxprt_rdma,
+ sc_rn);
+ struct rdma_cm_id *id = rdma->sc_cm_id;
+
+ trace_svcrdma_device_removal(id);
+ svc_xprt_close(&rdma->sc_xprt);
}
/*
@@ -365,12 +406,12 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
*/
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
{
+ unsigned int ctxts, rq_depth, maxpayload;
struct svcxprt_rdma *listen_rdma;
struct svcxprt_rdma *newxprt = NULL;
struct rdma_conn_param conn_param;
struct rpcrdma_connect_private pmsg;
struct ib_qp_init_attr qp_attr;
- unsigned int ctxts, rq_depth;
struct ib_device *dev;
int ret = 0;
RPC_IFDEBUG(struct sockaddr *sap);
@@ -393,37 +434,45 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dev = newxprt->sc_cm_id->device;
newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
- /* Qualify the transport resource defaults with the
- * capabilities of this particular device */
+ if (rpcrdma_rn_register(dev, &newxprt->sc_rn, svc_rdma_xprt_done))
+ goto errout;
+
+ newxprt->sc_max_req_size = svcrdma_max_req_size;
+ newxprt->sc_max_requests = svcrdma_max_requests;
+ newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
+ newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH;
+ newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
+
+ /* Qualify the transport's resource defaults with the
+ * capabilities of this particular device.
+ */
+
/* Transport header, head iovec, tail iovec */
newxprt->sc_max_send_sges = 3;
/* Add one SGE per page list entry */
newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
- newxprt->sc_max_req_size = svcrdma_max_req_size;
- newxprt->sc_max_requests = svcrdma_max_requests;
- newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
- newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH;
rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests +
- newxprt->sc_recv_batch;
+ newxprt->sc_recv_batch + 1 /* drain */;
if (rq_depth > dev->attrs.max_qp_wr) {
- pr_warn("svcrdma: reducing receive depth to %d\n",
- dev->attrs.max_qp_wr);
rq_depth = dev->attrs.max_qp_wr;
newxprt->sc_recv_batch = 1;
newxprt->sc_max_requests = rq_depth - 2;
newxprt->sc_max_bc_requests = 2;
}
- newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
- ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
- ctxts *= newxprt->sc_max_requests;
+
+ /* Arbitrary estimate of the needed number of rdma_rw contexts.
+ */
+ maxpayload = min(xprt->xpt_server->sv_max_payload,
+ RPCSVC_MAXPAYLOAD_RDMA);
+ ctxts = newxprt->sc_max_requests * 3 *
+ rdma_rw_mr_factor(dev, newxprt->sc_port_num,
+ maxpayload >> PAGE_SHIFT);
+
newxprt->sc_sq_depth = rq_depth + ctxts;
- if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
- pr_warn("svcrdma: reducing send depth to %d\n",
- dev->attrs.max_qp_wr);
+ if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
- }
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
newxprt->sc_pd = ib_alloc_pd(dev, 0);
@@ -453,18 +502,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.qp_type = IB_QPT_RC;
qp_attr.send_cq = newxprt->sc_sq_cq;
qp_attr.recv_cq = newxprt->sc_rq_cq;
- dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n",
- newxprt->sc_cm_id, newxprt->sc_pd);
dprintk(" cap.max_send_wr = %d, cap.max_recv_wr = %d\n",
qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge);
-
+ dprintk(" send CQ depth = %u, recv CQ depth = %u\n",
+ newxprt->sc_sq_depth, rq_depth);
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
trace_svcrdma_qp_err(newxprt, ret);
goto errout;
}
+ newxprt->sc_max_send_sges = qp_attr.cap.max_send_sge;
newxprt->sc_qp = newxprt->sc_cm_id->qp;
if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
@@ -508,7 +557,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
}
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- dprintk("svcrdma: new connection %p accepted:\n", newxprt);
+ dprintk("svcrdma: new connection accepted on device %s:\n", dev->name);
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
@@ -528,6 +577,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
ib_destroy_qp(newxprt->sc_qp);
rdma_destroy_id(newxprt->sc_cm_id);
+ rpcrdma_rn_unregister(dev, &newxprt->sc_rn);
/* This call to put will destroy the transport */
svc_xprt_put(&newxprt->sc_xprt);
return NULL;
@@ -541,14 +591,22 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
rdma_disconnect(rdma->sc_cm_id);
}
-static void __svc_rdma_free(struct work_struct *work)
+/**
+ * svc_rdma_free - Release class-specific transport resources
+ * @xprt: Generic svc transport object
+ */
+static void svc_rdma_free(struct svc_xprt *xprt)
{
struct svcxprt_rdma *rdma =
- container_of(work, struct svcxprt_rdma, sc_work);
+ container_of(xprt, struct svcxprt_rdma, sc_xprt);
+ struct ib_device *device = rdma->sc_cm_id->device;
+
+ might_sleep();
/* This blocks until the Completion Queues are empty */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);
+ flush_workqueue(svcrdma_wq);
svc_rdma_flush_recv_queues(rdma);
@@ -572,18 +630,11 @@ static void __svc_rdma_free(struct work_struct *work)
/* Destroy the CM ID */
rdma_destroy_id(rdma->sc_cm_id);
+ if (!test_bit(XPT_LISTENER, &rdma->sc_xprt.xpt_flags))
+ rpcrdma_rn_unregister(device, &rdma->sc_rn);
kfree(rdma);
}
-static void svc_rdma_free(struct svc_xprt *xprt)
-{
- struct svcxprt_rdma *rdma =
- container_of(xprt, struct svcxprt_rdma, sc_xprt);
-
- INIT_WORK(&rdma->sc_work, __svc_rdma_free);
- schedule_work(&rdma->sc_work);
-}
-
static int svc_rdma_has_wspace(struct svc_xprt *xprt)
{
struct svcxprt_rdma *rdma =
@@ -600,11 +651,6 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1;
}
-static void svc_rdma_secure_port(struct svc_rqst *rqstp)
-{
- set_bit(RQ_SECURE, &rqstp->rq_flags);
-}
-
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
{
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 10bb2b929c6d..9a8ce5df83ca 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -137,16 +137,6 @@ static struct ctl_table xr_tunables_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { },
-};
-
-static struct ctl_table sunrpc_table[] = {
- {
- .procname = "sunrpc",
- .mode = 0555,
- .child = xr_tunables_table
- },
- { },
};
#endif
@@ -799,7 +789,7 @@ int xprt_rdma_init(void)
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (!sunrpc_table_header)
- sunrpc_table_header = register_sysctl_table(sunrpc_table);
+ sunrpc_table_header = register_sysctl("sunrpc", xr_tunables_table);
#endif
return 0;
}
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b098fde373ab..63262ef0c2e3 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -49,14 +49,14 @@
* o buffer memory
*/
+#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/log2.h>
-#include <asm-generic/barrier.h>
-#include <asm/bitops.h>
+#include <asm/barrier.h>
#include <rdma/ib_cm.h>
@@ -69,13 +69,15 @@ static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_sendctx *sc);
static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt);
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep);
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_ep_get(struct rpcrdma_ep *ep);
static int rpcrdma_ep_put(struct rpcrdma_ep *ep);
static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction,
+ int node);
+static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction);
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
@@ -222,7 +224,6 @@ static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
static int
rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
- struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr;
struct rpcrdma_ep *ep = id->context;
might_sleep();
@@ -241,10 +242,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
ep->re_async_rc = -ENETUNREACH;
complete(&ep->re_done);
return 0;
- case RDMA_CM_EVENT_DEVICE_REMOVAL:
- pr_info("rpcrdma: removing device %s for %pISpc\n",
- ep->re_id->device->name, sap);
- fallthrough;
case RDMA_CM_EVENT_ADDR_CHANGE:
ep->re_connect_status = -ENODEV;
goto disconnected;
@@ -280,6 +277,14 @@ disconnected:
return 0;
}
+static void rpcrdma_ep_removal_done(struct rpcrdma_notification *rn)
+{
+ struct rpcrdma_ep *ep = container_of(rn, struct rpcrdma_ep, re_rn);
+
+ trace_xprtrdma_device_removal(ep->re_id);
+ xprt_force_disconnect(ep->re_xprt);
+}
+
static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_ep *ep)
{
@@ -319,6 +324,10 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
if (rc)
goto out;
+ rc = rpcrdma_rn_register(id->device, &ep->re_rn, rpcrdma_ep_removal_done);
+ if (rc)
+ goto out;
+
return id;
out:
@@ -346,6 +355,8 @@ static void rpcrdma_ep_destroy(struct kref *kref)
ib_dealloc_pd(ep->re_pd);
ep->re_pd = NULL;
+ rpcrdma_rn_unregister(ep->re_id->device, &ep->re_rn);
+
kfree(ep);
module_put(THIS_MODULE);
}
@@ -501,7 +512,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
* outstanding Receives.
*/
rpcrdma_ep_get(ep);
- rpcrdma_post_recvs(r_xprt, 1, true);
+ rpcrdma_post_recvs(r_xprt, 1);
rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
if (rc)
@@ -893,6 +904,8 @@ static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt)
static void rpcrdma_req_reset(struct rpcrdma_req *req)
{
+ struct rpcrdma_mr *mr;
+
/* Credits are valid for only one connection */
req->rl_slot.rq_cong = 0;
@@ -902,7 +915,19 @@ static void rpcrdma_req_reset(struct rpcrdma_req *req)
rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
- frwr_reset(req);
+ /* The verbs consumer can't know the state of an MR on the
+ * req->rl_registered list unless a successful completion
+ * has occurred, so they cannot be re-used.
+ */
+ while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
+ struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf;
+
+ spin_lock(&buf->rb_lock);
+ list_del(&mr->mr_all);
+ spin_unlock(&buf->rb_lock);
+
+ frwr_mr_release(mr);
+ }
}
/* ASSUMPTION: the rb_allreqs list is stable for the duration,
@@ -920,24 +945,23 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
}
static noinline
-struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
- bool temp)
+struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+ struct rpcrdma_ep *ep = r_xprt->rx_ep;
+ struct ib_device *device = ep->re_id->device;
struct rpcrdma_rep *rep;
rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS);
if (rep == NULL)
goto out;
- rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv,
- DMA_FROM_DEVICE);
+ rep->rr_rdmabuf = rpcrdma_regbuf_alloc_node(ep->re_inline_recv,
+ DMA_FROM_DEVICE,
+ ibdev_to_node(device));
if (!rep->rr_rdmabuf)
goto out_free;
- if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
- goto out_free_regbuf;
-
rep->rr_cid.ci_completion_id =
atomic_inc_return(&r_xprt->rx_ep->re_completion_ids);
@@ -949,15 +973,12 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
- rep->rr_temp = temp;
spin_lock(&buf->rb_lock);
list_add(&rep->rr_all, &buf->rb_all_reps);
spin_unlock(&buf->rb_lock);
return rep;
-out_free_regbuf:
- rpcrdma_regbuf_free(rep->rr_rdmabuf);
out_free:
kfree(rep);
out:
@@ -970,17 +991,6 @@ static void rpcrdma_rep_free(struct rpcrdma_rep *rep)
kfree(rep);
}
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
-{
- struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
-
- spin_lock(&buf->rb_lock);
- list_del(&rep->rr_all);
- spin_unlock(&buf->rb_lock);
-
- rpcrdma_rep_free(rep);
-}
-
static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
{
struct llist_node *node;
@@ -1012,10 +1022,8 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
- list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
+ list_for_each_entry(rep, &buf->rb_all_reps, rr_all)
rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
- rep->rr_temp = true; /* Mark this rep for destruction */
- }
}
static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
@@ -1232,14 +1240,15 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
* or Replies they may be registered externally via frwr_map.
*/
static struct rpcrdma_regbuf *
-rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
+rpcrdma_regbuf_alloc_node(size_t size, enum dma_data_direction direction,
+ int node)
{
struct rpcrdma_regbuf *rb;
- rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS);
+ rb = kmalloc_node(sizeof(*rb), XPRTRDMA_GFP_FLAGS, node);
if (!rb)
return NULL;
- rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS);
+ rb->rg_data = kmalloc_node(size, XPRTRDMA_GFP_FLAGS, node);
if (!rb->rg_data) {
kfree(rb);
return NULL;
@@ -1251,6 +1260,12 @@ rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
return rb;
}
+static struct rpcrdma_regbuf *
+rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction)
+{
+ return rpcrdma_regbuf_alloc_node(size, direction, NUMA_NO_NODE);
+}
+
/**
* rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer
* @rb: regbuf to reallocate
@@ -1328,10 +1343,9 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
* rpcrdma_post_recvs - Refill the Receive Queue
* @r_xprt: controlling transport instance
* @needed: current credit grant
- * @temp: mark Receive buffers to be deleted after one use
*
*/
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
@@ -1345,8 +1359,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
if (likely(ep->re_receive_count > needed))
goto out;
needed -= ep->re_receive_count;
- if (!temp)
- needed += RPCRDMA_MAX_RECV_BATCH;
+ needed += RPCRDMA_MAX_RECV_BATCH;
if (atomic_inc_return(&ep->re_receiving) > 1)
goto out;
@@ -1355,17 +1368,17 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
wr = NULL;
while (needed) {
rep = rpcrdma_rep_get_locked(buf);
- if (rep && rep->rr_temp) {
- rpcrdma_rep_destroy(rep);
- continue;
- }
if (!rep)
- rep = rpcrdma_rep_create(r_xprt, temp);
+ rep = rpcrdma_rep_create(r_xprt);
if (!rep)
break;
+ if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
+ rpcrdma_rep_put(buf, rep);
+ break;
+ }
rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
- trace_xprtrdma_post_recv(rep);
+ trace_xprtrdma_post_recv(&rep->rr_cid);
rep->rr_recv_wr.next = wr;
wr = &rep->rr_recv_wr;
--needed;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 5e5ff6784ef5..8147d2b41494 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -56,6 +56,7 @@
#include <linux/sunrpc/rpc_rdma_cid.h> /* completion IDs */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
+#include <linux/sunrpc/rdma_rn.h> /* removal notifications */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
@@ -92,6 +93,7 @@ struct rpcrdma_ep {
struct rpcrdma_connect_private
re_cm_private;
struct rdma_conn_param re_remote_cma;
+ struct rpcrdma_notification re_rn;
int re_receive_count;
unsigned int re_max_requests; /* depends on device */
unsigned int re_inline_send; /* negotiated */
@@ -198,7 +200,6 @@ struct rpcrdma_rep {
__be32 rr_proc;
int rr_wc_flags;
u32 rr_inv_rkey;
- bool rr_temp;
struct rpcrdma_regbuf *rr_rdmabuf;
struct rpcrdma_xprt *rr_rxprt;
struct rpc_rqst *rr_rqst;
@@ -466,7 +467,7 @@ void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc);
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed);
/*
* Buffer calls - xprtrdma/verbs.c
@@ -593,7 +594,6 @@ void xprt_rdma_cleanup(void);
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *);
-int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index aaa5b2741b79..2e1fe6013361 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,17 +47,22 @@
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
+#include <net/tls_prot.h>
+#include <net/handshake.h>
+
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
+#include <trace/events/sock.h>
#include <trace/events/sunrpc.h>
#include "socklib.h"
#include "sunrpc.h"
static void xs_close(struct rpc_xprt *xprt);
+static void xs_reset_srcport(struct sock_xprt *transport);
static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock);
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock);
@@ -77,7 +82,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
/*
* We can register our own files under /proc/sys/sunrpc by
- * calling register_sysctl_table() again. The files in that
+ * calling register_sysctl() again. The files in that
* directory become the union of all files registered there.
*
* We simply need to make sure that we don't collide with
@@ -95,6 +100,7 @@ static struct ctl_table_header *sunrpc_table_header;
static struct xprt_class xs_local_transport;
static struct xprt_class xs_udp_transport;
static struct xprt_class xs_tcp_transport;
+static struct xprt_class xs_tcp_tls_transport;
static struct xprt_class xs_bc_tcp_transport;
/*
@@ -154,16 +160,6 @@ static struct ctl_table xs_tunables_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { },
-};
-
-static struct ctl_table sunrpc_table[] = {
- {
- .procname = "sunrpc",
- .mode = 0555,
- .child = xs_tunables_table
- },
- { },
};
/*
@@ -195,6 +191,11 @@ static struct ctl_table sunrpc_table[] = {
*/
#define XS_IDLE_DISC_TO (5U * 60 * HZ)
+/*
+ * TLS handshake timeout.
+ */
+#define XS_TLS_HANDSHAKE_TO (10U * HZ)
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# undef RPC_DEBUG_DATA
# define RPCDBG_FACILITY RPCDBG_TRANS
@@ -261,7 +262,12 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
switch (sap->sa_family) {
case AF_LOCAL:
sun = xs_addr_un(xprt);
- strscpy(buf, sun->sun_path, sizeof(buf));
+ if (sun->sun_path[0]) {
+ strscpy(buf, sun->sun_path, sizeof(buf));
+ } else {
+ buf[0] = '@';
+ strscpy(buf+1, sun->sun_path+1, sizeof(buf)-1);
+ }
xprt->address_strings[RPC_DISPLAY_ADDR] =
kstrdup(buf, GFP_KERNEL);
break;
@@ -350,6 +356,66 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
return want;
}
+static int
+xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg,
+ unsigned int *msg_flags, struct cmsghdr *cmsg, int ret)
+{
+ u8 content_type = tls_get_record_type(sock->sk, cmsg);
+ u8 level, description;
+
+ switch (content_type) {
+ case 0:
+ break;
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ *msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ tls_alert_recv(sock->sk, msg, &level, &description);
+ ret = (level == TLS_ALERT_LEVEL_FATAL) ?
+ -EACCES : -EAGAIN;
+ break;
+ default:
+ /* discard this record type */
+ ret = -EAGAIN;
+ }
+ return ret;
+}
+
+static int
+xs_sock_recv_cmsg(struct socket *sock, unsigned int *msg_flags, int flags)
+{
+ union {
+ struct cmsghdr cmsg;
+ u8 buf[CMSG_SPACE(sizeof(u8))];
+ } u;
+ u8 alert[2];
+ struct kvec alert_kvec = {
+ .iov_base = alert,
+ .iov_len = sizeof(alert),
+ };
+ struct msghdr msg = {
+ .msg_flags = *msg_flags,
+ .msg_control = &u,
+ .msg_controllen = sizeof(u),
+ };
+ int ret;
+
+ iov_iter_kvec(&msg.msg_iter, ITER_DEST, &alert_kvec, 1,
+ alert_kvec.iov_len);
+ ret = sock_recvmsg(sock, &msg, flags);
+ if (ret > 0) {
+ if (tls_get_record_type(sock->sk, &u.cmsg) == TLS_RECORD_TYPE_ALERT)
+ iov_iter_revert(&msg.msg_iter, ret);
+ ret = xs_sock_process_cmsg(sock, &msg, msg_flags, &u.cmsg,
+ -EAGAIN);
+ }
+ return ret;
+}
+
static ssize_t
xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
{
@@ -357,6 +423,12 @@ xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
if (seek != 0)
iov_iter_advance(&msg->msg_iter, seek);
ret = sock_recvmsg(sock, msg, flags);
+ /* Handle TLS inband control message lazily */
+ if (msg->msg_flags & MSG_CTRUNC) {
+ msg->msg_flags &= ~(MSG_CTRUNC | MSG_EOR);
+ if (ret == 0 || ret == -EIO)
+ ret = xs_sock_recv_cmsg(sock, &msg->msg_flags, flags);
+ }
return ret > 0 ? ret + seek : ret;
}
@@ -382,7 +454,7 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
size_t count)
{
iov_iter_discard(&msg->msg_iter, ITER_DEST, count);
- return sock_recvmsg(sock, msg, flags);
+ return xs_sock_recvmsg(sock, msg, flags, 0);
}
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
@@ -703,6 +775,8 @@ static void xs_poll_check_readable(struct sock_xprt *transport)
{
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+ if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state))
+ return;
if (!xs_poll_socket_readable(transport))
return;
if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
@@ -726,6 +800,8 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
}
if (ret == -ESHUTDOWN)
kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+ else if (ret == -EACCES)
+ xprt_wake_pending_tasks(&transport->xprt, -EACCES);
else
xs_poll_check_readable(transport);
out:
@@ -827,6 +903,17 @@ static int xs_stream_prepare_request(struct rpc_rqst *req, struct xdr_buf *buf)
return xdr_alloc_bvec(buf, rpc_task_gfp_mask());
}
+static void xs_stream_abort_send_request(struct rpc_rqst *req)
+{
+ struct rpc_xprt *xprt = req->rq_xprt;
+ struct sock_xprt *transport =
+ container_of(xprt, struct sock_xprt, xprt);
+
+ if (transport->xmit.offset != 0 &&
+ !test_bit(XPRT_CLOSE_WAIT, &xprt->state))
+ xprt_force_disconnect(xprt);
+}
+
/*
* Determine if the previous message in the stream was aborted before it
* could complete transmission.
@@ -1125,11 +1212,13 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ transport->xprt_err = 0;
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
+ clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
}
static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
@@ -1199,6 +1288,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
if (atomic_read(&transport->xprt.swapper))
sk_clear_memalloc(sk);
+ tls_handshake_cancel(sk);
+
kernel_sock_shutdown(sock, SHUT_RDWR);
mutex_lock(&transport->recv_mutex);
@@ -1208,6 +1299,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
transport->file = NULL;
sk->sk_user_data = NULL;
+ sk->sk_sndtimeo = 0;
xs_restore_old_callbacks(transport, sk);
xprt_clear_connected(xprt);
@@ -1239,6 +1331,8 @@ static void xs_close(struct rpc_xprt *xprt)
dprintk("RPC: xs_close xprt %p\n", xprt);
+ if (transport->sock)
+ tls_handshake_close(transport->sock);
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
}
@@ -1378,6 +1472,8 @@ static void xs_data_ready(struct sock *sk)
{
struct rpc_xprt *xprt;
+ trace_sk_data_ready(sk);
+
xprt = xprt_from_sock(sk);
if (xprt != NULL) {
struct sock_xprt *transport = container_of(xprt,
@@ -1386,6 +1482,10 @@ static void xs_data_ready(struct sock *sk)
trace_xs_data_ready(xprt);
transport->old_data_ready(sk);
+
+ if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state))
+ return;
+
/* Any data means we had a useful conversation, so
* then we don't need to delay the next reconnect
*/
@@ -1498,8 +1598,10 @@ static void xs_tcp_state_change(struct sock *sk)
break;
case TCP_CLOSE:
if (test_and_clear_bit(XPRT_SOCK_CONNECTING,
- &transport->sock_state))
+ &transport->sock_state)) {
+ xs_reset_srcport(transport);
xprt_clear_connecting(xprt);
+ }
clear_bit(XPRT_CLOSING, &xprt->state);
/* Trigger the socket release */
xs_run_error_worker(transport, XPRT_SOCK_WAKE_DISCONNECT);
@@ -1655,6 +1757,11 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
xs_update_peer_port(xprt);
}
+static void xs_reset_srcport(struct sock_xprt *transport)
+{
+ transport->srcport = 0;
+}
+
static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
{
if (transport->srcport == 0 && transport->xprt.reuseport)
@@ -1738,8 +1845,8 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
do {
rpc_set_port((struct sockaddr *)&myaddr, port);
- err = kernel_bind(sock, (struct sockaddr *)&myaddr,
- transport->xprt.addrlen);
+ err = kernel_bind(sock, (struct sockaddr_unsized *)&myaddr,
+ transport->xprt.addrlen);
if (err == 0) {
if (transport->xprt.reuseport)
transport->srcport = port;
@@ -1854,6 +1961,9 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
goto out;
}
+ if (protocol == IPPROTO_TCP)
+ sk_net_refcnt_upgrade(sock->sk);
+
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
if (IS_ERR(filp))
return ERR_CAST(filp);
@@ -1895,7 +2005,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
xs_stream_start_connect(transport);
- return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
+ return kernel_connect(sock, (struct sockaddr_unsized *)xs_addr(xprt), xprt->addrlen, 0);
}
/**
@@ -2155,6 +2265,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
switch (skst) {
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
+ case TCP_LAST_ACK:
break;
case TCP_ESTABLISHED:
case TCP_CLOSE_WAIT:
@@ -2170,9 +2281,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct net *net = sock_net(sock->sk);
+ unsigned long connect_timeout;
+ unsigned long syn_retries;
unsigned int keepidle;
unsigned int keepcnt;
unsigned int timeo;
+ unsigned long t;
spin_lock(&xprt->transport_lock);
keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
@@ -2190,6 +2305,35 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
/* TCP user timeout (see RFC5482) */
tcp_sock_set_user_timeout(sock->sk, timeo);
+
+ /* Connect timeout */
+ connect_timeout = max_t(unsigned long,
+ DIV_ROUND_UP(xprt->connect_timeout, HZ), 1);
+ syn_retries = max_t(unsigned long,
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries), 1);
+ for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout; t++)
+ ;
+ if (t <= syn_retries)
+ tcp_sock_set_syncnt(sock->sk, t - 1);
+}
+
+static void xs_tcp_do_set_connect_timeout(struct rpc_xprt *xprt,
+ unsigned long connect_timeout)
+{
+ struct sock_xprt *transport =
+ container_of(xprt, struct sock_xprt, xprt);
+ struct rpc_timeout to;
+ unsigned long initval;
+
+ memcpy(&to, xprt->timeout, sizeof(to));
+ /* Arbitrary lower limit */
+ initval = max_t(unsigned long, connect_timeout, XS_TCP_INIT_REEST_TO);
+ to.to_initval = initval;
+ to.to_maxval = initval;
+ to.to_retries = 0;
+ memcpy(&transport->tcp_timeout, &to, sizeof(transport->tcp_timeout));
+ xprt->timeout = &transport->tcp_timeout;
+ xprt->connect_timeout = connect_timeout;
}
static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
@@ -2197,25 +2341,12 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
unsigned long reconnect_timeout)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- struct rpc_timeout to;
- unsigned long initval;
spin_lock(&xprt->transport_lock);
if (reconnect_timeout < xprt->max_reconnect_timeout)
xprt->max_reconnect_timeout = reconnect_timeout;
- if (connect_timeout < xprt->connect_timeout) {
- memcpy(&to, xprt->timeout, sizeof(to));
- initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1);
- /* Arbitrary lower limit */
- if (initval < XS_TCP_INIT_REEST_TO << 1)
- initval = XS_TCP_INIT_REEST_TO << 1;
- to.to_initval = initval;
- to.to_maxval = initval;
- memcpy(&transport->tcp_timeout, &to,
- sizeof(transport->tcp_timeout));
- xprt->timeout = &transport->tcp_timeout;
- xprt->connect_timeout = connect_timeout;
- }
+ if (connect_timeout < xprt->connect_timeout)
+ xs_tcp_do_set_connect_timeout(xprt, connect_timeout);
set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
spin_unlock(&xprt->transport_lock);
}
@@ -2274,7 +2405,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
/* Tell the socket layer to start connecting... */
set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
- return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
+ return kernel_connect(sock, (struct sockaddr_unsized *)xs_addr(xprt),
+ xprt->addrlen, O_NONBLOCK);
}
/**
@@ -2335,6 +2467,13 @@ static void xs_tcp_setup_socket(struct work_struct *work)
transport->srcport = 0;
status = -EAGAIN;
break;
+ case -EPERM:
+ /* Happens, for instance, if a BPF program is preventing
+ * the connect. Remap the error so upper layers can better
+ * deal with it.
+ */
+ status = -ECONNREFUSED;
+ fallthrough;
case -EINVAL:
/* Happens, for instance, if the user specified a link
* local IPv6 address without a scope-id.
@@ -2346,6 +2485,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EHOSTUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
+ case -ENOTCONN:
break;
default:
printk("%s: connect returned unhandled error %d\n",
@@ -2365,6 +2505,275 @@ out_unlock:
current_restore_flags(pflags, PF_MEMALLOC);
}
+/*
+ * Transfer the connected socket to @upper_transport, then mark that
+ * xprt CONNECTED.
+ */
+static int xs_tcp_tls_finish_connecting(struct rpc_xprt *lower_xprt,
+ struct sock_xprt *upper_transport)
+{
+ struct sock_xprt *lower_transport =
+ container_of(lower_xprt, struct sock_xprt, xprt);
+ struct rpc_xprt *upper_xprt = &upper_transport->xprt;
+
+ if (!upper_transport->inet) {
+ struct socket *sock = lower_transport->sock;
+ struct sock *sk = sock->sk;
+
+ /* Avoid temporary address, they are bad for long-lived
+ * connections such as NFS mounts.
+ * RFC4941, section 3.6 suggests that:
+ * Individual applications, which have specific
+ * knowledge about the normal duration of connections,
+ * MAY override this as appropriate.
+ */
+ if (xs_addr(upper_xprt)->sa_family == PF_INET6)
+ ip6_sock_set_addr_preferences(sk, IPV6_PREFER_SRC_PUBLIC);
+
+ xs_tcp_set_socket_timeouts(upper_xprt, sock);
+ tcp_sock_set_nodelay(sk);
+
+ lock_sock(sk);
+
+ /* @sk is already connected, so it now has the RPC callbacks.
+ * Reach into @lower_transport to save the original ones.
+ */
+ upper_transport->old_data_ready = lower_transport->old_data_ready;
+ upper_transport->old_state_change = lower_transport->old_state_change;
+ upper_transport->old_write_space = lower_transport->old_write_space;
+ upper_transport->old_error_report = lower_transport->old_error_report;
+ sk->sk_user_data = upper_xprt;
+
+ /* socket options */
+ sock_reset_flag(sk, SOCK_LINGER);
+
+ xprt_clear_connected(upper_xprt);
+
+ upper_transport->sock = sock;
+ upper_transport->inet = sk;
+ upper_transport->file = lower_transport->file;
+
+ release_sock(sk);
+
+ /* Reset lower_transport before shutting down its clnt */
+ mutex_lock(&lower_transport->recv_mutex);
+ lower_transport->inet = NULL;
+ lower_transport->sock = NULL;
+ lower_transport->file = NULL;
+
+ xprt_clear_connected(lower_xprt);
+ xs_sock_reset_connection_flags(lower_xprt);
+ xs_stream_reset_connect(lower_transport);
+ mutex_unlock(&lower_transport->recv_mutex);
+ }
+
+ if (!xprt_bound(upper_xprt))
+ return -ENOTCONN;
+
+ xs_set_memalloc(upper_xprt);
+
+ if (!xprt_test_and_set_connected(upper_xprt)) {
+ upper_xprt->connect_cookie++;
+ clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state);
+ xprt_clear_connecting(upper_xprt);
+
+ upper_xprt->stat.connect_count++;
+ upper_xprt->stat.connect_time += (long)jiffies -
+ upper_xprt->stat.connect_start;
+ xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING);
+ }
+ return 0;
+}
+
+/**
+ * xs_tls_handshake_done - TLS handshake completion handler
+ * @data: address of xprt to wake
+ * @status: status of handshake
+ * @peerid: serial number of key containing the remote's identity
+ *
+ */
+static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid)
+{
+ struct rpc_xprt *lower_xprt = data;
+ struct sock_xprt *lower_transport =
+ container_of(lower_xprt, struct sock_xprt, xprt);
+
+ switch (status) {
+ case 0:
+ case -EACCES:
+ case -ETIMEDOUT:
+ lower_transport->xprt_err = status;
+ break;
+ default:
+ lower_transport->xprt_err = -EACCES;
+ }
+ complete(&lower_transport->handshake_done);
+ xprt_put(lower_xprt);
+}
+
+static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_parms *xprtsec)
+{
+ struct sock_xprt *lower_transport =
+ container_of(lower_xprt, struct sock_xprt, xprt);
+ struct tls_handshake_args args = {
+ .ta_sock = lower_transport->sock,
+ .ta_done = xs_tls_handshake_done,
+ .ta_data = xprt_get(lower_xprt),
+ .ta_peername = lower_xprt->servername,
+ };
+ struct sock *sk = lower_transport->inet;
+ int rc;
+
+ init_completion(&lower_transport->handshake_done);
+ set_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state);
+ lower_transport->xprt_err = -ETIMEDOUT;
+ switch (xprtsec->policy) {
+ case RPC_XPRTSEC_TLS_ANON:
+ rc = tls_client_hello_anon(&args, GFP_KERNEL);
+ if (rc)
+ goto out_put_xprt;
+ break;
+ case RPC_XPRTSEC_TLS_X509:
+ args.ta_my_cert = xprtsec->cert_serial;
+ args.ta_my_privkey = xprtsec->privkey_serial;
+ rc = tls_client_hello_x509(&args, GFP_KERNEL);
+ if (rc)
+ goto out_put_xprt;
+ break;
+ default:
+ rc = -EACCES;
+ goto out_put_xprt;
+ }
+
+ rc = wait_for_completion_interruptible_timeout(&lower_transport->handshake_done,
+ XS_TLS_HANDSHAKE_TO);
+ if (rc <= 0) {
+ tls_handshake_cancel(sk);
+ if (rc == 0)
+ rc = -ETIMEDOUT;
+ goto out_put_xprt;
+ }
+
+ rc = lower_transport->xprt_err;
+
+out:
+ xs_stream_reset_connect(lower_transport);
+ clear_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state);
+ return rc;
+
+out_put_xprt:
+ xprt_put(lower_xprt);
+ goto out;
+}
+
+/**
+ * xs_tcp_tls_setup_socket - establish a TLS session on a TCP socket
+ * @work: queued work item
+ *
+ * Invoked by a work queue tasklet.
+ *
+ * For RPC-with-TLS, there is a two-stage connection process.
+ *
+ * The "upper-layer xprt" is visible to the RPC consumer. Once it has
+ * been marked connected, the consumer knows that a TCP connection and
+ * a TLS session have been established.
+ *
+ * A "lower-layer xprt", created in this function, handles the mechanics
+ * of connecting the TCP socket, performing the RPC_AUTH_TLS probe, and
+ * then driving the TLS handshake. Once all that is complete, the upper
+ * layer xprt is marked connected.
+ */
+static void xs_tcp_tls_setup_socket(struct work_struct *work)
+{
+ struct sock_xprt *upper_transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_clnt *upper_clnt = upper_transport->clnt;
+ struct rpc_xprt *upper_xprt = &upper_transport->xprt;
+ struct rpc_create_args args = {
+ .net = upper_xprt->xprt_net,
+ .protocol = upper_xprt->prot,
+ .address = (struct sockaddr *)&upper_xprt->addr,
+ .addrsize = upper_xprt->addrlen,
+ .timeout = upper_clnt->cl_timeout,
+ .servername = upper_xprt->servername,
+ .program = upper_clnt->cl_program,
+ .prognumber = upper_clnt->cl_prog,
+ .version = upper_clnt->cl_vers,
+ .authflavor = RPC_AUTH_TLS,
+ .cred = upper_clnt->cl_cred,
+ .xprtsec = {
+ .policy = RPC_XPRTSEC_NONE,
+ },
+ .stats = upper_clnt->cl_stats,
+ };
+ unsigned int pflags = current->flags;
+ struct rpc_clnt *lower_clnt;
+ struct rpc_xprt *lower_xprt;
+ int status;
+
+ if (atomic_read(&upper_xprt->swapper))
+ current->flags |= PF_MEMALLOC;
+
+ xs_stream_start_connect(upper_transport);
+
+ /* This implicitly sends an RPC_AUTH_TLS probe */
+ lower_clnt = rpc_create(&args);
+ if (IS_ERR(lower_clnt)) {
+ trace_rpc_tls_unavailable(upper_clnt, upper_xprt);
+ clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state);
+ xprt_clear_connecting(upper_xprt);
+ xprt_wake_pending_tasks(upper_xprt, PTR_ERR(lower_clnt));
+ xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING);
+ goto out_unlock;
+ }
+
+ /* RPC_AUTH_TLS probe was successful. Try a TLS handshake on
+ * the lower xprt.
+ */
+ rcu_read_lock();
+ lower_xprt = rcu_dereference(lower_clnt->cl_xprt);
+ rcu_read_unlock();
+
+ if (wait_on_bit_lock(&lower_xprt->state, XPRT_LOCKED, TASK_KILLABLE))
+ goto out_unlock;
+
+ status = xs_tls_handshake_sync(lower_xprt, &upper_xprt->xprtsec);
+ if (status) {
+ trace_rpc_tls_not_started(upper_clnt, upper_xprt);
+ goto out_close;
+ }
+
+ status = xs_tcp_tls_finish_connecting(lower_xprt, upper_transport);
+ if (status)
+ goto out_close;
+ xprt_release_write(lower_xprt, NULL);
+ trace_rpc_socket_connect(upper_xprt, upper_transport->sock, 0);
+ rpc_shutdown_client(lower_clnt);
+
+ /* Check for ingress data that arrived before the socket's
+ * ->data_ready callback was set up.
+ */
+ xs_poll_check_readable(upper_transport);
+
+out_unlock:
+ current_restore_flags(pflags, PF_MEMALLOC);
+ upper_transport->clnt = NULL;
+ xprt_unlock_connect(upper_xprt, upper_transport);
+ return;
+
+out_close:
+ xprt_release_write(lower_xprt, NULL);
+ rpc_shutdown_client(lower_clnt);
+
+ /* xprt_force_disconnect() wakes tasks with a fixed tk_status code.
+ * Wake them first here to ensure they get our tk_status code.
+ */
+ xprt_wake_pending_tasks(upper_xprt, status);
+ xs_tcp_force_close(upper_xprt);
+ xprt_clear_connecting(upper_xprt);
+ goto out_unlock;
+}
+
/**
* xs_connect - connect a socket to a remote endpoint
* @xprt: pointer to transport structure
@@ -2396,6 +2805,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
} else
dprintk("RPC: xs_connect scheduled xprt %p\n", xprt);
+ transport->clnt = task->tk_client;
queue_delayed_work(xprtiod_workqueue,
&transport->connect_worker,
delay);
@@ -2417,18 +2827,13 @@ static void xs_wake_error(struct sock_xprt *transport)
{
int sockerr;
- if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
- return;
- mutex_lock(&transport->recv_mutex);
- if (transport->sock == NULL)
- goto out;
if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
- goto out;
+ return;
sockerr = xchg(&transport->xprt_err, 0);
- if (sockerr < 0)
+ if (sockerr < 0) {
xprt_wake_pending_tasks(&transport->xprt, sockerr);
-out:
- mutex_unlock(&transport->recv_mutex);
+ xs_tcp_force_close(&transport->xprt);
+ }
}
static void xs_wake_pending(struct sock_xprt *transport)
@@ -2636,20 +3041,11 @@ static int bc_send_request(struct rpc_rqst *req)
return len;
}
-/*
- * The close routine. Since this is client initiated, we do nothing
- */
-
static void bc_close(struct rpc_xprt *xprt)
{
xprt_disconnect_done(xprt);
}
-/*
- * The xprt destroy routine. Again, because this connection is client
- * initiated, we do nothing
- */
-
static void bc_destroy(struct rpc_xprt *xprt)
{
dprintk("RPC: bc_destroy xprt %p\n", xprt);
@@ -2670,6 +3066,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
.buf_free = rpc_free,
.prepare_request = xs_stream_prepare_request,
.send_request = xs_local_send_request,
+ .abort_send_request = xs_stream_abort_send_request,
.wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = xs_close,
.destroy = xs_destroy,
@@ -2717,6 +3114,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
.buf_free = rpc_free,
.prepare_request = xs_stream_prepare_request,
.send_request = xs_tcp_send_request,
+ .abort_send_request = xs_stream_abort_send_request,
.wait_for_reply_request = xprt_wait_for_reply_request_def,
.close = xs_tcp_shutdown,
.destroy = xs_destroy,
@@ -2863,7 +3261,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
switch (sun->sun_family) {
case AF_LOCAL:
- if (sun->sun_path[0] != '/') {
+ if (sun->sun_path[0] != '/' && sun->sun_path[0] != '\0') {
dprintk("RPC: bad AF_LOCAL address: %s\n",
sun->sun_path);
ret = ERR_PTR(-EINVAL);
@@ -3006,8 +3404,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt->timeout = &xs_tcp_default_timeout;
xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+ if (args->reconnect_timeout)
+ xprt->max_reconnect_timeout = args->reconnect_timeout;
+
xprt->connect_timeout = xprt->timeout->to_initval *
(xprt->timeout->to_retries + 1);
+ if (args->connect_timeout)
+ xs_tcp_do_set_connect_timeout(xprt, args->connect_timeout);
INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
INIT_WORK(&transport->error_worker, xs_error_handle);
@@ -3050,6 +3453,94 @@ out_err:
}
/**
+ * xs_setup_tcp_tls - Set up transport to use a TCP with TLS
+ * @args: rpc transport creation arguments
+ *
+ */
+static struct rpc_xprt *xs_setup_tcp_tls(struct xprt_create *args)
+{
+ struct sockaddr *addr = args->dstaddr;
+ struct rpc_xprt *xprt;
+ struct sock_xprt *transport;
+ struct rpc_xprt *ret;
+ unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries;
+
+ if (args->flags & XPRT_CREATE_INFINITE_SLOTS)
+ max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT;
+
+ xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+ max_slot_table_size);
+ if (IS_ERR(xprt))
+ return xprt;
+ transport = container_of(xprt, struct sock_xprt, xprt);
+
+ xprt->prot = IPPROTO_TCP;
+ xprt->xprt_class = &xs_tcp_transport;
+ xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
+
+ xprt->bind_timeout = XS_BIND_TO;
+ xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+ xprt->idle_timeout = XS_IDLE_DISC_TO;
+
+ xprt->ops = &xs_tcp_ops;
+ xprt->timeout = &xs_tcp_default_timeout;
+
+ xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+ xprt->connect_timeout = xprt->timeout->to_initval *
+ (xprt->timeout->to_retries + 1);
+
+ INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+ INIT_WORK(&transport->error_worker, xs_error_handle);
+
+ switch (args->xprtsec.policy) {
+ case RPC_XPRTSEC_TLS_ANON:
+ case RPC_XPRTSEC_TLS_X509:
+ xprt->xprtsec = args->xprtsec;
+ INIT_DELAYED_WORK(&transport->connect_worker,
+ xs_tcp_tls_setup_socket);
+ break;
+ default:
+ ret = ERR_PTR(-EACCES);
+ goto out_err;
+ }
+
+ switch (addr->sa_family) {
+ case AF_INET:
+ if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+ xprt_set_bound(xprt);
+
+ xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
+ break;
+ case AF_INET6:
+ if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+ xprt_set_bound(xprt);
+
+ xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
+ break;
+ default:
+ ret = ERR_PTR(-EAFNOSUPPORT);
+ goto out_err;
+ }
+
+ if (xprt_bound(xprt))
+ dprintk("RPC: set up xprt to %s (port %s) via %s\n",
+ xprt->address_strings[RPC_DISPLAY_ADDR],
+ xprt->address_strings[RPC_DISPLAY_PORT],
+ xprt->address_strings[RPC_DISPLAY_PROTO]);
+ else
+ dprintk("RPC: set up xprt to %s (autobind) via %s\n",
+ xprt->address_strings[RPC_DISPLAY_ADDR],
+ xprt->address_strings[RPC_DISPLAY_PROTO]);
+
+ if (try_module_get(THIS_MODULE))
+ return xprt;
+ ret = ERR_PTR(-EINVAL);
+out_err:
+ xs_xprt_free(xprt);
+ return ret;
+}
+
+/**
* xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket
* @args: rpc transport creation arguments
*
@@ -3158,6 +3649,15 @@ static struct xprt_class xs_tcp_transport = {
.netid = { "tcp", "tcp6", "" },
};
+static struct xprt_class xs_tcp_tls_transport = {
+ .list = LIST_HEAD_INIT(xs_tcp_tls_transport.list),
+ .name = "tcp-with-tls",
+ .owner = THIS_MODULE,
+ .ident = XPRT_TRANSPORT_TCP_TLS,
+ .setup = xs_setup_tcp_tls,
+ .netid = { "tcp", "tcp6", "" },
+};
+
static struct xprt_class xs_bc_tcp_transport = {
.list = LIST_HEAD_INIT(xs_bc_tcp_transport.list),
.name = "tcp NFSv4.1 backchannel",
@@ -3174,11 +3674,12 @@ static struct xprt_class xs_bc_tcp_transport = {
int init_socket_xprt(void)
{
if (!sunrpc_table_header)
- sunrpc_table_header = register_sysctl_table(sunrpc_table);
+ sunrpc_table_header = register_sysctl("sunrpc", xs_tunables_table);
xprt_register_transport(&xs_local_transport);
xprt_register_transport(&xs_udp_transport);
xprt_register_transport(&xs_tcp_transport);
+ xprt_register_transport(&xs_tcp_tls_transport);
xprt_register_transport(&xs_bc_tcp_transport);
return 0;
@@ -3198,6 +3699,7 @@ void cleanup_socket_xprt(void)
xprt_unregister_transport(&xs_local_transport);
xprt_unregister_transport(&xs_udp_transport);
xprt_unregister_transport(&xs_tcp_transport);
+ xprt_unregister_transport(&xs_tcp_tls_transport);
xprt_unregister_transport(&xs_bc_tcp_transport);
}