From 3b3009ea8abb713b022d94fba95ec270cf6e7eae Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Apr 2023 10:32:26 -0400 Subject: net/handshake: Create a NETLINK service for handling handshake requests When a kernel consumer needs a transport layer security session, it first needs a handshake to negotiate and establish a session. This negotiation can be done in user space via one of the several existing library implementations, or it can be done in the kernel. No in-kernel handshake implementations yet exist. In their absence, we add a netlink service that can: a. Notify a user space daemon that a handshake is needed. b. Once notified, the daemon calls the kernel back via this netlink service to get the handshake parameters, including an open socket on which to establish the session. c. Once the handshake is complete, the daemon reports the session status and other information via a second netlink operation. This operation marks that it is safe for the kernel to use the open socket and the security session established there. The notification service uses a multicast group. Each handshake mechanism (eg, tlshd) adopts its own group number so that the handshake services are completely independent of one another. The kernel can then tell via netlink_has_listeners() whether a handshake service is active and prepared to handle a handshake request. A new netlink operation, ACCEPT, acts like accept(2) in that it instantiates a file descriptor in the user space daemon's fd table. If this operation is successful, the reply carries the fd number, which can be treated as an open and ready file descriptor. While user space is performing the handshake, the kernel keeps its muddy paws off the open socket. A second new netlink operation, DONE, indicates that the user space daemon is finished with the socket and it is safe for the kernel to use again. The operation also indicates whether a session was established successfully. Signed-off-by: Chuck Lever Signed-off-by: Jakub Kicinski --- net/handshake/request.c | 339 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 net/handshake/request.c (limited to 'net/handshake/request.c') diff --git a/net/handshake/request.c b/net/handshake/request.c new file mode 100644 index 000000000000..d5b2bc6de057 --- /dev/null +++ b/net/handshake/request.c @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Handshake request lifetime events + * + * Author: Chuck Lever + * + * Copyright (c) 2023, Oracle and/or its affiliates. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include "handshake.h" + +#include + +/* + * We need both a handshake_req -> sock mapping, and a sock -> + * handshake_req mapping. Both are one-to-one. + * + * To avoid adding another pointer field to struct sock, net/handshake + * maintains a hash table, indexed by the memory address of @sock, to + * find the struct handshake_req outstanding for that socket. The + * reverse direction uses a simple pointer field in the handshake_req + * struct. + */ + +static struct rhashtable handshake_rhashtbl ____cacheline_aligned_in_smp; + +static const struct rhashtable_params handshake_rhash_params = { + .key_len = sizeof_field(struct handshake_req, hr_sk), + .key_offset = offsetof(struct handshake_req, hr_sk), + .head_offset = offsetof(struct handshake_req, hr_rhash), + .automatic_shrinking = true, +}; + +int handshake_req_hash_init(void) +{ + return rhashtable_init(&handshake_rhashtbl, &handshake_rhash_params); +} + +void handshake_req_hash_destroy(void) +{ + rhashtable_destroy(&handshake_rhashtbl); +} + +struct handshake_req *handshake_req_hash_lookup(struct sock *sk) +{ + return rhashtable_lookup_fast(&handshake_rhashtbl, &sk, + handshake_rhash_params); +} + +static bool handshake_req_hash_add(struct handshake_req *req) +{ + int ret; + + ret = rhashtable_lookup_insert_fast(&handshake_rhashtbl, + &req->hr_rhash, + handshake_rhash_params); + return ret == 0; +} + +static void handshake_req_destroy(struct handshake_req *req) +{ + if (req->hr_proto->hp_destroy) + req->hr_proto->hp_destroy(req); + rhashtable_remove_fast(&handshake_rhashtbl, &req->hr_rhash, + handshake_rhash_params); + kfree(req); +} + +static void handshake_sk_destruct(struct sock *sk) +{ + void (*sk_destruct)(struct sock *sk); + struct handshake_req *req; + + req = handshake_req_hash_lookup(sk); + if (!req) + return; + + trace_handshake_destruct(sock_net(sk), req, sk); + sk_destruct = req->hr_odestruct; + handshake_req_destroy(req); + if (sk_destruct) + sk_destruct(sk); +} + +/** + * handshake_req_alloc - Allocate a handshake request + * @proto: security protocol + * @flags: memory allocation flags + * + * Returns an initialized handshake_req or NULL. + */ +struct handshake_req *handshake_req_alloc(const struct handshake_proto *proto, + gfp_t flags) +{ + struct handshake_req *req; + + if (!proto) + return NULL; + if (proto->hp_handler_class <= HANDSHAKE_HANDLER_CLASS_NONE) + return NULL; + if (proto->hp_handler_class >= HANDSHAKE_HANDLER_CLASS_MAX) + return NULL; + if (!proto->hp_accept || !proto->hp_done) + return NULL; + + req = kzalloc(struct_size(req, hr_priv, proto->hp_privsize), flags); + if (!req) + return NULL; + + INIT_LIST_HEAD(&req->hr_list); + req->hr_proto = proto; + return req; +} +EXPORT_SYMBOL(handshake_req_alloc); + +/** + * handshake_req_private - Get per-handshake private data + * @req: handshake arguments + * + */ +void *handshake_req_private(struct handshake_req *req) +{ + return (void *)&req->hr_priv; +} +EXPORT_SYMBOL(handshake_req_private); + +static bool __add_pending_locked(struct handshake_net *hn, + struct handshake_req *req) +{ + if (WARN_ON_ONCE(!list_empty(&req->hr_list))) + return false; + hn->hn_pending++; + list_add_tail(&req->hr_list, &hn->hn_requests); + return true; +} + +static void __remove_pending_locked(struct handshake_net *hn, + struct handshake_req *req) +{ + hn->hn_pending--; + list_del_init(&req->hr_list); +} + +/* + * Returns %true if the request was found on @net's pending list, + * otherwise %false. + * + * If @req was on a pending list, it has not yet been accepted. + */ +static bool remove_pending(struct handshake_net *hn, struct handshake_req *req) +{ + bool ret = false; + + spin_lock(&hn->hn_lock); + if (!list_empty(&req->hr_list)) { + __remove_pending_locked(hn, req); + ret = true; + } + spin_unlock(&hn->hn_lock); + + return ret; +} + +struct handshake_req *handshake_req_next(struct handshake_net *hn, int class) +{ + struct handshake_req *req, *pos; + + req = NULL; + spin_lock(&hn->hn_lock); + list_for_each_entry(pos, &hn->hn_requests, hr_list) { + if (pos->hr_proto->hp_handler_class != class) + continue; + __remove_pending_locked(hn, pos); + req = pos; + break; + } + spin_unlock(&hn->hn_lock); + + return req; +} + +/** + * handshake_req_submit - Submit a handshake request + * @sock: open socket on which to perform the handshake + * @req: handshake arguments + * @flags: memory allocation flags + * + * Return values: + * %0: Request queued + * %-EINVAL: Invalid argument + * %-EBUSY: A handshake is already under way for this socket + * %-ESRCH: No handshake agent is available + * %-EAGAIN: Too many pending handshake requests + * %-ENOMEM: Failed to allocate memory + * %-EMSGSIZE: Failed to construct notification message + * %-EOPNOTSUPP: Handshake module not initialized + * + * A zero return value from handshake_req_submit() means that + * exactly one subsequent completion callback is guaranteed. + * + * A negative return value from handshake_req_submit() means that + * no completion callback will be done and that @req has been + * destroyed. + */ +int handshake_req_submit(struct socket *sock, struct handshake_req *req, + gfp_t flags) +{ + struct handshake_net *hn; + struct net *net; + int ret; + + if (!sock || !req || !sock->file) { + kfree(req); + return -EINVAL; + } + + req->hr_sk = sock->sk; + if (!req->hr_sk) { + kfree(req); + return -EINVAL; + } + req->hr_odestruct = req->hr_sk->sk_destruct; + req->hr_sk->sk_destruct = handshake_sk_destruct; + + ret = -EOPNOTSUPP; + net = sock_net(req->hr_sk); + hn = handshake_pernet(net); + if (!hn) + goto out_err; + + ret = -EAGAIN; + if (READ_ONCE(hn->hn_pending) >= hn->hn_pending_max) + goto out_err; + + spin_lock(&hn->hn_lock); + ret = -EOPNOTSUPP; + if (test_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags)) + goto out_unlock; + ret = -EBUSY; + if (!handshake_req_hash_add(req)) + goto out_unlock; + if (!__add_pending_locked(hn, req)) + goto out_unlock; + spin_unlock(&hn->hn_lock); + + ret = handshake_genl_notify(net, req->hr_proto, flags); + if (ret) { + trace_handshake_notify_err(net, req, req->hr_sk, ret); + if (remove_pending(hn, req)) + goto out_err; + } + + /* Prevent socket release while a handshake request is pending */ + sock_hold(req->hr_sk); + + trace_handshake_submit(net, req, req->hr_sk); + return 0; + +out_unlock: + spin_unlock(&hn->hn_lock); +out_err: + trace_handshake_submit_err(net, req, req->hr_sk, ret); + handshake_req_destroy(req); + return ret; +} +EXPORT_SYMBOL(handshake_req_submit); + +void handshake_complete(struct handshake_req *req, unsigned int status, + struct genl_info *info) +{ + struct sock *sk = req->hr_sk; + struct net *net = sock_net(sk); + + if (!test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + trace_handshake_complete(net, req, sk, status); + req->hr_proto->hp_done(req, status, info); + + /* Handshake request is no longer pending */ + sock_put(sk); + } +} + +/** + * handshake_req_cancel - Cancel an in-progress handshake + * @sk: socket on which there is an ongoing handshake + * + * Request cancellation races with request completion. To determine + * who won, callers examine the return value from this function. + * + * Return values: + * %true - Uncompleted handshake request was canceled + * %false - Handshake request already completed or not found + */ +bool handshake_req_cancel(struct sock *sk) +{ + struct handshake_req *req; + struct handshake_net *hn; + struct net *net; + + net = sock_net(sk); + req = handshake_req_hash_lookup(sk); + if (!req) { + trace_handshake_cancel_none(net, req, sk); + return false; + } + + hn = handshake_pernet(net); + if (hn && remove_pending(hn, req)) { + /* Request hadn't been accepted */ + goto out_true; + } + if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + /* Request already completed */ + trace_handshake_cancel_busy(net, req, sk); + return false; + } + +out_true: + trace_handshake_cancel(net, req, sk); + + /* Handshake request is no longer pending */ + sock_put(sk); + return true; +} +EXPORT_SYMBOL(handshake_req_cancel); -- cgit From 88232ec1ec5ecf4aa5de439cff3d5e2b7adcac93 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Apr 2023 10:32:39 -0400 Subject: net/handshake: Add Kunit tests for the handshake consumer API These verify the API contracts and help exercise lifetime rules for consumer sockets and handshake_req structures. One way to run these tests: ./tools/testing/kunit/kunit.py run --kunitconfig ./net/handshake/.kunitconfig Signed-off-by: Chuck Lever Signed-off-by: Jakub Kicinski --- net/handshake/request.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/handshake/request.c') diff --git a/net/handshake/request.c b/net/handshake/request.c index d5b2bc6de057..94d5cef3e048 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -20,6 +20,8 @@ #include #include +#include + #include #include "handshake.h" @@ -60,6 +62,7 @@ struct handshake_req *handshake_req_hash_lookup(struct sock *sk) return rhashtable_lookup_fast(&handshake_rhashtbl, &sk, handshake_rhash_params); } +EXPORT_SYMBOL_IF_KUNIT(handshake_req_hash_lookup); static bool handshake_req_hash_add(struct handshake_req *req) { @@ -192,6 +195,7 @@ struct handshake_req *handshake_req_next(struct handshake_net *hn, int class) return req; } +EXPORT_SYMBOL_IF_KUNIT(handshake_req_next); /** * handshake_req_submit - Submit a handshake request @@ -293,6 +297,7 @@ void handshake_complete(struct handshake_req *req, unsigned int status, sock_put(sk); } } +EXPORT_SYMBOL_IF_KUNIT(handshake_complete); /** * handshake_req_cancel - Cancel an in-progress handshake -- cgit From 1ce77c998f0415d7d9d91cb9bd7665e25c8f75f1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 11 May 2023 11:49:17 -0400 Subject: net/handshake: Unpin sock->file if a handshake is cancelled If user space never calls DONE, sock->file's reference count remains elevated. Enable sock->file to be freed eventually in this case. Reported-by: Jakub Kacinski Fixes: 3b3009ea8abb ("net/handshake: Create a NETLINK service for handling handshake requests") Signed-off-by: Chuck Lever Signed-off-by: Jakub Kicinski --- net/handshake/request.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/handshake/request.c') diff --git a/net/handshake/request.c b/net/handshake/request.c index 94d5cef3e048..d78d41abb3d9 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -239,6 +239,7 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, } req->hr_odestruct = req->hr_sk->sk_destruct; req->hr_sk->sk_destruct = handshake_sk_destruct; + req->hr_file = sock->file; ret = -EOPNOTSUPP; net = sock_net(req->hr_sk); @@ -334,6 +335,9 @@ bool handshake_req_cancel(struct sock *sk) return false; } + /* Request accepted and waiting for DONE */ + fput(req->hr_file); + out_true: trace_handshake_cancel(net, req, sk); -- cgit From 361b6889ae636926cdff517add240c3c8e24593a Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 14 Jun 2023 09:52:49 +0800 Subject: net/handshake: remove fput() that causes use-after-free A reference underflow is found in TLS handshake subsystem that causes a direct use-after-free. Part of the crash log is like below: [ 2.022114] ------------[ cut here ]------------ [ 2.022193] refcount_t: underflow; use-after-free. [ 2.022288] WARNING: CPU: 0 PID: 60 at lib/refcount.c:28 refcount_warn_saturate+0xbe/0x110 [ 2.022432] Modules linked in: [ 2.022848] RIP: 0010:refcount_warn_saturate+0xbe/0x110 [ 2.023231] RSP: 0018:ffffc900001bfe18 EFLAGS: 00000286 [ 2.023325] RAX: 0000000000000000 RBX: 0000000000000007 RCX: 00000000ffffdfff [ 2.023438] RDX: 0000000000000000 RSI: 00000000ffffffea RDI: 0000000000000001 [ 2.023555] RBP: ffff888004c20098 R08: ffffffff82b392c8 R09: 00000000ffffdfff [ 2.023693] R10: ffffffff82a592e0 R11: ffffffff82b092e0 R12: ffff888004c200d8 [ 2.023813] R13: 0000000000000000 R14: ffff888004c20000 R15: ffffc90000013ca8 [ 2.023930] FS: 0000000000000000(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 [ 2.024062] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2.024161] CR2: ffff888003601000 CR3: 0000000002a2e000 CR4: 00000000000006f0 [ 2.024275] Call Trace: [ 2.024322] [ 2.024367] ? __warn+0x7f/0x130 [ 2.024430] ? refcount_warn_saturate+0xbe/0x110 [ 2.024513] ? report_bug+0x199/0x1b0 [ 2.024585] ? handle_bug+0x3c/0x70 [ 2.024676] ? exc_invalid_op+0x18/0x70 [ 2.024750] ? asm_exc_invalid_op+0x1a/0x20 [ 2.024830] ? refcount_warn_saturate+0xbe/0x110 [ 2.024916] ? refcount_warn_saturate+0xbe/0x110 [ 2.024998] __tcp_close+0x2f4/0x3d0 [ 2.025065] ? __pfx_kunit_generic_run_threadfn_adapter+0x10/0x10 [ 2.025168] tcp_close+0x1f/0x70 [ 2.025231] inet_release+0x33/0x60 [ 2.025297] sock_release+0x1f/0x80 [ 2.025361] handshake_req_cancel_test2+0x100/0x2d0 [ 2.025457] kunit_try_run_case+0x4c/0xa0 [ 2.025532] kunit_generic_run_threadfn_adapter+0x15/0x20 [ 2.025644] kthread+0xe1/0x110 [ 2.025708] ? __pfx_kthread+0x10/0x10 [ 2.025780] ret_from_fork+0x2c/0x50 One can enable CONFIG_NET_HANDSHAKE_KUNIT_TEST config to reproduce above crash. The root cause of this bug is that the commit 1ce77c998f04 ("net/handshake: Unpin sock->file if a handshake is cancelled") adds one additional fput() function. That patch claims that the fput() is used to enable sock->file to be freed even when user space never calls DONE. However, it seems that the intended DONE routine will never give an additional fput() of ths sock->file. The existing two of them are just used to balance the reference added in sockfd_lookup(). This patch revert the mentioned commit to avoid the use-after-free. The patched kernel could successfully pass the KUNIT test and boot to shell. [ 0.733613] # Subtest: Handshake API tests [ 0.734029] 1..11 [ 0.734255] KTAP version 1 [ 0.734542] # Subtest: req_alloc API fuzzing [ 0.736104] ok 1 handshake_req_alloc NULL proto [ 0.736114] ok 2 handshake_req_alloc CLASS_NONE [ 0.736559] ok 3 handshake_req_alloc CLASS_MAX [ 0.737020] ok 4 handshake_req_alloc no callbacks [ 0.737488] ok 5 handshake_req_alloc no done callback [ 0.737988] ok 6 handshake_req_alloc excessive privsize [ 0.738529] ok 7 handshake_req_alloc all good [ 0.739036] # req_alloc API fuzzing: pass:7 fail:0 skip:0 total:7 [ 0.739444] ok 1 req_alloc API fuzzing [ 0.740065] ok 2 req_submit NULL req arg [ 0.740436] ok 3 req_submit NULL sock arg [ 0.740834] ok 4 req_submit NULL sock->file [ 0.741236] ok 5 req_lookup works [ 0.741621] ok 6 req_submit max pending [ 0.741974] ok 7 req_submit multiple [ 0.742382] ok 8 req_cancel before accept [ 0.742764] ok 9 req_cancel after accept [ 0.743151] ok 10 req_cancel after done [ 0.743510] ok 11 req_destroy works [ 0.743882] # Handshake API tests: pass:11 fail:0 skip:0 total:11 [ 0.744205] # Totals: pass:17 fail:0 skip:0 total:17 Acked-by: Chuck Lever Fixes: 1ce77c998f04 ("net/handshake: Unpin sock->file if a handshake is cancelled") Signed-off-by: Lin Ma Link: https://lore.kernel.org/r/20230613083204.633896-1-linma@zju.edu.cn Link: https://lore.kernel.org/r/20230614015249.987448-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski --- net/handshake/request.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net/handshake/request.c') diff --git a/net/handshake/request.c b/net/handshake/request.c index d78d41abb3d9..94d5cef3e048 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -239,7 +239,6 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, } req->hr_odestruct = req->hr_sk->sk_destruct; req->hr_sk->sk_destruct = handshake_sk_destruct; - req->hr_file = sock->file; ret = -EOPNOTSUPP; net = sock_net(req->hr_sk); @@ -335,9 +334,6 @@ bool handshake_req_cancel(struct sock *sk) return false; } - /* Request accepted and waiting for DONE */ - fput(req->hr_file); - out_true: trace_handshake_cancel(net, req, sk); -- cgit