summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Sitnicki <jakub@cloudflare.com>2020-02-18 17:10:13 +0000
committerDaniel Borkmann <daniel@iogearbox.net>2020-02-21 22:29:45 +0100
commitb8e202d1d1d0f182f01062804efb523ea9a9008c (patch)
tree57a1c1d075e2d470fd9c1487744286c2f3bed747
parente42da4c62abb547d9c9138e0e7fcd1f36057b5e8 (diff)
net, sk_msg: Annotate lockless access to sk_prot on clone
sk_msg and ULP frameworks override protocol callbacks pointer in sk->sk_prot, while tcp accesses it locklessly when cloning the listening socket, that is with neither sk_lock nor sk_callback_lock held. Once we enable use of listening sockets with sockmap (and hence sk_msg), there will be shared access to sk->sk_prot if socket is getting cloned while being inserted/deleted to/from the sockmap from another CPU: Read side: tcp_v4_rcv sk = __inet_lookup_skb(...) tcp_check_req(sk) inet_csk(sk)->icsk_af_ops->syn_recv_sock tcp_v4_syn_recv_sock tcp_create_openreq_child inet_csk_clone_lock sk_clone_lock READ_ONCE(sk->sk_prot) Write side: sock_map_ops->map_update_elem sock_map_update_elem sock_map_update_common sock_map_link_no_progs tcp_bpf_init tcp_bpf_update_sk_prot sk_psock_update_proto WRITE_ONCE(sk->sk_prot, ops) sock_map_ops->map_delete_elem sock_map_delete_elem __sock_map_delete sock_map_unref sk_psock_put sk_psock_drop sk_psock_restore_proto tcp_update_ulp WRITE_ONCE(sk->sk_prot, proto) Mark the shared access with READ_ONCE/WRITE_ONCE annotations. Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Link: https://lore.kernel.org/bpf/20200218171023.844439-2-jakub@cloudflare.com
-rw-r--r--include/linux/skmsg.h3
-rw-r--r--net/core/sock.c8
-rw-r--r--net/ipv4/tcp_bpf.c4
-rw-r--r--net/ipv4/tcp_ulp.c3
-rw-r--r--net/tls/tls_main.c3
5 files changed, 14 insertions, 7 deletions
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index d90ef61712a1..112765bd146d 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -352,7 +352,8 @@ static inline void sk_psock_update_proto(struct sock *sk,
psock->saved_write_space = sk->sk_write_space;
psock->sk_proto = sk->sk_prot;
- sk->sk_prot = ops;
+ /* Pairs with lockless read in sk_clone_lock() */
+ WRITE_ONCE(sk->sk_prot, ops);
}
static inline void sk_psock_restore_proto(struct sock *sk,
diff --git a/net/core/sock.c b/net/core/sock.c
index a4c8fac781ff..bf1173b93eda 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1572,13 +1572,14 @@ static inline void sock_lock_init(struct sock *sk)
*/
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
+ const struct proto *prot = READ_ONCE(osk->sk_prot);
#ifdef CONFIG_SECURITY_NETWORK
void *sptr = nsk->sk_security;
#endif
memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
- osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
+ prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
#ifdef CONFIG_SECURITY_NETWORK
nsk->sk_security = sptr;
@@ -1792,16 +1793,17 @@ static void sk_init_common(struct sock *sk)
*/
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
+ struct proto *prot = READ_ONCE(sk->sk_prot);
struct sock *newsk;
bool is_charged = true;
- newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
+ newsk = sk_prot_alloc(prot, priority, sk->sk_family);
if (newsk != NULL) {
struct sk_filter *filter;
sock_copy(newsk, sk);
- newsk->sk_prot_creator = sk->sk_prot;
+ newsk->sk_prot_creator = prot;
/* SANITY */
if (likely(newsk->sk_net_refcnt))
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 8a01428f80c1..dd183b050642 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -645,8 +645,10 @@ static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
/* Reinit occurs when program types change e.g. TCP_BPF_TX is removed
* or added requiring sk_prot hook updates. We keep original saved
* hooks in this case.
+ *
+ * Pairs with lockless read in sk_clone_lock().
*/
- sk->sk_prot = &tcp_bpf_prots[family][config];
+ WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
}
static int tcp_bpf_assert_proto_ops(struct proto *ops)
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 38d3ad141161..6c43fa189195 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -106,7 +106,8 @@ void tcp_update_ulp(struct sock *sk, struct proto *proto,
if (!icsk->icsk_ulp_ops) {
sk->sk_write_space = write_space;
- sk->sk_prot = proto;
+ /* Pairs with lockless read in sk_clone_lock() */
+ WRITE_ONCE(sk->sk_prot, proto);
return;
}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 94774c0e5ff3..82225bcc1117 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -742,7 +742,8 @@ static void tls_update(struct sock *sk, struct proto *p,
ctx->sk_write_space = write_space;
ctx->sk_proto = p;
} else {
- sk->sk_prot = p;
+ /* Pairs with lockless read in sk_clone_lock(). */
+ WRITE_ONCE(sk->sk_prot, p);
sk->sk_write_space = write_space;
}
}