summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_bpf.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_bpf.c')
-rw-r--r--net/ipv4/tcp_bpf.c272
1 files changed, 101 insertions, 171 deletions
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 8a01428f80c1..5a05327f97c1 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -10,38 +10,6 @@
#include <net/inet_common.h>
#include <net/tls.h>
-static bool tcp_bpf_stream_read(const struct sock *sk)
-{
- struct sk_psock *psock;
- bool empty = true;
-
- rcu_read_lock();
- psock = sk_psock(sk);
- if (likely(psock))
- empty = list_empty(&psock->ingress_msg);
- rcu_read_unlock();
- return !empty;
-}
-
-static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
- int flags, long timeo, int *err)
-{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- int ret = 0;
-
- if (!timeo)
- return ret;
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- ret = sk_wait_event(sk, &timeo,
- !list_empty(&psock->ingress_msg) ||
- !skb_queue_empty(&sk->sk_receive_queue), &wait);
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
- return ret;
-}
-
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
struct msghdr *msg, int len, int flags)
{
@@ -115,49 +83,6 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
}
EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg);
-int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
-{
- struct sk_psock *psock;
- int copied, ret;
-
- psock = sk_psock_get(sk);
- if (unlikely(!psock))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
- if (unlikely(flags & MSG_ERRQUEUE))
- return inet_recv_error(sk, msg, len, addr_len);
- if (!skb_queue_empty(&sk->sk_receive_queue) &&
- sk_psock_queue_empty(psock))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
- lock_sock(sk);
-msg_bytes_ready:
- copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
- if (!copied) {
- int data, err = 0;
- long timeo;
-
- timeo = sock_rcvtimeo(sk, nonblock);
- data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
- if (data) {
- if (!sk_psock_queue_empty(psock))
- goto msg_bytes_ready;
- release_sock(sk);
- sk_psock_put(sk, psock);
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
- }
- if (err) {
- ret = err;
- goto out;
- }
- copied = -EAGAIN;
- }
- ret = copied;
-out:
- release_sock(sk);
- sk_psock_put(sk, psock);
- return ret;
-}
-
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg, u32 apply_bytes, int flags)
{
@@ -298,6 +223,82 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
}
EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
+#ifdef CONFIG_BPF_STREAM_PARSER
+static bool tcp_bpf_stream_read(const struct sock *sk)
+{
+ struct sk_psock *psock;
+ bool empty = true;
+
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (likely(psock))
+ empty = list_empty(&psock->ingress_msg);
+ rcu_read_unlock();
+ return !empty;
+}
+
+static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
+ int flags, long timeo, int *err)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ int ret = 0;
+
+ if (!timeo)
+ return ret;
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ ret = sk_wait_event(sk, &timeo,
+ !list_empty(&psock->ingress_msg) ||
+ !skb_queue_empty(&sk->sk_receive_queue), &wait);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return ret;
+}
+
+static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int nonblock, int flags, int *addr_len)
+{
+ struct sk_psock *psock;
+ int copied, ret;
+
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+ if (!skb_queue_empty(&sk->sk_receive_queue) &&
+ sk_psock_queue_empty(psock))
+ return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ lock_sock(sk);
+msg_bytes_ready:
+ copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
+ if (!copied) {
+ int data, err = 0;
+ long timeo;
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+ data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
+ if (data) {
+ if (!sk_psock_queue_empty(psock))
+ goto msg_bytes_ready;
+ release_sock(sk);
+ sk_psock_put(sk, psock);
+ return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ }
+ if (err) {
+ ret = err;
+ goto out;
+ }
+ copied = -EAGAIN;
+ }
+ ret = copied;
+out:
+ release_sock(sk);
+ sk_psock_put(sk, psock);
+ return ret;
+}
+
static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg, int *copied, int flags)
{
@@ -528,57 +529,6 @@ out_err:
return copied ? copied : err;
}
-static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock)
-{
- struct sk_psock_link *link;
-
- while ((link = sk_psock_link_pop(psock))) {
- sk_psock_unlink(sk, link);
- sk_psock_free_link(link);
- }
-}
-
-static void tcp_bpf_unhash(struct sock *sk)
-{
- void (*saved_unhash)(struct sock *sk);
- struct sk_psock *psock;
-
- rcu_read_lock();
- psock = sk_psock(sk);
- if (unlikely(!psock)) {
- rcu_read_unlock();
- if (sk->sk_prot->unhash)
- sk->sk_prot->unhash(sk);
- return;
- }
-
- saved_unhash = psock->saved_unhash;
- tcp_bpf_remove(sk, psock);
- rcu_read_unlock();
- saved_unhash(sk);
-}
-
-static void tcp_bpf_close(struct sock *sk, long timeout)
-{
- void (*saved_close)(struct sock *sk, long timeout);
- struct sk_psock *psock;
-
- lock_sock(sk);
- rcu_read_lock();
- psock = sk_psock(sk);
- if (unlikely(!psock)) {
- rcu_read_unlock();
- release_sock(sk);
- return sk->sk_prot->close(sk, timeout);
- }
-
- saved_close = psock->saved_close;
- tcp_bpf_remove(sk, psock);
- rcu_read_unlock();
- release_sock(sk);
- saved_close(sk, timeout);
-}
-
enum {
TCP_BPF_IPV4,
TCP_BPF_IPV6,
@@ -599,8 +549,8 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
struct proto *base)
{
prot[TCP_BPF_BASE] = *base;
- prot[TCP_BPF_BASE].unhash = tcp_bpf_unhash;
- prot[TCP_BPF_BASE].close = tcp_bpf_close;
+ prot[TCP_BPF_BASE].unhash = sock_map_unhash;
+ prot[TCP_BPF_BASE].close = sock_map_close;
prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read;
@@ -629,26 +579,6 @@ static int __init tcp_bpf_v4_build_proto(void)
}
core_initcall(tcp_bpf_v4_build_proto);
-static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock)
-{
- int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
- int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
-
- sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]);
-}
-
-static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
-{
- int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
- int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
-
- /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed
- * or added requiring sk_prot hook updates. We keep original saved
- * hooks in this case.
- */
- sk->sk_prot = &tcp_bpf_prots[family][config];
-}
-
static int tcp_bpf_assert_proto_ops(struct proto *ops)
{
/* In order to avoid retpoline, we make assumptions when we call
@@ -660,34 +590,34 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops)
ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
}
-void tcp_bpf_reinit(struct sock *sk)
+struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
{
- struct sk_psock *psock;
+ int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+ int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
- sock_owned_by_me(sk);
+ if (!psock->sk_proto) {
+ struct proto *ops = READ_ONCE(sk->sk_prot);
- rcu_read_lock();
- psock = sk_psock(sk);
- tcp_bpf_reinit_sk_prot(sk, psock);
- rcu_read_unlock();
+ if (tcp_bpf_assert_proto_ops(ops))
+ return ERR_PTR(-EINVAL);
+
+ tcp_bpf_check_v6_needs_rebuild(sk, ops);
+ }
+
+ return &tcp_bpf_prots[family][config];
}
-int tcp_bpf_init(struct sock *sk)
+/* If a child got cloned from a listening socket that had tcp_bpf
+ * protocol callbacks installed, we need to restore the callbacks to
+ * the default ones because the child does not inherit the psock state
+ * that tcp_bpf callbacks expect.
+ */
+void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
- struct proto *ops = READ_ONCE(sk->sk_prot);
- struct sk_psock *psock;
-
- sock_owned_by_me(sk);
+ int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+ struct proto *prot = newsk->sk_prot;
- rcu_read_lock();
- psock = sk_psock(sk);
- if (unlikely(!psock || psock->sk_proto ||
- tcp_bpf_assert_proto_ops(ops))) {
- rcu_read_unlock();
- return -EINVAL;
- }
- tcp_bpf_check_v6_needs_rebuild(sk, ops);
- tcp_bpf_update_sk_prot(sk, psock);
- rcu_read_unlock();
- return 0;
+ if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
+ newsk->sk_prot = sk->sk_prot_creator;
}
+#endif /* CONFIG_BPF_STREAM_PARSER */