summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
authorMartin KaFai Lau <martin.lau@kernel.org>2025-01-29 13:31:19 -0800
committerMartin KaFai Lau <martin.lau@kernel.org>2025-01-29 13:33:10 -0800
commit9bf412d4d5b1f431e6cdd8111094be39c031036c (patch)
treef01b165541b84bf465130e3e20902b031391fe37 /net/ipv4/tcp.c
parentbc27c52eea189e8f7492d40739b7746d67b65beb (diff)
parent6fcfe96e0f6e9bebe1b185f1548a9a8cb1b68dea (diff)
Merge branch 'bpf-fix-wrong-copied_seq-calculation-and-add-tests'
Jiayuan Chen says: ==================== A previous commit described in this topic http://lore.kernel.org/bpf/20230523025618.113937-9-john.fastabend@gmail.com directly updated 'sk->copied_seq' in the tcp_eat_skb() function when the action of a BPF program was SK_REDIRECT. For other actions, like SK_PASS, the update logic for 'sk->copied_seq' was moved to tcp_bpf_recvmsg_parser() to ensure the accuracy of the 'fionread' feature. That commit works for a single stream_verdict scenario, as it also modified 'sk_data_ready->sk_psock_verdict_data_ready->tcp_read_skb' to remove updating 'sk->copied_seq'. However, for programs where both stream_parser and stream_verdict are active (strparser purpose), tcp_read_sock() was used instead of tcp_read_skb() (sk_data_ready->strp_data_ready->tcp_read_sock). tcp_read_sock() now still updates 'sk->copied_seq', leading to duplicated updates. In summary, for strparser + SK_PASS, copied_seq is redundantly calculated in both tcp_read_sock() and tcp_bpf_recvmsg_parser(). The issue causes incorrect copied_seq calculations, which prevent correct data reads from the recv() interface in user-land. Also we added test cases for bpf + strparser and separated them from sockmap_basic, as strparser has more encapsulation and parsing capabilities compared to sockmap. ==================== Link: https://patch.msgid.link/20250122100917.49845-1-mrpre@163.com Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c29
1 files changed, 24 insertions, 5 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0d704bda6c41..285678d8ce07 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1565,12 +1565,13 @@ EXPORT_SYMBOL(tcp_recv_skb);
* or for 'peeking' the socket using this routine
* (although both would be easy to implement).
*/
-int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor)
+static int __tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor, bool noack,
+ u32 *copied_seq)
{
struct sk_buff *skb;
struct tcp_sock *tp = tcp_sk(sk);
- u32 seq = tp->copied_seq;
+ u32 seq = *copied_seq;
u32 offset;
int copied = 0;
@@ -1624,9 +1625,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
tcp_eat_recv_skb(sk, skb);
if (!desc->count)
break;
- WRITE_ONCE(tp->copied_seq, seq);
+ WRITE_ONCE(*copied_seq, seq);
}
- WRITE_ONCE(tp->copied_seq, seq);
+ WRITE_ONCE(*copied_seq, seq);
+
+ if (noack)
+ goto out;
tcp_rcv_space_adjust(sk);
@@ -1635,10 +1639,25 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
tcp_recv_skb(sk, seq, &offset);
tcp_cleanup_rbuf(sk, copied);
}
+out:
return copied;
}
+
+int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ return __tcp_read_sock(sk, desc, recv_actor, false,
+ &tcp_sk(sk)->copied_seq);
+}
EXPORT_SYMBOL(tcp_read_sock);
+int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor, bool noack,
+ u32 *copied_seq)
+{
+ return __tcp_read_sock(sk, desc, recv_actor, noack, copied_seq);
+}
+
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct sk_buff *skb;