From 28b24f90020fed8e8e3e8e20575f08c1cd06e54f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:15 +0000 Subject: net: implement lockless SO_MAX_PACING_RATE SO_MAX_PACING_RATE setsockopt() does not need to hold the socket lock, because sk->sk_pacing_rate readers can run fine if the value is changed by other threads, after adding READ_ONCE() accessors. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2d1e4b5ac1ca..970a07289480 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1201,7 +1201,7 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); if (sk->sk_pacing_status != SK_PACING_NONE) { - unsigned long rate = sk->sk_pacing_rate; + unsigned long rate = READ_ONCE(sk->sk_pacing_rate); /* Original sch_fq does not pace first 10 MSS * Note that tp->data_segs_out overflows after 2^32 packets, @@ -1973,7 +1973,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, unsigned long bytes; u32 r; - bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift); + bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift); r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log); if (r < BITS_PER_TYPE(sk->sk_gso_max_size)) @@ -2553,7 +2553,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit = max_t(unsigned long, 2 * skb->truesize, - sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); + READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes)); @@ -2561,7 +2561,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, if (static_branch_unlikely(&tcp_tx_delay_enabled) && tcp_sk(sk)->tcp_tx_delay) { - u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay; + u64 extra_bytes = (u64)READ_ONCE(sk->sk_pacing_rate) * + tcp_sk(sk)->tcp_tx_delay; /* TSQ is based on skb truesize sum (sk_wmem_alloc), so we * approximate our needs assuming an ~100% skb->truesize overhead. -- cgit From eb44ad4e635132754bfbcb18103f1dcb7058aedd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:18 +0000 Subject: net: annotate data-races around sk->sk_dst_pending_confirm This field can be read or written without socket lock being held. Add annotations to avoid load-store tearing. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 970a07289480..8885552dff8e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1325,7 +1325,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; refcount_add(skb->truesize, &sk->sk_wmem_alloc); - skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); + skb_set_dst_pending_confirm(skb, READ_ONCE(sk->sk_dst_pending_confirm)); /* Build TCP header and checksum it. */ th = (struct tcphdr *)skb->data; -- cgit