summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2023-09-22 03:42:16 +0000
committerDavid S. Miller <davem@davemloft.net>2023-10-01 19:39:18 +0100
commite08d0b3d172311e2bb500865c0d0038533d0ff11 (patch)
tree0c5343300213611a52bf2904f1aa129567929171 /net/ipv4
parentceaa714138a372ac63cc2c5c19ee0882d22827f9 (diff)
inet: implement lockless IP_TOS
Some reads of inet->tos are racy. Add needed READ_ONCE() annotations and convert IP_TOS option lockless. v2: missing changes in include/net/route.h (David Ahern) Signed-off-by: Eric Dumazet <edumazet@google.com> Reviewed-by: David Ahern <dsahern@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/ip_sockglue.c29
-rw-r--r--net/ipv4/tcp_ipv4.c9
4 files changed, 20 insertions, 24 deletions
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 9f0bd518901a..f01aee832aab 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -134,7 +134,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
* hence this needs to be included regardless of socket family.
*/
if (ext & (1 << (INET_DIAG_TOS - 1)))
- if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0)
+ if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0)
goto errout;
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9fc7be2c2033..89e62ed08dad 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -544,7 +544,7 @@ EXPORT_SYMBOL(__ip_queue_xmit);
int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
{
- return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
+ return __ip_queue_xmit(sk, skb, fl, READ_ONCE(inet_sk(sk)->tos));
}
EXPORT_SYMBOL(ip_queue_xmit);
@@ -1438,7 +1438,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = 5;
- iph->tos = (cork->tos != -1) ? cork->tos : inet->tos;
+ iph->tos = (cork->tos != -1) ? cork->tos : READ_ONCE(inet->tos);
iph->frag_off = df;
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 6d874cc03c8b..50c008efbb6d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -585,25 +585,20 @@ out:
return err;
}
-void __ip_sock_set_tos(struct sock *sk, int val)
+void ip_sock_set_tos(struct sock *sk, int val)
{
+ u8 old_tos = READ_ONCE(inet_sk(sk)->tos);
+
if (sk->sk_type == SOCK_STREAM) {
val &= ~INET_ECN_MASK;
- val |= inet_sk(sk)->tos & INET_ECN_MASK;
+ val |= old_tos & INET_ECN_MASK;
}
- if (inet_sk(sk)->tos != val) {
- inet_sk(sk)->tos = val;
+ if (old_tos != val) {
+ WRITE_ONCE(inet_sk(sk)->tos, val);
WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
sk_dst_reset(sk);
}
}
-
-void ip_sock_set_tos(struct sock *sk, int val)
-{
- lock_sock(sk);
- __ip_sock_set_tos(sk, val);
- release_sock(sk);
-}
EXPORT_SYMBOL(ip_sock_set_tos);
void ip_sock_set_freebind(struct sock *sk)
@@ -1050,6 +1045,9 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
return 0;
case IP_MTU_DISCOVER:
return ip_sock_set_mtu_discover(sk, val);
+ case IP_TOS: /* This sets both TOS and Precedence */
+ ip_sock_set_tos(sk, val);
+ return 0;
}
err = 0;
@@ -1104,9 +1102,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
}
}
break;
- case IP_TOS: /* This sets both TOS and Precedence */
- __ip_sock_set_tos(sk, val);
- break;
case IP_UNICAST_IF:
{
struct net_device *dev = NULL;
@@ -1593,6 +1588,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_MTU_DISCOVER:
val = READ_ONCE(inet->pmtudisc);
goto copyval;
+ case IP_TOS:
+ val = READ_ONCE(inet->tos);
+ goto copyval;
}
if (needs_rtnl)
@@ -1629,9 +1627,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
}
- case IP_TOS:
- val = inet->tos;
- break;
case IP_MTU:
{
struct dst_entry *dst;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 95e972be0c05..a441740616d7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1024,10 +1024,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
- tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
- (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
- (inet_sk(sk)->tos & INET_ECN_MASK) :
- inet_sk(sk)->tos;
+ tos = READ_ONCE(inet_sk(sk)->tos);
+
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+ tos = (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+ (tos & INET_ECN_MASK);
if (!INET_ECN_is_capable(tos) &&
tcp_bpf_ca_needs_ecn((struct sock *)req))