From 3fa29971c69519629370b119b0b618ee88ade6b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Sep 2023 16:02:08 +0000 Subject: ipv6: lockless IPV6_RECVERR implementation np->recverr is moved to inet->inet_flags to fix data-races. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +-- include/net/inet_sock.h | 1 + include/net/ipv6.h | 4 +--- net/dccp/ipv6.c | 2 +- net/ipv4/ping.c | 2 +- net/ipv6/datagram.c | 6 ++---- net/ipv6/ipv6_sockglue.c | 17 ++++++++--------- net/ipv6/raw.c | 10 +++++----- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/udp.c | 6 +++--- net/sctp/ipv6.c | 4 +--- 11 files changed, 25 insertions(+), 32 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 57d563f1d4b1..53f4f1b97a78 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -243,8 +243,7 @@ struct ipv6_pinfo { } rxopt; /* sockopt flags */ - __u16 recverr:1, - sndflow:1, + __u16 sndflow:1, repflow:1, pmtudisc:3, padding:1, /* 1 bit hole */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index ac75324e9e1e..3b79bc759ff4 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -274,6 +274,7 @@ enum { INET_FLAGS_AUTOFLOWLABEL_SET = 23, INET_FLAGS_AUTOFLOWLABEL = 24, INET_FLAGS_DONTFRAG = 25, + INET_FLAGS_RECVERR6 = 26, }; /* cmsg flags for inet */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5a1f2993680d..bd115980809f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1303,9 +1303,7 @@ static inline int ip6_sock_set_v6only(struct sock *sk) static inline void ip6_sock_set_recverr(struct sock *sk) { - lock_sock(sk); - inet6_sk(sk)->recverr = true; - release_sock(sk); + inet6_set_bit(RECVERR6, sk); } static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 83617a16b98e..e6c3d84c2b9e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -185,7 +185,7 @@ static int dccp_v6_err(struct
sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 75e0aee35eb7..bc01ad5fc01a 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -581,7 +581,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) * 4.1.3.3. */ if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) || - (family == AF_INET6 && !inet6_sk(sk)->recverr)) { + (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e81892814935..74673a5eff31 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -305,11 +305,10 @@ static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb, void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { - struct ipv6_pinfo *np = inet6_sk(sk); struct icmp6hdr *icmph = icmp6_hdr(skb); struct sock_exterr_skb *serr; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = skb_clone(skb, GFP_ATOMIC); @@ -344,12 +343,11 @@ EXPORT_SYMBOL_GPL(ipv6_icmp_error); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) { - const struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; struct ipv6hdr *iph; struct sk_buff *skb; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 33dd4dd872e6..ec10b45c49c1 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -481,6 +481,13 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_DONTFRAG: inet6_assign_bit(DONTFRAG, sk, valbool); return 0; + case IPV6_RECVERR: + if (optlen < sizeof(int)) + return -EINVAL; + 
inet6_assign_bit(RECVERR6, sk, valbool); + if (!val) + skb_errqueue_purge(&sk->sk_error_queue); + return 0; } if (needs_rtnl) rtnl_lock(); @@ -943,14 +950,6 @@ done: np->pmtudisc = val; retv = 0; break; - case IPV6_RECVERR: - if (optlen < sizeof(int)) - goto e_inval; - np->recverr = valbool; - if (!val) - skb_errqueue_purge(&sk->sk_error_queue); - retv = 0; - break; case IPV6_FLOWINFO_SEND: if (optlen < sizeof(int)) goto e_inval; @@ -1380,7 +1379,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_RECVERR: - val = np->recverr; + val = inet6_test_bit(RECVERR6, sk); break; case IPV6_FLOWINFO_SEND: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index cc9673c1809f..71f6bdccfa1f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -291,6 +291,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + bool recverr = inet6_test_bit(RECVERR6, sk); struct ipv6_pinfo *np = inet6_sk(sk); int err; int harderr; @@ -300,7 +301,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, 2. Socket is connected (otherwise the error indication is useless without recverr and error is hard. 
*/ - if (!np->recverr && sk->sk_state != TCP_ESTABLISHED) + if (!recverr && sk->sk_state != TCP_ESTABLISHED) return; harderr = icmpv6_err_convert(type, code, &err); @@ -312,14 +313,14 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, ip6_sk_redirect(skb, sk); return; } - if (np->recverr) { + if (recverr) { u8 *payload = skb->data; if (!inet_test_bit(HDRINCL, sk)) payload += offset; ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); } - if (np->recverr || harderr) { + if (recverr || harderr) { sk->sk_err = err; sk_error_report(sk); } @@ -587,7 +588,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags, const struct sockcm_cookie *sockc) { - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *iph; struct sk_buff *skb; @@ -668,7 +668,7 @@ out: error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); error_check: - if (err == -ENOBUFS && !np->recverr) + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) err = 0; return err; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 54db5fab318b..b5954b136b57 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -508,7 +508,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tcp_ld_RTO_revert(sk, seq); } - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e4301500741a..90e873689b88 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -619,7 +619,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if (!np->recverr) { + if (!inet6_test_bit(RECVERR6, sk)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { @@ -1283,7 +1283,7 @@ csum_partial: send: err = ip6_send_skb(skb); if (err) { - if (err == -ENOBUFS && 
!inet6_sk(sk)->recverr) { + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; @@ -1608,7 +1608,7 @@ do_append_data: up->pending = 0; if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; + err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0; release_sock(sk); out: diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 43f2731bf590..42b5b853ea01 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -128,7 +128,6 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, { struct sctp_association *asoc = t->asoc; struct sock *sk = asoc->base.sk; - struct ipv6_pinfo *np; int err = 0; switch (type) { @@ -149,9 +148,8 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, break; } - np = inet6_sk(sk); icmpv6_err_convert(type, code, &err); - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { -- cgit