From 10bbf1652c1cca9819e98d56f3432c56d7a2d229 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2023 20:28:11 +0000 Subject: net: implement lockless SO_PRIORITY This is a followup of 8bf43be799d4 ("net: annotate data-races around sk->sk_priority"). sk->sk_priority can be read and written without holding the socket lock. Signed-off-by: Eric Dumazet Reviewed-by: Wenjia Zhang Signed-off-by: David S. Miller --- net/appletalk/aarp.c | 2 +- net/ax25/af_ax25.c | 2 +- net/bluetooth/l2cap_sock.c | 2 +- net/can/j1939/socket.c | 2 +- net/can/raw.c | 2 +- net/core/sock.c | 23 ++++++++++++----------- net/dccp/ipv6.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- net/mptcp/sockopt.c | 2 +- net/netrom/af_netrom.c | 2 +- net/rose/af_rose.c | 2 +- net/sched/em_meta.c | 2 +- net/sctp/ipv6.c | 2 +- net/smc/af_smc.c | 2 +- net/x25/af_x25.c | 2 +- net/xdp/xsk.c | 2 +- 22 files changed, 34 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index c7236daa2415..9fa0b246902b 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -664,7 +664,7 @@ out_unlock: sendit: if (skb->sk) - skb->priority = skb->sk->sk_priority; + skb->priority = READ_ONCE(skb->sk->sk_priority); if (dev_queue_xmit(skb)) goto drop; sent: diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 5db805d5f74d..558e158c98d0 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -939,7 +939,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev) sock_init_data(NULL, sk); sk->sk_type = osk->sk_type; - sk->sk_priority = osk->sk_priority; + sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 3bdfc3f1e73d..e50d3d102078 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1615,7 +1615,7 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan, return ERR_PTR(-ENOTCONN); } - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); bt_cb(skb)->l2cap.chan = chan; diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index b28c976f52a0..14c431663233 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -884,7 +884,7 @@ static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev, skcb = j1939_skb_to_cb(skb); memset(skcb, 0, sizeof(*skcb)); skcb->addr = jsk->addr; - skcb->priority = j1939_prio(sk->sk_priority); + skcb->priority = j1939_prio(READ_ONCE(sk->sk_priority)); if (msg->msg_name) { struct sockaddr_can *addr = msg->msg_name; diff --git a/net/can/raw.c b/net/can/raw.c index d50c3f3d892f..73468d2ebd51 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -881,7 +881,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } skb->dev = dev; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; diff --git a/net/core/sock.c b/net/core/sock.c index a5995750c5c5..1fdc0a0d8ff2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -806,9 +806,7 @@ EXPORT_SYMBOL(sock_no_linger); void sock_set_priority(struct sock *sk, u32 priority) { - lock_sock(sk); WRITE_ONCE(sk->sk_priority, priority); - release_sock(sk); } EXPORT_SYMBOL(sock_set_priority); @@ -1118,6 +1116,18 @@ int sk_setsockopt(struct sock *sk, int level, int optname, valbool = val ? 1 : 0; + /* handle options which do not require locking the socket. */ + switch (optname) { + case SO_PRIORITY: + if ((val >= 0 && val <= 6) || + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + sock_set_priority(sk, val); + return 0; + } + return -EPERM; + } + sockopt_lock_sock(sk); switch (optname) { @@ -1213,15 +1223,6 @@ set_sndbuf: sk->sk_no_check_tx = valbool; break; - case SO_PRIORITY: - if ((val >= 0 && val <= 6) || - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - WRITE_ONCE(sk->sk_priority, val); - else - ret = -EPERM; - break; - case SO_LINGER: if (optlen < sizeof(ling)) { ret = -EINVAL; /* 1003.1g */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 80b956b39252..8d344b219f84 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -239,7 +239,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt, - np->tclass, sk->sk_priority); + np->tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e13a84433413..9f0bd518901a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -165,7 +165,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, * For cgroup2 classid is always zero. */ if (!classid) - classid = sk->sk_priority; + classid = READ_ONCE(sk->sk_priority); if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) goto errout; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4ab877cf6d35..6b14097e80ad 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1449,7 +1449,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, ip_options_build(skb, opt, cork->addr, rt); } - skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority; + skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; skb->tstamp = cork->transmit_time; /* diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f13eb7e23d03..95e972be0c05 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -828,7 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_priority : sk->sk_priority; + inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); transmit_time = tcp_transmit_time(sk); xfrm_sk_clone_policy(ctl_sk, sk); txhash = (sk->sk_state == TCP_TIME_WAIT) ? diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index eee8ab1bfa0e..3f87611077ef 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -292,7 +292,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_transparent = inet_test_bit(TRANSPARENT, sk); tw->tw_mark = sk->sk_mark; - tw->tw_priority = sk->sk_priority; + tw->tw_priority = READ_ONCE(sk->sk_priority); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0c50dcd35fe8..80043e46117c 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused fl6.daddr = sk->sk_v6_daddr; res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), - np->tclass, sk->sk_priority); + np->tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 951ba8089b5b..cdaa9275e990 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1984,7 +1984,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 94afb8d0f2d0..8a6e2e97f673 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -565,7 +565,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), - opt, tclass, sk->sk_priority); + opt, tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -1058,7 +1058,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) trace_tcp_send_reset(sk, skb); if (inet6_test_bit(REPFLOW, sk)) label = ip6_flowlabel(ipv6h); - priority = sk->sk_priority; + priority = READ_ONCE(sk->sk_priority); txhash = sk->sk_txhash; } if (sk->sk_state == TCP_TIME_WAIT) { diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 8260202c0066..f3485a6b35e7 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -89,7 +89,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); break; case SO_PRIORITY: - ssk->sk_priority = val; + WRITE_ONCE(ssk->sk_priority, val); break; case SO_SNDBUF: case SO_SNDBUFFORCE: diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 96e91ab71573..0eed00184adf 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -487,7 +487,7 @@ static struct sock *nr_make_new(struct sock *osk) sock_init_data(NULL, sk); sk->sk_type = osk->sk_type; - sk->sk_priority = osk->sk_priority; + sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 49dafe9ac72f..0cc5a4e19900 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -583,7 +583,7 @@ static struct sock *rose_make_new(struct sock *osk) #endif sk->sk_type = osk->sk_type; - sk->sk_priority = osk->sk_priority; + sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index da34fd4c9269..09d8afd04a2a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -546,7 +546,7 @@ META_COLLECTOR(int_sk_prio) *err = -1; return; } - dst->value = sk->sk_priority; + dst->value = READ_ONCE(sk->sk_priority); } META_COLLECTOR(int_sk_rcvlowat) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 5c0ed5909d85..24368f755ab1 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -247,7 +247,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t) rcu_read_lock(); res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), - tclass, sk->sk_priority); + tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index bacdd971615e..297681601414 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -493,7 +493,7 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, nsk->sk_sndtimeo = osk->sk_sndtimeo; nsk->sk_rcvtimeo = osk->sk_rcvtimeo; nsk->sk_mark = READ_ONCE(osk->sk_mark); - nsk->sk_priority = osk->sk_priority; + nsk->sk_priority = READ_ONCE(osk->sk_priority); nsk->sk_rcvlowat = osk->sk_rcvlowat; nsk->sk_bound_dev_if = osk->sk_bound_dev_if; nsk->sk_err = osk->sk_err; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 0fb5143bec7a..aad8ffeaee04 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -598,7 +598,7 @@ static struct sock *x25_make_new(struct sock *osk) x25 = x25_sk(sk); sk->sk_type = osk->sk_type; - sk->sk_priority = osk->sk_priority; + sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 7482d0aca504..f5e96e0d6e01 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -684,7 +684,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, } skb->dev = dev; - skb->priority = xs->sk.sk_priority; + skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; xsk_set_destructor_arg(skb); -- cgit