diff options
| author | David S. Miller <davem@davemloft.net> | 2022-07-13 12:56:50 +0100 | 
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2022-07-13 12:56:50 +0100 | 
| commit | 7d5424b26f17b74d94e73815718b424ad207a3e7 (patch) | |
| tree | a571db15de0e5ac08997aab9705356eff39685b6 | |
| parent | 22b9c41a3fb8ef4624bcda312665937d2ba98aa7 (diff) | |
| parent | bdf00bf24bef9be1ca641a6390fd5487873e0d2e (diff) | |
Merge branch 'net-sysctl-races'
Kuniyuki Iwashima says:
====================
sysctl: Fix data-races around ipv4_net_table (Round 1).
This series fixes data-races around the first 13 knobs and
nexthop_compat_mode in ipv4_net_table.
I will post another patch for three early_demux knobs later,
so the next round will start from ip_default_ttl.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | Documentation/networking/ip-sysctl.rst | 2 | ||||
| -rw-r--r-- | drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 2 | ||||
| -rw-r--r-- | include/net/raw.h | 2 | ||||
| -rw-r--r-- | kernel/sysctl.c | 12 | ||||
| -rw-r--r-- | net/ipv4/af_inet.c | 4 | ||||
| -rw-r--r-- | net/ipv4/fib_semantics.c | 2 | ||||
| -rw-r--r-- | net/ipv4/icmp.c | 15 | ||||
| -rw-r--r-- | net/ipv4/inet_timewait_sock.c | 3 | ||||
| -rw-r--r-- | net/ipv4/nexthop.c | 5 | ||||
| -rw-r--r-- | net/ipv4/syncookies.c | 2 | ||||
| -rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 12 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 2 | ||||
| -rw-r--r-- | net/ipv4/tcp_output.c | 4 | ||||
| -rw-r--r-- | net/ipv6/icmp.c | 2 | ||||
| -rw-r--r-- | net/ipv6/route.c | 2 | 
15 files changed, 43 insertions, 28 deletions
| diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 0e58001f8580..b3a534ed0e7c 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1179,7 +1179,7 @@ ip_autobind_reuse - BOOLEAN  	option should only be set by experts.  	Default: 0 -ip_dynaddr - BOOLEAN +ip_dynaddr - INTEGER  	If set non-zero, enables support for dynamic addresses.  	If set to a non-zero value larger than 1, a kernel log  	message will be printed when dynamic address rewriting diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 4af5561cbfc5..7c760aa65540 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1392,7 +1392,7 @@ static void chtls_pass_accept_request(struct sock *sk,  	th_ecn = tcph->ece && tcph->cwr;  	if (th_ecn) {  		ect = !INET_ECN_is_not_ect(ip_dsfield); -		ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn; +		ecn_ok = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);  		if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk))  			inet_rsk(oreq)->ecn_ok = 1;  	} diff --git a/include/net/raw.h b/include/net/raw.h index 8ad8df594853..c51a635671a7 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if,  				       int dif, int sdif)  {  #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) -	return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept, +	return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept),  				 bound_dev_if, dif, sdif);  #else  	return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bf9383d17e1b..d99bc3945445 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1007,13 +1007,13 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write,  	tmp.maxlen = sizeof(val);  
	tmp.data = &val; -	val = *data; +	val = READ_ONCE(*data);  	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,  				do_proc_douintvec_minmax_conv, &param);  	if (res)  		return res;  	if (write) -		*data = val; +		WRITE_ONCE(*data, val);  	return 0;  }  EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); @@ -1224,9 +1224,9 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,  		if (jif > INT_MAX)  			return 1; -		*valp = (int)jif; +		WRITE_ONCE(*valp, (int)jif);  	} else { -		int val = *valp; +		int val = READ_ONCE(*valp);  		unsigned long lval;  		if (val < 0) {  			*negp = true; @@ -1294,8 +1294,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,   * @ppos: the current position in the file   *   * Reads/writes up to table->maxlen/sizeof(unsigned int) integer - * values from/to the user buffer, treated as an ASCII string.  - * The values read are assumed to be in 1/1000 seconds, and  + * values from/to the user buffer, treated as an ASCII string. + * The values read are assumed to be in 1/1000 seconds, and   * are converted into jiffies.   *   * Returns 0 on success. diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 93da9f783bec..ac67f6b4ec70 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1246,7 +1246,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)  	if (new_saddr == old_saddr)  		return 0; -	if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) { +	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {  		pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",  			__func__, &old_saddr, &new_saddr);  	} @@ -1301,7 +1301,7 @@ int inet_sk_rebuild_header(struct sock *sk)  		 * Other protocols have to map its equivalent state to TCP_SYN_SENT.  		 * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. 
-acme  		 */ -		if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr || +		if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||  		    sk->sk_state != TCP_SYN_SENT ||  		    (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||  		    (err = inet_sk_reselect_saddr(sk)) != 0) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a57ba23571c9..16dbd5075284 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1811,7 +1811,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,  			goto nla_put_failure;  		if (nexthop_is_blackhole(fi->nh))  			rtm->rtm_type = RTN_BLACKHOLE; -		if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode) +		if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode))  			goto offload;  	} diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 0f9e61d29f73..57c4f0d87a7a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -282,7 +282,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)  		return true;  	/* Limit if icmp type is enabled in ratemask. 
*/ -	if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask)) +	if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask)))  		return true;  	return false; @@ -320,7 +320,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,  	vif = l3mdev_master_ifindex(dst->dev);  	peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); -	rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit); +	rc = inet_peer_xrlim_allow(peer, +				   READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));  	if (peer)  		inet_putpeer(peer);  out: @@ -693,7 +694,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,  		rcu_read_lock();  		if (rt_is_input_route(rt) && -		    net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) +		    READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr))  			dev = dev_get_by_index_rcu(net, inet_iif(skb_in));  		if (dev) @@ -933,7 +934,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)  	 *	get the other vendor to fix their kit.  	 */ -	if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses && +	if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) &&  	    inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) {  		net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",  				     &ip_hdr(skb)->saddr, @@ -993,7 +994,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb)  	net = dev_net(skb_dst(skb)->dev);  	/* should there be an ICMP stat for ignored echos? 
*/ -	if (net->ipv4.sysctl_icmp_echo_ignore_all) +	if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))  		return SKB_NOT_DROPPED_YET;  	icmp_param.data.icmph	   = *icmp_hdr(skb); @@ -1028,7 +1029,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)  	u16 ident_len;  	u8 status; -	if (!net->ipv4.sysctl_icmp_echo_enable_probe) +	if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))  		return false;  	/* We currently only support probing interfaces on the proxy node @@ -1249,7 +1250,7 @@ int icmp_rcv(struct sk_buff *skb)  		 */  		if ((icmph->type == ICMP_ECHO ||  		     icmph->type == ICMP_TIMESTAMP) && -		    net->ipv4.sysctl_icmp_echo_ignore_broadcasts) { +		    READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) {  			reason = SKB_DROP_REASON_INVALID_PROTO;  			goto error;  		} diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 0ec501845cb3..47ccc343c9fb 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -156,7 +156,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,  {  	struct inet_timewait_sock *tw; -	if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets) +	if (refcount_read(&dr->tw_refcount) - 1 >= +	    READ_ONCE(dr->sysctl_max_tw_buckets))  		return NULL;  	tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e459a391e607..853a75a8fbaf 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1858,7 +1858,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)  		/* __ip6_del_rt does a release, so do a hold here */  		fib6_info_hold(f6i);  		ipv6_stub->ip6_del_rt(net, f6i, -				      !net->ipv4.sysctl_nexthop_compat_mode); +				      !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode));  	}  } @@ -2361,7 +2361,8 @@ out:  	if (!rc) {  		nh_base_seq_inc(net);  		nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); -		if (replace_notify && 
net->ipv4.sysctl_nexthop_compat_mode) +		if (replace_notify && +		    READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode))  			nexthop_replace_notify(net, new_nh, &cfg->nlinfo);  	} diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f33c31dd7366..b387c4835155 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -273,7 +273,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,  	if (!ecn_ok)  		return false; -	if (net->ipv4.sysctl_tcp_ecn) +	if (READ_ONCE(net->ipv4.sysctl_tcp_ecn))  		return true;  	return dst_feature(dst, RTAX_FEATURE_ECN); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index cd448cdd3b38..108fd86f2718 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -599,6 +599,8 @@ static struct ctl_table ipv4_net_table[] = {  		.maxlen		= sizeof(u8),  		.mode		= 0644,  		.proc_handler	= proc_dou8vec_minmax, +		.extra1		= SYSCTL_ZERO, +		.extra2		= SYSCTL_ONE  	},  	{  		.procname	= "icmp_echo_enable_probe", @@ -615,6 +617,8 @@ static struct ctl_table ipv4_net_table[] = {  		.maxlen		= sizeof(u8),  		.mode		= 0644,  		.proc_handler	= proc_dou8vec_minmax, +		.extra1		= SYSCTL_ZERO, +		.extra2		= SYSCTL_ONE  	},  	{  		.procname	= "icmp_ignore_bogus_error_responses", @@ -622,6 +626,8 @@ static struct ctl_table ipv4_net_table[] = {  		.maxlen		= sizeof(u8),  		.mode		= 0644,  		.proc_handler	= proc_dou8vec_minmax, +		.extra1		= SYSCTL_ZERO, +		.extra2		= SYSCTL_ONE  	},  	{  		.procname	= "icmp_errors_use_inbound_ifaddr", @@ -629,6 +635,8 @@ static struct ctl_table ipv4_net_table[] = {  		.maxlen		= sizeof(u8),  		.mode		= 0644,  		.proc_handler	= proc_dou8vec_minmax, +		.extra1		= SYSCTL_ZERO, +		.extra2		= SYSCTL_ONE  	},  	{  		.procname	= "icmp_ratelimit", @@ -668,6 +676,8 @@ static struct ctl_table ipv4_net_table[] = {  		.maxlen		= sizeof(u8),  		.mode		= 0644,  		.proc_handler	= proc_dou8vec_minmax, +		.extra1		= SYSCTL_ZERO, +		.extra2		= SYSCTL_TWO,  	},  	{  		.procname	= 
"tcp_ecn_fallback", @@ -675,6 +685,8 @@ static struct ctl_table ipv4_net_table[] = {  		.maxlen		= sizeof(u8),  		.mode		= 0644,  		.proc_handler	= proc_dou8vec_minmax, +		.extra1		= SYSCTL_ZERO, +		.extra2		= SYSCTL_ONE,  	},  	{  		.procname	= "ip_dynaddr", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2e2a9ece9af2..3ec4edc37313 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6729,7 +6729,7 @@ static void tcp_ecn_create_request(struct request_sock *req,  	ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);  	ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK); -	ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst; +	ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst;  	if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||  	    (ecn_ok_dst & DST_FEATURE_ECN_CA) || diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1c054431e358..11aa0ab10bba 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -324,7 +324,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)  {  	struct tcp_sock *tp = tcp_sk(sk);  	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk); -	bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || +	bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||  		tcp_ca_needs_ecn(sk) || bpf_needs_ecn;  	if (!use_ecn) { @@ -346,7 +346,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)  static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)  { -	if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) +	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))  		/* tp->ecn_flags are cleared at a later point in time when  		 * SYN ACK is ultimatively being received.  		 
*/ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 61770220774e..9d92d51c4757 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -925,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb)  		break;  	case ICMPV6_EXT_ECHO_REQUEST:  		if (!net->ipv6.sysctl.icmpv6_echo_ignore_all && -		    net->ipv4.sysctl_icmp_echo_enable_probe) +		    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))  			icmpv6_echo_reply(skb);  		break; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 828355710c57..916417944ec8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5741,7 +5741,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,  		if (nexthop_is_blackhole(rt->nh))  			rtm->rtm_type = RTN_BLACKHOLE; -		if (net->ipv4.sysctl_nexthop_compat_mode && +		if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&  		    rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)  			goto nla_put_failure; | 
