From 193f3685d0546b0cea20c99894aadb70098e47bf Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 21 Feb 2019 11:19:41 +0100 Subject: ipv6: route: enforce RCU protection in rt6_update_exception_stamp_rt() We must access rt6_info->from under RCU read lock: move the dereference under such lock, with proper annotation. v1 -> v2: - avoid using multiple, racy, fetch operations for rt->from Fixes: a68886a69180 ("net/ipv6: Make from in rt6_info rcu protected") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net/ipv6/route.c') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 964491cf3672..2b1ed8c6fcab 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1599,15 +1599,15 @@ static int rt6_remove_exception_rt(struct rt6_info *rt) static void rt6_update_exception_stamp_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct fib6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; - - if (!from || - !(rt->rt6i_flags & RTF_CACHE)) - return; + struct fib6_info *from; rcu_read_lock(); + from = rcu_dereference(rt->from); + if (!from || !(rt->rt6i_flags & RTF_CACHE)) + goto unlock; + bucket = rcu_dereference(from->rt6i_exception_bucket); #ifdef CONFIG_IPV6_SUBTREES @@ -1626,6 +1626,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) if (rt6_ex) rt6_ex->stamp = jiffies; +unlock: rcu_read_unlock(); } -- cgit From bf1dc8bad1d42287164d216d8efb51c5cd381b18 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 21 Feb 2019 11:19:42 +0100 Subject: ipv6: route: enforce RCU protection in ip6_route_check_nh_onlink() We need a RCU critical section around rt6_info->from deference, and proper annotation. Fixes: 4ed591c8ab44 ("net/ipv6: Allow onlink routes to have a device mismatch if it is the default route") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/ipv6/route.c') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2b1ed8c6fcab..74b9b6fd4168 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2743,20 +2743,24 @@ static int ip6_route_check_nh_onlink(struct net *net, u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; const struct in6_addr *gw_addr = &cfg->fc_gateway; u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT; + struct fib6_info *from; struct rt6_info *grt; int err; err = 0; grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); if (grt) { + rcu_read_lock(); + from = rcu_dereference(grt->from); if (!grt->dst.error && /* ignore match if it is the default route */ - grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) && + from && !ipv6_addr_any(&from->fib6_dst.addr) && (grt->rt6i_flags & flags || dev != grt->dst.dev)) { NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway or device mismatch"); err = -EINVAL; } + rcu_read_unlock(); ip6_rt_put(grt); } -- cgit From f5b51fe804ec2a6edce0f8f6b11ea57283f5857b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Feb 2019 18:18:12 +0100 Subject: ipv6: route: purge exception on removal When a netdevice is unregistered, we flush the relevant exception via rt6_sync_down_dev() -> fib6_ifdown() -> fib6_del() -> fib6_del_route(). Finally, we end-up calling rt6_remove_exception(), where we release the relevant dst, while we keep the references to the related fib6_info and dev. Such references should be released later when the dst will be destroyed. There are a number of caches that can keep the exception around for an unlimited amount of time - namely dst_cache, possibly even socket cache. As a result device registration may hang, as demonstrated by this script: ip netns add cl ip netns add rt ip netns add srv ip netns exec rt sysctl -w net.ipv6.conf.all.forwarding=1 ip link add name cl_veth type veth peer name cl_rt_veth ip link set dev cl_veth netns cl ip -n cl link set dev cl_veth up ip -n cl addr add dev cl_veth 2001::2/64 ip -n cl route add default via 2001::1 ip -n cl link add tunv6 type ip6tnl mode ip6ip6 local 2001::2 remote 2002::1 hoplimit 64 dev cl_veth ip -n cl link set tunv6 up ip -n cl addr add 2013::2/64 dev tunv6 ip link set dev cl_rt_veth netns rt ip -n rt link set dev cl_rt_veth up ip -n rt addr add dev cl_rt_veth 2001::1/64 ip link add name rt_srv_veth type veth peer name srv_veth ip link set dev srv_veth netns srv ip -n srv link set dev srv_veth up ip -n srv addr add dev srv_veth 2002::1/64 ip -n srv route add default via 2002::2 ip -n srv link add tunv6 type ip6tnl mode ip6ip6 local 2002::1 remote 2001::2 hoplimit 64 dev srv_veth ip -n srv link set tunv6 up ip -n srv addr add 2013::1/64 dev tunv6 ip link set dev rt_srv_veth netns rt ip -n rt link set dev rt_srv_veth up ip -n rt addr add dev rt_srv_veth 2002::2/64 ip netns exec srv netserver & sleep 0.1 ip netns exec cl ping6 -c 4 2013::1 ip netns exec cl netperf -H 2013::1 -t TCP_STREAM -l 3 & sleep 1 ip -n rt link set dev rt_srv_veth mtu 1400 wait %2 ip -n cl link del cl_veth This commit addresses the issue purging all the references held by the exception at time, as we currently do for e.g. ipv6 pcpu dst entries. v1 -> v2: - re-order the code to avoid accessing dst and net after dst_dev_put() Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes") Signed-off-by: Paolo Abeni Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'net/ipv6/route.c') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 74b9b6fd4168..047c47224dba 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1274,18 +1274,29 @@ static DEFINE_SPINLOCK(rt6_exception_lock); static void rt6_remove_exception(struct rt6_exception_bucket *bucket, struct rt6_exception *rt6_ex) { + struct fib6_info *from; struct net *net; if (!bucket || !rt6_ex) return; net = dev_net(rt6_ex->rt6i->dst.dev); + net->ipv6.rt6_stats->fib_rt_cache--; + + /* purge completely the exception to allow releasing the held resources: + * some [sk] cache may keep the dst around for unlimited time + */ + from = rcu_dereference_protected(rt6_ex->rt6i->from, + lockdep_is_held(&rt6_exception_lock)); + rcu_assign_pointer(rt6_ex->rt6i->from, NULL); + fib6_info_release(from); + dst_dev_put(&rt6_ex->rt6i->dst); + hlist_del_rcu(&rt6_ex->hlist); dst_release(&rt6_ex->rt6i->dst); kfree_rcu(rt6_ex, rcu); WARN_ON_ONCE(!bucket->depth); bucket->depth--; - net->ipv6.rt6_stats->fib_rt_cache--; } /* Remove oldest rt6_ex in bucket and free the memory -- cgit From 97f0082a0592212fc15d4680f5a4d80f79a1687c Mon Sep 17 00:00:00 2001 From: Kalash Nainwal Date: Wed, 20 Feb 2019 16:23:04 -0800 Subject: net: Set rtm_table to RT_TABLE_COMPAT for ipv6 for tables > 255 Set rtm_table to RT_TABLE_COMPAT for ipv6 for tables > 255 to keep legacy software happy. This is similar to what was done for ipv4 in commit 709772e6e065 ("net: Fix routing tables with id > 255 for legacy software"). Signed-off-by: Kalash Nainwal Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6/route.c') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 047c47224dba..ce15dc4ccbfa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4665,7 +4665,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, table = rt->fib6_table->tb6_id; else table = RT6_TABLE_UNSPEC; - rtm->rtm_table = table; + rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; -- cgit