summaryrefslogtreecommitdiff
path: root/net/ipv6/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r--net/ipv6/route.c955
1 files changed, 566 insertions, 389 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e74e0361fd92..aee6a10b112a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -87,11 +87,12 @@ struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int ip6_mtu(const struct dst_entry *dst);
-static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+static void ip6_negative_advice(struct sock *sk,
+ struct dst_entry *dst);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
- struct net_device *dev, int how);
-static int ip6_dst_gc(struct dst_ops *ops);
+ struct net_device *dev);
+static void ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -130,7 +131,6 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
struct uncached_list {
spinlock_t lock;
struct list_head head;
- struct list_head quarantine;
};
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
@@ -139,20 +139,20 @@ void rt6_uncached_list_add(struct rt6_info *rt)
{
struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
- rt->rt6i_uncached_list = ul;
+ rt->dst.rt_uncached_list = ul;
spin_lock_bh(&ul->lock);
- list_add_tail(&rt->rt6i_uncached, &ul->head);
+ list_add_tail(&rt->dst.rt_uncached, &ul->head);
spin_unlock_bh(&ul->lock);
}
void rt6_uncached_list_del(struct rt6_info *rt)
{
- if (!list_empty(&rt->rt6i_uncached)) {
- struct uncached_list *ul = rt->rt6i_uncached_list;
+ if (!list_empty(&rt->dst.rt_uncached)) {
+ struct uncached_list *ul = rt->dst.rt_uncached_list;
spin_lock_bh(&ul->lock);
- list_del_init(&rt->rt6i_uncached);
+ list_del_init(&rt->dst.rt_uncached);
spin_unlock_bh(&ul->lock);
}
}
@@ -169,12 +169,12 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev)
continue;
spin_lock_bh(&ul->lock);
- list_for_each_entry_safe(rt, safe, &ul->head, rt6i_uncached) {
+ list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) {
struct inet6_dev *rt_idev = rt->rt6i_idev;
struct net_device *rt_dev = rt->dst.dev;
bool handled = false;
- if (rt_idev->dev == dev) {
+ if (rt_idev && rt_idev->dev == dev) {
rt->rt6i_idev = in6_dev_get(blackhole_netdev);
in6_dev_put(rt_idev);
handled = true;
@@ -188,8 +188,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev)
handled = true;
}
if (handled)
- list_move(&rt->rt6i_uncached,
- &ul->quarantine);
+ list_del_init(&rt->dst.rt_uncached);
}
spin_unlock_bh(&ul->lock);
}
@@ -226,16 +225,16 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr)
{
- const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
+ const struct rt6_info *rt = dst_rt6_info(dst);
return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
- dst->dev, skb, daddr);
+ dst_dev(dst), skb, daddr);
}
static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
- struct net_device *dev = dst->dev;
- struct rt6_info *rt = (struct rt6_info *)dst;
+ const struct rt6_info *rt = dst_rt6_info(dst);
+ struct net_device *dev = dst_dev(dst);
daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
if (!daddr)
@@ -293,7 +292,7 @@ static const struct fib6_info fib6_null_entry_template = {
static const struct rt6_info ip6_null_entry_template = {
.dst = {
- .__refcnt = ATOMIC_INIT(1),
+ .__rcuref = RCUREF_INIT(1),
.__use = 1,
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -ENETUNREACH,
@@ -307,7 +306,7 @@ static const struct rt6_info ip6_null_entry_template = {
static const struct rt6_info ip6_prohibit_entry_template = {
.dst = {
- .__refcnt = ATOMIC_INIT(1),
+ .__rcuref = RCUREF_INIT(1),
.__use = 1,
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EACCES,
@@ -319,7 +318,7 @@ static const struct rt6_info ip6_prohibit_entry_template = {
static const struct rt6_info ip6_blk_hole_entry_template = {
.dst = {
- .__refcnt = ATOMIC_INIT(1),
+ .__rcuref = RCUREF_INIT(1),
.__use = 1,
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
@@ -334,7 +333,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
static void rt6_info_init(struct rt6_info *rt)
{
memset_after(rt, 0, dst);
- INIT_LIST_HEAD(&rt->rt6i_uncached);
}
/* allocate dst with ip6_dst_ops */
@@ -342,7 +340,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
int flags)
{
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
- 1, DST_OBSOLETE_FORCE_CHK, flags);
+ DST_OBSOLETE_FORCE_CHK, flags);
if (rt) {
rt6_info_init(rt);
@@ -355,7 +353,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct fib6_info *from;
struct inet6_dev *idev;
@@ -368,15 +366,15 @@ static void ip6_dst_destroy(struct dst_entry *dst)
in6_dev_put(idev);
}
- from = xchg((__force struct fib6_info **)&rt->from, NULL);
+ from = unrcu_pointer(xchg(&rt->from, NULL));
fib6_info_release(from);
}
-static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
- int how)
+static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct inet6_dev *idev = rt->rt6i_idev;
+ struct fib6_info *from;
if (idev && idev->dev != blackhole_netdev) {
struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
@@ -386,14 +384,15 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
in6_dev_put(idev);
}
}
+ from = unrcu_pointer(xchg(&rt->from, NULL));
+ fib6_info_release(from);
}
static bool __rt6_check_expired(const struct rt6_info *rt)
{
if (rt->rt6i_flags & RTF_EXPIRES)
- return time_after(jiffies, rt->dst.expires);
- else
- return false;
+ return time_after(jiffies, READ_ONCE(rt->dst.expires));
+ return false;
}
static bool rt6_check_expired(const struct rt6_info *rt)
@@ -403,21 +402,46 @@ static bool rt6_check_expired(const struct rt6_info *rt)
from = rcu_dereference(rt->from);
if (rt->rt6i_flags & RTF_EXPIRES) {
- if (time_after(jiffies, rt->dst.expires))
+ if (time_after(jiffies, READ_ONCE(rt->dst.expires)))
return true;
} else if (from) {
- return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
+ return READ_ONCE(rt->dst.obsolete) != DST_OBSOLETE_FORCE_CHK ||
fib6_check_expired(from);
}
return false;
}
+static struct fib6_info *
+rt6_multipath_first_sibling_rcu(const struct fib6_info *rt)
+{
+ struct fib6_info *iter;
+ struct fib6_node *fn;
+
+ fn = rcu_dereference(rt->fib6_node);
+ if (!fn)
+ goto out;
+ iter = rcu_dereference(fn->leaf);
+ if (!iter)
+ goto out;
+
+ while (iter) {
+ if (iter->fib6_metric == rt->fib6_metric &&
+ rt6_qualify_for_ecmp(iter))
+ return iter;
+ iter = rcu_dereference(iter->fib6_next);
+ }
+
+out:
+ return NULL;
+}
+
void fib6_select_path(const struct net *net, struct fib6_result *res,
struct flowi6 *fl6, int oif, bool have_oif_match,
const struct sk_buff *skb, int strict)
{
- struct fib6_info *sibling, *next_sibling;
- struct fib6_info *match = res->f6i;
+ struct fib6_info *first, *match = res->f6i;
+ struct fib6_info *sibling;
+ int hash;
if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
goto out;
@@ -425,6 +449,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
if (match->nh && have_oif_match && res->nh)
return;
+ if (skb)
+ IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
+
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
*/
@@ -437,16 +464,25 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
return;
}
- if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
+ first = rt6_multipath_first_sibling_rcu(match);
+ if (!first)
+ goto out;
+
+ hash = fl6->mp_hash;
+ if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound)) {
+ if (rt6_score_route(first->fib6_nh, first->fib6_flags, oif,
+ strict) >= 0)
+ match = first;
goto out;
+ }
- list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
- fib6_siblings) {
+ list_for_each_entry_rcu(sibling, &first->fib6_siblings,
+ fib6_siblings) {
const struct fib6_nh *nh = sibling->fib6_nh;
int nh_upper_bound;
nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
- if (fl6->mp_hash > nh_upper_bound)
+ if (hash > nh_upper_bound)
continue;
if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
break;
@@ -633,25 +669,28 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
nh_gw = &fib6_nh->fib_nh_gw6;
dev = fib6_nh->fib_nh_dev;
- rcu_read_lock_bh();
+ rcu_read_lock();
last_probe = READ_ONCE(fib6_nh->last_probe);
idev = __in6_dev_get(dev);
+ if (!idev)
+ goto out;
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
- if (neigh->nud_state & NUD_VALID)
+ if (READ_ONCE(neigh->nud_state) & NUD_VALID)
goto out;
- write_lock(&neigh->lock);
+ write_lock_bh(&neigh->lock);
if (!(neigh->nud_state & NUD_VALID) &&
time_after(jiffies,
- neigh->updated + idev->cnf.rtr_probe_interval)) {
+ neigh->updated +
+ READ_ONCE(idev->cnf.rtr_probe_interval))) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (work)
__neigh_set_probe_once(neigh);
}
- write_unlock(&neigh->lock);
+ write_unlock_bh(&neigh->lock);
} else if (time_after(jiffies, last_probe +
- idev->cnf.rtr_probe_interval)) {
+ READ_ONCE(idev->cnf.rtr_probe_interval))) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
@@ -667,7 +706,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
}
out:
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}
#else
static inline void rt6_probe(struct fib6_nh *fib6_nh)
@@ -683,25 +722,25 @@ static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
struct neighbour *neigh;
- rcu_read_lock_bh();
+ rcu_read_lock();
neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
&fib6_nh->fib_nh_gw6);
if (neigh) {
- read_lock(&neigh->lock);
- if (neigh->nud_state & NUD_VALID)
+ u8 nud_state = READ_ONCE(neigh->nud_state);
+
+ if (nud_state & NUD_VALID)
ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
- else if (!(neigh->nud_state & NUD_FAILED))
+ else if (!(nud_state & NUD_FAILED))
ret = RT6_NUD_SUCCEED;
else
ret = RT6_NUD_FAIL_PROBE;
#endif
- read_unlock(&neigh->lock);
} else {
ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return ret;
}
@@ -930,6 +969,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
struct net *net = dev_net(dev);
struct route_info *rinfo = (struct route_info *) opt;
struct in6_addr prefix_buf, *prefix;
+ struct fib6_table *table;
unsigned int pref;
unsigned long lifetime;
struct fib6_info *rt;
@@ -988,10 +1028,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
(rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
if (rt) {
- if (!addrconf_finite_timeout(lifetime))
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (!addrconf_finite_timeout(lifetime)) {
fib6_clean_expires(rt);
- else
+ fib6_remove_gc_list(rt);
+ } else {
fib6_set_expires(rt, jiffies + HZ * lifetime);
+ fib6_add_gc_list(rt);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
fib6_info_release(rt);
}
@@ -1096,6 +1144,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
rt->dst.input = ip6_input;
} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
rt->dst.input = ip6_mc_input;
+ rt->dst.output = ip6_mr_output;
} else {
rt->dst.input = ip6_forward;
}
@@ -1277,7 +1326,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
- return (struct rt6_info *) dst;
+ return dst_rt6_info(dst);
dst_release(dst);
@@ -1397,6 +1446,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
struct rt6_info *prev, **p;
p = this_cpu_ptr(res->nh->rt6i_pcpu);
+ /* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */
prev = xchg(p, NULL);
if (prev) {
dst_dev_put(&prev->dst);
@@ -1425,7 +1475,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
if (res->f6i->fib6_destroying) {
struct fib6_info *from;
- from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
+ from = unrcu_pointer(xchg(&pcpu_rt->from, NULL));
fib6_info_release(from);
}
@@ -1442,7 +1492,6 @@ static DEFINE_SPINLOCK(rt6_exception_lock);
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
struct rt6_exception *rt6_ex)
{
- struct fib6_info *from;
struct net *net;
if (!bucket || !rt6_ex)
@@ -1454,8 +1503,6 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
/* purge completely the exception to allow releasing the held resources:
* some [sk] cache may keep the dst around for unlimited time
*/
- from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
- fib6_info_release(from);
dst_dev_put(&rt6_ex->rt6i->dst);
hlist_del_rcu(&rt6_ex->hlist);
@@ -1586,7 +1633,7 @@ static unsigned int fib6_mtu(const struct fib6_result *res)
rcu_read_lock();
idev = __in6_dev_get(dev);
- mtu = idev->cnf.mtu6;
+ mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
}
@@ -1724,6 +1771,7 @@ out:
if (!err) {
spin_lock_bh(&f6i->fib6_table->tb6_lock);
fib6_update_sernum(net, f6i);
+ fib6_add_gc_list(f6i);
spin_unlock_bh(&f6i->fib6_table->tb6_lock);
fib6_force_start_gc(net);
}
@@ -1772,11 +1820,13 @@ static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)
void rt6_flush_exceptions(struct fib6_info *f6i)
{
- if (f6i->nh)
- nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
- f6i);
- else
+ if (f6i->nh) {
+ rcu_read_lock();
+ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions, f6i);
+ rcu_read_unlock();
+ } else {
fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
+ }
}
/* Find cached rt in the hash table inside passed in rt
@@ -2083,13 +2133,14 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
* expired, independently from their aging, as per RFC 8201 section 4
*/
if (!(rt->rt6i_flags & RTF_EXPIRES)) {
- if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
- RT6_TRACE("aging clone %p\n", rt);
+ if (time_after_eq(now, READ_ONCE(rt->dst.lastuse) +
+ gc_args->timeout)) {
+ pr_debug("aging clone %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
- } else if (time_after(jiffies, rt->dst.expires)) {
- RT6_TRACE("purging expired route %p\n", rt);
+ } else if (time_after(jiffies, READ_ONCE(rt->dst.expires))) {
+ pr_debug("purging expired route %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
@@ -2100,8 +2151,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
if (!(neigh && (neigh->flags & NTF_ROUTER))) {
- RT6_TRACE("purging route %p via non-router but gateway\n",
- rt);
+ pr_debug("purging route %p via non-router but gateway\n",
+ rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
@@ -2210,7 +2261,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
strict |= flags & RT6_LOOKUP_F_IFACE;
strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
- if (net->ipv6.devconf_all->forwarding == 0)
+ if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
rcu_read_lock();
@@ -2361,7 +2412,7 @@ static u32 rt6_multipath_custom_hash_outer(const struct net *net,
hash_keys.ports.dst = keys.ports.dst;
*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
static u32 rt6_multipath_custom_hash_inner(const struct net *net,
@@ -2410,7 +2461,7 @@ static u32 rt6_multipath_custom_hash_inner(const struct net *net,
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
hash_keys.ports.dst = keys.ports.dst;
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
static u32 rt6_multipath_custom_hash_skb(const struct net *net,
@@ -2444,12 +2495,16 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
hash_keys.basic.ip_proto = fl6->flowi6_proto;
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
- if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
- hash_keys.ports.src = fl6->fl6_sport;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) {
+ if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT)
+ hash_keys.ports.src = (__force __be16)get_random_u16();
+ else
+ hash_keys.ports.src = fl6->fl6_sport;
+ }
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
hash_keys.ports.dst = fl6->fl6_dport;
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
/* if skb is set it will be used and fl6 can be NULL */
@@ -2471,7 +2526,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 1:
if (skb) {
@@ -2499,11 +2554,14 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
hash_keys.addrs.v6addrs.src = fl6->saddr;
hash_keys.addrs.v6addrs.dst = fl6->daddr;
- hash_keys.ports.src = fl6->fl6_sport;
+ if (fl6->flowi6_flags & FLOWI_FLAG_ANY_SPORT)
+ hash_keys.ports.src = (__force __be16)get_random_u16();
+ else
+ hash_keys.ports.src = fl6->fl6_sport;
hash_keys.ports.dst = fl6->fl6_dport;
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 2:
memset(&hash_keys, 0, sizeof(hash_keys));
@@ -2540,7 +2598,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 3:
if (skb)
@@ -2593,9 +2651,10 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
-struct dst_entry *ip6_route_output_flags_noref(struct net *net,
- const struct sock *sk,
- struct flowi6 *fl6, int flags)
+static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
+ const struct sock *sk,
+ struct flowi6 *fl6,
+ int flags)
{
bool any_src;
@@ -2620,11 +2679,10 @@ struct dst_entry *ip6_route_output_flags_noref(struct net *net,
if (!any_src)
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
- flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
+ flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs));
return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
-EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
struct dst_entry *ip6_route_output_flags(struct net *net,
const struct sock *sk,
@@ -2636,9 +2694,9 @@ struct dst_entry *ip6_route_output_flags(struct net *net,
rcu_read_lock();
dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
- rt6 = (struct rt6_info *)dst;
+ rt6 = dst_rt6_info(dst);
/* For dst cached in uncached_list, refcnt is already taken. */
- if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
+ if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
dst = &net->ipv6.ip6_null_entry->dst;
dst_hold(dst);
}
@@ -2650,11 +2708,11 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
- struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
+ struct rt6_info *rt, *ort = dst_rt6_info(dst_orig);
struct net_device *loopback_dev = net->loopback_dev;
struct dst_entry *new = NULL;
- rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
+ rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev,
DST_OBSOLETE_DEAD, 0);
if (rt) {
rt6_info_init(rt);
@@ -2719,11 +2777,10 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
u32 cookie)
{
if (!__rt6_check_expired(rt) &&
- rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
+ READ_ONCE(rt->dst.obsolete) == DST_OBSOLETE_FORCE_CHK &&
fib6_check(from, cookie))
return &rt->dst;
- else
- return NULL;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
@@ -2733,7 +2790,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
struct fib6_info *from;
struct rt6_info *rt;
- rt = container_of(dst, struct rt6_info, dst);
+ rt = dst_rt6_info(dst);
if (rt->sernum)
return rt6_is_valid(rt) ? dst : NULL;
@@ -2748,7 +2805,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
from = rcu_dereference(rt->from);
if (from && (rt->rt6i_flags & RTF_PCPU ||
- unlikely(!list_empty(&rt->rt6i_uncached))))
+ unlikely(!list_empty(&rt->dst.rt_uncached))))
dst_ret = rt6_dst_from_check(rt, from, cookie);
else
dst_ret = rt6_check(rt, from, cookie);
@@ -2759,24 +2816,24 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
}
EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
-static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
+static void ip6_negative_advice(struct sock *sk,
+ struct dst_entry *dst)
{
- struct rt6_info *rt = (struct rt6_info *) dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
- if (rt) {
- if (rt->rt6i_flags & RTF_CACHE) {
- rcu_read_lock();
- if (rt6_check_expired(rt)) {
- rt6_remove_exception_rt(rt);
- dst = NULL;
- }
- rcu_read_unlock();
- } else {
- dst_release(dst);
- dst = NULL;
+ if (rt->rt6i_flags & RTF_CACHE) {
+ rcu_read_lock();
+ if (rt6_check_expired(rt)) {
+ /* rt/dst can not be destroyed yet,
+ * because of rcu_read_lock()
+ */
+ sk_dst_reset(sk);
+ rt6_remove_exception_rt(rt);
}
+ rcu_read_unlock();
+ return;
}
- return dst;
+ sk_dst_reset(sk);
}
static void ip6_link_failure(struct sk_buff *skb)
@@ -2785,7 +2842,7 @@ static void ip6_link_failure(struct sk_buff *skb)
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
if (rt) {
rcu_read_lock();
if (rt->rt6i_flags & RTF_CACHE) {
@@ -2813,7 +2870,7 @@ static void rt6_update_expires(struct rt6_info *rt0, int timeout)
rcu_read_lock();
from = rcu_dereference(rt0->from);
if (from)
- rt0->dst.expires = from->expires;
+ WRITE_ONCE(rt0->dst.expires, from->expires);
rcu_read_unlock();
}
@@ -2841,7 +2898,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
bool confirm_neigh)
{
const struct in6_addr *daddr, *saddr;
- struct rt6_info *rt6 = (struct rt6_info *)dst;
+ struct rt6_info *rt6 = dst_rt6_info(dst);
/* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
* IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
@@ -2886,7 +2943,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (res.f6i->nh) {
struct fib6_nh_match_arg arg = {
- .dev = dst->dev,
+ .dev = dst_dev_rcu(dst),
.gw = &rt6->rt6i_gateway,
};
@@ -2952,10 +3009,11 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
if (!oif && skb->dev)
oif = l3mdev_master_ifindex(skb->dev);
- ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
+ ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
+ sk_uid(sk));
dst = __sk_dst_get(sk);
- if (!dst || !dst->obsolete ||
+ if (!dst || !READ_ONCE(dst->obsolete) ||
dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
return;
@@ -2974,13 +3032,12 @@ void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
#endif
ip6_dst_store(sk, dst,
- ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
- &sk->sk_v6_daddr : NULL,
+ ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr),
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
- &np->saddr :
+ true :
#endif
- NULL);
+ false);
}
static bool ip6_redirect_nh_match(const struct fib6_result *res,
@@ -3173,22 +3230,26 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
- ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
- sk->sk_uid);
+ ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
+ READ_ONCE(sk->sk_mark), sk_uid(sk));
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
- struct net_device *dev = dst->dev;
unsigned int mtu = dst_mtu(dst);
- struct net *net = dev_net(dev);
+ struct net *net;
mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+ rcu_read_lock();
+
+ net = dst_dev_net_rcu(dst);
if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
+ rcu_read_unlock();
+
/*
* Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
* corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
@@ -3238,8 +3299,8 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
mtu = IPV6_MIN_MTU;
idev = __in6_dev_get(dev);
- if (idev && idev->cnf.mtu6 > mtu)
- mtu = idev->cnf.mtu6;
+ if (idev)
+ mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6));
}
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
@@ -3284,23 +3345,17 @@ out:
return dst;
}
-static int ip6_dst_gc(struct dst_ops *ops)
+static void ip6_dst_gc(struct dst_ops *ops)
{
struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
- int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
unsigned int val;
int entries;
- entries = dst_entries_get_fast(ops);
- if (entries > rt_max_size)
- entries = dst_entries_get_slow(ops);
-
- if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
- entries <= rt_max_size)
+ if (time_after(rt_last_gc + rt_min_interval, jiffies))
goto out;
fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
@@ -3310,7 +3365,6 @@ static int ip6_dst_gc(struct dst_ops *ops)
out:
val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
- return entries > rt_max_size;
}
static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
@@ -3368,6 +3422,7 @@ static int ip6_route_check_nh_onlink(struct net *net,
static int ip6_route_check_nh(struct net *net,
struct fib6_config *cfg,
struct net_device **_dev,
+ netdevice_tracker *dev_tracker,
struct inet6_dev **idev)
{
const struct in6_addr *gw_addr = &cfg->fc_gateway;
@@ -3412,7 +3467,7 @@ static int ip6_route_check_nh(struct net *net,
err = -EHOSTUNREACH;
} else {
*_dev = dev = res.nh->fib_nh_dev;
- dev_hold(dev);
+ netdev_hold(dev, dev_tracker, GFP_ATOMIC);
*idev = in6_dev_get(dev);
}
@@ -3420,7 +3475,9 @@ static int ip6_route_check_nh(struct net *net,
}
static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
- struct net_device **_dev, struct inet6_dev **idev,
+ struct net_device **_dev,
+ netdevice_tracker *dev_tracker,
+ struct inet6_dev **idev,
struct netlink_ext_ack *extack)
{
const struct in6_addr *gw_addr = &cfg->fc_gateway;
@@ -3461,7 +3518,8 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
if (cfg->fc_flags & RTNH_F_ONLINK)
err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
else
- err = ip6_route_check_nh(net, cfg, _dev, idev);
+ err = ip6_route_check_nh(net, cfg, _dev, dev_tracker,
+ idev);
rcu_read_unlock();
@@ -3511,6 +3569,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack)
{
+ netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
int addr_type;
@@ -3528,7 +3587,8 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
err = -ENODEV;
if (cfg->fc_ifindex) {
- dev = dev_get_by_index(net, cfg->fc_ifindex);
+ dev = netdev_get_by_index(net, cfg->fc_ifindex,
+ dev_tracker, gfp_flags);
if (!dev)
goto out;
idev = in6_dev_get(dev);
@@ -3562,11 +3622,11 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
/* hold loopback dev/idev if we haven't done so. */
if (dev != net->loopback_dev) {
if (dev) {
- dev_put(dev);
+ netdev_put(dev, dev_tracker);
in6_dev_put(idev);
}
dev = net->loopback_dev;
- dev_hold(dev);
+ netdev_hold(dev, dev_tracker, gfp_flags);
idev = in6_dev_get(dev);
if (!idev) {
err = -ENODEV;
@@ -3577,7 +3637,8 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
}
if (cfg->fc_flags & RTF_GATEWAY) {
- err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+ err = ip6_validate_gw(net, cfg, &dev, dev_tracker,
+ &idev, extack);
if (err)
goto out;
@@ -3589,7 +3650,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
if (!dev)
goto out;
- if (idev->cnf.disable_ipv6) {
+ if (!idev || idev->cnf.disable_ipv6) {
NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
err = -EACCES;
goto out;
@@ -3618,8 +3679,6 @@ pcpu_alloc:
}
fib6_nh->fib_nh_dev = dev;
- netdev_tracker_alloc(dev, &fib6_nh->fib_nh_dev_tracker, gfp_flags);
-
fib6_nh->fib_nh_oif = dev->ifindex;
err = 0;
out:
@@ -3627,9 +3686,10 @@ out:
in6_dev_put(idev);
if (err) {
- lwtstate_put(fib6_nh->fib_nh_lws);
+ fib_nh_common_release(&fib6_nh->nh_common);
+ fib6_nh->nh_common.nhc_pcpu_rth_output = NULL;
fib6_nh->fib_nh_lws = NULL;
- dev_put(dev);
+ netdev_put(dev, dev_tracker);
}
return err;
@@ -3675,62 +3735,62 @@ void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
}
}
-static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
- gfp_t gfp_flags,
- struct netlink_ext_ack *extack)
+static int fib6_config_validate(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack)
{
- struct net *net = cfg->fc_nlinfo.nl_net;
- struct fib6_info *rt = NULL;
- struct nexthop *nh = NULL;
- struct fib6_table *table;
- struct fib6_nh *fib6_nh;
- int err = -EINVAL;
- int addr_type;
-
/* RTF_PCPU is an internal flag; can not be set by userspace */
if (cfg->fc_flags & RTF_PCPU) {
NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
- goto out;
+ goto errout;
}
/* RTF_CACHE is an internal flag; can not be set by userspace */
if (cfg->fc_flags & RTF_CACHE) {
NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
- goto out;
+ goto errout;
}
if (cfg->fc_type > RTN_MAX) {
NL_SET_ERR_MSG(extack, "Invalid route type");
- goto out;
+ goto errout;
}
if (cfg->fc_dst_len > 128) {
NL_SET_ERR_MSG(extack, "Invalid prefix length");
- goto out;
+ goto errout;
}
+
+#ifdef CONFIG_IPV6_SUBTREES
if (cfg->fc_src_len > 128) {
NL_SET_ERR_MSG(extack, "Invalid source address length");
- goto out;
+ goto errout;
}
-#ifndef CONFIG_IPV6_SUBTREES
+
+ if (cfg->fc_nh_id && cfg->fc_src_len) {
+ NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
+ goto errout;
+ }
+#else
if (cfg->fc_src_len) {
NL_SET_ERR_MSG(extack,
"Specifying source address requires IPV6_SUBTREES to be enabled");
- goto out;
+ goto errout;
}
#endif
- if (cfg->fc_nh_id) {
- nh = nexthop_find_by_id(net, cfg->fc_nh_id);
- if (!nh) {
- NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
- goto out;
- }
- err = fib6_check_nexthop(nh, cfg, extack);
- if (err)
- goto out;
- }
+ return 0;
+errout:
+ return -EINVAL;
+}
+
+static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
+ gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = cfg->fc_nlinfo.nl_net;
+ struct fib6_table *table;
+ struct fib6_info *rt;
+ int err;
- err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
!(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
table = fib6_get_table(net, cfg->fc_table);
@@ -3741,22 +3801,22 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
} else {
table = fib6_new_table(net, cfg->fc_table);
}
+ if (!table) {
+ err = -ENOBUFS;
+ goto err;
+ }
- if (!table)
- goto out;
-
- err = -ENOMEM;
- rt = fib6_info_alloc(gfp_flags, !nh);
- if (!rt)
- goto out;
+ rt = fib6_info_alloc(gfp_flags, !cfg->fc_nh_id);
+ if (!rt) {
+ err = -ENOMEM;
+ goto err;
+ }
- rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
+ rt->fib6_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len,
extack);
if (IS_ERR(rt->fib6_metrics)) {
err = PTR_ERR(rt->fib6_metrics);
- /* Do not leave garbage there. */
- rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
- goto out_free;
+ goto free;
}
if (cfg->fc_flags & RTF_ADDRCONF)
@@ -3764,14 +3824,12 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (cfg->fc_flags & RTF_EXPIRES)
fib6_set_expires(rt, jiffies +
- clock_t_to_jiffies(cfg->fc_expires));
- else
- fib6_clean_expires(rt);
+ clock_t_to_jiffies(cfg->fc_expires));
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
- rt->fib6_protocol = cfg->fc_protocol;
+ rt->fib6_protocol = cfg->fc_protocol;
rt->fib6_table = table;
rt->fib6_metric = cfg->fc_metric;
rt->fib6_type = cfg->fc_type ? : RTN_UNICAST;
@@ -3784,21 +3842,54 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->fib6_src.plen = cfg->fc_src_len;
#endif
- if (nh) {
- if (rt->fib6_src.plen) {
- NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
+ return rt;
+free:
+ kfree(rt);
+err:
+ return ERR_PTR(err);
+}
+
+static int ip6_route_info_create_nh(struct fib6_info *rt,
+ struct fib6_config *cfg,
+ gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = cfg->fc_nlinfo.nl_net;
+ struct fib6_nh *fib6_nh;
+ int err;
+
+ if (cfg->fc_nh_id) {
+ struct nexthop *nh;
+
+ rcu_read_lock();
+
+ nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+ if (!nh) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
goto out_free;
}
+
+ err = fib6_check_nexthop(nh, cfg, extack);
+ if (err)
+ goto out_free;
+
if (!nexthop_get(nh)) {
NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ err = -ENOENT;
goto out_free;
}
+
rt->nh = nh;
fib6_nh = nexthop_fib6_nh(rt->nh);
+
+ rcu_read_unlock();
} else {
+ int addr_type;
+
err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
if (err)
- goto out;
+ goto out_release;
fib6_nh = rt->fib6_nh;
@@ -3817,21 +3908,21 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
err = -EINVAL;
- goto out;
+ goto out_release;
}
rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
rt->fib6_prefsrc.plen = 128;
- } else
- rt->fib6_prefsrc.plen = 0;
+ }
- return rt;
-out:
+ return 0;
+out_release:
fib6_info_release(rt);
- return ERR_PTR(err);
+ return err;
out_free:
+ rcu_read_unlock();
ip_fib_metrics_put(rt->fib6_metrics);
kfree(rt);
- return ERR_PTR(err);
+ return err;
}
int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
@@ -3840,10 +3931,18 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
struct fib6_info *rt;
int err;
+ err = fib6_config_validate(cfg, extack);
+ if (err)
+ return err;
+
rt = ip6_route_info_create(cfg, gfp_flags, extack);
if (IS_ERR(rt))
return PTR_ERR(rt);
+ err = ip6_route_info_create_nh(rt, cfg, gfp_flags, extack);
+ if (err)
+ return err;
+
err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
fib6_info_release(rt);
@@ -4072,9 +4171,9 @@ static int ip6_route_del(struct fib6_config *cfg,
if (rt->nh) {
if (!fib6_info_hold_safe(rt))
continue;
- rcu_read_unlock();
- return __ip6_del_rt(rt, &cfg->fc_nlinfo);
+ err = __ip6_del_rt(rt, &cfg->fc_nlinfo);
+ break;
}
if (cfg->fc_nh_id)
continue;
@@ -4089,13 +4188,13 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
if (!fib6_info_hold_safe(rt))
continue;
- rcu_read_unlock();
/* if gateway was specified only delete the one hop */
if (cfg->fc_flags & RTF_GATEWAY)
- return __ip6_del_rt(rt, &cfg->fc_nlinfo);
-
- return __ip6_del_rt_siblings(rt, cfg);
+ err = __ip6_del_rt(rt, &cfg->fc_nlinfo);
+ else
+ err = __ip6_del_rt_siblings(rt, cfg);
+ break;
}
}
rcu_read_unlock();
@@ -4142,7 +4241,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
in6_dev = __in6_dev_get(skb->dev);
if (!in6_dev)
return;
- if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
+ if (READ_ONCE(in6_dev->cnf.forwarding) ||
+ !READ_ONCE(in6_dev->cnf.accept_redirects))
return;
/* RFC2461 8.1:
@@ -4165,7 +4265,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
}
}
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_REJECT) {
net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
return;
@@ -4199,7 +4299,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (res.f6i->nh) {
struct fib6_nh_match_arg arg = {
- .dev = dst->dev,
+ .dev = dst_dev_rcu(dst),
.gw = &rt->rt6i_gateway,
};
@@ -4355,7 +4455,8 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
const struct in6_addr *gwaddr,
struct net_device *dev,
unsigned int pref,
- u32 defrtr_usr_metric)
+ u32 defrtr_usr_metric,
+ int lifetime)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
@@ -4368,6 +4469,7 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = net,
+ .fc_expires = jiffies_to_clock_t(lifetime * HZ),
};
cfg.fc_gateway = *gwaddr;
@@ -4434,7 +4536,7 @@ static void rtmsg_to_fib6_config(struct net *net,
.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
: RT6_TABLE_MAIN,
.fc_ifindex = rtmsg->rtmsg_ifindex,
- .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
+ .fc_metric = rtmsg->rtmsg_metric,
.fc_expires = rtmsg->rtmsg_info,
.fc_dst_len = rtmsg->rtmsg_dst_len,
.fc_src_len = rtmsg->rtmsg_src_len,
@@ -4461,16 +4563,18 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
rtmsg_to_fib6_config(net, rtmsg, &cfg);
- rtnl_lock();
switch (cmd) {
case SIOCADDRT:
+ /* Only do the default setting of fc_metric in route adding */
+ if (cfg.fc_metric == 0)
+ cfg.fc_metric = IP6_RT_PRIO_USER;
err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
break;
case SIOCDELRT:
err = ip6_route_del(&cfg, NULL);
break;
}
- rtnl_unlock();
+
return err;
}
@@ -4481,13 +4585,14 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
struct dst_entry *dst = skb_dst(skb);
- struct net *net = dev_net(dst->dev);
+ struct net_device *dev = dst_dev(dst);
+ struct net *net = dev_net(dev);
struct inet6_dev *idev;
SKB_DR(reason);
int type;
if (netif_is_l3_master(skb->dev) ||
- dst->dev == net->loopback_dev)
+ dev == net->loopback_dev)
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
else
idev = ip6_dst_idev(dst);
@@ -4524,7 +4629,7 @@ static int ip6_pkt_discard(struct sk_buff *skb)
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- skb->dev = skb_dst(skb)->dev;
+ skb->dev = skb_dst_dev(skb);
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
@@ -4535,7 +4640,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb)
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- skb->dev = skb_dst(skb)->dev;
+ skb->dev = skb_dst_dev(skb);
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
@@ -4546,7 +4651,8 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff
struct fib6_info *addrconf_f6i_alloc(struct net *net,
struct inet6_dev *idev,
const struct in6_addr *addr,
- bool anycast, gfp_t gfp_flags)
+ bool anycast, gfp_t gfp_flags,
+ struct netlink_ext_ack *extack)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
@@ -4559,6 +4665,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
.fc_ignore_dev_down = true,
};
struct fib6_info *f6i;
+ int err;
if (anycast) {
cfg.fc_type = RTN_ANYCAST;
@@ -4568,36 +4675,39 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
cfg.fc_flags |= RTF_LOCAL;
}
- f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
- if (!IS_ERR(f6i)) {
- f6i->dst_nocount = true;
+ f6i = ip6_route_info_create(&cfg, gfp_flags, extack);
+ if (IS_ERR(f6i))
+ return f6i;
- if (!anycast &&
- (net->ipv6.devconf_all->disable_policy ||
- idev->cnf.disable_policy))
- f6i->dst_nopolicy = true;
- }
+ err = ip6_route_info_create_nh(f6i, &cfg, gfp_flags, extack);
+ if (err)
+ return ERR_PTR(err);
+
+ f6i->dst_nocount = true;
+
+ if (!anycast &&
+ (READ_ONCE(net->ipv6.devconf_all->disable_policy) ||
+ READ_ONCE(idev->cnf.disable_policy)))
+ f6i->dst_nopolicy = true;
return f6i;
}
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
- struct net_device *dev;
struct net *net;
struct in6_addr *addr;
};
static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
{
- struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
if (!rt->nh &&
- ((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
rt != net->ipv6.fib6_null_entry &&
- ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
+ ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr) &&
+ !ipv6_chk_addr(net, addr, rt->fib6_nh->fib_nh_dev, 0)) {
spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
rt->fib6_prefsrc.plen = 0;
@@ -4610,7 +4720,6 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
struct net *net = dev_net(ifp->idev->dev);
struct arg_dev_net_ip adni = {
- .dev = ifp->idev->dev,
.net = net,
.addr = &ifp->addr,
};
@@ -4991,14 +5100,63 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
[RTA_NH_ID] = { .type = NLA_U32 },
+ [RTA_FLOWLABEL] = { .type = NLA_BE32 },
};
+static int rtm_to_fib6_multipath_config(struct fib6_config *cfg,
+ struct netlink_ext_ack *extack,
+ bool newroute)
+{
+ struct rtnexthop *rtnh;
+ int remaining;
+
+ remaining = cfg->fc_mp_len;
+ rtnh = (struct rtnexthop *)cfg->fc_mp;
+
+ if (!rtnh_ok(rtnh, remaining)) {
+ NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - no valid nexthops");
+ return -EINVAL;
+ }
+
+ do {
+ bool has_gateway = cfg->fc_flags & RTF_GATEWAY;
+ int attrlen = rtnh_attrlen(rtnh);
+
+ if (attrlen > 0) {
+ struct nlattr *nla, *attrs;
+
+ attrs = rtnh_attrs(rtnh);
+ nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+ if (nla) {
+ if (nla_len(nla) < sizeof(cfg->fc_gateway)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid IPv6 address in RTA_GATEWAY");
+ return -EINVAL;
+ }
+
+ has_gateway = true;
+ }
+ }
+
+ if (newroute && (cfg->fc_nh_id || !has_gateway)) {
+ NL_SET_ERR_MSG(extack,
+ "Device only routes can not be added for IPv6 using the multipath API.");
+ return -EINVAL;
+ }
+
+ rtnh = rtnh_next(rtnh, &remaining);
+ } while (rtnh_ok(rtnh, remaining));
+
+ return lwtunnel_valid_encap_type_attr(cfg->fc_mp, cfg->fc_mp_len, extack);
+}
+
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
- struct rtmsg *rtm;
+ bool newroute = nlh->nlmsg_type == RTM_NEWROUTE;
struct nlattr *tb[RTA_MAX+1];
+ struct rtmsg *rtm;
unsigned int pref;
int err;
@@ -5016,6 +5174,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
+ if (tb[RTA_FLOWLABEL]) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
+ "Flow label cannot be specified for this operation");
+ goto errout;
+ }
+
*cfg = (struct fib6_config){
.fc_table = rtm->rtm_table,
.fc_dst_len = rtm->rtm_dst_len,
@@ -5101,8 +5265,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
- err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
- cfg->fc_mp_len, extack);
+ err = rtm_to_fib6_multipath_config(cfg, extack, newroute);
if (err < 0)
goto errout;
}
@@ -5143,29 +5306,28 @@ errout:
struct rt6_nh {
struct fib6_info *fib6_info;
struct fib6_config r_cfg;
- struct list_head next;
+ struct list_head list;
};
-static int ip6_route_info_append(struct net *net,
- struct list_head *rt6_nh_list,
+static int ip6_route_info_append(struct list_head *rt6_nh_list,
struct fib6_info *rt,
struct fib6_config *r_cfg)
{
struct rt6_nh *nh;
- int err = -EEXIST;
- list_for_each_entry(nh, rt6_nh_list, next) {
+ list_for_each_entry(nh, rt6_nh_list, list) {
/* check if fib6_info already exists */
if (rt6_duplicate_nexthop(nh->fib6_info, rt))
- return err;
+ return -EEXIST;
}
nh = kzalloc(sizeof(*nh), GFP_KERNEL);
if (!nh)
return -ENOMEM;
+
nh->fib6_info = rt;
memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
- list_add_tail(&nh->next, rt6_nh_list);
+ list_add_tail(&nh->list, rt6_nh_list);
return 0;
}
@@ -5181,14 +5343,19 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
* nexthop. Since sibling routes are always added at the end of
* the list, find the first sibling of the last route appended
*/
- if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
- rt = list_first_entry(&rt_last->fib6_siblings,
- struct fib6_info,
- fib6_siblings);
+ rcu_read_lock();
+
+ if ((nlflags & NLM_F_APPEND) && rt_last &&
+ READ_ONCE(rt_last->fib6_nsiblings)) {
+ rt = list_first_or_null_rcu(&rt_last->fib6_siblings,
+ struct fib6_info,
+ fib6_siblings);
}
if (rt)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
+
+ rcu_read_unlock();
}
static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
@@ -5217,37 +5384,30 @@ out:
return should_notify;
}
-static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla,
- struct netlink_ext_ack *extack)
-{
- if (nla_len(nla) < sizeof(*gw)) {
- NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY");
- return -EINVAL;
- }
-
- *gw = nla_get_in6_addr(nla);
-
- return 0;
-}
-
static int ip6_route_multipath_add(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
+ struct rt6_nh *nh, *nh_safe;
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
- struct fib6_info *rt;
+ LIST_HEAD(rt6_nh_list);
struct rt6_nh *err_nh;
- struct rt6_nh *nh, *nh_safe;
+ struct fib6_info *rt;
__u16 nlflags;
int remaining;
int attrlen;
- int err = 1;
+ int replace;
int nhn = 0;
- int replace = (cfg->fc_nlinfo.nlh &&
- (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
- LIST_HEAD(rt6_nh_list);
+ int err;
+
+ err = fib6_config_validate(cfg, extack);
+ if (err)
+ return err;
+
+ replace = (cfg->fc_nlinfo.nlh &&
+ (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
@@ -5270,18 +5430,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla) {
- err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
- extack);
- if (err)
- goto cleanup;
-
+ r_cfg.fc_gateway = nla_get_in6_addr(nla);
r_cfg.fc_flags |= RTF_GATEWAY;
}
- r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
- /* RTA_ENCAP_TYPE length checked in
- * lwtunnel_valid_encap_type_attr
- */
+ r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
if (nla)
r_cfg.fc_encap_type = nla_get_u16(nla);
@@ -5294,18 +5447,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
rt = NULL;
goto cleanup;
}
- if (!rt6_qualify_for_ecmp(rt)) {
- err = -EINVAL;
- NL_SET_ERR_MSG(extack,
- "Device only routes can not be added for IPv6 using the multipath API.");
- fib6_info_release(rt);
+
+ err = ip6_route_info_create_nh(rt, &r_cfg, GFP_KERNEL, extack);
+ if (err) {
+ rt = NULL;
goto cleanup;
}
rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
- err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
- rt, &r_cfg);
+ err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
fib6_info_release(rt);
goto cleanup;
@@ -5314,12 +5465,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
rtnh = rtnh_next(rtnh, &remaining);
}
- if (list_empty(&rt6_nh_list)) {
- NL_SET_ERR_MSG(extack,
- "Invalid nexthop configuration - no valid nexthops");
- return -EINVAL;
- }
-
/* for add and replace send one notification with all nexthops.
* Skip the notification in fib6_add_rt2node and send one with
* the full route when done
@@ -5332,21 +5477,9 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
info->skip_notify_kernel = 1;
err_nh = NULL;
- list_for_each_entry(nh, &rt6_nh_list, next) {
+ list_for_each_entry(nh, &rt6_nh_list, list) {
err = __ip6_ins_rt(nh->fib6_info, info, extack);
- fib6_info_release(nh->fib6_info);
- if (!err) {
- /* save reference to last route successfully inserted */
- rt_last = nh->fib6_info;
-
- /* save reference to first route for notification */
- if (!rt_notif)
- rt_notif = nh->fib6_info;
- }
-
- /* nh->fib6_info is used or freed at this point, reset to NULL*/
- nh->fib6_info = NULL;
if (err) {
if (replace && nhn)
NL_SET_ERR_MSG_MOD(extack,
@@ -5354,6 +5487,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = nh;
goto add_errout;
}
+ /* save reference to last route successfully inserted */
+ rt_last = nh->fib6_info;
+
+ /* save reference to first route for notification */
+ if (!rt_notif)
+ rt_notif = nh->fib6_info;
/* Because each route is added like a single route we remove
* these flags after the first nexthop: if there is a collision,
@@ -5406,17 +5545,16 @@ add_errout:
ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
/* Delete routes that were already added */
- list_for_each_entry(nh, &rt6_nh_list, next) {
+ list_for_each_entry(nh, &rt6_nh_list, list) {
if (err_nh == nh)
break;
ip6_route_del(&nh->r_cfg, extack);
}
cleanup:
- list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
- if (nh->fib6_info)
- fib6_info_release(nh->fib6_info);
- list_del(&nh->next);
+ list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, list) {
+ fib6_info_release(nh->fib6_info);
+ list_del(&nh->list);
kfree(nh);
}
@@ -5448,21 +5586,15 @@ static int ip6_route_multipath_del(struct fib6_config *cfg,
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla) {
- err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
- extack);
- if (err) {
- last_err = err;
- goto next_rtnh;
- }
-
+ r_cfg.fc_gateway = nla_get_in6_addr(nla);
r_cfg.fc_flags |= RTF_GATEWAY;
}
}
+
err = ip6_route_del(&r_cfg, extack);
if (err)
last_err = err;
-next_rtnh:
rtnh = rtnh_next(rtnh, &remaining);
}
@@ -5479,15 +5611,20 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
- if (cfg.fc_nh_id &&
- !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id)) {
- NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
- return -EINVAL;
+ if (cfg.fc_nh_id) {
+ rcu_read_lock();
+ err = !nexthop_find_by_id(sock_net(skb->sk), cfg.fc_nh_id);
+ rcu_read_unlock();
+
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+ return -EINVAL;
+ }
}
- if (cfg.fc_mp)
+ if (cfg.fc_mp) {
return ip6_route_multipath_del(&cfg, extack);
- else {
+ } else {
cfg.fc_delete_all_nh = 1;
return ip6_route_del(&cfg, extack);
}
@@ -5533,27 +5670,34 @@ static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
static size_t rt6_nlmsg_size(struct fib6_info *f6i)
{
+ struct fib6_info *sibling;
+ struct fib6_nh *nh;
int nexthop_len;
if (f6i->nh) {
nexthop_len = nla_total_size(4); /* RTA_NH_ID */
nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
&nexthop_len);
- } else {
- struct fib6_nh *nh = f6i->fib6_nh;
-
- nexthop_len = 0;
- if (f6i->fib6_nsiblings) {
- nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
- + NLA_ALIGN(sizeof(struct rtnexthop))
- + nla_total_size(16) /* RTA_GATEWAY */
- + lwtunnel_get_encap_size(nh->fib_nh_lws);
+ goto common;
+ }
- nexthop_len *= f6i->fib6_nsiblings;
+ rcu_read_lock();
+retry:
+ nh = f6i->fib6_nh;
+ nexthop_len = 0;
+ if (READ_ONCE(f6i->fib6_nsiblings)) {
+ rt6_nh_nlmsg_size(nh, &nexthop_len);
+
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
+ rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
+ if (!READ_ONCE(f6i->fib6_nsiblings))
+ goto retry;
}
- nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
-
+ rcu_read_unlock();
+ nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
+common:
return NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(16) /* RTA_SRC */
+ nla_total_size(16) /* RTA_DST */
@@ -5605,7 +5749,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
int iif, int type, u32 portid, u32 seq,
unsigned int flags)
{
- struct rt6_info *rt6 = (struct rt6_info *)dst;
+ struct rt6_info *rt6 = dst_rt6_info(dst);
struct rt6key *rt6_dst, *rt6_src;
u32 *pmetrics, table, rt6_flags;
unsigned char nh_flags = 0;
@@ -5679,7 +5823,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
} else if (dest) {
struct in6_addr saddr_buf;
- if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
+ if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 &&
nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
goto nla_put_failure;
}
@@ -5702,18 +5846,20 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
* each as a nexthop within RTA_MULTIPATH.
*/
if (rt6) {
+ struct net_device *dev;
+
if (rt6_flags & RTF_GATEWAY &&
nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
goto nla_put_failure;
- if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
+ dev = dst_dev(dst);
+ if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
goto nla_put_failure;
- if (dst->lwtstate &&
- lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
+ if (lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
goto nla_put_failure;
- } else if (rt->fib6_nsiblings) {
- struct fib6_info *sibling, *next_sibling;
+ } else if (READ_ONCE(rt->fib6_nsiblings)) {
+ struct fib6_info *sibling;
struct nlattr *mp;
mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
@@ -5725,14 +5871,21 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
0) < 0)
goto nla_put_failure;
- list_for_each_entry_safe(sibling, next_sibling,
- &rt->fib6_siblings, fib6_siblings) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sibling, &rt->fib6_siblings,
+ fib6_siblings) {
if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
sibling->fib6_nh->fib_nh_weight,
- AF_INET6, 0) < 0)
+ AF_INET6, 0) < 0) {
+ rcu_read_unlock();
+
goto nla_put_failure;
+ }
}
+ rcu_read_unlock();
+
nla_nest_end(skb, mp);
} else if (rt->nh) {
if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
@@ -5755,7 +5908,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
}
if (rt6_flags & RTF_EXPIRES) {
- expires = dst ? dst->expires : rt->expires;
+ expires = dst ? READ_ONCE(dst->expires) : rt->expires;
expires -= jiffies;
}
@@ -5807,16 +5960,21 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
if (f6i->fib6_nh->fib_nh_dev == dev)
return true;
- if (f6i->fib6_nsiblings) {
- struct fib6_info *sibling, *next_sibling;
+ if (READ_ONCE(f6i->fib6_nsiblings)) {
+ const struct fib6_info *sibling;
- list_for_each_entry_safe(sibling, next_sibling,
- &f6i->fib6_siblings, fib6_siblings) {
- if (sibling->fib6_nh->fib_nh_dev == dev)
+ rcu_read_lock();
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
+ if (sibling->fib6_nh->fib_nh_dev == dev) {
+ rcu_read_unlock();
return true;
+ }
+ if (!READ_ONCE(f6i->fib6_nsiblings))
+ break;
}
+ rcu_read_unlock();
}
-
return false;
}
@@ -5955,7 +6113,8 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
struct rtmsg *rtm;
int i, err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ rtm = nlmsg_payload(nlh, sizeof(*rtm));
+ if (!rtm) {
NL_SET_ERR_MSG_MOD(extack,
"Invalid header for get route request");
return -EINVAL;
@@ -5965,7 +6124,6 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_ipv6_policy, extack);
- rtm = nlmsg_data(nlh);
if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
(rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
@@ -5990,6 +6148,13 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
return -EINVAL;
}
+ if (tb[RTA_FLOWLABEL] &&
+ (nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
+ "Invalid flow label");
+ return -EINVAL;
+ }
+
for (i = 0; i <= RTA_MAX; i++) {
if (!tb[i])
continue;
@@ -6004,6 +6169,7 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
case RTA_SPORT:
case RTA_DPORT:
case RTA_IP_PROTO:
+ case RTA_FLOWLABEL:
break;
default:
NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
@@ -6026,6 +6192,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct sk_buff *skb;
struct rtmsg *rtm;
struct flowi6 fl6 = {};
+ __be32 flowlabel;
bool fibmatch;
err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
@@ -6034,7 +6201,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = -EINVAL;
rtm = nlmsg_data(nlh);
- fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
if (tb[RTA_SRC]) {
@@ -6080,6 +6246,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
}
+ flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0);
+ fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel);
+
if (iif) {
struct net_device *dev;
int flags = 0;
@@ -6108,7 +6277,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
- rt = container_of(dst, struct rt6_info, dst);
+ rt = dst_rt6_info(dst);
if (rt->dst.error) {
err = rt->dst.error;
ip6_rt_put(rt);
@@ -6161,32 +6330,42 @@ errout:
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int nlm_flags)
{
- struct sk_buff *skb;
struct net *net = info->nl_net;
+ struct sk_buff *skb;
+ size_t sz;
u32 seq;
int err;
err = -ENOBUFS;
seq = info->nlh ? info->nlh->nlmsg_seq : 0;
- skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
+ rcu_read_lock();
+ sz = rt6_nlmsg_size(rt);
+retry:
+ skb = nlmsg_new(sz, GFP_ATOMIC);
if (!skb)
goto errout;
err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
event, info->portid, seq, nlm_flags);
if (err < 0) {
- /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
- WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
+ /* -EMSGSIZE implies needed space grew under us. */
+ if (err == -EMSGSIZE) {
+ sz = max(rt6_nlmsg_size(rt), sz << 1);
+ goto retry;
+ }
goto errout;
}
+
+ rcu_read_unlock();
+
rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
- info->nlh, gfp_any());
+ info->nlh, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+ rcu_read_unlock();
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
void fib6_rt_update(struct net *net, struct fib6_info *rt,
@@ -6212,8 +6391,7 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
info->nlh, gfp_any());
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
@@ -6326,7 +6504,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
#ifdef CONFIG_SYSCTL
-static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
+static int ipv6_sysctl_rtcache_flush(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net;
@@ -6335,12 +6513,12 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
if (!write)
return -EINVAL;
- net = (struct net *)ctl->extra1;
- delay = net->ipv6.sysctl.flush_delay;
ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (ret)
return ret;
+ net = (struct net *)ctl->extra1;
+ delay = net->ipv6.sysctl.flush_delay;
fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
return 0;
}
@@ -6419,13 +6597,12 @@ static struct ctl_table ipv6_route_table_template[] = {
{
.procname = "skip_notify_on_dev_down",
.data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { }
};
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
@@ -6449,14 +6626,19 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[1].procname = NULL;
}
return table;
}
+
+size_t ipv6_route_sysctl_table_size(struct net *net)
+{
+ /* Don't export sysctls to unprivileged users */
+ if (net->user_ns != &init_user_ns)
+ return 1;
+
+ return ARRAY_SIZE(ipv6_route_table_template);
+}
#endif
static int __net_init ip6_route_net_init(struct net *net)
@@ -6483,7 +6665,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
ip6_template_metrics, true);
- INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->rt6i_uncached);
+ INIT_LIST_HEAD(&net->ipv6.ip6_null_entry->dst.rt_uncached);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.fib6_has_custom_rules = false;
@@ -6495,7 +6677,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
ip6_template_metrics, true);
- INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->rt6i_uncached);
+ INIT_LIST_HEAD(&net->ipv6.ip6_prohibit_entry->dst.rt_uncached);
net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -6505,14 +6687,14 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
ip6_template_metrics, true);
- INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
+ INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->dst.rt_uncached);
#ifdef CONFIG_IPV6_SUBTREES
net->ipv6.fib6_routes_require_src = 0;
#endif
#endif
net->ipv6.sysctl.flush_delay = 0;
- net->ipv6.sysctl.ip6_rt_max_size = 4096;
+ net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
@@ -6637,8 +6819,7 @@ void __init ip6_route_init_special_entries(void)
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
-BTF_ID_LIST(btf_fib6_info_id)
-BTF_ID(struct, fib6_info)
+BTF_ID_LIST_SINGLE(btf_fib6_info_id, struct, fib6_info)
static const struct bpf_iter_seq_info ipv6_route_seq_info = {
.seq_ops = &ipv6_route_seq_ops,
@@ -6670,6 +6851,15 @@ static void bpf_iter_unregister(void)
#endif
#endif
+static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE,
+ .doit = inet6_rtm_newroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE,
+ .doit = inet6_rtm_delroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
+ .doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+};
+
int __init ip6_route_init(void)
{
int ret;
@@ -6712,19 +6902,7 @@ int __init ip6_route_init(void)
if (ret)
goto fib6_rules_init;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
- inet6_rtm_newroute, NULL, 0);
- if (ret < 0)
- goto out_register_late_subsys;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
- inet6_rtm_delroute, NULL, 0);
- if (ret < 0)
- goto out_register_late_subsys;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
- inet6_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
+ ret = rtnl_register_many(ip6_route_rtnl_msg_handlers);
if (ret < 0)
goto out_register_late_subsys;
@@ -6744,7 +6922,6 @@ int __init ip6_route_init(void)
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
INIT_LIST_HEAD(&ul->head);
- INIT_LIST_HEAD(&ul->quarantine);
spin_lock_init(&ul->lock);
}