summary | refs | log | tree | commit | diff
path: root/net/ipv6/ip6_fib.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/ip6_fib.c')
-rw-r--r-- net/ipv6/ip6_fib.c 214
1 files changed, 141 insertions, 73 deletions
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 9180c8b6f764..49884f96232b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -143,20 +143,19 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
{
struct fib6_info *f6i;
+ size_t sz = sizeof(*f6i);
- f6i = kzalloc(sizeof(*f6i), gfp_flags);
+ if (with_fib6_nh)
+ sz += sizeof(struct fib6_nh);
+
+ f6i = kzalloc(sz, gfp_flags);
if (!f6i)
return NULL;
- f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
- if (!f6i->rt6i_pcpu) {
- kfree(f6i);
- return NULL;
- }
-
+ /* fib6_siblings is a union with nh_list, so this initializes both */
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
@@ -166,36 +165,15 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
void fib6_info_destroy_rcu(struct rcu_head *head)
{
struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
- struct rt6_exception_bucket *bucket;
WARN_ON(f6i->fib6_node);
- bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
- kfree(bucket);
-
- if (f6i->rt6i_pcpu) {
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct rt6_info **ppcpu_rt;
- struct rt6_info *pcpu_rt;
-
- ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
- pcpu_rt = *ppcpu_rt;
- if (pcpu_rt) {
- dst_dev_put(&pcpu_rt->dst);
- dst_release(&pcpu_rt->dst);
- *ppcpu_rt = NULL;
- }
- }
-
- free_percpu(f6i->rt6i_pcpu);
- }
-
- fib6_nh_release(&f6i->fib6_nh);
+ if (f6i->nh)
+ nexthop_put(f6i->nh);
+ else
+ fib6_nh_release(f6i->fib6_nh);
ip_fib_metrics_put(f6i->fib6_metrics);
-
kfree(f6i);
}
EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
@@ -338,9 +316,10 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) {
- ip6_rt_put(rt);
+ ip6_rt_put_flags(rt, flags);
rt = net->ipv6.ip6_null_entry;
- dst_hold(&rt->dst);
+ if (!(flags & RT6_LOOKUP_F_DST_NOREF))
+ dst_hold(&rt->dst);
}
return &rt->dst;
@@ -389,14 +368,30 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
return call_fib6_notifier(nb, net, event_type, &info.info);
}
-static int call_fib6_entry_notifiers(struct net *net,
- enum fib_event_type event_type,
- struct fib6_info *rt,
- struct netlink_ext_ack *extack)
+int call_fib6_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct fib6_info *rt,
+ struct netlink_ext_ack *extack)
+{
+ struct fib6_entry_notifier_info info = {
+ .info.extack = extack,
+ .rt = rt,
+ };
+
+ rt->fib6_table->fib_seq++;
+ return call_fib6_notifiers(net, event_type, &info.info);
+}
+
+int call_fib6_multipath_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct fib6_info *rt,
+ unsigned int nsiblings,
+ struct netlink_ext_ack *extack)
{
struct fib6_entry_notifier_info info = {
.info.extack = extack,
.rt = rt,
+ .nsiblings = nsiblings,
};
rt->fib6_table->fib_seq++;
@@ -469,12 +464,19 @@ static int fib6_dump_node(struct fib6_walker *w)
struct fib6_info *rt;
for_each_fib6_walker_rt(w) {
- res = rt6_dump_route(rt, w->args);
- if (res < 0) {
+ res = rt6_dump_route(rt, w->args, w->skip_in_node);
+ if (res >= 0) {
/* Frame is full, suspend walking */
w->leaf = rt;
+
+ /* We'll restart from this node, so if some routes were
+ * already dumped, skip them next time.
+ */
+ w->skip_in_node += res;
+
return 1;
}
+ w->skip_in_node = 0;
/* Multipath routes are dumped in one route with the
* RTA_MULTIPATH attribute. Jump 'rt' to point to the
@@ -526,6 +528,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
if (cb->args[4] == 0) {
w->count = 0;
w->skip = 0;
+ w->skip_in_node = 0;
spin_lock_bh(&table->tb6_lock);
res = fib6_walk(net, w);
@@ -541,6 +544,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
w->state = FWS_INIT;
w->node = w->root;
w->skip = w->count;
+ w->skip_in_node = 0;
} else
w->skip = 0;
@@ -558,9 +562,10 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
+ .filter.dump_routes = true };
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- struct rt6_rtnl_dump_arg arg = {};
unsigned int h, s_h;
unsigned int e = 0, s_e;
struct fib6_walker *w;
@@ -577,13 +582,10 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);
- arg.filter.flags = rtm->rtm_flags & (RTM_F_PREFIX|RTM_F_CLONED);
+ if (rtm->rtm_flags & RTM_F_PREFIX)
+ arg.filter.flags = RTM_F_PREFIX;
}
- /* fib entries are never clones */
- if (arg.filter.flags & RTM_F_CLONED)
- goto out;
-
w = (void *)cb->args[2];
if (!w) {
/* New dump:
@@ -895,16 +897,14 @@ insert_above:
return ln;
}
-static void fib6_drop_pcpu_from(struct fib6_info *f6i,
- const struct fib6_table *table)
+static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
+ const struct fib6_info *match,
+ const struct fib6_table *table)
{
int cpu;
- /* Make sure rt6_make_pcpu_route() wont add other percpu routes
- * while we are cleaning them here.
- */
- f6i->fib6_destroying = 1;
- mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
+ if (!fib6_nh->rt6i_pcpu)
+ return;
/* release the reference to this fib entry from
* all of its cached pcpu routes
@@ -913,9 +913,15 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
struct rt6_info **ppcpu_rt;
struct rt6_info *pcpu_rt;
- ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+ ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
pcpu_rt = *ppcpu_rt;
- if (pcpu_rt) {
+
+ /* only dropping the 'from' reference if the cached route
+ * is using 'match'. The cached pcpu_rt->from only changes
+ * from a fib6_info to NULL (ip6_dst_destroy); it can never
+ * change from one fib6_info reference to another
+ */
+ if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
struct fib6_info *from;
from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
@@ -924,13 +930,53 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
}
}
+struct fib6_nh_pcpu_arg {
+ struct fib6_info *from;
+ const struct fib6_table *table;
+};
+
+static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg)
+{
+ struct fib6_nh_pcpu_arg *arg = _arg;
+
+ __fib6_drop_pcpu_from(nh, arg->from, arg->table);
+ return 0;
+}
+
+static void fib6_drop_pcpu_from(struct fib6_info *f6i,
+ const struct fib6_table *table)
+{
+ /* Make sure rt6_make_pcpu_route() wont add other percpu routes
+ * while we are cleaning them here.
+ */
+ f6i->fib6_destroying = 1;
+ mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
+
+ if (f6i->nh) {
+ struct fib6_nh_pcpu_arg arg = {
+ .from = f6i,
+ .table = table
+ };
+
+ nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
+ &arg);
+ } else {
+ struct fib6_nh *fib6_nh;
+
+ fib6_nh = f6i->fib6_nh;
+ __fib6_drop_pcpu_from(fib6_nh, f6i, table);
+ }
+}
+
static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
struct net *net)
{
struct fib6_table *table = rt->fib6_table;
- if (rt->rt6i_pcpu)
- fib6_drop_pcpu_from(rt, table);
+ fib6_drop_pcpu_from(rt, table);
+
+ if (rt->nh && !list_empty(&rt->nh_list))
+ list_del_init(&rt->nh_list);
if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
@@ -1101,11 +1147,13 @@ next_iter:
add:
nlflags |= NLM_F_CREATE;
- err = call_fib6_entry_notifiers(info->nl_net,
- FIB_EVENT_ENTRY_ADD,
- rt, extack);
- if (err)
- return err;
+ if (!info->skip_notify_kernel) {
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_ADD,
+ rt, extack);
+ if (err)
+ return err;
+ }
rcu_assign_pointer(rt->fib6_next, iter);
fib6_info_hold(rt);
@@ -1130,11 +1178,13 @@ add:
return -ENOENT;
}
- err = call_fib6_entry_notifiers(info->nl_net,
- FIB_EVENT_ENTRY_REPLACE,
- rt, extack);
- if (err)
- return err;
+ if (!info->skip_notify_kernel) {
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_REPLACE,
+ rt, extack);
+ if (err)
+ return err;
+ }
fib6_info_hold(rt);
rcu_assign_pointer(rt->fib6_node, fn);
@@ -1218,6 +1268,14 @@ void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
}
+/* allow ipv4 to update sernum via ipv6_stub */
+void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i)
+{
+ spin_lock_bh(&f6i->fib6_table->tb6_lock);
+ fib6_update_sernum_upto_root(net, f6i);
+ spin_unlock_bh(&f6i->fib6_table->tb6_lock);
+}
+
/*
* Add routing information to the routing tree.
* <destination addr>/<source addr>
@@ -1331,6 +1389,8 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
err = fib6_add_rt2node(fn, rt, info, extack);
if (!err) {
+ if (rt->nh)
+ list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
}
@@ -1536,7 +1596,8 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root,
if (plen == fn->fn_bit)
return fn;
- prev = fn;
+ if (fn->fn_flags & RTN_RTINFO)
+ prev = fn;
next:
/*
@@ -1807,9 +1868,11 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
fib6_purge_rt(rt, fn, net);
- call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
+ if (!info->skip_notify_kernel)
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
+
fib6_info_release(rt);
}
@@ -2041,6 +2104,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
c.w.func = fib6_clean_node;
c.w.count = 0;
c.w.skip = 0;
+ c.w.skip_in_node = 0;
c.func = func;
c.sernum = sernum;
c.arg = arg;
@@ -2292,9 +2356,13 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
struct fib6_info *rt = v;
struct ipv6_route_iter *iter = seq->private;
+ struct fib6_nh *fib6_nh = rt->fib6_nh;
unsigned int flags = rt->fib6_flags;
const struct net_device *dev;
+ if (rt->nh)
+ fib6_nh = nexthop_fib6_nh(rt->nh);
+
seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
#ifdef CONFIG_IPV6_SUBTREES
@@ -2302,14 +2370,14 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
- if (rt->fib6_nh.fib_nh_gw_family) {
+ if (fib6_nh->fib_nh_gw_family) {
flags |= RTF_GATEWAY;
- seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6);
+ seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
} else {
seq_puts(seq, "00000000000000000000000000000000");
}
- dev = rt->fib6_nh.fib_nh_dev;
+ dev = fib6_nh->fib_nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
flags, dev ? dev->name : "");