summaryrefslogtreecommitdiff
path: root/net/ipv6/ip6_fib.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/ip6_fib.c')
-rw-r--r--net/ipv6/ip6_fib.c403
1 files changed, 254 insertions, 149 deletions
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2438da5ff6da..2111af022d94 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
+ INIT_HLIST_NODE(&f6i->gc_link);
+
return f6i;
}
@@ -196,16 +198,9 @@ static void node_free_immediate(struct net *net, struct fib6_node *fn)
net->ipv6.rt6_stats->fib_nodes--;
}
-static void node_free_rcu(struct rcu_head *head)
-{
- struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
-
- kmem_cache_free(fib6_node_kmem, fn);
-}
-
static void node_free(struct net *net, struct fib6_node *fn)
{
- call_rcu(&fn->rcu, node_free_rcu);
+ kfree_rcu(fn, rcu);
net->ipv6.rt6_stats->fib_nodes--;
}
@@ -246,6 +241,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
+ INIT_HLIST_HEAD(&table->tb6_gc_hlist);
}
return table;
@@ -253,40 +249,52 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
struct fib6_table *fib6_new_table(struct net *net, u32 id)
{
- struct fib6_table *tb;
+ struct fib6_table *tb, *new_tb;
if (id == 0)
id = RT6_TABLE_MAIN;
+
tb = fib6_get_table(net, id);
if (tb)
return tb;
- tb = fib6_alloc_table(net, id);
- if (tb)
- fib6_link_table(net, tb);
+ new_tb = fib6_alloc_table(net, id);
+ if (!new_tb)
+ return NULL;
+
+ spin_lock_bh(&net->ipv6.fib_table_hash_lock);
+
+ tb = fib6_get_table(net, id);
+ if (unlikely(tb)) {
+ spin_unlock_bh(&net->ipv6.fib_table_hash_lock);
+ kfree(new_tb);
+ return tb;
+ }
+
+ fib6_link_table(net, new_tb);
+
+ spin_unlock_bh(&net->ipv6.fib_table_hash_lock);
- return tb;
+ return new_tb;
}
EXPORT_SYMBOL_GPL(fib6_new_table);
struct fib6_table *fib6_get_table(struct net *net, u32 id)
{
- struct fib6_table *tb;
struct hlist_head *head;
- unsigned int h;
+ struct fib6_table *tb;
- if (id == 0)
+ if (!id)
id = RT6_TABLE_MAIN;
- h = id & (FIB6_TABLE_HASHSZ - 1);
- rcu_read_lock();
- head = &net->ipv6.fib_table_hash[h];
- hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
- if (tb->tb6_id == id) {
- rcu_read_unlock();
+
+ head = &net->ipv6.fib_table_hash[id & (FIB6_TABLE_HASHSZ - 1)];
+
+ /* See comment in fib6_link_table(). RCU is not required,
+ * but rcu_dereference_raw() is used to avoid data-race.
+ */
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist, true)
+ if (tb->tb6_id == id)
return tb;
- }
- }
- rcu_read_unlock();
return NULL;
}
@@ -342,17 +350,17 @@ static void __net_init fib6_tables_init(struct net *net)
#endif
-unsigned int fib6_tables_seq_read(struct net *net)
+unsigned int fib6_tables_seq_read(const struct net *net)
{
unsigned int h, fib_seq = 0;
rcu_read_lock();
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
- struct hlist_head *head = &net->ipv6.fib_table_hash[h];
- struct fib6_table *tb;
+ const struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ const struct fib6_table *tb;
hlist_for_each_entry_rcu(tb, head, tb6_hlist)
- fib_seq += tb->fib_seq;
+ fib_seq += READ_ONCE(tb->fib_seq);
}
rcu_read_unlock();
@@ -397,7 +405,7 @@ int call_fib6_entry_notifiers(struct net *net,
.rt = rt,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, event_type, &info.info);
}
@@ -413,7 +421,7 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
.nsiblings = nsiblings,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, event_type, &info.info);
}
@@ -424,7 +432,7 @@ int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
.nsiblings = rt->fib6_nsiblings,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
}
@@ -437,15 +445,17 @@ struct fib6_dump_arg {
static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{
enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
+ unsigned int nsiblings;
int err;
if (!rt || rt == arg->net->ipv6.fib6_null_entry)
return 0;
- if (rt->fib6_nsiblings)
+ nsiblings = READ_ONCE(rt->fib6_nsiblings);
+ if (nsiblings)
err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
rt,
- rt->fib6_nsiblings,
+ nsiblings,
arg->extack);
else
err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
@@ -617,23 +627,25 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
- .filter.dump_routes = true };
+ struct rt6_rtnl_dump_arg arg = {
+ .filter.dump_exceptions = true,
+ .filter.dump_routes = true,
+ .filter.rtnl_held = false,
+ };
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- unsigned int h, s_h;
unsigned int e = 0, s_e;
+ struct hlist_head *head;
struct fib6_walker *w;
struct fib6_table *tb;
- struct hlist_head *head;
- int res = 0;
+ unsigned int h, s_h;
+ int err = 0;
+ rcu_read_lock();
if (cb->strict_check) {
- int err;
-
err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
if (err < 0)
- return err;
+ goto unlock;
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);
@@ -645,19 +657,21 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (!w) {
/* New dump:
*
- * 1. hook callback destructor.
- */
- cb->args[3] = (long)cb->done;
- cb->done = fib6_dump_done;
-
- /*
- * 2. allocate and initialize walker.
+ * 1. allocate and initialize walker.
*/
w = kzalloc(sizeof(*w), GFP_ATOMIC);
- if (!w)
- return -ENOMEM;
+ if (!w) {
+ err = -ENOMEM;
+ goto unlock;
+ }
w->func = fib6_dump_node;
cb->args[2] = (long)w;
+
+ /* 2. hook callback destructor.
+ */
+ cb->args[3] = (long)cb->done;
+ cb->done = fib6_dump_done;
+
}
arg.skb = skb;
@@ -669,46 +683,46 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
tb = fib6_get_table(net, arg.filter.table_id);
if (!tb) {
if (rtnl_msg_family(cb->nlh) != PF_INET6)
- goto out;
+ goto unlock;
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
- return -ENOENT;
+ err = -ENOENT;
+ goto unlock;
}
if (!cb->args[0]) {
- res = fib6_dump_table(tb, skb, cb);
- if (!res)
+ err = fib6_dump_table(tb, skb, cb);
+ if (!err)
cb->args[0] = 1;
}
- goto out;
+ goto unlock;
}
s_h = cb->args[0];
s_e = cb->args[1];
- rcu_read_lock();
for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv6.fib_table_hash[h];
hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
if (e < s_e)
goto next;
- res = fib6_dump_table(tb, skb, cb);
- if (res != 0)
- goto out_unlock;
+ err = fib6_dump_table(tb, skb, cb);
+ if (err != 0)
+ goto out;
next:
e++;
}
}
-out_unlock:
- rcu_read_unlock();
+out:
cb->args[1] = e;
cb->args[0] = h;
-out:
- res = res < 0 ? res : skb->len;
- if (res <= 0)
+
+unlock:
+ rcu_read_unlock();
+ if (err <= 0)
fib6_dump_end(cb);
- return res;
+ return err;
}
void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
@@ -751,8 +765,6 @@ static struct fib6_node *fib6_add_1(struct net *net,
int bit;
__be32 dir = 0;
- RT6_TRACE("fib6_add_1\n");
-
/* insert node in tree */
fn = root;
@@ -953,14 +965,14 @@ insert_above:
}
static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
- const struct fib6_info *match,
- const struct fib6_table *table)
+ const struct fib6_info *match)
{
int cpu;
if (!fib6_nh->rt6i_pcpu)
return;
+ rcu_read_lock();
/* release the reference to this fib entry from
* all of its cached pcpu routes
*/
@@ -969,7 +981,9 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
struct rt6_info *pcpu_rt;
ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
- pcpu_rt = *ppcpu_rt;
+
+ /* Paired with xchg() in rt6_get_pcpu_route() */
+ pcpu_rt = READ_ONCE(*ppcpu_rt);
/* only dropping the 'from' reference if the cached route
* is using 'match'. The cached pcpu_rt->from only changes
@@ -979,27 +993,22 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
struct fib6_info *from;
- from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
+ from = unrcu_pointer(xchg(&pcpu_rt->from, NULL));
fib6_info_release(from);
}
}
+ rcu_read_unlock();
}
-struct fib6_nh_pcpu_arg {
- struct fib6_info *from;
- const struct fib6_table *table;
-};
-
static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg)
{
- struct fib6_nh_pcpu_arg *arg = _arg;
+ struct fib6_info *arg = _arg;
- __fib6_drop_pcpu_from(nh, arg->from, arg->table);
+ __fib6_drop_pcpu_from(nh, arg);
return 0;
}
-static void fib6_drop_pcpu_from(struct fib6_info *f6i,
- const struct fib6_table *table)
+static void fib6_drop_pcpu_from(struct fib6_info *f6i)
{
/* Make sure rt6_make_pcpu_route() wont add other percpu routes
* while we are cleaning them here.
@@ -1008,18 +1017,14 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
if (f6i->nh) {
- struct fib6_nh_pcpu_arg arg = {
- .from = f6i,
- .table = table
- };
-
- nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
- &arg);
+ rcu_read_lock();
+ nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, f6i);
+ rcu_read_unlock();
} else {
struct fib6_nh *fib6_nh;
fib6_nh = f6i->fib6_nh;
- __fib6_drop_pcpu_from(fib6_nh, f6i, table);
+ __fib6_drop_pcpu_from(fib6_nh, f6i);
}
}
@@ -1030,10 +1035,16 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
/* Flush all cached dst in exception table */
rt6_flush_exceptions(rt);
- fib6_drop_pcpu_from(rt, table);
+ fib6_drop_pcpu_from(rt);
+
+ if (rt->nh) {
+ spin_lock(&rt->nh->lock);
- if (rt->nh && !list_empty(&rt->nh_list))
- list_del_init(&rt->nh_list);
+ if (!list_empty(&rt->nh_list))
+ list_del_init(&rt->nh_list);
+
+ spin_unlock(&rt->nh->lock);
+ }
if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
@@ -1057,6 +1068,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
lockdep_is_held(&table->tb6_lock));
}
}
+
+ fib6_clean_expires(rt);
+ fib6_remove_gc_list(rt);
}
/*
@@ -1064,8 +1078,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
- struct nl_info *info,
- struct netlink_ext_ack *extack)
+ struct nl_info *info, struct netlink_ext_ack *extack,
+ struct list_head *purge_list)
{
struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -1114,13 +1128,20 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
if (rt6_duplicate_nexthop(iter, rt)) {
if (rt->fib6_nsiblings)
- rt->fib6_nsiblings = 0;
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
- if (!(rt->fib6_flags & RTF_EXPIRES))
+ if (!(rt->fib6_flags & RTF_EXPIRES)) {
fib6_clean_expires(iter);
- else
+ fib6_remove_gc_list(iter);
+ } else {
fib6_set_expires(iter, rt->expires);
+ fib6_add_gc_list(iter);
+ }
+ if (!(rt->fib6_flags & (RTF_ADDRCONF | RTF_PREFIX_RT))) {
+ iter->fib6_flags &= ~RTF_ADDRCONF;
+ iter->fib6_flags &= ~RTF_PREFIX_RT;
+ }
if (rt->fib6_pmtu)
fib6_metric_set(iter, RTAX_MTU,
@@ -1140,7 +1161,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
*/
if (rt_can_ecmp &&
rt6_qualify_for_ecmp(iter))
- rt->fib6_nsiblings++;
+ WRITE_ONCE(rt->fib6_nsiblings,
+ rt->fib6_nsiblings + 1);
}
if (iter->fib6_metric > rt->fib6_metric)
@@ -1175,8 +1197,8 @@ next_iter:
while (sibling) {
if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) {
- list_add_tail(&rt->fib6_siblings,
- &sibling->fib6_siblings);
+ list_add_tail_rcu(&rt->fib6_siblings,
+ &sibling->fib6_siblings);
break;
}
sibling = rcu_dereference_protected(sibling->fib6_next,
@@ -1190,12 +1212,15 @@ next_iter:
fib6_nsiblings = 0;
list_for_each_entry_safe(sibling, temp_sibling,
&rt->fib6_siblings, fib6_siblings) {
- sibling->fib6_nsiblings++;
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings + 1);
BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
fib6_nsiblings++;
}
BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
+ rcu_read_lock();
rt6_multipath_rebalance(temp_sibling);
+ rcu_read_unlock();
}
/*
@@ -1235,10 +1260,13 @@ add:
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings,
fib6_siblings)
- sibling->fib6_nsiblings--;
- rt->fib6_nsiblings = 0;
- list_del_init(&rt->fib6_siblings);
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings - 1);
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
+ list_del_rcu(&rt->fib6_siblings);
+ rcu_read_lock();
rt6_multipath_rebalance(next_sibling);
+ rcu_read_unlock();
return err;
}
}
@@ -1286,10 +1314,9 @@ add:
}
nsiblings = iter->fib6_nsiblings;
iter->fib6_node = NULL;
- fib6_purge_rt(iter, fn, info->nl_net);
+ list_add(&iter->purge_link, purge_list);
if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
- fib6_info_release(iter);
if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
@@ -1302,10 +1329,9 @@ add:
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->fib6_next;
iter->fib6_node = NULL;
- fib6_purge_rt(iter, fn, info->nl_net);
+ list_add(&iter->purge_link, purge_list);
if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
- fib6_info_release(iter);
nsiblings--;
info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
} else {
@@ -1321,6 +1347,28 @@ add:
return 0;
}
+static int fib6_add_rt2node_nh(struct fib6_node *fn, struct fib6_info *rt,
+ struct nl_info *info, struct netlink_ext_ack *extack,
+ struct list_head *purge_list)
+{
+ int err;
+
+ spin_lock(&rt->nh->lock);
+
+ if (rt->nh->dead) {
+ NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ err = -EINVAL;
+ } else {
+ err = fib6_add_rt2node(fn, rt, info, extack, purge_list);
+ if (!err)
+ list_add(&rt->nh_list, &rt->nh->f6i_list);
+ }
+
+ spin_unlock(&rt->nh->lock);
+
+ return err;
+}
+
static void fib6_start_gc(struct net *net, struct fib6_info *rt)
{
if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
@@ -1375,7 +1423,11 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
struct nl_info *info, struct netlink_ext_ack *extack)
{
struct fib6_table *table = rt->fib6_table;
- struct fib6_node *fn, *pn = NULL;
+ LIST_HEAD(purge_list);
+ struct fib6_node *fn;
+#ifdef CONFIG_IPV6_SUBTREES
+ struct fib6_node *pn = NULL;
+#endif
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
@@ -1399,9 +1451,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
goto out;
}
+#ifdef CONFIG_IPV6_SUBTREES
pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
if (rt->fib6_src.plen) {
struct fib6_node *sn;
@@ -1474,11 +1526,24 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
}
#endif
- err = fib6_add_rt2node(fn, rt, info, extack);
+ if (rt->nh)
+ err = fib6_add_rt2node_nh(fn, rt, info, extack, &purge_list);
+ else
+ err = fib6_add_rt2node(fn, rt, info, extack, &purge_list);
if (!err) {
- if (rt->nh)
- list_add(&rt->nh_list, &rt->nh->f6i_list);
+ struct fib6_info *iter, *next;
+
+ list_for_each_entry_safe(iter, next, &purge_list, purge_link) {
+ list_del(&iter->purge_link);
+ fib6_purge_rt(iter, fn, info->nl_net);
+ fib6_info_release(iter);
+ }
+
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
+
+ if (rt->fib6_flags & RTF_EXPIRES)
+ fib6_add_gc_list(rt);
+
fib6_start_gc(info->nl_net, rt);
}
@@ -1501,13 +1566,9 @@ out:
if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
pn_leaf = fib6_find_prefix(info->nl_net, table,
pn);
-#if RT6_DEBUG >= 2
- if (!pn_leaf) {
- WARN_ON(!pn_leaf);
+ if (!pn_leaf)
pn_leaf =
info->nl_net->ipv6.fib6_null_entry;
- }
-#endif
fib6_info_hold(pn_leaf);
rcu_assign_pointer(pn->leaf, pn_leaf);
}
@@ -1807,7 +1868,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
lockdep_is_held(&table->tb6_lock));
struct fib6_info *new_fn_leaf;
- RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
+ pr_debug("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
iter++;
WARN_ON(fn->fn_flags & RTN_RTINFO);
@@ -1870,7 +1931,8 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
FOR_WALKERS(net, w) {
if (!child) {
if (w->node == fn) {
- RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
+ pr_debug("W %p adjusted by delnode 1, s=%d/%d\n",
+ w, w->state, nstate);
w->node = pn;
w->state = nstate;
}
@@ -1878,10 +1940,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
if (w->node == fn) {
w->node = child;
if (children&2) {
- RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ pr_debug("W %p adjusted by delnode 2, s=%d\n",
+ w, w->state);
w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
} else {
- RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ pr_debug("W %p adjusted by delnode 2, s=%d\n",
+ w, w->state);
w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
}
}
@@ -1909,8 +1973,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
struct net *net = info->nl_net;
bool notify_del = false;
- RT6_TRACE("fib6_del_route\n");
-
/* If the deleted route is the first in the node and it is not part of
* a multipath route, then we need to replace it with the next route
* in the node, if exists.
@@ -1949,9 +2011,10 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
notify_del = true;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings)
- sibling->fib6_nsiblings--;
- rt->fib6_nsiblings = 0;
- list_del_init(&rt->fib6_siblings);
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings - 1);
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
+ list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
}
@@ -1959,7 +2022,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
read_lock(&net->ipv6.fib6_walker_lock);
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
- RT6_TRACE("walker %p adjusted by delroute\n", w);
+ pr_debug("walker %p adjusted by delroute\n", w);
w->leaf = rcu_dereference_protected(rt->fib6_next,
lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
@@ -2285,9 +2348,8 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static int fib6_age(struct fib6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
- struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
/*
@@ -2297,7 +2359,7 @@ static int fib6_age(struct fib6_info *rt, void *arg)
if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
if (time_after(now, rt->expires)) {
- RT6_TRACE("expiring %p\n", rt);
+ pr_debug("expiring %p\n", rt);
return -1;
}
gc_args->more++;
@@ -2312,6 +2374,42 @@ static int fib6_age(struct fib6_info *rt, void *arg)
return 0;
}
+static void fib6_gc_table(struct net *net,
+ struct fib6_table *tb6,
+ struct fib6_gc_args *gc_args)
+{
+ struct fib6_info *rt;
+ struct hlist_node *n;
+ struct nl_info info = {
+ .nl_net = net,
+ .skip_notify = false,
+ };
+
+ hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
+ if (fib6_age(rt, gc_args) == -1)
+ fib6_del(rt, &info);
+}
+
+static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
+{
+ struct fib6_table *table;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+ spin_lock_bh(&table->tb6_lock);
+
+ fib6_gc_table(net, table, gc_args);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
struct fib6_gc_args gc_args;
@@ -2327,7 +2425,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = 0;
- fib6_clean_all(net, fib6_age, &gc_args);
+ fib6_gc_all(net, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
@@ -2336,13 +2434,13 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
round_jiffies(now
+ net->ipv6.sysctl.ip6_rt_gc_interval));
else
- del_timer(&net->ipv6.ip6_fib_timer);
+ timer_delete(&net->ipv6.ip6_fib_timer);
spin_unlock_bh(&net->ipv6.fib6_gc_lock);
}
static void fib6_gc_timer_cb(struct timer_list *t)
{
- struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer);
+ struct net *arg = timer_container_of(arg, t, ipv6.ip6_fib_timer);
fib6_run_gc(0, arg, true);
}
@@ -2376,6 +2474,8 @@ static int __net_init fib6_net_init(struct net *net)
if (!net->ipv6.fib_table_hash)
goto out_rt6_stats;
+ spin_lock_init(&net->ipv6.fib_table_hash_lock);
+
net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
GFP_KERNEL);
if (!net->ipv6.fib6_main_tbl)
@@ -2387,6 +2487,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
+ INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -2399,6 +2500,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
+ INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist);
#endif
fib6_tables_init(net);
@@ -2421,7 +2523,7 @@ static void fib6_net_exit(struct net *net)
{
unsigned int i;
- del_timer_sync(&net->ipv6.ip6_fib_timer);
+ timer_delete_sync(&net->ipv6.ip6_fib_timer);
for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
struct hlist_head *head = &net->ipv6.fib_table_hash[i];
@@ -2444,14 +2546,18 @@ static struct pernet_operations fib6_net_ops = {
.exit = fib6_net_exit,
};
+static const struct rtnl_msg_handler fib6_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
+ .dumpit = inet6_dump_fib,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
+};
+
int __init fib6_init(void)
{
int ret = -ENOMEM;
- fib6_node_kmem = kmem_cache_create("fib6_nodes",
- sizeof(struct fib6_node), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
- NULL);
+ fib6_node_kmem = KMEM_CACHE(fib6_node,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT);
if (!fib6_node_kmem)
goto out;
@@ -2459,8 +2565,7 @@ int __init fib6_init(void)
if (ret)
goto out_kmem_cache_create;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
- inet6_dump_fib, 0);
+ ret = rtnl_register_many(fib6_rtnl_msg_handlers);
if (ret)
goto out_unregister_subsys;
@@ -2491,7 +2596,7 @@ static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
const struct net_device *dev;
if (rt->nh)
- fib6_nh = nexthop_fib6_nh_bh(rt->nh);
+ fib6_nh = nexthop_fib6_nh(rt->nh);
seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
@@ -2556,14 +2661,14 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
if (tbl) {
h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
- node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
+ node = rcu_dereference(hlist_next_rcu(&tbl->tb6_hlist));
} else {
h = 0;
node = NULL;
}
while (!node && h < FIB6_TABLE_HASHSZ) {
- node = rcu_dereference_bh(
+ node = rcu_dereference(
hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
}
return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
@@ -2593,7 +2698,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!v)
goto iter_table;
- n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
+ n = rcu_dereference(((struct fib6_info *)v)->fib6_next);
if (n)
return n;
@@ -2619,12 +2724,12 @@ iter_table:
}
static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(RCU_BH)
+ __acquires(RCU)
{
struct net *net = seq_file_net(seq);
struct ipv6_route_iter *iter = seq->private;
- rcu_read_lock_bh();
+ rcu_read_lock();
iter->tbl = ipv6_route_seq_next_table(NULL, net);
iter->skip = *pos;
@@ -2645,7 +2750,7 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
}
static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU_BH)
+ __releases(RCU)
{
struct net *net = seq_file_net(seq);
struct ipv6_route_iter *iter = seq->private;
@@ -2653,7 +2758,7 @@ static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
if (ipv6_route_iter_active(iter))
fib6_walker_unlink(net, &iter->w);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}
#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)