summaryrefslogtreecommitdiff
path: root/net/ipv4/fib_semantics.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/fib_semantics.c')
-rw-r--r--net/ipv4/fib_semantics.c358
1 files changed, 177 insertions, 181 deletions
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 5eb1b8d302bb..d643bd1a0d9d 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -50,17 +50,6 @@
#include "fib_lookup.h"
-static DEFINE_SPINLOCK(fib_info_lock);
-static struct hlist_head *fib_info_hash;
-static struct hlist_head *fib_info_laddrhash;
-static unsigned int fib_info_hash_size;
-static unsigned int fib_info_hash_bits;
-static unsigned int fib_info_cnt;
-
-#define DEVINDEX_HASHBITS 8
-#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
-static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
-
/* for_nexthops and change_nexthops only used when nexthop object
* is not set in a fib_info. The logic within can reference fib_nh.
*/
@@ -254,18 +243,16 @@ void free_fib_info(struct fib_info *fi)
return;
}
- call_rcu(&fi->rcu, free_fib_info_rcu);
+ call_rcu_hurry(&fi->rcu, free_fib_info_rcu);
}
EXPORT_SYMBOL_GPL(free_fib_info);
void fib_release_info(struct fib_info *fi)
{
- spin_lock_bh(&fib_info_lock);
+ ASSERT_RTNL();
if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
hlist_del(&fi->fib_hash);
-
- /* Paired with READ_ONCE() in fib_create_info(). */
- WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1);
+ fi->fib_net->ipv4.fib_info_cnt--;
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
@@ -275,14 +262,13 @@ void fib_release_info(struct fib_info *fi)
change_nexthops(fi) {
if (!nexthop_nh->fib_nh_dev)
continue;
- hlist_del(&nexthop_nh->nh_hash);
+ hlist_del_rcu(&nexthop_nh->nh_hash);
} endfor_nexthops(fi)
}
/* Paired with READ_ONCE() from fib_table_lookup() */
WRITE_ONCE(fi->fib_dead, 1);
fib_info_put(fi);
}
- spin_unlock_bh(&fib_info_lock);
}
static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
@@ -322,17 +308,9 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
return 0;
}
-static inline unsigned int fib_devindex_hashfn(unsigned int val)
-{
- return hash_32(val, DEVINDEX_HASHBITS);
-}
-
-static struct hlist_head *
-fib_info_devhash_bucket(const struct net_device *dev)
+static struct hlist_head *fib_nh_head(struct net_device *dev)
{
- u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
-
- return &fib_info_devhash[fib_devindex_hashfn(val)];
+ return &dev->fib_nh_head;
}
static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -347,15 +325,15 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
return val;
}
-static unsigned int fib_info_hashfn_result(unsigned int val)
+static unsigned int fib_info_hashfn_result(const struct net *net,
+ unsigned int val)
{
- unsigned int mask = (fib_info_hash_size - 1);
-
- return (val ^ (val >> 7) ^ (val >> 12)) & mask;
+ return hash_32(val ^ net_hash_mix(net), net->ipv4.fib_info_hash_bits);
}
-static inline unsigned int fib_info_hashfn(struct fib_info *fi)
+static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi)
{
+ struct net *net = fi->fib_net;
unsigned int val;
val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol,
@@ -363,14 +341,77 @@ static inline unsigned int fib_info_hashfn(struct fib_info *fi)
fi->fib_priority);
if (fi->nh) {
- val ^= fib_devindex_hashfn(fi->nh->id);
+ val ^= fi->nh->id;
} else {
for_nexthops(fi) {
- val ^= fib_devindex_hashfn(nh->fib_nh_oif);
+ val ^= nh->fib_nh_oif;
} endfor_nexthops(fi)
}
- return fib_info_hashfn_result(val);
+ return &net->ipv4.fib_info_hash[fib_info_hashfn_result(net, val)];
+}
+
+static struct hlist_head *fib_info_laddrhash_bucket(const struct net *net,
+ __be32 val)
+{
+ unsigned int hash_bits = net->ipv4.fib_info_hash_bits;
+ u32 slot;
+
+ slot = hash_32(net_hash_mix(net) ^ (__force u32)val, hash_bits);
+
+ return &net->ipv4.fib_info_hash[(1 << hash_bits) + slot];
+}
+
+static struct hlist_head *fib_info_hash_alloc(unsigned int hash_bits)
+{
+ /* The second half is used for prefsrc */
+ return kvcalloc((1 << hash_bits) * 2, sizeof(struct hlist_head),
+ GFP_KERNEL);
+}
+
+static void fib_info_hash_free(struct hlist_head *head)
+{
+ kvfree(head);
+}
+
+static void fib_info_hash_grow(struct net *net)
+{
+ unsigned int old_size = 1 << net->ipv4.fib_info_hash_bits;
+ struct hlist_head *new_info_hash, *old_info_hash;
+ unsigned int i;
+
+ if (net->ipv4.fib_info_cnt < old_size)
+ return;
+
+ new_info_hash = fib_info_hash_alloc(net->ipv4.fib_info_hash_bits + 1);
+ if (!new_info_hash)
+ return;
+
+ old_info_hash = net->ipv4.fib_info_hash;
+ net->ipv4.fib_info_hash = new_info_hash;
+ net->ipv4.fib_info_hash_bits += 1;
+
+ for (i = 0; i < old_size; i++) {
+ struct hlist_head *head = &old_info_hash[i];
+ struct hlist_node *n;
+ struct fib_info *fi;
+
+ hlist_for_each_entry_safe(fi, n, head, fib_hash)
+ hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));
+ }
+
+ for (i = 0; i < old_size; i++) {
+ struct hlist_head *lhead = &old_info_hash[old_size + i];
+ struct hlist_node *n;
+ struct fib_info *fi;
+
+ hlist_for_each_entry_safe(fi, n, lhead, fib_lhash)
+ hlist_add_head(&fi->fib_lhash,
+ fib_info_laddrhash_bucket(fi->fib_net,
+ fi->fib_prefsrc));
+ }
+
+ fib_info_hash_free(old_info_hash);
}
/* no metrics, only nexthop id */
@@ -381,18 +422,17 @@ static struct fib_info *fib_find_info_nh(struct net *net,
struct fib_info *fi;
unsigned int hash;
- hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id),
+ hash = fib_info_hashfn_1(cfg->fc_nh_id,
cfg->fc_protocol, cfg->fc_scope,
(__force u32)cfg->fc_prefsrc,
cfg->fc_priority);
- hash = fib_info_hashfn_result(hash);
- head = &fib_info_hash[hash];
+ hash = fib_info_hashfn_result(net, hash);
+ head = &net->ipv4.fib_info_hash[hash];
hlist_for_each_entry(fi, head, fib_hash) {
- if (!net_eq(fi->fib_net, net))
- continue;
if (!fi->nh || fi->nh->id != cfg->fc_nh_id)
continue;
+
if (cfg->fc_protocol == fi->fib_protocol &&
cfg->fc_scope == fi->fib_scope &&
cfg->fc_prefsrc == fi->fib_prefsrc &&
@@ -408,18 +448,13 @@ static struct fib_info *fib_find_info_nh(struct net *net,
static struct fib_info *fib_find_info(struct fib_info *nfi)
{
- struct hlist_head *head;
+ struct hlist_head *head = fib_info_hash_bucket(nfi);
struct fib_info *fi;
- unsigned int hash;
-
- hash = fib_info_hashfn(nfi);
- head = &fib_info_hash[hash];
hlist_for_each_entry(fi, head, fib_hash) {
- if (!net_eq(fi->fib_net, nfi->fib_net))
- continue;
if (fi->fib_nhs != nfi->fib_nhs)
continue;
+
if (nfi->fib_protocol == fi->fib_protocol &&
nfi->fib_scope == fi->fib_scope &&
nfi->fib_prefsrc == fi->fib_prefsrc &&
@@ -437,28 +472,23 @@ static struct fib_info *fib_find_info(struct fib_info *nfi)
}
/* Check, that the gateway is already configured.
- * Used only by redirect accept routine.
+ * Used only by redirect accept routine, under rcu_read_lock();
*/
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct fib_nh *nh;
- spin_lock(&fib_info_lock);
-
- head = fib_info_devhash_bucket(dev);
+ head = fib_nh_head(dev);
- hlist_for_each_entry(nh, head, nh_hash) {
- if (nh->fib_nh_dev == dev &&
- nh->fib_nh_gw4 == gw &&
+ hlist_for_each_entry_rcu(nh, head, nh_hash) {
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ if (nh->fib_nh_gw4 == gw &&
!(nh->fib_nh_flags & RTNH_F_DEAD)) {
- spin_unlock(&fib_info_lock);
return 0;
}
}
- spin_unlock(&fib_info_lock);
-
return -1;
}
@@ -543,8 +573,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
info->nlh, GFP_KERNEL);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
+ rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
}
static int fib_detect_death(struct fib_info *fi, int order,
@@ -1030,7 +1059,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
bool ecn_ca = false;
nla_strscpy(tmp, nla, sizeof(tmp));
- val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
+ val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
} else {
if (nla_len(nla) != sizeof(u32))
return false;
@@ -1261,66 +1290,6 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
return err;
}
-static struct hlist_head *
-fib_info_laddrhash_bucket(const struct net *net, __be32 val)
-{
- u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
- fib_info_hash_bits);
-
- return &fib_info_laddrhash[slot];
-}
-
-static void fib_info_hash_move(struct hlist_head *new_info_hash,
- struct hlist_head *new_laddrhash,
- unsigned int new_size)
-{
- struct hlist_head *old_info_hash, *old_laddrhash;
- unsigned int old_size = fib_info_hash_size;
- unsigned int i;
-
- spin_lock_bh(&fib_info_lock);
- old_info_hash = fib_info_hash;
- old_laddrhash = fib_info_laddrhash;
- fib_info_hash_size = new_size;
- fib_info_hash_bits = ilog2(new_size);
-
- for (i = 0; i < old_size; i++) {
- struct hlist_head *head = &fib_info_hash[i];
- struct hlist_node *n;
- struct fib_info *fi;
-
- hlist_for_each_entry_safe(fi, n, head, fib_hash) {
- struct hlist_head *dest;
- unsigned int new_hash;
-
- new_hash = fib_info_hashfn(fi);
- dest = &new_info_hash[new_hash];
- hlist_add_head(&fi->fib_hash, dest);
- }
- }
- fib_info_hash = new_info_hash;
-
- fib_info_laddrhash = new_laddrhash;
- for (i = 0; i < old_size; i++) {
- struct hlist_head *lhead = &old_laddrhash[i];
- struct hlist_node *n;
- struct fib_info *fi;
-
- hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
- struct hlist_head *ldest;
-
- ldest = fib_info_laddrhash_bucket(fi->fib_net,
- fi->fib_prefsrc);
- hlist_add_head(&fi->fib_lhash, ldest);
- }
- }
-
- spin_unlock_bh(&fib_info_lock);
-
- kvfree(old_info_hash);
- kvfree(old_laddrhash);
-}
-
__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
unsigned char scope)
{
@@ -1392,6 +1361,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
int nhs = 1;
struct net *net = cfg->fc_nlinfo.nl_net;
+ ASSERT_RTNL();
if (cfg->fc_type > RTN_MAX)
goto err_inval;
@@ -1432,35 +1402,15 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
}
#endif
- err = -ENOBUFS;
-
- /* Paired with WRITE_ONCE() in fib_release_info() */
- if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) {
- unsigned int new_size = fib_info_hash_size << 1;
- struct hlist_head *new_info_hash;
- struct hlist_head *new_laddrhash;
- size_t bytes;
-
- if (!new_size)
- new_size = 16;
- bytes = (size_t)new_size * sizeof(struct hlist_head *);
- new_info_hash = kvzalloc(bytes, GFP_KERNEL);
- new_laddrhash = kvzalloc(bytes, GFP_KERNEL);
- if (!new_info_hash || !new_laddrhash) {
- kvfree(new_info_hash);
- kvfree(new_laddrhash);
- } else {
- fib_info_hash_move(new_info_hash, new_laddrhash, new_size);
- }
- if (!fib_info_hash_size)
- goto failure;
- }
+ fib_info_hash_grow(net);
fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
- if (!fi)
+ if (!fi) {
+ err = -ENOBUFS;
goto failure;
- fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx,
- cfg->fc_mx_len, extack);
+ }
+
+ fi->fib_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack);
if (IS_ERR(fi->fib_metrics)) {
err = PTR_ERR(fi->fib_metrics);
kfree(fi);
@@ -1595,10 +1545,10 @@ link_it:
refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
- spin_lock_bh(&fib_info_lock);
- fib_info_cnt++;
- hlist_add_head(&fi->fib_hash,
- &fib_info_hash[fib_info_hashfn(fi)]);
+
+ net->ipv4.fib_info_cnt++;
+ hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));
+
if (fi->fib_prefsrc) {
struct hlist_head *head;
@@ -1613,11 +1563,10 @@ link_it:
if (!nexthop_nh->fib_nh_dev)
continue;
- head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
- hlist_add_head(&nexthop_nh->nh_hash, head);
+ head = fib_nh_head(nexthop_nh->fib_nh_dev);
+ hlist_add_head_rcu(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi)
}
- spin_unlock_bh(&fib_info_lock);
return fi;
err_inval:
@@ -1881,7 +1830,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
struct fib_info *fi;
int ret = 0;
- if (!fib_info_laddrhash || local == 0)
+ if (!local)
return 0;
head = fib_info_laddrhash_bucket(net, local);
@@ -1967,12 +1916,12 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
- struct hlist_head *head = fib_info_devhash_bucket(dev);
+ struct hlist_head *head = fib_nh_head(dev);
struct fib_nh *nh;
hlist_for_each_entry(nh, head, nh_hash) {
- if (nh->fib_nh_dev == dev)
- fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
}
}
@@ -1986,7 +1935,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
- struct hlist_head *head = fib_info_devhash_bucket(dev);
+ struct hlist_head *head = fib_nh_head(dev);
struct fib_info *prev_fi = NULL;
int scope = RT_SCOPE_NOWHERE;
struct fib_nh *nh;
@@ -2000,7 +1949,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
int dead;
BUG_ON(!fi->fib_nhs);
- if (nh->fib_nh_dev != dev || fi == prev_fi)
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ if (fi == prev_fi)
continue;
prev_fi = fi;
dead = 0;
@@ -2067,8 +2017,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (fa->fa_slen != slen)
continue;
- if (fa->fa_dscp &&
- fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos))
+ if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
continue;
if (fa->tb_id != tb->tb_id)
continue;
@@ -2151,7 +2100,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
}
prev_fi = NULL;
- head = fib_info_devhash_bucket(dev);
+ head = fib_nh_head(dev);
ret = 0;
hlist_for_each_entry(nh, head, nh_hash) {
@@ -2159,7 +2108,8 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
int alive;
BUG_ON(!fi->fib_nhs);
- if (nh->fib_nh_dev != dev || fi == prev_fi)
+ DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
+ if (fi == prev_fi)
continue;
prev_fi = fi;
@@ -2218,34 +2168,52 @@ static bool fib_good_nh(const struct fib_nh *nh)
return !!(state & NUD_VALID);
}
-void fib_select_multipath(struct fib_result *res, int hash)
+void fib_select_multipath(struct fib_result *res, int hash,
+ const struct flowi4 *fl4)
{
struct fib_info *fi = res->fi;
struct net *net = fi->fib_net;
- bool first = false;
+ bool found = false;
+ bool use_neigh;
+ __be32 saddr;
if (unlikely(res->fi->nh)) {
nexthop_path_fib_result(res, hash);
return;
}
+ use_neigh = READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh);
+ saddr = fl4 ? fl4->saddr : 0;
+
change_nexthops(fi) {
- if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
- if (!fib_good_nh(nexthop_nh))
- continue;
- if (!first) {
- res->nh_sel = nhsel;
- res->nhc = &nexthop_nh->nh_common;
- first = true;
- }
+ int nh_upper_bound;
+
+ /* Nexthops without a carrier are assigned an upper bound of
+ * minus one when "ignore_routes_with_linkdown" is set.
+ */
+ nh_upper_bound = atomic_read(&nexthop_nh->fib_nh_upper_bound);
+ if (nh_upper_bound == -1 ||
+ (use_neigh && !fib_good_nh(nexthop_nh)))
+ continue;
+
+ if (!found) {
+ res->nh_sel = nhsel;
+ res->nhc = &nexthop_nh->nh_common;
+ found = !saddr || nexthop_nh->nh_saddr == saddr;
}
- if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound))
+ if (hash > nh_upper_bound)
continue;
- res->nh_sel = nhsel;
- res->nhc = &nexthop_nh->nh_common;
- return;
+ if (!saddr || nexthop_nh->nh_saddr == saddr) {
+ res->nh_sel = nhsel;
+ res->nhc = &nexthop_nh->nh_common;
+ return;
+ }
+
+ if (found)
+ return;
+
} endfor_nexthops(fi);
}
#endif
@@ -2260,7 +2228,7 @@ void fib_select_path(struct net *net, struct fib_result *res,
if (fib_info_num_path(res->fi) > 1) {
int h = fib_multipath_hash(net, fl4, skb, NULL);
- fib_select_multipath(res, h);
+ fib_select_multipath(res, h, fl4);
}
else
#endif
@@ -2270,6 +2238,34 @@ void fib_select_path(struct net *net, struct fib_result *res,
fib_select_default(fl4, res);
check_saddr:
- if (!fl4->saddr)
- fl4->saddr = fib_result_prefsrc(net, res);
+ if (!fl4->saddr) {
+ struct net_device *l3mdev;
+
+ l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev);
+
+ if (!l3mdev ||
+ l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev)
+ fl4->saddr = fib_result_prefsrc(net, res);
+ else
+ fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
+ }
+}
+
+int __net_init fib4_semantics_init(struct net *net)
+{
+ unsigned int hash_bits = 4;
+
+ net->ipv4.fib_info_hash = fib_info_hash_alloc(hash_bits);
+ if (!net->ipv4.fib_info_hash)
+ return -ENOMEM;
+
+ net->ipv4.fib_info_hash_bits = hash_bits;
+ net->ipv4.fib_info_cnt = 0;
+
+ return 0;
+}
+
+void __net_exit fib4_semantics_exit(struct net *net)
+{
+ fib_info_hash_free(net->ipv4.fib_info_hash);
}