diff options
Diffstat (limited to 'net/ipv4/fib_semantics.c')
-rw-r--r-- | net/ipv4/fib_semantics.c | 358 |
1 files changed, 177 insertions, 181 deletions
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5eb1b8d302bb..d643bd1a0d9d 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -50,17 +50,6 @@ #include "fib_lookup.h" -static DEFINE_SPINLOCK(fib_info_lock); -static struct hlist_head *fib_info_hash; -static struct hlist_head *fib_info_laddrhash; -static unsigned int fib_info_hash_size; -static unsigned int fib_info_hash_bits; -static unsigned int fib_info_cnt; - -#define DEVINDEX_HASHBITS 8 -#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) -static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; - /* for_nexthops and change_nexthops only used when nexthop object * is not set in a fib_info. The logic within can reference fib_nh. */ @@ -254,18 +243,16 @@ void free_fib_info(struct fib_info *fi) return; } - call_rcu(&fi->rcu, free_fib_info_rcu); + call_rcu_hurry(&fi->rcu, free_fib_info_rcu); } EXPORT_SYMBOL_GPL(free_fib_info); void fib_release_info(struct fib_info *fi) { - spin_lock_bh(&fib_info_lock); + ASSERT_RTNL(); if (fi && refcount_dec_and_test(&fi->fib_treeref)) { hlist_del(&fi->fib_hash); - - /* Paired with READ_ONCE() in fib_create_info(). */ - WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1); + fi->fib_net->ipv4.fib_info_cnt--; if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); @@ -275,14 +262,13 @@ void fib_release_info(struct fib_info *fi) change_nexthops(fi) { if (!nexthop_nh->fib_nh_dev) continue; - hlist_del(&nexthop_nh->nh_hash); + hlist_del_rcu(&nexthop_nh->nh_hash); } endfor_nexthops(fi) } /* Paired with READ_ONCE() from fib_table_lookup() */ WRITE_ONCE(fi->fib_dead, 1); fib_info_put(fi); } - spin_unlock_bh(&fib_info_lock); } static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) @@ -322,17 +308,9 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) return 0; } -static inline unsigned int fib_devindex_hashfn(unsigned int val) -{ - return hash_32(val, DEVINDEX_HASHBITS); -} - -static struct hlist_head * -fib_info_devhash_bucket(const struct net_device *dev) +static struct hlist_head *fib_nh_head(struct net_device *dev) { - u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; - - return &fib_info_devhash[fib_devindex_hashfn(val)]; + return &dev->fib_nh_head; } static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, @@ -347,15 +325,15 @@ static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, return val; } -static unsigned int fib_info_hashfn_result(unsigned int val) +static unsigned int fib_info_hashfn_result(const struct net *net, + unsigned int val) { - unsigned int mask = (fib_info_hash_size - 1); - - return (val ^ (val >> 7) ^ (val >> 12)) & mask; + return hash_32(val ^ net_hash_mix(net), net->ipv4.fib_info_hash_bits); } -static inline unsigned int fib_info_hashfn(struct fib_info *fi) +static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi) { + struct net *net = fi->fib_net; unsigned int val; val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol, @@ -363,14 +341,77 @@ static inline unsigned int fib_info_hashfn(struct fib_info *fi) fi->fib_priority); if (fi->nh) { - val ^= fib_devindex_hashfn(fi->nh->id); + val ^= fi->nh->id; } else { for_nexthops(fi) { - val ^= fib_devindex_hashfn(nh->fib_nh_oif); + val ^= nh->fib_nh_oif; } endfor_nexthops(fi) } - return fib_info_hashfn_result(val); + return &net->ipv4.fib_info_hash[fib_info_hashfn_result(net, val)]; +} + +static struct hlist_head *fib_info_laddrhash_bucket(const struct net *net, + __be32 val) +{ + unsigned int hash_bits = net->ipv4.fib_info_hash_bits; + u32 slot; + + slot = hash_32(net_hash_mix(net) ^ (__force u32)val, hash_bits); + + return &net->ipv4.fib_info_hash[(1 << hash_bits) + slot]; +} + +static struct hlist_head *fib_info_hash_alloc(unsigned int hash_bits) +{ + /* The second half is used for prefsrc */ + return kvcalloc((1 << hash_bits) * 2, sizeof(struct hlist_head), + GFP_KERNEL); +} + +static void fib_info_hash_free(struct hlist_head *head) +{ + kvfree(head); +} + +static void fib_info_hash_grow(struct net *net) +{ + unsigned int old_size = 1 << net->ipv4.fib_info_hash_bits; + struct hlist_head *new_info_hash, *old_info_hash; + unsigned int i; + + if (net->ipv4.fib_info_cnt < old_size) + return; + + new_info_hash = fib_info_hash_alloc(net->ipv4.fib_info_hash_bits + 1); + if (!new_info_hash) + return; + + old_info_hash = net->ipv4.fib_info_hash; + net->ipv4.fib_info_hash = new_info_hash; + net->ipv4.fib_info_hash_bits += 1; + + for (i = 0; i < old_size; i++) { + struct hlist_head *head = &old_info_hash[i]; + struct hlist_node *n; + struct fib_info *fi; + + hlist_for_each_entry_safe(fi, n, head, fib_hash) + hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi)); + } + + for (i = 0; i < old_size; i++) { + struct hlist_head *lhead = &old_info_hash[old_size + i]; + struct hlist_node *n; + struct fib_info *fi; + + hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) + hlist_add_head(&fi->fib_lhash, + fib_info_laddrhash_bucket(fi->fib_net, + fi->fib_prefsrc)); + } + + fib_info_hash_free(old_info_hash); } /* no metrics, only nexthop id */ @@ -381,18 +422,17 @@ static struct fib_info *fib_find_info_nh(struct net *net, struct fib_info *fi; unsigned int hash; - hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id), + hash = fib_info_hashfn_1(cfg->fc_nh_id, cfg->fc_protocol, cfg->fc_scope, (__force u32)cfg->fc_prefsrc, cfg->fc_priority); - hash = fib_info_hashfn_result(hash); - head = &fib_info_hash[hash]; + hash = fib_info_hashfn_result(net, hash); + head = &net->ipv4.fib_info_hash[hash]; hlist_for_each_entry(fi, head, fib_hash) { - if (!net_eq(fi->fib_net, net)) - continue; if (!fi->nh || fi->nh->id != cfg->fc_nh_id) continue; + if (cfg->fc_protocol == fi->fib_protocol && cfg->fc_scope == fi->fib_scope && cfg->fc_prefsrc == fi->fib_prefsrc && @@ -408,18 +448,13 @@ static struct fib_info *fib_find_info_nh(struct net *net, static struct fib_info *fib_find_info(struct fib_info *nfi) { - struct hlist_head *head; + struct hlist_head *head = fib_info_hash_bucket(nfi); struct fib_info *fi; - unsigned int hash; - - hash = fib_info_hashfn(nfi); - head = &fib_info_hash[hash]; hlist_for_each_entry(fi, head, fib_hash) { - if (!net_eq(fi->fib_net, nfi->fib_net)) - continue; if (fi->fib_nhs != nfi->fib_nhs) continue; + if (nfi->fib_protocol == fi->fib_protocol && nfi->fib_scope == fi->fib_scope && nfi->fib_prefsrc == fi->fib_prefsrc && @@ -437,28 +472,23 @@ static struct fib_info *fib_find_info(struct fib_info *nfi) } /* Check, that the gateway is already configured. - * Used only by redirect accept routine. + * Used only by redirect accept routine, under rcu_read_lock(); */ int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct fib_nh *nh; - spin_lock(&fib_info_lock); - - head = fib_info_devhash_bucket(dev); + head = fib_nh_head(dev); - hlist_for_each_entry(nh, head, nh_hash) { - if (nh->fib_nh_dev == dev && - nh->fib_nh_gw4 == gw && + hlist_for_each_entry_rcu(nh, head, nh_hash) { + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + if (nh->fib_nh_gw4 == gw && !(nh->fib_nh_flags & RTNH_F_DEAD)) { - spin_unlock(&fib_info_lock); return 0; } } - spin_unlock(&fib_info_lock); - return -1; } @@ -543,8 +573,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, info->nlh, GFP_KERNEL); return; errout: - if (err < 0) - rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); + rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); } static int fib_detect_death(struct fib_info *fi, int order, @@ -1030,7 +1059,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) bool ecn_ca = false; nla_strscpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(tmp, &ecn_ca); } else { if (nla_len(nla) != sizeof(u32)) return false; @@ -1261,66 +1290,6 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, return err; } -static struct hlist_head * -fib_info_laddrhash_bucket(const struct net *net, __be32 val) -{ - u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val, - fib_info_hash_bits); - - return &fib_info_laddrhash[slot]; -} - -static void fib_info_hash_move(struct hlist_head *new_info_hash, - struct hlist_head *new_laddrhash, - unsigned int new_size) -{ - struct hlist_head *old_info_hash, *old_laddrhash; - unsigned int old_size = fib_info_hash_size; - unsigned int i; - - spin_lock_bh(&fib_info_lock); - old_info_hash = fib_info_hash; - old_laddrhash = fib_info_laddrhash; - fib_info_hash_size = new_size; - fib_info_hash_bits = ilog2(new_size); - - for (i = 0; i < old_size; i++) { - struct hlist_head *head = &fib_info_hash[i]; - struct hlist_node *n; - struct fib_info *fi; - - hlist_for_each_entry_safe(fi, n, head, fib_hash) { - struct hlist_head *dest; - unsigned int new_hash; - - new_hash = fib_info_hashfn(fi); - dest = &new_info_hash[new_hash]; - hlist_add_head(&fi->fib_hash, dest); - } - } - fib_info_hash = new_info_hash; - - fib_info_laddrhash = new_laddrhash; - for (i = 0; i < old_size; i++) { - struct hlist_head *lhead = &old_laddrhash[i]; - struct hlist_node *n; - struct fib_info *fi; - - hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) { - struct hlist_head *ldest; - - ldest = fib_info_laddrhash_bucket(fi->fib_net, - fi->fib_prefsrc); - hlist_add_head(&fi->fib_lhash, ldest); - } - } - - spin_unlock_bh(&fib_info_lock); - - kvfree(old_info_hash); - kvfree(old_laddrhash); -} - __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc, unsigned char scope) { @@ -1392,6 +1361,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, int nhs = 1; struct net *net = cfg->fc_nlinfo.nl_net; + ASSERT_RTNL(); if (cfg->fc_type > RTN_MAX) goto err_inval; @@ -1432,35 +1402,15 @@ struct fib_info *fib_create_info(struct fib_config *cfg, } #endif - err = -ENOBUFS; - - /* Paired with WRITE_ONCE() in fib_release_info() */ - if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) { - unsigned int new_size = fib_info_hash_size << 1; - struct hlist_head *new_info_hash; - struct hlist_head *new_laddrhash; - size_t bytes; - - if (!new_size) - new_size = 16; - bytes = (size_t)new_size * sizeof(struct hlist_head *); - new_info_hash = kvzalloc(bytes, GFP_KERNEL); - new_laddrhash = kvzalloc(bytes, GFP_KERNEL); - if (!new_info_hash || !new_laddrhash) { - kvfree(new_info_hash); - kvfree(new_laddrhash); - } else { - fib_info_hash_move(new_info_hash, new_laddrhash, new_size); - } - if (!fib_info_hash_size) - goto failure; - } + fib_info_hash_grow(net); fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); - if (!fi) + if (!fi) { + err = -ENOBUFS; goto failure; - fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, - cfg->fc_mx_len, extack); + } + + fi->fib_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack); if (IS_ERR(fi->fib_metrics)) { err = PTR_ERR(fi->fib_metrics); kfree(fi); @@ -1595,10 +1545,10 @@ link_it: refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); - spin_lock_bh(&fib_info_lock); - fib_info_cnt++; - hlist_add_head(&fi->fib_hash, - &fib_info_hash[fib_info_hashfn(fi)]); + + net->ipv4.fib_info_cnt++; + hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi)); + if (fi->fib_prefsrc) { struct hlist_head *head; @@ -1613,11 +1563,10 @@ link_it: if (!nexthop_nh->fib_nh_dev) continue; - head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); - hlist_add_head(&nexthop_nh->nh_hash, head); + head = fib_nh_head(nexthop_nh->fib_nh_dev); + hlist_add_head_rcu(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) } - spin_unlock_bh(&fib_info_lock); return fi; err_inval: @@ -1881,7 +1830,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) struct fib_info *fi; int ret = 0; - if (!fib_info_laddrhash || local == 0) + if (!local) return 0; head = fib_info_laddrhash_bucket(net, local); @@ -1967,12 +1916,12 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) { - struct hlist_head *head = fib_info_devhash_bucket(dev); + struct hlist_head *head = fib_nh_head(dev); struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { - if (nh->fib_nh_dev == dev) - fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); } } @@ -1986,7 +1935,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { - struct hlist_head *head = fib_info_devhash_bucket(dev); + struct hlist_head *head = fib_nh_head(dev); struct fib_info *prev_fi = NULL; int scope = RT_SCOPE_NOWHERE; struct fib_nh *nh; @@ -2000,7 +1949,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) int dead; BUG_ON(!fi->fib_nhs); - if (nh->fib_nh_dev != dev || fi == prev_fi) + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + if (fi == prev_fi) continue; prev_fi = fi; dead = 0; @@ -2067,8 +2017,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) if (fa->fa_slen != slen) continue; - if (fa->fa_dscp && - fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos)) + if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp)) continue; if (fa->tb_id != tb->tb_id) continue; @@ -2151,7 +2100,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) } prev_fi = NULL; - head = fib_info_devhash_bucket(dev); + head = fib_nh_head(dev); ret = 0; hlist_for_each_entry(nh, head, nh_hash) { @@ -2159,7 +2108,8 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) int alive; BUG_ON(!fi->fib_nhs); - if (nh->fib_nh_dev != dev || fi == prev_fi) + DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); + if (fi == prev_fi) continue; prev_fi = fi; @@ -2218,34 +2168,52 @@ static bool fib_good_nh(const struct fib_nh *nh) return !!(state & NUD_VALID); } -void fib_select_multipath(struct fib_result *res, int hash) +void fib_select_multipath(struct fib_result *res, int hash, + const struct flowi4 *fl4) { struct fib_info *fi = res->fi; struct net *net = fi->fib_net; - bool first = false; + bool found = false; + bool use_neigh; + __be32 saddr; if (unlikely(res->fi->nh)) { nexthop_path_fib_result(res, hash); return; } + use_neigh = READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh); + saddr = fl4 ? fl4->saddr : 0; + change_nexthops(fi) { - if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) { - if (!fib_good_nh(nexthop_nh)) - continue; - if (!first) { - res->nh_sel = nhsel; - res->nhc = &nexthop_nh->nh_common; - first = true; - } + int nh_upper_bound; + + /* Nexthops without a carrier are assigned an upper bound of + * minus one when "ignore_routes_with_linkdown" is set. + */ + nh_upper_bound = atomic_read(&nexthop_nh->fib_nh_upper_bound); + if (nh_upper_bound == -1 || + (use_neigh && !fib_good_nh(nexthop_nh))) + continue; + + if (!found) { + res->nh_sel = nhsel; + res->nhc = &nexthop_nh->nh_common; + found = !saddr || nexthop_nh->nh_saddr == saddr; } - if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound)) + if (hash > nh_upper_bound) continue; - res->nh_sel = nhsel; - res->nhc = &nexthop_nh->nh_common; - return; + if (!saddr || nexthop_nh->nh_saddr == saddr) { + res->nh_sel = nhsel; + res->nhc = &nexthop_nh->nh_common; + return; + } + + if (found) + return; + } endfor_nexthops(fi); } #endif @@ -2260,7 +2228,7 @@ void fib_select_path(struct net *net, struct fib_result *res, if (fib_info_num_path(res->fi) > 1) { int h = fib_multipath_hash(net, fl4, skb, NULL); - fib_select_multipath(res, h); + fib_select_multipath(res, h, fl4); } else #endif @@ -2270,6 +2238,34 @@ void fib_select_path(struct net *net, struct fib_result *res, fib_select_default(fl4, res); check_saddr: - if (!fl4->saddr) - fl4->saddr = fib_result_prefsrc(net, res); + if (!fl4->saddr) { + struct net_device *l3mdev; + + l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev); + + if (!l3mdev || + l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev) + fl4->saddr = fib_result_prefsrc(net, res); + else + fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK); + } +} + +int __net_init fib4_semantics_init(struct net *net) +{ + unsigned int hash_bits = 4; + + net->ipv4.fib_info_hash = fib_info_hash_alloc(hash_bits); + if (!net->ipv4.fib_info_hash) + return -ENOMEM; + + net->ipv4.fib_info_hash_bits = hash_bits; + net->ipv4.fib_info_cnt = 0; + + return 0; +} + +void __net_exit fib4_semantics_exit(struct net *net) +{ + fib_info_hash_free(net->ipv4.fib_info_hash); } |