diff options
Diffstat (limited to 'net/ipv4/fib_frontend.c')
| -rw-r--r-- | net/ipv4/fib_frontend.c | 406 |
1 files changed, 295 insertions, 111 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index fe4f6a624238..1dab44e13d3b 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -6,11 +7,6 @@ * IPv4 Forwarding Information Base: FIB frontend. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/module.h> @@ -36,6 +32,8 @@ #include <linux/list.h> #include <linux/slab.h> +#include <net/flow.h> +#include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -43,6 +41,7 @@ #include <net/sock.h> #include <net/arp.h> #include <net/ip_fib.h> +#include <net/nexthop.h> #include <net/rtnetlink.h> #include <net/xfrm.h> #include <net/l3mdev.h> @@ -73,11 +72,6 @@ fail: fib_free_table(main_table); return -ENOMEM; } - -static bool fib4_has_custom_rules(struct net *net) -{ - return false; -} #else struct fib_table *fib_new_table(struct net *net, u32 id) @@ -127,17 +121,13 @@ struct fib_table *fib_get_table(struct net *net, u32 id) h = id & (FIB_TABLE_HASHSZ - 1); head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry_rcu(tb, head, tb_hlist) { + hlist_for_each_entry_rcu(tb, head, tb_hlist, + lockdep_rtnl_is_held()) { if (tb->tb_id == id) return tb; } return NULL; } - -static bool fib4_has_custom_rules(struct net *net) -{ - return net->ipv4.fib_has_custom_rules; -} #endif /* CONFIG_IP_MULTIPLE_TABLES */ static void fib_replace_table(struct net *net, struct fib_table *old, @@ -192,7 +182,7 @@ int fib_unmerge(struct net *net) return 0; } -static void fib_flush(struct net *net) +void fib_flush(struct net *net) { int flushed = 0; unsigned int h; @@ -234,7 +224,9 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, if (table) { ret = RTN_UNICAST; if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) { - if (!dev || dev == res.fi->fib_dev) + struct fib_nh_common *nhc = fib_info_nhc(res.fi, 0); + + if (!dev || dev == nhc->nhc_dev) ret = res.type; } } @@ -300,14 +292,14 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, - .flowi4_oif = l3mdev_master_ifindex_rcu(dev), + .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, - .flowi4_tos = RT_TOS(ip_hdr(skb)->tos), + .flowi4_dscp = ip4h_dscp(ip_hdr(skb)), .flowi4_scope = scope, .flowi4_mark = vmark ? skb->mark : 0, }; if (!fib_lookup(net, &fl4, &res, 0)) - return FIB_RES_PREFSRC(net, res); + return fib_result_prefsrc(net, &res); } else { scope = RT_SCOPE_LINK; } @@ -319,21 +311,22 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev) { bool dev_match = false; #ifdef CONFIG_IP_ROUTE_MULTIPATH - int ret; + if (unlikely(fi->nh)) { + dev_match = nexthop_uses_dev(fi->nh, dev); + } else { + int ret; - for (ret = 0; ret < fi->fib_nhs; ret++) { - struct fib_nh *nh = &fi->fib_nh[ret]; + for (ret = 0; ret < fib_info_num_path(fi); ret++) { + const struct fib_nh_common *nhc = fib_info_nhc(fi, ret); - if (nh->nh_dev == dev) { - dev_match = true; - break; - } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) { - dev_match = true; - break; + if (nhc_l3mdev_matches_dev(nhc, dev)) { + dev_match = true; + break; + } } } #else - if (fi->fib_nh[0].nh_dev == dev) + if (fib_info_nhc(fi, 0)->nhc_dev == dev) dev_match = true; #endif @@ -350,10 +343,11 @@ EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev); * called with rcu_read_lock() */ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, int rpf, struct in_device *idev, u32 *itag) { struct net *net = dev_net(dev); + enum skb_drop_reason reason; struct flow_keys flkeys; int ret, no_addr; struct fib_result res; @@ -361,16 +355,16 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); - if (!fl4.flowi4_iif) - fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; + fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev); + fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; - fl4.flowi4_tos = tos; + fl4.flowi4_dscp = dscp; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; fl4.flowi4_uid = sock_net_uid(net, NULL); + fl4.flowi4_multipath_hash = 0; no_addr = idev->ifa_list == NULL; @@ -379,18 +373,31 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_proto = 0; fl4.fl4_sport = 0; fl4.fl4_dport = 0; + } else { + swap(fl4.fl4_sport, fl4.fl4_dport); } if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; - if (res.type != RTN_UNICAST && - (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) - goto e_inval; + if (res.type != RTN_UNICAST) { + if (res.type != RTN_LOCAL) { + reason = SKB_DROP_REASON_IP_INVALID_SOURCE; + goto e_inval; + } else if (!IN_DEV_ACCEPT_LOCAL(idev)) { + reason = SKB_DROP_REASON_IP_LOCAL_SOURCE; + goto e_inval; + } + } fib_combine_itag(itag, &res); dev_match = fib_info_nh_uses_dev(res.fi, dev); + /* This is not common, loopback packets retain skb_dst so normally they + * would not even hit this slow path. + */ + dev_match = dev_match || (res.type == RTN_LOCAL && + dev == net->loopback_dev); if (dev_match) { - ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; return ret; } if (no_addr) @@ -402,7 +409,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, ret = 0; if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) - ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; } return ret; @@ -413,14 +420,14 @@ last_resort: return 0; e_inval: - return -EINVAL; + return -reason; e_rpf: - return -EXDEV; + return -SKB_DROP_REASON_IP_RPFILTER; } /* Ignore rp_filter for packets protected by IPsec. */ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag) { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); @@ -437,8 +444,11 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (net->ipv4.fib_has_custom_local_routes || fib4_has_custom_rules(net)) goto full_check; + /* Within the same container, it is regarded as a martian source, + * and the same host but different containers are not. + */ if (inet_lookup_ifaddr_rcu(net, src)) - return -EINVAL; + return -SKB_DROP_REASON_IP_LOCAL_SOURCE; ok: *itag = 0; @@ -446,7 +456,8 @@ ok: } full_check: - return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); + return __fib_validate_source(skb, src, dst, dscp, oif, dev, r, idev, + itag); } static inline __be32 sk_extract_addr(struct sockaddr *addr) @@ -540,14 +551,20 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, cfg->fc_oif = dev->ifindex; cfg->fc_table = l3mdev_fib_table(dev); if (colon) { - struct in_ifaddr *ifa; - struct in_device *in_dev = __in_dev_get_rtnl(dev); + const struct in_ifaddr *ifa; + struct in_device *in_dev; + + in_dev = __in_dev_get_rtnl_net(dev); if (!in_dev) return -ENODEV; + *colon = ':'; - for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) + + in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev) { if (strcmp(ifa->ifa_label, devname) == 0) break; + } + if (!ifa) return -ENODEV; cfg->fc_prefsrc = ifa->ifa_local; @@ -558,17 +575,21 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, if (rt->rt_gateway.sa_family == AF_INET && addr) { unsigned int addr_type; - cfg->fc_gw = addr; + cfg->fc_gw4 = addr; + cfg->fc_gw_family = AF_INET; addr_type = inet_addr_type_table(net, addr, cfg->fc_table); if (rt->rt_flags & RTF_GATEWAY && addr_type == RTN_UNICAST) cfg->fc_scope = RT_SCOPE_UNIVERSE; } + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + if (cmd == SIOCDELRT) return 0; - if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) + if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw_family) return -EINVAL; if (cfg->fc_scope == RT_SCOPE_NOWHERE) @@ -613,7 +634,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - rtnl_lock(); + rtnl_net_lock(net); err = rtentry_to_fib_config(net, cmd, rt, &cfg); if (err == 0) { struct fib_table *tb; @@ -637,13 +658,14 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt) /* allocated by rtentry_to_fib_config() */ kfree(cfg.fc_mx); } - rtnl_unlock(); + rtnl_net_unlock(net); return err; } return -EINVAL; } const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { + [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 }, [RTA_DST] = { .type = NLA_U32 }, [RTA_SRC] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, @@ -662,26 +684,80 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_IP_PROTO] = { .type = NLA_U8 }, [RTA_SPORT] = { .type = NLA_U16 }, [RTA_DPORT] = { .type = NLA_U16 }, + [RTA_NH_ID] = { .type = NLA_U32 }, }; +int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + struct rtvia *via; + int alen; + + if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) { + NL_SET_ERR_MSG(extack, "Invalid attribute length for RTA_VIA"); + return -EINVAL; + } + + via = nla_data(nla); + alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr); + + switch (via->rtvia_family) { + case AF_INET: + if (alen != sizeof(__be32)) { + NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_VIA"); + return -EINVAL; + } + cfg->fc_gw_family = AF_INET; + cfg->fc_gw4 = *((__be32 *)via->rtvia_addr); + break; + case AF_INET6: +#if IS_ENABLED(CONFIG_IPV6) + if (alen != sizeof(struct in6_addr)) { + NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA"); + return -EINVAL; + } + cfg->fc_gw_family = AF_INET6; + cfg->fc_gw6 = *((struct in6_addr *)via->rtvia_addr); +#else + NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel"); + return -EINVAL; +#endif + break; + default: + NL_SET_ERR_MSG(extack, "Unsupported address family in RTA_VIA"); + return -EINVAL; + } + + return 0; +} + static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh, struct fib_config *cfg, struct netlink_ext_ack *extack) { + bool has_gw = false, has_via = false; struct nlattr *attr; int err, remaining; struct rtmsg *rtm; - err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy, - extack); + err = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, + rtm_ipv4_policy, extack); if (err < 0) goto errout; memset(cfg, 0, sizeof(*cfg)); rtm = nlmsg_data(nlh); + + if (!inet_validate_dscp(rtm->rtm_tos)) { + NL_SET_ERR_MSG(extack, + "Invalid dsfield (tos): ECN bits must be 0"); + err = -EINVAL; + goto errout; + } + cfg->fc_dscp = inet_dsfield_to_dscp(rtm->rtm_tos); + cfg->fc_dst_len = rtm->rtm_dst_len; - cfg->fc_tos = rtm->rtm_tos; cfg->fc_table = rtm->rtm_table; cfg->fc_protocol = rtm->rtm_protocol; cfg->fc_scope = rtm->rtm_scope; @@ -708,7 +784,16 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_oif = nla_get_u32(attr); break; case RTA_GATEWAY: - cfg->fc_gw = nla_get_be32(attr); + has_gw = true; + cfg->fc_gw4 = nla_get_be32(attr); + if (cfg->fc_gw4) + cfg->fc_gw_family = AF_INET; + break; + case RTA_VIA: + has_via = true; + err = fib_gw_from_via(cfg, attr, extack); + if (err) + goto errout; break; case RTA_PRIORITY: cfg->fc_priority = nla_get_u32(attr); @@ -745,9 +830,44 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, if (err < 0) goto errout; break; + case RTA_NH_ID: + cfg->fc_nh_id = nla_get_u32(attr); + break; } } + if (cfg->fc_dst_len > 32) { + NL_SET_ERR_MSG(extack, "Invalid prefix length"); + err = -EINVAL; + goto errout; + } + + if (cfg->fc_dst_len < 32 && (ntohl(cfg->fc_dst) << cfg->fc_dst_len)) { + NL_SET_ERR_MSG(extack, "Invalid prefix for given prefix length"); + err = -EINVAL; + goto errout; + } + + if (cfg->fc_nh_id) { + if (cfg->fc_oif || cfg->fc_gw_family || + cfg->fc_encap || cfg->fc_mp) { + NL_SET_ERR_MSG(extack, + "Nexthop specification and nexthop id are mutually exclusive"); + err = -EINVAL; + goto errout; + } + } + + if (has_gw && has_via) { + NL_SET_ERR_MSG(extack, + "Nexthop configuration can not contain both GATEWAY and VIA"); + err = -EINVAL; + goto errout; + } + + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + return 0; errout: return err; @@ -765,14 +885,24 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + rtnl_net_lock(net); + + if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) { + NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); + err = -EINVAL; + goto unlock; + } + tb = fib_get_table(net, cfg.fc_table); if (!tb) { NL_SET_ERR_MSG(extack, "FIB table does not exist"); err = -ESRCH; - goto errout; + goto unlock; } err = fib_table_delete(net, tb, &cfg, extack); +unlock: + rtnl_net_unlock(net); errout: return err; } @@ -789,15 +919,20 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + rtnl_net_lock(net); + tb = fib_new_table(net, cfg.fc_table); if (!tb) { err = -ENOBUFS; - goto errout; + goto unlock; } err = fib_table_insert(net, tb, &cfg, extack); if (!err && cfg.fc_type == RTN_LOCAL) net->ipv4.fib_has_custom_local_routes = true; + +unlock: + rtnl_net_unlock(net); errout: return err; } @@ -811,32 +946,37 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct rtmsg *rtm; int err, i; - ASSERT_RTNL(); + if (filter->rtnl_held) + ASSERT_RTNL(); - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + rtm = nlmsg_payload(nlh, sizeof(*rtm)); + if (!rtm) { NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request"); return -EINVAL; } - rtm = nlmsg_data(nlh); if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_scope) { NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request"); return -EINVAL; } + if (rtm->rtm_flags & ~(RTM_F_CLONED | RTM_F_PREFIX)) { NL_SET_ERR_MSG(extack, "Invalid flags for FIB dump request"); return -EINVAL; } + if (rtm->rtm_flags & RTM_F_CLONED) + filter->dump_routes = false; + else + filter->dump_exceptions = false; - filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC); filter->flags = rtm->rtm_flags; filter->protocol = rtm->rtm_protocol; filter->rt_type = rtm->rtm_type; filter->table_id = rtm->rtm_table; - err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv4_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); if (err < 0) return err; @@ -852,7 +992,10 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, break; case RTA_OIF: ifindex = nla_get_u32(tb[i]); - filter->dev = __dev_get_by_index(net, ifindex); + if (filter->rtnl_held) + filter->dev = __dev_get_by_index(net, ifindex); + else + filter->dev = dev_get_by_index_rcu(net, ifindex); if (!filter->dev) return -ENODEV; break; @@ -874,48 +1017,52 @@ EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req); static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + struct fib_dump_filter filter = { + .dump_routes = true, + .dump_exceptions = true, + .rtnl_held = false, + }; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - struct fib_dump_filter filter = {}; unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; struct hlist_head *head; - int dumped = 0, err; + int dumped = 0, err = 0; + rcu_read_lock(); if (cb->strict_check) { err = ip_valid_fib_dump_req(net, nlh, &filter, cb); if (err < 0) - return err; + goto unlock; } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { struct rtmsg *rtm = nlmsg_data(nlh); filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED); } - /* fib entries are never clones and ipv4 does not use prefix flag */ - if (filter.flags & (RTM_F_PREFIX | RTM_F_CLONED)) - return skb->len; + /* ipv4 does not use prefix flag */ + if (filter.flags & RTM_F_PREFIX) + goto unlock; if (filter.table_id) { tb = fib_get_table(net, filter.table_id); if (!tb) { - if (filter.dump_all_families) - return skb->len; + if (rtnl_msg_family(cb->nlh) != PF_INET) + goto unlock; NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); - return -ENOENT; + err = -ENOENT; + goto unlock; } - err = fib_table_dump(tb, skb, cb, &filter); - return skb->len ? : err; + goto unlock; } s_h = cb->args[0]; s_e = cb->args[1]; - rcu_read_lock(); - + err = 0; for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv4.fib_table_hash[h]; @@ -926,25 +1073,20 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); err = fib_table_dump(tb, skb, cb, &filter); - if (err < 0) { - if (likely(skb->len)) - goto out; - - goto out_err; - } + if (err < 0) + goto out; dumped = 1; next: e++; } } out: - err = skb->len; -out_err: - rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; +unlock: + rcu_read_unlock(); return err; } @@ -1017,9 +1159,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) return; /* Add broadcast address, if it is explicitly assigned. */ - if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) + if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim, 0); + arp_invalidate(dev, ifa->ifa_broadcast, false); + } if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { @@ -1029,12 +1173,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) prefix, ifa->ifa_prefixlen, prim, ifa->ifa_rt_priority); - /* Add network specific broadcasts, when it takes a sense */ + /* Add the network broadcast address, when it makes sense */ if (ifa->ifa_prefixlen < 31) { - fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, - prim, 0); fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, 32, prim, 0); + arp_invalidate(dev, prefix | ~mask, false); } } } @@ -1048,7 +1191,7 @@ void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric) if (!(dev->flags & IFF_UP) || ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) || ipv4_is_zeronet(prefix) || - prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32) + (prefix == ifa->ifa_local && ifa->ifa_prefixlen == 32)) return; /* add the new */ @@ -1115,8 +1258,8 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) * * Scan address list to be sure that addresses are really gone. */ - - for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { + rcu_read_lock(); + in_dev_for_each_ifa_rcu(ifa1, in_dev) { if (ifa1 == ifa) { /* promotion, keep the IP */ gone = 0; @@ -1184,6 +1327,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) } } } + rcu_read_unlock(); no_promotions: if (!(ok & BRD_OK)) @@ -1229,7 +1373,7 @@ static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn) struct flowi4 fl4 = { .flowi4_mark = frn->fl_mark, .daddr = frn->fl_addr, - .flowi4_tos = frn->fl_tos, + .flowi4_dscp = inet_dsfield_to_dscp(frn->fl_tos), .flowi4_scope = frn->fl_scope, }; struct fib_table *tb; @@ -1276,13 +1420,13 @@ static void nl_fib_input(struct sk_buff *skb) return; nlh = nlmsg_hdr(skb); - frn = (struct fib_result_nl *) nlmsg_data(nlh); + frn = nlmsg_data(nlh); nl_fib_lookup(net, frn); portid = NETLINK_CB(skb).portid; /* netlink portid */ NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT); + nlmsg_unicast(net->ipv4.fibnl, skb, portid); } static int __net_init nl_fib_lookup_init(struct net *net) @@ -1317,7 +1461,7 @@ static void fib_disable_ip(struct net_device *dev, unsigned long event, static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct in_ifaddr *ifa = ptr; struct net_device *dev = ifa->ifa_dev->dev; struct net *net = dev_net(dev); @@ -1328,7 +1472,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, fib_sync_up(dev, RTNH_F_DEAD); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev)); + rt_cache_flush(net); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); @@ -1339,7 +1483,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, */ fib_disable_ip(dev, event, true); } else { - rt_cache_flush(dev_net(dev)); + rt_cache_flush(net); } break; } @@ -1353,6 +1497,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct netdev_notifier_info_ext *info_ext = ptr; struct in_device *in_dev; struct net *net = dev_net(dev); + struct in_ifaddr *ifa; unsigned int flags; if (event == NETDEV_UNREGISTER) { @@ -1367,9 +1512,9 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo switch (event) { case NETDEV_UP: - for_ifa(in_dev) { + in_dev_for_each_ifa_rtnl(ifa, in_dev) { fib_add_ifaddr(ifa); - } endfor_ifa(in_dev); + } #ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev, RTNH_F_DEAD); #endif @@ -1380,7 +1525,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_disable_ip(dev, event, false); break; case NETDEV_CHANGE: - flags = dev_get_flags(dev); + flags = netif_get_flags(dev); if (flags & (IFF_RUNNING | IFF_LOWER_UP)) fib_sync_up(dev, RTNH_F_LINKDOWN); else @@ -1421,6 +1566,12 @@ static int __net_init ip_fib_net_init(struct net *net) if (err) return err; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + /* Default to 3-tuple */ + net->ipv4.sysctl_fib_multipath_hash_fields = + FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK; +#endif + /* Avoid false sharing : Use at least a full cache line */ size = max_t(size_t, size, L1_CACHE_BYTES); @@ -1446,7 +1597,7 @@ static void ip_fib_net_exit(struct net *net) { int i; - rtnl_lock(); + ASSERT_RTNL_NET(net); #ifdef CONFIG_IP_MULTIPLE_TABLES RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); @@ -1471,7 +1622,7 @@ static void ip_fib_net_exit(struct net *net) #ifdef CONFIG_IP_MULTIPLE_TABLES fib4_rules_exit(net); #endif - rtnl_unlock(); + kfree(net->ipv4.fib_table_hash); fib4_notifier_exit(net); } @@ -1481,14 +1632,20 @@ static int __net_init fib_net_init(struct net *net) int error; #ifdef CONFIG_IP_ROUTE_CLASSID - net->ipv4.fib_num_tclassid_users = 0; + atomic_set(&net->ipv4.fib_num_tclassid_users, 0); #endif error = ip_fib_net_init(net); if (error < 0) goto out; + + error = fib4_semantics_init(net); + if (error) + goto out_semantics; + error = nl_fib_lookup_init(net); if (error < 0) goto out_nlfl; + error = fib_proc_init(net); if (error < 0) goto out_proc; @@ -1498,7 +1655,11 @@ out: out_proc: nl_fib_lookup_exit(net); out_nlfl: + fib4_semantics_exit(net); +out_semantics: + rtnl_net_lock(net); ip_fib_net_exit(net); + rtnl_net_unlock(net); goto out; } @@ -1506,12 +1667,37 @@ static void __net_exit fib_net_exit(struct net *net) { fib_proc_exit(net); nl_fib_lookup_exit(net); - ip_fib_net_exit(net); +} + +static void __net_exit fib_net_exit_batch(struct list_head *net_list) +{ + struct net *net; + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + __rtnl_net_lock(net); + ip_fib_net_exit(net); + __rtnl_net_unlock(net); + } + rtnl_unlock(); + + list_for_each_entry(net, net_list, exit_list) + fib4_semantics_exit(net); } static struct pernet_operations fib_net_ops = { .init = fib_net_init, .exit = fib_net_exit, + .exit_batch = fib_net_exit_batch, +}; + +static const struct rtnl_msg_handler fib_rtnl_msg_handlers[] __initconst = { + {.protocol = PF_INET, .msgtype = RTM_NEWROUTE, + .doit = inet_rtm_newroute, .flags = RTNL_FLAG_DOIT_PERNET}, + {.protocol = PF_INET, .msgtype = RTM_DELROUTE, + .doit = inet_rtm_delroute, .flags = RTNL_FLAG_DOIT_PERNET}, + {.protocol = PF_INET, .msgtype = RTM_GETROUTE, .dumpit = inet_dump_fib, + .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, }; void __init ip_fib_init(void) @@ -1523,7 +1709,5 @@ void __init ip_fib_init(void) register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0); + rtnl_register_many(fib_rtnl_msg_handlers); } |
