diff options
Diffstat (limited to 'net/ipv4/fib_frontend.c')
| -rw-r--r-- | net/ipv4/fib_frontend.c | 215 |
1 files changed, 149 insertions, 66 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4d61ddd8a0ec..1dab44e13d3b 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -32,6 +32,8 @@ #include <linux/list.h> #include <linux/slab.h> +#include <net/flow.h> +#include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> @@ -290,9 +292,9 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, - .flowi4_oif = l3mdev_master_ifindex_rcu(dev), + .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, - .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, + .flowi4_dscp = ip4h_dscp(ip_hdr(skb)), .flowi4_scope = scope, .flowi4_mark = vmark ? skb->mark : 0, }; @@ -341,10 +343,11 @@ EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev); * called with rcu_read_lock() */ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, int rpf, struct in_device *idev, u32 *itag) { struct net *net = dev_net(dev); + enum skb_drop_reason reason; struct flow_keys flkeys; int ret, no_addr; struct fib_result res; @@ -352,12 +355,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); - if (!fl4.flowi4_iif) - fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; + fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev); + fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; - fl4.flowi4_tos = tos; + fl4.flowi4_dscp = dscp; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; @@ -377,9 +379,15 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; - if (res.type != RTN_UNICAST && - (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) - goto e_inval; + if (res.type != RTN_UNICAST) { + if (res.type != RTN_LOCAL) { + reason = SKB_DROP_REASON_IP_INVALID_SOURCE; + goto e_inval; + } else if (!IN_DEV_ACCEPT_LOCAL(idev)) { + reason = SKB_DROP_REASON_IP_LOCAL_SOURCE; + goto e_inval; + } + } fib_combine_itag(itag, &res); dev_match = fib_info_nh_uses_dev(res.fi, dev); @@ -412,14 +420,14 @@ last_resort: return 0; e_inval: - return -EINVAL; + return -reason; e_rpf: - return -EXDEV; + return -SKB_DROP_REASON_IP_RPFILTER; } /* Ignore rp_filter for packets protected by IPsec. */ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, - u8 tos, int oif, struct net_device *dev, + dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag) { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); @@ -436,8 +444,11 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (net->ipv4.fib_has_custom_local_routes || fib4_has_custom_rules(net)) goto full_check; + /* Within the same container, it is regarded as a martian source, + * and the same host but different containers are not. + */ if (inet_lookup_ifaddr_rcu(net, src)) - return -EINVAL; + return -SKB_DROP_REASON_IP_LOCAL_SOURCE; ok: *itag = 0; @@ -445,7 +456,8 @@ ok: } full_check: - return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); + return __fib_validate_source(skb, src, dst, dscp, oif, dev, r, idev, + itag); } static inline __be32 sk_extract_addr(struct sockaddr *addr) @@ -542,18 +554,16 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, const struct in_ifaddr *ifa; struct in_device *in_dev; - in_dev = __in_dev_get_rtnl(dev); + in_dev = __in_dev_get_rtnl_net(dev); if (!in_dev) return -ENODEV; *colon = ':'; - rcu_read_lock(); - in_dev_for_each_ifa_rcu(ifa, in_dev) { + in_dev_for_each_ifa_rtnl_net(net, ifa, in_dev) { if (strcmp(ifa->ifa_label, devname) == 0) break; } - rcu_read_unlock(); if (!ifa) return -ENODEV; @@ -573,6 +583,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, cfg->fc_scope = RT_SCOPE_UNIVERSE; } + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + if (cmd == SIOCDELRT) return 0; @@ -621,7 +634,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - rtnl_lock(); + rtnl_net_lock(net); err = rtentry_to_fib_config(net, cmd, rt, &cfg); if (err == 0) { struct fib_table *tb; @@ -645,7 +658,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt) /* allocated by rtentry_to_fib_config() */ kfree(cfg.fc_mx); } - rtnl_unlock(); + rtnl_net_unlock(net); return err; } return -EINVAL; @@ -735,8 +748,16 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, memset(cfg, 0, sizeof(*cfg)); rtm = nlmsg_data(nlh); + + if (!inet_validate_dscp(rtm->rtm_tos)) { + NL_SET_ERR_MSG(extack, + "Invalid dsfield (tos): ECN bits must be 0"); + err = -EINVAL; + goto errout; + } + cfg->fc_dscp = inet_dsfield_to_dscp(rtm->rtm_tos); + cfg->fc_dst_len = rtm->rtm_dst_len; - cfg->fc_tos = rtm->rtm_tos; cfg->fc_table = rtm->rtm_table; cfg->fc_protocol = rtm->rtm_protocol; cfg->fc_scope = rtm->rtm_scope; @@ -815,21 +836,38 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, } } + if (cfg->fc_dst_len > 32) { + NL_SET_ERR_MSG(extack, "Invalid prefix length"); + err = -EINVAL; + goto errout; + } + + if (cfg->fc_dst_len < 32 && (ntohl(cfg->fc_dst) << cfg->fc_dst_len)) { + NL_SET_ERR_MSG(extack, "Invalid prefix for given prefix length"); + err = -EINVAL; + goto errout; + } + if (cfg->fc_nh_id) { if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_encap || cfg->fc_mp) { NL_SET_ERR_MSG(extack, "Nexthop specification and nexthop id are mutually exclusive"); - return -EINVAL; + err = -EINVAL; + goto errout; } } if (has_gw && has_via) { NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA"); - return -EINVAL; + err = -EINVAL; + goto errout; } + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + return 0; errout: return err; @@ -847,20 +885,24 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + rtnl_net_lock(net); + if (cfg.fc_nh_id && !nexthop_find_by_id(net, cfg.fc_nh_id)) { NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); err = -EINVAL; - goto errout; + goto unlock; } tb = fib_get_table(net, cfg.fc_table); if (!tb) { NL_SET_ERR_MSG(extack, "FIB table does not exist"); err = -ESRCH; - goto errout; + goto unlock; } err = fib_table_delete(net, tb, &cfg, extack); +unlock: + rtnl_net_unlock(net); errout: return err; } @@ -877,15 +919,20 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + rtnl_net_lock(net); + tb = fib_new_table(net, cfg.fc_table); if (!tb) { err = -ENOBUFS; - goto errout; + goto unlock; } err = fib_table_insert(net, tb, &cfg, extack); if (!err && cfg.fc_type == RTN_LOCAL) net->ipv4.fib_has_custom_local_routes = true; + +unlock: + rtnl_net_unlock(net); errout: return err; } @@ -899,14 +946,15 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct rtmsg *rtm; int err, i; - ASSERT_RTNL(); + if (filter->rtnl_held) + ASSERT_RTNL(); - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { + rtm = nlmsg_payload(nlh, sizeof(*rtm)); + if (!rtm) { NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request"); return -EINVAL; } - rtm = nlmsg_data(nlh); if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_scope) { NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request"); @@ -944,7 +992,10 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, break; case RTA_OIF: ifindex = nla_get_u32(tb[i]); - filter->dev = __dev_get_by_index(net, ifindex); + if (filter->rtnl_held) + filter->dev = __dev_get_by_index(net, ifindex); + else + filter->dev = dev_get_by_index_rcu(net, ifindex); if (!filter->dev) return -ENODEV; break; @@ -966,20 +1017,24 @@ EXPORT_SYMBOL_GPL(ip_valid_fib_dump_req); static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - struct fib_dump_filter filter = { .dump_routes = true, - .dump_exceptions = true }; + struct fib_dump_filter filter = { + .dump_routes = true, + .dump_exceptions = true, + .rtnl_held = false, + }; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; struct hlist_head *head; - int dumped = 0, err; + int dumped = 0, err = 0; + rcu_read_lock(); if (cb->strict_check) { err = ip_valid_fib_dump_req(net, nlh, &filter, cb); if (err < 0) - return err; + goto unlock; } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { struct rtmsg *rtm = nlmsg_data(nlh); @@ -988,29 +1043,26 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) /* ipv4 does not use prefix flag */ if (filter.flags & RTM_F_PREFIX) - return skb->len; + goto unlock; if (filter.table_id) { tb = fib_get_table(net, filter.table_id); if (!tb) { if (rtnl_msg_family(cb->nlh) != PF_INET) - return skb->len; + goto unlock; NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); - return -ENOENT; + err = -ENOENT; + goto unlock; } - - rcu_read_lock(); err = fib_table_dump(tb, skb, cb, &filter); - rcu_read_unlock(); - return skb->len ? : err; + goto unlock; } s_h = cb->args[0]; s_e = cb->args[1]; - rcu_read_lock(); - + err = 0; for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv4.fib_table_hash[h]; @@ -1021,25 +1073,20 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); err = fib_table_dump(tb, skb, cb, &filter); - if (err < 0) { - if (likely(skb->len)) - goto out; - - goto out_err; - } + if (err < 0) + goto out; dumped = 1; next: e++; } } out: - err = skb->len; -out_err: - rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; +unlock: + rcu_read_unlock(); return err; } @@ -1112,9 +1159,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) return; /* Add broadcast address, if it is explicitly assigned. */ - if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) + if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim, 0); + arp_invalidate(dev, ifa->ifa_broadcast, false); + } if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { @@ -1128,6 +1177,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) if (ifa->ifa_prefixlen < 31) { fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask, 32, prim, 0); + arp_invalidate(dev, prefix | ~mask, false); } } } @@ -1323,7 +1373,7 @@ static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn) struct flowi4 fl4 = { .flowi4_mark = frn->fl_mark, .daddr = frn->fl_addr, - .flowi4_tos = frn->fl_tos, + .flowi4_dscp = inet_dsfield_to_dscp(frn->fl_tos), .flowi4_scope = frn->fl_scope, }; struct fib_table *tb; @@ -1370,7 +1420,7 @@ static void nl_fib_input(struct sk_buff *skb) return; nlh = nlmsg_hdr(skb); - frn = (struct fib_result_nl *) nlmsg_data(nlh); + frn = nlmsg_data(nlh); nl_fib_lookup(net, frn); portid = NETLINK_CB(skb).portid; /* netlink portid */ @@ -1411,7 +1461,7 @@ static void fib_disable_ip(struct net_device *dev, unsigned long event, static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct in_ifaddr *ifa = ptr; struct net_device *dev = ifa->ifa_dev->dev; struct net *net = dev_net(dev); @@ -1422,7 +1472,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, fib_sync_up(dev, RTNH_F_DEAD); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev)); + rt_cache_flush(net); break; case NETDEV_DOWN: fib_del_ifaddr(ifa, NULL); @@ -1433,7 +1483,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, */ fib_disable_ip(dev, event, true); } else { - rt_cache_flush(dev_net(dev)); + rt_cache_flush(net); } break; } @@ -1475,7 +1525,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_disable_ip(dev, event, false); break; case NETDEV_CHANGE: - flags = dev_get_flags(dev); + flags = netif_get_flags(dev); if (flags & (IFF_RUNNING | IFF_LOWER_UP)) fib_sync_up(dev, RTNH_F_LINKDOWN); else @@ -1547,7 +1597,7 @@ static void ip_fib_net_exit(struct net *net) { int i; - rtnl_lock(); + ASSERT_RTNL_NET(net); #ifdef CONFIG_IP_MULTIPLE_TABLES RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); @@ -1572,7 +1622,7 @@ static void ip_fib_net_exit(struct net *net) #ifdef CONFIG_IP_MULTIPLE_TABLES fib4_rules_exit(net); #endif - rtnl_unlock(); + kfree(net->ipv4.fib_table_hash); fib4_notifier_exit(net); } @@ -1587,9 +1637,15 @@ static int __net_init fib_net_init(struct net *net) error = ip_fib_net_init(net); if (error < 0) goto out; + + error = fib4_semantics_init(net); + if (error) + goto out_semantics; + error = nl_fib_lookup_init(net); if (error < 0) goto out_nlfl; + error = fib_proc_init(net); if (error < 0) goto out_proc; @@ -1599,7 +1655,11 @@ out: out_proc: nl_fib_lookup_exit(net); out_nlfl: + fib4_semantics_exit(net); +out_semantics: + rtnl_net_lock(net); ip_fib_net_exit(net); + rtnl_net_unlock(net); goto out; } @@ -1607,12 +1667,37 @@ static void __net_exit fib_net_exit(struct net *net) { fib_proc_exit(net); nl_fib_lookup_exit(net); - ip_fib_net_exit(net); +} + +static void __net_exit fib_net_exit_batch(struct list_head *net_list) +{ + struct net *net; + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + __rtnl_net_lock(net); + ip_fib_net_exit(net); + __rtnl_net_unlock(net); + } + rtnl_unlock(); + + list_for_each_entry(net, net_list, exit_list) + fib4_semantics_exit(net); } static struct pernet_operations fib_net_ops = { .init = fib_net_init, .exit = fib_net_exit, + .exit_batch = fib_net_exit_batch, +}; + +static const struct rtnl_msg_handler fib_rtnl_msg_handlers[] __initconst = { + {.protocol = PF_INET, .msgtype = RTM_NEWROUTE, + .doit = inet_rtm_newroute, .flags = RTNL_FLAG_DOIT_PERNET}, + {.protocol = PF_INET, .msgtype = RTM_DELROUTE, + .doit = inet_rtm_delroute, .flags = RTNL_FLAG_DOIT_PERNET}, + {.protocol = PF_INET, .msgtype = RTM_GETROUTE, .dumpit = inet_dump_fib, + .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, }; void __init ip_fib_init(void) @@ -1624,7 +1709,5 @@ void __init ip_fib_init(void) register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0); + rtnl_register_many(fib_rtnl_msg_handlers); } |
