diff options
Diffstat (limited to 'net/ipv6/anycast.c')
| -rw-r--r-- | net/ipv6/anycast.c | 431 |
1 files changed, 282 insertions, 149 deletions
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 5a80f15a9de2..52599584422b 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Anycast support for IPv6 * Linux INET6 implementation @@ -6,11 +7,6 @@ * David L Stevens (dlstevens@us.ibm.com) * * based heavily on net/ipv6/mcast.c - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ #include <linux/capability.h> @@ -44,11 +40,25 @@ #include <net/checksum.h> +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) +/* anycast address hash table + */ +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; +static DEFINE_SPINLOCK(acaddr_hash_lock); + +#define ac_dereference(a, idev) \ + rcu_dereference_protected(a, lockdep_is_held(&(idev)->lock)) + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); -/* Big ac list lock for all the sockets */ -static DEFINE_SPINLOCK(ipv6_sk_ac_lock); +static u32 inet6_acaddr_hash(const struct net *net, + const struct in6_addr *addr) +{ + u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net)); + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} /* * socket join an anycast group @@ -57,51 +67,64 @@ static DEFINE_SPINLOCK(ipv6_sk_ac_lock); int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_ac_socklist *pac = NULL; + struct net *net = sock_net(sk); + netdevice_tracker dev_tracker; struct net_device *dev = NULL; struct inet6_dev *idev; - struct ipv6_ac_socklist *pac; - struct net *net = sock_net(sk); - int ishost = !net->ipv6.devconf_all->forwarding; - int err = 0; + int err = 0, ishost; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; - if (ipv6_chk_addr(net, addr, NULL, 0)) - return -EINVAL; + + if (ifindex) + dev = netdev_get_by_index(net, ifindex, &dev_tracker, GFP_KERNEL); + + if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) { + err = -EINVAL; + goto error; + } pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); - if (pac == NULL) - return -ENOMEM; + if (!pac) { + err = -ENOMEM; + goto error; + } + pac->acl_next = NULL; pac->acl_addr = *addr; - rcu_read_lock(); + ishost = !READ_ONCE(net->ipv6.devconf_all->forwarding); + if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(net, addr, NULL, 0, 0); + rcu_read_lock(); + rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { - dev = rt->dst.dev; + dev = dst_dev_rcu(&rt->dst); + netdev_hold(dev, &dev_tracker, GFP_ATOMIC); ip6_rt_put(rt); } else if (ishost) { + rcu_read_unlock(); err = -EADDRNOTAVAIL; goto error; } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags_rcu(net, IFF_UP, - IFF_UP | IFF_LOOPBACK); + dev = netdev_get_by_flags_rcu(net, &dev_tracker, IFF_UP, + IFF_UP | IFF_LOOPBACK); } - } else - dev = dev_get_by_index_rcu(net, ifindex); + rcu_read_unlock(); + } - if (dev == NULL) { + if (!dev) { err = -ENODEV; goto error; } - idev = __in6_dev_get(dev); + idev = in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; @@ -109,8 +132,9 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = -EADDRNOTAVAIL; goto error; } + /* reset ishost, now that we have a specific device */ - ishost = !idev->cnf.forwarding; + ishost = !READ_ONCE(idev->cnf.forwarding); pac->acl_ifindex = dev->ifindex; @@ -123,20 +147,21 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ishost) err = -EADDRNOTAVAIL; if (err) - goto error; + goto error_idev; } - err = ipv6_dev_ac_inc(dev, addr); + err = __ipv6_dev_ac_inc(idev, addr); if (!err) { - spin_lock_bh(&ipv6_sk_ac_lock); pac->acl_next = np->ipv6_ac_list; np->ipv6_ac_list = pac; - spin_unlock_bh(&ipv6_sk_ac_lock); pac = NULL; } +error_idev: + in6_dev_put(idev); error: - rcu_read_unlock(); + netdev_put(dev, &dev_tracker); + if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; @@ -147,12 +172,11 @@ error: */ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct net_device *dev; struct ipv6_ac_socklist *pac, *prev_pac; + struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); + struct net_device *dev; - spin_lock_bh(&ipv6_sk_ac_lock); prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && @@ -160,50 +184,40 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) break; prev_pac = pac; } - if (!pac) { - spin_unlock_bh(&ipv6_sk_ac_lock); + if (!pac) return -ENOENT; - } if (prev_pac) prev_pac->acl_next = pac->acl_next; else np->ipv6_ac_list = pac->acl_next; - spin_unlock_bh(&ipv6_sk_ac_lock); - - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, pac->acl_ifindex); - if (dev) + dev = dev_get_by_index(net, pac->acl_ifindex); + if (dev) { ipv6_dev_ac_dec(dev, &pac->acl_addr); - rcu_read_unlock(); + dev_put(dev); + } sock_kfree_s(sk, pac, sizeof(*pac)); return 0; } -void ipv6_sock_ac_close(struct sock *sk) +void __ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct net_device *dev = NULL; struct ipv6_ac_socklist *pac; - struct net *net = sock_net(sk); - int prev_index; - - if (!np->ipv6_ac_list) - return; + int prev_index = 0; - spin_lock_bh(&ipv6_sk_ac_lock); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; - spin_unlock_bh(&ipv6_sk_ac_lock); - prev_index = 0; - rcu_read_lock(); while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + dev_put(dev); + dev = dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -211,40 +225,125 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } - rcu_read_unlock(); + + dev_put(dev); +} + +void ipv6_sock_ac_close(struct sock *sk) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + if (!np->ipv6_ac_list) + return; + + __ipv6_sock_ac_close(sk); +} + +static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) +{ + unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr); + + spin_lock(&acaddr_hash_lock); + hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]); + spin_unlock(&acaddr_hash_lock); +} + +static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca) +{ + spin_lock(&acaddr_hash_lock); + hlist_del_init_rcu(&aca->aca_addr_lst); + spin_unlock(&acaddr_hash_lock); +} + +static void aca_get(struct ifacaddr6 *aca) +{ + refcount_inc(&aca->aca_refcnt); +} + +static void aca_free_rcu(struct rcu_head *h) +{ + struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu); + + fib6_info_release(aca->aca_rt); + kfree(aca); } static void aca_put(struct ifacaddr6 *ac) { - if (atomic_dec_and_test(&ac->aca_refcnt)) { - in6_dev_put(ac->aca_idev); - dst_release(&ac->aca_rt->dst); - kfree(ac); + if (refcount_dec_and_test(&ac->aca_refcnt)) + call_rcu_hurry(&ac->rcu, aca_free_rcu); +} + +static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, + const struct in6_addr *addr) +{ + struct ifacaddr6 *aca; + + aca = kzalloc(sizeof(*aca), GFP_ATOMIC); + if (!aca) + return NULL; + + aca->aca_addr = *addr; + fib6_info_hold(f6i); + aca->aca_rt = f6i; + INIT_HLIST_NODE(&aca->aca_addr_lst); + aca->aca_users = 1; + /* aca_tstamp should be updated upon changes */ + aca->aca_cstamp = aca->aca_tstamp = jiffies; + refcount_set(&aca->aca_refcnt, 1); + + return aca; +} + +static void inet6_ifacaddr_notify(struct net_device *dev, + const struct ifacaddr6 *ifaca, int event) +{ + struct inet6_fill_args fillargs = { + .event = event, + .netnsid = -1, + }; + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOMEM; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(sizeof(struct in6_addr)) + + nla_total_size(sizeof(struct ifa_cacheinfo)), + GFP_KERNEL); + if (!skb) + goto error; + + err = inet6_fill_ifacaddr(skb, ifaca, &fillargs); + if (err < 0) { + pr_err("Failed to fill in anycast addresses (err %d)\n", err); + nlmsg_free(skb); + goto error; } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ACADDR, NULL, GFP_KERNEL); + return; +error: + rtnl_set_sk_err(net, RTNLGRP_IPV6_ACADDR, err); } /* * device anycast group inc (add if not found) */ -int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr) +int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca; - struct inet6_dev *idev; - struct rt6_info *rt; + struct fib6_info *f6i; + struct net *net; int err; - idev = in6_dev_get(dev); - - if (idev == NULL) - return -EINVAL; - write_lock_bh(&idev->lock); if (idev->dead) { err = -ENODEV; goto out; } - for (aca = idev->ac_list; aca; aca = aca->aca_next) { + for (aca = ac_dereference(idev->ac_list, idev); aca; + aca = ac_dereference(aca->aca_next, idev)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) { aca->aca_users++; err = 0; @@ -252,46 +351,40 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr) } } - /* - * not found: create a new one. - */ - - aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC); - - if (aca == NULL) { - err = -ENOMEM; + net = dev_net(idev->dev); + f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC, NULL); + if (IS_ERR(f6i)) { + err = PTR_ERR(f6i); goto out; } - - rt = addrconf_dst_alloc(idev, addr, true); - if (IS_ERR(rt)) { - kfree(aca); - err = PTR_ERR(rt); + aca = aca_alloc(f6i, addr); + if (!aca) { + fib6_info_release(f6i); + err = -ENOMEM; goto out; } - aca->aca_addr = *addr; - aca->aca_idev = idev; - aca->aca_rt = rt; - aca->aca_users = 1; - /* aca_tstamp should be updated upon changes */ - aca->aca_cstamp = aca->aca_tstamp = jiffies; - atomic_set(&aca->aca_refcnt, 2); - spin_lock_init(&aca->aca_lock); - + /* Hold this for addrconf_join_solict() below before we unlock, + * it is already exposed via idev->ac_list. + */ + aca_get(aca); aca->aca_next = idev->ac_list; - idev->ac_list = aca; + rcu_assign_pointer(idev->ac_list, aca); + write_unlock_bh(&idev->lock); - ip6_ins_rt(rt); + ipv6_add_acaddr_hash(net, aca); - addrconf_join_solict(dev, &aca->aca_addr); + ip6_ins_rt(net, f6i); + + addrconf_join_solict(idev->dev, &aca->aca_addr); + + inet6_ifacaddr_notify(idev->dev, aca, RTM_NEWANYCAST); aca_put(aca); return 0; out: write_unlock_bh(&idev->lock); - in6_dev_put(idev); return err; } @@ -304,7 +397,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) write_lock_bh(&idev->lock); prev_aca = NULL; - for (aca = idev->ac_list; aca; aca = aca->aca_next) { + for (aca = ac_dereference(idev->ac_list, idev); aca; + aca = ac_dereference(aca->aca_next, idev)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) break; prev_aca = aca; @@ -318,27 +412,55 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) return 0; } if (prev_aca) - prev_aca->aca_next = aca->aca_next; + rcu_assign_pointer(prev_aca->aca_next, aca->aca_next); else - idev->ac_list = aca->aca_next; + rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); + ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); - dst_hold(&aca->aca_rt->dst); - ip6_del_rt(aca->aca_rt); + ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); + + inet6_ifacaddr_notify(idev->dev, aca, RTM_DELANYCAST); aca_put(aca); return 0; } -/* called with rcu_read_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { - struct inet6_dev *idev = __in6_dev_get(dev); + struct inet6_dev *idev = in6_dev_get(dev); + int err; - if (idev == NULL) + if (!idev) return -ENODEV; - return __ipv6_dev_ac_dec(idev, addr); + + err = __ipv6_dev_ac_dec(idev, addr); + in6_dev_put(idev); + + return err; +} + +void ipv6_ac_destroy_dev(struct inet6_dev *idev) +{ + struct ifacaddr6 *aca; + + write_lock_bh(&idev->lock); + while ((aca = ac_dereference(idev->ac_list, idev)) != NULL) { + rcu_assign_pointer(idev->ac_list, aca->aca_next); + write_unlock_bh(&idev->lock); + + ipv6_del_acaddr_hash(aca); + + addrconf_leave_solict(idev, &aca->aca_addr); + + ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); + + aca_put(aca); + + write_lock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); } /* @@ -352,11 +474,10 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad idev = __in6_dev_get(dev); if (idev) { - read_lock_bh(&idev->lock); - for (aca = idev->ac_list; aca; aca = aca->aca_next) + for (aca = rcu_dereference(idev->ac_list); aca; + aca = rcu_dereference(aca->aca_next)) if (ipv6_addr_equal(&aca->aca_addr, addr)) break; - read_unlock_bh(&idev->lock); return aca != NULL; } return false; @@ -368,50 +489,66 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { + struct net_device *nh_dev; + struct ifacaddr6 *aca; bool found = false; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); - else - for_each_netdev_rcu(net, dev) - if (ipv6_chk_acast_dev(dev, addr)) { + else { + unsigned int hash = inet6_acaddr_hash(net, addr); + + hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash], + aca_addr_lst) { + nh_dev = fib6_info_nh_dev(aca->aca_rt); + if (!nh_dev || !net_eq(dev_net(nh_dev), net)) + continue; + if (ipv6_addr_equal(&aca->aca_addr, addr)) { found = true; break; } + } + } rcu_read_unlock(); return found; } +/* check if this anycast address is link-local on given interface or + * is global + */ +bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, + const struct in6_addr *addr) +{ + return ipv6_chk_acast_addr(net, + (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ? + dev : NULL), + addr); +} #ifdef CONFIG_PROC_FS struct ac6_iter_state { struct seq_net_private p; struct net_device *dev; - struct inet6_dev *idev; }; #define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private) static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) { - struct ifacaddr6 *im = NULL; struct ac6_iter_state *state = ac6_seq_private(seq); struct net *net = seq_file_net(seq); + struct ifacaddr6 *im = NULL; - state->idev = NULL; for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; + idev = __in6_dev_get(state->dev); if (!idev) continue; - read_lock_bh(&idev->lock); - im = idev->ac_list; - if (im) { - state->idev = idev; + im = rcu_dereference(idev->ac_list); + if (im) break; - } - read_unlock_bh(&idev->lock); } return im; } @@ -419,22 +556,17 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im) { struct ac6_iter_state *state = ac6_seq_private(seq); + struct inet6_dev *idev; - im = im->aca_next; + im = rcu_dereference(im->aca_next); while (!im) { - if (likely(state->idev != NULL)) - read_unlock_bh(&state->idev->lock); - state->dev = next_net_device_rcu(state->dev); - if (!state->dev) { - state->idev = NULL; + if (!state->dev) break; - } - state->idev = __in6_dev_get(state->dev); - if (!state->idev) + idev = __in6_dev_get(state->dev); + if (!idev) continue; - read_lock_bh(&state->idev->lock); - im = state->idev->ac_list; + im = rcu_dereference(idev->ac_list); } return im; } @@ -466,12 +598,6 @@ static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ac6_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { - struct ac6_iter_state *state = ac6_seq_private(seq); - - if (likely(state->idev != NULL)) { - read_unlock_bh(&state->idev->lock); - state->idev = NULL; - } rcu_read_unlock(); } @@ -493,23 +619,10 @@ static const struct seq_operations ac6_seq_ops = { .show = ac6_seq_show, }; -static int ac6_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ac6_seq_ops, - sizeof(struct ac6_iter_state)); -} - -static const struct file_operations ac6_seq_fops = { - .owner = THIS_MODULE, - .open = ac6_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - int __net_init ac6_proc_init(struct net *net) { - if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops)) + if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops, + sizeof(struct ac6_iter_state))) return -ENOMEM; return 0; @@ -521,3 +634,23 @@ void ac6_proc_exit(struct net *net) } #endif +/* Init / cleanup code + */ +int __init ipv6_anycast_init(void) +{ + int i; + + for (i = 0; i < IN6_ADDR_HSIZE; i++) + INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); + return 0; +} + +void ipv6_anycast_cleanup(void) +{ + int i; + + spin_lock(&acaddr_hash_lock); + for (i = 0; i < IN6_ADDR_HSIZE; i++) + WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); + spin_unlock(&acaddr_hash_lock); +} |
