diff options
Diffstat (limited to 'net/ipv6')
45 files changed, 909 insertions, 736 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 870a0bd6c2ba..f17a5dd4789f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -239,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ndisc_evict_nocarrier = 1, .ra_honor_pio_life = 0, .ra_honor_pio_pflag = 0, + .force_forwarding = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -303,6 +304,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ndisc_evict_nocarrier = 1, .ra_honor_pio_life = 0, .ra_honor_pio_pflag = 0, + .force_forwarding = 0, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -857,6 +859,9 @@ static void addrconf_forward_change(struct net *net, __s32 newf) idev = __in6_dev_get_rtnl_net(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); + /* Disabling all.forwarding sets 0 to force_forwarding for all interfaces */ + if (newf == 0) + WRITE_ONCE(idev->cnf.force_forwarding, 0); WRITE_ONCE(idev->cnf.forwarding, newf); if (changed) @@ -2229,32 +2234,29 @@ errdad: in6_ifa_put(ifp); } -/* Join to solicited addr multicast group. - * caller must hold RTNL */ +/* Join to solicited addr multicast group. */ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; - if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + if (READ_ONCE(dev->flags) & (IFF_LOOPBACK | IFF_NOARP)) return; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_inc(dev, &maddr); } -/* caller must hold RTNL */ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) { struct in6_addr maddr; - if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + if (READ_ONCE(idev->dev->flags) & (IFF_LOOPBACK | IFF_NOARP)) return; addrconf_addr_solict_mult(addr, &maddr); __ipv6_dev_mc_dec(idev, &maddr); } -/* caller must hold RTNL */ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; @@ -2267,7 +2269,6 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) __ipv6_dev_ac_inc(ifp->idev, &addr); } -/* caller must hold RTNL */ static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; @@ -3208,7 +3209,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, } } -#if IS_ENABLED(CONFIG_IPV6_SIT) || IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) +#if IS_ENABLED(CONFIG_IPV6_SIT) || IS_ENABLED(CONFIG_NET_IPGRE) static void add_v4_addrs(struct inet6_dev *idev) { struct in6_addr addr; @@ -3367,7 +3368,7 @@ static int ipv6_generate_stable_address(struct in6_addr *address, retry: spin_lock_bh(&lock); - sha1_init(digest); + sha1_init_raw(digest); memset(&data, 0, sizeof(data)); memset(workspace, 0, sizeof(workspace)); memcpy(data.hwaddr, idev->dev->perm_addr, idev->dev->addr_len); @@ -3463,6 +3464,7 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_IEEE1394) && (dev->type != ARPHRD_TUNNEL6) && (dev->type != ARPHRD_6LOWPAN) && + (dev->type != ARPHRD_IP6GRE) && (dev->type != ARPHRD_TUNNEL) && (dev->type != ARPHRD_NONE) && (dev->type != ARPHRD_RAWIP)) { @@ -3518,7 +3520,7 @@ static void addrconf_sit_config(struct net_device *dev) } #endif -#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) +#if IS_ENABLED(CONFIG_NET_IPGRE) static void addrconf_gre_config(struct net_device *dev) { struct inet6_dev *idev; @@ -3534,7 +3536,7 @@ static void addrconf_gre_config(struct net_device *dev) * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this * case). Such devices fall back to add_v4_addrs() instead. */ - if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && + if (!(*(__be32 *)dev->dev_addr == 0 && idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { addrconf_addr_gen(idev, true); return; @@ -3552,8 +3554,7 @@ static void addrconf_init_auto_addrs(struct net_device *dev) addrconf_sit_config(dev); break; #endif -#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) - case ARPHRD_IP6GRE: +#if IS_ENABLED(CONFIG_NET_IPGRE) case ARPHRD_IPGRE: addrconf_gre_config(dev); break; @@ -3860,7 +3861,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) * Do not dev_put! */ if (unregister) { - idev->dead = 1; + WRITE_ONCE(idev->dead, 1); /* protected by rtnl_lock */ RCU_INIT_POINTER(dev->ip6_ptr, NULL); @@ -5714,6 +5715,7 @@ static void ipv6_store_devconf(const struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_UNTRACKED_NA] = READ_ONCE(cnf->accept_untracked_na); array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft); + array[DEVCONF_FORCE_FORWARDING] = READ_ONCE(cnf->force_forwarding); } static inline size_t inet6_ifla6_size(void) @@ -6076,7 +6078,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, hdr->ifi_type = dev->type; ifindex = READ_ONCE(dev->ifindex); hdr->ifi_index = ifindex; - hdr->ifi_flags = dev_get_flags(dev); + hdr->ifi_flags = netif_get_flags(dev); hdr->ifi_change = 0; iflink = dev_get_iflink(dev); @@ -6742,6 +6744,75 @@ static int addrconf_sysctl_disable_policy(const struct ctl_table *ctl, int write return ret; } +static void addrconf_force_forward_change(struct net *net, __s32 newf) +{ + struct net_device *dev; + struct inet6_dev *idev; + + for_each_netdev(net, dev) { + idev = __in6_dev_get_rtnl_net(dev); + if (idev) { + int changed = (!idev->cnf.force_forwarding) ^ (!newf); + + WRITE_ONCE(idev->cnf.force_forwarding, newf); + if (changed) + inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + dev->ifindex, &idev->cnf); + } + } +} + +static int addrconf_sysctl_force_forwarding(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct inet6_dev *idev = ctl->extra1; + struct ctl_table tmp_ctl = *ctl; + struct net *net = ctl->extra2; + int *valp = ctl->data; + int new_val = *valp; + int old_val = *valp; + loff_t pos = *ppos; + int ret; + + tmp_ctl.extra1 = SYSCTL_ZERO; + tmp_ctl.extra2 = SYSCTL_ONE; + tmp_ctl.data = &new_val; + + ret = proc_douintvec_minmax(&tmp_ctl, write, buffer, lenp, ppos); + + if (write && old_val != new_val) { + if (!rtnl_net_trylock(net)) + return restart_syscall(); + + WRITE_ONCE(*valp, new_val); + + if (valp == &net->ipv6.devconf_dflt->force_forwarding) { + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); + } else if (valp == &net->ipv6.devconf_all->force_forwarding) { + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); + + addrconf_force_forward_change(net, new_val); + } else { + inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, + NETCONFA_FORCE_FORWARDING, + idev->dev->ifindex, + &idev->cnf); + } + rtnl_net_unlock(net); + } + + if (ret) + *ppos = pos; + return ret; +} + static int minus_one = -1; static const int two_five_five = 255; static u32 ioam6_if_id_max = U16_MAX; @@ -7212,6 +7283,13 @@ static const struct ctl_table addrconf_sysctl[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, + { + .procname = "force_forwarding", + .data = &ipv6_devconf.force_forwarding, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = addrconf_sysctl_force_forwarding, + }, }; static int __addrconf_sysctl_register(struct net *net, char *dev_name, diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index fb63ffbcfc64..567efd626ab4 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -20,12 +20,6 @@ #include <linux/netlink.h> #include <linux/rtnetlink.h> -#if 0 -#define ADDRLABEL(x...) printk(x) -#else -#define ADDRLABEL(x...) do { ; } while (0) -#endif - /* * Policy Table */ @@ -150,8 +144,8 @@ u32 ipv6_addr_label(struct net *net, label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); - ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", - __func__, addr, type, ifindex, label); + net_dbg_ratelimited("%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", __func__, addr, type, + ifindex, label); return label; } @@ -164,8 +158,8 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, struct ip6addrlbl_entry *newp; int addrtype; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", - __func__, prefix, prefixlen, ifindex, (unsigned int)label); + net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", __func__, + prefix, prefixlen, ifindex, (unsigned int)label); addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); @@ -207,8 +201,7 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp, struct hlist_node *n; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, - replace); + net_dbg_ratelimited("%s(newp=%p, replace=%d)\n", __func__, newp, replace); hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && @@ -247,9 +240,8 @@ static int ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", - __func__, prefix, prefixlen, ifindex, (unsigned int)label, - replace); + net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + __func__, prefix, prefixlen, ifindex, (unsigned int)label, replace); newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) @@ -271,8 +263,8 @@ static int __ip6addrlbl_del(struct net *net, struct hlist_node *n; int ret = -ESRCH; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", - __func__, prefix, prefixlen, ifindex); + net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, + prefixlen, ifindex); hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && @@ -294,8 +286,8 @@ static int ip6addrlbl_del(struct net *net, struct in6_addr prefix_buf; int ret; - ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", - __func__, prefix, prefixlen, ifindex); + net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, + prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); spin_lock(&net->ipv6.ip6addrlbl_table.lock); @@ -312,8 +304,6 @@ static int __net_init ip6addrlbl_net_init(struct net *net) int err; int i; - ADDRLABEL(KERN_DEBUG "%s\n", __func__); - spin_lock_init(&net->ipv6.ip6addrlbl_table.lock); INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index acaff1296783..1992621e3f3f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -842,7 +842,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; - fl6.flowi6_uid = sk->sk_uid; + fl6.flowi6_uid = sk_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); rcu_read_lock(); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 21e01695b48c..f8a8e46286b8 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -47,6 +47,9 @@ static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(acaddr_hash_lock); +#define ac_dereference(a, idev) \ + rcu_dereference_protected(a, lockdep_is_held(&(idev)->lock)) + static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); static u32 inet6_acaddr_hash(const struct net *net, @@ -64,14 +67,12 @@ static u32 inet6_acaddr_hash(const struct net *net, int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_ac_socklist *pac = NULL; + struct net *net = sock_net(sk); + netdevice_tracker dev_tracker; struct net_device *dev = NULL; struct inet6_dev *idev; - struct ipv6_ac_socklist *pac; - struct net *net = sock_net(sk); - int ishost = !net->ipv6.devconf_all->forwarding; - int err = 0; - - ASSERT_RTNL(); + int err = 0, ishost; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -79,32 +80,43 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EINVAL; if (ifindex) - dev = __dev_get_by_index(net, ifindex); + dev = netdev_get_by_index(net, ifindex, &dev_tracker, GFP_KERNEL); - if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) - return -EINVAL; + if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) { + err = -EINVAL; + goto error; + } pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); - if (!pac) - return -ENOMEM; + if (!pac) { + err = -ENOMEM; + goto error; + } + pac->acl_next = NULL; pac->acl_addr = *addr; + ishost = !READ_ONCE(net->ipv6.devconf_all->forwarding); + if (ifindex == 0) { struct rt6_info *rt; + rcu_read_lock(); rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { - dev = rt->dst.dev; + dev = dst_dev(&rt->dst); + netdev_hold(dev, &dev_tracker, GFP_ATOMIC); ip6_rt_put(rt); } else if (ishost) { + rcu_read_unlock(); err = -EADDRNOTAVAIL; goto error; } else { /* router, no matching interface: just pick one */ - dev = __dev_get_by_flags(net, IFF_UP, - IFF_UP | IFF_LOOPBACK); + dev = netdev_get_by_flags_rcu(net, &dev_tracker, IFF_UP, + IFF_UP | IFF_LOOPBACK); } + rcu_read_unlock(); } if (!dev) { @@ -112,7 +124,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) goto error; } - idev = __in6_dev_get(dev); + idev = in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; @@ -120,8 +132,9 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = -EADDRNOTAVAIL; goto error; } + /* reset ishost, now that we have a specific device */ - ishost = !idev->cnf.forwarding; + ishost = !READ_ONCE(idev->cnf.forwarding); pac->acl_ifindex = dev->ifindex; @@ -134,7 +147,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ishost) err = -EADDRNOTAVAIL; if (err) - goto error; + goto error_idev; } err = __ipv6_dev_ac_inc(idev, addr); @@ -144,7 +157,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) pac = NULL; } +error_idev: + in6_dev_put(idev); error: + netdev_put(dev, &dev_tracker); + if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; @@ -155,12 +172,10 @@ error: */ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { - struct ipv6_pinfo *np = inet6_sk(sk); - struct net_device *dev; struct ipv6_ac_socklist *pac, *prev_pac; + struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); - - ASSERT_RTNL(); + struct net_device *dev; prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { @@ -176,9 +191,11 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) else np->ipv6_ac_list = pac->acl_next; - dev = __dev_get_by_index(net, pac->acl_ifindex); - if (dev) + dev = dev_get_by_index(net, pac->acl_ifindex); + if (dev) { ipv6_dev_ac_dec(dev, &pac->acl_addr); + dev_put(dev); + } sock_kfree_s(sk, pac, sizeof(*pac)); return 0; @@ -187,21 +204,20 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) void __ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct net_device *dev = NULL; struct ipv6_ac_socklist *pac; - struct net *net = sock_net(sk); - int prev_index; + int prev_index = 0; - ASSERT_RTNL(); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; - prev_index = 0; while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - dev = __dev_get_by_index(net, pac->acl_ifindex); + dev_put(dev); + dev = dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -209,6 +225,8 @@ void __ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } + + dev_put(dev); } void ipv6_sock_ac_close(struct sock *sk) @@ -217,9 +235,8 @@ void ipv6_sock_ac_close(struct sock *sk) if (!np->ipv6_ac_list) return; - rtnl_lock(); + __ipv6_sock_ac_close(sk); - rtnl_unlock(); } static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) @@ -319,16 +336,14 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) struct net *net; int err; - ASSERT_RTNL(); - write_lock_bh(&idev->lock); if (idev->dead) { err = -ENODEV; goto out; } - for (aca = rtnl_dereference(idev->ac_list); aca; - aca = rtnl_dereference(aca->aca_next)) { + for (aca = ac_dereference(idev->ac_list, idev); aca; + aca = ac_dereference(aca->aca_next, idev)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) { aca->aca_users++; err = 0; @@ -380,12 +395,10 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca, *prev_aca; - ASSERT_RTNL(); - write_lock_bh(&idev->lock); prev_aca = NULL; - for (aca = rtnl_dereference(idev->ac_list); aca; - aca = rtnl_dereference(aca->aca_next)) { + for (aca = ac_dereference(idev->ac_list, idev); aca; + aca = ac_dereference(aca->aca_next, idev)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) break; prev_aca = aca; @@ -414,14 +427,18 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) return 0; } -/* called with rtnl_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { - struct inet6_dev *idev = __in6_dev_get(dev); + struct inet6_dev *idev = in6_dev_get(dev); + int err; if (!idev) return -ENODEV; - return __ipv6_dev_ac_dec(idev, addr); + + err = __ipv6_dev_ac_dec(idev, addr); + in6_dev_put(idev); + + return err; } void ipv6_ac_destroy_dev(struct inet6_dev *idev) @@ -429,7 +446,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) struct ifacaddr6 *aca; write_lock_bh(&idev->lock); - while ((aca = rtnl_dereference(idev->ac_list)) != NULL) { + while ((aca = ac_dereference(idev->ac_list, idev)) != NULL) { rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index a247bb93908b..df1986973430 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -32,7 +32,7 @@ #include <linux/unaligned.h> #include <linux/crc-ccitt.h> -/* Maximium size of the calipso option including +/* Maximum size of the calipso option including * the two-byte TLV header. */ #define CALIPSO_OPT_LEN_MAX (2 + 252) @@ -42,13 +42,13 @@ */ #define CALIPSO_HDR_LEN (2 + 8) -/* Maximium size of the calipso option including +/* Maximum size of the calipso option including * the two-byte TLV header and upto 3 bytes of * leading pad and 7 bytes of trailing pad. */ #define CALIPSO_OPT_LEN_MAX_WITH_PAD (3 + CALIPSO_OPT_LEN_MAX + 7) - /* Maximium size of u32 aligned buffer required to hold calipso + /* Maximum size of u32 aligned buffer required to hold calipso * option. Max of 3 initial pad bytes starting from buffer + 3. * i.e. the worst case is when the previous tlv finishes on 4n + 3. */ diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index fff78496803d..972bf0426d59 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -53,7 +53,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); - fl6->flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk_uid(sk); if (!oif) oif = np->sticky_pktinfo.ipi6_ifindex; @@ -127,7 +127,7 @@ void ip6_datagram_release_cb(struct sock *sk) rcu_read_lock(); dst = __sk_dst_get(sk); - if (!dst || !dst->obsolete || + if (!dst || !READ_ONCE(dst->obsolete) || dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) { rcu_read_unlock(); return; @@ -1064,7 +1064,7 @@ void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, sk_wmem_alloc_get(sp), rqueue, 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), 0, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 457de0745a33..d1ef9644f826 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -306,7 +306,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - __IP6_INC_STATS(dev_net(dst->dev), idev, + __IP6_INC_STATS(dev_net(dst_dev(dst)), idev, IPSTATS_MIB_INHDRERRORS); fail_and_free: kfree_skb(skb); @@ -460,7 +460,7 @@ looped_back: return -1; } - if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (skb_dst_dev(skb)->flags & IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, @@ -621,7 +621,7 @@ looped_back: return -1; } - if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (skb_dst_dev(skb)->flags & IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, @@ -783,7 +783,7 @@ looped_back: kfree_skb(skb); return -1; } - if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) { + if (!ipv6_chk_home_addr(skb_dst_dev_net(skb), addr)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -809,7 +809,7 @@ looped_back: return -1; } - if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) { + if (skb_dst_dev(skb)->flags & IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 3fd19a84b358..44550957fd4e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -196,6 +196,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, struct flowi6 *fl6, bool apply_ratelimit) { struct net *net = sock_net(sk); + struct net_device *dev; struct dst_entry *dst; bool res = false; @@ -208,10 +209,11 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, * this lookup should be more aggressive (not longer than timeout). */ dst = ip6_route_output(net, sk, fl6); + dev = dst_dev(dst); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); - } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { + } else if (dev && (dev->flags & IFF_LOOPBACK)) { res = true; } else { struct rt6_info *rt = dst_rt6_info(dst); diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 7d574f5132e2..7bb9edc5c28c 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -70,7 +70,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) */ memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_oif = orig_dst->dev->ifindex; + fl6.flowi6_oif = dst_dev(orig_dst)->ifindex; fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst), &ip6h->daddr); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8f500eaf33cf..333e43434dd7 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -45,7 +45,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); - fl6->flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk_uid(sk); security_req_classify_flow(req, flowi6_to_flowi_common(fl6)); dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); @@ -79,7 +79,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; - fl6->flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); rcu_read_lock(); diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c index a84d332f952f..9553a3200081 100644 --- a/net/ipv6/ioam6.c +++ b/net/ipv6/ioam6.c @@ -696,6 +696,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct ioam6_schema *sc, u8 sclen, bool is_input) { + struct net_device *dev = skb_dst_dev(skb); struct timespec64 ts; ktime_t tstamp; u64 raw64; @@ -712,7 +713,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (is_input) byte--; - raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id; + raw32 = dev_net(dev)->ipv6.sysctl.ioam6_id; *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); data += sizeof(__be32); @@ -728,10 +729,10 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); - if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + if (dev->flags & IFF_LOOPBACK) raw16 = IOAM6_U16_UNAVAILABLE; else - raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id); + raw16 = (__force u16)READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id); *(__be16 *)data = cpu_to_be16(raw16); data += sizeof(__be16); @@ -783,10 +784,10 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, struct Qdisc *qdisc; __u32 qlen, backlog; - if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { + if (dev->flags & IFF_LOOPBACK) { *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); } else { - queue = skb_get_tx_queue(skb_dst(skb)->dev, skb); + queue = skb_get_tx_queue(dev, skb); qdisc = rcu_dereference(queue->qdisc); qdisc_qstats_qlen_backlog(qdisc, &qlen, &backlog); @@ -807,7 +808,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, if (is_input) byte--; - raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide; + raw64 = dev_net(dev)->ipv6.sysctl.ioam6_id_wide; *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); data += sizeof(__be64); @@ -823,10 +824,10 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb, *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); - if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) + if (dev->flags & IFF_LOOPBACK) raw32 = IOAM6_U32_UNAVAILABLE; else - raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide); + raw32 = READ_ONCE(__in6_dev_get(dev)->cnf.ioam6_id_wide); *(__be32 *)data = cpu_to_be32(raw32); data += sizeof(__be32); diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 40df8bdfaacd..1fe7894f14dd 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -335,7 +335,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, if (has_tunsrc) memcpy(&hdr->saddr, tunsrc, sizeof(*tunsrc)); else - ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr, + ipv6_dev_get_saddr(net, dst_dev(dst), &hdr->daddr, IPV6_PREFER_SRC_PUBLIC, &hdr->saddr); skb_postpush_rcsum(skb, hdr, len); @@ -442,7 +442,7 @@ do_encap: dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); local_bh_enable(); - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst))); if (unlikely(err)) goto drop; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 93578b2ec35f..02c16909f618 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -445,15 +445,17 @@ struct fib6_dump_arg { static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) { enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE; + unsigned int nsiblings; int err; if (!rt || rt == arg->net->ipv6.fib6_null_entry) return 0; - if (rt->fib6_nsiblings) + nsiblings = READ_ONCE(rt->fib6_nsiblings); + if (nsiblings) err = call_fib6_multipath_entry_notifier(arg->nb, fib_event, rt, - rt->fib6_nsiblings, + nsiblings, arg->extack); else err = call_fib6_entry_notifier(arg->nb, fib_event, rt, @@ -963,8 +965,7 @@ insert_above: } static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, - const struct fib6_info *match, - const struct fib6_table *table) + const struct fib6_info *match) { int cpu; @@ -999,21 +1000,15 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, rcu_read_unlock(); } -struct fib6_nh_pcpu_arg { - struct fib6_info *from; - const struct fib6_table *table; -}; - static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg) { - struct fib6_nh_pcpu_arg *arg = _arg; + struct fib6_info *arg = _arg; - __fib6_drop_pcpu_from(nh, arg->from, arg->table); + __fib6_drop_pcpu_from(nh, arg); return 0; } -static void fib6_drop_pcpu_from(struct fib6_info *f6i, - const struct fib6_table *table) +static void fib6_drop_pcpu_from(struct fib6_info *f6i) { /* Make sure rt6_make_pcpu_route() wont add other percpu routes * while we are cleaning them here. @@ -1022,19 +1017,14 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i, mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */ if (f6i->nh) { - struct fib6_nh_pcpu_arg arg = { - .from = f6i, - .table = table - }; - rcu_read_lock(); - nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, &arg); + nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, f6i); rcu_read_unlock(); } else { struct fib6_nh *fib6_nh; fib6_nh = f6i->fib6_nh; - __fib6_drop_pcpu_from(fib6_nh, f6i, table); + __fib6_drop_pcpu_from(fib6_nh, f6i); } } @@ -1045,7 +1035,7 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, /* Flush all cached dst in exception table */ rt6_flush_exceptions(rt); - fib6_drop_pcpu_from(rt, table); + fib6_drop_pcpu_from(rt); if (rt->nh) { spin_lock(&rt->nh->lock); @@ -1138,7 +1128,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, if (rt6_duplicate_nexthop(iter, rt)) { if (rt->fib6_nsiblings) - rt->fib6_nsiblings = 0; + WRITE_ONCE(rt->fib6_nsiblings, 0); if (!(iter->fib6_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->fib6_flags & RTF_EXPIRES)) { @@ -1167,7 +1157,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, */ if (rt_can_ecmp && rt6_qualify_for_ecmp(iter)) - rt->fib6_nsiblings++; + WRITE_ONCE(rt->fib6_nsiblings, + rt->fib6_nsiblings + 1); } if (iter->fib6_metric > rt->fib6_metric) @@ -1217,7 +1208,8 @@ next_iter: fib6_nsiblings = 0; list_for_each_entry_safe(sibling, temp_sibling, &rt->fib6_siblings, fib6_siblings) { - sibling->fib6_nsiblings++; + WRITE_ONCE(sibling->fib6_nsiblings, + sibling->fib6_nsiblings + 1); BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings); fib6_nsiblings++; } @@ -1264,8 +1256,9 @@ add: list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) - sibling->fib6_nsiblings--; - rt->fib6_nsiblings = 0; + WRITE_ONCE(sibling->fib6_nsiblings, + sibling->fib6_nsiblings - 1); + WRITE_ONCE(rt->fib6_nsiblings, 0); list_del_rcu(&rt->fib6_siblings); rcu_read_lock(); rt6_multipath_rebalance(next_sibling); @@ -2014,8 +2007,9 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, notify_del = true; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) - sibling->fib6_nsiblings--; - rt->fib6_nsiblings = 0; + WRITE_ONCE(sibling->fib6_nsiblings, + sibling->fib6_nsiblings - 1); + WRITE_ONCE(rt->fib6_nsiblings, 0); list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 2dc9dcffe2ca..74d49dd6124d 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -111,8 +111,32 @@ static u32 HASH_ADDR(const struct in6_addr *addr) #define tunnels_l tunnels[1] #define tunnels_wc tunnels[0] -/* Given src, dst and key, find appropriate for input tunnel. */ +static bool ip6gre_tunnel_match(struct ip6_tnl *t, int dev_type, int link, + int *cand_score, struct ip6_tnl **ret) +{ + int score = 0; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + return false; + + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) { + *ret = t; + return true; + } + + if (score < *cand_score) { + *ret = t; + *cand_score = score; + } + return false; +} +/* Given src, dst and key, find appropriate for input tunnel. */ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, const struct in6_addr *remote, const struct in6_addr *local, __be32 key, __be16 gre_proto) @@ -127,8 +151,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, gre_proto == htons(ETH_P_ERSPAN) || gre_proto == htons(ETH_P_ERSPAN2)) ? ARPHRD_ETHER : ARPHRD_IP6GRE; - int score, cand_score = 4; struct net_device *ndev; + int cand_score = 4; for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || @@ -137,22 +161,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, !(t->dev->flags & IFF_UP)) continue; - if (t->dev->type != ARPHRD_IP6GRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } + if (ip6gre_tunnel_match(t, dev_type, link, &cand_score, &cand)) + return cand; } for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { @@ -161,22 +171,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, !(t->dev->flags & IFF_UP)) continue; - if (t->dev->type != ARPHRD_IP6GRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } + if (ip6gre_tunnel_match(t, dev_type, link, &cand_score, &cand)) + return cand; } for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { @@ -187,22 +183,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, !(t->dev->flags & IFF_UP)) continue; - if (t->dev->type != ARPHRD_IP6GRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } + if (ip6gre_tunnel_match(t, dev_type, link, &cand_score, &cand)) + return cand; } for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { @@ -210,22 +192,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, !(t->dev->flags & IFF_UP)) continue; - if (t->dev->type != ARPHRD_IP6GRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } + if (ip6gre_tunnel_match(t, dev_type, link, &cand_score, &cand)) + return cand; } if (cand) @@ -1085,9 +1053,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, htonl(atomic_fetch_inc(&t->o_seqno))); /* TooBig packet may have updated dst->dev's mtu */ - if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) - dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false); - + if (!t->parms.collect_md && dst) { + mtu = READ_ONCE(dst_dev(dst)->mtu); + if (dst_mtu(dst) > mtu) + dst->ops->update_pmtu(dst, NULL, skb, mtu, false); + } err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, NEXTHDR_GRE); if (err != 0) { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 39da6a7ce5f1..168ec07e31cc 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -187,7 +187,9 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, * arrived via the sending interface (ethX), because of the * nature of scoping architecture. --yoshfuji */ - IP6CB(skb)->iif = skb_valid_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex; + IP6CB(skb)->iif = skb_valid_dst(skb) ? + ip6_dst_idev(skb_dst(skb))->dev->ifindex : + dev->ifindex; if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) goto err; @@ -476,6 +478,13 @@ discard: static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INDISCARDS); + kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM); + return 0; + } + skb_clear_delivery_time(skb); ip6_protocol_deliver_rcu(net, skb, 0, false); @@ -499,38 +508,32 @@ EXPORT_SYMBOL_GPL(ip6_input); int ip6_mc_input(struct sk_buff *skb) { + struct net_device *dev = skb->dev; int sdif = inet6_sdif(skb); const struct ipv6hdr *hdr; - struct net_device *dev; bool deliver; - __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev), - __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST, - skb->len); + __IP6_UPD_PO_STATS(skb_dst_dev_net_rcu(skb), + __in6_dev_get_safely(dev), IPSTATS_MIB_INMCAST, + skb->len); /* skb->dev passed may be master dev for vrfs. */ if (sdif) { - rcu_read_lock(); - dev = dev_get_by_index_rcu(dev_net(skb->dev), sdif); + dev = dev_get_by_index_rcu(dev_net_rcu(dev), sdif); if (!dev) { - rcu_read_unlock(); kfree_skb(skb); return -ENODEV; } - } else { - dev = skb->dev; } hdr = ipv6_hdr(skb); deliver = ipv6_chk_mcast_addr(dev, &hdr->daddr, NULL); - if (sdif) - rcu_read_unlock(); #ifdef CONFIG_IPV6_MROUTE /* * IPv6 multicast router mode is now supported ;) */ - if (atomic_read(&dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding) && + if (atomic_read(&dev_net_rcu(skb->dev)->ipv6.devconf_all->mc_forwarding) && !(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { @@ -571,22 +574,21 @@ int ip6_mc_input(struct sk_buff *skb) /* unknown RA - process it normally */ } - if (deliver) + if (deliver) { skb2 = skb_clone(skb, GFP_ATOMIC); - else { + } else { skb2 = skb; skb = NULL; } - if (skb2) { + if (skb2) ip6_mr_input(skb2); - } } out: #endif - if (likely(deliver)) + if (likely(deliver)) { ip6_input(skb); - else { + } else { /* discard */ kfree_skb(skb); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7bd29a9ff0db..1e1410237b6e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -60,7 +60,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *daddr, *nexthop; @@ -232,8 +232,9 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; - struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = dst_dev(dst), *indev = skb->dev; + struct inet6_dev *idev = ip6_dst_idev(dst); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -271,7 +272,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); struct inet6_dev *idev = ip6_dst_idev(dst); struct hop_jumbo_hdr *hop_jumbo; int hoplen = sizeof(*hop_jumbo); @@ -503,13 +504,15 @@ int ip6_forward(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); - struct net *net = dev_net(dst->dev); + struct net *net = dev_net(dst_dev(dst)); + struct net_device *dev; struct inet6_dev *idev; SKB_DR(reason); u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); - if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) + if (!READ_ONCE(net->ipv6.devconf_all->forwarding) && + (!idev || !READ_ONCE(idev->cnf.force_forwarding))) goto error; if (skb->pkt_type != PACKET_HOST) @@ -561,7 +564,7 @@ int ip6_forward(struct sk_buff *skb) /* XXX: idev->cnf.proxy_ndp? */ if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && - pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { + pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) { /* It's tempting to decrease the hop limit @@ -591,12 +594,12 @@ int ip6_forward(struct sk_buff *skb) goto drop; } dst = skb_dst(skb); - + dev = dst_dev(dst); /* IPv6 specs say nothing about it, but it is clear that we cannot send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. */ - if (IP6CB(skb)->iif == dst->dev->ifindex && + if (IP6CB(skb)->iif == dev->ifindex && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct inet_peer *peer; @@ -644,7 +647,7 @@ int ip6_forward(struct sk_buff *skb) if (ip6_pkt_too_big(skb, mtu)) { /* Again, force OUTPUT device used as source address */ - skb->dev = dst->dev; + skb->dev = dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); __IP6_INC_STATS(net, ip6_dst_idev(dst), @@ -653,7 +656,7 @@ int ip6_forward(struct sk_buff *skb) return -EMSGSIZE; } - if (skb_cow(skb, dst->dev->hard_header_len)) { + if (skb_cow(skb, dev->hard_header_len)) { __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); goto drop; @@ -666,7 +669,7 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, - net, NULL, skb, skb->dev, dst->dev, + net, NULL, skb, skb->dev, dev, ip6_forward_finish); error: @@ -1093,7 +1096,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { + (fl6->flowi6_oif && fl6->flowi6_oif != dst_dev(dst)->ifindex)) { dst_release(dst); dst = NULL; } @@ -1760,8 +1763,7 @@ alloc_new_skb: if (WARN_ON_ONCE(copy > msg->msg_iter.count)) goto error; - err = skb_splice_from_iter(skb, &msg->msg_iter, copy, - sk->sk_allocation); + err = skb_splice_from_iter(skb, &msg->msg_iter, copy); if (err < 0) goto error; copy = err; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 894d3158a6f0..3262e81223df 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -632,7 +632,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else { if (ip_route_input(skb2, eiph->daddr, eiph->saddr, ip4h_dscp(eiph), skb2->dev) || - skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) + skb_dst_dev(skb2)->type != ARPHRD_TUNNEL6) goto out; } @@ -1179,7 +1179,7 @@ route_lookup: ndst = dst; } - tdev = dst->dev; + tdev = dst_dev(dst); if (tdev == dev) { DEV_STATS_INC(dev, collisions); @@ -1255,7 +1255,7 @@ route_lookup: /* Calculate max headroom for all the headers and adjust * needed_headroom if necessary. */ - max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr) + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr) + dst->header_len + t->hlen; if (max_headroom > READ_ONCE(dev->needed_headroom)) WRITE_ONCE(dev->needed_headroom, max_headroom); @@ -1278,7 +1278,7 @@ route_lookup: ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - ip6tunnel_xmit(NULL, skb, dev); + ip6tunnel_xmit(NULL, skb, dev, 0); return 0; tx_err_link_failure: DEV_STATS_INC(dev, tx_carrier_errors); @@ -1562,11 +1562,22 @@ static void ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) netdev_state_change(t->dev); } -static void ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p, + bool strict) { - /* for default tnl0 device allow to change only the proto */ + /* For the default ip6tnl0 device, allow changing only the protocol + * (the IP6_TNL_F_CAP_PER_PACKET flag is set on ip6tnl0, and all other + * parameters are 0). + */ + if (strict && + (!ipv6_addr_any(&p->laddr) || !ipv6_addr_any(&p->raddr) || + p->flags != t->parms.flags || p->hop_limit || p->encap_limit || + p->flowinfo || p->link || p->fwmark || p->collect_md)) + return -EINVAL; + t->parms.proto = p->proto; netdev_state_change(t->dev); + return 0; } static void @@ -1680,7 +1691,7 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, } else t = netdev_priv(dev); if (dev == ip6n->fb_tnl_dev) - ip6_tnl0_update(t, &p1); + ip6_tnl0_update(t, &p1, false); else ip6_tnl_update(t, &p1); } @@ -2053,8 +2064,28 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct ip_tunnel_encap ipencap; - if (dev == ip6n->fb_tnl_dev) - return -EINVAL; + if (dev == ip6n->fb_tnl_dev) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { + /* iproute2 always sets TUNNEL_ENCAP_FLAG_CSUM6, so + * let's ignore this flag. + */ + ipencap.flags &= ~TUNNEL_ENCAP_FLAG_CSUM6; + if (memchr_inv(&ipencap, 0, sizeof(ipencap))) { + NL_SET_ERR_MSG(extack, + "Only protocol can be changed for fallback tunnel, not encap params"); + return -EINVAL; + } + } + + ip6_tnl_netlink_parms(data, &p); + if (ip6_tnl0_update(t, &p, true) < 0) { + NL_SET_ERR_MSG(extack, + "Only protocol can be changed for fallback tunnel"); + return -EINVAL; + } + + return 0; + } if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { int err = ip6_tnl_encap_setup(t, &ipencap); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index c99053189ea8..0ff547a4bff7 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -74,13 +74,14 @@ error: } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, - struct net_device *dev, - const struct in6_addr *saddr, - const struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be32 label, - __be16 src_port, __be16 dst_port, bool nocheck) +void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be32 label, + __be16 src_port, __be16 dst_port, bool nocheck, + u16 ip6cb_flags) { struct udphdr *uh; struct ipv6hdr *ip6h; @@ -108,8 +109,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, ip6h->daddr = *daddr; ip6h->saddr = *saddr; - ip6tunnel_xmit(sk, skb, dev); - return 0; + ip6tunnel_xmit(sk, skb, dev, ip6cb_flags); } EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); @@ -168,7 +168,7 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); return ERR_PTR(-ENETUNREACH); } - if (dst->dev == dev) { /* is this necessary? */ + if (dst_dev(dst) == dev) { /* is this necessary? */ netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); dst_release(dst); return ERR_PTR(-ELOOP); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 40464a88bca6..ad5290be4dd6 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -497,7 +497,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) (const struct in6_addr *)&x->id.daddr)) goto tx_err_link_failure; - tdev = dst->dev; + tdev = dst_dev(dst); if (tdev == dev) { DEV_STATS_INC(dev, collisions); @@ -529,7 +529,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) xmit: skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); skb_dst_set(skb, dst); - skb->dev = skb_dst(skb)->dev; + skb->dev = dst_dev(dst); err = dst_output(t->net, skb->sk, skb); if (net_xmit_eval(err) == 0) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9db31e5b998c..e047a4680ab0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2035,8 +2035,8 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct * Processing handlers for ip6mr_forward */ -static int ip6mr_forward2(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, int vifi) +static int ip6mr_prepare_xmit(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, int vifi) { struct vif_device *vif = &mrt->vif_table[vifi]; struct net_device *vif_dev; @@ -2046,7 +2046,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, vif_dev = vif_dev_read(vif); if (!vif_dev) - goto out_free; + return -1; #ifdef CONFIG_IPV6_PIMSM_V2 if (vif->flags & MIFF_REGISTER) { @@ -2055,7 +2055,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, DEV_STATS_ADD(vif_dev, tx_bytes, skb->len); DEV_STATS_INC(vif_dev, tx_packets); ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); - goto out_free; + return -1; } #endif @@ -2069,7 +2069,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); - goto out_free; + return -1; } skb_dst_drop(skb); @@ -2093,20 +2093,43 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, /* We are about to write */ /* XXX: extension headers? */ if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev))) - goto out_free; + return -1; ipv6h = ipv6_hdr(skb); ipv6h->hop_limit--; + return 0; +} + +static void ip6mr_forward2(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, int vifi) +{ + struct net_device *indev = skb->dev; + + if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) + goto out_free; IP6CB(skb)->flags |= IP6SKB_FORWARDED; - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, - net, NULL, skb, skb->dev, vif_dev, - ip6mr_forward2_finish); + NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, + net, NULL, skb, indev, skb->dev, + ip6mr_forward2_finish); + return; + +out_free: + kfree_skb(skb); +} + +static void ip6mr_output2(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, int vifi) +{ + if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) + goto out_free; + + ip6_output(net, NULL, skb); + return; out_free: kfree_skb(skb); - return 0; } /* Called with rcu_read_lock() */ @@ -2221,6 +2244,56 @@ dont_forward: kfree_skb(skb); } +/* Called under rcu_read_lock() */ +static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt, + struct net_device *dev, struct sk_buff *skb, + struct mfc6_cache *c) +{ + int psend = -1; + int ct; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + atomic_long_inc(&c->_c.mfc_un.res.pkt); + atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); + WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); + + /* Forward the frame */ + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_any(&c->mf6c_mcastgrp)) { + if (ipv6_hdr(skb)->hop_limit > + c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { + /* It's an (*,*) entry and the packet is not coming from + * the upstream: forward the packet to the upstream + * only. + */ + psend = c->_c.mfc_parent; + goto last_forward; + } + goto dont_forward; + } + for (ct = c->_c.mfc_un.res.maxvif - 1; + ct >= c->_c.mfc_un.res.minvif; ct--) { + if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { + if (psend != -1) { + struct sk_buff *skb2; + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) + ip6mr_output2(net, mrt, skb2, psend); + } + psend = ct; + } + } +last_forward: + if (psend != -1) { + ip6mr_output2(net, mrt, skb, psend); + return; + } + +dont_forward: + kfree_skb(skb); +} /* * Multicast packets for forwarding arrive here @@ -2228,21 +2301,20 @@ dont_forward: int ip6_mr_input(struct sk_buff *skb) { + struct net_device *dev = skb->dev; + struct net *net = dev_net_rcu(dev); struct mfc6_cache *cache; - struct net *net = dev_net(skb->dev); struct mr_table *mrt; struct flowi6 fl6 = { - .flowi6_iif = skb->dev->ifindex, + .flowi6_iif = dev->ifindex, .flowi6_mark = skb->mark, }; int err; - struct net_device *dev; /* skb->dev passed in is the master dev for vrfs. * Get the proper interface that does have a vif associated with it. */ - dev = skb->dev; - if (netif_is_l3_master(skb->dev)) { + if (netif_is_l3_master(dev)) { dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); if (!dev) { kfree_skb(skb); @@ -2288,6 +2360,61 @@ int ip6_mr_input(struct sk_buff *skb) return 0; } +int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_device *dev = skb_dst(skb)->dev; + struct flowi6 fl6 = (struct flowi6) { + .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_mark = skb->mark, + }; + struct mfc6_cache *cache; + struct mr_table *mrt; + int err; + int vif; + + guard(rcu)(); + + if (IP6CB(skb)->flags & IP6SKB_FORWARDED) + goto ip6_output; + if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE)) + goto ip6_output; + + err = ip6mr_fib_lookup(net, &fl6, &mrt); + if (err < 0) { + kfree_skb(skb); + return err; + } + + cache = ip6mr_cache_find(mrt, + &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + if (!cache) { + vif = ip6mr_find_vif(mrt, dev); + if (vif >= 0) + cache = ip6mr_cache_find_any(mrt, + &ipv6_hdr(skb)->daddr, + vif); + } + + /* No usable cache entry */ + if (!cache) { + vif = ip6mr_find_vif(mrt, dev); + if (vif >= 0) + return ip6mr_cache_unresolved(mrt, vif, skb, dev); + goto ip6_output; + } + + /* Wrong interface */ + vif = cache->_c.mfc_parent; + if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) + goto ip6_output; + + ip6_mr_output_finish(net, mrt, dev, skb, cache); + return 0; + +ip6_output: + return ip6_output(net, sk, skb); +} + int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, u32 portid) { diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 72d4858dec18..8607569de34f 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -71,6 +71,7 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } +static struct lock_class_key xfrm_state_lock_key; static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct net *net = xs_net(x); @@ -79,6 +80,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) t = xfrm_state_alloc(net); if (!t) goto out; + lockdep_set_class(&t->lock, &xfrm_state_lock_key); t->id.proto = IPPROTO_IPV6; t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1e225e6489ea..e66ec623972e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -117,26 +117,6 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, return opt; } -static bool setsockopt_needs_rtnl(int optname) -{ - switch (optname) { - case IPV6_ADDRFORM: - case IPV6_ADD_MEMBERSHIP: - case IPV6_DROP_MEMBERSHIP: - case IPV6_JOIN_ANYCAST: - case IPV6_LEAVE_ANYCAST: - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - case MCAST_MSFILTER: - return true; - } - return false; -} - static int copy_group_source_from_sockptr(struct group_source_req *greqs, sockptr_t optval, int optlen) { @@ -395,9 +375,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, { struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); - int val, valbool; int retv = -ENOPROTOOPT; - bool needs_rtnl = setsockopt_needs_rtnl(optname); + int val, valbool; if (sockptr_is_null(optval)) val = 0; @@ -562,8 +541,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, return 0; } } - if (needs_rtnl) - rtnl_lock(); + sockopt_lock_sock(sk); /* Another thread has converted the socket into IPv4 with @@ -969,8 +947,6 @@ done: unlock: sockopt_release_sock(sk); - if (needs_rtnl) - rtnl_unlock(); return retv; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 616bf4c0c8fd..36ca27496b3c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -108,9 +108,9 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; int sysctl_mld_qrv __read_mostly = MLD_QRV_DEFAULT; -/* - * socket join on multicast group - */ +#define mc_assert_locked(idev) \ + lockdep_assert_held(&(idev)->mc_lock) + #define mc_dereference(e, idev) \ rcu_dereference_protected(e, lockdep_is_held(&(idev)->mc_lock)) @@ -169,17 +169,18 @@ static int unsolicited_report_interval(struct inet6_dev *idev) return iv > 0 ? iv : 1; } +/* + * socket join on multicast group + */ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr, unsigned int mode) { - struct net_device *dev = NULL; - struct ipv6_mc_socklist *mc_lst; struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); + struct net_device *dev = NULL; int err; - ASSERT_RTNL(); - if (!ipv6_addr_is_multicast(addr)) return -EINVAL; @@ -199,13 +200,18 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, if (ifindex == 0) { struct rt6_info *rt; + + rcu_read_lock(); rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { - dev = rt->dst.dev; + dev = dst_dev(&rt->dst); + dev_hold(dev); ip6_rt_put(rt); } - } else - dev = __dev_get_by_index(net, ifindex); + rcu_read_unlock(); + } else { + dev = dev_get_by_index(net, ifindex); + } if (!dev) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -216,12 +222,11 @@ static int __ipv6_sock_mc_join(struct sock *sk, int ifindex, mc_lst->sfmode = mode; RCU_INIT_POINTER(mc_lst->sflist, NULL); - /* - * now add/increase the group membership on the device - */ - + /* now add/increase the group membership on the device */ err = __ipv6_dev_mc_inc(dev, addr, mode); + dev_put(dev); + if (err) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return err; @@ -248,14 +253,36 @@ int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex, /* * socket leave on multicast group */ +static void __ipv6_sock_mc_drop(struct sock *sk, struct ipv6_mc_socklist *mc_lst) +{ + struct net *net = sock_net(sk); + struct net_device *dev; + + dev = dev_get_by_index(net, mc_lst->ifindex); + if (dev) { + struct inet6_dev *idev = in6_dev_get(dev); + + ip6_mc_leave_src(sk, mc_lst, idev); + + if (idev) { + __ipv6_dev_mc_dec(idev, &mc_lst->addr); + in6_dev_put(idev); + } + + dev_put(dev); + } else { + ip6_mc_leave_src(sk, mc_lst, NULL); + } + + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + kfree_rcu(mc_lst, rcu); +} + int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_mc_socklist *mc_lst; struct ipv6_mc_socklist __rcu **lnk; - struct net *net = sock_net(sk); - - ASSERT_RTNL(); + struct ipv6_mc_socklist *mc_lst; if (!ipv6_addr_is_multicast(addr)) return -EINVAL; @@ -265,23 +292,8 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { - struct net_device *dev; - *lnk = mc_lst->next; - - dev = __dev_get_by_index(net, mc_lst->ifindex); - if (dev) { - struct inet6_dev *idev = __in6_dev_get(dev); - - ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) - __ipv6_dev_mc_dec(idev, &mc_lst->addr); - } else { - ip6_mc_leave_src(sk, mc_lst, NULL); - } - - atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); - kfree_rcu(mc_lst, rcu); + __ipv6_sock_mc_drop(sk, mc_lst); return 0; } } @@ -290,31 +302,33 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } EXPORT_SYMBOL(ipv6_sock_mc_drop); -static struct inet6_dev *ip6_mc_find_dev_rtnl(struct net *net, - const struct in6_addr *group, - int ifindex) +static struct inet6_dev *ip6_mc_find_dev(struct net *net, + const struct in6_addr *group, + int ifindex) { struct net_device *dev = NULL; - struct inet6_dev *idev = NULL; + struct inet6_dev *idev; if (ifindex == 0) { - struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0); + struct rt6_info *rt; + rcu_read_lock(); + rt = rt6_lookup(net, group, NULL, 0, NULL, 0); if (rt) { - dev = rt->dst.dev; + dev = dst_dev(&rt->dst); + dev_hold(dev); ip6_rt_put(rt); } + rcu_read_unlock(); } else { - dev = __dev_get_by_index(net, ifindex); + dev = dev_get_by_index(net, ifindex); } - if (!dev) return NULL; - idev = __in6_dev_get(dev); - if (!idev) - return NULL; - if (idev->dead) - return NULL; + + idev = in6_dev_get(dev); + dev_put(dev); + return idev; } @@ -322,28 +336,10 @@ void __ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; - struct net *net = sock_net(sk); - - ASSERT_RTNL(); while ((mc_lst = sock_dereference(np->ipv6_mc_list, sk)) != NULL) { - struct net_device *dev; - np->ipv6_mc_list = mc_lst->next; - - dev = __dev_get_by_index(net, mc_lst->ifindex); - if (dev) { - struct inet6_dev *idev = __in6_dev_get(dev); - - ip6_mc_leave_src(sk, mc_lst, idev); - if (idev) - __ipv6_dev_mc_dec(idev, &mc_lst->addr); - } else { - ip6_mc_leave_src(sk, mc_lst, NULL); - } - - atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); - kfree_rcu(mc_lst, rcu); + __ipv6_sock_mc_drop(sk, mc_lst); } } @@ -354,24 +350,22 @@ void ipv6_sock_mc_close(struct sock *sk) if (!rcu_access_pointer(np->ipv6_mc_list)) return; - rtnl_lock(); lock_sock(sk); __ipv6_sock_mc_close(sk); release_sock(sk); - rtnl_unlock(); } int ip6_mc_source(int add, int omode, struct sock *sk, - struct group_source_req *pgsr) + struct group_source_req *pgsr) { + struct ipv6_pinfo *inet6 = inet6_sk(sk); struct in6_addr *source, *group; + struct net *net = sock_net(sk); struct ipv6_mc_socklist *pmc; - struct inet6_dev *idev; - struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; - struct net *net = sock_net(sk); - int i, j, rv; + struct inet6_dev *idev; int leavegroup = 0; + int i, j, rv; int err; source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr; @@ -380,13 +374,19 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev_rtnl(net, group, pgsr->gsr_interface); + idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface); if (!idev) return -ENODEV; + mutex_lock(&idev->mc_lock); + + if (idev->dead) { + err = -ENODEV; + goto done; + } + err = -EADDRNOTAVAIL; - mutex_lock(&idev->mc_lock); for_each_pmc_socklock(inet6, sk, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; @@ -483,6 +483,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, ip6_mc_add_src(idev, group, omode, 1, source, 1); done: mutex_unlock(&idev->mc_lock); + in6_dev_put(idev); if (leavegroup) err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; @@ -491,12 +492,12 @@ done: int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, struct sockaddr_storage *list) { - const struct in6_addr *group; - struct ipv6_mc_socklist *pmc; - struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *newpsl, *psl; struct net *net = sock_net(sk); + const struct in6_addr *group; + struct ipv6_mc_socklist *pmc; + struct inet6_dev *idev; int leavegroup = 0; int i, err; @@ -508,10 +509,17 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - idev = ip6_mc_find_dev_rtnl(net, group, gsf->gf_interface); + idev = ip6_mc_find_dev(net, group, gsf->gf_interface); if (!idev) return -ENODEV; + mutex_lock(&idev->mc_lock); + + if (idev->dead) { + err = -ENODEV; + goto done; + } + err = 0; if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { @@ -544,24 +552,19 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, psin6 = (struct sockaddr_in6 *)list; newpsl->sl_addr[i] = psin6->sin6_addr; } - mutex_lock(&idev->mc_lock); + err = ip6_mc_add_src(idev, group, gsf->gf_fmode, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { - mutex_unlock(&idev->mc_lock); sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr, newpsl->sl_max)); goto done; } - mutex_unlock(&idev->mc_lock); } else { newpsl = NULL; - mutex_lock(&idev->mc_lock); ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); - mutex_unlock(&idev->mc_lock); } - mutex_lock(&idev->mc_lock); psl = sock_dereference(pmc->sflist, sk); if (psl) { ip6_mc_del_src(idev, group, pmc->sfmode, @@ -571,12 +574,14 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); } + rcu_assign_pointer(pmc->sflist, newpsl); - mutex_unlock(&idev->mc_lock); kfree_rcu(psl, rcu); pmc->sfmode = gsf->gf_fmode; err = 0; done: + mutex_unlock(&idev->mc_lock); + in6_dev_put(idev); if (leavegroup) err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); return err; @@ -597,10 +602,6 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - /* changes to the ipv6_mc_list require the socket lock and - * rtnl lock. We have the socket lock, so reading the list is safe. - */ - for_each_pmc_socklock(inet6, sk, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; @@ -668,12 +669,13 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, return rv; } -/* called with mc_lock */ static void igmp6_group_added(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + mc_assert_locked(mc->idev); + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) return; @@ -703,12 +705,13 @@ static void igmp6_group_added(struct ifmcaddr6 *mc) mld_ifc_event(mc->idev); } -/* called with mc_lock */ static void igmp6_group_dropped(struct ifmcaddr6 *mc) { struct net_device *dev = mc->idev->dev; char buf[MAX_ADDR_LEN]; + mc_assert_locked(mc->idev); + if (IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) return; @@ -729,14 +732,13 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc) refcount_dec(&mc->mca_refcnt); } -/* - * deleted ifmcaddr6 manipulation - * called with mc_lock - */ +/* deleted ifmcaddr6 manipulation */ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { struct ifmcaddr6 *pmc; + mc_assert_locked(idev); + /* this is an "ifmcaddr6" for convenience; only the fields below * are actually used. In particular, the refcnt and users are not * used for management of the delete list. Using the same structure @@ -770,54 +772,54 @@ static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) rcu_assign_pointer(idev->mc_tomb, pmc); } -/* called with mc_lock */ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) { struct ip6_sf_list *psf, *sources, *tomb; struct in6_addr *pmca = &im->mca_addr; struct ifmcaddr6 *pmc, *pmc_prev; + mc_assert_locked(idev); + pmc_prev = NULL; for_each_mc_tomb(idev, pmc) { if (ipv6_addr_equal(&pmc->mca_addr, pmca)) break; pmc_prev = pmc; } - if (pmc) { - if (pmc_prev) - rcu_assign_pointer(pmc_prev->next, pmc->next); - else - rcu_assign_pointer(idev->mc_tomb, pmc->next); - } - - if (pmc) { - im->idev = pmc->idev; - if (im->mca_sfmode == MCAST_INCLUDE) { - tomb = rcu_replace_pointer(im->mca_tomb, - mc_dereference(pmc->mca_tomb, pmc->idev), - lockdep_is_held(&im->idev->mc_lock)); - rcu_assign_pointer(pmc->mca_tomb, tomb); - - sources = rcu_replace_pointer(im->mca_sources, - mc_dereference(pmc->mca_sources, pmc->idev), - lockdep_is_held(&im->idev->mc_lock)); - rcu_assign_pointer(pmc->mca_sources, sources); - for_each_psf_mclock(im, psf) - psf->sf_crcount = idev->mc_qrv; - } else { - im->mca_crcount = idev->mc_qrv; - } - ip6_mc_clear_src(pmc); - in6_dev_put(pmc->idev); - kfree_rcu(pmc, rcu); + if (!pmc) + return; + if (pmc_prev) + rcu_assign_pointer(pmc_prev->next, pmc->next); + else + rcu_assign_pointer(idev->mc_tomb, pmc->next); + + im->idev = pmc->idev; + if (im->mca_sfmode == MCAST_INCLUDE) { + tomb = rcu_replace_pointer(im->mca_tomb, + mc_dereference(pmc->mca_tomb, pmc->idev), + lockdep_is_held(&im->idev->mc_lock)); + rcu_assign_pointer(pmc->mca_tomb, tomb); + + sources = rcu_replace_pointer(im->mca_sources, + mc_dereference(pmc->mca_sources, pmc->idev), + lockdep_is_held(&im->idev->mc_lock)); + rcu_assign_pointer(pmc->mca_sources, sources); + for_each_psf_mclock(im, psf) + psf->sf_crcount = idev->mc_qrv; + } else { + im->mca_crcount = idev->mc_qrv; } + ip6_mc_clear_src(pmc); + in6_dev_put(pmc->idev); + kfree_rcu(pmc, rcu); } -/* called with mc_lock */ static void mld_clear_delrec(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *nextpmc; + mc_assert_locked(idev); + pmc = mc_dereference(idev->mc_tomb, idev); RCU_INIT_POINTER(idev->mc_tomb, NULL); @@ -843,29 +845,18 @@ static void mld_clear_delrec(struct inet6_dev *idev) static void mld_clear_query(struct inet6_dev *idev) { - struct sk_buff *skb; - spin_lock_bh(&idev->mc_query_lock); - while ((skb = __skb_dequeue(&idev->mc_query_queue))) - kfree_skb(skb); + __skb_queue_purge(&idev->mc_query_queue); spin_unlock_bh(&idev->mc_query_lock); } static void mld_clear_report(struct inet6_dev *idev) { - struct sk_buff *skb; - spin_lock_bh(&idev->mc_report_lock); - while ((skb = __skb_dequeue(&idev->mc_report_queue))) - kfree_skb(skb); + __skb_queue_purge(&idev->mc_report_queue); spin_unlock_bh(&idev->mc_report_lock); } -static void mca_get(struct ifmcaddr6 *mc) -{ - refcount_inc(&mc->mca_refcnt); -} - static void ma_put(struct ifmcaddr6 *mc) { if (refcount_dec_and_test(&mc->mca_refcnt)) { @@ -874,13 +865,14 @@ static void ma_put(struct ifmcaddr6 *mc) } } -/* called with mc_lock */ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, const struct in6_addr *addr, unsigned int mode) { struct ifmcaddr6 *mc; + mc_assert_locked(idev); + mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return NULL; @@ -945,23 +937,22 @@ error: static int __ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr, unsigned int mode) { - struct ifmcaddr6 *mc; struct inet6_dev *idev; - - ASSERT_RTNL(); + struct ifmcaddr6 *mc; /* we need to take a reference on idev */ idev = in6_dev_get(dev); - if (!idev) return -EINVAL; - if (idev->dead) { + mutex_lock(&idev->mc_lock); + + if (READ_ONCE(idev->dead)) { + mutex_unlock(&idev->mc_lock); in6_dev_put(idev); return -ENODEV; } - mutex_lock(&idev->mc_lock); for_each_mc_mclock(idev, mc) { if (ipv6_addr_equal(&mc->mca_addr, addr)) { mc->mca_users++; @@ -982,13 +973,11 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, rcu_assign_pointer(mc->next, idev->mc_list); rcu_assign_pointer(idev->mc_list, mc); - mca_get(mc); - mld_del_delrec(idev, mc); igmp6_group_added(mc); inet6_ifmcaddr_notify(dev, mc, RTM_NEWMULTICAST); mutex_unlock(&idev->mc_lock); - ma_put(mc); + return 0; } @@ -1005,9 +994,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifmcaddr6 *ma, __rcu **map; - ASSERT_RTNL(); - mutex_lock(&idev->mc_lock); + for (map = &idev->mc_list; (ma = mc_dereference(*map, idev)); map = &ma->next) { @@ -1038,13 +1026,12 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) struct inet6_dev *idev; int err; - ASSERT_RTNL(); - - idev = __in6_dev_get(dev); + idev = in6_dev_get(dev); if (!idev) - err = -ENODEV; - else - err = __ipv6_dev_mc_dec(idev, addr); + return -ENODEV; + + err = __ipv6_dev_mc_dec(idev, addr); + in6_dev_put(idev); return err; } @@ -1091,46 +1078,51 @@ unlock: return rv; } -/* called with mc_lock */ static void mld_gq_start_work(struct inet6_dev *idev) { unsigned long tv = get_random_u32_below(idev->mc_maxdelay); + mc_assert_locked(idev); + idev->mc_gq_running = 1; if (!mod_delayed_work(mld_wq, &idev->mc_gq_work, tv + 2)) in6_dev_hold(idev); } -/* called with mc_lock */ static void mld_gq_stop_work(struct inet6_dev *idev) { + mc_assert_locked(idev); + idev->mc_gq_running = 0; if (cancel_delayed_work(&idev->mc_gq_work)) __in6_dev_put(idev); } -/* called with mc_lock */ static void mld_ifc_start_work(struct inet6_dev *idev, unsigned long delay) { unsigned long tv = get_random_u32_below(delay); + mc_assert_locked(idev); + if (!mod_delayed_work(mld_wq, &idev->mc_ifc_work, tv + 2)) in6_dev_hold(idev); } -/* called with mc_lock */ static void mld_ifc_stop_work(struct inet6_dev *idev) { + mc_assert_locked(idev); + idev->mc_ifc_count = 0; if (cancel_delayed_work(&idev->mc_ifc_work)) __in6_dev_put(idev); } -/* called with mc_lock */ static void mld_dad_start_work(struct inet6_dev *idev, unsigned long delay) { unsigned long tv = get_random_u32_below(delay); + mc_assert_locked(idev); + if (!mod_delayed_work(mld_wq, &idev->mc_dad_work, tv + 2)) in6_dev_hold(idev); } @@ -1155,14 +1147,13 @@ static void mld_report_stop_work(struct inet6_dev *idev) __in6_dev_put(idev); } -/* - * IGMP handling (alias multicast ICMPv6 messages) - * called with mc_lock - */ +/* IGMP handling (alias multicast ICMPv6 messages) */ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) { unsigned long delay = resptime; + mc_assert_locked(ma->idev); + /* Do not start work for these addresses */ if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) || IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) @@ -1181,15 +1172,15 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) ma->mca_flags |= MAF_TIMER_RUNNING; } -/* mark EXCLUDE-mode sources - * called with mc_lock - */ +/* mark EXCLUDE-mode sources */ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { struct ip6_sf_list *psf; int i, scount; + mc_assert_locked(pmc->idev); + scount = 0; for_each_psf_mclock(pmc, psf) { if (scount == nsrcs) @@ -1212,13 +1203,14 @@ static bool mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, return true; } -/* called with mc_lock */ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs, const struct in6_addr *srcs) { struct ip6_sf_list *psf; int i, scount; + mc_assert_locked(pmc->idev); + if (pmc->mca_sfmode == MCAST_EXCLUDE) return mld_xmarksources(pmc, nsrcs, srcs); @@ -1913,7 +1905,6 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, #define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) -/* called with mc_lock */ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted, int crsend) @@ -1927,6 +1918,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_report *pmr; unsigned int mtu; + mc_assert_locked(idev); + if (pmc->mca_flags & MAF_NOREPORT) return skb; @@ -2045,12 +2038,13 @@ empty_source: return skb; } -/* called with mc_lock */ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) { struct sk_buff *skb = NULL; int type; + mc_assert_locked(idev); + if (!pmc) { for_each_mc_mclock(idev, pmc) { if (pmc->mca_flags & MAF_NOREPORT) @@ -2072,10 +2066,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) mld_sendpack(skb); } -/* - * remove zero-count source records from a source filter list - * called with mc_lock - */ +/* remove zero-count source records from a source filter list */ static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *idev) { struct ip6_sf_list *psf_prev, *psf_next, *psf; @@ -2099,7 +2090,6 @@ static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf, struct inet6_dev *i } } -/* called with mc_lock */ static void mld_send_cr(struct inet6_dev *idev) { struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next; @@ -2263,13 +2253,14 @@ err_out: goto out; } -/* called with mc_lock */ static void mld_send_initial_cr(struct inet6_dev *idev) { - struct sk_buff *skb; struct ifmcaddr6 *pmc; + struct sk_buff *skb; int type; + mc_assert_locked(idev); + if (mld_in_v1_mode(idev)) return; @@ -2316,13 +2307,14 @@ static void mld_dad_work(struct work_struct *work) in6_dev_put(idev); } -/* called with mc_lock */ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, - const struct in6_addr *psfsrc) + const struct in6_addr *psfsrc) { struct ip6_sf_list *psf, *psf_prev; int rv = 0; + mc_assert_locked(pmc->idev); + psf_prev = NULL; for_each_psf_mclock(pmc, psf) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) @@ -2359,7 +2351,6 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, return rv; } -/* called with mc_lock */ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, int delta) @@ -2371,6 +2362,8 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; + mc_assert_locked(idev); + for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; @@ -2412,15 +2405,14 @@ static int ip6_mc_del_src(struct inet6_dev *idev, const struct in6_addr *pmca, return err; } -/* - * Add multicast single-source filter to the interface list - * called with mc_lock - */ +/* Add multicast single-source filter to the interface list */ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, - const struct in6_addr *psfsrc) + const struct in6_addr *psfsrc) { struct ip6_sf_list *psf, *psf_prev; + mc_assert_locked(pmc->idev); + psf_prev = NULL; for_each_psf_mclock(pmc, psf) { if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) @@ -2443,11 +2435,12 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, return 0; } -/* called with mc_lock */ static void sf_markstate(struct ifmcaddr6 *pmc) { - struct ip6_sf_list *psf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; + struct ip6_sf_list *psf; + + mc_assert_locked(pmc->idev); for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { @@ -2460,14 +2453,15 @@ static void sf_markstate(struct ifmcaddr6 *pmc) } } -/* called with mc_lock */ static int sf_setstate(struct ifmcaddr6 *pmc) { - struct ip6_sf_list *psf, *dpsf; int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; + struct ip6_sf_list *psf, *dpsf; int qrv = pmc->idev->mc_qrv; int new_in, rv; + mc_assert_locked(pmc->idev); + rv = 0; for_each_psf_mclock(pmc, psf) { if (pmc->mca_sfcount[MCAST_EXCLUDE]) { @@ -2526,10 +2520,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc) return rv; } -/* - * Add multicast source filter list to the interface list - * called with mc_lock - */ +/* Add multicast source filter list to the interface list */ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, int sfmode, int sfcount, const struct in6_addr *psfsrc, int delta) @@ -2541,6 +2532,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, if (!idev) return -ENODEV; + mc_assert_locked(idev); + for_each_mc_mclock(idev, pmc) { if (ipv6_addr_equal(pmca, &pmc->mca_addr)) break; @@ -2588,11 +2581,12 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, return err; } -/* called with mc_lock */ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) { struct ip6_sf_list *psf, *nextpsf; + mc_assert_locked(pmc->idev); + for (psf = mc_dereference(pmc->mca_tomb, pmc->idev); psf; psf = nextpsf) { @@ -2613,11 +2607,12 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) WRITE_ONCE(pmc->mca_sfcount[MCAST_EXCLUDE], 1); } -/* called with mc_lock */ static void igmp6_join_group(struct ifmcaddr6 *ma) { unsigned long delay; + mc_assert_locked(ma->idev); + if (ma->mca_flags & MAF_NOREPORT) return; @@ -2664,9 +2659,10 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, return err; } -/* called with mc_lock */ static void igmp6_leave_group(struct ifmcaddr6 *ma) { + mc_assert_locked(ma->idev); + if (mld_in_v1_mode(ma->idev)) { if (ma->mca_flags & MAF_LAST_REPORTER) { igmp6_send(&ma->mca_addr, ma->idev->dev, @@ -2711,9 +2707,10 @@ static void mld_ifc_work(struct work_struct *work) in6_dev_put(idev); } -/* called with mc_lock */ static void mld_ifc_event(struct inet6_dev *idev) { + mc_assert_locked(idev); + if (mld_in_v1_mode(idev)) return; @@ -2868,8 +2865,6 @@ static void ipv6_mc_rejoin_groups(struct inet6_dev *idev) { struct ifmcaddr6 *pmc; - ASSERT_RTNL(); - mutex_lock(&idev->mc_lock); if (mld_in_v1_mode(idev)) { for_each_mc_mclock(idev, pmc) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ecb5c4b8518f..7d5abb3158ec 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -243,9 +243,8 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, case ND_OPT_NONCE: case ND_OPT_REDIRECT_HDR: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { - ND_PRINTK(2, warn, - "%s: duplicated ND6 option found: type=%d\n", - __func__, nd_opt->nd_opt_type); + net_dbg_ratelimited("%s: duplicated ND6 option found: type=%d\n", + __func__, nd_opt->nd_opt_type); } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } @@ -275,11 +274,8 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, * to accommodate future extension to the * protocol. */ - ND_PRINTK(2, notice, - "%s: ignored unsupported option; type=%d, len=%d\n", - __func__, - nd_opt->nd_opt_type, - nd_opt->nd_opt_len); + net_dbg_ratelimited("%s: ignored unsupported option; type=%d, len=%d\n", + __func__, nd_opt->nd_opt_type, nd_opt->nd_opt_len); } next_opt: opt_len -= l; @@ -377,24 +373,25 @@ static int ndisc_constructor(struct neighbour *neigh) static int pndisc_constructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; - struct in6_addr maddr; struct net_device *dev = n->dev; + struct in6_addr maddr; - if (!dev || !__in6_dev_get(dev)) + if (!dev) return -EINVAL; + addrconf_addr_solict_mult(addr, &maddr); - ipv6_dev_mc_inc(dev, &maddr); - return 0; + return ipv6_dev_mc_inc(dev, &maddr); } static void pndisc_destructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; - struct in6_addr maddr; struct net_device *dev = n->dev; + struct in6_addr maddr; - if (!dev || !__in6_dev_get(dev)) + if (!dev) return; + addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_dec(dev, &maddr); } @@ -473,6 +470,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, { struct icmp6hdr *icmp6h = icmp6_hdr(skb); struct dst_entry *dst = skb_dst(skb); + struct net_device *dev; struct inet6_dev *idev; struct net *net; struct sock *sk; @@ -507,11 +505,12 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); - idev = __in6_dev_get(dst->dev); + dev = dst_dev(dst); + idev = __in6_dev_get(dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, dst->dev, + net, sk, skb, NULL, dev, dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); @@ -751,9 +750,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) { - ND_PRINTK(1, dbg, - "%s: trying to ucast probe in NUD_INVALID: %pI6\n", - __func__, target); + net_dbg_ratelimited("%s: trying to ucast probe in NUD_INVALID: %pI6\n", + __func__, target); } ndisc_send_ns(dev, target, target, saddr, 0); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { @@ -770,11 +768,9 @@ static int pndisc_is_router(const void *pkey, struct pneigh_entry *n; int ret = -1; - read_lock_bh(&nd_tbl.lock); - n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev); + n = pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev); if (n) - ret = !!(n->flags & NTF_ROUTER); - read_unlock_bh(&nd_tbl.lock); + ret = !!(READ_ONCE(n->flags) & NTF_ROUTER); return ret; } @@ -811,7 +807,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { - ND_PRINTK(2, warn, "NS: multicast target address\n"); + net_dbg_ratelimited("NS: multicast target address\n"); return reason; } @@ -820,7 +816,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) * DAD has to be destined for solicited node multicast address. */ if (dad && !ipv6_addr_is_solict_mult(daddr)) { - ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); + net_dbg_ratelimited("NS: bad DAD packet (wrong destination)\n"); return reason; } @@ -830,8 +826,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev); if (!lladdr) { - ND_PRINTK(2, warn, - "NS: invalid link-layer address length\n"); + net_dbg_ratelimited("NS: invalid link-layer address length\n"); return reason; } @@ -841,8 +836,7 @@ static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) * in the message. */ if (dad) { - ND_PRINTK(2, warn, - "NS: bad DAD packet (link-layer address option)\n"); + net_dbg_ratelimited("NS: bad DAD packet (link-layer address option)\n"); return reason; } } @@ -859,10 +853,8 @@ have_ifp: if (nonce != 0 && ifp->dad_nonce == nonce) { u8 *np = (u8 *)&nonce; /* Matching nonce if looped back */ - ND_PRINTK(2, notice, - "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", - ifp->idev->dev->name, - &ifp->addr, np); + net_dbg_ratelimited("%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", + ifp->idev->dev->name, &ifp->addr, np); goto out; } /* @@ -1013,13 +1005,13 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { - ND_PRINTK(2, warn, "NA: target address is multicast\n"); + net_dbg_ratelimited("NA: target address is multicast\n"); return reason; } if (ipv6_addr_is_multicast(daddr) && msg->icmph.icmp6_solicited) { - ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n"); + net_dbg_ratelimited("NA: solicited NA is multicasted\n"); return reason; } @@ -1038,8 +1030,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) if (ndopts.nd_opts_tgt_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev); if (!lladdr) { - ND_PRINTK(2, warn, - "NA: invalid link-layer address length\n"); + net_dbg_ratelimited("NA: invalid link-layer address length\n"); return reason; } } @@ -1060,9 +1051,9 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) unsolicited advertisement. */ if (skb->pkt_type != PACKET_LOOPBACK) - ND_PRINTK(1, warn, - "NA: %pM advertised our address %pI6c on %s!\n", - eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name); + net_warn_ratelimited("NA: %pM advertised our address %pI6c on %s!\n", + eth_hdr(skb)->h_source, &ifp->addr, + ifp->idev->dev->name); in6_ifa_put(ifp); return reason; } @@ -1107,7 +1098,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && READ_ONCE(net->ipv6.devconf_all->forwarding) && READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && - pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { + pneigh_lookup(&nd_tbl, net, &msg->target, dev)) { /* XXX: idev->cnf.proxy_ndp */ goto out; } @@ -1149,7 +1140,7 @@ static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); if (!idev) { - ND_PRINTK(1, err, "RS: can't find in6 device\n"); + net_err_ratelimited("RS: can't find in6 device\n"); return reason; } @@ -1257,11 +1248,9 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); - ND_PRINTK(2, info, - "RA: %s, dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, dev: %s\n", __func__, skb->dev->name); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK(2, warn, "RA: source address is not link-local\n"); + net_dbg_ratelimited("RA: source address is not link-local\n"); return reason; } if (optlen < 0) @@ -1269,15 +1258,14 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) { - ND_PRINTK(2, warn, "RA: from host or unauthorized router\n"); + net_dbg_ratelimited("RA: from host or unauthorized router\n"); return reason; } #endif in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) { - ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", - skb->dev->name); + net_err_ratelimited("RA: can't find inet6 device for %s\n", skb->dev->name); return reason; } @@ -1285,18 +1273,16 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (!ipv6_accept_ra(in6_dev)) { - ND_PRINTK(2, info, - "RA: %s, did not accept ra for dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, did not accept ra for dev: %s\n", __func__, + skb->dev->name); goto skip_linkparms; } #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific parameters from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { - ND_PRINTK(2, info, - "RA: %s, nodetype is NODEFAULT, dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT, dev: %s\n", __func__, + skb->dev->name); goto skip_linkparms; } #endif @@ -1325,18 +1311,16 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) send_ifinfo_notify = true; if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) { - ND_PRINTK(2, info, - "RA: %s, defrtr is false for dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, defrtr is false for dev: %s\n", __func__, + skb->dev->name); goto skip_defrtr; } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); if (lifetime != 0 && lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) { - ND_PRINTK(2, info, - "RA: router lifetime (%ds) is too short: %s\n", - lifetime, skb->dev->name); + net_dbg_ratelimited("RA: router lifetime (%ds) is too short: %s\n", lifetime, + skb->dev->name); goto skip_defrtr; } @@ -1346,9 +1330,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) net = dev_net(in6_dev->dev); if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { - ND_PRINTK(2, info, - "RA from local address detected on dev: %s: default router ignored\n", - skb->dev->name); + net_dbg_ratelimited("RA from local address detected on dev: %s: default router ignored\n", + skb->dev->name); goto skip_defrtr; } @@ -1366,9 +1349,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { - ND_PRINTK(0, err, - "RA: %s got default router without neighbour\n", - __func__); + net_err_ratelimited("RA: %s got default router without neighbour\n", + __func__); fib6_info_release(rt); return reason; } @@ -1381,10 +1363,10 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) rt = NULL; } - ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", - rt, lifetime, defrtr_usr_metric, skb->dev->name); + net_dbg_ratelimited("RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", rt, lifetime, + defrtr_usr_metric, skb->dev->name); if (!rt && lifetime) { - ND_PRINTK(3, info, "RA: adding default router\n"); + net_dbg_ratelimited("RA: adding default router\n"); if (neigh) neigh_release(neigh); @@ -1393,9 +1375,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) skb->dev, pref, defrtr_usr_metric, lifetime); if (!rt) { - ND_PRINTK(0, err, - "RA: %s failed to add default route\n", - __func__); + net_err_ratelimited("RA: %s failed to add default route\n", __func__); return reason; } @@ -1403,9 +1383,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { - ND_PRINTK(0, err, - "RA: %s got default router without neighbour\n", - __func__); + net_err_ratelimited("RA: %s got default router without neighbour\n", + __func__); fib6_info_release(rt); return reason; } @@ -1436,7 +1415,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { - ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); + net_dbg_ratelimited("RA: Got route advertisement with lower hop_limit than minimum\n"); } } @@ -1492,8 +1471,7 @@ skip_linkparms: lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) { - ND_PRINTK(2, warn, - "RA: invalid link-layer address length\n"); + net_dbg_ratelimited("RA: invalid link-layer address length\n"); goto out; } } @@ -1507,9 +1485,8 @@ skip_linkparms: } if (!ipv6_accept_ra(in6_dev)) { - ND_PRINTK(2, info, - "RA: %s, accept_ra is false for dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, accept_ra is false for dev: %s\n", __func__, + skb->dev->name); goto out; } @@ -1517,9 +1494,8 @@ skip_linkparms: if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { - ND_PRINTK(2, info, - "RA from local address detected on dev: %s: router info ignored.\n", - skb->dev->name); + net_dbg_ratelimited("RA from local address detected on dev: %s: router info ignored.\n", + skb->dev->name); goto skip_routeinfo; } @@ -1555,9 +1531,8 @@ skip_routeinfo: #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific ndopts from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { - ND_PRINTK(2, info, - "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", - __func__, skb->dev->name); + net_dbg_ratelimited("RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", + __func__, skb->dev->name); goto out; } #endif @@ -1586,7 +1561,7 @@ skip_routeinfo: } if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { - ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); + net_dbg_ratelimited("RA: invalid mtu: %d\n", mtu); } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) { WRITE_ONCE(in6_dev->cnf.mtu6, mtu); fib6_metric_set(rt, RTAX_MTU, mtu); @@ -1605,7 +1580,7 @@ skip_routeinfo: } if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) { - ND_PRINTK(2, warn, "RA: invalid RA options\n"); + net_dbg_ratelimited("RA: invalid RA options\n"); } out: /* Send a notify if RA changed managed/otherconf flags or @@ -1633,15 +1608,13 @@ static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb) switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: case NDISC_NODETYPE_NODEFAULT: - ND_PRINTK(2, warn, - "Redirect: from host or unauthorized router\n"); + net_dbg_ratelimited("Redirect: from host or unauthorized router\n"); return reason; } #endif if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK(2, warn, - "Redirect: source address is not link-local\n"); + net_dbg_ratelimited("Redirect: source address is not link-local\n"); return reason; } @@ -1702,15 +1675,13 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { - ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", - dev->name); + net_dbg_ratelimited("Redirect: no link-local address on %s\n", dev->name); return; } if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) && ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { - ND_PRINTK(2, warn, - "Redirect: target address is not link-local unicast\n"); + net_dbg_ratelimited("Redirect: target address is not link-local unicast\n"); return; } @@ -1729,8 +1700,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) { - ND_PRINTK(2, warn, - "Redirect: destination is not a neighbour\n"); + net_dbg_ratelimited("Redirect: destination is not a neighbour\n"); goto release; } @@ -1743,8 +1713,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) if (dev->addr_len) { struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target); if (!neigh) { - ND_PRINTK(2, warn, - "Redirect: no neigh for target address\n"); + net_dbg_ratelimited("Redirect: no neigh for target address\n"); goto release; } @@ -1845,14 +1814,12 @@ enum skb_drop_reason ndisc_rcv(struct sk_buff *skb) __skb_push(skb, skb->data - skb_transport_header(skb)); if (ipv6_hdr(skb)->hop_limit != 255) { - ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n", - ipv6_hdr(skb)->hop_limit); + net_dbg_ratelimited("NDISC: invalid hop-limit: %d\n", ipv6_hdr(skb)->hop_limit); return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT; } if (msg->icmph.icmp6_code != 0) { - ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n", - msg->icmph.icmp6_code); + net_dbg_ratelimited("NDISC: invalid ICMPv6 code: %d\n", msg->icmph.icmp6_code); return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE; } @@ -2003,9 +1970,8 @@ static int __net_init ndisc_net_init(struct net *net) err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { - ND_PRINTK(0, err, - "NDISC: Failed to initialize the control socket (err %d)\n", - err); + net_err_ratelimited("NDISC: Failed to initialize the control socket (err %d)\n", + err); return err; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 4541836ee3da..45f9105f9ac1 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -24,7 +24,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff { const struct ipv6hdr *iph = ipv6_hdr(skb); struct sock *sk = sk_to_full_sk(sk_partial); - struct net_device *dev = skb_dst(skb)->dev; + struct net_device *dev = skb_dst_dev(skb); struct flow_keys flkeys; unsigned int hh_len; struct dst_entry *dst; @@ -72,7 +72,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff #endif /* Change in oif may mean change in hh_len. */ - hh_len = skb_dst(skb)->dev->hard_header_len; + hh_len = skb_dst_dev(skb)->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 0, GFP_ATOMIC)) diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index e087a8e97ba7..276860f65baa 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -9,9 +9,8 @@ menu "IPv6: Netfilter Configuration" # old sockopt interface and eval loop config IP6_NF_IPTABLES_LEGACY tristate "Legacy IP6 tables support" - depends on INET && IPV6 - select NETFILTER_XTABLES - default n + depends on INET && IPV6 && NETFILTER_XTABLES_LEGACY + default m if NETFILTER_XTABLES_LEGACY help ip6tables is a legacy packet classifier. This is not needed if you are using iptables over nftables @@ -196,8 +195,8 @@ config IP6_NF_TARGET_HL config IP6_NF_FILTER tristate "Packet filtering" - default m if NETFILTER_ADVANCED=n - select IP6_NF_IPTABLES_LEGACY + default m if NETFILTER_ADVANCED=n || IP6_NF_IPTABLES_LEGACY + depends on IP6_NF_IPTABLES_LEGACY tristate help Packet filtering defines a table `filter', which has a series of @@ -233,8 +232,8 @@ config IP6_NF_TARGET_SYNPROXY config IP6_NF_MANGLE tristate "Packet mangling" - default m if NETFILTER_ADVANCED=n - select IP6_NF_IPTABLES_LEGACY + default m if NETFILTER_ADVANCED=n || IP6_NF_IPTABLES_LEGACY + depends on IP6_NF_IPTABLES_LEGACY help This option adds a `mangle' table to iptables: see the man page for iptables(8). This table is used for various packet alterations @@ -244,7 +243,7 @@ config IP6_NF_MANGLE config IP6_NF_RAW tristate 'raw table support (required for TRACE)' - select IP6_NF_IPTABLES_LEGACY + depends on IP6_NF_IPTABLES_LEGACY help This option adds a `raw' table to ip6tables. This table is the very first in the netfilter framework and hooks in at the PREROUTING @@ -258,7 +257,7 @@ config IP6_NF_SECURITY tristate "Security table" depends on SECURITY depends on NETFILTER_ADVANCED - select IP6_NF_IPTABLES_LEGACY + depends on IP6_NF_IPTABLES_LEGACY help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. @@ -269,8 +268,8 @@ config IP6_NF_NAT tristate "ip6tables NAT support" depends on NF_CONNTRACK depends on NETFILTER_ADVANCED + depends on IP6_NF_IPTABLES_LEGACY select NF_NAT - select IP6_NF_IPTABLES_LEGACY select NETFILTER_XT_NAT help This enables the `nat' table in ip6tables. This allows masquerading, diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index b903c62c00c9..6da3102b7c1b 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -38,7 +38,7 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, } skb_dst_drop(skb); skb_dst_set(skb, dst); - skb->dev = dst->dev; + skb->dev = dst_dev(dst); skb->protocol = htons(ETH_P_IPV6); return true; diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 9ae2b2725bf9..838295fa32e3 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -300,7 +300,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, skb_dst_set(oldskb, dst); } - fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev); + fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst_dev(oldskb)); fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark); security_skb_classify_flow(oldskb, flowi6_to_flowi_common(&fl6)); dst = ip6_route_output(net, NULL, &fl6); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 806d4b5dd1e6..d21fe27fe21e 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -105,7 +105,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); if (hoplimit == 0) { - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); struct inet6_dev *idev; rcu_read_lock(); @@ -141,7 +141,7 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, sk, skb, NULL, skb_dst(skb)->dev, + net, sk, skb, NULL, skb_dst_dev(skb), dst_output); } EXPORT_SYMBOL_GPL(__ip6_local_out); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 84d90dd8b3f0..82b0492923d4 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -142,7 +142,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.saddr = np->saddr; fl6.daddr = *daddr; fl6.flowi6_mark = ipc6.sockc.mark; - fl6.flowi6_uid = sk->sk_uid; + fl6.flowi6_uid = sk_uid(sk); fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index fda640ebd53f..4c3f8245c40f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -777,7 +777,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = ipc6.sockc.mark; - fl6.flowi6_uid = sk->sk_uid; + fl6.flowi6_uid = sk_uid(sk); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 7d4bcf3fda5b..25ec8001898d 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -104,11 +104,11 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) return container_of(q, struct frag_queue, q); } -static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, +static int ip6_frag_queue(struct net *net, + struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff, u32 *prob_offset, int *refs) { - struct net *net = dev_net(skb_dst(skb)->dev); int offset, end, fragsize; struct sk_buff *prev_tail; struct net_device *dev; @@ -324,10 +324,10 @@ out_fail: static int ipv6_frag_rcv(struct sk_buff *skb) { + const struct ipv6hdr *hdr = ipv6_hdr(skb); + struct net *net = skb_dst_dev_net(skb); struct frag_hdr *fhdr; struct frag_queue *fq; - const struct ipv6hdr *hdr = ipv6_hdr(skb); - struct net *net = dev_net(skb_dst(skb)->dev); u8 nexthdr; int iif; @@ -384,7 +384,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) spin_lock(&fq->q.lock); fq->iif = iif; - ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff, + ret = ip6_frag_queue(net, fq, skb, fhdr, IP6CB(skb)->nhoff, &prob_offset, &refs); spin_unlock(&fq->q.lock); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 79c8f1acf8a3..3299cfa12e21 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -228,13 +228,13 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, const struct rt6_info *rt = dst_rt6_info(dst); return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any), - dst->dev, skb, daddr); + dst_dev(dst), skb, daddr); } static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) { const struct rt6_info *rt = dst_rt6_info(dst); - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr); if (!daddr) @@ -391,9 +391,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev) static bool __rt6_check_expired(const struct rt6_info *rt) { if (rt->rt6i_flags & RTF_EXPIRES) - return time_after(jiffies, rt->dst.expires); - else - return false; + return time_after(jiffies, READ_ONCE(rt->dst.expires)); + return false; } static bool rt6_check_expired(const struct rt6_info *rt) @@ -403,10 +402,10 @@ static bool rt6_check_expired(const struct rt6_info *rt) from = rcu_dereference(rt->from); if (rt->rt6i_flags & RTF_EXPIRES) { - if (time_after(jiffies, rt->dst.expires)) + if (time_after(jiffies, READ_ONCE(rt->dst.expires))) return true; } else if (from) { - return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || + return READ_ONCE(rt->dst.obsolete) != DST_OBSOLETE_FORCE_CHK || fib6_check_expired(from); } return false; @@ -1145,6 +1144,7 @@ static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res) rt->dst.input = ip6_input; } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { rt->dst.input = ip6_mc_input; + rt->dst.output = ip6_mr_output; } else { rt->dst.input = ip6_forward; } @@ -2133,12 +2133,13 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, * expired, independently from their aging, as per RFC 8201 section 4 */ if (!(rt->rt6i_flags & RTF_EXPIRES)) { - if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { + if (time_after_eq(now, READ_ONCE(rt->dst.lastuse) + + gc_args->timeout)) { pr_debug("aging clone %p\n", rt); rt6_remove_exception(bucket, rt6_ex); return; } - } else if (time_after(jiffies, rt->dst.expires)) { + } else if (time_after(jiffies, READ_ONCE(rt->dst.expires))) { pr_debug("purging expired route %p\n", rt); rt6_remove_exception(bucket, rt6_ex); return; @@ -2776,11 +2777,10 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) { if (!__rt6_check_expired(rt) && - rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && + READ_ONCE(rt->dst.obsolete) == DST_OBSOLETE_FORCE_CHK && fib6_check(from, cookie)) return &rt->dst; - else - return NULL; + return NULL; } INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, @@ -2870,7 +2870,7 @@ static void rt6_update_expires(struct rt6_info *rt0, int timeout) rcu_read_lock(); from = rcu_dereference(rt0->from); if (from) - rt0->dst.expires = from->expires; + WRITE_ONCE(rt0->dst.expires, from->expires); rcu_read_unlock(); } @@ -2943,7 +2943,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (res.f6i->nh) { struct fib6_nh_match_arg arg = { - .dev = dst->dev, + .dev = dst_dev(dst), .gw = &rt6->rt6i_gateway, }; @@ -3010,10 +3010,10 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) oif = l3mdev_master_ifindex(skb->dev); ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark), - sk->sk_uid); + sk_uid(sk)); dst = __sk_dst_get(sk); - if (!dst || !dst->obsolete || + if (!dst || !READ_ONCE(dst->obsolete) || dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) return; @@ -3232,13 +3232,13 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif) void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) { ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, - READ_ONCE(sk->sk_mark), sk->sk_uid); + READ_ONCE(sk->sk_mark), sk_uid(sk)); } EXPORT_SYMBOL_GPL(ip6_sk_redirect); static unsigned int ip6_default_advmss(const struct dst_entry *dst) { - struct net_device *dev = dst->dev; + struct net_device *dev = dst_dev(dst); unsigned int mtu = dst_mtu(dst); struct net *net; @@ -4301,7 +4301,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu if (res.f6i->nh) { struct fib6_nh_match_arg arg = { - .dev = dst->dev, + .dev = dst_dev(dst), .gw = &rt->rt6i_gateway, }; @@ -4587,13 +4587,14 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { struct dst_entry *dst = skb_dst(skb); - struct net *net = dev_net(dst->dev); + struct net_device *dev = dst_dev(dst); + struct net *net = dev_net(dev); struct inet6_dev *idev; SKB_DR(reason); int type; if (netif_is_l3_master(skb->dev) || - dst->dev == net->loopback_dev) + dev == net->loopback_dev) idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); else idev = ip6_dst_idev(dst); @@ -4630,7 +4631,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { - skb->dev = skb_dst(skb)->dev; + skb->dev = skb_dst_dev(skb); return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } @@ -4641,7 +4642,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb) { - skb->dev = skb_dst(skb)->dev; + skb->dev = skb_dst_dev(skb); return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } @@ -5346,7 +5347,8 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, */ rcu_read_lock(); - if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { + if ((nlflags & NLM_F_APPEND) && rt_last && + READ_ONCE(rt_last->fib6_nsiblings)) { rt = list_first_or_null_rcu(&rt_last->fib6_siblings, struct fib6_info, fib6_siblings); @@ -5670,32 +5672,34 @@ static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg) static size_t rt6_nlmsg_size(struct fib6_info *f6i) { + struct fib6_info *sibling; + struct fib6_nh *nh; int nexthop_len; if (f6i->nh) { nexthop_len = nla_total_size(4); /* RTA_NH_ID */ nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size, &nexthop_len); - } else { - struct fib6_nh *nh = f6i->fib6_nh; - struct fib6_info *sibling; - - nexthop_len = 0; - if (f6i->fib6_nsiblings) { - rt6_nh_nlmsg_size(nh, &nexthop_len); - - rcu_read_lock(); + goto common; + } - list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, - fib6_siblings) { - rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); - } + rcu_read_lock(); +retry: + nh = f6i->fib6_nh; + nexthop_len = 0; + if (READ_ONCE(f6i->fib6_nsiblings)) { + rt6_nh_nlmsg_size(nh, &nexthop_len); - rcu_read_unlock(); + list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, + fib6_siblings) { + rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); + if (!READ_ONCE(f6i->fib6_nsiblings)) + goto retry; } - nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); } - + rcu_read_unlock(); + nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); +common: return NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(16) /* RTA_SRC */ + nla_total_size(16) /* RTA_DST */ @@ -5844,17 +5848,19 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, * each as a nexthop within RTA_MULTIPATH. */ if (rt6) { + struct net_device *dev; + if (rt6_flags & RTF_GATEWAY && nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway)) goto nla_put_failure; - if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex)) + dev = dst_dev(dst); + if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; - if (dst->lwtstate && - lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) + if (lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; - } else if (rt->fib6_nsiblings) { + } else if (READ_ONCE(rt->fib6_nsiblings)) { struct fib6_info *sibling; struct nlattr *mp; @@ -5904,7 +5910,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, } if (rt6_flags & RTF_EXPIRES) { - expires = dst ? dst->expires : rt->expires; + expires = dst ? READ_ONCE(dst->expires) : rt->expires; expires -= jiffies; } @@ -5956,16 +5962,21 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i, if (f6i->fib6_nh->fib_nh_dev == dev) return true; - if (f6i->fib6_nsiblings) { - struct fib6_info *sibling, *next_sibling; + if (READ_ONCE(f6i->fib6_nsiblings)) { + const struct fib6_info *sibling; - list_for_each_entry_safe(sibling, next_sibling, - &f6i->fib6_siblings, fib6_siblings) { - if (sibling->fib6_nh->fib_nh_dev == dev) + rcu_read_lock(); + list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, + fib6_siblings) { + if (sibling->fib6_nh->fib_nh_dev == dev) { + rcu_read_unlock(); return true; + } + if (!READ_ONCE(f6i->fib6_nsiblings)) + break; } + rcu_read_unlock(); } - return false; } @@ -6321,8 +6332,9 @@ errout: void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int nlm_flags) { - struct sk_buff *skb; struct net *net = info->nl_net; + struct sk_buff *skb; + size_t sz; u32 seq; int err; @@ -6330,17 +6342,21 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, seq = info->nlh ? info->nlh->nlmsg_seq : 0; rcu_read_lock(); - - skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC); + sz = rt6_nlmsg_size(rt); +retry: + skb = nlmsg_new(sz, GFP_ATOMIC); if (!skb) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, event, info->portid, seq, nlm_flags); if (err < 0) { - /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ - WARN_ON(err == -EMSGSIZE); kfree_skb(skb); + /* -EMSGSIZE implies needed space grew under us. */ + if (err == -EMSGSIZE) { + sz = max(rt6_nlmsg_size(rt), sz << 1); + goto retry; + } goto errout; } @@ -6805,8 +6821,7 @@ void __init ip6_route_init_special_entries(void) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt) -BTF_ID_LIST(btf_fib6_info_id) -BTF_ID(struct, fib6_info) +BTF_ID_LIST_SINGLE(btf_fib6_info_id, struct, fib6_info) static const struct bpf_iter_seq_info ipv6_route_seq_info = { .seq_ops = &ipv6_route_seq_ops, diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index eccfa4203e96..c7942cf65567 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -242,7 +242,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) local_bh_enable(); } - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst))); if (unlikely(err)) goto drop; } @@ -297,7 +297,7 @@ static int rpl_input(struct sk_buff *skb) local_bh_enable(); } - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst))); if (unlikely(err)) goto drop; } else { diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 51583461ae29..3e1b9991131a 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -128,7 +128,8 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto, struct dst_entry *cache_dst) { struct dst_entry *dst = skb_dst(skb); - struct net *net = dev_net(dst->dev); + struct net_device *dev = dst_dev(dst); + struct net *net = dev_net(dev); struct ipv6hdr *hdr, *inner_hdr; struct ipv6_sr_hdr *isrh; int hdrlen, tot_len, err; @@ -181,7 +182,7 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; - set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { @@ -212,7 +213,8 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, { __u8 first_seg = osrh->first_segment; struct dst_entry *dst = skb_dst(skb); - struct net *net = dev_net(dst->dev); + struct net_device *dev = dst_dev(dst); + struct net *net = dev_net(dev); struct ipv6hdr *hdr, *inner_hdr; int hdrlen = ipv6_optlen(osrh); int red_tlv_offset, tlv_offset; @@ -270,7 +272,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, if (skip_srh) { hdr->nexthdr = proto; - set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); goto out; } @@ -306,7 +308,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, srcaddr: isrh->nexthdr = proto; - set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); #ifdef CONFIG_IPV6_SEG6_HMAC if (unlikely(!skip_srh && sr_has_hmac(isrh))) { @@ -362,7 +364,7 @@ static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = skb_dst_dev_net(skb); err = seg6_push_hmac(net, &hdr->saddr, isrh); if (unlikely(err)) @@ -507,7 +509,7 @@ static int seg6_input_core(struct net *net, struct sock *sk, local_bh_enable(); } - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst))); if (unlikely(err)) goto drop; } else { @@ -518,7 +520,7 @@ static int seg6_input_core(struct net *net, struct sock *sk, if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, dev_net(skb->dev), NULL, skb, NULL, - skb_dst(skb)->dev, seg6_input_finish); + skb_dst_dev(skb), seg6_input_finish); return seg6_input_finish(dev_net(skb->dev), NULL, skb); drop: @@ -528,7 +530,7 @@ drop: static int seg6_input_nf(struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev; + struct net_device *dev = skb_dst_dev(skb); struct net *net = dev_net(skb->dev); switch (skb->protocol) { @@ -593,7 +595,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, local_bh_enable(); } - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst))); if (unlikely(err)) goto drop; } @@ -603,7 +605,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, - NULL, skb_dst(skb)->dev, dst_output); + NULL, dst_dev(dst), dst_output); return dst_output(net, sk, skb); drop: @@ -614,7 +616,7 @@ drop: static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_device *dev = skb_dst(skb)->dev; + struct net_device *dev = skb_dst_dev(skb); switch (skb->protocol) { case htons(ETH_P_IP): diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index a11a02b4ba95..2b41e4c0dddd 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -270,7 +270,7 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr) static int seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, - u32 tbl_id, bool local_delivery) + u32 tbl_id, bool local_delivery, int oif) { struct net *net = dev_net(skb->dev); struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -282,6 +282,7 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_iif = skb->dev->ifindex; + fl6.flowi6_oif = oif; fl6.daddr = nhaddr ? *nhaddr : hdr->daddr; fl6.saddr = hdr->saddr; fl6.flowlabel = ip6_flowinfo(hdr); @@ -291,17 +292,19 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, if (nhaddr) fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; - if (!tbl_id) { + if (!tbl_id && !oif) { dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags); - } else { + } else if (tbl_id) { struct fib6_table *table; table = fib6_get_table(net, tbl_id); if (!table) goto out; - rt = ip6_pol_route(net, table, 0, &fl6, skb, flags); + rt = ip6_pol_route(net, table, oif, &fl6, skb, flags); dst = &rt->dst; + } else { + dst = ip6_route_output(net, NULL, &fl6); } /* we want to discard traffic destined for local packet processing, @@ -310,7 +313,7 @@ seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, if (!local_delivery) dev_flags |= IFF_LOOPBACK; - if (dst && (dst->dev->flags & dev_flags) && !dst->error) { + if (dst && (dst_dev(dst)->flags & dev_flags) && !dst->error) { dst_release(dst); dst = NULL; } @@ -330,7 +333,7 @@ out: int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, u32 tbl_id) { - return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false); + return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false, 0); } static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo) @@ -418,7 +421,7 @@ static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt) static int input_action_end_x_finish(struct sk_buff *skb, struct seg6_local_lwt *slwt) { - seg6_lookup_nexthop(skb, &slwt->nh6, 0); + seg6_lookup_any_nexthop(skb, &slwt->nh6, 0, false, slwt->oif); return dst_input(skb); } @@ -1277,7 +1280,7 @@ static int input_action_end_dt6(struct sk_buff *skb, /* note: this time we do not need to specify the table because the VRF * takes care of selecting the correct table. */ - seg6_lookup_any_nexthop(skb, NULL, 0, true); + seg6_lookup_any_nexthop(skb, NULL, 0, true, 0); return dst_input(skb); @@ -1285,7 +1288,7 @@ legacy_mode: #endif skb_set_transport_header(skb, sizeof(struct ipv6hdr)); - seg6_lookup_any_nexthop(skb, NULL, slwt->table, true); + seg6_lookup_any_nexthop(skb, NULL, slwt->table, true, 0); return dst_input(skb); @@ -1477,7 +1480,8 @@ static struct seg6_action_desc seg6_action_table[] = { .action = SEG6_LOCAL_ACTION_END_X, .attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6), .optattrs = SEG6_F_LOCAL_COUNTERS | - SEG6_F_LOCAL_FLAVORS, + SEG6_F_LOCAL_FLAVORS | + SEG6_F_ATTR(SEG6_LOCAL_OIF), .input = input_action_end_x, }, { @@ -2083,7 +2087,7 @@ struct nla_policy seg6_local_flavors_policy[SEG6_LOCAL_FLV_MAX + 1] = { static int seg6_chk_next_csid_cfg(__u8 block_len, __u8 func_len) { /* Locator-Block and Locator-Node Function cannot exceed 128 bits - * (i.e. C-SID container lenghts). + * (i.e. C-SID container length). */ if (next_csid_chk_cntr_bits(block_len, func_len)) return -EINVAL; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a72dbca9e8fc..12496ba1b7d4 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1035,7 +1035,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, IPPROTO_IPV6); iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, - df, !net_eq(tunnel->net, dev_net(dev))); + df, !net_eq(tunnel->net, dev_net(dev)), 0); return NETDEV_TX_OK; tx_error_icmp: diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 9d83eadd308b..f0ee1a909771 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -236,7 +236,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; - fl6.flowi6_uid = sk->sk_uid; + fl6.flowi6_uid = sk_uid(sk); security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e8e68a142649..7577e7eb2c97 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -41,6 +41,7 @@ #include <linux/random.h> #include <linux/indirect_call_wrapper.h> +#include <net/aligned_data.h> #include <net/tcp.h> #include <net/ndisc.h> #include <net/inet6_hashtables.h> @@ -269,7 +270,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !fl6.fl6_sport) fl6.flowi6_flags = FLOWI_FLAG_ANY_SPORT; - fl6.flowi6_uid = sk->sk_uid; + fl6.flowi6_uid = sk_uid(sk); opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); @@ -835,7 +836,6 @@ static struct dst_entry *tcp_v6_route_req(const struct sock *sk, struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), - .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, @@ -868,7 +868,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 int oif, int rst, u8 tclass, __be32 label, u32 priority, u32 txhash, struct tcp_key *key) { - struct net *net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); + struct net *net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); unsigned int tot_len = sizeof(struct tcphdr); struct sock *ctl_sk = net->ipv6.tcp_sk; const struct tcphdr *th = tcp_hdr(skb); @@ -1043,7 +1043,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, if (!sk && !ipv6_unicast_destination(skb)) return; - net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); + net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb); /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) return; @@ -1834,14 +1834,12 @@ lookup: } refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb)) { + if (!tcp_filter(sk, skb, &drop_reason)) { th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); nsk = tcp_check_req(sk, skb, req, false, &req_stolen, &drop_reason); - } else { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } if (!nsk) { reqsk_put(req); @@ -1897,10 +1895,9 @@ process: nf_reset_ct(skb); - if (tcp_filter(sk, skb)) { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + if (tcp_filter(sk, skb, &drop_reason)) goto discard_and_relse; - } + th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); @@ -2168,7 +2165,7 @@ static void get_openreq6(struct seq_file *seq, jiffies_to_clock_t(ttd), req->num_timeout, from_kuid_munged(seq_user_ns(seq), - sock_i_uid(req->rsk_listener)), + sk_uid(req->rsk_listener)), 0, /* non standard timer */ 0, /* open_requests have no inode */ 0, req); @@ -2234,7 +2231,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + from_kuid_munged(seq_user_ns(seq), sk_uid(sp)), icsk->icsk_probes_out, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, @@ -2357,7 +2354,7 @@ struct proto tcpv6_prot = { .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, - .memory_allocated = &tcp_memory_allocated, + .memory_allocated = &net_aligned_data.tcp_memory_allocated, .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, .memory_pressure = &tcp_memory_pressure, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7317f8e053f1..6a68f77da44b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -750,7 +750,8 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == NDISC_REDIRECT) { if (tunnel) { ip6_redirect(skb, sock_net(sk), inet6_iif(skb), - READ_ONCE(sk->sk_mark), sk->sk_uid); + READ_ONCE(sk->sk_mark), + sk_uid(sk)); } else { ip6_sk_redirect(skb, sk); } @@ -893,10 +894,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason)) goto drop; - } udp_csum_pull_header(skb); @@ -1620,7 +1619,7 @@ do_udp_sendmsg: if (!fl6->flowi6_oif) fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl6->flowi6_uid = sk->sk_uid; + fl6->flowi6_uid = sk_uid(sk); if (msg->msg_controllen) { opt = &opt_space; @@ -1924,7 +1923,7 @@ struct proto udpv6_prot = { .psock_update_sk_prot = udp_bpf_update_proto, #endif - .memory_allocated = &udp_memory_allocated, + .memory_allocated = &net_aligned_data.udp_memory_allocated, .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 0590f566379d..8a406be25a3a 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _UDP6_IMPL_H #define _UDP6_IMPL_H +#include <net/aligned_data.h> #include <net/udp.h> #include <net/udplite.h> #include <net/protocol.h> diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index a60bec9b14f1..2cec542437f7 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -59,7 +59,7 @@ struct proto udplitev6_prot = { .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, - .memory_allocated = &udp_memory_allocated, + .memory_allocated = &net_aligned_data.udp_memory_allocated, .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 841c81abaaf4..9005fc156a20 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -202,6 +202,9 @@ struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, if (len <= sizeof(struct ip_esp_hdr) || udpdata32[0] == 0) goto out; + /* set the transport header to ESP */ + skb_set_transport_header(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b3d5d1f266ee..512bdaf13699 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -106,7 +106,7 @@ skip_frag: int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, - net, sk, skb, skb->dev, skb_dst(skb)->dev, + net, sk, skb, skb->dev, skb_dst_dev(skb), __xfrm6_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index bf140ef781c1..5120a763da0d 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -334,8 +334,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); unsigned int i; + xfrm_state_flush(net, IPSEC_PROTO_ANY, false); xfrm_flush_gc(); - xfrm_state_flush(net, 0, false, true); for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); |