summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c1226
-rw-r--r--net/ipv6/addrlabel.c42
-rw-r--r--net/ipv6/af_inet6.c29
-rw-r--r--net/ipv6/anycast.c106
-rw-r--r--net/ipv6/calipso.c7
-rw-r--r--net/ipv6/esp6.c30
-rw-r--r--net/ipv6/esp6_offload.c13
-rw-r--r--net/ipv6/exthdrs.c34
-rw-r--r--net/ipv6/fib6_notifier.c2
-rw-r--r--net/ipv6/fib6_rules.c112
-rw-r--r--net/ipv6/icmp.c85
-rw-r--r--net/ipv6/ila/ila.h1
-rw-r--r--net/ipv6/ila/ila_lwt.c13
-rw-r--r--net/ipv6/ila/ila_main.c6
-rw-r--r--net/ipv6/ila/ila_xlat.c42
-rw-r--r--net/ipv6/inet6_hashtables.c27
-rw-r--r--net/ipv6/ioam6.c84
-rw-r--r--net/ipv6/ioam6_iptunnel.c175
-rw-r--r--net/ipv6/ip6_fib.c208
-rw-r--r--net/ipv6/ip6_gre.c136
-rw-r--r--net/ipv6/ip6_input.c26
-rw-r--r--net/ipv6/ip6_offload.c35
-rw-r--r--net/ipv6/ip6_output.c98
-rw-r--r--net/ipv6/ip6_tunnel.c70
-rw-r--r--net/ipv6/ip6_vti.c27
-rw-r--r--net/ipv6/ip6mr.c123
-rw-r--r--net/ipv6/ipv6_sockglue.c15
-rw-r--r--net/ipv6/mcast.c165
-rw-r--r--net/ipv6/ndisc.c162
-rw-r--r--net/ipv6/netfilter.c7
-rw-r--r--net/ipv6/netfilter/Kconfig27
-rw-r--r--net/ipv6/netfilter/Makefile2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c10
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c14
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c15
-rw-r--r--net/ipv6/netfilter/nf_dup_ipv6.c7
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c38
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c5
-rw-r--r--net/ipv6/output_core.c4
-rw-r--r--net/ipv6/ping.c3
-rw-r--r--net/ipv6/proc.c2
-rw-r--r--net/ipv6/raw.c39
-rw-r--r--net/ipv6/reassembly.c12
-rw-r--r--net/ipv6/route.c264
-rw-r--r--net/ipv6/rpl_iptunnel.c83
-rw-r--r--net/ipv6/seg6.c34
-rw-r--r--net/ipv6/seg6_hmac.c50
-rw-r--r--net/ipv6/seg6_iptunnel.c123
-rw-r--r--net/ipv6/seg6_local.c44
-rw-r--r--net/ipv6/sit.c76
-rw-r--r--net/ipv6/syncookies.c36
-rw-r--r--net/ipv6/sysctl_net_ipv6.c18
-rw-r--r--net/ipv6/tcp_ipv6.c156
-rw-r--r--net/ipv6/tcpv6_offload.c154
-rw-r--r--net/ipv6/udp.c266
-rw-r--r--net/ipv6/udp_offload.c32
-rw-r--r--net/ipv6/xfrm6_input.c28
-rw-r--r--net/ipv6/xfrm6_output.c4
-rw-r--r--net/ipv6/xfrm6_policy.c51
-rw-r--r--net/ipv6/xfrm6_tunnel.c5
63 files changed, 2820 insertions, 1895 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 08d4b7132d4c..1c9c686d9522 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -323,6 +323,7 @@ config IPV6_RPL_LWTUNNEL
bool "IPv6: RPL Source Routing Header support"
depends on IPV6
select LWTUNNEL
+ select DST_CACHE
help
Support for RFC6554 RPL Source Routing Header using the lightweight
tunnels mechanism.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 055230b669cf..ac8cc1076536 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -63,6 +63,7 @@
#include <linux/string.h>
#include <linux/hash.h>
+#include <net/ip_tunnels.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -84,15 +85,13 @@
#include <linux/netconf.h>
#include <linux/random.h>
#include <linux/uaccess.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/export.h>
#include <linux/ioam6.h>
-#define INFINITY_LIFE_TIME 0xFFFFFFFF
-
#define IPV6_MAX_STRLEN \
sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255")
@@ -195,6 +194,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_min_advance = REGEN_MIN_ADVANCE,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
@@ -237,6 +237,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
.ra_honor_pio_life = 0,
+ .ra_honor_pio_pflag = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -257,6 +258,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.use_tempaddr = 0,
.temp_valid_lft = TEMP_VALID_LIFETIME,
.temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_min_advance = REGEN_MIN_ADVANCE,
.regen_max_retry = REGEN_MAX_RETRY,
.max_desync_factor = MAX_DESYNC_FACTOR,
.max_addresses = IPV6_MAX_ADDRESSES,
@@ -299,6 +301,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
.ra_honor_pio_life = 0,
+ .ra_honor_pio_pflag = 0,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -549,7 +552,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
goto out;
if ((all || type == NETCONFA_FORWARDING) &&
- nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0)
+ nla_put_s32(skb, NETCONFA_FORWARDING,
+ READ_ONCE(devconf->forwarding)) < 0)
goto nla_put_failure;
#ifdef CONFIG_IPV6_MROUTE
if ((all || type == NETCONFA_MC_FORWARDING) &&
@@ -558,12 +562,13 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
goto nla_put_failure;
#endif
if ((all || type == NETCONFA_PROXY_NEIGH) &&
- nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0)
+ nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
+ READ_ONCE(devconf->proxy_ndp)) < 0)
goto nla_put_failure;
if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
- devconf->ignore_routes_with_linkdown) < 0)
+ READ_ONCE(devconf->ignore_routes_with_linkdown)) < 0)
goto nla_put_failure;
out:
@@ -713,7 +718,7 @@ errout:
static u32 inet6_base_seq(const struct net *net)
{
u32 res = atomic_read(&net->ipv6.dev_addr_genid) +
- net->dev_base_seq;
+ READ_ONCE(net->dev_base_seq);
/* Must not return 0 (see nl_dump_check_consistent()).
* Chose a value far away from 0.
@@ -723,17 +728,18 @@ static u32 inet6_base_seq(const struct net *net)
return res;
}
-
static int inet6_netconf_dump_devconf(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx, s_idx;
+ struct {
+ unsigned long ifindex;
+ unsigned int all_default;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct inet6_dev *idev;
- struct hlist_head *head;
+ int err = 0;
if (cb->strict_check) {
struct netlink_ext_ack *extack = cb->extack;
@@ -750,64 +756,46 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
}
}
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
-
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- rcu_read_lock();
- cb->seq = inet6_base_seq(net);
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- idev = __in6_dev_get(dev);
- if (!idev)
- goto cont;
-
- if (inet6_netconf_fill_devconf(skb, dev->ifindex,
- &idev->cnf,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF,
- NLM_F_MULTI,
- NETCONFA_ALL) < 0) {
- rcu_read_unlock();
- goto done;
- }
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ continue;
+ err = inet6_netconf_fill_devconf(skb, dev->ifindex,
+ &idev->cnf,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF,
+ NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
+ goto done;
}
- if (h == NETDEV_HASHENTRIES) {
- if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
- net->ipv6.devconf_all,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ if (ctx->all_default == 0) {
+ err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+ net->ipv6.devconf_all,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
- }
- if (h == NETDEV_HASHENTRIES + 1) {
- if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
- net->ipv6.devconf_dflt,
- NETLINK_CB(cb->skb).portid,
- nlh->nlmsg_seq,
- RTM_NEWNETCONF, NLM_F_MULTI,
- NETCONFA_ALL) < 0)
+ ctx->all_default++;
+ }
+ if (ctx->all_default == 1) {
+ err = inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+ net->ipv6.devconf_dflt,
+ NETLINK_CB(cb->skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWNETCONF, NLM_F_MULTI,
+ NETCONFA_ALL);
+ if (err < 0)
goto done;
- else
- h++;
+ ctx->all_default++;
}
done:
- cb->args[0] = h;
- cb->args[1] = idx;
-
- return skb->len;
+ rcu_read_unlock();
+ return err;
}
#ifdef CONFIG_SYSCTL
@@ -864,27 +852,27 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
struct inet6_dev *idev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!newf);
- idev->cnf.forwarding = newf;
+
+ WRITE_ONCE(idev->cnf.forwarding, newf);
if (changed)
dev_forward_change(idev);
}
}
}
-static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
+static int addrconf_fixup_forwarding(const struct ctl_table *table, int *p, int newf)
{
- struct net *net;
+ struct net *net = (struct net *)table->extra2;
int old;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
- net = (struct net *)table->extra2;
old = *p;
- *p = newf;
+ WRITE_ONCE(*p, newf);
if (p == &net->ipv6.devconf_dflt->forwarding) {
if ((!newf) ^ (!old))
@@ -892,14 +880,14 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
NETCONFA_FORWARDING,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
if (p == &net->ipv6.devconf_all->forwarding) {
int old_dflt = net->ipv6.devconf_dflt->forwarding;
- net->ipv6.devconf_dflt->forwarding = newf;
+ WRITE_ONCE(net->ipv6.devconf_dflt->forwarding, newf);
if ((!newf) ^ (!old_dflt))
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_FORWARDING,
@@ -914,7 +902,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
net->ipv6.devconf_all);
} else if ((!newf) ^ (!old))
dev_forward_change((struct inet6_dev *)table->extra1);
- rtnl_unlock();
+ rtnl_net_unlock(net);
if (newf)
rt6_purge_dflt_routers(net);
@@ -927,11 +915,11 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
struct inet6_dev *idev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
int changed = (!idev->cnf.ignore_routes_with_linkdown) ^ (!newf);
- idev->cnf.ignore_routes_with_linkdown = newf;
+ WRITE_ONCE(idev->cnf.ignore_routes_with_linkdown, newf);
if (changed)
inet6_netconf_notify_devconf(dev_net(dev),
RTM_NEWNETCONF,
@@ -942,17 +930,16 @@ static void addrconf_linkdown_change(struct net *net, __s32 newf)
}
}
-static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
+static int addrconf_fixup_linkdown(const struct ctl_table *table, int *p, int newf)
{
- struct net *net;
+ struct net *net = (struct net *)table->extra2;
int old;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
- net = (struct net *)table->extra2;
old = *p;
- *p = newf;
+ WRITE_ONCE(*p, newf);
if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) {
if ((!newf) ^ (!old))
@@ -961,12 +948,12 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) {
- net->ipv6.devconf_dflt->ignore_routes_with_linkdown = newf;
+ WRITE_ONCE(net->ipv6.devconf_dflt->ignore_routes_with_linkdown, newf);
addrconf_linkdown_change(net, newf);
if ((!newf) ^ (!old))
inet6_netconf_notify_devconf(net,
@@ -975,7 +962,8 @@ static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
}
- rtnl_unlock();
+
+ rtnl_net_unlock(net);
return 1;
}
@@ -1027,7 +1015,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr)
{
- u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+ u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net));
return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
}
@@ -1270,6 +1258,7 @@ static void
cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
bool del_rt, bool del_peer)
{
+ struct fib6_table *table;
struct fib6_info *f6i;
f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
@@ -1279,8 +1268,15 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
if (del_rt)
ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
else {
- if (!(f6i->fib6_flags & RTF_EXPIRES))
+ if (!(f6i->fib6_flags & RTF_EXPIRES)) {
+ table = f6i->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
fib6_set_expires(f6i, expires);
+ fib6_add_gc_list(f6i);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
fib6_info_release(f6i);
}
}
@@ -1346,12 +1342,21 @@ out:
in6_ifa_put(ifp);
}
+static unsigned long ipv6_get_regen_advance(const struct inet6_dev *idev)
+{
+ return READ_ONCE(idev->cnf.regen_min_advance) +
+ READ_ONCE(idev->cnf.regen_max_retry) *
+ READ_ONCE(idev->cnf.dad_transmits) *
+ max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+}
+
static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block)
{
struct inet6_dev *idev = ifp->idev;
unsigned long tmp_tstamp, age;
unsigned long regen_advance;
unsigned long now = jiffies;
+ u32 if_public_preferred_lft;
s32 cnf_temp_preferred_lft;
struct inet6_ifaddr *ift;
struct ifa6_config cfg;
@@ -1363,7 +1368,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block)
retry:
in6_dev_hold(idev);
- if (idev->cnf.use_tempaddr <= 0) {
+ if (READ_ONCE(idev->cnf.use_tempaddr) <= 0) {
write_unlock_bh(&idev->lock);
pr_info("%s: use_tempaddr is disabled\n", __func__);
in6_dev_put(idev);
@@ -1371,8 +1376,8 @@ retry:
goto out;
}
spin_lock_bh(&ifp->lock);
- if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
- idev->cnf.use_tempaddr = -1; /*XXX*/
+ if (ifp->regen_count++ >= READ_ONCE(idev->cnf.regen_max_retry)) {
+ WRITE_ONCE(idev->cnf.use_tempaddr, -1); /*XXX*/
spin_unlock_bh(&ifp->lock);
write_unlock_bh(&idev->lock);
pr_warn("%s: regeneration time exceeded - disabled temporary address support\n",
@@ -1387,16 +1392,14 @@ retry:
age = (now - ifp->tstamp) / HZ;
- regen_advance = idev->cnf.regen_max_retry *
- idev->cnf.dad_transmits *
- max(NEIGH_VAR(idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+ regen_advance = ipv6_get_regen_advance(idev);
/* recalculate max_desync_factor each time and update
* idev->desync_factor if it's larger
*/
cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
max_desync_factor = min_t(long,
- idev->cnf.max_desync_factor,
+ READ_ONCE(idev->cnf.max_desync_factor),
cnf_temp_preferred_lft - regen_advance);
if (unlikely(idev->desync_factor > max_desync_factor)) {
@@ -1409,11 +1412,13 @@ retry:
}
}
+ if_public_preferred_lft = ifp->prefered_lft;
+
memset(&cfg, 0, sizeof(cfg));
cfg.valid_lft = min_t(__u32, ifp->valid_lft,
- idev->cnf.temp_valid_lft + age);
+ READ_ONCE(idev->cnf.temp_valid_lft) + age);
cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor;
- cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft);
+ cfg.preferred_lft = min_t(__u32, if_public_preferred_lft, cfg.preferred_lft);
cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft);
cfg.plen = ifp->prefix_len;
@@ -1422,19 +1427,41 @@ retry:
write_unlock_bh(&idev->lock);
- /* A temporary address is created only if this calculated Preferred
- * Lifetime is greater than REGEN_ADVANCE time units. In particular,
- * an implementation must not create a temporary address with a zero
- * Preferred Lifetime.
+ /* From RFC 4941:
+ *
+ * A temporary address is created only if this calculated Preferred
+ * Lifetime is greater than REGEN_ADVANCE time units. In
+ * particular, an implementation must not create a temporary address
+ * with a zero Preferred Lifetime.
+ *
+ * ...
+ *
+ * When creating a temporary address, the lifetime values MUST be
+ * derived from the corresponding prefix as follows:
+ *
+ * ...
+ *
+ * * Its Preferred Lifetime is the lower of the Preferred Lifetime
+ * of the public address or TEMP_PREFERRED_LIFETIME -
+ * DESYNC_FACTOR.
+ *
+ * To comply with the RFC's requirements, clamp the preferred lifetime
+ * to a minimum of regen_advance, unless that would exceed valid_lft or
+ * ifp->prefered_lft.
+ *
* Use age calculation as in addrconf_verify to avoid unnecessary
* temporary addresses being generated.
*/
age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
if (cfg.preferred_lft <= regen_advance + age) {
- in6_ifa_put(ifp);
- in6_dev_put(idev);
- ret = -1;
- goto out;
+ cfg.preferred_lft = regen_advance + age + 1;
+ if (cfg.preferred_lft > cfg.valid_lft ||
+ cfg.preferred_lft > if_public_preferred_lft) {
+ in6_ifa_put(ifp);
+ in6_dev_put(idev);
+ ret = -1;
+ goto out;
+ }
}
cfg.ifa_flags = IFA_F_TEMPORARY;
@@ -1513,15 +1540,17 @@ static inline int ipv6_saddr_preferred(int type)
return 0;
}
-static bool ipv6_use_optimistic_addr(struct net *net,
- struct inet6_dev *idev)
+static bool ipv6_use_optimistic_addr(const struct net *net,
+ const struct inet6_dev *idev)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if (!idev)
return false;
- if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) &&
+ !READ_ONCE(idev->cnf.optimistic_dad))
return false;
- if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic)
+ if (!READ_ONCE(net->ipv6.devconf_all->use_optimistic) &&
+ !READ_ONCE(idev->cnf.use_optimistic))
return false;
return true;
@@ -1530,13 +1559,14 @@ static bool ipv6_use_optimistic_addr(struct net *net,
#endif
}
-static bool ipv6_allow_optimistic_dad(struct net *net,
- struct inet6_dev *idev)
+static bool ipv6_allow_optimistic_dad(const struct net *net,
+ const struct inet6_dev *idev)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if (!idev)
return false;
- if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ if (!READ_ONCE(net->ipv6.devconf_all->optimistic_dad) &&
+ !READ_ONCE(idev->cnf.optimistic_dad))
return false;
return true;
@@ -1642,7 +1672,7 @@ static int ipv6_get_saddr_eval(struct net *net,
*/
int preftmp = dst->prefs & (IPV6_PREFER_SRC_PUBLIC|IPV6_PREFER_SRC_TMP) ?
!!(dst->prefs & IPV6_PREFER_SRC_TMP) :
- score->ifa->idev->cnf.use_tempaddr >= 2;
+ READ_ONCE(score->ifa->idev->cnf.use_tempaddr) >= 2;
ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;
break;
}
@@ -1818,7 +1848,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
idev = __in6_dev_get(dst_dev);
if ((dst_type & IPV6_ADDR_MULTICAST) ||
dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL ||
- (idev && idev->cnf.use_oif_addrs_only)) {
+ (idev && READ_ONCE(idev->cnf.use_oif_addrs_only))) {
use_oif_addr = true;
}
}
@@ -1842,7 +1872,8 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
master, &dst,
scores, hiscore_idx);
- if (scores[hiscore_idx].ifa)
+ if (scores[hiscore_idx].ifa &&
+ scores[hiscore_idx].scopedist >= 0)
goto out;
}
@@ -2061,9 +2092,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
- result = ifp;
- in6_ifa_hold(ifp);
- break;
+ if (in6_ifa_hold_safe(ifp)) {
+ result = ifp;
+ break;
+ }
}
}
}
@@ -2125,6 +2157,7 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net *net = dev_net(idev->dev);
+ int max_addresses;
if (addrconf_dad_end(ifp)) {
in6_ifa_put(ifp);
@@ -2162,9 +2195,9 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
spin_unlock_bh(&ifp->lock);
- if (idev->cnf.max_addresses &&
- ipv6_count_addresses(idev) >=
- idev->cnf.max_addresses)
+ max_addresses = READ_ONCE(idev->cnf.max_addresses);
+ if (max_addresses &&
+ ipv6_count_addresses(idev) >= max_addresses)
goto lock_errdad;
net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n",
@@ -2536,6 +2569,24 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
return idev;
}
+static void delete_tempaddrs(struct inet6_dev *idev,
+ struct inet6_ifaddr *ifp)
+{
+ struct inet6_ifaddr *ift, *tmp;
+
+ write_lock_bh(&idev->lock);
+ list_for_each_entry_safe(ift, tmp, &idev->tempaddr_list, tmp_list) {
+ if (ift->ifpub != ifp)
+ continue;
+
+ in6_ifa_hold(ift);
+ write_unlock_bh(&idev->lock);
+ ipv6_del_addr(ift);
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+}
+
static void manage_tempaddrs(struct inet6_dev *idev,
struct inet6_ifaddr *ifp,
__u32 valid_lft, __u32 prefered_lft,
@@ -2561,11 +2612,11 @@ static void manage_tempaddrs(struct inet6_dev *idev,
* (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively.
*/
age = (now - ift->cstamp) / HZ;
- max_valid = idev->cnf.temp_valid_lft - age;
+ max_valid = READ_ONCE(idev->cnf.temp_valid_lft) - age;
if (max_valid < 0)
max_valid = 0;
- max_prefered = idev->cnf.temp_prefered_lft -
+ max_prefered = READ_ONCE(idev->cnf.temp_prefered_lft) -
idev->desync_factor - age;
if (max_prefered < 0)
max_prefered = 0;
@@ -2598,7 +2649,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft))
create = true;
- if (create && idev->cnf.use_tempaddr > 0) {
+ if (create && READ_ONCE(idev->cnf.use_tempaddr) > 0) {
/* When a new public address is created as described
* in [ADDRCONF], also create a new temporary address.
*/
@@ -2626,7 +2677,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
int create = 0, update_lft = 0;
if (!ifp && valid_lft) {
- int max_addresses = in6_dev->cnf.max_addresses;
+ int max_addresses = READ_ONCE(in6_dev->cnf.max_addresses);
struct ifa6_config cfg = {
.pfx = addr,
.plen = pinfo->prefix_len,
@@ -2638,8 +2689,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
};
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if ((net->ipv6.devconf_all->optimistic_dad ||
- in6_dev->cnf.optimistic_dad) &&
+ if ((READ_ONCE(net->ipv6.devconf_all->optimistic_dad) ||
+ READ_ONCE(in6_dev->cnf.optimistic_dad)) &&
!net->ipv6.devconf_all->forwarding && sllao)
cfg.ifa_flags |= IFA_F_OPTIMISTIC;
#endif
@@ -2688,7 +2739,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
*/
update_lft = !create && stored_lft;
- if (update_lft && !in6_dev->cnf.ra_honor_pio_life) {
+ if (update_lft && !READ_ONCE(in6_dev->cnf.ra_honor_pio_life)) {
const u32 minimum_lft = min_t(u32,
stored_lft, MIN_VALID_LIFETIME);
valid_lft = max(valid_lft, minimum_lft);
@@ -2697,7 +2748,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
if (update_lft) {
ifp->valid_lft = valid_lft;
ifp->prefered_lft = prefered_lft;
- ifp->tstamp = now;
+ WRITE_ONCE(ifp->tstamp, now);
flags = ifp->flags;
ifp->flags &= ~IFA_F_DEPRECATED;
spin_unlock_bh(&ifp->lock);
@@ -2721,12 +2772,14 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
struct prefix_info *pinfo;
+ struct fib6_table *table;
__u32 valid_lft;
__u32 prefered_lft;
int addr_type, err;
u32 addr_flags = 0;
struct inet6_dev *in6_dev;
struct net *net = dev_net(dev);
+ bool ignore_autoconf = false;
pinfo = (struct prefix_info *) opt;
@@ -2797,11 +2850,20 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
if (valid_lft == 0) {
ip6_del_rt(net, rt, false);
rt = NULL;
- } else if (addrconf_finite_timeout(rt_expires)) {
- /* not infinity */
- fib6_set_expires(rt, jiffies + rt_expires);
} else {
- fib6_clean_expires(rt);
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (addrconf_finite_timeout(rt_expires)) {
+ /* not infinity */
+ fib6_set_expires(rt, jiffies + rt_expires);
+ fib6_add_gc_list(rt);
+ } else {
+ fib6_clean_expires(rt);
+ fib6_remove_gc_list(rt);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
}
} else if (valid_lft) {
clock_t expires = 0;
@@ -2820,7 +2882,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
/* Try to figure out our local address for this prefix */
- if (pinfo->autoconf && in6_dev->cnf.autoconf) {
+ ignore_autoconf = READ_ONCE(in6_dev->cnf.ra_honor_pio_pflag) && pinfo->preferpd;
+ if (pinfo->autoconf && in6_dev->cnf.autoconf && !ignore_autoconf) {
struct in6_addr addr;
bool tokenized = false, dev_addr_generated = false;
@@ -2876,7 +2939,7 @@ put:
static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev,
struct in6_ifreq *ireq)
{
- struct ip_tunnel_parm p = { };
+ struct ip_tunnel_parm_kern p = { };
int err;
if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4))
@@ -2916,11 +2979,11 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
return -EFAULT;
- rtnl_lock();
+ rtnl_net_lock(net);
dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
if (dev && dev->type == ARPHRD_SIT)
err = addrconf_set_sit_dstaddr(net, dev, &ireq);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
@@ -2944,39 +3007,25 @@ static int ipv6_mc_config(struct sock *sk, bool join,
/*
* Manual configuration of address on an interface
*/
-static int inet6_addr_add(struct net *net, int ifindex,
- struct ifa6_config *cfg,
+static int inet6_addr_add(struct net *net, struct net_device *dev,
+ struct ifa6_config *cfg, clock_t expires, u32 flags,
struct netlink_ext_ack *extack)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
- struct net_device *dev;
- unsigned long timeout;
- clock_t expires;
- u32 flags;
- ASSERT_RTNL();
+ ASSERT_RTNL_NET(net);
if (cfg->plen > 128) {
NL_SET_ERR_MSG_MOD(extack, "Invalid prefix length");
return -EINVAL;
}
- /* check the lifetime */
- if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft) {
- NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid");
- return -EINVAL;
- }
-
if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64) {
NL_SET_ERR_MSG_MOD(extack, "address with \"mngtmpaddr\" flag must have a prefix length of 64");
return -EINVAL;
}
- dev = __dev_get_by_index(net, ifindex);
- if (!dev)
- return -ENODEV;
-
idev = addrconf_add_dev(dev);
if (IS_ERR(idev)) {
NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
@@ -2985,7 +3034,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
- true, cfg->pfx, ifindex);
+ true, cfg->pfx, dev->ifindex);
if (ret < 0) {
NL_SET_ERR_MSG_MOD(extack, "Multicast auto join failed");
@@ -2995,24 +3044,6 @@ static int inet6_addr_add(struct net *net, int ifindex,
cfg->scope = ipv6_addr_scope(cfg->pfx);
- timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- expires = jiffies_to_clock_t(timeout * HZ);
- cfg->valid_lft = timeout;
- flags = RTF_EXPIRES;
- } else {
- expires = 0;
- flags = 0;
- cfg->ifa_flags |= IFA_F_PERMANENT;
- }
-
- timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- if (timeout == 0)
- cfg->ifa_flags |= IFA_F_DEPRECATED;
- cfg->preferred_lft = timeout;
- }
-
ifp = ipv6_add_addr(idev, cfg, true, extack);
if (!IS_ERR(ifp)) {
if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) {
@@ -3040,7 +3071,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
return 0;
} else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk, false,
- cfg->pfx, ifindex);
+ cfg->pfx, dev->ifindex);
}
return PTR_ERR(ifp);
@@ -3065,7 +3096,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
return -ENODEV;
}
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (!idev) {
NL_SET_ERR_MSG_MOD(extack, "IPv6 is disabled on this device");
return -ENXIO;
@@ -3078,11 +3109,12 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
- if (!(ifp->flags & IFA_F_TEMPORARY) &&
- (ifa_flags & IFA_F_MANAGETEMPADDR))
- manage_tempaddrs(idev, ifp, 0, 0, false,
- jiffies);
ipv6_del_addr(ifp);
+
+ if (!(ifp->flags & IFA_F_TEMPORARY) &&
+ (ifp->flags & IFA_F_MANAGETEMPADDR))
+ delete_tempaddrs(idev, ifp);
+
addrconf_verify_rtnl(net);
if (ipv6_addr_is_multicast(pfx)) {
ipv6_mc_config(net->ipv6.mc_autojoin_sk,
@@ -3105,6 +3137,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
.preferred_lft = INFINITY_LIFE_TIME,
.valid_lft = INFINITY_LIFE_TIME,
};
+ struct net_device *dev;
struct in6_ifreq ireq;
int err;
@@ -3117,9 +3150,13 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
cfg.pfx = &ireq.ifr6_addr;
cfg.plen = ireq.ifr6_prefixlen;
- rtnl_lock();
- err = inet6_addr_add(net, ireq.ifr6_ifindex, &cfg, NULL);
- rtnl_unlock();
+ rtnl_net_lock(net);
+ dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
+ if (dev)
+ err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL);
+ else
+ err = -ENODEV;
+ rtnl_net_unlock(net);
return err;
}
@@ -3134,10 +3171,10 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
return -EFAULT;
- rtnl_lock();
+ rtnl_net_lock(net);
err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,
ireq.ifr6_prefixlen, NULL);
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
@@ -3262,8 +3299,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
struct inet6_ifaddr *ifp;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad ||
- idev->cnf.optimistic_dad) &&
+ if ((READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad) ||
+ READ_ONCE(idev->cnf.optimistic_dad)) &&
!dev_net(idev->dev)->ipv6.devconf_all->forwarding)
cfg.ifa_flags |= IFA_F_OPTIMISTIC;
#endif
@@ -3442,7 +3479,8 @@ static void addrconf_dev_config(struct net_device *dev)
/* this device type has no EUI support */
if (dev->type == ARPHRD_NONE &&
idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)
- idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_RANDOM;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_RANDOM);
addrconf_addr_gen(idev, false);
}
@@ -3620,7 +3658,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (idev) {
rt6_mtu_change(dev, dev->mtu);
- idev->cnf.mtu6 = dev->mtu;
+ WRITE_ONCE(idev->cnf.mtu6, dev->mtu);
break;
}
@@ -3712,9 +3750,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (idev->cnf.mtu6 != dev->mtu &&
dev->mtu >= IPV6_MIN_MTU) {
rt6_mtu_change(dev, dev->mtu);
- idev->cnf.mtu6 = dev->mtu;
+ WRITE_ONCE(idev->cnf.mtu6, dev->mtu);
}
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
/*
@@ -3834,10 +3872,10 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
*/
if (!unregister && !idev->cnf.disable_ipv6) {
/* aggregate the system setting and interface setting */
- int _keep_addr = net->ipv6.devconf_all->keep_addr_on_down;
+ int _keep_addr = READ_ONCE(net->ipv6.devconf_all->keep_addr_on_down);
if (!_keep_addr)
- _keep_addr = idev->cnf.keep_addr_on_down;
+ _keep_addr = READ_ONCE(idev->cnf.keep_addr_on_down);
keep_addr = (_keep_addr > 0);
}
@@ -3956,7 +3994,7 @@ restart:
ipv6_mc_down(idev);
}
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
idev->ra_mtu = 0;
/* Last: Shot the device (if unregistered) */
@@ -3974,6 +4012,7 @@ static void addrconf_rs_timer(struct timer_list *t)
struct inet6_dev *idev = from_timer(idev, t, rs_timer);
struct net_device *dev = idev->dev;
struct in6_addr lladdr;
+ int rtr_solicits;
write_lock(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY))
@@ -3986,7 +4025,9 @@ static void addrconf_rs_timer(struct timer_list *t)
if (idev->if_flags & IF_RA_RCVD)
goto out;
- if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) {
+ rtr_solicits = READ_ONCE(idev->cnf.rtr_solicits);
+
+ if (idev->rs_probes++ < rtr_solicits || rtr_solicits < 0) {
write_unlock(&idev->lock);
if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
ndisc_send_rs(dev, &lladdr,
@@ -3996,11 +4037,12 @@ static void addrconf_rs_timer(struct timer_list *t)
write_lock(&idev->lock);
idev->rs_interval = rfc3315_s14_backoff_update(
- idev->rs_interval, idev->cnf.rtr_solicit_max_interval);
+ idev->rs_interval,
+ READ_ONCE(idev->cnf.rtr_solicit_max_interval));
/* The wait after the last probe can be shorter */
addrconf_mod_rs_timer(idev, (idev->rs_probes ==
- idev->cnf.rtr_solicits) ?
- idev->cnf.rtr_solicit_delay :
+ READ_ONCE(idev->cnf.rtr_solicits)) ?
+ READ_ONCE(idev->cnf.rtr_solicit_delay) :
idev->rs_interval);
} else {
/*
@@ -4021,24 +4063,25 @@ put:
*/
static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
{
- unsigned long rand_num;
struct inet6_dev *idev = ifp->idev;
+ unsigned long rand_num;
u64 nonce;
if (ifp->flags & IFA_F_OPTIMISTIC)
rand_num = 0;
else
- rand_num = get_random_u32_below(idev->cnf.rtr_solicit_delay ? : 1);
+ rand_num = get_random_u32_below(
+ READ_ONCE(idev->cnf.rtr_solicit_delay) ? : 1);
nonce = 0;
- if (idev->cnf.enhanced_dad ||
- dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad) {
+ if (READ_ONCE(idev->cnf.enhanced_dad) ||
+ READ_ONCE(dev_net(idev->dev)->ipv6.devconf_all->enhanced_dad)) {
do
get_random_bytes(&nonce, 6);
while (nonce == 0);
}
ifp->dad_nonce = nonce;
- ifp->dad_probes = idev->cnf.dad_transmits;
+ ifp->dad_probes = READ_ONCE(idev->cnf.dad_transmits);
addrconf_mod_dad_work(ifp, rand_num);
}
@@ -4058,8 +4101,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
net = dev_net(dev);
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
- (net->ipv6.devconf_all->accept_dad < 1 &&
- idev->cnf.accept_dad < 1) ||
+ (READ_ONCE(net->ipv6.devconf_all->accept_dad) < 1 &&
+ READ_ONCE(idev->cnf.accept_dad) < 1) ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
bool send_na = false;
@@ -4134,6 +4177,7 @@ static void addrconf_dad_work(struct work_struct *w)
struct inet6_dev *idev = ifp->idev;
bool bump_id, disable_ipv6 = false;
struct in6_addr mcaddr;
+ struct net *net;
enum {
DAD_PROCESS,
@@ -4141,7 +4185,9 @@ static void addrconf_dad_work(struct work_struct *w)
DAD_ABORT,
} action = DAD_PROCESS;
- rtnl_lock();
+ net = dev_net(idev->dev);
+
+ rtnl_net_lock(net);
spin_lock_bh(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
@@ -4151,8 +4197,8 @@ static void addrconf_dad_work(struct work_struct *w)
action = DAD_ABORT;
ifp->state = INET6_IFADDR_STATE_POSTDAD;
- if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 ||
- idev->cnf.accept_dad > 1) &&
+ if ((READ_ONCE(net->ipv6.devconf_all->accept_dad) > 1 ||
+ READ_ONCE(idev->cnf.accept_dad) > 1) &&
!idev->cnf.disable_ipv6 &&
!(ifp->flags & IFA_F_STABLE_PRIVACY)) {
struct in6_addr addr;
@@ -4163,7 +4209,7 @@ static void addrconf_dad_work(struct work_struct *w)
if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
ipv6_addr_equal(&ifp->addr, &addr)) {
/* DAD failed for link-local based on MAC */
- idev->cnf.disable_ipv6 = 1;
+ WRITE_ONCE(idev->cnf.disable_ipv6, 1);
pr_info("%s: IPv6 being disabled!\n",
ifp->idev->dev->name);
@@ -4233,7 +4279,7 @@ static void addrconf_dad_work(struct work_struct *w)
ifp->dad_nonce);
out:
in6_ifa_put(ifp);
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
/* ifp->idev must be at least read locked */
@@ -4277,7 +4323,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp);
send_rs = send_mld &&
ipv6_accept_ra(ifp->idev) &&
- ifp->idev->cnf.rtr_solicits != 0 &&
+ READ_ONCE(ifp->idev->cnf.rtr_solicits) != 0 &&
(dev->flags & IFF_LOOPBACK) == 0 &&
(dev->type != ARPHRD_TUNNEL) &&
!netif_is_team_port(dev);
@@ -4291,8 +4337,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
/* send unsolicited NA if enabled */
if (send_na &&
- (ifp->idev->cnf.ndisc_notify ||
- dev_net(dev)->ipv6.devconf_all->ndisc_notify)) {
+ (READ_ONCE(ifp->idev->cnf.ndisc_notify) ||
+ READ_ONCE(dev_net(dev)->ipv6.devconf_all->ndisc_notify))) {
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr,
/*router=*/ !!ifp->idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
@@ -4312,7 +4358,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
write_lock_bh(&ifp->idev->lock);
spin_lock(&ifp->lock);
ifp->idev->rs_interval = rfc3315_s14_backoff_init(
- ifp->idev->cnf.rtr_solicit_interval);
+ READ_ONCE(ifp->idev->cnf.rtr_solicit_interval));
ifp->idev->rs_probes = 1;
ifp->idev->if_flags |= IF_RS_SENT;
addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval);
@@ -4592,9 +4638,7 @@ restart:
!ifp->regen_count && ifp->ifpub) {
/* This is a non-regenerated temporary addr. */
- unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
- ifp->idev->cnf.dad_transmits *
- max(NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME), HZ/100) / HZ;
+ unsigned long regen_advance = ipv6_get_regen_advance(ifp->idev);
if (age + regen_advance >= ifp->prefered_lft) {
struct inet6_ifaddr *ifpub = ifp->ifpub;
@@ -4683,9 +4727,9 @@ static void addrconf_verify_work(struct work_struct *w)
struct net *net = container_of(to_delayed_work(w), struct net,
ipv6.addr_chk_work);
- rtnl_lock();
+ rtnl_net_lock(net);
addrconf_verify_rtnl(net);
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
static void addrconf_verify(struct net *net)
@@ -4743,19 +4787,24 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!pfx)
return -EINVAL;
- ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+ ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
/* We ignore other flags so far. */
ifa_flags &= IFA_F_MANAGETEMPADDR;
- return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
- ifm->ifa_prefixlen, extack);
+ rtnl_net_lock(net);
+ err = inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx,
+ ifm->ifa_prefixlen, extack);
+ rtnl_net_unlock(net);
+
+ return err;
}
-static int modify_prefix_route(struct inet6_ifaddr *ifp,
+static int modify_prefix_route(struct net *net, struct inet6_ifaddr *ifp,
unsigned long expires, u32 flags,
bool modify_peer)
{
+ struct fib6_table *table;
struct fib6_info *f6i;
u32 prio;
@@ -4775,32 +4824,36 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
ifp->prefix_len,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
- } else {
- if (!expires)
+ return 0;
+ }
+ if (f6i != net->ipv6.fib6_null_entry) {
+ table = f6i->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (!(flags & RTF_EXPIRES)) {
fib6_clean_expires(f6i);
- else
+ fib6_remove_gc_list(f6i);
+ } else {
fib6_set_expires(f6i, expires);
+ fib6_add_gc_list(f6i);
+ }
- fib6_info_release(f6i);
+ spin_unlock_bh(&table->tb6_lock);
}
+ fib6_info_release(f6i);
return 0;
}
static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
- struct ifa6_config *cfg)
+ struct ifa6_config *cfg, clock_t expires,
+ u32 flags)
{
- u32 flags;
- clock_t expires;
- unsigned long timeout;
bool was_managetempaddr;
- bool had_prefixroute;
bool new_peer = false;
+ bool had_prefixroute;
- ASSERT_RTNL();
-
- if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft)
- return -EINVAL;
+ ASSERT_RTNL_NET(net);
if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR &&
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
@@ -4809,24 +4862,6 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
cfg->ifa_flags &= ~IFA_F_OPTIMISTIC;
- timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- expires = jiffies_to_clock_t(timeout * HZ);
- cfg->valid_lft = timeout;
- flags = RTF_EXPIRES;
- } else {
- expires = 0;
- flags = 0;
- cfg->ifa_flags |= IFA_F_PERMANENT;
- }
-
- timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ);
- if (addrconf_finite_timeout(timeout)) {
- if (timeout == 0)
- cfg->ifa_flags |= IFA_F_DEPRECATED;
- cfg->preferred_lft = timeout;
- }
-
if (cfg->peer_pfx &&
memcmp(&ifp->peer_addr, cfg->peer_pfx, sizeof(struct in6_addr))) {
if (!ipv6_addr_any(&ifp->peer_addr))
@@ -4842,13 +4877,13 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
IFA_F_NOPREFIXROUTE);
ifp->flags |= cfg->ifa_flags;
- ifp->tstamp = jiffies;
- ifp->valid_lft = cfg->valid_lft;
- ifp->prefered_lft = cfg->preferred_lft;
- ifp->ifa_proto = cfg->ifa_proto;
+ WRITE_ONCE(ifp->tstamp, jiffies);
+ WRITE_ONCE(ifp->valid_lft, cfg->valid_lft);
+ WRITE_ONCE(ifp->prefered_lft, cfg->preferred_lft);
+ WRITE_ONCE(ifp->ifa_proto, cfg->ifa_proto);
if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
- ifp->rt_priority = cfg->rt_priority;
+ WRITE_ONCE(ifp->rt_priority, cfg->rt_priority);
if (new_peer)
ifp->peer_addr = *cfg->peer_pfx;
@@ -4861,7 +4896,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
int rc = -ENOENT;
if (had_prefixroute)
- rc = modify_prefix_route(ifp, expires, flags, false);
+ rc = modify_prefix_route(net, ifp, expires, flags, false);
/* prefix route could have been deleted; if so restore it */
if (rc == -ENOENT) {
@@ -4871,7 +4906,7 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
}
if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr))
- rc = modify_prefix_route(ifp, expires, flags, true);
+ rc = modify_prefix_route(net, ifp, expires, flags, true);
if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) {
addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len,
@@ -4893,14 +4928,12 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp,
}
if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) {
- if (was_managetempaddr &&
- !(ifp->flags & IFA_F_MANAGETEMPADDR)) {
- cfg->valid_lft = 0;
- cfg->preferred_lft = 0;
- }
- manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft,
- cfg->preferred_lft, !was_managetempaddr,
- jiffies);
+ if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR))
+ delete_tempaddrs(ifp->idev, ifp);
+ else
+ manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft,
+ cfg->preferred_lft, !was_managetempaddr,
+ jiffies);
}
addrconf_verify_rtnl(net);
@@ -4913,13 +4946,16 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
- struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
struct inet6_dev *idev;
struct ifa6_config cfg;
+ struct ifaddrmsg *ifm;
+ unsigned long timeout;
+ clock_t expires;
+ u32 flags;
int err;
err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
@@ -4942,8 +4978,18 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[IFA_PROTO])
cfg.ifa_proto = nla_get_u8(tb[IFA_PROTO]);
+ cfg.ifa_flags = nla_get_u32_default(tb[IFA_FLAGS], ifm->ifa_flags);
+
+ /* We ignore other flags so far. */
+ cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS |
+ IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE |
+ IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ cfg.ifa_flags |= IFA_F_PERMANENT;
cfg.valid_lft = INFINITY_LIFE_TIME;
cfg.preferred_lft = INFINITY_LIFE_TIME;
+ expires = 0;
+ flags = 0;
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
@@ -4951,27 +4997,43 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
ci = nla_data(tb[IFA_CACHEINFO]);
cfg.valid_lft = ci->ifa_valid;
cfg.preferred_lft = ci->ifa_prefered;
+
+ if (!cfg.valid_lft || cfg.preferred_lft > cfg.valid_lft) {
+ NL_SET_ERR_MSG_MOD(extack, "address lifetime invalid");
+ return -EINVAL;
+ }
+
+ timeout = addrconf_timeout_fixup(cfg.valid_lft, HZ);
+ if (addrconf_finite_timeout(timeout)) {
+ cfg.ifa_flags &= ~IFA_F_PERMANENT;
+ cfg.valid_lft = timeout;
+ expires = jiffies_to_clock_t(timeout * HZ);
+ flags = RTF_EXPIRES;
+ }
+
+ timeout = addrconf_timeout_fixup(cfg.preferred_lft, HZ);
+ if (addrconf_finite_timeout(timeout)) {
+ if (timeout == 0)
+ cfg.ifa_flags |= IFA_F_DEPRECATED;
+
+ cfg.preferred_lft = timeout;
+ }
}
+ rtnl_net_lock(net);
+
dev = __dev_get_by_index(net, ifm->ifa_index);
if (!dev) {
NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
- return -ENODEV;
+ err = -ENODEV;
+ goto unlock;
}
- if (tb[IFA_FLAGS])
- cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]);
- else
- cfg.ifa_flags = ifm->ifa_flags;
-
- /* We ignore other flags so far. */
- cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS |
- IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE |
- IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
-
idev = ipv6_find_idev(dev);
- if (IS_ERR(idev))
- return PTR_ERR(idev);
+ if (IS_ERR(idev)) {
+ err = PTR_ERR(idev);
+ goto unlock;
+ }
if (!ipv6_allow_optimistic_dad(net, idev))
cfg.ifa_flags &= ~IFA_F_OPTIMISTIC;
@@ -4979,7 +5041,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (cfg.ifa_flags & IFA_F_NODAD &&
cfg.ifa_flags & IFA_F_OPTIMISTIC) {
NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
- return -EINVAL;
+ err = -EINVAL;
+ goto unlock;
}
ifa = ipv6_get_ifaddr(net, cfg.pfx, dev, 1);
@@ -4988,7 +5051,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
* It would be best to check for !NLM_F_CREATE here but
* userspace already relies on not having to provide this.
*/
- return inet6_addr_add(net, ifm->ifa_index, &cfg, extack);
+ err = inet6_addr_add(net, dev, &cfg, expires, flags, extack);
+ goto unlock;
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
@@ -4996,10 +5060,12 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
NL_SET_ERR_MSG_MOD(extack, "address already assigned");
err = -EEXIST;
} else {
- err = inet6_addr_modify(net, ifa, &cfg);
+ err = inet6_addr_modify(net, ifa, &cfg, expires, flags);
}
in6_ifa_put(ifa);
+unlock:
+ rtnl_net_unlock(net);
return err;
}
@@ -5053,33 +5119,21 @@ static inline int inet6_ifaddr_msgsize(void)
+ nla_total_size(4) /* IFA_RT_PRIORITY */;
}
-enum addr_type_t {
- UNICAST_ADDR,
- MULTICAST_ADDR,
- ANYCAST_ADDR,
-};
-
-struct inet6_fill_args {
- u32 portid;
- u32 seq;
- int event;
- unsigned int flags;
- int netnsid;
- int ifindex;
- enum addr_type_t type;
-};
-
-static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+static int inet6_fill_ifaddr(struct sk_buff *skb,
+ const struct inet6_ifaddr *ifa,
struct inet6_fill_args *args)
{
- struct nlmsghdr *nlh;
+ struct nlmsghdr *nlh;
u32 preferred, valid;
+ u32 flags, priority;
+ u8 proto;
nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
sizeof(struct ifaddrmsg), args->flags);
if (!nlh)
return -EMSGSIZE;
+ flags = READ_ONCE(ifa->flags);
put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),
ifa->idev->dev->ifindex);
@@ -5087,13 +5141,14 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
goto error;
- spin_lock_bh(&ifa->lock);
- if (!((ifa->flags&IFA_F_PERMANENT) &&
- (ifa->prefered_lft == INFINITY_LIFE_TIME))) {
- preferred = ifa->prefered_lft;
- valid = ifa->valid_lft;
+ preferred = READ_ONCE(ifa->prefered_lft);
+ valid = READ_ONCE(ifa->valid_lft);
+
+ if (!((flags & IFA_F_PERMANENT) &&
+ (preferred == INFINITY_LIFE_TIME))) {
if (preferred != INFINITY_LIFE_TIME) {
- long tval = (jiffies - ifa->tstamp)/HZ;
+ long tval = (jiffies - READ_ONCE(ifa->tstamp)) / HZ;
+
if (preferred > tval)
preferred -= tval;
else
@@ -5109,28 +5164,29 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
preferred = INFINITY_LIFE_TIME;
valid = INFINITY_LIFE_TIME;
}
- spin_unlock_bh(&ifa->lock);
if (!ipv6_addr_any(&ifa->peer_addr)) {
if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 ||
nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0)
goto error;
- } else
+ } else {
if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0)
goto error;
+ }
- if (ifa->rt_priority &&
- nla_put_u32(skb, IFA_RT_PRIORITY, ifa->rt_priority))
+ priority = READ_ONCE(ifa->rt_priority);
+ if (priority && nla_put_u32(skb, IFA_RT_PRIORITY, priority))
goto error;
- if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
+ if (put_cacheinfo(skb, ifa->cstamp, READ_ONCE(ifa->tstamp),
+ preferred, valid) < 0)
goto error;
- if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0)
+ if (nla_put_u32(skb, IFA_FLAGS, flags) < 0)
goto error;
- if (ifa->ifa_proto &&
- nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto))
+ proto = READ_ONCE(ifa->ifa_proto);
+ if (proto && nla_put_u8(skb, IFA_PROTO, proto))
goto error;
nlmsg_end(skb, nlh);
@@ -5141,14 +5197,16 @@ error:
return -EMSGSIZE;
}
-static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
- struct inet6_fill_args *args)
+int inet6_fill_ifmcaddr(struct sk_buff *skb,
+ const struct ifmcaddr6 *ifmca,
+ struct inet6_fill_args *args)
{
- struct nlmsghdr *nlh;
- u8 scope = RT_SCOPE_UNIVERSE;
int ifindex = ifmca->idev->dev->ifindex;
+ u8 scope = RT_SCOPE_UNIVERSE;
+ struct nlmsghdr *nlh;
- if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
+ if (!args->force_rt_scope_universe &&
+ ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
nlh = nlmsg_put(skb, args->portid, args->seq, args->event,
@@ -5164,7 +5222,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 ||
- put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp,
+ put_cacheinfo(skb, ifmca->mca_cstamp, READ_ONCE(ifmca->mca_tstamp),
INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -5174,13 +5232,14 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
return 0;
}
-static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
- struct inet6_fill_args *args)
+int inet6_fill_ifacaddr(struct sk_buff *skb,
+ const struct ifacaddr6 *ifaca,
+ struct inet6_fill_args *args)
{
struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
int ifindex = dev ? dev->ifindex : 1;
- struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
+ struct nlmsghdr *nlh;
if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
@@ -5198,7 +5257,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 ||
- put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp,
+ put_cacheinfo(skb, ifaca->aca_cstamp, READ_ONCE(ifaca->aca_tstamp),
INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) {
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -5209,24 +5268,23 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
}
/* called with rcu_read_lock() */
-static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
- struct netlink_callback *cb, int s_ip_idx,
+static int in6_dump_addrs(const struct inet6_dev *idev, struct sk_buff *skb,
+ struct netlink_callback *cb, int *s_ip_idx,
struct inet6_fill_args *fillargs)
{
- struct ifmcaddr6 *ifmca;
- struct ifacaddr6 *ifaca;
+ const struct ifmcaddr6 *ifmca;
+ const struct ifacaddr6 *ifaca;
int ip_idx = 0;
- int err = 1;
+ int err = 0;
- read_lock_bh(&idev->lock);
switch (fillargs->type) {
case UNICAST_ADDR: {
- struct inet6_ifaddr *ifa;
+ const struct inet6_ifaddr *ifa;
fillargs->event = RTM_NEWADDR;
/* unicast address incl. temp addr */
- list_for_each_entry(ifa, &idev->addr_list, if_list) {
- if (ip_idx < s_ip_idx)
+ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
+ if (ip_idx < *s_ip_idx)
goto next;
err = inet6_fill_ifaddr(skb, ifa, fillargs);
if (err < 0)
@@ -5238,27 +5296,25 @@ next:
break;
}
case MULTICAST_ADDR:
- read_unlock_bh(&idev->lock);
fillargs->event = RTM_GETMULTICAST;
/* multicast address */
- for (ifmca = rtnl_dereference(idev->mc_list);
+ for (ifmca = rcu_dereference(idev->mc_list);
ifmca;
- ifmca = rtnl_dereference(ifmca->next), ip_idx++) {
- if (ip_idx < s_ip_idx)
+ ifmca = rcu_dereference(ifmca->next), ip_idx++) {
+ if (ip_idx < *s_ip_idx)
continue;
err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
if (err < 0)
break;
}
- read_lock_bh(&idev->lock);
break;
case ANYCAST_ADDR:
fillargs->event = RTM_GETANYCAST;
/* anycast address */
- for (ifaca = idev->ac_list; ifaca;
- ifaca = ifaca->aca_next, ip_idx++) {
- if (ip_idx < s_ip_idx)
+ for (ifaca = rcu_dereference(idev->ac_list); ifaca;
+ ifaca = rcu_dereference(ifaca->aca_next), ip_idx++) {
+ if (ip_idx < *s_ip_idx)
continue;
err = inet6_fill_ifacaddr(skb, ifaca, fillargs);
if (err < 0)
@@ -5268,8 +5324,7 @@ next:
default:
break;
}
- read_unlock_bh(&idev->lock);
- cb->args[2] = ip_idx;
+ *s_ip_idx = err ? ip_idx : 0;
return err;
}
@@ -5332,6 +5387,7 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
enum addr_type_t type)
{
+ struct net *tgt_net = sock_net(skb->sk);
const struct nlmsghdr *nlh = cb->nlh;
struct inet6_fill_args fillargs = {
.portid = NETLINK_CB(cb->skb).portid,
@@ -5339,73 +5395,55 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
.flags = NLM_F_MULTI,
.netnsid = -1,
.type = type,
+ .force_rt_scope_universe = false,
};
- struct net *tgt_net = sock_net(skb->sk);
- int idx, s_idx, s_ip_idx;
- int h, s_h;
+ struct {
+ unsigned long ifindex;
+ int ip_idx;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct inet6_dev *idev;
- struct hlist_head *head;
int err = 0;
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
- s_ip_idx = cb->args[2];
-
+ rcu_read_lock();
if (cb->strict_check) {
err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
skb->sk, cb);
if (err < 0)
- goto put_tgt_net;
+ goto done;
err = 0;
if (fillargs.ifindex) {
- dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
+ dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
if (!dev) {
err = -ENODEV;
- goto put_tgt_net;
+ goto done;
}
idev = __in6_dev_get(dev);
- if (idev) {
- err = in6_dump_addrs(idev, skb, cb, s_ip_idx,
+ if (idev)
+ err = in6_dump_addrs(idev, skb, cb,
+ &ctx->ip_idx,
&fillargs);
- if (err > 0)
- err = 0;
- }
- goto put_tgt_net;
+ goto done;
}
}
- rcu_read_lock();
cb->seq = inet6_base_seq(tgt_net);
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &tgt_net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- if (h > s_h || idx > s_idx)
- s_ip_idx = 0;
- idev = __in6_dev_get(dev);
- if (!idev)
- goto cont;
-
- if (in6_dump_addrs(idev, skb, cb, s_ip_idx,
- &fillargs) < 0)
- goto done;
-cont:
- idx++;
- }
+ for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ continue;
+ err = in6_dump_addrs(idev, skb, cb, &ctx->ip_idx,
+ &fillargs);
+ if (err < 0)
+ goto done;
}
done:
rcu_read_unlock();
- cb->args[0] = h;
- cb->args[1] = idx;
-put_tgt_net:
if (fillargs.netnsid >= 0)
put_net(tgt_net);
- return skb->len ? : err;
+ return err;
}
static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -5486,6 +5524,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
.event = RTM_NEWADDR,
.flags = 0,
.netnsid = -1,
+ .force_rt_scope_universe = false,
};
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
@@ -5557,6 +5596,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
.event = event,
.flags = 0,
.netnsid = -1,
+ .force_rt_scope_universe = false,
};
int err = -ENOBUFS;
@@ -5574,91 +5614,100 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
}
-static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
- __s32 *array, int bytes)
+static void ipv6_store_devconf(const struct ipv6_devconf *cnf,
+ __s32 *array, int bytes)
{
BUG_ON(bytes < (DEVCONF_MAX * 4));
memset(array, 0, bytes);
- array[DEVCONF_FORWARDING] = cnf->forwarding;
- array[DEVCONF_HOPLIMIT] = cnf->hop_limit;
- array[DEVCONF_MTU6] = cnf->mtu6;
- array[DEVCONF_ACCEPT_RA] = cnf->accept_ra;
- array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects;
- array[DEVCONF_AUTOCONF] = cnf->autoconf;
- array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits;
- array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
+ array[DEVCONF_FORWARDING] = READ_ONCE(cnf->forwarding);
+ array[DEVCONF_HOPLIMIT] = READ_ONCE(cnf->hop_limit);
+ array[DEVCONF_MTU6] = READ_ONCE(cnf->mtu6);
+ array[DEVCONF_ACCEPT_RA] = READ_ONCE(cnf->accept_ra);
+ array[DEVCONF_ACCEPT_REDIRECTS] = READ_ONCE(cnf->accept_redirects);
+ array[DEVCONF_AUTOCONF] = READ_ONCE(cnf->autoconf);
+ array[DEVCONF_DAD_TRANSMITS] = READ_ONCE(cnf->dad_transmits);
+ array[DEVCONF_RTR_SOLICITS] = READ_ONCE(cnf->rtr_solicits);
array[DEVCONF_RTR_SOLICIT_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_solicit_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_interval));
array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_solicit_max_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_max_interval));
array[DEVCONF_RTR_SOLICIT_DELAY] =
- jiffies_to_msecs(cnf->rtr_solicit_delay);
- array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_solicit_delay));
+ array[DEVCONF_FORCE_MLD_VERSION] = READ_ONCE(cnf->force_mld_version);
array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] =
- jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->mldv1_unsolicited_report_interval));
array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
- jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
- array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
- array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
- array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;
- array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;
- array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
- array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
- array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
- array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric;
- array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit;
- array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
+ jiffies_to_msecs(READ_ONCE(cnf->mldv2_unsolicited_report_interval));
+ array[DEVCONF_USE_TEMPADDR] = READ_ONCE(cnf->use_tempaddr);
+ array[DEVCONF_TEMP_VALID_LFT] = READ_ONCE(cnf->temp_valid_lft);
+ array[DEVCONF_TEMP_PREFERED_LFT] = READ_ONCE(cnf->temp_prefered_lft);
+ array[DEVCONF_REGEN_MAX_RETRY] = READ_ONCE(cnf->regen_max_retry);
+ array[DEVCONF_MAX_DESYNC_FACTOR] = READ_ONCE(cnf->max_desync_factor);
+ array[DEVCONF_MAX_ADDRESSES] = READ_ONCE(cnf->max_addresses);
+ array[DEVCONF_ACCEPT_RA_DEFRTR] = READ_ONCE(cnf->accept_ra_defrtr);
+ array[DEVCONF_RA_DEFRTR_METRIC] = READ_ONCE(cnf->ra_defrtr_metric);
+ array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] =
+ READ_ONCE(cnf->accept_ra_min_hop_limit);
+ array[DEVCONF_ACCEPT_RA_PINFO] = READ_ONCE(cnf->accept_ra_pinfo);
#ifdef CONFIG_IPV6_ROUTER_PREF
- array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
+ array[DEVCONF_ACCEPT_RA_RTR_PREF] = READ_ONCE(cnf->accept_ra_rtr_pref);
array[DEVCONF_RTR_PROBE_INTERVAL] =
- jiffies_to_msecs(cnf->rtr_probe_interval);
+ jiffies_to_msecs(READ_ONCE(cnf->rtr_probe_interval));
#ifdef CONFIG_IPV6_ROUTE_INFO
- array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] = cnf->accept_ra_rt_info_min_plen;
- array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen;
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN] =
+ READ_ONCE(cnf->accept_ra_rt_info_min_plen);
+ array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] =
+ READ_ONCE(cnf->accept_ra_rt_info_max_plen);
#endif
#endif
- array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
- array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
+ array[DEVCONF_PROXY_NDP] = READ_ONCE(cnf->proxy_ndp);
+ array[DEVCONF_ACCEPT_SOURCE_ROUTE] =
+ READ_ONCE(cnf->accept_source_route);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
- array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
- array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
+ array[DEVCONF_OPTIMISTIC_DAD] = READ_ONCE(cnf->optimistic_dad);
+ array[DEVCONF_USE_OPTIMISTIC] = READ_ONCE(cnf->use_optimistic);
#endif
#ifdef CONFIG_IPV6_MROUTE
array[DEVCONF_MC_FORWARDING] = atomic_read(&cnf->mc_forwarding);
#endif
- array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
- array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
- array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
- array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
- array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
- array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
- array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu;
- array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
+ array[DEVCONF_DISABLE_IPV6] = READ_ONCE(cnf->disable_ipv6);
+ array[DEVCONF_ACCEPT_DAD] = READ_ONCE(cnf->accept_dad);
+ array[DEVCONF_FORCE_TLLAO] = READ_ONCE(cnf->force_tllao);
+ array[DEVCONF_NDISC_NOTIFY] = READ_ONCE(cnf->ndisc_notify);
+ array[DEVCONF_SUPPRESS_FRAG_NDISC] =
+ READ_ONCE(cnf->suppress_frag_ndisc);
+ array[DEVCONF_ACCEPT_RA_FROM_LOCAL] =
+ READ_ONCE(cnf->accept_ra_from_local);
+ array[DEVCONF_ACCEPT_RA_MTU] = READ_ONCE(cnf->accept_ra_mtu);
+ array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] =
+ READ_ONCE(cnf->ignore_routes_with_linkdown);
/* we omit DEVCONF_STABLE_SECRET for now */
- array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
- array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
- array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
- array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
- array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
+ array[DEVCONF_USE_OIF_ADDRS_ONLY] = READ_ONCE(cnf->use_oif_addrs_only);
+ array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] =
+ READ_ONCE(cnf->drop_unicast_in_l2_multicast);
+ array[DEVCONF_DROP_UNSOLICITED_NA] = READ_ONCE(cnf->drop_unsolicited_na);
+ array[DEVCONF_KEEP_ADDR_ON_DOWN] = READ_ONCE(cnf->keep_addr_on_down);
+ array[DEVCONF_SEG6_ENABLED] = READ_ONCE(cnf->seg6_enabled);
#ifdef CONFIG_IPV6_SEG6_HMAC
- array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac;
+ array[DEVCONF_SEG6_REQUIRE_HMAC] = READ_ONCE(cnf->seg6_require_hmac);
#endif
- array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad;
- array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode;
- array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
- array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
- array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
- array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
- array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
- array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
- array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
- array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
- array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft;
+ array[DEVCONF_ENHANCED_DAD] = READ_ONCE(cnf->enhanced_dad);
+ array[DEVCONF_ADDR_GEN_MODE] = READ_ONCE(cnf->addr_gen_mode);
+ array[DEVCONF_DISABLE_POLICY] = READ_ONCE(cnf->disable_policy);
+ array[DEVCONF_NDISC_TCLASS] = READ_ONCE(cnf->ndisc_tclass);
+ array[DEVCONF_RPL_SEG_ENABLED] = READ_ONCE(cnf->rpl_seg_enabled);
+ array[DEVCONF_IOAM6_ENABLED] = READ_ONCE(cnf->ioam6_enabled);
+ array[DEVCONF_IOAM6_ID] = READ_ONCE(cnf->ioam6_id);
+ array[DEVCONF_IOAM6_ID_WIDE] = READ_ONCE(cnf->ioam6_id_wide);
+ array[DEVCONF_NDISC_EVICT_NOCARRIER] =
+ READ_ONCE(cnf->ndisc_evict_nocarrier);
+ array[DEVCONF_ACCEPT_UNTRACKED_NA] =
+ READ_ONCE(cnf->accept_untracked_na);
+ array[DEVCONF_ACCEPT_RA_MIN_LFT] = READ_ONCE(cnf->accept_ra_min_lft);
}
static inline size_t inet6_ifla6_size(void)
@@ -5738,13 +5787,14 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
u32 ext_filter_mask)
{
- struct nlattr *nla;
struct ifla_cacheinfo ci;
+ struct nlattr *nla;
+ u32 ra_mtu;
- if (nla_put_u32(skb, IFLA_INET6_FLAGS, idev->if_flags))
+ if (nla_put_u32(skb, IFLA_INET6_FLAGS, READ_ONCE(idev->if_flags)))
goto nla_put_failure;
ci.max_reasm_len = IPV6_MAXPLEN;
- ci.tstamp = cstamp_delta(idev->tstamp);
+ ci.tstamp = cstamp_delta(READ_ONCE(idev->tstamp));
ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time);
ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME));
if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci))
@@ -5776,11 +5826,12 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
read_unlock_bh(&idev->lock);
- if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
+ if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE,
+ READ_ONCE(idev->cnf.addr_gen_mode)))
goto nla_put_failure;
- if (idev->ra_mtu &&
- nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu))
+ ra_mtu = READ_ONCE(idev->ra_mtu);
+ if (ra_mtu && nla_put_u32(skb, IFLA_INET6_RA_MTU, ra_mtu))
goto nla_put_failure;
return 0;
@@ -5842,7 +5893,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token,
return -EINVAL;
}
- if (idev->cnf.rtr_solicits == 0) {
+ if (READ_ONCE(idev->cnf.rtr_solicits) == 0) {
NL_SET_ERR_MSG(extack,
"Router solicitation is disabled on device");
return -EINVAL;
@@ -5875,7 +5926,7 @@ update_lft:
if (update_rs) {
idev->if_flags |= IF_RS_SENT;
idev->rs_interval = rfc3315_s14_backoff_init(
- idev->cnf.rtr_solicit_interval);
+ READ_ONCE(idev->cnf.rtr_solicit_interval));
idev->rs_probes = 1;
addrconf_mod_rs_timer(idev, idev->rs_interval);
}
@@ -5981,7 +6032,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
- idev->cnf.addr_gen_mode = mode;
+ WRITE_ONCE(idev->cnf.addr_gen_mode, mode);
}
return 0;
@@ -5993,6 +6044,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
struct net_device *dev = idev->dev;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
+ int ifindex, iflink;
void *protoinfo;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
@@ -6003,18 +6055,20 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
hdr->ifi_family = AF_INET6;
hdr->__ifi_pad = 0;
hdr->ifi_type = dev->type;
- hdr->ifi_index = dev->ifindex;
+ ifindex = READ_ONCE(dev->ifindex);
+ hdr->ifi_index = ifindex;
hdr->ifi_flags = dev_get_flags(dev);
hdr->ifi_change = 0;
+ iflink = dev_get_iflink(dev);
if (nla_put_string(skb, IFLA_IFNAME, dev->name) ||
(dev->addr_len &&
nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
- nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
- (dev->ifindex != dev_get_iflink(dev) &&
- nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
+ nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) ||
+ (ifindex != iflink &&
+ nla_put_u32(skb, IFLA_LINK, iflink)) ||
nla_put_u8(skb, IFLA_OPERSTATE,
- netif_running(dev) ? dev->operstate : IF_OPER_DOWN))
+ netif_running(dev) ? READ_ONCE(dev->operstate) : IF_OPER_DOWN))
goto nla_put_failure;
protoinfo = nla_nest_start_noflag(skb, IFLA_PROTINFO);
if (!protoinfo)
@@ -6060,50 +6114,39 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- int h, s_h;
- int idx = 0, s_idx;
+ struct {
+ unsigned long ifindex;
+ } *ctx = (void *)cb->ctx;
struct net_device *dev;
struct inet6_dev *idev;
- struct hlist_head *head;
+ int err;
/* only requests using strict checking can pass data to
* influence the dump
*/
if (cb->strict_check) {
- int err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
+ err = inet6_valid_dump_ifinfo(cb->nlh, cb->extack);
if (err < 0)
return err;
}
- s_h = cb->args[0];
- s_idx = cb->args[1];
-
+ err = 0;
rcu_read_lock();
- for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
- idx = 0;
- head = &net->dev_index_head[h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx < s_idx)
- goto cont;
- idev = __in6_dev_get(dev);
- if (!idev)
- goto cont;
- if (inet6_fill_ifinfo(skb, idev,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWLINK, NLM_F_MULTI) < 0)
- goto out;
-cont:
- idx++;
- }
+ for_each_netdev_dump(net, dev, ctx->ifindex) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
+ continue;
+ err = inet6_fill_ifinfo(skb, idev,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWLINK, NLM_F_MULTI);
+ if (err < 0)
+ break;
}
-out:
rcu_read_unlock();
- cb->args[1] = idx;
- cb->args[0] = h;
- return skb->len;
+ return err;
}
void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
@@ -6126,8 +6169,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFINFO, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_IFINFO, err);
}
static inline size_t inet6_prefix_nlmsg_size(void)
@@ -6194,8 +6236,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
}
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -6262,7 +6303,7 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
#ifdef CONFIG_SYSCTL
-static int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_forward(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
@@ -6287,7 +6328,7 @@ static int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
return ret;
}
-static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_mtu(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct inet6_dev *idev = ctl->extra1;
@@ -6321,44 +6362,45 @@ static void addrconf_disable_change(struct net *net, __s32 newf)
struct inet6_dev *idev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
int changed = (!idev->cnf.disable_ipv6) ^ (!newf);
- idev->cnf.disable_ipv6 = newf;
+
+ WRITE_ONCE(idev->cnf.disable_ipv6, newf);
if (changed)
dev_disable_change(idev);
}
}
}
-static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
+static int addrconf_disable_ipv6(const struct ctl_table *table, int *p, int newf)
{
- struct net *net;
+ struct net *net = (struct net *)table->extra2;
int old;
- if (!rtnl_trylock())
- return restart_syscall();
-
- net = (struct net *)table->extra2;
- old = *p;
- *p = newf;
-
if (p == &net->ipv6.devconf_dflt->disable_ipv6) {
- rtnl_unlock();
+ WRITE_ONCE(*p, newf);
return 0;
}
+ if (!rtnl_net_trylock(net))
+ return restart_syscall();
+
+ old = *p;
+ WRITE_ONCE(*p, newf);
+
if (p == &net->ipv6.devconf_all->disable_ipv6) {
- net->ipv6.devconf_dflt->disable_ipv6 = newf;
+ WRITE_ONCE(net->ipv6.devconf_dflt->disable_ipv6, newf);
addrconf_disable_change(net, newf);
- } else if ((!newf) ^ (!old))
+ } else if ((!newf) ^ (!old)) {
dev_disable_change((struct inet6_dev *)table->extra1);
+ }
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
-static int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_disable(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
@@ -6383,7 +6425,7 @@ static int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
return ret;
}
-static int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_proxy_ndp(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
@@ -6397,20 +6439,20 @@ static int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
if (write && old != new) {
struct net *net = ctl->extra2;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
- if (valp == &net->ipv6.devconf_dflt->proxy_ndp)
+ if (valp == &net->ipv6.devconf_dflt->proxy_ndp) {
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_DEFAULT,
net->ipv6.devconf_dflt);
- else if (valp == &net->ipv6.devconf_all->proxy_ndp)
+ } else if (valp == &net->ipv6.devconf_all->proxy_ndp) {
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
NETCONFA_PROXY_NEIGH,
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
- else {
+ } else {
struct inet6_dev *idev = ctl->extra1;
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -6418,13 +6460,13 @@ static int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
idev->dev->ifindex,
&idev->cnf);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
return ret;
}
-static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -6438,7 +6480,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
.mode = ctl->mode,
};
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
new_val = *((u32 *)ctl->data);
@@ -6460,33 +6502,34 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
}
if (idev->cnf.addr_gen_mode != new_val) {
- idev->cnf.addr_gen_mode = new_val;
+ WRITE_ONCE(idev->cnf.addr_gen_mode, new_val);
addrconf_init_auto_addrs(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
- net->ipv6.devconf_dflt->addr_gen_mode = new_val;
+ WRITE_ONCE(net->ipv6.devconf_dflt->addr_gen_mode, new_val);
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
- idev->cnf.addr_gen_mode = new_val;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ new_val);
addrconf_init_auto_addrs(idev->dev);
}
}
}
- *((u32 *)ctl->data) = new_val;
+ WRITE_ONCE(*((u32 *)ctl->data), new_val);
}
out:
- rtnl_unlock();
+ rtnl_net_unlock(net);
return ret;
}
-static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_stable_secret(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -6503,7 +6546,7 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
lctl.maxlen = IPV6_MAX_STRLEN;
lctl.data = str;
- if (!rtnl_trylock())
+ if (!rtnl_net_trylock(net))
return restart_syscall();
if (!write && !secret->initialized) {
@@ -6533,27 +6576,28 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
struct net_device *dev;
for_each_netdev(net, dev) {
- struct inet6_dev *idev = __in6_dev_get(dev);
+ struct inet6_dev *idev = __in6_dev_get_rtnl_net(dev);
if (idev) {
- idev->cnf.addr_gen_mode =
- IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY);
}
}
} else {
struct inet6_dev *idev = ctl->extra1;
- idev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+ WRITE_ONCE(idev->cnf.addr_gen_mode,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY);
}
out:
- rtnl_unlock();
+ rtnl_net_unlock(net);
return err;
}
static
-int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
+int addrconf_sysctl_ignore_routes_with_linkdown(const struct ctl_table *ctl,
int write, void *buffer,
size_t *lenp,
loff_t *ppos)
@@ -6621,27 +6665,26 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
}
static
-int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
+int addrconf_disable_policy(const struct ctl_table *ctl, int *valp, int val)
{
+ struct net *net = (struct net *)ctl->extra2;
struct inet6_dev *idev;
- struct net *net;
- if (!rtnl_trylock())
- return restart_syscall();
-
- *valp = val;
-
- net = (struct net *)ctl->extra2;
if (valp == &net->ipv6.devconf_dflt->disable_policy) {
- rtnl_unlock();
+ WRITE_ONCE(*valp, val);
return 0;
}
+ if (!rtnl_net_trylock(net))
+ return restart_syscall();
+
+ WRITE_ONCE(*valp, val);
+
if (valp == &net->ipv6.devconf_all->disable_policy) {
struct net_device *dev;
for_each_netdev(net, dev) {
- idev = __in6_dev_get(dev);
+ idev = __in6_dev_get_rtnl_net(dev);
if (idev)
addrconf_disable_policy_idev(idev, val);
}
@@ -6650,11 +6693,11 @@ int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
addrconf_disable_policy_idev(idev, val);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
return 0;
}
-static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
+static int addrconf_sysctl_disable_policy(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int *valp = ctl->data;
@@ -6806,6 +6849,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "regen_min_advance",
+ .data = &ipv6_devconf.regen_min_advance,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "regen_max_retry",
.data = &ipv6_devconf.regen_max_retry,
.maxlen = sizeof(int),
@@ -6871,6 +6921,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ {
+ .procname = "ra_honor_pio_pflag",
+ .data = &ipv6_devconf.ra_honor_pio_pflag,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
#ifdef CONFIG_IPV6_ROUTER_PREF
{
.procname = "accept_ra_rtr_pref",
@@ -7130,14 +7189,12 @@ static const struct ctl_table addrconf_sysctl[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
- {
- /* sentinel */
- }
};
static int __addrconf_sysctl_register(struct net *net, char *dev_name,
struct inet6_dev *idev, struct ipv6_devconf *p)
{
+ size_t table_size = ARRAY_SIZE(addrconf_sysctl);
int i, ifindex;
struct ctl_table *table;
char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
@@ -7146,7 +7203,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
if (!table)
goto out;
- for (i = 0; table[i].data; i++) {
+ for (i = 0; i < table_size; i++) {
table[i].data += (char *)p - (char *)&ipv6_devconf;
/* If one of these is already set, then it is not safe to
* overwrite either of them: this makes proc_dointvec_minmax
@@ -7161,7 +7218,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name);
p->sysctl_header = register_net_sysctl_sz(net, path, table,
- ARRAY_SIZE(addrconf_sysctl));
+ table_size);
if (!p->sysctl_header)
goto free;
@@ -7184,7 +7241,7 @@ out:
static void __addrconf_sysctl_unregister(struct net *net,
struct ipv6_devconf *p, int ifindex)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
if (!p->sysctl_header)
return;
@@ -7345,6 +7402,27 @@ static struct rtnl_af_ops inet6_ops __read_mostly = {
.set_link_af = inet6_set_link_af,
};
+static const struct rtnl_msg_handler addrconf_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETLINK,
+ .dumpit = inet6_dump_ifinfo, .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDR,
+ .doit = inet6_rtm_newaddr, .flags = RTNL_FLAG_DOIT_PERNET},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDR,
+ .doit = inet6_rtm_deladdr, .flags = RTNL_FLAG_DOIT_PERNET},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDR,
+ .doit = inet6_rtm_getaddr, .dumpit = inet6_dump_ifaddr,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETMULTICAST,
+ .dumpit = inet6_dump_ifmcaddr,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETANYCAST,
+ .dumpit = inet6_dump_ifacaddr,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETNETCONF,
+ .doit = inet6_netconf_get_devconf, .dumpit = inet6_netconf_dump_devconf,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
/*
* Init / cleanup code
*/
@@ -7365,15 +7443,16 @@ int __init addrconf_init(void)
if (err < 0)
goto out_addrlabel;
- addrconf_wq = create_workqueue("ipv6_addrconf");
+ /* All works using addrconf_wq need to lock rtnl. */
+ addrconf_wq = create_singlethread_workqueue("ipv6_addrconf");
if (!addrconf_wq) {
err = -ENOMEM;
goto out_nowq;
}
- rtnl_lock();
+ rtnl_net_lock(&init_net);
idev = ipv6_add_dev(blackhole_netdev);
- rtnl_unlock();
+ rtnl_net_unlock(&init_net);
if (IS_ERR(idev)) {
err = PTR_ERR(idev);
goto errlo;
@@ -7385,40 +7464,14 @@ int __init addrconf_init(void)
addrconf_verify(&init_net);
- rtnl_af_register(&inet6_ops);
+ err = rtnl_af_register(&inet6_ops);
+ if (err)
+ goto erraf;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
- NULL, inet6_dump_ifinfo, 0);
- if (err < 0)
+ err = rtnl_register_many(addrconf_rtnl_msg_handlers);
+ if (err)
goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR,
- inet6_rtm_newaddr, NULL, 0);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR,
- inet6_rtm_deladdr, NULL, 0);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
- inet6_rtm_getaddr, inet6_dump_ifaddr,
- RTNL_FLAG_DOIT_UNLOCKED);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
- NULL, inet6_dump_ifmcaddr, 0);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
- NULL, inet6_dump_ifacaddr, 0);
- if (err < 0)
- goto errout;
- err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
- inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf,
- RTNL_FLAG_DOIT_UNLOCKED);
- if (err < 0)
- goto errout;
err = ipv6_addr_label_rtnl_register();
if (err < 0)
goto errout;
@@ -7427,6 +7480,7 @@ int __init addrconf_init(void)
errout:
rtnl_unregister_all(PF_INET6);
rtnl_af_unregister(&inet6_ops);
+erraf:
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
destroy_workqueue(addrconf_wq);
@@ -7448,17 +7502,17 @@ void addrconf_cleanup(void)
rtnl_af_unregister(&inet6_ops);
- rtnl_lock();
+ rtnl_net_lock(&init_net);
/* clean dev list */
for_each_netdev(&init_net, dev) {
- if (__in6_dev_get(dev) == NULL)
+ if (!__in6_dev_get_rtnl_net(dev))
continue;
addrconf_ifdown(dev, true);
}
addrconf_ifdown(init_net.loopback_dev, true);
- rtnl_unlock();
+ rtnl_net_unlock(&init_net);
destroy_workqueue(addrconf_wq);
}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 17ac45aa7194..ab054f329e12 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -234,7 +234,8 @@ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
out:
if (!ret)
- net->ipv6.ip6addrlbl_table.seq++;
+ WRITE_ONCE(net->ipv6.ip6addrlbl_table.seq,
+ net->ipv6.ip6addrlbl_table.seq + 1);
return ret;
}
@@ -445,7 +446,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
};
static int ip6addrlbl_fill(struct sk_buff *skb,
- struct ip6addrlbl_entry *p,
+ const struct ip6addrlbl_entry *p,
u32 lseq,
u32 portid, u32 seq, int event,
unsigned int flags)
@@ -498,7 +499,8 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct net *net = sock_net(skb->sk);
struct ip6addrlbl_entry *p;
int idx = 0, s_idx = cb->args[0];
- int err;
+ int err = 0;
+ u32 lseq;
if (cb->strict_check) {
err = ip6addrlbl_valid_dump_req(nlh, cb->extack);
@@ -507,10 +509,11 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
rcu_read_lock();
+ lseq = READ_ONCE(net->ipv6.ip6addrlbl_table.seq);
hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
if (idx >= s_idx) {
err = ip6addrlbl_fill(skb, p,
- net->ipv6.ip6addrlbl_table.seq,
+ lseq,
NETLINK_CB(cb->skb).portid,
nlh->nlmsg_seq,
RTM_NEWADDRLABEL,
@@ -522,7 +525,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
}
rcu_read_unlock();
cb->args[0] = idx;
- return skb->len;
+ return err;
}
static inline int ip6addrlbl_msgsize(void)
@@ -614,7 +617,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
rcu_read_lock();
p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
- lseq = net->ipv6.ip6addrlbl_table.seq;
+ lseq = READ_ONCE(net->ipv6.ip6addrlbl_table.seq);
if (p)
err = ip6addrlbl_fill(skb, p, lseq,
NETLINK_CB(in_skb).portid,
@@ -631,22 +634,17 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
}
+static const struct rtnl_msg_handler ipv6_adddr_label_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDRLABEL,
+ .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDRLABEL,
+ .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDRLABEL,
+ .doit = ip6addrlbl_get, .dumpit = ip6addrlbl_dump,
+ .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
+};
+
int __init ipv6_addr_label_rtnl_register(void)
{
- int ret;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL,
- ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- if (ret < 0)
- return ret;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL,
- ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- if (ret < 0)
- return ret;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
- ip6addrlbl_get,
- ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
- return ret;
+ return rtnl_register_many(ipv6_adddr_label_rtnl_msg_handlers);
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 959bfd9f6344..f60ec8b0f8ea 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -64,6 +64,7 @@
#include <net/xfrm.h>
#include <net/ioam6.h>
#include <net/rawv6.h>
+#include <net/rps.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
@@ -251,31 +252,29 @@ lookup_protocol:
*/
inet->inet_sport = htons(inet->inet_num);
err = sk->sk_prot->hash(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
if (!kern) {
err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
- if (err) {
- sk_common_release(sk);
- goto out;
- }
+ if (err)
+ goto out_sk_release;
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
+out_sk_release:
+ sk_common_release(sk);
+ sock->sk = NULL;
+ goto out;
}
static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
@@ -508,7 +507,7 @@ void inet6_cleanup_sock(struct sock *sk)
/* Free tx options */
- opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+ opt = unrcu_pointer(xchg(&np->opt, NULL));
if (opt) {
atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
txopt_put(opt);
@@ -707,6 +706,7 @@ const struct proto_ops inet6_stream_ops = {
.splice_eof = inet_splice_eof,
.sendmsg_locked = tcp_sendmsg_locked,
.splice_read = tcp_splice_read,
+ .set_peek_off = sk_set_peek_off,
.read_sock = tcp_read_sock,
.read_skb = tcp_read_skb,
.peek_len = tcp_peek_len,
@@ -736,7 +736,7 @@ const struct proto_ops inet6_dgram_ops = {
.recvmsg = inet6_recvmsg, /* retpoline's sake */
.read_skb = udp_read_skb,
.mmap = sock_no_mmap,
- .set_peek_off = sk_set_peek_off,
+ .set_peek_off = udp_set_peek_off,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet6_compat_ioctl,
#endif
@@ -1059,6 +1059,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.nd_tbl = &nd_tbl,
.ipv6_fragment = ip6_fragment,
.ipv6_dev_find = ipv6_dev_find,
+ .ip6_xmit = ip6_xmit,
};
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index bb17f484ee2c..21e01695b48c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -49,9 +49,10 @@ static DEFINE_SPINLOCK(acaddr_hash_lock);
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
-static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+static u32 inet6_acaddr_hash(const struct net *net,
+ const struct in6_addr *addr)
{
- u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+ u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net));
return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
}
@@ -252,9 +253,8 @@ static void aca_free_rcu(struct rcu_head *h)
static void aca_put(struct ifacaddr6 *ac)
{
- if (refcount_dec_and_test(&ac->aca_refcnt)) {
- call_rcu(&ac->rcu, aca_free_rcu);
- }
+ if (refcount_dec_and_test(&ac->aca_refcnt))
+ call_rcu_hurry(&ac->rcu, aca_free_rcu);
}
static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
@@ -278,6 +278,37 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
return aca;
}
+static void inet6_ifacaddr_notify(struct net_device *dev,
+ const struct ifacaddr6 *ifaca, int event)
+{
+ struct inet6_fill_args fillargs = {
+ .event = event,
+ .netnsid = -1,
+ };
+ struct net *net = dev_net(dev);
+ struct sk_buff *skb;
+ int err = -ENOMEM;
+
+ skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+ nla_total_size(sizeof(struct in6_addr)) +
+ nla_total_size(sizeof(struct ifa_cacheinfo)),
+ GFP_KERNEL);
+ if (!skb)
+ goto error;
+
+ err = inet6_fill_ifacaddr(skb, ifaca, &fillargs);
+ if (err < 0) {
+ pr_err("Failed to fill in anycast addresses (err %d)\n", err);
+ nlmsg_free(skb);
+ goto error;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ACADDR, NULL, GFP_KERNEL);
+ return;
+error:
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ACADDR, err);
+}
+
/*
* device anycast group inc (add if not found)
*/
@@ -296,7 +327,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
goto out;
}
- for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+ for (aca = rtnl_dereference(idev->ac_list); aca;
+ aca = rtnl_dereference(aca->aca_next)) {
if (ipv6_addr_equal(&aca->aca_addr, addr)) {
aca->aca_users++;
err = 0;
@@ -317,13 +349,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
goto out;
}
- aca->aca_next = idev->ac_list;
- idev->ac_list = aca;
-
/* Hold this for addrconf_join_solict() below before we unlock,
* it is already exposed via idev->ac_list.
*/
aca_get(aca);
+ aca->aca_next = idev->ac_list;
+ rcu_assign_pointer(idev->ac_list, aca);
+
write_unlock_bh(&idev->lock);
ipv6_add_acaddr_hash(net, aca);
@@ -332,6 +364,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
addrconf_join_solict(idev->dev, &aca->aca_addr);
+ inet6_ifacaddr_notify(idev->dev, aca, RTM_NEWANYCAST);
+
aca_put(aca);
return 0;
out:
@@ -350,7 +384,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
write_lock_bh(&idev->lock);
prev_aca = NULL;
- for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+ for (aca = rtnl_dereference(idev->ac_list); aca;
+ aca = rtnl_dereference(aca->aca_next)) {
if (ipv6_addr_equal(&aca->aca_addr, addr))
break;
prev_aca = aca;
@@ -364,15 +399,17 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
return 0;
}
if (prev_aca)
- prev_aca->aca_next = aca->aca_next;
+ rcu_assign_pointer(prev_aca->aca_next, aca->aca_next);
else
- idev->ac_list = aca->aca_next;
+ rcu_assign_pointer(idev->ac_list, aca->aca_next);
write_unlock_bh(&idev->lock);
ipv6_del_acaddr_hash(aca);
addrconf_leave_solict(idev, &aca->aca_addr);
ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
+ inet6_ifacaddr_notify(idev->dev, aca, RTM_DELANYCAST);
+
aca_put(aca);
return 0;
}
@@ -392,8 +429,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
struct ifacaddr6 *aca;
write_lock_bh(&idev->lock);
- while ((aca = idev->ac_list) != NULL) {
- idev->ac_list = aca->aca_next;
+ while ((aca = rtnl_dereference(idev->ac_list)) != NULL) {
+ rcu_assign_pointer(idev->ac_list, aca->aca_next);
write_unlock_bh(&idev->lock);
ipv6_del_acaddr_hash(aca);
@@ -420,11 +457,10 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
idev = __in6_dev_get(dev);
if (idev) {
- read_lock_bh(&idev->lock);
- for (aca = idev->ac_list; aca; aca = aca->aca_next)
+ for (aca = rcu_dereference(idev->ac_list); aca;
+ aca = rcu_dereference(aca->aca_next))
if (ipv6_addr_equal(&aca->aca_addr, addr))
break;
- read_unlock_bh(&idev->lock);
return aca != NULL;
}
return false;
@@ -477,30 +513,25 @@ bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
struct ac6_iter_state {
struct seq_net_private p;
struct net_device *dev;
- struct inet6_dev *idev;
};
#define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private)
static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
{
- struct ifacaddr6 *im = NULL;
struct ac6_iter_state *state = ac6_seq_private(seq);
struct net *net = seq_file_net(seq);
+ struct ifacaddr6 *im = NULL;
- state->idev = NULL;
for_each_netdev_rcu(net, state->dev) {
struct inet6_dev *idev;
+
idev = __in6_dev_get(state->dev);
if (!idev)
continue;
- read_lock_bh(&idev->lock);
- im = idev->ac_list;
- if (im) {
- state->idev = idev;
+ im = rcu_dereference(idev->ac_list);
+ if (im)
break;
- }
- read_unlock_bh(&idev->lock);
}
return im;
}
@@ -508,22 +539,17 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
{
struct ac6_iter_state *state = ac6_seq_private(seq);
+ struct inet6_dev *idev;
- im = im->aca_next;
+ im = rcu_dereference(im->aca_next);
while (!im) {
- if (likely(state->idev != NULL))
- read_unlock_bh(&state->idev->lock);
-
state->dev = next_net_device_rcu(state->dev);
- if (!state->dev) {
- state->idev = NULL;
+ if (!state->dev)
break;
- }
- state->idev = __in6_dev_get(state->dev);
- if (!state->idev)
+ idev = __in6_dev_get(state->dev);
+ if (!idev)
continue;
- read_lock_bh(&state->idev->lock);
- im = state->idev->ac_list;
+ im = rcu_dereference(idev->ac_list);
}
return im;
}
@@ -555,12 +581,6 @@ static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void ac6_seq_stop(struct seq_file *seq, void *v)
__releases(RCU)
{
- struct ac6_iter_state *state = ac6_seq_private(seq);
-
- if (likely(state->idev != NULL)) {
- read_unlock_bh(&state->idev->lock);
- state->idev = NULL;
- }
rcu_read_unlock();
}
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 1578ed9e97d8..dbcea9fee626 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -29,7 +29,7 @@
#include <net/calipso.h>
#include <linux/atomic.h>
#include <linux/bug.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/crc-ccitt.h>
/* Maximium size of the calipso option including
@@ -657,11 +657,8 @@ static int calipso_map_cat_ntoh(const struct calipso_doi *doi_def,
net_clen_bits,
spot + 1,
1);
- if (spot < 0) {
- if (spot == -2)
- return -EFAULT;
+ if (spot < 0)
return 0;
- }
ret_val = netlbl_catmap_setbit(&secattr->attr.mls.cat,
spot,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6e6efe026cdc..9e73944e3b53 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -36,6 +36,7 @@
#include <net/tcp.h>
#include <net/espintcp.h>
#include <net/inet6_hashtables.h>
+#include <linux/skbuff_ref.h>
#include <linux/highmem.h>
@@ -112,7 +113,7 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
-static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
+static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
{
struct crypto_aead *aead = x->data;
int extralen = 0;
@@ -131,7 +132,8 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
*/
if (req->src != req->dst)
for (sg = sg_next(req->src); sg; sg = sg_next(sg))
- put_page(sg_page(sg));
+ skb_page_unref(page_to_netmem(sg_page(sg)),
+ skb->pp_recycle);
}
#ifdef CONFIG_INET6_ESPINTCP
@@ -255,8 +257,7 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
#else
static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
{
- kfree_skb(skb);
-
+ WARN_ON(1);
return -EOPNOTSUPP;
}
#endif
@@ -294,7 +295,7 @@ static void esp_output_done(void *data, int err)
}
tmp = ESP_SKB_CB(skb)->tmp;
- esp_ssg_unref(x, tmp);
+ esp_ssg_unref(x, tmp, skb);
kfree(tmp);
esp_output_encap_csum(skb);
@@ -314,7 +315,7 @@ static void esp_output_done(void *data, int err)
x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
esp_output_tail_tcp(x, skb);
else
- xfrm_output_resume(skb->sk, skb, err);
+ xfrm_output_resume(skb_to_full_sk(skb), skb, err);
}
}
@@ -383,7 +384,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
__be16 dport)
{
struct udphdr *uh;
- __be32 *udpdata32;
unsigned int len;
len = skb->len + esp->tailen - skb_transport_offset(skb);
@@ -398,12 +398,6 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
*skb_mac_header(skb) = IPPROTO_UDP;
- if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
- udpdata32 = (__be32 *)(uh + 1);
- udpdata32[0] = udpdata32[1] = 0;
- return (struct ip_esp_hdr *)(udpdata32 + 2);
- }
-
return (struct ip_esp_hdr *)(uh + 1);
}
@@ -459,7 +453,6 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb,
switch (encap_type) {
default:
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport);
break;
case TCP_ENCAP_ESPINTCP:
@@ -677,7 +670,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
}
if (sg != dsg)
- esp_ssg_unref(x, tmp);
+ esp_ssg_unref(x, tmp, skb);
if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
err = esp_output_tail_tcp(x, skb);
@@ -822,7 +815,6 @@ int esp6_input_done2(struct sk_buff *skb, int err)
source = th->source;
break;
case UDP_ENCAP_ESPINUDP:
- case UDP_ENCAP_ESPINUDP_NON_IKE:
source = uh->source;
break;
default:
@@ -867,7 +859,8 @@ int esp6_input_done2(struct sk_buff *skb, int err)
skb_postpull_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
skb_pull_rcsum(skb, hlen);
- if (x->props.mode == XFRM_MODE_TUNNEL)
+ if (x->props.mode == XFRM_MODE_TUNNEL ||
+ x->props.mode == XFRM_MODE_IPTFS)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -hdr_len);
@@ -1232,9 +1225,6 @@ static int esp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack)
case UDP_ENCAP_ESPINUDP:
x->props.header_len += sizeof(struct udphdr);
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
- break;
#ifdef CONFIG_INET6_ESPINTCP
case TCP_ENCAP_ESPINTCP:
/* only the length field, TCP encap is done by
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 527b7caddbc6..7b41fb4f00b5 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -80,9 +80,16 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET6);
+ x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET6);
+
+ if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ /* non-offload path will record the error and audit log */
+ xfrm_state_put(x);
+ x = NULL;
+ }
+
if (!x)
goto out_reset;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 02e9ffb63af1..6789623b2b0d 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -50,6 +50,7 @@
#endif
#include <net/rpl.h>
#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
#include <net/ioam6.h>
#include <net/dst_metadata.h>
@@ -378,9 +379,8 @@ static int ipv6_srh_rcv(struct sk_buff *skb)
idev = __in6_dev_get(skb->dev);
- accept_seg6 = net->ipv6.devconf_all->seg6_enabled;
- if (accept_seg6 > idev->cnf.seg6_enabled)
- accept_seg6 = idev->cnf.seg6_enabled;
+ accept_seg6 = min(READ_ONCE(net->ipv6.devconf_all->seg6_enabled),
+ READ_ONCE(idev->cnf.seg6_enabled));
if (!accept_seg6) {
kfree_skb(skb);
@@ -654,10 +654,13 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
struct ipv6_rt_hdr *hdr;
struct rt0_hdr *rthdr;
struct net *net = dev_net(skb->dev);
- int accept_source_route = net->ipv6.devconf_all->accept_source_route;
+ int accept_source_route;
- if (idev && accept_source_route > idev->cnf.accept_source_route)
- accept_source_route = idev->cnf.accept_source_route;
+ accept_source_route = READ_ONCE(net->ipv6.devconf_all->accept_source_route);
+
+ if (idev)
+ accept_source_route = min(accept_source_route,
+ READ_ONCE(idev->cnf.accept_source_route));
if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
!pskb_may_pull(skb, (skb_transport_offset(skb) +
@@ -801,7 +804,7 @@ looped_back:
ip6_route_input(skb);
if (skb_dst(skb)->error) {
- skb_push(skb, skb->data - skb_network_header(skb));
+ skb_push(skb, -skb_network_offset(skb));
dst_input(skb);
return -1;
}
@@ -818,7 +821,7 @@ looped_back:
goto looped_back;
}
- skb_push(skb, skb->data - skb_network_header(skb));
+ skb_push(skb, -skb_network_offset(skb));
dst_input(skb);
return -1;
@@ -880,14 +883,6 @@ void ipv6_exthdrs_exit(void)
Hop-by-hop options.
**********************************/
-/*
- * Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input().
- */
-static inline struct net *ipv6_skb_net(struct sk_buff *skb)
-{
- return skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev);
-}
-
/* Router Alert as of RFC 2711 */
static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
@@ -918,7 +913,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
goto drop;
/* Ignore if IOAM is not enabled on ingress */
- if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled)
+ if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled))
goto ignore;
/* Truncated Option header */
@@ -938,7 +933,7 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
goto drop;
/* Ignore if the IOAM namespace is unknown */
- ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id);
+ ns = ioam6_namespace(dev_net(skb->dev), trace->namespace_id);
if (!ns)
goto ignore;
@@ -954,6 +949,9 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
+ optoff + sizeof(*hdr));
ioam6_fill_trace_data(skb, ns, trace, true);
+
+ ioam6_event(IOAM6_EVENT_TRACE, dev_net(skb->dev),
+ GFP_ATOMIC, (void *)trace, hdr->opt_len - 2);
break;
default:
break;
diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c
index f87ae33e1d01..949b72610df7 100644
--- a/net/ipv6/fib6_notifier.c
+++ b/net/ipv6/fib6_notifier.c
@@ -22,7 +22,7 @@ int call_fib6_notifiers(struct net *net, enum fib_event_type event_type,
return call_fib_notifiers(net, event_type, info);
}
-static unsigned int fib6_seq_read(struct net *net)
+static unsigned int fib6_seq_read(const struct net *net)
{
return fib6_tables_seq_read(net) + fib6_rules_seq_read(net);
}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 7523c4baef35..67d39114d9a6 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -26,14 +26,17 @@ struct fib6_rule {
struct fib_rule common;
struct rt6key src;
struct rt6key dst;
+ __be32 flowlabel;
+ __be32 flowlabel_mask;
dscp_t dscp;
+ u8 dscp_full:1; /* DSCP or TOS selector */
};
static bool fib6_rule_matchall(const struct fib_rule *rule)
{
struct fib6_rule *r = container_of(rule, struct fib6_rule, common);
- if (r->dst.plen || r->src.plen || r->dscp)
+ if (r->dst.plen || r->src.plen || r->dscp || r->flowlabel_mask)
return false;
return fib_rule_matchall(rule);
}
@@ -55,7 +58,7 @@ int fib6_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, AF_INET6, extack);
}
-unsigned int fib6_rules_seq_read(struct net *net)
+unsigned int fib6_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, AF_INET6);
}
@@ -233,8 +236,12 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
rt = pol_lookup_func(lookup,
net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
+ struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
+
+ if (!idev)
+ goto again;
err = fib6_rule_saddr(net, rule, flags, flp6,
- ip6_dst_idev(&rt->dst)->dev);
+ idev->dev);
if (err == -EAGAIN)
goto again;
@@ -327,6 +334,9 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel))
return 0;
+ if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask)
+ return 0;
+
if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
return 0;
@@ -341,6 +351,49 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule,
return 1;
}
+static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6,
+ struct netlink_ext_ack *extack)
+{
+ if (rule6->dscp) {
+ NL_SET_ERR_MSG(extack, "Cannot specify both TOS and DSCP");
+ return -EINVAL;
+ }
+
+ rule6->dscp = inet_dsfield_to_dscp(nla_get_u8(nla) << 2);
+ rule6->dscp_full = true;
+
+ return 0;
+}
+
+static int fib6_nl2rule_flowlabel(struct nlattr **tb, struct fib6_rule *rule6,
+ struct netlink_ext_ack *extack)
+{
+ __be32 flowlabel, flowlabel_mask;
+
+ if (NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL) ||
+ NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL_MASK))
+ return -EINVAL;
+
+ flowlabel = nla_get_be32(tb[FRA_FLOWLABEL]);
+ flowlabel_mask = nla_get_be32(tb[FRA_FLOWLABEL_MASK]);
+
+ if (flowlabel_mask & ~IPV6_FLOWLABEL_MASK) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[FRA_FLOWLABEL_MASK],
+ "Invalid flow label mask");
+ return -EINVAL;
+ }
+
+ if (flowlabel & ~flowlabel_mask) {
+ NL_SET_ERR_MSG(extack, "Flow label and mask do not match");
+ return -EINVAL;
+ }
+
+ rule6->flowlabel = flowlabel;
+ rule6->flowlabel_mask = flowlabel_mask;
+
+ return 0;
+}
+
static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct fib_rule_hdr *frh,
struct nlattr **tb,
@@ -357,6 +410,13 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
}
rule6->dscp = inet_dsfield_to_dscp(frh->tos);
+ if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0)
+ goto errout;
+
+ if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) &&
+ fib6_nl2rule_flowlabel(tb, rule6, extack) < 0)
+ goto errout;
+
if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
if (rule->table == RT6_TABLE_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid table");
@@ -409,7 +469,25 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
if (frh->dst_len && (rule6->dst.plen != frh->dst_len))
return 0;
- if (frh->tos && inet_dscp_to_dsfield(rule6->dscp) != frh->tos)
+ if (frh->tos &&
+ (rule6->dscp_full ||
+ inet_dscp_to_dsfield(rule6->dscp) != frh->tos))
+ return 0;
+
+ if (tb[FRA_DSCP]) {
+ dscp_t dscp;
+
+ dscp = inet_dsfield_to_dscp(nla_get_u8(tb[FRA_DSCP]) << 2);
+ if (!rule6->dscp_full || rule6->dscp != dscp)
+ return 0;
+ }
+
+ if (tb[FRA_FLOWLABEL] &&
+ nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel)
+ return 0;
+
+ if (tb[FRA_FLOWLABEL_MASK] &&
+ nla_get_be32(tb[FRA_FLOWLABEL_MASK]) != rule6->flowlabel_mask)
return 0;
if (frh->src_len &&
@@ -430,7 +508,20 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
frh->dst_len = rule6->dst.plen;
frh->src_len = rule6->src.plen;
- frh->tos = inet_dscp_to_dsfield(rule6->dscp);
+
+ if (rule6->dscp_full) {
+ frh->tos = 0;
+ if (nla_put_u8(skb, FRA_DSCP,
+ inet_dscp_to_dsfield(rule6->dscp) >> 2))
+ goto nla_put_failure;
+ } else {
+ frh->tos = inet_dscp_to_dsfield(rule6->dscp);
+ }
+
+ if (rule6->flowlabel_mask &&
+ (nla_put_be32(skb, FRA_FLOWLABEL, rule6->flowlabel) ||
+ nla_put_be32(skb, FRA_FLOWLABEL_MASK, rule6->flowlabel_mask)))
+ goto nla_put_failure;
if ((rule6->dst.plen &&
nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) ||
@@ -446,7 +537,15 @@ nla_put_failure:
static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
{
return nla_total_size(16) /* dst */
- + nla_total_size(16); /* src */
+ + nla_total_size(16) /* src */
+ + nla_total_size(1) /* dscp */
+ + nla_total_size(4) /* flowlabel */
+ + nla_total_size(4); /* flowlabel mask */
+}
+
+static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
+{
+ rt_genid_bump_ipv6(ops->fro_net);
}
static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
@@ -461,6 +560,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
+ .flush_cache = fib6_rule_flush_cache,
.nlgroup = RTNLGRP_IPV6_RULE,
.owner = THIS_MODULE,
.fro_net = &init_net,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1635da07285f..4d14ab7f7e99 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -76,7 +76,7 @@ static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
{
/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
if (type == ICMPV6_PKT_TOOBIG)
ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
@@ -175,14 +175,16 @@ static bool icmpv6_mask_allow(struct net *net, int type)
return false;
}
-static bool icmpv6_global_allow(struct net *net, int type)
+static bool icmpv6_global_allow(struct net *net, int type,
+ bool *apply_ratelimit)
{
if (icmpv6_mask_allow(net, type))
return true;
- if (icmp_global_allow())
+ if (icmp_global_allow(net)) {
+ *apply_ratelimit = true;
return true;
-
+ }
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -191,13 +193,13 @@ static bool icmpv6_global_allow(struct net *net, int type)
* Check the ICMP output rate limit
*/
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
- struct flowi6 *fl6)
+ struct flowi6 *fl6, bool apply_ratelimit)
{
struct net *net = sock_net(sk);
struct dst_entry *dst;
bool res = false;
- if (icmpv6_mask_allow(net, type))
+ if (!apply_ratelimit)
return true;
/*
@@ -212,7 +214,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
} else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
res = true;
} else {
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
int tmo = net->ipv6.sysctl.icmpv6_time;
struct inet_peer *peer;
@@ -220,14 +222,16 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
- peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
+ rcu_read_lock();
+ peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
res = inet_peer_xrlim_allow(peer, tmo);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
}
if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
ICMP6_MIB_RATELIMITHOST);
+ else
+ icmp_global_consume(net);
dst_release(dst);
return res;
}
@@ -241,7 +245,7 @@ static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
dst = ip6_route_output(net, sk, fl6);
if (!dst->error) {
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct in6_addr prefsrc;
rt6_get_prefsrc(rt, &prefsrc);
@@ -452,6 +456,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
struct net *net;
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
+ bool apply_ratelimit = false;
struct dst_entry *dst;
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
@@ -468,7 +473,10 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (!skb->dev)
return;
- net = dev_net(skb->dev);
+
+ rcu_read_lock();
+
+ net = dev_net_rcu(skb->dev);
mark = IP6_REPLY_MARK(net, skb->mark);
/*
* Make sure we respect the rules
@@ -491,7 +499,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
!(type == ICMPV6_PARAMPROB &&
code == ICMPV6_UNK_OPTION &&
(opt_unrec(skb, info))))
- return;
+ goto out;
saddr = NULL;
}
@@ -521,7 +529,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
&hdr->saddr, &hdr->daddr);
- return;
+ goto out;
}
/*
@@ -530,14 +538,15 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (is_ineligible(skb)) {
net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
&hdr->saddr, &hdr->daddr);
- return;
+ goto out;
}
- /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
+ /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */
local_bh_disable();
/* Check global sysctl_icmp_msgs_per_sec ratelimit */
- if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
+ if (!(skb->dev->flags & IFF_LOOPBACK) &&
+ !icmpv6_global_allow(net, type, &apply_ratelimit))
goto out_bh_enable;
mip6_addr_swap(skb, parm);
@@ -575,8 +584,8 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
np = inet6_sk(sk);
- if (!icmpv6_xrlim_allow(sk, type, &fl6))
- goto out;
+ if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit))
+ goto out_unlock;
tmp_hdr.icmp6_type = type;
tmp_hdr.icmp6_code = code;
@@ -594,7 +603,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
- goto out;
+ goto out_unlock;
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
@@ -610,13 +619,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
goto out_dst_release;
}
- rcu_read_lock();
idev = __in6_dev_get(skb->dev);
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr),
- &ipc6, &fl6, (struct rt6_info *)dst,
+ &ipc6, &fl6, dst_rt6_info(dst),
MSG_DONTWAIT)) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
@@ -624,13 +632,15 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
len + sizeof(struct icmp6hdr));
}
- rcu_read_unlock();
+
out_dst_release:
dst_release(dst);
-out:
+out_unlock:
icmpv6_xmit_unlock(sk);
out_bh_enable:
local_bh_enable();
+out:
+ rcu_read_unlock();
}
EXPORT_SYMBOL(icmp6_send);
@@ -673,8 +683,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
- skb, 0);
+ rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr,
+ NULL, 0, skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -711,12 +721,13 @@ EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
struct sock *sk;
struct inet6_dev *idev;
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
struct icmp6hdr *icmph = icmp6_hdr(skb);
+ bool apply_ratelimit = false;
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
struct icmpv6_msg msg;
@@ -781,8 +792,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
goto out;
/* Check the ratelimit */
- if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
- !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
+ if ((!(skb->dev->flags & IFF_LOOPBACK) &&
+ !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) ||
+ !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit))
goto out_dst_release;
idev = __in6_dev_get(skb->dev);
@@ -803,7 +815,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
skb->len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), &ipc6, &fl6,
- (struct rt6_info *)dst, MSG_DONTWAIT)) {
+ dst_rt6_info(dst), MSG_DONTWAIT)) {
__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
@@ -824,7 +836,7 @@ enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
u8 code, __be32 info)
{
struct inet6_skb_parm *opt = IP6CB(skb);
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
const struct inet6_protocol *ipprot;
enum skb_drop_reason reason;
int inner_offset;
@@ -881,7 +893,7 @@ out:
static int icmpv6_rcv(struct sk_buff *skb)
{
enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
- struct net *net = dev_net(skb->dev);
+ struct net *net = dev_net_rcu(skb->dev);
struct net_device *dev = icmp6_dev(skb);
struct inet6_dev *idev = __in6_dev_get(dev);
const struct in6_addr *saddr, *daddr;
@@ -913,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS);
saddr = &ipv6_hdr(skb)->saddr;
daddr = &ipv6_hdr(skb)->daddr;
@@ -931,7 +943,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
type = hdr->icmp6_type;
- ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
+ ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type);
switch (type) {
case ICMPV6_ECHO_REQUEST:
@@ -1026,9 +1038,9 @@ static int icmpv6_rcv(struct sk_buff *skb)
csum_error:
reason = SKB_DROP_REASON_ICMP_CSUM;
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS);
discard_it:
- __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
+ __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS);
drop_no_count:
kfree_skb_reason(skb, reason);
return 0;
@@ -1206,7 +1218,6 @@ static struct ctl_table ipv6_icmp_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { },
};
struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index ad5f6f6ba333..85b92917849b 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -108,6 +108,7 @@ int ila_lwt_init(void);
void ila_lwt_fini(void);
int ila_xlat_init_net(struct net *net);
+void ila_xlat_pre_exit_net(struct net *net);
void ila_xlat_exit_net(struct net *net);
int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 8c1ce78956ba..7d574f5132e2 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -38,7 +38,7 @@ static inline struct ila_params *ila_params_lwtunnel(
static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
- struct rt6_info *rt = (struct rt6_info *)orig_dst;
+ struct rt6_info *rt = dst_rt6_info(orig_dst);
struct ila_lwt *ilwt = ila_lwt_lwtunnel(orig_dst->lwtstate);
struct dst_entry *dst;
int err = -EINVAL;
@@ -58,7 +58,9 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return orig_dst->lwtstate->orig_output(net, sk, skb);
}
+ local_bh_disable();
dst = dst_cache_get(&ilwt->dst_cache);
+ local_bh_enable();
if (unlikely(!dst)) {
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -70,7 +72,7 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = orig_dst->dev->ifindex;
fl6.flowi6_iif = LOOPBACK_IFINDEX;
- fl6.daddr = *rt6_nexthop((struct rt6_info *)orig_dst,
+ fl6.daddr = *rt6_nexthop(dst_rt6_info(orig_dst),
&ip6h->daddr);
dst = ip6_route_output(net, NULL, &fl6);
@@ -86,10 +88,15 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
- if (ilwt->connected)
+ /* cache only if we don't create a dst reference loop */
+ if (ilwt->connected && orig_dst->lwtstate != dst->lwtstate) {
+ local_bh_disable();
dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
}
+ skb_dst_drop(skb);
skb_dst_set(skb, dst);
return dst_output(net, sk, skb);
diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c
index 69caed07315f..976c78efbae1 100644
--- a/net/ipv6/ila/ila_main.c
+++ b/net/ipv6/ila/ila_main.c
@@ -71,6 +71,11 @@ ila_xlat_init_fail:
return err;
}
+static __net_exit void ila_pre_exit_net(struct net *net)
+{
+ ila_xlat_pre_exit_net(net);
+}
+
static __net_exit void ila_exit_net(struct net *net)
{
ila_xlat_exit_net(net);
@@ -78,6 +83,7 @@ static __net_exit void ila_exit_net(struct net *net)
static struct pernet_operations ila_net_ops = {
.init = ila_init_net,
+ .pre_exit = ila_pre_exit_net,
.exit = ila_exit_net,
.id = &ila_net_id,
.size = sizeof(struct ila_net),
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 67e8c9440977..1d41b2ab4884 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -105,16 +105,11 @@ static int parse_nl_config(struct genl_info *info,
xp->ip.locator_match.v64 = (__force __be64)nla_get_u64(
info->attrs[ILA_ATTR_LOCATOR_MATCH]);
- if (info->attrs[ILA_ATTR_CSUM_MODE])
- xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]);
- else
- xp->ip.csum_mode = ILA_CSUM_NO_ACTION;
+ xp->ip.csum_mode = nla_get_u8_default(info->attrs[ILA_ATTR_CSUM_MODE],
+ ILA_CSUM_NO_ACTION);
- if (info->attrs[ILA_ATTR_IDENT_TYPE])
- xp->ip.ident_type = nla_get_u8(
- info->attrs[ILA_ATTR_IDENT_TYPE]);
- else
- xp->ip.ident_type = ILA_ATYPE_USE_FORMAT;
+ xp->ip.ident_type = nla_get_u8_default(info->attrs[ILA_ATTR_IDENT_TYPE],
+ ILA_ATYPE_USE_FORMAT);
if (info->attrs[ILA_ATTR_IFINDEX])
xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]);
@@ -200,6 +195,8 @@ static const struct nf_hook_ops ila_nf_hook_ops[] = {
},
};
+static DEFINE_MUTEX(ila_mutex);
+
static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
@@ -207,16 +204,20 @@ static int ila_add_mapping(struct net *net, struct ila_xlat_params *xp)
spinlock_t *lock = ila_get_lock(ilan, xp->ip.locator_match);
int err = 0, order;
- if (!ilan->xlat.hooks_registered) {
+ if (!READ_ONCE(ilan->xlat.hooks_registered)) {
/* We defer registering net hooks in the namespace until the
* first mapping is added.
*/
- err = nf_register_net_hooks(net, ila_nf_hook_ops,
- ARRAY_SIZE(ila_nf_hook_ops));
+ mutex_lock(&ila_mutex);
+ if (!ilan->xlat.hooks_registered) {
+ err = nf_register_net_hooks(net, ila_nf_hook_ops,
+ ARRAY_SIZE(ila_nf_hook_ops));
+ if (!err)
+ WRITE_ONCE(ilan->xlat.hooks_registered, true);
+ }
+ mutex_unlock(&ila_mutex);
if (err)
return err;
-
- ilan->xlat.hooks_registered = true;
}
ila = kzalloc(sizeof(*ila), GFP_KERNEL);
@@ -619,6 +620,15 @@ int ila_xlat_init_net(struct net *net)
return 0;
}
+void ila_xlat_pre_exit_net(struct net *net)
+{
+ struct ila_net *ilan = net_generic(net, ila_net_id);
+
+ if (ilan->xlat.hooks_registered)
+ nf_unregister_net_hooks(net, ila_nf_hook_ops,
+ ARRAY_SIZE(ila_nf_hook_ops));
+}
+
void ila_xlat_exit_net(struct net *net)
{
struct ila_net *ilan = net_generic(net, ila_net_id);
@@ -626,10 +636,6 @@ void ila_xlat_exit_net(struct net *net)
rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL);
free_bucket_spinlocks(ilan->xlat.locks);
-
- if (ilan->xlat.hooks_registered)
- nf_unregister_net_hooks(net, ila_nf_hook_ops,
- ARRAY_SIZE(ila_nf_hook_ops));
}
static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila)
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b0e8d278e8a9..9ec05e354baa 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -14,27 +14,26 @@
#include <linux/random.h>
#include <net/addrconf.h>
+#include <net/hotdata.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/secure_seq.h>
#include <net/ip.h>
#include <net/sock_reuseport.h>
+#include <net/tcp.h>
u32 inet6_ehashfn(const struct net *net,
const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const __be16 fport)
{
- static u32 inet6_ehash_secret __read_mostly;
- static u32 ipv6_hash_secret __read_mostly;
-
u32 lhash, fhash;
net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret));
- net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
+ net_get_random_once(&tcp_ipv6_hash_secret, sizeof(tcp_ipv6_hash_secret));
lhash = (__force u32)laddr->s6_addr32[3];
- fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
+ fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret);
return __inet6_ehashfn(lhash, lport, fhash, fport,
inet6_ehash_secret + net_hash_mix(net));
@@ -47,7 +46,7 @@ EXPORT_SYMBOL_GPL(inet6_ehashfn);
*
* The sockhash lock must be held as a reader here.
*/
-struct sock *__inet6_lookup_established(struct net *net,
+struct sock *__inet6_lookup_established(const struct net *net,
struct inet_hashinfo *hashinfo,
const struct in6_addr *saddr,
const __be16 sport,
@@ -90,7 +89,7 @@ found:
}
EXPORT_SYMBOL(__inet6_lookup_established);
-static inline int compute_score(struct sock *sk, struct net *net,
+static inline int compute_score(struct sock *sk, const struct net *net,
const unsigned short hnum,
const struct in6_addr *daddr,
const int dif, const int sdif)
@@ -127,7 +126,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
* Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
* the selected sock or an error.
*/
-struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
__be16 sport,
@@ -148,7 +147,7 @@ struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
EXPORT_SYMBOL_GPL(inet6_lookup_reuseport);
/* called with rcu_read_lock() */
-static struct sock *inet6_lhash2_lookup(struct net *net,
+static struct sock *inet6_lhash2_lookup(const struct net *net,
struct inet_listen_hashbucket *ilb2,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -175,7 +174,7 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
return result;
}
-struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+struct sock *inet6_lookup_run_sk_lookup(const struct net *net,
int protocol,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -200,7 +199,7 @@ struct sock *inet6_lookup_run_sk_lookup(struct net *net,
}
EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup);
-struct sock *inet6_lookup_listener(struct net *net,
+struct sock *inet6_lookup_listener(const struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr,
@@ -244,7 +243,8 @@ done:
}
EXPORT_SYMBOL_GPL(inet6_lookup_listener);
-struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
+struct sock *inet6_lookup(const struct net *net,
+ struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
const struct in6_addr *saddr, const __be16 sport,
const struct in6_addr *daddr, const __be16 dport,
@@ -291,7 +291,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
dif, sdif))) {
if (sk2->sk_state == TCP_TIME_WAIT) {
tw = inet_twsk(sk2);
- if (twsk_unique(sk, sk2, twp))
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ tcp_twsk_unique(sk, sk2, twp))
break;
}
goto not_unique;
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index 571f0e4d9cf3..a84d332f952f 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -135,15 +135,11 @@ static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info)
ns->id = id;
- if (!info->attrs[IOAM6_ATTR_NS_DATA])
- data32 = IOAM6_U32_UNAVAILABLE;
- else
- data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]);
+ data32 = nla_get_u32_default(info->attrs[IOAM6_ATTR_NS_DATA],
+ IOAM6_U32_UNAVAILABLE);
- if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE])
- data64 = IOAM6_U64_UNAVAILABLE;
- else
- data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]);
+ data64 = nla_get_u64_default(info->attrs[IOAM6_ATTR_NS_DATA_WIDE],
+ IOAM6_U64_UNAVAILABLE);
ns->data = cpu_to_be32(data32);
ns->data_wide = cpu_to_be64(data64);
@@ -612,6 +608,68 @@ static const struct genl_ops ioam6_genl_ops[] = {
},
};
+#define IOAM6_GENL_EV_GRP_OFFSET 0
+
+static const struct genl_multicast_group ioam6_mcgrps[] = {
+ [IOAM6_GENL_EV_GRP_OFFSET] = { .name = IOAM6_GENL_EV_GRP_NAME,
+ .flags = GENL_MCAST_CAP_NET_ADMIN },
+};
+
+static int ioam6_event_put_trace(struct sk_buff *skb,
+ struct ioam6_trace_hdr *trace,
+ unsigned int len)
+{
+ if (nla_put_u16(skb, IOAM6_EVENT_ATTR_TRACE_NAMESPACE,
+ be16_to_cpu(trace->namespace_id)) ||
+ nla_put_u8(skb, IOAM6_EVENT_ATTR_TRACE_NODELEN, trace->nodelen) ||
+ nla_put_u32(skb, IOAM6_EVENT_ATTR_TRACE_TYPE,
+ be32_to_cpu(trace->type_be32)) ||
+ nla_put(skb, IOAM6_EVENT_ATTR_TRACE_DATA,
+ len - sizeof(struct ioam6_trace_hdr) - trace->remlen * 4,
+ trace->data + trace->remlen * 4))
+ return 1;
+
+ return 0;
+}
+
+void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp,
+ void *opt, unsigned int opt_len)
+{
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+
+ if (!genl_has_listeners(&ioam6_genl_family, net,
+ IOAM6_GENL_EV_GRP_OFFSET))
+ return;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!skb)
+ return;
+
+ nlh = genlmsg_put(skb, 0, 0, &ioam6_genl_family, 0, type);
+ if (!nlh)
+ goto nla_put_failure;
+
+ switch (type) {
+ case IOAM6_EVENT_UNSPEC:
+ WARN_ON_ONCE(1);
+ break;
+ case IOAM6_EVENT_TRACE:
+ if (ioam6_event_put_trace(skb, (struct ioam6_trace_hdr *)opt,
+ opt_len))
+ goto nla_put_failure;
+ break;
+ }
+
+ genlmsg_end(skb, nlh);
+ genlmsg_multicast_netns(&ioam6_genl_family, net, skb, 0,
+ IOAM6_GENL_EV_GRP_OFFSET, gfp);
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb);
+}
+
static struct genl_family ioam6_genl_family __ro_after_init = {
.name = IOAM6_GENL_NAME,
.version = IOAM6_GENL_VERSION,
@@ -620,6 +678,8 @@ static struct genl_family ioam6_genl_family __ro_after_init = {
.ops = ioam6_genl_ops,
.n_ops = ARRAY_SIZE(ioam6_genl_ops),
.resv_start_op = IOAM6_CMD_NS_SET_SCHEMA + 1,
+ .mcgrps = ioam6_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(ioam6_mcgrps),
.module = THIS_MODULE,
};
@@ -663,7 +723,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (!skb->dev)
raw16 = IOAM6_U16_UNAVAILABLE;
else
- raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id;
+ raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id);
*(__be16 *)data = cpu_to_be16(raw16);
data += sizeof(__be16);
@@ -671,7 +731,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
raw16 = IOAM6_U16_UNAVAILABLE;
else
- raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id;
+ raw16 = (__force u16)READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id);
*(__be16 *)data = cpu_to_be16(raw16);
data += sizeof(__be16);
@@ -758,7 +818,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (!skb->dev)
raw32 = IOAM6_U32_UNAVAILABLE;
else
- raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide;
+ raw32 = READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_id_wide);
*(__be32 *)data = cpu_to_be32(raw32);
data += sizeof(__be32);
@@ -766,7 +826,7 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
raw32 = IOAM6_U32_UNAVAILABLE;
else
- raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide;
+ raw32 = READ_ONCE(__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide);
*(__be32 *)data = cpu_to_be32(raw32);
data += sizeof(__be32);
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 7563f8c6aa87..09065187378e 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -42,8 +42,10 @@ struct ioam6_lwt {
struct ioam6_lwt_freq freq;
atomic_t pkt_cnt;
u8 mode;
+ bool has_tunsrc;
+ struct in6_addr tunsrc;
struct in6_addr tundst;
- struct ioam6_lwt_encap tuninfo;
+ struct ioam6_lwt_encap tuninfo;
};
static const struct netlink_range_validation freq_range = {
@@ -72,8 +74,10 @@ static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
[IOAM6_IPTUNNEL_MODE] = NLA_POLICY_RANGE(NLA_U8,
IOAM6_IPTUNNEL_MODE_MIN,
IOAM6_IPTUNNEL_MODE_MAX),
+ [IOAM6_IPTUNNEL_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[IOAM6_IPTUNNEL_DST] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
- [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
+ [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(
+ sizeof(struct ioam6_trace_hdr)),
};
static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
@@ -85,7 +89,7 @@ static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
- trace->type.bit21)
+ trace->type.bit21 | trace->type.bit23)
return false;
trace->nodelen = 0;
@@ -138,10 +142,13 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
}
}
- if (!tb[IOAM6_IPTUNNEL_MODE])
- mode = IOAM6_IPTUNNEL_MODE_INLINE;
- else
- mode = nla_get_u8(tb[IOAM6_IPTUNNEL_MODE]);
+ mode = nla_get_u8_default(tb[IOAM6_IPTUNNEL_MODE],
+ IOAM6_IPTUNNEL_MODE_INLINE);
+
+ if (tb[IOAM6_IPTUNNEL_SRC] && mode == IOAM6_IPTUNNEL_MODE_INLINE) {
+ NL_SET_ERR_MSG(extack, "no tunnel src expected with this mode");
+ return -EINVAL;
+ }
if (!tb[IOAM6_IPTUNNEL_DST] && mode != IOAM6_IPTUNNEL_MODE_INLINE) {
NL_SET_ERR_MSG(extack, "this mode needs a tunnel destination");
@@ -167,19 +174,40 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
ilwt = ioam6_lwt_state(lwt);
err = dst_cache_init(&ilwt->cache, GFP_ATOMIC);
- if (err) {
- kfree(lwt);
- return err;
- }
+ if (err)
+ goto free_lwt;
atomic_set(&ilwt->pkt_cnt, 0);
ilwt->freq.k = freq_k;
ilwt->freq.n = freq_n;
ilwt->mode = mode;
- if (tb[IOAM6_IPTUNNEL_DST])
+
+ if (!tb[IOAM6_IPTUNNEL_SRC]) {
+ ilwt->has_tunsrc = false;
+ } else {
+ ilwt->has_tunsrc = true;
+ ilwt->tunsrc = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_SRC]);
+
+ if (ipv6_addr_any(&ilwt->tunsrc)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_SRC],
+ "invalid tunnel source address");
+ err = -EINVAL;
+ goto free_cache;
+ }
+ }
+
+ if (tb[IOAM6_IPTUNNEL_DST]) {
ilwt->tundst = nla_get_in6_addr(tb[IOAM6_IPTUNNEL_DST]);
+ if (ipv6_addr_any(&ilwt->tundst)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_DST],
+ "invalid tunnel dest address");
+ err = -EINVAL;
+ goto free_cache;
+ }
+ }
+
tuninfo = ioam6_lwt_info(lwt);
tuninfo->eh.hdrlen = ((sizeof(*tuninfo) + len_aligned) >> 3) - 1;
tuninfo->pad[0] = IPV6_TLV_PADN;
@@ -201,6 +229,11 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
*ts = lwt;
return 0;
+free_cache:
+ dst_cache_destroy(&ilwt->cache);
+free_lwt:
+ kfree(lwt);
+ return err;
}
static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
@@ -220,14 +253,15 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
}
static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
- struct ioam6_lwt_encap *tuninfo)
+ struct ioam6_lwt_encap *tuninfo,
+ struct dst_entry *cache_dst)
{
struct ipv6hdr *oldhdr, *hdr;
int hdrlen, err;
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -256,7 +290,10 @@ static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
struct ioam6_lwt_encap *tuninfo,
- struct in6_addr *tundst)
+ bool has_tunsrc,
+ struct in6_addr *tunsrc,
+ struct in6_addr *tundst,
+ struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr, *inner_hdr;
@@ -265,7 +302,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
len = sizeof(*hdr) + hdrlen;
- err = skb_cow_head(skb, len + skb->mac_len);
+ err = skb_cow_head(skb, len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -285,8 +322,12 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
hdr->nexthdr = NEXTHDR_HOP;
hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
hdr->daddr = *tundst;
- ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
- IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
+
+ if (has_tunsrc)
+ memcpy(&hdr->saddr, tunsrc, sizeof(*tunsrc));
+ else
+ ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
+ IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
skb_postpush_rcsum(skb, hdr, len);
@@ -295,8 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
- struct in6_addr orig_daddr;
+ struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
u32 pkt_cnt;
@@ -311,7 +351,9 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (pkt_cnt % ilwt->freq.n >= ilwt->freq.k)
goto out;
- orig_daddr = ipv6_hdr(skb)->daddr;
+ local_bh_disable();
+ cache_dst = dst_cache_get(&ilwt->cache);
+ local_bh_enable();
switch (ilwt->mode) {
case IOAM6_IPTUNNEL_MODE_INLINE:
@@ -320,7 +362,7 @@ do_inline:
if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
goto out;
- err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
+ err = ioam6_do_inline(net, skb, &ilwt->tuninfo, cache_dst);
if (unlikely(err))
goto drop;
@@ -328,7 +370,9 @@ do_inline:
case IOAM6_IPTUNNEL_MODE_ENCAP:
do_encap:
/* Encapsulation (ip6ip6) */
- err = ioam6_do_encap(net, skb, &ilwt->tuninfo, &ilwt->tundst);
+ err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
+ ilwt->has_tunsrc, &ilwt->tunsrc,
+ &ilwt->tundst, cache_dst);
if (unlikely(err))
goto drop;
@@ -346,46 +390,48 @@ do_encap:
goto drop;
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
+ if (unlikely(!cache_dst)) {
+ struct ipv6hdr *hdr = ipv6_hdr(skb);
+ struct flowi6 fl6;
- if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
- preempt_disable();
- dst = dst_cache_get(&ilwt->cache);
- preempt_enable();
-
- if (unlikely(!dst)) {
- struct ipv6hdr *hdr = ipv6_hdr(skb);
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.daddr = hdr->daddr;
- fl6.saddr = hdr->saddr;
- fl6.flowlabel = ip6_flowinfo(hdr);
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = hdr->nexthdr;
-
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst->error) {
- err = dst->error;
- dst_release(dst);
- goto drop;
- }
-
- preempt_disable();
- dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
- preempt_enable();
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = hdr->daddr;
+ fl6.saddr = hdr->saddr;
+ fl6.flowlabel = ip6_flowinfo(hdr);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = hdr->nexthdr;
+
+ cache_dst = ip6_route_output(net, NULL, &fl6);
+ if (cache_dst->error) {
+ err = cache_dst->error;
+ goto drop;
}
- skb_dst_drop(skb);
- skb_dst_set(skb, dst);
+ /* cache only if we don't create a dst reference loop */
+ if (dst->lwtstate != cache_dst->lwtstate) {
+ local_bh_disable();
+ dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr);
+ local_bh_enable();
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev));
+ if (unlikely(err))
+ goto drop;
+ }
+ /* avoid lwtunnel_output() reentry loop when destination is the same
+ * after transformation (e.g., with the inline mode)
+ */
+ if (dst->lwtstate != cache_dst->lwtstate) {
+ skb_dst_drop(skb);
+ skb_dst_set(skb, cache_dst);
return dst_output(net, sk, skb);
}
out:
+ dst_release(cache_dst);
return dst->lwtstate->orig_output(net, sk, skb);
drop:
+ dst_release(cache_dst);
kfree_skb(skb);
return err;
}
@@ -414,6 +460,13 @@ static int ioam6_fill_encap_info(struct sk_buff *skb,
goto ret;
if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
+ if (ilwt->has_tunsrc) {
+ err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_SRC,
+ &ilwt->tunsrc);
+ if (err)
+ goto ret;
+ }
+
err = nla_put_in6_addr(skb, IOAM6_IPTUNNEL_DST, &ilwt->tundst);
if (err)
goto ret;
@@ -435,8 +488,12 @@ static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
nla_total_size(sizeof(ilwt->mode)) +
nla_total_size(sizeof(ilwt->tuninfo.traceh));
- if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE)
+ if (ilwt->mode != IOAM6_IPTUNNEL_MODE_INLINE) {
+ if (ilwt->has_tunsrc)
+ nlsize += nla_total_size(sizeof(ilwt->tunsrc));
+
nlsize += nla_total_size(sizeof(ilwt->tundst));
+ }
return nlsize;
}
@@ -451,17 +508,21 @@ static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
return (ilwt_a->freq.k != ilwt_b->freq.k ||
ilwt_a->freq.n != ilwt_b->freq.n ||
ilwt_a->mode != ilwt_b->mode ||
+ ilwt_a->has_tunsrc != ilwt_b->has_tunsrc ||
(ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
!ipv6_addr_equal(&ilwt_a->tundst, &ilwt_b->tundst)) ||
+ (ilwt_a->mode != IOAM6_IPTUNNEL_MODE_INLINE &&
+ ilwt_a->has_tunsrc &&
+ !ipv6_addr_equal(&ilwt_a->tunsrc, &ilwt_b->tunsrc)) ||
trace_a->namespace_id != trace_b->namespace_id);
}
static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
.build_state = ioam6_build_state,
.destroy_state = ioam6_destroy_state,
- .output = ioam6_output,
+ .output = ioam6_output,
.fill_encap = ioam6_fill_encap_info,
- .get_encap_size = ioam6_encap_nlsize,
+ .get_encap_size = ioam6_encap_nlsize,
.cmp_encap = ioam6_encap_cmp,
.owner = THIS_MODULE,
};
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 4fc2cae0d116..c134ba202c4c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
+ INIT_HLIST_NODE(&f6i->gc_link);
+
return f6i;
}
@@ -196,16 +198,9 @@ static void node_free_immediate(struct net *net, struct fib6_node *fn)
net->ipv6.rt6_stats->fib_nodes--;
}
-static void node_free_rcu(struct rcu_head *head)
-{
- struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
-
- kmem_cache_free(fib6_node_kmem, fn);
-}
-
static void node_free(struct net *net, struct fib6_node *fn)
{
- call_rcu(&fn->rcu, node_free_rcu);
+ kfree_rcu(fn, rcu);
net->ipv6.rt6_stats->fib_nodes--;
}
@@ -246,6 +241,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
net->ipv6.fib6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
+ INIT_HLIST_HEAD(&table->tb6_gc_hlist);
}
return table;
@@ -342,17 +338,17 @@ static void __net_init fib6_tables_init(struct net *net)
#endif
-unsigned int fib6_tables_seq_read(struct net *net)
+unsigned int fib6_tables_seq_read(const struct net *net)
{
unsigned int h, fib_seq = 0;
rcu_read_lock();
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
- struct hlist_head *head = &net->ipv6.fib_table_hash[h];
- struct fib6_table *tb;
+ const struct hlist_head *head = &net->ipv6.fib_table_hash[h];
+ const struct fib6_table *tb;
hlist_for_each_entry_rcu(tb, head, tb6_hlist)
- fib_seq += tb->fib_seq;
+ fib_seq += READ_ONCE(tb->fib_seq);
}
rcu_read_unlock();
@@ -397,7 +393,7 @@ int call_fib6_entry_notifiers(struct net *net,
.rt = rt,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, event_type, &info.info);
}
@@ -413,7 +409,7 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
.nsiblings = nsiblings,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, event_type, &info.info);
}
@@ -424,7 +420,7 @@ int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
.nsiblings = rt->fib6_nsiblings,
};
- rt->fib6_table->fib_seq++;
+ WRITE_ONCE(rt->fib6_table->fib_seq, rt->fib6_table->fib_seq + 1);
return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
}
@@ -617,23 +613,25 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
- .filter.dump_routes = true };
+ struct rt6_rtnl_dump_arg arg = {
+ .filter.dump_exceptions = true,
+ .filter.dump_routes = true,
+ .filter.rtnl_held = false,
+ };
const struct nlmsghdr *nlh = cb->nlh;
struct net *net = sock_net(skb->sk);
- unsigned int h, s_h;
unsigned int e = 0, s_e;
+ struct hlist_head *head;
struct fib6_walker *w;
struct fib6_table *tb;
- struct hlist_head *head;
- int res = 0;
+ unsigned int h, s_h;
+ int err = 0;
+ rcu_read_lock();
if (cb->strict_check) {
- int err;
-
err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
if (err < 0)
- return err;
+ goto unlock;
} else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
struct rtmsg *rtm = nlmsg_data(nlh);
@@ -645,19 +643,21 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (!w) {
/* New dump:
*
- * 1. hook callback destructor.
- */
- cb->args[3] = (long)cb->done;
- cb->done = fib6_dump_done;
-
- /*
- * 2. allocate and initialize walker.
+ * 1. allocate and initialize walker.
*/
w = kzalloc(sizeof(*w), GFP_ATOMIC);
- if (!w)
- return -ENOMEM;
+ if (!w) {
+ err = -ENOMEM;
+ goto unlock;
+ }
w->func = fib6_dump_node;
cb->args[2] = (long)w;
+
+ /* 2. hook callback destructor.
+ */
+ cb->args[3] = (long)cb->done;
+ cb->done = fib6_dump_done;
+
}
arg.skb = skb;
@@ -669,46 +669,46 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
tb = fib6_get_table(net, arg.filter.table_id);
if (!tb) {
if (rtnl_msg_family(cb->nlh) != PF_INET6)
- goto out;
+ goto unlock;
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
- return -ENOENT;
+ err = -ENOENT;
+ goto unlock;
}
if (!cb->args[0]) {
- res = fib6_dump_table(tb, skb, cb);
- if (!res)
+ err = fib6_dump_table(tb, skb, cb);
+ if (!err)
cb->args[0] = 1;
}
- goto out;
+ goto unlock;
}
s_h = cb->args[0];
s_e = cb->args[1];
- rcu_read_lock();
for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv6.fib_table_hash[h];
hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
if (e < s_e)
goto next;
- res = fib6_dump_table(tb, skb, cb);
- if (res != 0)
- goto out_unlock;
+ err = fib6_dump_table(tb, skb, cb);
+ if (err != 0)
+ goto out;
next:
e++;
}
}
-out_unlock:
- rcu_read_unlock();
+out:
cb->args[1] = e;
cb->args[0] = h;
-out:
- res = res < 0 ? res : skb->len;
- if (res <= 0)
+
+unlock:
+ rcu_read_unlock();
+ if (err <= 0)
fib6_dump_end(cb);
- return res;
+ return err;
}
void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
@@ -751,8 +751,6 @@ static struct fib6_node *fib6_add_1(struct net *net,
int bit;
__be32 dir = 0;
- RT6_TRACE("fib6_add_1\n");
-
/* insert node in tree */
fn = root;
@@ -961,6 +959,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
if (!fib6_nh->rt6i_pcpu)
return;
+ rcu_read_lock();
/* release the reference to this fib entry from
* all of its cached pcpu routes
*/
@@ -969,7 +968,9 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
struct rt6_info *pcpu_rt;
ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
- pcpu_rt = *ppcpu_rt;
+
+ /* Paired with xchg() in rt6_get_pcpu_route() */
+ pcpu_rt = READ_ONCE(*ppcpu_rt);
/* only dropping the 'from' reference if the cached route
* is using 'match'. The cached pcpu_rt->from only changes
@@ -979,10 +980,11 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
struct fib6_info *from;
- from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
+ from = unrcu_pointer(xchg(&pcpu_rt->from, NULL));
fib6_info_release(from);
}
}
+ rcu_read_unlock();
}
struct fib6_nh_pcpu_arg {
@@ -1057,6 +1059,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
lockdep_is_held(&table->tb6_lock));
}
}
+
+ fib6_clean_expires(rt);
+ fib6_remove_gc_list(rt);
}
/*
@@ -1117,10 +1122,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
rt->fib6_nsiblings = 0;
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
- if (!(rt->fib6_flags & RTF_EXPIRES))
+ if (!(rt->fib6_flags & RTF_EXPIRES)) {
fib6_clean_expires(iter);
- else
+ fib6_remove_gc_list(iter);
+ } else {
fib6_set_expires(iter, rt->expires);
+ fib6_add_gc_list(iter);
+ }
if (rt->fib6_pmtu)
fib6_metric_set(iter, RTAX_MTU,
@@ -1175,8 +1183,8 @@ next_iter:
while (sibling) {
if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) {
- list_add_tail(&rt->fib6_siblings,
- &sibling->fib6_siblings);
+ list_add_tail_rcu(&rt->fib6_siblings,
+ &sibling->fib6_siblings);
break;
}
sibling = rcu_dereference_protected(sibling->fib6_next,
@@ -1237,7 +1245,7 @@ add:
fib6_siblings)
sibling->fib6_nsiblings--;
rt->fib6_nsiblings = 0;
- list_del_init(&rt->fib6_siblings);
+ list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
return err;
}
@@ -1375,7 +1383,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
struct nl_info *info, struct netlink_ext_ack *extack)
{
struct fib6_table *table = rt->fib6_table;
- struct fib6_node *fn, *pn = NULL;
+ struct fib6_node *fn;
+#ifdef CONFIG_IPV6_SUBTREES
+ struct fib6_node *pn = NULL;
+#endif
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
@@ -1399,9 +1410,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
goto out;
}
+#ifdef CONFIG_IPV6_SUBTREES
pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
if (rt->fib6_src.plen) {
struct fib6_node *sn;
@@ -1479,6 +1490,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
+
+ if (rt->fib6_flags & RTF_EXPIRES)
+ fib6_add_gc_list(rt);
+
fib6_start_gc(info->nl_net, rt);
}
@@ -1803,7 +1818,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
lockdep_is_held(&table->tb6_lock));
struct fib6_info *new_fn_leaf;
- RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
+ pr_debug("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
iter++;
WARN_ON(fn->fn_flags & RTN_RTINFO);
@@ -1866,7 +1881,8 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
FOR_WALKERS(net, w) {
if (!child) {
if (w->node == fn) {
- RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
+ pr_debug("W %p adjusted by delnode 1, s=%d/%d\n",
+ w, w->state, nstate);
w->node = pn;
w->state = nstate;
}
@@ -1874,10 +1890,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
if (w->node == fn) {
w->node = child;
if (children&2) {
- RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ pr_debug("W %p adjusted by delnode 2, s=%d\n",
+ w, w->state);
w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
} else {
- RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ pr_debug("W %p adjusted by delnode 2, s=%d\n",
+ w, w->state);
w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
}
}
@@ -1905,8 +1923,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
struct net *net = info->nl_net;
bool notify_del = false;
- RT6_TRACE("fib6_del_route\n");
-
/* If the deleted route is the first in the node and it is not part of
* a multipath route, then we need to replace it with the next route
* in the node, if exists.
@@ -1947,7 +1963,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
&rt->fib6_siblings, fib6_siblings)
sibling->fib6_nsiblings--;
rt->fib6_nsiblings = 0;
- list_del_init(&rt->fib6_siblings);
+ list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
}
@@ -1955,7 +1971,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
read_lock(&net->ipv6.fib6_walker_lock);
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
- RT6_TRACE("walker %p adjusted by delroute\n", w);
+ pr_debug("walker %p adjusted by delroute\n", w);
w->leaf = rcu_dereference_protected(rt->fib6_next,
lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
@@ -2281,9 +2297,8 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static int fib6_age(struct fib6_info *rt, void *arg)
+static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
- struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
/*
@@ -2293,7 +2308,7 @@ static int fib6_age(struct fib6_info *rt, void *arg)
if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
if (time_after(now, rt->expires)) {
- RT6_TRACE("expiring %p\n", rt);
+ pr_debug("expiring %p\n", rt);
return -1;
}
gc_args->more++;
@@ -2308,6 +2323,42 @@ static int fib6_age(struct fib6_info *rt, void *arg)
return 0;
}
+static void fib6_gc_table(struct net *net,
+ struct fib6_table *tb6,
+ struct fib6_gc_args *gc_args)
+{
+ struct fib6_info *rt;
+ struct hlist_node *n;
+ struct nl_info info = {
+ .nl_net = net,
+ .skip_notify = false,
+ };
+
+ hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
+ if (fib6_age(rt, gc_args) == -1)
+ fib6_del(rt, &info);
+}
+
+static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
+{
+ struct fib6_table *table;
+ struct hlist_head *head;
+ unsigned int h;
+
+ rcu_read_lock();
+ for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+ head = &net->ipv6.fib_table_hash[h];
+ hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+ spin_lock_bh(&table->tb6_lock);
+
+ fib6_gc_table(net, table, gc_args);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ }
+ rcu_read_unlock();
+}
+
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
struct fib6_gc_args gc_args;
@@ -2323,7 +2374,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
net->ipv6.sysctl.ip6_rt_gc_interval;
gc_args.more = 0;
- fib6_clean_all(net, fib6_age, &gc_args);
+ fib6_gc_all(net, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
@@ -2383,6 +2434,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
+ INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -2395,6 +2447,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
+ INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist);
#endif
fib6_tables_init(net);
@@ -2440,14 +2493,18 @@ static struct pernet_operations fib6_net_ops = {
.exit = fib6_net_exit,
};
+static const struct rtnl_msg_handler fib6_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
+ .dumpit = inet6_dump_fib,
+ .flags = RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE},
+};
+
int __init fib6_init(void)
{
int ret = -ENOMEM;
- fib6_node_kmem = kmem_cache_create("fib6_nodes",
- sizeof(struct fib6_node), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
- NULL);
+ fib6_node_kmem = KMEM_CACHE(fib6_node,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT);
if (!fib6_node_kmem)
goto out;
@@ -2455,8 +2512,7 @@ int __init fib6_init(void)
if (ret)
goto out_kmem_cache_create;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
- inet6_dump_fib, 0);
+ ret = rtnl_register_many(fib6_rtnl_msg_handlers);
if (ret)
goto out_unregister_subsys;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 070d87abf7c0..235808cfec70 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -496,11 +496,11 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
tpi->proto);
if (tunnel) {
if (tunnel->parms.collect_md) {
+ IP_TUNNEL_DECLARE_FLAGS(flags);
struct metadata_dst *tun_dst;
__be64 tun_id;
- __be16 flags;
- flags = tpi->flags;
+ ip_tunnel_flags_copy(flags, tpi->flags);
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
@@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb,
struct ip6_tnl *tunnel;
u8 ver;
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ipv6h = ipv6_hdr(skb);
ershdr = (struct erspan_base_hdr *)skb->data;
ver = ershdr->ver;
@@ -548,14 +551,14 @@ static int ip6erspan_rcv(struct sk_buff *skb,
if (tunnel->parms.collect_md) {
struct erspan_metadata *pkt_md, *md;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
struct metadata_dst *tun_dst;
struct ip_tunnel_info *info;
unsigned char *gh;
__be64 tun_id;
- __be16 flags;
- tpi->flags |= TUNNEL_KEY;
- flags = tpi->flags;
+ __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
+ ip_tunnel_flags_copy(flags, tpi->flags);
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id,
@@ -577,7 +580,8 @@ static int ip6erspan_rcv(struct sk_buff *skb,
md2 = &md->u.md2;
memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
ERSPAN_V2_MDSIZE);
- info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ info->key.tun_flags);
info->options_len = sizeof(*md);
ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
@@ -745,8 +749,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
__u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
+ IP_TUNNEL_DECLARE_FLAGS(flags);
__be16 protocol;
- __be16 flags;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -778,8 +782,11 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id);
dsfield = key->tos;
- flags = key->tun_flags &
- (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
+ ip_tunnel_flags_zero(flags);
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ ip_tunnel_flags_and(flags, flags, key->tun_flags);
tun_hlen = gre_calc_hlen(flags);
if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen))
@@ -788,19 +795,21 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
gre_build_header(skb, tun_hlen,
flags, protocol,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
- : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) :
+ 0);
} else {
if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
return -ENOMEM;
- flags = tunnel->parms.o_flags;
+ ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);
gre_build_header(skb, tunnel->tun_hlen, flags,
protocol, tunnel->parms.o_key,
- (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
- : 0);
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) :
+ 0);
}
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -822,7 +831,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
&dsfield, &encap_limit);
- err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
+ err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ t->parms.o_flags));
if (err)
return -1;
@@ -856,7 +866,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
return -1;
- if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ t->parms.o_flags)))
return -1;
err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit,
@@ -883,7 +894,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
prepare_ip6gre_xmit_other(skb, dev, &fl6, &dsfield, &encap_limit))
return -1;
- err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
+ err = gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ t->parms.o_flags));
if (err)
return err;
err = __gre6_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, skb->protocol);
@@ -936,6 +948,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
struct ip_tunnel_info *tun_info = NULL;
struct ip6_tnl *t = netdev_priv(dev);
struct dst_entry *dst = skb_dst(skb);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
bool truncate = false;
int encap_limit = -1;
__u8 dsfield = false;
@@ -979,7 +992,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
goto tx_err;
- t->parms.o_flags &= ~TUNNEL_KEY;
+ __clear_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
IPCB(skb)->flags = 0;
/* For collect_md mode, derive fl6 from the tunnel key,
@@ -1004,7 +1017,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id);
dsfield = key->tos;
- if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+ if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ tun_info->key.tun_flags))
goto tx_err;
if (tun_info->options_len < sizeof(*md))
goto tx_err;
@@ -1065,7 +1079,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
}
/* Push GRE header. */
- gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ gre_build_header(skb, 8, flags, proto, 0,
+ htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
@@ -1208,8 +1224,8 @@ static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t,
t->parms.proto = p->proto;
t->parms.i_key = p->i_key;
t->parms.o_key = p->o_key;
- t->parms.i_flags = p->i_flags;
- t->parms.o_flags = p->o_flags;
+ ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
t->parms.fwmark = p->fwmark;
t->parms.erspan_ver = p->erspan_ver;
t->parms.index = p->index;
@@ -1238,8 +1254,8 @@ static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
p->link = u->link;
p->i_key = u->i_key;
p->o_key = u->o_key;
- p->i_flags = gre_flags_to_tnl_flags(u->i_flags);
- p->o_flags = gre_flags_to_tnl_flags(u->o_flags);
+ gre_flags_to_tnl_flags(p->i_flags, u->i_flags);
+ gre_flags_to_tnl_flags(p->o_flags, u->o_flags);
memcpy(p->name, u->name, sizeof(u->name));
}
@@ -1391,7 +1407,7 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
ipv6h->daddr = t->parms.raddr;
p = (__be16 *)(ipv6h + 1);
- p[0] = t->parms.o_flags;
+ p[0] = ip_tunnel_flags_to_be16(t->parms.o_flags);
p[1] = htons(type);
/*
@@ -1418,7 +1434,6 @@ static const struct net_device_ops ip6gre_netdev_ops = {
.ndo_start_xmit = ip6gre_tunnel_xmit,
.ndo_siocdevprivate = ip6gre_tunnel_siocdevprivate,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1428,7 +1443,6 @@ static void ip6gre_dev_free(struct net_device *dev)
gro_cells_destroy(&t->gro_cells);
dst_cache_destroy(&t->dst_cache);
- free_percpu(dev->tstats);
}
static void ip6gre_tunnel_setup(struct net_device *dev)
@@ -1437,6 +1451,7 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->type = ARPHRD_IP6GRE;
dev->flags |= IFF_NOARP;
@@ -1455,23 +1470,23 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
static void ip6gre_tnl_init_features(struct net_device *dev)
{
struct ip6_tnl *nt = netdev_priv(dev);
- __be16 flags;
- dev->features |= GRE6_FEATURES | NETIF_F_LLTX;
+ dev->features |= GRE6_FEATURES;
dev->hw_features |= GRE6_FEATURES;
- flags = nt->parms.o_flags;
-
/* TCP offload with GRE SEQ is not supported, nor can we support 2
* levels of outer headers requiring an update.
*/
- if (flags & TUNNEL_SEQ)
+ if (test_bit(IP_TUNNEL_SEQ_BIT, nt->parms.o_flags))
return;
- if (flags & TUNNEL_CSUM && nt->encap.type != TUNNEL_ENCAP_NONE)
+ if (test_bit(IP_TUNNEL_CSUM_BIT, nt->parms.o_flags) &&
+ nt->encap.type != TUNNEL_ENCAP_NONE)
return;
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+
+ dev->lltx = true;
}
static int ip6gre_tunnel_init_common(struct net_device *dev)
@@ -1486,13 +1501,9 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret)
- goto cleanup_alloc_pcpu_stats;
+ return ret;
ret = gro_cells_init(&tunnel->gro_cells, dev);
if (ret)
@@ -1511,13 +1522,11 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
ip6gre_tnl_init_features(dev);
netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
cleanup_dst_cache_init:
dst_cache_destroy(&tunnel->dst_cache);
-cleanup_alloc_pcpu_stats:
- free_percpu(dev->tstats);
- dev->tstats = NULL;
return ret;
}
@@ -1612,8 +1621,7 @@ static int __net_init ip6gre_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
-
+ ign->fb_tunnel_dev->netns_local = true;
ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
@@ -1632,21 +1640,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
+static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- ip6gre_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ ip6gre_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations ip6gre_net_ops = {
.init = ip6gre_init_net,
- .exit_batch = ip6gre_exit_batch_net,
+ .exit_batch_rtnl = ip6gre_exit_batch_rtnl,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
};
@@ -1793,12 +1799,12 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = gre_flags_to_tnl_flags(
- nla_get_be16(data[IFLA_GRE_IFLAGS]));
+ gre_flags_to_tnl_flags(parms->i_flags,
+ nla_get_be16(data[IFLA_GRE_IFLAGS]));
if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = gre_flags_to_tnl_flags(
- nla_get_be16(data[IFLA_GRE_OFLAGS]));
+ gre_flags_to_tnl_flags(parms->o_flags,
+ nla_get_be16(data[IFLA_GRE_OFLAGS]));
if (data[IFLA_GRE_IKEY])
parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1851,7 +1857,6 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1880,13 +1885,9 @@ static int ip6erspan_tap_init(struct net_device *dev)
tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
if (ret)
- goto cleanup_alloc_pcpu_stats;
+ return ret;
ret = gro_cells_init(&tunnel->gro_cells, dev);
if (ret)
@@ -1903,13 +1904,11 @@ static int ip6erspan_tap_init(struct net_device *dev)
ip6erspan_tnl_link_config(tunnel, 1);
netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
cleanup_dst_cache_init:
dst_cache_destroy(&tunnel->dst_cache);
-cleanup_alloc_pcpu_stats:
- free_percpu(dev->tstats);
- dev->tstats = NULL;
return ret;
}
@@ -1920,7 +1919,6 @@ static const struct net_device_ops ip6erspan_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1934,6 +1932,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netif_keep_dst(dev);
@@ -2144,11 +2143,13 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct __ip6_tnl_parm *p = &t->parms;
- __be16 o_flags = p->o_flags;
+ IP_TUNNEL_DECLARE_FLAGS(o_flags);
+
+ ip_tunnel_flags_copy(o_flags, p->o_flags);
if (p->erspan_ver == 1 || p->erspan_ver == 2) {
if (!p->collect_md)
- o_flags |= TUNNEL_KEY;
+ __set_bit(IP_TUNNEL_KEY_BIT, o_flags);
if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
goto nla_put_failure;
@@ -2234,6 +2235,7 @@ static void ip6erspan_tap_setup(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
netif_keep_dst(dev);
@@ -2405,7 +2407,7 @@ static void __exit ip6gre_fini(void)
module_init(ip6gre_init);
module_exit(ip6gre_fini);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_AUTHOR("D. Kozlov <xeb@mail.ru>");
MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
MODULE_ALIAS_RTNL_LINK("ip6gre");
MODULE_ALIAS_RTNL_LINK("ip6gretap");
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index b8378814532c..39da6a7ce5f1 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -111,9 +111,8 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
{
struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
@@ -168,9 +167,9 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
SKB_DR_SET(reason, NOT_SPECIFIED);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
- !idev || unlikely(idev->cnf.disable_ipv6)) {
+ !idev || unlikely(READ_ONCE(idev->cnf.disable_ipv6))) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
- if (idev && unlikely(idev->cnf.disable_ipv6))
+ if (idev && unlikely(READ_ONCE(idev->cnf.disable_ipv6)))
SKB_DR_SET(reason, IPV6DISABLED);
goto drop;
}
@@ -236,7 +235,7 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
if (!ipv6_addr_is_multicast(&hdr->daddr) &&
(skb->pkt_type == PACKET_BROADCAST ||
skb->pkt_type == PACKET_MULTICAST) &&
- idev->cnf.drop_unicast_in_l2_multicast) {
+ READ_ONCE(idev->cnf.drop_unicast_in_l2_multicast)) {
SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST);
goto err;
}
@@ -327,9 +326,8 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *curr_dev = NULL;
struct net *curr_net = NULL;
struct sk_buff *skb, *next;
- struct list_head sublist;
+ LIST_HEAD(sublist);
- INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
@@ -479,9 +477,7 @@ discard:
static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb_clear_delivery_time(skb);
- rcu_read_lock();
ip6_protocol_deliver_rcu(net, skb, 0, false);
- rcu_read_unlock();
return 0;
}
@@ -489,9 +485,15 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk
int ip6_input(struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
- dev_net(skb->dev), NULL, skb, skb->dev, NULL,
- ip6_input_finish);
+ int res;
+
+ rcu_read_lock();
+ res = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
+ dev_net_rcu(skb->dev), NULL, skb, skb->dev, NULL,
+ ip6_input_finish);
+ rcu_read_unlock();
+
+ return res;
}
EXPORT_SYMBOL_GPL(ip6_input);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index cca64c7809be..9822163428b0 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -67,7 +67,7 @@ static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
off += len;
}
- skb_gro_pull(skb, off - skb_network_offset(skb));
+ skb_gro_pull(skb, off - skb_gro_receive_network_offset(skb));
return proto;
}
@@ -236,7 +236,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
if (unlikely(!iph))
goto out;
- skb_set_network_header(skb, off);
+ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off;
flush += ntohs(iph->payload_len) != skb->len - hlen;
@@ -259,7 +259,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
NAPI_GRO_CB(skb)->proto = proto;
flush--;
- nlen = skb_network_header_len(skb);
+ nlen = skb_gro_offset(skb) - off;
list_for_each_entry(p, head, list) {
const struct ipv6hdr *iph2;
@@ -290,19 +290,8 @@ not_same_flow:
nlen - sizeof(struct ipv6hdr)))
goto not_same_flow;
}
- /* flush if Traffic Class fields are different */
- NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
- (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
- NAPI_GRO_CB(p)->flush |= flush;
-
- /* If the previous IP ID value was based on an atomic
- * datagram we can overwrite the value and ignore it.
- */
- if (NAPI_GRO_CB(skb)->is_atomic)
- NAPI_GRO_CB(p)->flush_id = 0;
}
- NAPI_GRO_CB(skb)->is_atomic = true;
NAPI_GRO_CB(skb)->flush |= flush;
skb_gro_postpull_rcsum(skb, iph, nlen);
@@ -419,14 +408,6 @@ static int ip4ip6_gro_complete(struct sk_buff *skb, int nhoff)
return inet_gro_complete(skb, nhoff);
}
-static struct packet_offload ipv6_packet_offload __read_mostly = {
- .type = cpu_to_be16(ETH_P_IPV6),
- .callbacks = {
- .gso_segment = ipv6_gso_segment,
- .gro_receive = ipv6_gro_receive,
- .gro_complete = ipv6_gro_complete,
- },
-};
static struct sk_buff *sit_gso_segment(struct sk_buff *skb,
netdev_features_t features)
@@ -486,7 +467,15 @@ static int __init ipv6_offload_init(void)
if (ipv6_exthdrs_offload_init() < 0)
pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);
- dev_add_offload(&ipv6_packet_offload);
+ net_hotdata.ipv6_packet_offload = (struct packet_offload) {
+ .type = cpu_to_be16(ETH_P_IPV6),
+ .callbacks = {
+ .gso_segment = ipv6_gso_segment,
+ .gro_receive = ipv6_gro_receive,
+ .gro_complete = ipv6_gro_complete,
+ },
+ };
+ dev_add_offload(&net_hotdata.ipv6_packet_offload);
inet_add_offload(&sit_offload, IPPROTO_IPV6);
inet6_add_offload(&ip6ip6_offload, IPPROTO_IPV6);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 31b86fe661aa..d577bf2f3053 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -70,11 +70,15 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
/* Be paranoid, rather than too clever. */
if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
+ /* Make sure idev stays alive */
+ rcu_read_lock();
skb = skb_expand_head(skb, hh_len);
if (!skb) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
return -ENOMEM;
}
+ rcu_read_unlock();
}
hdr = ipv6_hdr(skb);
@@ -120,10 +124,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
rcu_read_lock();
- nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
+ nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
- if (unlikely(IS_ERR_OR_NULL(neigh))) {
+ if (IS_ERR_OR_NULL(neigh)) {
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (IS_ERR(neigh)) {
@@ -234,7 +238,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
- if (unlikely(idev->cnf.disable_ipv6)) {
+ if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
return 0;
@@ -283,11 +287,15 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
head_room += opt->opt_nflen + opt->opt_flen;
if (unlikely(head_room > skb_headroom(skb))) {
+ /* Make sure idev stays alive */
+ rcu_read_lock();
skb = skb_expand_head(skb, head_room);
if (!skb) {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
return -ENOBUFS;
}
+ rcu_read_unlock();
}
if (opt) {
@@ -501,7 +509,7 @@ int ip6_forward(struct sk_buff *skb)
u32 mtu;
idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
- if (net->ipv6.devconf_all->forwarding == 0)
+ if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
goto error;
if (skb->pkt_type != PACKET_HOST)
@@ -513,8 +521,8 @@ int ip6_forward(struct sk_buff *skb)
if (skb_warn_if_lro(skb))
goto drop;
- if (!net->ipv6.devconf_all->disable_policy &&
- (!idev || !idev->cnf.disable_policy) &&
+ if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) &&
+ (!idev || !READ_ONCE(idev->cnf.disable_policy)) &&
!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
@@ -552,7 +560,7 @@ int ip6_forward(struct sk_buff *skb)
}
/* XXX: idev->cnf.proxy_ndp? */
- if (net->ipv6.devconf_all->proxy_ndp &&
+ if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
int proxied = ip6_forward_proxy_check(skb);
if (proxied > 0) {
@@ -599,21 +607,21 @@ int ip6_forward(struct sk_buff *skb)
* send a redirect.
*/
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_GATEWAY)
target = &rt->rt6i_gateway;
else
target = &hdr->daddr;
- peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);
+ rcu_read_lock();
+ peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
/* Limit redirects both by destination (here)
and by source (inside ndisc_send_redirect)
*/
if (inet_peer_xrlim_allow(peer, 1*HZ))
ndisc_send_redirect(skb, target);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
} else {
int addrtype = ipv6_addr_type(&hdr->saddr);
@@ -856,10 +864,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
struct sk_buff *frag;
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
struct ip6_frag_state state;
unsigned int mtu, hlen, nexthdr_offset;
ktime_t tstamp = skb->tstamp;
@@ -955,7 +963,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -1016,7 +1024,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- skb_set_delivery_time(frag, tstamp, mono_delivery_time);
+ skb_set_delivery_time(frag, tstamp, tstamp_type);
err = output(net, sk, frag);
if (err)
goto fail;
@@ -1063,7 +1071,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
return NULL;
}
- rt = (struct rt6_info *)dst;
+ rt = dst_rt6_info(dst);
/* Yes, checking route validity in not connected
* case is not very simple. Take into account,
* that we do not support routing by source, TOS,
@@ -1118,12 +1126,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
struct rt6_info *rt;
*dst = ip6_route_output(net, sk, fl6);
- rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
+ rt = (*dst)->error ? NULL : dst_rt6_info(*dst);
rcu_read_lock();
from = rt ? rcu_dereference(rt->from) : NULL;
err = ip6_route_get_saddr(net, from, &fl6->daddr,
sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
+ fl6->flowi6_l3mdev,
&fl6->saddr);
rcu_read_unlock();
@@ -1159,7 +1168,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* dst entry and replace it instead with the
* dst entry of the nexthop router
*/
- rt = (struct rt6_info *) *dst;
+ rt = dst_rt6_info(*dst);
rcu_read_lock();
n = __ipv6_neigh_lookup_noref(rt->dst.dev,
rt6_nexthop(rt, &fl6->daddr));
@@ -1392,8 +1401,12 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
cork->base.mark = ipc6->sockc.mark;
- sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
-
+ cork->base.priority = ipc6->sockc.priority;
+ sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags);
+ if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
+ cork->base.flags |= IPCORK_TS_OPT_ID;
+ cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
+ }
cork->base.length = 0;
cork->base.transmit_time = ipc6->sockc.transmit_time;
@@ -1423,8 +1436,8 @@ static int __ip6_append_data(struct sock *sk,
int offset = 0;
bool zc = false;
u32 tskey = 0;
- struct rt6_info *rt = (struct rt6_info *)cork->dst;
- bool paged, hold_tskey, extra_uref = false;
+ struct rt6_info *rt = dst_rt6_info(cork->dst);
+ bool paged, hold_tskey = false, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
@@ -1534,10 +1547,15 @@ emsgsize:
flags &= ~MSG_SPLICE_PAGES;
}
- hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
- if (hold_tskey)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
+ if (cork->flags & IPCORK_TS_OPT_ID) {
+ tskey = cork->ts_opt_id;
+ } else {
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ hold_tskey = true;
+ }
+ }
/*
* Let's try using as much space as possible.
@@ -1680,8 +1698,9 @@ alloc_new_skb:
pskb_trim_unique(skb_prev, maxfraglen);
}
if (copy > 0 &&
- getfrag(from, data + transhdrlen, offset,
- copy, fraggap, skb) < 0) {
+ INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, data + transhdrlen, offset,
+ copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
@@ -1725,8 +1744,9 @@ alloc_new_skb:
unsigned int off;
off = skb->len;
- if (getfrag(from, skb_put(skb, copy),
- offset, copy, off, skb) < 0) {
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from, skb_put(skb, copy),
+ offset, copy, off, skb) < 0) {
__skb_trim(skb, off);
err = -EFAULT;
goto error;
@@ -1764,7 +1784,8 @@ alloc_new_skb:
get_page(pfrag->page);
}
copy = min_t(int, copy, pfrag->size - pfrag->offset);
- if (getfrag(from,
+ if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
+ from,
page_address(pfrag->page) + pfrag->offset,
offset, copy, skb->len, skb) < 0)
goto error_efault;
@@ -1877,7 +1898,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = v6_cork->opt;
- struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
+ struct rt6_info *rt = dst_rt6_info(cork->base.dst);
struct flowi6 *fl6 = &cork->fl.u.ip6;
unsigned char proto = fl6->flowi6_proto;
@@ -1922,9 +1943,12 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
hdr->saddr = fl6->saddr;
hdr->daddr = *final_dst;
- skb->priority = READ_ONCE(sk->sk_priority);
+ skb->priority = cork->base.priority;
skb->mark = cork->base.mark;
- skb->tstamp = cork->base.transmit_time;
+ if (sk_is_tcp(sk))
+ skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
+ else
+ skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);
ip6_cork_steal_dst(skb, cork);
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
@@ -1933,7 +1957,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
u8 icmp6_type;
if (sk->sk_socket->type == SOCK_RAW &&
- !inet_test_bit(HDRINCL, sk))
+ !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
icmp6_type = fl6->fl6_icmp_type;
else
icmp6_type = icmp6_hdr(skb)->icmp6_type;
@@ -1949,9 +1973,10 @@ out:
int ip6_send_skb(struct sk_buff *skb)
{
struct net *net = sock_net(skb->sk);
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
int err;
+ rcu_read_lock();
err = ip6_local_out(net, skb->sk, skb);
if (err) {
if (err > 0)
@@ -1961,6 +1986,7 @@ int ip6_send_skb(struct sk_buff *skb)
IPSTATS_MIB_OUTDISCARDS);
}
+ rcu_read_unlock();
return err;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9bbabf750a21..48fd53b98972 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -53,6 +53,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
@@ -247,7 +248,6 @@ static void ip6_dev_free(struct net_device *dev)
gro_cells_destroy(&t->gro_cells);
dst_cache_destroy(&t->dst_cache);
- free_percpu(dev->tstats);
}
static int ip6_tnl_create2(struct net_device *dev)
@@ -609,7 +609,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr,
- 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ 0, 0, 0, IPPROTO_IPIP,
+ eiph->tos & INET_DSCP_MASK, 0);
if (IS_ERR(rt))
goto out;
@@ -620,7 +621,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rt->rt_flags & RTCF_LOCAL) {
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->daddr, eiph->saddr, 0, 0,
- IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
+ IPPROTO_IPIP,
+ eiph->tos & INET_DSCP_MASK, 0);
if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL6) {
if (!IS_ERR(rt))
ip_rt_put(rt);
@@ -628,8 +630,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
skb_dst_set(skb2, &rt->dst);
} else {
- if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
- skb2->dev) ||
+ if (ip_route_input(skb2, eiph->daddr, eiph->saddr,
+ ip4h_dscp(eiph), skb2->dev) ||
skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6)
goto out;
}
@@ -799,17 +801,15 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
const struct ipv6hdr *ipv6h;
int nh, err;
- if ((!(tpi->flags & TUNNEL_CSUM) &&
- (tunnel->parms.i_flags & TUNNEL_CSUM)) ||
- ((tpi->flags & TUNNEL_CSUM) &&
- !(tunnel->parms.i_flags & TUNNEL_CSUM))) {
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
+ test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
DEV_STATS_INC(tunnel->dev, rx_crc_errors);
DEV_STATS_INC(tunnel->dev, rx_errors);
goto drop;
}
- if (tunnel->parms.i_flags & TUNNEL_SEQ) {
- if (!(tpi->flags & TUNNEL_SEQ) ||
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
+ if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
(tunnel->i_seqno &&
(s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
@@ -947,7 +947,9 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
if (t->parms.collect_md) {
- tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
+
+ tun_dst = ipv6_tun_rx_dst(skb, flags, 0, 0);
if (!tun_dst)
goto drop;
}
@@ -1508,7 +1510,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
tdev = __dev_get_by_index(t->net, p->link);
if (tdev) {
- dev->hard_header_len = tdev->hard_header_len + t_hlen;
+ dev->needed_headroom = tdev->hard_header_len +
+ tdev->needed_headroom + t_hlen;
mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU);
mtu = mtu - t_hlen;
@@ -1732,7 +1735,9 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
struct ip6_tnl *tnl = netdev_priv(dev);
+ int t_hlen;
+ t_hlen = tnl->hlen + sizeof(struct ipv6hdr);
if (tnl->parms.proto == IPPROTO_IPV6) {
if (new_mtu < IPV6_MIN_MTU)
return -EINVAL;
@@ -1741,13 +1746,13 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
return -EINVAL;
}
if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) {
- if (new_mtu > IP6_MAX_MTU - dev->hard_header_len)
+ if (new_mtu > IP6_MAX_MTU - dev->hard_header_len - t_hlen)
return -EINVAL;
} else {
- if (new_mtu > IP_MAX_MTU - dev->hard_header_len)
+ if (new_mtu > IP_MAX_MTU - dev->hard_header_len - t_hlen)
return -EINVAL;
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
EXPORT_SYMBOL(ip6_tnl_change_mtu);
@@ -1756,7 +1761,7 @@ int ip6_tnl_get_iflink(const struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- return t->parms.link;
+ return READ_ONCE(t->parms.link);
}
EXPORT_SYMBOL(ip6_tnl_get_iflink);
@@ -1847,7 +1852,8 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->type = ARPHRD_TUNNEL6;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
netif_keep_dst(dev);
dev->features |= IPXIPX_FEATURES;
@@ -1873,13 +1879,10 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
ret = dst_cache_init(&t->dst_cache, GFP_KERNEL);
if (ret)
- goto free_stats;
+ return ret;
ret = gro_cells_init(&t->gro_cells, dev);
if (ret)
@@ -1890,21 +1893,18 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
t_hlen = t->hlen + sizeof(struct ipv6hdr);
dev->type = ARPHRD_TUNNEL6;
- dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
+ dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len - t_hlen;
netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
destroy_dst:
dst_cache_destroy(&t->dst_cache);
-free_stats:
- free_percpu(dev->tstats);
- dev->tstats = NULL;
return ret;
}
@@ -2151,7 +2151,7 @@ struct net *ip6_tnl_get_link_net(const struct net_device *dev)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- return tunnel->net;
+ return READ_ONCE(tunnel->net);
}
EXPORT_SYMBOL(ip6_tnl_get_link_net);
@@ -2261,7 +2261,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
+ ip6n->fb_tnl_dev->netns_local = true;
err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
if (err < 0)
@@ -2282,21 +2282,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
+static void __net_exit ip6_tnl_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- ip6_tnl_destroy_tunnels(net, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ ip6_tnl_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
- .exit_batch = ip6_tnl_exit_batch_net,
+ .exit_batch_rtnl = ip6_tnl_exit_batch_rtnl,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
};
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index e550240c85e1..590737c27537 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -174,11 +174,6 @@ vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t)
}
}
-static void vti6_dev_free(struct net_device *dev)
-{
- free_percpu(dev->tstats);
-}
-
static int vti6_tnl_create2(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
@@ -671,7 +666,8 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
dev->flags &= ~IFF_POINTOPOINT;
if (keep_mtu && dev->mtu) {
- dev->mtu = clamp(dev->mtu, dev->min_mtu, dev->max_mtu);
+ WRITE_ONCE(dev->mtu,
+ clamp(dev->mtu, dev->min_mtu, dev->max_mtu));
return;
}
@@ -892,7 +888,6 @@ static const struct net_device_ops vti6_netdev_ops = {
.ndo_uninit = vti6_dev_uninit,
.ndo_start_xmit = vti6_tnl_xmit,
.ndo_siocdevprivate = vti6_siocdevprivate,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -908,8 +903,8 @@ static void vti6_dev_setup(struct net_device *dev)
dev->netdev_ops = &vti6_netdev_ops;
dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true;
- dev->priv_destructor = vti6_dev_free;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
dev->type = ARPHRD_TUNNEL6;
dev->min_mtu = IPV4_MIN_MTU;
dev->max_mtu = IP_MAX_MTU - sizeof(struct ipv6hdr);
@@ -931,10 +926,8 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
t->dev = dev;
t->net = dev_net(dev);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
netdev_hold(dev, &t->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
}
@@ -1174,24 +1167,22 @@ err_alloc_dev:
return err;
}
-static void __net_exit vti6_exit_batch_net(struct list_head *net_list)
+static void __net_exit vti6_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
struct vti6_net *ip6n;
struct net *net;
- LIST_HEAD(list);
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
ip6n = net_generic(net, vti6_net_id);
- vti6_destroy_tunnels(ip6n, &list);
+ vti6_destroy_tunnels(ip6n, dev_to_kill);
}
- unregister_netdevice_many(&list);
- rtnl_unlock();
}
static struct pernet_operations vti6_net_ops = {
.init = vti6_init_net,
- .exit_batch = vti6_exit_batch_net,
+ .exit_batch_rtnl = vti6_exit_batch_rtnl,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
};
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9782c180fee6..535e9f72514c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -108,6 +108,11 @@ static void ipmr_expire_process(struct timer_list *t);
lockdep_rtnl_is_held() || \
list_empty(&net->ipv6.mr6_tables))
+static bool ip6mr_can_free_table(struct net *net)
+{
+ return !check_net(net) || !net_initialized(net);
+}
+
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -125,7 +130,7 @@ static struct mr_table *ip6mr_mr_table_iter(struct net *net,
return ret;
}
-static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *__ip6mr_get_table(struct net *net, u32 id)
{
struct mr_table *mrt;
@@ -136,6 +141,16 @@ static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
return NULL;
}
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+{
+ struct mr_table *mrt;
+
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, id);
+ rcu_read_unlock();
+ return mrt;
+}
+
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr_table **mrt)
{
@@ -177,7 +192,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
arg->table = fib_rule_get_table(rule, arg);
- mrt = ip6mr_get_table(rule->fr_net, arg->table);
+ mrt = __ip6mr_get_table(rule->fr_net, arg->table);
if (!mrt)
return -EAGAIN;
res->mrt = mrt;
@@ -276,7 +291,7 @@ static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR, extack);
}
-static unsigned int ip6mr_rules_seq_read(struct net *net)
+static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
}
@@ -291,6 +306,11 @@ EXPORT_SYMBOL(ip6mr_rule_default);
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
+static bool ip6mr_can_free_table(struct net *net)
+{
+ return !check_net(net);
+}
+
static struct mr_table *ip6mr_mr_table_iter(struct net *net,
struct mr_table *mrt)
{
@@ -304,6 +324,8 @@ static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
return net->ipv6.mrt6;
}
+#define __ip6mr_get_table ip6mr_get_table
+
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr_table **mrt)
{
@@ -335,7 +357,7 @@ static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb,
return 0;
}
-static unsigned int ip6mr_rules_seq_read(struct net *net)
+static unsigned int ip6mr_rules_seq_read(const struct net *net)
{
return 0;
}
@@ -382,7 +404,7 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
struct mr_table *mrt;
- mrt = ip6mr_get_table(net, id);
+ mrt = __ip6mr_get_table(net, id);
if (mrt)
return mrt;
@@ -392,6 +414,10 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
static void ip6mr_free_table(struct mr_table *mrt)
{
+ struct net *net = read_pnet(&mrt->net);
+
+ WARN_ON_ONCE(!ip6mr_can_free_table(net));
+
timer_shutdown_sync(&mrt->ipmr_expire_timer);
mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
@@ -411,13 +437,15 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
struct net *net = seq_file_net(seq);
struct mr_table *mrt;
- mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
+ if (!mrt) {
+ rcu_read_unlock();
return ERR_PTR(-ENOENT);
+ }
iter->mrt = mrt;
- rcu_read_lock();
return mr_vif_seq_start(seq, pos);
}
@@ -492,9 +520,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->_c.mfc_un.res.pkt,
- mfc->_c.mfc_un.res.bytes,
- mfc->_c.mfc_un.res.wrong_if);
+ atomic_long_read(&mfc->_c.mfc_un.res.pkt),
+ atomic_long_read(&mfc->_c.mfc_un.res.bytes),
+ atomic_long_read(&mfc->_c.mfc_un.res.wrong_if));
for (n = mfc->_c.mfc_un.res.minvif;
n < mfc->_c.mfc_un.res.maxvif; n++) {
if (VIF_EXISTS(mrt, n) &&
@@ -640,7 +668,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->netdev_ops = &reg_vif_netdev_ops;
dev->needs_free_netdev = true;
- dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->netns_local = true;
}
static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
@@ -856,7 +884,7 @@ static void ip6mr_update_thresholds(struct mr_table *mrt,
cache->mfc_un.res.maxvif = vifi + 1;
}
}
- cache->mfc_un.res.lastuse = jiffies;
+ WRITE_ONCE(cache->mfc_un.res.lastuse, jiffies);
}
static int mif6_add(struct net *net, struct mr_table *mrt,
@@ -1260,11 +1288,9 @@ static int ip6mr_device_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static unsigned int ip6mr_seq_read(struct net *net)
+static unsigned int ip6mr_seq_read(const struct net *net)
{
- ASSERT_RTNL();
-
- return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
+ return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net);
}
static int ip6mr_dump(struct net *net, struct notifier_block *nb,
@@ -1369,14 +1395,17 @@ static struct pernet_operations ip6mr_net_ops = {
.exit_batch = ip6mr_net_exit_batch,
};
+static const struct rtnl_msg_handler ip6mr_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = RTNL_FAMILY_IP6MR,
+ .msgtype = RTM_GETROUTE,
+ .doit = ip6mr_rtm_getroute, .dumpit = ip6mr_rtm_dumproute},
+};
+
int __init ip6_mr_init(void)
{
int err;
- mrt_cachep = kmem_cache_create("ip6_mrt_cache",
- sizeof(struct mfc6_cache),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ mrt_cachep = KMEM_CACHE(mfc6_cache, SLAB_HWCACHE_ALIGN);
if (!mrt_cachep)
return -ENOMEM;
@@ -1394,9 +1423,8 @@ int __init ip6_mr_init(void)
goto add_proto_fail;
}
#endif
- err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
- ip6mr_rtm_getroute, ip6mr_rtm_dumproute, 0);
- if (err == 0)
+ err = rtnl_register_many(ip6mr_rtnl_msg_handlers);
+ if (!err)
return 0;
#ifdef CONFIG_IPV6_PIMSM_V2
@@ -1411,9 +1439,9 @@ reg_pernet_fail:
return err;
}
-void ip6_mr_cleanup(void)
+void __init ip6_mr_cleanup(void)
{
- rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
+ rtnl_unregister_many(ip6mr_rtnl_msg_handlers);
#ifdef CONFIG_IPV6_PIMSM_V2
inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
#endif
@@ -1917,9 +1945,9 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
&sr->grp.sin6_addr);
if (c) {
- sr->pktcnt = c->_c.mfc_un.res.pkt;
- sr->bytecnt = c->_c.mfc_un.res.bytes;
- sr->wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr->pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
+ sr->bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
+ sr->wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
rcu_read_unlock();
return 0;
}
@@ -1989,9 +2017,9 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->_c.mfc_un.res.pkt;
- sr.bytecnt = c->_c.mfc_un.res.bytes;
- sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr.pktcnt = atomic_long_read(&c->_c.mfc_un.res.pkt);
+ sr.bytecnt = atomic_long_read(&c->_c.mfc_un.res.bytes);
+ sr.wrong_if = atomic_long_read(&c->_c.mfc_un.res.wrong_if);
rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
@@ -2114,9 +2142,9 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
int true_vifi = ip6mr_find_vif(mrt, dev);
vif = c->_c.mfc_parent;
- c->_c.mfc_un.res.pkt++;
- c->_c.mfc_un.res.bytes += skb->len;
- c->_c.mfc_un.res.lastuse = jiffies;
+ atomic_long_inc(&c->_c.mfc_un.res.pkt);
+ atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes);
+ WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies);
if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
@@ -2134,7 +2162,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
- c->_c.mfc_un.res.wrong_if++;
+ atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2276,13 +2304,15 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
int err;
struct mr_table *mrt;
struct mfc6_cache *cache;
- struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
- mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
- if (!mrt)
+ rcu_read_lock();
+ mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT);
+ if (!mrt) {
+ rcu_read_unlock();
return -ENOENT;
+ }
- rcu_read_lock();
cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
if (!cache && skb->dev) {
int vif = ip6mr_find_vif(mrt, skb->dev);
@@ -2434,8 +2464,7 @@ static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
errout:
kfree_skb(skb);
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
}
static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
@@ -2561,9 +2590,9 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
src = nla_get_in6_addr(tb[RTA_SRC]);
if (tb[RTA_DST])
grp = nla_get_in6_addr(tb[RTA_DST]);
- tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
+ tableid = nla_get_u32_default(tb[RTA_TABLE], 0);
- mrt = ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
+ mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT);
if (!mrt) {
NL_SET_ERR_MSG_MOD(extack, "MR table does not exist");
return -ENOENT;
@@ -2595,7 +2624,9 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
- struct fib_dump_filter filter = {};
+ struct fib_dump_filter filter = {
+ .rtnl_held = true,
+ };
int err;
if (cb->strict_check) {
@@ -2608,7 +2639,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (filter.table_id) {
struct mr_table *mrt;
- mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
+ mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR)
return skb->len;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 56c3c467f9de..1e225e6489ea 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -111,8 +111,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
}
- opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
- opt);
+ opt = unrcu_pointer(xchg(&inet6_sk(sk)->opt, RCU_INITIALIZER(opt)));
sk_dst_reset(sk);
return opt;
@@ -948,6 +947,8 @@ done:
if (optlen < sizeof(int))
goto e_inval;
retv = ip6_ra_control(sk, val);
+ if (retv == 0)
+ inet6_assign_bit(RTALERT, sk, valbool);
break;
case IPV6_FLOWLABEL_MGR:
retv = ipv6_flowlabel_opt(sk, optval, optlen);
@@ -984,7 +985,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
int err;
if (level == SOL_IP && sk->sk_type != SOCK_RAW)
- return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+ return ip_setsockopt(sk, level, optname, optval, optlen);
if (level != SOL_IPV6)
return -ENOPROTOOPT;
@@ -1346,7 +1347,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
if (val < 0)
- val = sock_net(sk)->ipv6.devconf_all->hop_limit;
+ val = READ_ONCE(sock_net(sk)->ipv6.devconf_all->hop_limit);
break;
}
@@ -1445,6 +1446,10 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->rxopt.bits.recvfragsize;
break;
+ case IPV6_ROUTER_ALERT:
+ val = inet6_test_bit(RTALERT, sk);
+ break;
+
case IPV6_ROUTER_ALERT_ISOLATE:
val = inet6_test_bit(RTALERT_ISOLATE, sk);
break;
@@ -1470,7 +1475,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
int err;
if (level == SOL_IP && sk->sk_type != SOCK_RAW)
- return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+ return ip_getsockopt(sk, level, optname, optval, optlen);
if (level != SOL_IPV6)
return -ENOPROTOOPT;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bc6e0a0bad3c..65831b4fee1f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -33,8 +33,10 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
+#include <linux/if_addr.h>
#include <linux/if_arp.h>
#include <linux/route.h>
+#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -47,6 +49,7 @@
#include <linux/netfilter_ipv6.h>
#include <net/net_namespace.h>
+#include <net/netlink.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -159,9 +162,9 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
int iv;
if (mld_in_v1_mode(idev))
- iv = idev->cnf.mldv1_unsolicited_report_interval;
+ iv = READ_ONCE(idev->cnf.mldv1_unsolicited_report_interval);
else
- iv = idev->cnf.mldv2_unsolicited_report_interval;
+ iv = READ_ONCE(idev->cnf.mldv2_unsolicited_report_interval);
return iv > 0 ? iv : 1;
}
@@ -586,7 +589,8 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
const struct in6_addr *group;
struct ipv6_mc_socklist *pmc;
struct ip6_sf_socklist *psl;
- int i, count, copycount;
+ unsigned int count;
+ int i, copycount;
group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
@@ -610,7 +614,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
psl = sock_dereference(pmc->sflist, sk);
count = psl ? psl->sl_count : 0;
- copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
+ copycount = min(count, gsf->gf_numsrc);
gsf->gf_numsrc = count;
for (i = 0; i < copycount; i++) {
struct sockaddr_in6 *psin6;
@@ -900,6 +904,41 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
return mc;
}
+static void inet6_ifmcaddr_notify(struct net_device *dev,
+ const struct ifmcaddr6 *ifmca, int event)
+{
+ struct inet6_fill_args fillargs = {
+ .portid = 0,
+ .seq = 0,
+ .event = event,
+ .flags = 0,
+ .netnsid = -1,
+ .force_rt_scope_universe = true,
+ };
+ struct net *net = dev_net(dev);
+ struct sk_buff *skb;
+ int err = -ENOMEM;
+
+ skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+ nla_total_size(sizeof(struct in6_addr)) +
+ nla_total_size(sizeof(struct ifa_cacheinfo)),
+ GFP_KERNEL);
+ if (!skb)
+ goto error;
+
+ err = inet6_fill_ifmcaddr(skb, ifmca, &fillargs);
+ if (err < 0) {
+ WARN_ON_ONCE(err == -EMSGSIZE);
+ nlmsg_free(skb);
+ goto error;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MCADDR, NULL, GFP_KERNEL);
+ return;
+error:
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_MCADDR, err);
+}
+
/*
* device multicast group inc (add if not found)
*/
@@ -947,6 +986,7 @@ static int __ipv6_dev_mc_inc(struct net_device *dev,
mld_del_delrec(idev, mc);
igmp6_group_added(mc);
+ inet6_ifmcaddr_notify(dev, mc, RTM_NEWMULTICAST);
mutex_unlock(&idev->mc_lock);
ma_put(mc);
return 0;
@@ -976,6 +1016,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
*map = ma->next;
igmp6_group_dropped(ma);
+ inet6_ifmcaddr_notify(idev->dev, ma,
+ RTM_DELMULTICAST);
ip6_mc_clear_src(ma);
mutex_unlock(&idev->mc_lock);
@@ -1020,29 +1062,31 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
rcu_read_lock();
idev = __in6_dev_get(dev);
- if (idev) {
- for_each_mc_rcu(idev, mc) {
- if (ipv6_addr_equal(&mc->mca_addr, group))
- break;
- }
- if (mc) {
- if (src_addr && !ipv6_addr_any(src_addr)) {
- struct ip6_sf_list *psf;
+ if (!idev)
+ goto unlock;
+ for_each_mc_rcu(idev, mc) {
+ if (ipv6_addr_equal(&mc->mca_addr, group))
+ break;
+ }
+ if (!mc)
+ goto unlock;
+ if (src_addr && !ipv6_addr_any(src_addr)) {
+ struct ip6_sf_list *psf;
- for_each_psf_rcu(mc, psf) {
- if (ipv6_addr_equal(&psf->sf_addr, src_addr))
- break;
- }
- if (psf)
- rv = psf->sf_count[MCAST_INCLUDE] ||
- psf->sf_count[MCAST_EXCLUDE] !=
- mc->mca_sfcount[MCAST_EXCLUDE];
- else
- rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0;
- } else
- rv = true; /* don't filter unspecified source */
+ for_each_psf_rcu(mc, psf) {
+ if (ipv6_addr_equal(&psf->sf_addr, src_addr))
+ break;
}
+ if (psf)
+ rv = READ_ONCE(psf->sf_count[MCAST_INCLUDE]) ||
+ READ_ONCE(psf->sf_count[MCAST_EXCLUDE]) !=
+ READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]);
+ else
+ rv = READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]) != 0;
+ } else {
+ rv = true; /* don't filter unspecified source */
}
+unlock:
rcu_read_unlock();
return rv;
}
@@ -1202,15 +1246,15 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
static int mld_force_mld_version(const struct inet6_dev *idev)
{
+ const struct net *net = dev_net(idev->dev);
+ int all_force;
+
+ all_force = READ_ONCE(net->ipv6.devconf_all->force_mld_version);
/* Normally, both are 0 here. If enforcement to a particular is
* being used, individual device enforcement will have a lower
* precedence over 'all' device (.../conf/all/force_mld_version).
*/
-
- if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0)
- return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version;
- else
- return idev->cnf.force_mld_version;
+ return all_force ?: READ_ONCE(idev->cnf.force_mld_version);
}
static bool mld_in_v2_mode_only(const struct inet6_dev *idev)
@@ -1729,21 +1773,19 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
struct net_device *dev = idev->dev;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- struct net *net = dev_net(dev);
const struct in6_addr *saddr;
struct in6_addr addr_buf;
struct mld2_report *pmr;
struct sk_buff *skb;
unsigned int size;
struct sock *sk;
- int err;
+ struct net *net;
- sk = net->ipv6.igmp_sk;
/* we assume size > sizeof(ra) here
* Also try to not allocate high-order pages for big MTU
*/
size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen;
- skb = sock_alloc_send_skb(sk, size, 1, &err);
+ skb = alloc_skb(size, GFP_KERNEL);
if (!skb)
return NULL;
@@ -1751,6 +1793,12 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
skb_reserve(skb, hlen);
skb_tailroom_reserve(skb, mtu, tlen);
+ rcu_read_lock();
+
+ net = dev_net_rcu(dev);
+ sk = net->ipv6.igmp_sk;
+ skb_set_owner_w(skb, sk);
+
if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
@@ -1762,6 +1810,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0);
+ rcu_read_unlock();
+
skb_put_data(skb, ra, sizeof(ra));
skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
@@ -2121,21 +2171,21 @@ static void mld_send_cr(struct inet6_dev *idev)
static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
{
- struct net *net = dev_net(dev);
- struct sock *sk = net->ipv6.igmp_sk;
+ const struct in6_addr *snd_addr, *saddr;
+ int err, len, payload_len, full_len;
+ struct in6_addr addr_buf;
struct inet6_dev *idev;
struct sk_buff *skb;
struct mld_msg *hdr;
- const struct in6_addr *snd_addr, *saddr;
- struct in6_addr addr_buf;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- int err, len, payload_len, full_len;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
- struct flowi6 fl6;
struct dst_entry *dst;
+ struct flowi6 fl6;
+ struct net *net;
+ struct sock *sk;
if (type == ICMPV6_MGM_REDUCTION)
snd_addr = &in6addr_linklocal_allrouters;
@@ -2146,19 +2196,21 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
payload_len = len + sizeof(ra);
full_len = sizeof(struct ipv6hdr) + payload_len;
- rcu_read_lock();
- IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS);
- rcu_read_unlock();
+ skb = alloc_skb(hlen + tlen + full_len, GFP_KERNEL);
- skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);
+ rcu_read_lock();
+ net = dev_net_rcu(dev);
+ idev = __in6_dev_get(dev);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
if (!skb) {
- rcu_read_lock();
- IP6_INC_STATS(net, __in6_dev_get(dev),
- IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
rcu_read_unlock();
return;
}
+ sk = net->ipv6.igmp_sk;
+ skb_set_owner_w(skb, sk);
+
skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, hlen);
@@ -2183,9 +2235,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
IPPROTO_ICMPV6,
csum_partial(hdr, len, 0));
- rcu_read_lock();
- idev = __in6_dev_get(skb->dev);
-
icmpv6_flow_init(sk, &fl6, type,
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
skb->dev->ifindex);
@@ -2284,7 +2333,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
/* source filter not found, or count wrong => bug */
return -ESRCH;
}
- psf->sf_count[sfmode]--;
+ WRITE_ONCE(psf->sf_count[sfmode], psf->sf_count[sfmode] - 1);
if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
struct inet6_dev *idev = pmc->idev;
@@ -2390,7 +2439,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
rcu_assign_pointer(pmc->mca_sources, psf);
}
}
- psf->sf_count[sfmode]++;
+ WRITE_ONCE(psf->sf_count[sfmode], psf->sf_count[sfmode] + 1);
return 0;
}
@@ -2502,7 +2551,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
sf_markstate(pmc);
isexclude = pmc->mca_sfmode == MCAST_EXCLUDE;
if (!delta)
- pmc->mca_sfcount[sfmode]++;
+ WRITE_ONCE(pmc->mca_sfcount[sfmode],
+ pmc->mca_sfcount[sfmode] + 1);
err = 0;
for (i = 0; i < sfcount; i++) {
err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]);
@@ -2513,7 +2563,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
int j;
if (!delta)
- pmc->mca_sfcount[sfmode]--;
+ WRITE_ONCE(pmc->mca_sfcount[sfmode],
+ pmc->mca_sfcount[sfmode] - 1);
for (j = 0; j < i; j++)
ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]);
} else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
@@ -2558,7 +2609,8 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
RCU_INIT_POINTER(pmc->mca_sources, NULL);
pmc->mca_sfmode = MCAST_EXCLUDE;
pmc->mca_sfcount[MCAST_INCLUDE] = 0;
- pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
+ /* Paired with the READ_ONCE() from ipv6_chk_mcast_addr() */
+ WRITE_ONCE(pmc->mca_sfcount[MCAST_EXCLUDE], 1);
}
/* called with mc_lock */
@@ -2719,7 +2771,6 @@ void ipv6_mc_down(struct inet6_dev *idev)
/* Should stop work after group drop. or we will
* start work again in mld_ifc_event()
*/
- synchronize_net();
mld_query_stop_work(idev);
mld_report_stop_work(idev);
@@ -3074,8 +3125,8 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
state->dev->ifindex, state->dev->name,
&state->im->mca_addr,
&psf->sf_addr,
- psf->sf_count[MCAST_INCLUDE],
- psf->sf_count[MCAST_EXCLUDE]);
+ READ_ONCE(psf->sf_count[MCAST_INCLUDE]),
+ READ_ONCE(psf->sf_count[MCAST_EXCLUDE]));
}
return 0;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a19999b30bc0..8699d1a188dc 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -200,9 +200,9 @@ static inline int ndisc_is_useropt(const struct net_device *dev,
return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
+ opt->nd_opt_type == ND_OPT_6CO ||
opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
- opt->nd_opt_type == ND_OPT_PREF64 ||
- ndisc_ops_is_useropt(dev, opt->nd_opt_type);
+ opt->nd_opt_type == ND_OPT_PREF64;
}
static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev,
@@ -227,6 +227,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
return NULL;
memset(ndopts, 0, sizeof(*ndopts));
while (opt_len) {
+ bool unknown = false;
int l;
if (opt_len < sizeof(struct nd_opt_hdr))
return NULL;
@@ -262,22 +263,23 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev,
break;
#endif
default:
- if (ndisc_is_useropt(dev, nd_opt)) {
- ndopts->nd_useropts_end = nd_opt;
- if (!ndopts->nd_useropts)
- ndopts->nd_useropts = nd_opt;
- } else {
- /*
- * Unknown options must be silently ignored,
- * to accommodate future extension to the
- * protocol.
- */
- ND_PRINTK(2, notice,
- "%s: ignored unsupported option; type=%d, len=%d\n",
- __func__,
- nd_opt->nd_opt_type,
- nd_opt->nd_opt_len);
- }
+ unknown = true;
+ }
+ if (ndisc_is_useropt(dev, nd_opt)) {
+ ndopts->nd_useropts_end = nd_opt;
+ if (!ndopts->nd_useropts)
+ ndopts->nd_useropts = nd_opt;
+ } else if (unknown) {
+ /*
+ * Unknown options must be silently ignored,
+ * to accommodate future extension to the
+ * protocol.
+ */
+ ND_PRINTK(2, notice,
+ "%s: ignored unsupported option; type=%d, len=%d\n",
+ __func__,
+ nd_opt->nd_opt_type,
+ nd_opt->nd_opt_len);
}
next_opt:
opt_len -= l;
@@ -416,15 +418,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
{
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
struct sk_buff *skb;
skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
- if (!skb) {
- ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
- __func__);
+ if (!skb)
return NULL;
- }
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
@@ -435,7 +433,9 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
/* Manually assign socket ownership as we avoid calling
* sock_alloc_send_pskb() to bypass wmem buffer limits
*/
- skb_set_owner_w(skb, sk);
+ rcu_read_lock();
+ skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk);
+ rcu_read_unlock();
return skb;
}
@@ -451,7 +451,7 @@ static void ip6_nd_hdr(struct sk_buff *skb,
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
- tclass = idev ? idev->cnf.ndisc_tclass : 0;
+ tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0;
rcu_read_unlock();
skb_push(skb, sizeof(*hdr));
@@ -471,16 +471,20 @@ static void ip6_nd_hdr(struct sk_buff *skb,
void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
+ struct icmp6hdr *icmp6h = icmp6_hdr(skb);
struct dst_entry *dst = skb_dst(skb);
- struct net *net = dev_net(skb->dev);
- struct sock *sk = net->ipv6.ndisc_sk;
struct inet6_dev *idev;
+ struct net *net;
+ struct sock *sk;
int err;
- struct icmp6hdr *icmp6h = icmp6_hdr(skb);
u8 type;
type = icmp6h->icmp6_type;
+ rcu_read_lock();
+
+ net = dev_net_rcu(skb->dev);
+ sk = net->ipv6.ndisc_sk;
if (!dst) {
struct flowi6 fl6;
int oif = skb->dev->ifindex;
@@ -488,6 +492,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
dst = icmp6_dst_alloc(skb->dev, &fl6);
if (IS_ERR(dst)) {
+ rcu_read_unlock();
kfree_skb(skb);
return;
}
@@ -502,7 +507,6 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
- rcu_read_lock();
idev = __in6_dev_get(dst->dev);
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
@@ -535,7 +539,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
src_addr = solicited_addr;
if (ifp->flags & IFA_F_OPTIMISTIC)
override = false;
- inc_opt |= ifp->idev->cnf.force_tllao;
+ inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao);
in6_ifa_put(ifp);
} else {
if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
@@ -903,8 +907,9 @@ have_ifp:
}
if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
- (idev->cnf.forwarding &&
- (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) &&
+ (READ_ONCE(idev->cnf.forwarding) &&
+ (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) ||
+ READ_ONCE(idev->cnf.proxy_ndp)) &&
(is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
skb->pkt_type != PACKET_HOST &&
@@ -929,7 +934,7 @@ have_ifp:
}
if (is_router < 0)
- is_router = idev->cnf.forwarding;
+ is_router = READ_ONCE(idev->cnf.forwarding);
if (dad) {
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
@@ -973,7 +978,7 @@ static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr)
{
struct inet6_dev *idev = __in6_dev_get(dev);
- switch (idev->cnf.accept_untracked_na) {
+ switch (READ_ONCE(idev->cnf.accept_untracked_na)) {
case 0: /* Don't accept untracked na (absent in neighbor cache) */
return 0;
case 1: /* Create new entries from na if currently untracked */
@@ -1024,7 +1029,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
* drop_unsolicited_na takes precedence over accept_untracked_na
*/
if (!msg->icmph.icmp6_solicited && idev &&
- idev->cnf.drop_unsolicited_na)
+ READ_ONCE(idev->cnf.drop_unsolicited_na))
return reason;
if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts))
@@ -1080,7 +1085,7 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
* Note that we don't do a (daddr == all-routers-mcast) check.
*/
new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE;
- if (!neigh && lladdr && idev && idev->cnf.forwarding) {
+ if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) {
if (accept_untracked_na(dev, saddr)) {
neigh = neigh_create(&nd_tbl, &msg->target, dev);
new_state = NUD_STALE;
@@ -1100,7 +1105,8 @@ static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
* has already sent a NA to us.
*/
if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
- net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
+ READ_ONCE(net->ipv6.devconf_all->forwarding) &&
+ READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
/* XXX: idev->cnf.proxy_ndp */
goto out;
@@ -1148,7 +1154,7 @@ static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb)
}
/* Don't accept RS if we're not in router mode */
- if (!idev->cnf.forwarding)
+ if (!READ_ONCE(idev->cnf.forwarding))
goto out;
/*
@@ -1237,6 +1243,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
struct ndisc_options ndopts;
struct fib6_info *rt = NULL;
struct inet6_dev *in6_dev;
+ struct fib6_table *table;
u32 defrtr_usr_metric;
unsigned int pref = 0;
__u32 old_if_flags;
@@ -1317,7 +1324,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
if (old_if_flags != in6_dev->if_flags)
send_ifinfo_notify = true;
- if (!in6_dev->cnf.accept_ra_defrtr) {
+ if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) {
ND_PRINTK(2, info,
"RA: %s, defrtr is false for dev: %s\n",
__func__, skb->dev->name);
@@ -1325,7 +1332,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
}
lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
- if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_lft) {
+ if (lifetime != 0 &&
+ lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) {
ND_PRINTK(2, info,
"RA: router lifetime (%ds) is too short: %s\n",
lifetime, skb->dev->name);
@@ -1336,7 +1344,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
* accept_ra_from_local is set to true.
*/
net = dev_net(in6_dev->dev);
- if (!in6_dev->cnf.accept_ra_from_local &&
+ if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) {
ND_PRINTK(2, info,
"RA from local address detected on dev: %s: default router ignored\n",
@@ -1348,7 +1356,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
pref = ra_msg->icmph.icmp6_router_pref;
/* 10b is handled as if it were 00b (medium) */
if (pref == ICMPV6_ROUTER_PREF_INVALID ||
- !in6_dev->cnf.accept_ra_rtr_pref)
+ !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref))
pref = ICMPV6_ROUTER_PREF_MEDIUM;
#endif
/* routes added from RAs do not use nexthop objects */
@@ -1382,7 +1390,8 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
neigh_release(neigh);
rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr,
- skb->dev, pref, defrtr_usr_metric);
+ skb->dev, pref, defrtr_usr_metric,
+ lifetime);
if (!rt) {
ND_PRINTK(0, err,
"RA: %s failed to add default route\n",
@@ -1409,12 +1418,21 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
}
- if (rt)
+ if (rt) {
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
fib6_set_expires(rt, jiffies + (HZ * lifetime));
- if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
+ fib6_add_gc_list(rt);
+
+ spin_unlock_bh(&table->tb6_lock);
+ }
+ if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 &&
ra_msg->icmph.icmp6_hop_limit) {
- if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
- in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+ if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <=
+ ra_msg->icmph.icmp6_hop_limit) {
+ WRITE_ONCE(in6_dev->cnf.hop_limit,
+ ra_msg->icmph.icmp6_hop_limit);
fib6_metric_set(rt, RTAX_HOPLIMIT,
ra_msg->icmph.icmp6_hop_limit);
} else {
@@ -1496,7 +1514,7 @@ skip_linkparms:
}
#ifdef CONFIG_IPV6_ROUTE_INFO
- if (!in6_dev->cnf.accept_ra_from_local &&
+ if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) &&
ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
in6_dev->dev, 0)) {
ND_PRINTK(2, info,
@@ -1505,7 +1523,7 @@ skip_linkparms:
goto skip_routeinfo;
}
- if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
+ if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) {
struct nd_opt_hdr *p;
for (p = ndopts.nd_opts_ri;
p;
@@ -1517,14 +1535,14 @@ skip_linkparms:
continue;
#endif
if (ri->prefix_len == 0 &&
- !in6_dev->cnf.accept_ra_defrtr)
+ !READ_ONCE(in6_dev->cnf.accept_ra_defrtr))
continue;
if (ri->lifetime != 0 &&
- ntohl(ri->lifetime) < in6_dev->cnf.accept_ra_min_lft)
+ ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft))
continue;
- if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
+ if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen))
continue;
- if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
+ if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen))
continue;
rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3,
&ipv6_hdr(skb)->saddr);
@@ -1544,7 +1562,7 @@ skip_routeinfo:
}
#endif
- if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
+ if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) {
struct nd_opt_hdr *p;
for (p = ndopts.nd_opts_pi;
p;
@@ -1555,7 +1573,7 @@ skip_routeinfo:
}
}
- if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) {
+ if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) {
__be32 n;
u32 mtu;
@@ -1569,8 +1587,8 @@ skip_routeinfo:
if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
- } else if (in6_dev->cnf.mtu6 != mtu) {
- in6_dev->cnf.mtu6 = mtu;
+ } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) {
+ WRITE_ONCE(in6_dev->cnf.mtu6, mtu);
fib6_metric_set(rt, RTAX_MTU, mtu);
rt6_mtu_change(skb->dev, mtu);
}
@@ -1678,7 +1696,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
bool ret;
if (netif_is_l3_master(skb->dev)) {
- dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), IPCB(skb)->iif);
if (!dev)
return;
}
@@ -1708,17 +1726,19 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
if (IS_ERR(dst))
return;
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_GATEWAY) {
ND_PRINTK(2, warn,
"Redirect: destination is not a neighbour\n");
goto release;
}
- peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1);
+
+ rcu_read_lock();
+ peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
ret = inet_peer_xrlim_allow(peer, 1*HZ);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
+
if (!ret)
goto release;
@@ -1804,7 +1824,7 @@ static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
if (!idev)
return true;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED &&
- idev->cnf.suppress_frag_ndisc) {
+ READ_ONCE(idev->cnf.suppress_frag_ndisc)) {
net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
return true;
}
@@ -1881,8 +1901,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
idev = in6_dev_get(dev);
if (!idev)
break;
- if (idev->cnf.ndisc_notify ||
- net->ipv6.devconf_all->ndisc_notify)
+ if (READ_ONCE(idev->cnf.ndisc_notify) ||
+ READ_ONCE(net->ipv6.devconf_all->ndisc_notify))
ndisc_send_unsol_na(dev);
in6_dev_put(idev);
break;
@@ -1891,8 +1911,8 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
if (!idev)
evict_nocarrier = true;
else {
- evict_nocarrier = idev->cnf.ndisc_evict_nocarrier &&
- net->ipv6.devconf_all->ndisc_evict_nocarrier;
+ evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) &&
+ READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier);
in6_dev_put(idev);
}
@@ -1922,13 +1942,13 @@ static struct notifier_block ndisc_netdev_notifier = {
};
#ifdef CONFIG_SYSCTL
-static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
+static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl,
const char *func, const char *dev_name)
{
static char warncomm[TASK_COMM_LEN];
static int warned;
if (strcmp(warncomm, current->comm) && warned < 5) {
- strcpy(warncomm, current->comm);
+ strscpy(warncomm, current->comm);
pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n",
warncomm, func,
dev_name, ctl->procname,
@@ -1937,7 +1957,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
}
}
-int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer,
+int ndisc_ifinfo_sysctl_change(const struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
struct net_device *dev = ctl->extra1;
@@ -1966,7 +1986,7 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer,
if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME))
idev->nd_parms->reachable_time =
neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));
- idev->tstamp = jiffies;
+ WRITE_ONCE(idev->tstamp, jiffies);
inet6_ifinfo_notify(RTM_NEWLINK, idev);
in6_dev_put(idev);
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 53d255838e6a..581ce055bf52 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -36,6 +36,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
.saddr = iph->saddr,
+ .flowlabel = ip6_flowinfo(iph),
};
int err;
@@ -126,7 +127,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
ktime_t tstamp = skb->tstamp;
struct ip6_frag_state state;
u8 *prevhdr, nexthdr = 0;
@@ -192,7 +193,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -225,7 +226,7 @@ slow_path:
goto blackhole;
}
- skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb2, tstamp, tstamp_type);
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 0ba62f4868f9..e087a8e97ba7 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -6,6 +6,17 @@
menu "IPv6: Netfilter Configuration"
depends on INET && IPV6 && NETFILTER
+# old sockopt interface and eval loop
+config IP6_NF_IPTABLES_LEGACY
+ tristate "Legacy IP6 tables support"
+ depends on INET && IPV6
+ select NETFILTER_XTABLES
+ default n
+ help
+ ip6tables is a legacy packet classifier.
+ This is not needed if you are using iptables over nftables
+ (iptables-nft).
+
config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
help
@@ -147,7 +158,7 @@ config IP6_NF_MATCH_MH
config IP6_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
- depends on IP6_NF_MANGLE || IP6_NF_RAW
+ depends on IP6_NF_MANGLE || IP6_NF_RAW || NFT_COMPAT
help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -186,6 +197,8 @@ config IP6_NF_TARGET_HL
config IP6_NF_FILTER
tristate "Packet filtering"
default m if NETFILTER_ADVANCED=n
+ select IP6_NF_IPTABLES_LEGACY
+ tristate
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@@ -195,7 +208,7 @@ config IP6_NF_FILTER
config IP6_NF_TARGET_REJECT
tristate "REJECT target support"
- depends on IP6_NF_FILTER
+ depends on IP6_NF_FILTER || NFT_COMPAT
select NF_REJECT_IPV6
default m if NETFILTER_ADVANCED=n
help
@@ -221,6 +234,7 @@ config IP6_NF_TARGET_SYNPROXY
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -230,6 +244,7 @@ config IP6_NF_MANGLE
config IP6_NF_RAW
tristate 'raw table support (required for TRACE)'
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `raw' table to ip6tables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
@@ -243,6 +258,7 @@ config IP6_NF_SECURITY
tristate "Security table"
depends on SECURITY
depends on NETFILTER_ADVANCED
+ select IP6_NF_IPTABLES_LEGACY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
@@ -254,6 +270,7 @@ config IP6_NF_NAT
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_NAT
+ select IP6_NF_IPTABLES_LEGACY
select NETFILTER_XT_NAT
help
This enables the `nat' table in ip6tables. This allows masquerading,
@@ -262,25 +279,23 @@ config IP6_NF_NAT
To compile it as a module, choose M here. If unsure, say N.
-if IP6_NF_NAT
-
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
select NETFILTER_XT_TARGET_MASQUERADE
+ depends on IP6_NF_NAT
help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects NETFILTER_XT_TARGET_MASQUERADE.
config IP6_NF_TARGET_NPT
tristate "NPT (Network Prefix translation) target support"
+ depends on IP6_NF_NAT || NFT_COMPAT
help
This option adds the `SNPT' and `DNPT' target, which perform
stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
To compile it as a module, choose M here. If unsure, say N.
-endif # IP6_NF_NAT
-
endif # IP6_NF_IPTABLES
endmenu
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index b8d6dc9aeeb6..66ce6fa5b2f5 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -4,7 +4,7 @@
#
# Link order matters here.
-obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
+obj-$(CONFIG_IP6_NF_IPTABLES_LEGACY) += ip6_tables.o
obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index fd9f049d6d41..7d5602950ae7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1125,6 +1125,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1133,6 +1135,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1501,6 +1505,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1509,6 +1515,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1765,7 +1773,7 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
goto out_free;
}
- ops = kmemdup(template_ops, sizeof(*ops) * num_ops, GFP_KERNEL);
+ ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL);
if (!ops) {
ret = -ENOMEM;
goto out_free;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index df785ebda0ca..e8992693e14a 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -43,7 +43,7 @@ static int ip6table_filter_table_init(struct net *net)
return -ENOMEM;
/* Entry 1 is the FORWARD hook */
((struct ip6t_standard *)repl->entries)[1].target.verdict =
- forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
+ forward ? -NF_ACCEPT - 1 : NF_DROP - 1;
err = ip6t_register_table(net, &packet_filter, repl, filter_ops);
kfree(repl);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 52cf104e3478..e119d4f090cc 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -147,23 +147,27 @@ static struct pernet_operations ip6table_nat_net_ops = {
static int __init ip6table_nat_init(void)
{
- int ret = xt_register_template(&nf_nat_ipv6_table,
- ip6table_nat_table_init);
+ int ret;
+ /* net->gen->ptr[ip6table_nat_net_id] must be allocated
+ * before calling ip6t_nat_register_lookups().
+ */
+ ret = register_pernet_subsys(&ip6table_nat_net_ops);
if (ret < 0)
return ret;
- ret = register_pernet_subsys(&ip6table_nat_net_ops);
+ ret = xt_register_template(&nf_nat_ipv6_table,
+ ip6table_nat_table_init);
if (ret)
- xt_unregister_template(&nf_nat_ipv6_table);
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
return ret;
}
static void __exit ip6table_nat_exit(void)
{
- unregister_pernet_subsys(&ip6table_nat_net_ops);
xt_unregister_template(&nf_nat_ipv6_table);
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
}
module_init(ip6table_nat_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b2dd48911c8d..4120e67a8ce6 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -62,7 +62,6 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
- { }
};
static int nf_ct_frag6_sysctl_register(struct net *net)
@@ -105,7 +104,7 @@ err_alloc:
static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
struct nft_ct_frag6_pernet *nf_frag = nf_frag_pernet(net);
- struct ctl_table *table;
+ const struct ctl_table *table;
table = nf_frag->nf_frag_frags_hdr->ctl_table_arg;
unregister_net_sysctl_table(nf_frag->nf_frag_frags_hdr);
@@ -155,6 +154,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
};
struct inet_frag_queue *q;
+ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)))
+ key.iif = 0;
+
q = inet_frag_find(nf_frag->fqdir, &key);
if (!q)
return NULL;
@@ -264,7 +267,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
- fq->q.mono_delivery_time = skb->mono_delivery_time;
+ fq->q.tstamp_type = skb->tstamp_type;
fq->q.meat += skb->len;
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
@@ -294,6 +297,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
}
skb_dst_drop(skb);
+ skb_orphan(skb);
return -EINPROGRESS;
insert_error:
@@ -327,9 +331,9 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
if (!reasm_data)
goto err;
- payload_len = ((skb->data - skb_network_header(skb)) -
+ payload_len = -skb_network_offset(skb) -
sizeof(struct ipv6hdr) + fq->q.len -
- sizeof(struct frag_hdr));
+ sizeof(struct frag_hdr);
if (payload_len > IPV6_MAXPLEN) {
net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
payload_len);
@@ -469,7 +473,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
- skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, hdr,
skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index a0a2de30be3e..0c39c77fe8a8 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -47,11 +47,12 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb,
void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
const struct in6_addr *gw, int oif)
{
+ local_bh_disable();
if (this_cpu_read(nf_skb_duplicated))
- return;
+ goto out;
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
- return;
+ goto out;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_reset_ct(skb);
@@ -69,6 +70,8 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
} else {
kfree_skb(skb);
}
+out:
+ local_bh_enable();
}
EXPORT_SYMBOL_GPL(nf_dup_ipv6);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 196dd4ecb5e2..9ae2b2725bf9 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -83,7 +83,7 @@ struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP,
- net->ipv6.devconf_all->hop_limit);
+ READ_ONCE(net->ipv6.devconf_all->hop_limit));
nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen);
nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr));
@@ -124,7 +124,7 @@ struct sk_buff *nf_reject_skb_v6_unreach(struct net *net,
skb_reserve(nskb, LL_MAX_HEADER);
nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6,
- net->ipv6.devconf_all->hop_limit);
+ READ_ONCE(net->ipv6.devconf_all->hop_limit));
skb_reset_transport_header(nskb);
icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr));
@@ -223,33 +223,23 @@ void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb,
const struct tcphdr *oth, unsigned int otcplen)
{
struct tcphdr *tcph;
- int needs_ack;
skb_reset_transport_header(nskb);
- tcph = skb_put(nskb, sizeof(struct tcphdr));
+ tcph = skb_put_zero(nskb, sizeof(struct tcphdr));
/* Truncate to length (no data) */
tcph->doff = sizeof(struct tcphdr)/4;
tcph->source = oth->dest;
tcph->dest = oth->source;
if (oth->ack) {
- needs_ack = 0;
tcph->seq = oth->ack_seq;
- tcph->ack_seq = 0;
} else {
- needs_ack = 1;
tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
otcplen - (oth->doff<<2));
- tcph->seq = 0;
+ tcph->ack = 1;
}
- /* Reset flags */
- ((u_int8_t *)tcph)[13] = 0;
tcph->rst = 1;
- tcph->ack = needs_ack;
- tcph->window = 0;
- tcph->urg_ptr = 0;
- tcph->check = 0;
/* Adjust TCP checksum */
tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
@@ -278,13 +268,12 @@ static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in)
void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
int hook)
{
- struct sk_buff *nskb;
- struct tcphdr _otcph;
- const struct tcphdr *otcph;
- unsigned int otcplen, hh_len;
const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
- struct ipv6hdr *ip6h;
struct dst_entry *dst = NULL;
+ const struct tcphdr *otcph;
+ struct sk_buff *nskb;
+ struct tcphdr _otcph;
+ unsigned int otcplen;
struct flowi6 fl6;
if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
@@ -323,9 +312,8 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
if (IS_ERR(dst))
return;
- hh_len = (dst->dev->hard_header_len + 15)&~15;
- nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
- + sizeof(struct tcphdr) + dst->trailer_len,
+ nskb = alloc_skb(LL_MAX_HEADER + sizeof(struct ipv6hdr) +
+ sizeof(struct tcphdr) + dst->trailer_len,
GFP_ATOMIC);
if (!nskb) {
@@ -338,9 +326,8 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
nskb->mark = fl6.flowi6_mark;
- skb_reserve(nskb, hh_len + dst->header_len);
- ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP,
- ip6_dst_hoplimit(dst));
+ skb_reserve(nskb, LL_MAX_HEADER);
+ nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, ip6_dst_hoplimit(dst));
nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen);
nf_ct_attach(nskb, oldskb);
@@ -355,6 +342,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
*/
if (nf_bridge_info_exists(oldskb)) {
struct ethhdr *oeth = eth_hdr(oldskb);
+ struct ipv6hdr *ip6h = ipv6_hdr(nskb);
struct net_device *br_indev;
br_indev = nf_bridge_get_physindev(oldskb, net);
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index c82f3fdd4a65..492a811828a7 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -38,13 +38,13 @@ static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
sizeof(struct in6_addr));
if (err < 0)
return err;
if (tb[NFTA_DUP_SREG_DEV])
- err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ err = nft_parse_register_load(ctx, tb[NFTA_DUP_SREG_DEV],
&priv->sreg_dev, sizeof(int));
return err;
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 36dc14b34388..c9f1634b3838 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -41,8 +41,6 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
lookup_flags |= RT6_LOOKUP_F_IFACE;
fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
- } else if (priv->flags & NFTA_FIB_F_IIF) {
- fl6->flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);
}
if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
@@ -75,6 +73,8 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
+ fl6.flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);
+
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
@@ -165,6 +165,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
.flowi6_iif = LOOPBACK_IFINDEX,
.flowi6_proto = pkt->tprot,
.flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
+ .flowi6_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
};
struct rt6_info *rt;
int lookup_flags;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index b5205311f372..806d4b5dd1e6 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -111,9 +111,9 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev)
- hoplimit = idev->cnf.hop_limit;
+ hoplimit = READ_ONCE(idev->cnf.hop_limit);
else
- hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
+ hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit);
rcu_read_unlock();
}
return hoplimit;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ef2059c88955..46b8adf6e7f8 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -119,6 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EINVAL;
ipcm6_init_sk(&ipc6, sk);
+ ipc6.sockc.priority = READ_ONCE(sk->sk_priority);
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
@@ -154,7 +155,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, false);
if (IS_ERR(dst))
return PTR_ERR(dst);
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 6d1d9221649d..752327b10dde 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -27,7 +27,7 @@
#include <net/ipv6.h>
#define MAX4(a, b, c, d) \
- max_t(u32, max_t(u32, a, b), max_t(u32, c, d))
+ MAX_T(u32, MAX_T(u32, a, b), MAX_T(u32, c, d))
#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \
IPSTATS_MIB_MAX, ICMP_MIB_MAX)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 03dbb874c363..a45aba090aa4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -160,6 +160,13 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
if (!raw_v6_match(net, sk, nexthdr, daddr, saddr,
inet6_iif(skb), inet6_sdif(skb)))
continue;
+
+ if (atomic_read(&sk->sk_rmem_alloc) >=
+ READ_ONCE(sk->sk_rcvbuf)) {
+ atomic_inc(&sk->sk_drops);
+ continue;
+ }
+
delivered = true;
switch (nexthdr) {
case IPPROTO_ICMPV6:
@@ -288,8 +295,7 @@ out:
}
static void rawv6_err(struct sock *sk, struct sk_buff *skb,
- struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+ u8 type, u8 code, int offset, __be32 info)
{
bool recverr = inet6_test_bit(RECVERR6, sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -344,7 +350,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr,
inet6_iif(skb), inet6_iif(skb)))
continue;
- rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
+ rawv6_err(sk, skb, type, code, inner_offset, info);
}
rcu_read_unlock();
}
@@ -356,14 +362,14 @@ static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
+ sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
/* Charge it to the socket. */
skb_dst_drop(skb);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return NET_RX_DROP;
}
@@ -384,7 +390,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
+ sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
nf_reset_ct(skb);
@@ -409,7 +415,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (inet_test_bit(HDRINCL, sk)) {
if (skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
+ sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
}
@@ -592,7 +598,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
struct ipv6hdr *iph;
struct sk_buff *skb;
int err;
- struct rt6_info *rt = (struct rt6_info *)*dstp;
+ struct rt6_info *rt = dst_rt6_info(*dstp);
int hlen = LL_RESERVED_SPACE(rt->dst.dev);
int tlen = rt->dst.dev->needed_tailroom;
@@ -613,9 +619,9 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb_reserve(skb, hlen);
skb->protocol = htons(ETH_P_IPV6);
- skb->priority = READ_ONCE(sk->sk_priority);
+ skb->priority = sockc->priority;
skb->mark = sockc->mark;
- skb->tstamp = sockc->transmit_time;
+ skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid);
skb_put(skb, length);
skb_reset_network_header(skb);
@@ -623,7 +629,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->ip_summed = CHECKSUM_NONE;
- skb_setup_tx_timestamp(skb, sockc->tsflags);
+ skb_setup_tx_timestamp(skb, sockc);
if (flags & MSG_CONFIRM)
skb_set_dst_pending_confirm(skb, 1);
@@ -774,6 +780,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = fl6.flowi6_mark;
+ ipc6.sockc.priority = READ_ONCE(sk->sk_priority);
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
@@ -911,7 +918,7 @@ back_from_confirm:
ipc6.opt = opt;
lock_sock(sk);
err = ip6_append_data(sk, raw6_getfrag, &rfv,
- len, 0, &ipc6, &fl6, (struct rt6_info *)dst,
+ len, 0, &ipc6, &fl6, dst_rt6_info(dst),
msg->msg_flags);
if (err)
@@ -935,7 +942,7 @@ do_confirm:
goto done;
}
-static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
+static int rawv6_seticmpfilter(struct sock *sk, int optname,
sockptr_t optval, int optlen)
{
switch (optname) {
@@ -952,7 +959,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
return 0;
}
-static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
+static int rawv6_geticmpfilter(struct sock *sk, int optname,
char __user *optval, int __user *optlen)
{
int len;
@@ -1038,7 +1045,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
case SOL_ICMPV6:
if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -EOPNOTSUPP;
- return rawv6_seticmpfilter(sk, level, optname, optval, optlen);
+ return rawv6_seticmpfilter(sk, optname, optval, optlen);
case SOL_IPV6:
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
@@ -1099,7 +1106,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
case SOL_ICMPV6:
if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
return -EOPNOTSUPP;
- return rawv6_geticmpfilter(sk, level, optname, optval, optlen);
+ return rawv6_geticmpfilter(sk, optname, optval, optlen);
case SOL_IPV6:
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5ebc47da1000..a48be617a8ab 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -198,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
- fq->q.mono_delivery_time = skb->mono_delivery_time;
+ fq->q.tstamp_type = skb->tstamp_type;
fq->q.meat += skb->len;
fq->ecn |= ecn;
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
@@ -272,9 +272,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
if (!reasm_data)
goto out_oom;
- payload_len = ((skb->data - skb_network_header(skb)) -
+ payload_len = -skb_network_offset(skb) -
sizeof(struct ipv6hdr) + fq->q.len -
- sizeof(struct frag_hdr));
+ sizeof(struct frag_hdr);
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
@@ -369,7 +369,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
* the source of the fragment, with the Pointer field set to zero.
*/
nexthdr = hdr->nexthdr;
- if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) {
+ if (ipv6frag_thdr_truncated(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr), &nexthdr)) {
__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);
@@ -436,7 +436,6 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
/* secret interval has been deprecated */
@@ -449,7 +448,6 @@ static struct ctl_table ip6_frags_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- { }
};
static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
@@ -487,7 +485,7 @@ err_alloc:
static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
table = net->ipv6.sysctl.frags_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ef815ba583a8..15ce21afc8c6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -87,7 +87,8 @@ struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
INDIRECT_CALLABLE_SCOPE
unsigned int ip6_mtu(const struct dst_entry *dst);
-static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+static void ip6_negative_advice(struct sock *sk,
+ struct dst_entry *dst);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
struct net_device *dev);
@@ -130,7 +131,6 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
struct uncached_list {
spinlock_t lock;
struct list_head head;
- struct list_head quarantine;
};
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
@@ -174,7 +174,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev)
struct net_device *rt_dev = rt->dst.dev;
bool handled = false;
- if (rt_idev->dev == dev) {
+ if (rt_idev && rt_idev->dev == dev) {
rt->rt6i_idev = in6_dev_get(blackhole_netdev);
in6_dev_put(rt_idev);
handled = true;
@@ -188,8 +188,7 @@ static void rt6_uncached_list_flush_dev(struct net_device *dev)
handled = true;
}
if (handled)
- list_move(&rt->dst.rt_uncached,
- &ul->quarantine);
+ list_del_init(&rt->dst.rt_uncached);
}
spin_unlock_bh(&ul->lock);
}
@@ -226,7 +225,7 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
struct sk_buff *skb,
const void *daddr)
{
- const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
+ const struct rt6_info *rt = dst_rt6_info(dst);
return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
dst->dev, skb, daddr);
@@ -234,8 +233,8 @@ static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
+ const struct rt6_info *rt = dst_rt6_info(dst);
struct net_device *dev = dst->dev;
- struct rt6_info *rt = (struct rt6_info *)dst;
daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr);
if (!daddr)
@@ -354,7 +353,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct fib6_info *from;
struct inet6_dev *idev;
@@ -367,14 +366,15 @@ static void ip6_dst_destroy(struct dst_entry *dst)
in6_dev_put(idev);
}
- from = xchg((__force struct fib6_info **)&rt->from, NULL);
+ from = unrcu_pointer(xchg(&rt->from, NULL));
fib6_info_release(from);
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
{
- struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
struct inet6_dev *idev = rt->rt6i_idev;
+ struct fib6_info *from;
if (idev && idev->dev != blackhole_netdev) {
struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
@@ -384,6 +384,8 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
in6_dev_put(idev);
}
}
+ from = unrcu_pointer(xchg(&rt->from, NULL));
+ fib6_info_release(from);
}
static bool __rt6_check_expired(const struct rt6_info *rt)
@@ -414,8 +416,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
struct flowi6 *fl6, int oif, bool have_oif_match,
const struct sk_buff *skb, int strict)
{
- struct fib6_info *sibling, *next_sibling;
struct fib6_info *match = res->f6i;
+ struct fib6_info *sibling;
if (!match->nh && (!match->fib6_nsiblings || have_oif_match))
goto out;
@@ -441,8 +443,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
goto out;
- list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
- fib6_siblings) {
+ list_for_each_entry_rcu(sibling, &match->fib6_siblings,
+ fib6_siblings) {
const struct fib6_nh *nh = sibling->fib6_nh;
int nh_upper_bound;
@@ -637,6 +639,8 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
rcu_read_lock();
last_probe = READ_ONCE(fib6_nh->last_probe);
idev = __in6_dev_get(dev);
+ if (!idev)
+ goto out;
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
if (READ_ONCE(neigh->nud_state) & NUD_VALID)
@@ -645,14 +649,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
write_lock_bh(&neigh->lock);
if (!(neigh->nud_state & NUD_VALID) &&
time_after(jiffies,
- neigh->updated + idev->cnf.rtr_probe_interval)) {
+ neigh->updated +
+ READ_ONCE(idev->cnf.rtr_probe_interval))) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (work)
__neigh_set_probe_once(neigh);
}
write_unlock_bh(&neigh->lock);
} else if (time_after(jiffies, last_probe +
- idev->cnf.rtr_probe_interval)) {
+ READ_ONCE(idev->cnf.rtr_probe_interval))) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
@@ -931,6 +936,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
struct net *net = dev_net(dev);
struct route_info *rinfo = (struct route_info *) opt;
struct in6_addr prefix_buf, *prefix;
+ struct fib6_table *table;
unsigned int pref;
unsigned long lifetime;
struct fib6_info *rt;
@@ -989,10 +995,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
(rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
if (rt) {
- if (!addrconf_finite_timeout(lifetime))
+ table = rt->fib6_table;
+ spin_lock_bh(&table->tb6_lock);
+
+ if (!addrconf_finite_timeout(lifetime)) {
fib6_clean_expires(rt);
- else
+ fib6_remove_gc_list(rt);
+ } else {
fib6_set_expires(rt, jiffies + HZ * lifetime);
+ fib6_add_gc_list(rt);
+ }
+
+ spin_unlock_bh(&table->tb6_lock);
fib6_info_release(rt);
}
@@ -1278,7 +1292,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
- return (struct rt6_info *) dst;
+ return dst_rt6_info(dst);
dst_release(dst);
@@ -1398,6 +1412,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
struct rt6_info *prev, **p;
p = this_cpu_ptr(res->nh->rt6i_pcpu);
+ /* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */
prev = xchg(p, NULL);
if (prev) {
dst_dev_put(&prev->dst);
@@ -1426,7 +1441,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
if (res->f6i->fib6_destroying) {
struct fib6_info *from;
- from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
+ from = unrcu_pointer(xchg(&pcpu_rt->from, NULL));
fib6_info_release(from);
}
@@ -1443,7 +1458,6 @@ static DEFINE_SPINLOCK(rt6_exception_lock);
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
struct rt6_exception *rt6_ex)
{
- struct fib6_info *from;
struct net *net;
if (!bucket || !rt6_ex)
@@ -1455,8 +1469,6 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
/* purge completely the exception to allow releasing the held resources:
* some [sk] cache may keep the dst around for unlimited time
*/
- from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
- fib6_info_release(from);
dst_dev_put(&rt6_ex->rt6i->dst);
hlist_del_rcu(&rt6_ex->hlist);
@@ -1587,7 +1599,7 @@ static unsigned int fib6_mtu(const struct fib6_result *res)
rcu_read_lock();
idev = __in6_dev_get(dev);
- mtu = idev->cnf.mtu6;
+ mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
}
@@ -2085,12 +2097,12 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
*/
if (!(rt->rt6i_flags & RTF_EXPIRES)) {
if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
- RT6_TRACE("aging clone %p\n", rt);
+ pr_debug("aging clone %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
} else if (time_after(jiffies, rt->dst.expires)) {
- RT6_TRACE("purging expired route %p\n", rt);
+ pr_debug("purging expired route %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
@@ -2101,8 +2113,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
if (!(neigh && (neigh->flags & NTF_ROUTER))) {
- RT6_TRACE("purging route %p via non-router but gateway\n",
- rt);
+ pr_debug("purging route %p via non-router but gateway\n",
+ rt);
rt6_remove_exception(bucket, rt6_ex);
return;
}
@@ -2211,7 +2223,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
strict |= flags & RT6_LOOKUP_F_IFACE;
strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
- if (net->ipv6.devconf_all->forwarding == 0)
+ if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
rcu_read_lock();
@@ -2362,7 +2374,7 @@ static u32 rt6_multipath_custom_hash_outer(const struct net *net,
hash_keys.ports.dst = keys.ports.dst;
*p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
static u32 rt6_multipath_custom_hash_inner(const struct net *net,
@@ -2411,7 +2423,7 @@ static u32 rt6_multipath_custom_hash_inner(const struct net *net,
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
hash_keys.ports.dst = keys.ports.dst;
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
static u32 rt6_multipath_custom_hash_skb(const struct net *net,
@@ -2450,7 +2462,7 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
hash_keys.ports.dst = fl6->fl6_dport;
- return flow_hash_from_keys(&hash_keys);
+ return fib_multipath_hash_from_keys(net, &hash_keys);
}
/* if skb is set it will be used and fl6 can be NULL */
@@ -2472,7 +2484,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 1:
if (skb) {
@@ -2504,7 +2516,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.ports.dst = fl6->fl6_dport;
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 2:
memset(&hash_keys, 0, sizeof(hash_keys));
@@ -2541,7 +2553,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
- mhash = flow_hash_from_keys(&hash_keys);
+ mhash = fib_multipath_hash_from_keys(net, &hash_keys);
break;
case 3:
if (skb)
@@ -2637,7 +2649,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net,
rcu_read_lock();
dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
- rt6 = (struct rt6_info *)dst;
+ rt6 = dst_rt6_info(dst);
/* For dst cached in uncached_list, refcnt is already taken. */
if (list_empty(&rt6->dst.rt_uncached) && !dst_hold_safe(dst)) {
dst = &net->ipv6.ip6_null_entry->dst;
@@ -2651,7 +2663,7 @@ EXPORT_SYMBOL_GPL(ip6_route_output_flags);
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
- struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
+ struct rt6_info *rt, *ort = dst_rt6_info(dst_orig);
struct net_device *loopback_dev = net->loopback_dev;
struct dst_entry *new = NULL;
@@ -2734,7 +2746,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
struct fib6_info *from;
struct rt6_info *rt;
- rt = container_of(dst, struct rt6_info, dst);
+ rt = dst_rt6_info(dst);
if (rt->sernum)
return rt6_is_valid(rt) ? dst : NULL;
@@ -2760,24 +2772,24 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
}
EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
-static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
+static void ip6_negative_advice(struct sock *sk,
+ struct dst_entry *dst)
{
- struct rt6_info *rt = (struct rt6_info *) dst;
+ struct rt6_info *rt = dst_rt6_info(dst);
- if (rt) {
- if (rt->rt6i_flags & RTF_CACHE) {
- rcu_read_lock();
- if (rt6_check_expired(rt)) {
- rt6_remove_exception_rt(rt);
- dst = NULL;
- }
- rcu_read_unlock();
- } else {
- dst_release(dst);
- dst = NULL;
+ if (rt->rt6i_flags & RTF_CACHE) {
+ rcu_read_lock();
+ if (rt6_check_expired(rt)) {
+ /* rt/dst can not be destroyed yet,
+ * because of rcu_read_lock()
+ */
+ sk_dst_reset(sk);
+ rt6_remove_exception_rt(rt);
}
+ rcu_read_unlock();
+ return;
}
- return dst;
+ sk_dst_reset(sk);
}
static void ip6_link_failure(struct sk_buff *skb)
@@ -2786,7 +2798,7 @@ static void ip6_link_failure(struct sk_buff *skb)
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
- rt = (struct rt6_info *) skb_dst(skb);
+ rt = dst_rt6_info(skb_dst(skb));
if (rt) {
rcu_read_lock();
if (rt->rt6i_flags & RTF_CACHE) {
@@ -2842,7 +2854,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
bool confirm_neigh)
{
const struct in6_addr *daddr, *saddr;
- struct rt6_info *rt6 = (struct rt6_info *)dst;
+ struct rt6_info *rt6 = dst_rt6_info(dst);
/* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU)
* IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it.
@@ -3184,13 +3196,18 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
struct net_device *dev = dst->dev;
unsigned int mtu = dst_mtu(dst);
- struct net *net = dev_net(dev);
+ struct net *net;
mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+ rcu_read_lock();
+
+ net = dev_net_rcu(dev);
if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
+ rcu_read_unlock();
+
/*
* Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
* corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
@@ -3240,8 +3257,8 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
mtu = IPV6_MIN_MTU;
idev = __in6_dev_get(dev);
- if (idev && idev->cnf.mtu6 > mtu)
- mtu = idev->cnf.mtu6;
+ if (idev)
+ mtu = max_t(u32, mtu, READ_ONCE(idev->cnf.mtu6));
}
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
@@ -3591,7 +3608,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
if (!dev)
goto out;
- if (idev->cnf.disable_ipv6) {
+ if (!idev || idev->cnf.disable_ipv6) {
NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
err = -EACCES;
goto out;
@@ -3627,7 +3644,8 @@ out:
in6_dev_put(idev);
if (err) {
- lwtstate_put(fib6_nh->fib_nh_lws);
+ fib_nh_common_release(&fib6_nh->nh_common);
+ fib6_nh->nh_common.nhc_pcpu_rth_output = NULL;
fib6_nh->fib_nh_lws = NULL;
netdev_put(dev, dev_tracker);
}
@@ -3750,7 +3768,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!rt)
goto out;
- rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
+ rt->fib6_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len,
extack);
if (IS_ERR(rt->fib6_metrics)) {
err = PTR_ERR(rt->fib6_metrics);
@@ -3765,8 +3783,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (cfg->fc_flags & RTF_EXPIRES)
fib6_set_expires(rt, jiffies +
clock_t_to_jiffies(cfg->fc_expires));
- else
- fib6_clean_expires(rt);
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
@@ -3787,10 +3803,12 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (nh) {
if (rt->fib6_src.plen) {
NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
+ err = -EINVAL;
goto out_free;
}
if (!nexthop_get(nh)) {
NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+ err = -ENOENT;
goto out_free;
}
rt->nh = nh;
@@ -4142,7 +4160,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
in6_dev = __in6_dev_get(skb->dev);
if (!in6_dev)
return;
- if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
+ if (READ_ONCE(in6_dev->cnf.forwarding) ||
+ !READ_ONCE(in6_dev->cnf.accept_redirects))
return;
/* RFC2461 8.1:
@@ -4165,7 +4184,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
}
}
- rt = (struct rt6_info *) dst;
+ rt = dst_rt6_info(dst);
if (rt->rt6i_flags & RTF_REJECT) {
net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
return;
@@ -4355,7 +4374,8 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
const struct in6_addr *gwaddr,
struct net_device *dev,
unsigned int pref,
- u32 defrtr_usr_metric)
+ u32 defrtr_usr_metric,
+ int lifetime)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
@@ -4368,6 +4388,7 @@ struct fib6_info *rt6_add_dflt_router(struct net *net,
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = net,
+ .fc_expires = jiffies_to_clock_t(lifetime * HZ),
};
cfg.fc_gateway = *gwaddr;
@@ -4434,7 +4455,7 @@ static void rtmsg_to_fib6_config(struct net *net,
.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
: RT6_TABLE_MAIN,
.fc_ifindex = rtmsg->rtmsg_ifindex,
- .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
+ .fc_metric = rtmsg->rtmsg_metric,
.fc_expires = rtmsg->rtmsg_info,
.fc_dst_len = rtmsg->rtmsg_dst_len,
.fc_src_len = rtmsg->rtmsg_src_len,
@@ -4464,6 +4485,9 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
+ /* Only do the default setting of fc_metric in route adding */
+ if (cfg.fc_metric == 0)
+ cfg.fc_metric = IP6_RT_PRIO_USER;
err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
break;
case SIOCDELRT:
@@ -4574,8 +4598,8 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
f6i->dst_nocount = true;
if (!anycast &&
- (net->ipv6.devconf_all->disable_policy ||
- idev->cnf.disable_policy))
+ (READ_ONCE(net->ipv6.devconf_all->disable_policy) ||
+ READ_ONCE(idev->cnf.disable_policy)))
f6i->dst_nopolicy = true;
}
@@ -4989,6 +5013,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_SPORT] = { .type = NLA_U16 },
[RTA_DPORT] = { .type = NLA_U16 },
[RTA_NH_ID] = { .type = NLA_U32 },
+ [RTA_FLOWLABEL] = { .type = NLA_BE32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -5014,6 +5039,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
+ if (tb[RTA_FLOWLABEL]) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
+ "Flow label cannot be specified for this operation");
+ goto errout;
+ }
+
*cfg = (struct fib6_config){
.fc_table = rtm->rtm_table,
.fc_dst_len = rtm->rtm_dst_len,
@@ -5179,14 +5210,18 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
* nexthop. Since sibling routes are always added at the end of
* the list, find the first sibling of the last route appended
*/
+ rcu_read_lock();
+
if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
- rt = list_first_entry(&rt_last->fib6_siblings,
- struct fib6_info,
- fib6_siblings);
+ rt = list_first_or_null_rcu(&rt_last->fib6_siblings,
+ struct fib6_info,
+ fib6_siblings);
}
if (rt)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
+
+ rcu_read_unlock();
}
static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
@@ -5531,17 +5566,21 @@ static size_t rt6_nlmsg_size(struct fib6_info *f6i)
nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
&nexthop_len);
} else {
- struct fib6_info *sibling, *next_sibling;
struct fib6_nh *nh = f6i->fib6_nh;
+ struct fib6_info *sibling;
nexthop_len = 0;
if (f6i->fib6_nsiblings) {
rt6_nh_nlmsg_size(nh, &nexthop_len);
- list_for_each_entry_safe(sibling, next_sibling,
- &f6i->fib6_siblings, fib6_siblings) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
}
+
+ rcu_read_unlock();
}
nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
@@ -5597,7 +5636,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
int iif, int type, u32 portid, u32 seq,
unsigned int flags)
{
- struct rt6_info *rt6 = (struct rt6_info *)dst;
+ struct rt6_info *rt6 = dst_rt6_info(dst);
struct rt6key *rt6_dst, *rt6_src;
u32 *pmetrics, table, rt6_flags;
unsigned char nh_flags = 0;
@@ -5671,7 +5710,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
} else if (dest) {
struct in6_addr saddr_buf;
- if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
+ if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 &&
nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
goto nla_put_failure;
}
@@ -5705,7 +5744,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
goto nla_put_failure;
} else if (rt->fib6_nsiblings) {
- struct fib6_info *sibling, *next_sibling;
+ struct fib6_info *sibling;
struct nlattr *mp;
mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
@@ -5717,14 +5756,21 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
0) < 0)
goto nla_put_failure;
- list_for_each_entry_safe(sibling, next_sibling,
- &rt->fib6_siblings, fib6_siblings) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sibling, &rt->fib6_siblings,
+ fib6_siblings) {
if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
sibling->fib6_nh->fib_nh_weight,
- AF_INET6, 0) < 0)
+ AF_INET6, 0) < 0) {
+ rcu_read_unlock();
+
goto nla_put_failure;
+ }
}
+ rcu_read_unlock();
+
nla_nest_end(skb, mp);
} else if (rt->nh) {
if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id))
@@ -5982,6 +6028,13 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
return -EINVAL;
}
+ if (tb[RTA_FLOWLABEL] &&
+ (nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL],
+ "Invalid flow label");
+ return -EINVAL;
+ }
+
for (i = 0; i <= RTA_MAX; i++) {
if (!tb[i])
continue;
@@ -5996,6 +6049,7 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
case RTA_SPORT:
case RTA_DPORT:
case RTA_IP_PROTO:
+ case RTA_FLOWLABEL:
break;
default:
NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
@@ -6018,6 +6072,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct sk_buff *skb;
struct rtmsg *rtm;
struct flowi6 fl6 = {};
+ __be32 flowlabel;
bool fibmatch;
err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
@@ -6026,7 +6081,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = -EINVAL;
rtm = nlmsg_data(nlh);
- fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
if (tb[RTA_SRC]) {
@@ -6072,6 +6126,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
}
+ flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0);
+ fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel);
+
if (iif) {
struct net_device *dev;
int flags = 0;
@@ -6100,7 +6157,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
- rt = container_of(dst, struct rt6_info, dst);
+ rt = dst_rt6_info(dst);
if (rt->dst.error) {
err = rt->dst.error;
ip6_rt_put(rt);
@@ -6161,7 +6218,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
err = -ENOBUFS;
seq = info->nlh ? info->nlh->nlmsg_seq : 0;
- skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
+ skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC);
if (!skb)
goto errout;
@@ -6174,11 +6231,10 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
goto errout;
}
rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
- info->nlh, gfp_any());
+ info->nlh, GFP_ATOMIC);
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
void fib6_rt_update(struct net *net, struct fib6_info *rt,
@@ -6204,8 +6260,7 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
info->nlh, gfp_any());
return;
errout:
- if (err < 0)
- rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i,
@@ -6318,7 +6373,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
#ifdef CONFIG_SYSCTL
-static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
+static int ipv6_sysctl_rtcache_flush(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net;
@@ -6327,12 +6382,12 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
if (!write)
return -EINVAL;
- net = (struct net *)ctl->extra1;
- delay = net->ipv6.sysctl.flush_delay;
ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (ret)
return ret;
+ net = (struct net *)ctl->extra1;
+ delay = net->ipv6.sysctl.flush_delay;
fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
return 0;
}
@@ -6417,7 +6472,6 @@ static struct ctl_table ipv6_route_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- { }
};
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
@@ -6441,10 +6495,6 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[1].procname = NULL;
}
return table;
@@ -6671,6 +6721,15 @@ static void bpf_iter_unregister(void)
#endif
#endif
+static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = {
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE,
+ .doit = inet6_rtm_newroute},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELROUTE,
+ .doit = inet6_rtm_delroute},
+ {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETROUTE,
+ .doit = inet6_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+};
+
int __init ip6_route_init(void)
{
int ret;
@@ -6713,19 +6772,7 @@ int __init ip6_route_init(void)
if (ret)
goto fib6_rules_init;
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
- inet6_rtm_newroute, NULL, 0);
- if (ret < 0)
- goto out_register_late_subsys;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
- inet6_rtm_delroute, NULL, 0);
- if (ret < 0)
- goto out_register_late_subsys;
-
- ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
- inet6_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
+ ret = rtnl_register_many(ip6_route_rtnl_msg_handlers);
if (ret < 0)
goto out_register_late_subsys;
@@ -6745,7 +6792,6 @@ int __init ip6_route_init(void)
struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
INIT_LIST_HEAD(&ul->head);
- INIT_LIST_HEAD(&ul->quarantine);
spin_lock_init(&ul->lock);
}
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index a013b92cbb86..7c05ac846646 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -125,7 +125,8 @@ static void rpl_destroy_state(struct lwtunnel_state *lwt)
}
static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
- const struct ipv6_rpl_sr_hdr *srh)
+ const struct ipv6_rpl_sr_hdr *srh,
+ struct dst_entry *cache_dst)
{
struct ipv6_rpl_sr_hdr *isrh, *csrh;
const struct ipv6hdr *oldhdr;
@@ -153,7 +154,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
hdrlen = ((csrh->hdrlen + 1) << 3);
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err)) {
kfree(buf);
return err;
@@ -186,7 +187,8 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt,
return 0;
}
-static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
+static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt,
+ struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct rpl_iptunnel_encap *tinfo;
@@ -196,7 +198,7 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
tinfo = rpl_encap_lwtunnel(dst->lwtstate);
- return rpl_do_srh_inline(skb, rlwt, tinfo->srh);
+ return rpl_do_srh_inline(skb, rlwt, tinfo->srh, cache_dst);
}
static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -208,14 +210,14 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
- err = rpl_do_srh(skb, rlwt);
+ local_bh_disable();
+ dst = dst_cache_get(&rlwt->cache);
+ local_bh_enable();
+
+ err = rpl_do_srh(skb, rlwt, dst);
if (unlikely(err))
goto drop;
- preempt_disable();
- dst = dst_cache_get(&rlwt->cache);
- preempt_enable();
-
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -230,25 +232,28 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
err = dst->error;
- dst_release(dst);
goto drop;
}
- preempt_disable();
- dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
- preempt_enable();
+ /* cache only if we don't create a dst reference loop */
+ if (orig_dst->lwtstate != dst->lwtstate) {
+ local_bh_disable();
+ dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
-
return dst_output(net, sk, skb);
drop:
+ dst_release(dst);
kfree_skb(skb);
return err;
}
@@ -257,40 +262,54 @@ static int rpl_input(struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
+ struct lwtunnel_state *lwtst;
struct rpl_lwt *rlwt;
int err;
- rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate);
+ /* We cannot dereference "orig_dst" once ip6_route_input() or
+ * skb_dst_drop() is called. However, in order to detect a dst loop, we
+ * need the address of its lwtstate. So, save the address of lwtstate
+ * now and use it later as a comparison.
+ */
+ lwtst = orig_dst->lwtstate;
- err = rpl_do_srh(skb, rlwt);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
+ rlwt = rpl_lwt_lwtunnel(lwtst);
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&rlwt->cache);
- preempt_enable();
+ local_bh_enable();
+
+ err = rpl_do_srh(skb, rlwt, dst);
+ if (unlikely(err)) {
+ dst_release(dst);
+ goto drop;
+ }
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
- if (!dst->error) {
- preempt_disable();
+
+ /* cache only if we don't create a dst reference loop */
+ if (!dst->error && lwtst != dst->lwtstate) {
+ local_bh_disable();
dst_cache_set_ip6(&rlwt->cache, dst,
&ipv6_hdr(skb)->saddr);
- preempt_enable();
+ local_bh_enable();
}
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- return err;
-
return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
}
static int nla_put_rpl_srh(struct sk_buff *skb, int attrtype,
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 35508abd76f4..180da19c148c 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -21,9 +21,7 @@
#include <net/genetlink.h>
#include <linux/seg6.h>
#include <linux/seg6_genl.h>
-#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
-#endif
bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced)
{
@@ -437,13 +435,11 @@ static int __net_init seg6_net_init(struct net *net)
net->ipv6.seg6_data = sdata;
-#ifdef CONFIG_IPV6_SEG6_HMAC
if (seg6_hmac_net_init(net)) {
kfree(rcu_dereference_raw(sdata->tun_src));
kfree(sdata);
return -ENOMEM;
}
-#endif
return 0;
}
@@ -452,9 +448,7 @@ static void __net_exit seg6_net_exit(struct net *net)
{
struct seg6_pernet_data *sdata = seg6_pernet(net);
-#ifdef CONFIG_IPV6_SEG6_HMAC
seg6_hmac_net_exit(net);
-#endif
kfree(rcu_dereference_raw(sdata->tun_src));
kfree(sdata);
@@ -520,39 +514,28 @@ int __init seg6_init(void)
if (err)
goto out_unregister_pernet;
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
err = seg6_iptunnel_init();
if (err)
goto out_unregister_genl;
err = seg6_local_init();
- if (err) {
- seg6_iptunnel_exit();
- goto out_unregister_genl;
- }
-#endif
+ if (err)
+ goto out_unregister_iptun;
-#ifdef CONFIG_IPV6_SEG6_HMAC
err = seg6_hmac_init();
if (err)
- goto out_unregister_iptun;
-#endif
+ goto out_unregister_seg6;
pr_info("Segment Routing with IPv6\n");
out:
return err;
-#ifdef CONFIG_IPV6_SEG6_HMAC
-out_unregister_iptun:
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+out_unregister_seg6:
seg6_local_exit();
+out_unregister_iptun:
seg6_iptunnel_exit();
-#endif
-#endif
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
out_unregister_genl:
genl_unregister_family(&seg6_genl_family);
-#endif
out_unregister_pernet:
unregister_pernet_subsys(&ip6_segments_ops);
goto out;
@@ -560,12 +543,9 @@ out_unregister_pernet:
void seg6_exit(void)
{
-#ifdef CONFIG_IPV6_SEG6_HMAC
seg6_hmac_exit();
-#endif
-#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+ seg6_local_exit();
seg6_iptunnel_exit();
-#endif
- unregister_pernet_subsys(&ip6_segments_ops);
genl_unregister_family(&seg6_genl_family);
+ unregister_pernet_subsys(&ip6_segments_ops);
}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index d43c50a7310d..bbf5b84a70fc 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -241,6 +241,7 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb)
struct sr6_tlv_hmac *tlv;
struct ipv6_sr_hdr *srh;
struct inet6_dev *idev;
+ int require_hmac;
idev = __in6_dev_get(skb->dev);
@@ -248,16 +249,17 @@ bool seg6_hmac_validate_skb(struct sk_buff *skb)
tlv = seg6_get_tlv_hmac(srh);
+ require_hmac = READ_ONCE(idev->cnf.seg6_require_hmac);
/* mandatory check but no tlv */
- if (idev->cnf.seg6_require_hmac > 0 && !tlv)
+ if (require_hmac > 0 && !tlv)
return false;
/* no check */
- if (idev->cnf.seg6_require_hmac < 0)
+ if (require_hmac < 0)
return true;
/* check only if present */
- if (idev->cnf.seg6_require_hmac == 0 && !tlv)
+ if (require_hmac == 0 && !tlv)
return true;
/* now, seg6_require_hmac >= 0 && tlv */
@@ -354,6 +356,7 @@ static int seg6_hmac_init_algo(void)
struct crypto_shash *tfm;
struct shash_desc *shash;
int i, alg_count, cpu;
+ int ret = -ENOMEM;
alg_count = ARRAY_SIZE(hmac_algos);
@@ -364,12 +367,14 @@ static int seg6_hmac_init_algo(void)
algo = &hmac_algos[i];
algo->tfms = alloc_percpu(struct crypto_shash *);
if (!algo->tfms)
- return -ENOMEM;
+ goto error_out;
for_each_possible_cpu(cpu) {
tfm = crypto_alloc_shash(algo->name, 0, 0);
- if (IS_ERR(tfm))
- return PTR_ERR(tfm);
+ if (IS_ERR(tfm)) {
+ ret = PTR_ERR(tfm);
+ goto error_out;
+ }
p_tfm = per_cpu_ptr(algo->tfms, cpu);
*p_tfm = tfm;
}
@@ -381,18 +386,22 @@ static int seg6_hmac_init_algo(void)
algo->shashs = alloc_percpu(struct shash_desc *);
if (!algo->shashs)
- return -ENOMEM;
+ goto error_out;
for_each_possible_cpu(cpu) {
shash = kzalloc_node(shsize, GFP_KERNEL,
cpu_to_node(cpu));
if (!shash)
- return -ENOMEM;
+ goto error_out;
*per_cpu_ptr(algo->shashs, cpu) = shash;
}
}
return 0;
+
+error_out:
+ seg6_hmac_exit();
+ return ret;
}
int __init seg6_hmac_init(void)
@@ -410,22 +419,29 @@ int __net_init seg6_hmac_net_init(struct net *net)
void seg6_hmac_exit(void)
{
struct seg6_hmac_algo *algo = NULL;
+ struct crypto_shash *tfm;
+ struct shash_desc *shash;
int i, alg_count, cpu;
alg_count = ARRAY_SIZE(hmac_algos);
for (i = 0; i < alg_count; i++) {
algo = &hmac_algos[i];
- for_each_possible_cpu(cpu) {
- struct crypto_shash *tfm;
- struct shash_desc *shash;
- shash = *per_cpu_ptr(algo->shashs, cpu);
- kfree(shash);
- tfm = *per_cpu_ptr(algo->tfms, cpu);
- crypto_free_shash(tfm);
+ if (algo->shashs) {
+ for_each_possible_cpu(cpu) {
+ shash = *per_cpu_ptr(algo->shashs, cpu);
+ kfree(shash);
+ }
+ free_percpu(algo->shashs);
+ }
+
+ if (algo->tfms) {
+ for_each_possible_cpu(cpu) {
+ tfm = *per_cpu_ptr(algo->tfms, cpu);
+ crypto_free_shash(tfm);
+ }
+ free_percpu(algo->tfms);
}
- free_percpu(algo->tfms);
- free_percpu(algo->shashs);
}
}
EXPORT_SYMBOL(seg6_hmac_exit);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 03b877ff4558..51583461ae29 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -124,8 +124,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
return flowlabel;
}
-/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
-int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ int proto, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct net *net = dev_net(dst->dev);
@@ -137,7 +137,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
- err = skb_cow_head(skb, tot_len + skb->mac_len);
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -197,11 +197,18 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
return 0;
}
+
+/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+{
+ return __seg6_do_srh_encap(skb, osrh, proto, NULL);
+}
EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */
static int seg6_do_srh_encap_red(struct sk_buff *skb,
- struct ipv6_sr_hdr *osrh, int proto)
+ struct ipv6_sr_hdr *osrh, int proto,
+ struct dst_entry *cache_dst)
{
__u8 first_seg = osrh->first_segment;
struct dst_entry *dst = skb_dst(skb);
@@ -230,7 +237,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb,
tot_len = red_hdrlen + sizeof(struct ipv6hdr);
- err = skb_cow_head(skb, tot_len + skb->mac_len);
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -317,8 +324,8 @@ out:
return 0;
}
-/* insert an SRH within an IPv6 packet, just after the IPv6 header */
-int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ struct dst_entry *cache_dst)
{
struct ipv6hdr *hdr, *oldhdr;
struct ipv6_sr_hdr *isrh;
@@ -326,7 +333,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -369,9 +376,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
return 0;
}
-EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
-static int seg6_do_srh(struct sk_buff *skb)
+static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct seg6_iptunnel_encap *tinfo;
@@ -384,7 +390,7 @@ static int seg6_do_srh(struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
return -EINVAL;
- err = seg6_do_srh_inline(skb, tinfo->srh);
+ err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst);
if (err)
return err;
break;
@@ -402,9 +408,11 @@ static int seg6_do_srh(struct sk_buff *skb)
return -EINVAL;
if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
- err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ proto, cache_dst);
else
- err = seg6_do_srh_encap_red(skb, tinfo->srh, proto);
+ err = seg6_do_srh_encap_red(skb, tinfo->srh,
+ proto, cache_dst);
if (err)
return err;
@@ -425,11 +433,13 @@ static int seg6_do_srh(struct sk_buff *skb)
skb_push(skb, skb->mac_len);
if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
- err = seg6_do_srh_encap(skb, tinfo->srh,
- IPPROTO_ETHERNET);
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ IPPROTO_ETHERNET,
+ cache_dst);
else
err = seg6_do_srh_encap_red(skb, tinfo->srh,
- IPPROTO_ETHERNET);
+ IPPROTO_ETHERNET,
+ cache_dst);
if (err)
return err;
@@ -444,6 +454,13 @@ static int seg6_do_srh(struct sk_buff *skb)
return 0;
}
+/* insert an SRH within an IPv6 packet, just after the IPv6 header */
+int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+ return __seg6_do_srh_inline(skb, osrh, NULL);
+}
+EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
+
static int seg6_input_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -455,45 +472,58 @@ static int seg6_input_core(struct net *net, struct sock *sk,
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
+ struct lwtunnel_state *lwtst;
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
+ /* We cannot dereference "orig_dst" once ip6_route_input() or
+ * skb_dst_drop() is called. However, in order to detect a dst loop, we
+ * need the address of its lwtstate. So, save the address of lwtstate
+ * now and use it later as a comparison.
+ */
+ lwtst = orig_dst->lwtstate;
- slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+ slwt = seg6_lwt_lwtunnel(lwtst);
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&slwt->cache);
- preempt_enable();
+ local_bh_enable();
+
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err)) {
+ dst_release(dst);
+ goto drop;
+ }
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
- if (!dst->error) {
- preempt_disable();
+
+ /* cache only if we don't create a dst reference loop */
+ if (!dst->error && lwtst != dst->lwtstate) {
+ local_bh_disable();
dst_cache_set_ip6(&slwt->cache, dst,
&ipv6_hdr(skb)->saddr);
- preempt_enable();
+ local_bh_enable();
}
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
} else {
skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- return err;
-
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, seg6_input_finish);
return seg6_input_finish(dev_net(skb->dev), NULL, skb);
+drop:
+ kfree_skb(skb);
+ return err;
}
static int seg6_input_nf(struct sk_buff *skb)
@@ -529,15 +559,15 @@ static int seg6_output_core(struct net *net, struct sock *sk,
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err))
- goto drop;
-
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&slwt->cache);
- preempt_enable();
+ local_bh_enable();
+
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err))
+ goto drop;
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -553,28 +583,31 @@ static int seg6_output_core(struct net *net, struct sock *sk,
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
err = dst->error;
- dst_release(dst);
goto drop;
}
- preempt_disable();
- dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
- preempt_enable();
+ /* cache only if we don't create a dst reference loop */
+ if (orig_dst->lwtstate != dst->lwtstate) {
+ local_bh_disable();
+ dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+ if (unlikely(err))
+ goto drop;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
-
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, skb_dst(skb)->dev, dst_output);
return dst_output(net, sk, skb);
drop:
+ dst_release(dst);
kfree_skb(skb);
return err;
}
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 24e2b4b494cb..ac1dbd492c22 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -941,8 +941,8 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
- dev_net(skb->dev), NULL, skb, NULL,
- skb_dst(skb)->dev, input_action_end_dx6_finish);
+ dev_net(skb->dev), NULL, skb, skb->dev,
+ NULL, input_action_end_dx6_finish);
return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
@@ -954,10 +954,10 @@ static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
+ enum skb_drop_reason reason;
struct seg6_local_lwt *slwt;
struct iphdr *iph;
__be32 nhaddr;
- int err;
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
@@ -967,9 +967,9 @@ static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
skb_dst_drop(skb);
- err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
- if (err) {
- kfree_skb(skb);
+ reason = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
+ if (reason) {
+ kfree_skb_reason(skb, reason);
return -EINVAL;
}
@@ -991,8 +991,8 @@ static int input_action_end_dx4(struct sk_buff *skb,
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
- dev_net(skb->dev), NULL, skb, NULL,
- skb_dst(skb)->dev, input_action_end_dx4_finish);
+ dev_net(skb->dev), NULL, skb, skb->dev,
+ NULL, input_action_end_dx4_finish);
return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
@@ -1174,8 +1174,8 @@ drop:
static int input_action_end_dt4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
+ enum skb_drop_reason reason;
struct iphdr *iph;
- int err;
if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;
@@ -1193,8 +1193,8 @@ static int input_action_end_dt4(struct sk_buff *skb,
iph = ip_hdr(skb);
- err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
- if (unlikely(err))
+ reason = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
+ if (unlikely(reason))
goto drop;
return dst_input(skb);
@@ -1380,7 +1380,9 @@ drop:
return err;
}
-DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
{
@@ -1388,6 +1390,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh = srh_state->srh;
+ lockdep_assert_held(&srh_state->bh_lock);
if (unlikely(srh == NULL))
return false;
@@ -1408,8 +1411,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
static int input_action_end_bpf(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
- struct seg6_bpf_srh_state *srh_state =
- this_cpu_ptr(&seg6_bpf_srh_states);
+ struct seg6_bpf_srh_state *srh_state;
struct ipv6_sr_hdr *srh;
int ret;
@@ -1420,10 +1422,14 @@ static int input_action_end_bpf(struct sk_buff *skb,
}
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
- /* preempt_disable is needed to protect the per-CPU buffer srh_state,
- * which is also accessed by the bpf_lwt_seg6_* helpers
+ /* The access to the per-CPU buffer srh_state is protected by running
+ * always in softirq context (with disabled BH). On PREEMPT_RT the
+ * required locking is provided by the following local_lock_nested_bh()
+ * statement. It is also accessed by the bpf_lwt_seg6_* helpers via
+ * bpf_prog_run_save_cb().
*/
- preempt_disable();
+ local_lock_nested_bh(&seg6_bpf_srh_states.bh_lock);
+ srh_state = this_cpu_ptr(&seg6_bpf_srh_states);
srh_state->srh = srh;
srh_state->hdrlen = srh->hdrlen << 3;
srh_state->valid = true;
@@ -1446,15 +1452,15 @@ static int input_action_end_bpf(struct sk_buff *skb,
if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
goto drop;
+ local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);
- preempt_enable();
if (ret != BPF_REDIRECT)
seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
drop:
- preempt_enable();
+ local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);
kfree_skb(skb);
return -EINVAL;
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 5e9f625b76e3..39bd8951bfca 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -51,6 +51,7 @@
#include <net/dsfield.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/inet_dscp.h>
/*
This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
@@ -132,8 +133,8 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
return NULL;
}
-static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
- struct ip_tunnel_parm *parms)
+static struct ip_tunnel __rcu **
+__ipip6_bucket(struct sit_net *sitn, struct ip_tunnel_parm_kern *parms)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
@@ -207,7 +208,7 @@ static int ipip6_tunnel_create(struct net_device *dev)
__dev_addr_set(dev, &t->parms.iph.saddr, 4);
memcpy(dev->broadcast, &t->parms.iph.daddr, 4);
- if ((__force u16)t->parms.i_flags & SIT_ISATAP)
+ if (test_bit(IP_TUNNEL_SIT_ISATAP_BIT, t->parms.i_flags))
dev->priv_flags |= IFF_ISATAP;
dev->rtnl_link_ops = &sit_link_ops;
@@ -226,7 +227,8 @@ out:
}
static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
- struct ip_tunnel_parm *parms, int create)
+ struct ip_tunnel_parm_kern *parms,
+ int create)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
@@ -934,8 +936,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
flowi4_init_output(&fl4, tunnel->parms.link, tunnel->fwmark,
- RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
- 0, dst, tiph->saddr, 0, 0,
+ tos & INET_DSCP_MASK, RT_SCOPE_UNIVERSE,
+ IPPROTO_IPV6, 0, dst, tiph->saddr, 0, 0,
sock_net_uid(tunnel->net, NULL));
rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
@@ -1110,7 +1112,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
iph->daddr, iph->saddr,
0, 0,
IPPROTO_IPV6,
- RT_TOS(iph->tos),
+ iph->tos & INET_DSCP_MASK,
tunnel->parms.link);
if (!IS_ERR(rt)) {
@@ -1135,7 +1137,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
dev->needed_headroom = t_hlen + hlen;
}
-static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
+static void ipip6_tunnel_update(struct ip_tunnel *t,
+ struct ip_tunnel_parm_kern *p,
__u32 fwmark)
{
struct net *net = t->net;
@@ -1196,11 +1199,11 @@ static int
ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data)
{
struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
struct ip_tunnel_6rd ip6rd;
- struct ip_tunnel_parm p;
if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
- if (copy_from_user(&p, data, sizeof(p)))
+ if (!ip_tunnel_parm_from_user(&p, data))
return -EFAULT;
t = ipip6_tunnel_locate(t->net, &p, 0);
}
@@ -1251,7 +1254,7 @@ static bool ipip6_valid_ip_proto(u8 ipproto)
}
static int
-__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p)
+__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm_kern *p)
{
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -1268,7 +1271,7 @@ __ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1281,7 +1284,7 @@ ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
int err;
@@ -1297,7 +1300,7 @@ ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
int err;
@@ -1328,7 +1331,7 @@ ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p)
+ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm_kern *p)
{
struct ip_tunnel *t = netdev_priv(dev);
@@ -1348,7 +1351,8 @@ ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p)
}
static int
-ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
+ int cmd)
{
switch (cmd) {
case SIOCGETTUNNEL:
@@ -1398,7 +1402,6 @@ static const struct net_device_ops ipip6_netdev_ops = {
.ndo_uninit = ipip6_tunnel_uninit,
.ndo_start_xmit = sit_tunnel_xmit,
.ndo_siocdevprivate = ipip6_tunnel_siocdevprivate,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
.ndo_tunnel_ctl = ipip6_tunnel_ctl,
};
@@ -1408,7 +1411,6 @@ static void ipip6_dev_free(struct net_device *dev)
struct ip_tunnel *tunnel = netdev_priv(dev);
dst_cache_destroy(&tunnel->dst_cache);
- free_percpu(dev->tstats);
}
#define SIT_FEATURES (NETIF_F_SG | \
@@ -1434,9 +1436,11 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
netif_keep_dst(dev);
dev->addr_len = 4;
- dev->features |= NETIF_F_LLTX;
+ dev->lltx = true;
dev->features |= SIT_FEATURES;
dev->hw_features |= SIT_FEATURES;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+
}
static int ipip6_tunnel_init(struct net_device *dev)
@@ -1449,17 +1453,13 @@ static int ipip6_tunnel_init(struct net_device *dev)
strcpy(tunnel->parms.name, dev->name);
ipip6_tunnel_bind_dev(dev);
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
- if (err) {
- free_percpu(dev->tstats);
- dev->tstats = NULL;
+ if (err)
return err;
- }
+
netdev_hold(dev, &tunnel->dev_tracker, GFP_KERNEL);
+ netdev_lockdep_set_classes(dev);
return 0;
}
@@ -1494,7 +1494,7 @@ static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[],
}
static void ipip6_netlink_parms(struct nlattr *data[],
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
__u32 *fwmark)
{
memset(parms, 0, sizeof(*parms));
@@ -1603,8 +1603,8 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm p;
struct ip_tunnel_encap ipencap;
+ struct ip_tunnel_parm_kern p;
struct net *net = t->net;
struct sit_net *sitn = net_generic(net, sit_net_id);
#ifdef CONFIG_IPV6_SIT_6RD
@@ -1691,7 +1691,7 @@ static size_t ipip6_get_size(const struct net_device *dev)
static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct ip_tunnel_parm *parm = &tunnel->parms;
+ struct ip_tunnel_parm_kern *parm = &tunnel->parms;
if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
@@ -1701,7 +1701,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
!!(parm->iph.frag_off & htons(IP_DF))) ||
nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
- nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags) ||
+ nla_put_be16(skb, IFLA_IPTUN_FLAGS,
+ ip_tunnel_flags_to_be16(parm->i_flags)) ||
nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
goto nla_put_failure;
@@ -1855,7 +1856,7 @@ static int __net_init sit_init_net(struct net *net)
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
- sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ sitn->fb_tunnel_dev->netns_local = true;
err = register_netdev(sitn->fb_tunnel_dev);
if (err)
@@ -1875,22 +1876,19 @@ err_alloc_dev:
return err;
}
-static void __net_exit sit_exit_batch_net(struct list_head *net_list)
+static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list,
+ struct list_head *dev_to_kill)
{
- LIST_HEAD(list);
struct net *net;
- rtnl_lock();
+ ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
- sit_destroy_tunnels(net, &list);
-
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ sit_destroy_tunnels(net, dev_to_kill);
}
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
- .exit_batch = sit_exit_batch_net,
+ .exit_batch_rtnl = sit_exit_batch_rtnl,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
};
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index c8d2ca27220c..9d83eadd308b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -177,24 +177,33 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
struct sock *ret = sk;
__u8 rcv_wscale;
int full_space;
+ SKB_DR(reason);
if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
!th->ack || th->rst)
goto out;
- req = cookie_tcp_check(net, sk, skb);
- if (IS_ERR(req))
- goto out;
- if (!req)
+ if (cookie_bpf_ok(skb)) {
+ req = cookie_bpf_check(sk, skb);
+ } else {
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ }
+ if (!req) {
+ SKB_DR_SET(reason, NO_SOCKET);
goto out_drop;
+ }
ireq = inet_rsk(req);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- if (security_inet_conn_request(sk, skb, req))
+ if (security_inet_conn_request(sk, skb, req)) {
+ SKB_DR_SET(reason, SECURITY_HOOK);
goto out_free;
+ }
if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -231,11 +240,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
- if (IS_ERR(dst))
+ if (IS_ERR(dst)) {
+ SKB_DR_SET(reason, IP_OUTNOROUTES);
goto out_free;
+ }
}
- req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
+ req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :dst_metric(dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */
full_space = tcp_full_space(sk);
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
@@ -247,14 +258,23 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
- ireq->rcv_wscale = rcv_wscale;
+ /* req->syncookie is set true only if ACK is validated
+ * by BPF kfunc, then, rcv_wscale is already configured.
+ */
+ if (!req->syncookie)
+ ireq->rcv_wscale = rcv_wscale;
ireq->ecn_ok &= cookie_ecn_ok(net, dst);
ret = tcp_get_cookie_sock(sk, skb, req, dst);
+ if (!ret) {
+ SKB_DR_SET(reason, NO_SOCKET);
+ goto out_drop;
+ }
out:
return ret;
out_free:
reqsk_free(req);
out_drop:
+ sk_skb_reason_drop(sk, skb, reason);
return NULL;
}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 888676163e90..d2cd33e2698d 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -30,7 +30,7 @@ static u32 rt6_multipath_hash_fields_all_mask =
static u32 ioam6_id_max = IOAM6_DEFAULT_ID;
static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE;
-static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+static int proc_rt6_multipath_hash_policy(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct net *net;
@@ -46,7 +46,7 @@ static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
}
static int
-proc_rt6_multipath_hash_fields(struct ctl_table *table, int write, void *buffer,
+proc_rt6_multipath_hash_fields(const struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
struct net *net;
@@ -213,7 +213,6 @@ static struct ctl_table ipv6_table_template[] = {
.proc_handler = proc_doulongvec_minmax,
.extra2 = &ioam6_id_wide_max,
},
- { }
};
static struct ctl_table ipv6_rotable[] = {
@@ -248,11 +247,11 @@ static struct ctl_table ipv6_rotable[] = {
.proc_handler = proc_dointvec,
},
#endif /* CONFIG_NETLABEL */
- { }
};
static int __net_init ipv6_sysctl_net_init(struct net *net)
{
+ size_t table_size = ARRAY_SIZE(ipv6_table_template);
struct ctl_table *ipv6_table;
struct ctl_table *ipv6_route_table;
struct ctl_table *ipv6_icmp_table;
@@ -264,7 +263,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
if (!ipv6_table)
goto out;
/* Update the variables to point into the current struct net */
- for (i = 0; i < ARRAY_SIZE(ipv6_table_template) - 1; i++)
+ for (i = 0; i < table_size; i++)
ipv6_table[i].data += (void *)net - (void *)&init_net;
ipv6_route_table = ipv6_route_sysctl_init(net);
@@ -276,8 +275,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
goto out_ipv6_route_table;
net->ipv6.sysctl.hdr = register_net_sysctl_sz(net, "net/ipv6",
- ipv6_table,
- ARRAY_SIZE(ipv6_table_template));
+ ipv6_table, table_size);
if (!net->ipv6.sysctl.hdr)
goto out_ipv6_icmp_table;
@@ -313,9 +311,9 @@ out_ipv6_table:
static void __net_exit ipv6_sysctl_net_exit(struct net *net)
{
- struct ctl_table *ipv6_table;
- struct ctl_table *ipv6_route_table;
- struct ctl_table *ipv6_icmp_table;
+ const struct ctl_table *ipv6_table;
+ const struct ctl_table *ipv6_route_table;
+ const struct ctl_table *ipv6_icmp_table;
ipv6_table = net->ipv6.sysctl.hdr->ctl_table_arg;
ipv6_route_table = net->ipv6.sysctl.route_hdr->ctl_table_arg;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 57b25b1fc9d9..2debdf085a3b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -58,7 +58,9 @@
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
+#include <net/hotdata.h>
#include <net/busy_poll.h>
+#include <net/rstreason.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -68,7 +70,8 @@
#include <trace/events/tcp.h>
-static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
@@ -94,11 +97,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
if (dst && dst_hold_safe(dst)) {
- const struct rt6_info *rt = (const struct rt6_info *)dst;
-
rcu_assign_pointer(sk->sk_rx_dst, dst);
sk->sk_rx_dst_ifindex = skb->skb_iif;
- sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
+ sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
}
}
@@ -489,14 +490,10 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
- if (!sock_owned_by_user(sk)) {
- WRITE_ONCE(sk->sk_err, err);
- sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
-
- tcp_done(sk);
- } else {
+ if (!sock_owned_by_user(sk))
+ tcp_done_with_error(sk, err);
+ else
WRITE_ONCE(sk->sk_err_soft, err);
- }
goto out;
case TCP_LISTEN:
break;
@@ -792,7 +789,8 @@ clear_hash_nostart:
static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ u32 tw_isn)
{
bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
struct inet_request_sock *ireq = inet_rsk(req);
@@ -806,7 +804,7 @@ static void tcp_v6_init_req(struct request_sock *req,
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
- if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
+ if (!tw_isn &&
(ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
@@ -819,9 +817,10 @@ static void tcp_v6_init_req(struct request_sock *req,
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
- struct request_sock *req)
+ struct request_sock *req,
+ u32 tw_isn)
{
- tcp_v6_init_req(req, sk, skb);
+ tcp_v6_init_req(req, sk, skb, tw_isn);
if (security_inet_conn_request(sk, skb, req))
return NULL;
@@ -968,11 +967,14 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
if (sk) {
+ /* unconstify the socket only to attach it to buff with care. */
+ skb_set_owner_edemux(buff, (struct sock *)sk);
+
if (sk->sk_state == TCP_TIME_WAIT)
mark = inet_twsk(sk)->tw_mark;
else
mark = READ_ONCE(sk->sk_mark);
- skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
+ skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC);
}
if (txhash) {
/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
@@ -1005,7 +1007,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
kfree_skb(buff);
}
-static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
+ enum sk_rst_reason reason)
{
const struct tcphdr *th = tcp_hdr(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -1112,7 +1115,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk) {
oif = sk->sk_bound_dev_if;
if (sk_fullsock(sk)) {
- trace_tcp_send_reset(sk, skb);
if (inet6_test_bit(REPFLOW, sk))
label = ip6_flowlabel(ipv6h);
priority = READ_ONCE(sk->sk_priority);
@@ -1128,6 +1130,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
label = ip6_flowlabel(ipv6h);
}
+ trace_tcp_send_reset(sk, skb, reason);
+
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
ipv6_get_dsfield(ipv6h), label, priority, txhash,
&key);
@@ -1168,8 +1172,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
goto out;
if (aoh)
- key.ao_key = tcp_ao_established_key(ao_info,
- aoh->rnext_keyid, -1);
+ key.ao_key = tcp_ao_established_key(sk, ao_info,
+ aoh->rnext_keyid, -1);
}
}
if (key.ao_key) {
@@ -1192,12 +1196,13 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
#endif
}
- tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt,
+ READ_ONCE(tcptw->tw_rcv_nxt),
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_tw_tsval(tcptw),
- tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key,
- tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
- tw->tw_txhash);
+ READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
+ &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel),
+ tw->tw_priority, tw->tw_txhash);
#ifdef CONFIG_TCP_AO
out:
@@ -1267,15 +1272,10 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
- /* RFC 7323 2.3
- * The window field (SEG.WND) of every outgoing segment, with the
- * exception of <SYN> segments, MUST be right-shifted by
- * Rcv.Wind.Shift bits:
- */
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt,
- req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
tcp_rsk_tsval(tcp_rsk(req)),
READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
&key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
@@ -1439,7 +1439,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
*/
newsk->sk_gso_type = SKB_GSO_TCPV6;
- ip6_dst_store(newsk, dst, NULL, NULL);
inet6_sk_rx_dst_set(newsk, skb);
inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
@@ -1450,6 +1449,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+ ip6_dst_store(newsk, dst, NULL, NULL);
+
newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
newnp->saddr = ireq->ir_v6_loc_addr;
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
@@ -1620,10 +1621,9 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
by tcp. Feel free to propose better solution.
--ANK (980728)
*/
- if (np->rxopt.all)
+ if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
opt_skb = skb_clone_and_charge_r(skb, sk);
- reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
struct dst_entry *dst;
@@ -1653,31 +1653,30 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (sk->sk_state == TCP_LISTEN) {
struct sock *nsk = tcp_v6_cookie_check(sk, skb);
- if (!nsk)
- goto discard;
-
if (nsk != sk) {
- if (tcp_child_process(sk, nsk, skb))
- goto reset;
- if (opt_skb)
- __kfree_skb(opt_skb);
+ if (nsk) {
+ reason = tcp_child_process(sk, nsk, skb);
+ if (reason)
+ goto reset;
+ }
return 0;
}
} else
sock_rps_save_rxhash(sk, skb);
- if (tcp_rcv_state_process(sk, skb))
+ reason = tcp_rcv_state_process(sk, skb);
+ if (reason)
goto reset;
if (opt_skb)
goto ipv6_pktoptions;
return 0;
reset:
- tcp_v6_send_reset(sk, skb);
+ tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason));
discard:
if (opt_skb)
__kfree_skb(opt_skb);
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return 0;
csum_err:
reason = SKB_DROP_REASON_TCP_CSUM;
@@ -1737,7 +1736,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
- TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
@@ -1751,9 +1749,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
int dif = inet6_iif(skb);
const struct tcphdr *th;
const struct ipv6hdr *hdr;
+ struct sock *sk = NULL;
bool refcounted;
- struct sock *sk;
int ret;
+ u32 isn;
struct net *net = dev_net(skb->dev);
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
@@ -1790,7 +1789,6 @@ lookup:
if (!sk)
goto no_tcp_socket;
-process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
@@ -1856,15 +1854,21 @@ process:
if (nsk == sk) {
reqsk_put(req);
tcp_v6_restore_cb(skb);
- } else if (tcp_child_process(sk, nsk, skb)) {
- tcp_v6_send_reset(nsk, skb);
- goto discard_and_relse;
} else {
+ drop_reason = tcp_child_process(sk, nsk, skb);
+ if (drop_reason) {
+ enum sk_rst_reason rst_reason;
+
+ rst_reason = sk_rst_convert_drop_reason(drop_reason);
+ tcp_v6_send_reset(nsk, skb, rst_reason);
+ goto discard_and_relse;
+ }
sock_put(sk);
return 0;
}
}
+process:
if (static_branch_unlikely(&ip6_min_hopcount)) {
/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
@@ -1933,12 +1937,12 @@ csum_error:
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
} else {
- tcp_v6_send_reset(NULL, skb);
+ tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason));
}
discard_it:
SKB_DR_OR(drop_reason, NOT_SPECIFIED);
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return 0;
discard_and_relse:
@@ -1961,7 +1965,7 @@ do_time_wait:
goto csum_error;
}
- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
+ switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN:
{
struct sock *sk2;
@@ -1979,6 +1983,7 @@ do_time_wait:
sk = sk2;
tcp_v6_restore_cb(skb);
refcounted = false;
+ __this_cpu_write(tcp_tw_isn, isn);
goto process;
}
}
@@ -1988,7 +1993,7 @@ do_time_wait:
tcp_v6_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- tcp_v6_send_reset(sk, skb);
+ tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);
inet_twsk_deschedule_put(inet_twsk(sk));
goto discard_it;
case TCP_TW_SUCCESS:
@@ -2038,7 +2043,6 @@ void tcp_v6_early_demux(struct sk_buff *skb)
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
- .twsk_unique = tcp_twsk_unique,
.twsk_destructor = tcp_twsk_destructor,
};
@@ -2174,6 +2178,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
+ u8 icsk_pending;
int rx_queue;
int state;
@@ -2182,12 +2187,13 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
- if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
- icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
- icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ icsk_pending = smp_load_acquire(&icsk->icsk_pending);
+ if (icsk_pending == ICSK_TIME_RETRANS ||
+ icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ } else if (icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
timer_expires = icsk->icsk_timeout;
} else if (timer_pending(&sp->sk_timer)) {
@@ -2256,7 +2262,7 @@ static void get_timewait6_sock(struct seq_file *seq,
src->s6_addr32[2], src->s6_addr32[3], srcp,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
- tw->tw_substate, 0, 0,
+ READ_ONCE(tw->tw_substate), 0, 0,
3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
refcount_read(&tw->tw_refcnt), tw);
}
@@ -2365,11 +2371,6 @@ struct proto tcpv6_prot = {
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
-static const struct inet6_protocol tcpv6_protocol = {
- .handler = tcp_v6_rcv,
- .err_handler = tcp_v6_err,
- .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
-};
static struct inet_protosw tcpv6_protosw = {
.type = SOCK_STREAM,
@@ -2382,8 +2383,14 @@ static struct inet_protosw tcpv6_protosw = {
static int __net_init tcpv6_net_init(struct net *net)
{
- return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
- SOCK_RAW, IPPROTO_TCP, net);
+ int res;
+
+ res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
+ SOCK_RAW, IPPROTO_TCP, net);
+ if (!res)
+ net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC;
+
+ return res;
}
static void __net_exit tcpv6_net_exit(struct net *net)
@@ -2391,22 +2398,21 @@ static void __net_exit tcpv6_net_exit(struct net *net)
inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
-static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
-{
- tcp_twsk_purge(net_exit_list, AF_INET6);
-}
-
static struct pernet_operations tcpv6_net_ops = {
.init = tcpv6_net_init,
.exit = tcpv6_net_exit,
- .exit_batch = tcpv6_net_exit_batch,
};
int __init tcpv6_init(void)
{
int ret;
- ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ net_hotdata.tcpv6_protocol = (struct inet6_protocol) {
+ .handler = tcp_v6_rcv,
+ .err_handler = tcp_v6_err,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+ };
+ ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
if (ret)
goto out;
@@ -2431,7 +2437,7 @@ out_tcpv6_pernet_subsys:
out_tcpv6_protosw:
inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
- inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
goto out;
}
@@ -2439,5 +2445,5 @@ void tcpv6_exit(void)
{
unregister_pernet_subsys(&tcpv6_net_ops);
inet6_unregister_protosw(&tcpv6_protosw);
- inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
+ inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP);
}
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index bf0c957e4b5e..ae2da28f9dfb 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -7,31 +7,84 @@
*/
#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
+#include <net/inet6_hashtables.h>
#include <net/gro.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
+static void tcp6_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
+ struct tcphdr *th)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct ipv6hdr *hdr;
+ struct sk_buff *p;
+ struct sock *sk;
+ struct net *net;
+ int iif, sdif;
+
+ if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
+ return;
+
+ p = tcp_gro_lookup(head, th);
+ if (p) {
+ NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
+ return;
+ }
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
+ hdr = skb_gro_network_header(skb);
+ net = dev_net(skb->dev);
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+ &hdr->saddr, th->source,
+ &hdr->daddr, ntohs(th->dest),
+ iif, sdif);
+ NAPI_GRO_CB(skb)->is_flist = !sk;
+ if (sk)
+ sock_put(sk);
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+}
+
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
+ struct tcphdr *th;
+
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
skb_gro_checksum_validate(skb, IPPROTO_TCP,
- ip6_gro_compute_pseudo)) {
- NAPI_GRO_CB(skb)->flush = 1;
- return NULL;
- }
+ ip6_gro_compute_pseudo))
+ goto flush;
+
+ th = tcp_gro_pull_header(skb);
+ if (!th)
+ goto flush;
- return tcp_gro_receive(head, skb);
+ tcp6_check_fraglist_gro(head, skb, th);
+
+ return tcp_gro_receive(head, skb, th);
+
+flush:
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
}
INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
struct tcphdr *th = tcp_hdr(skb);
+ if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
+ skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+ __skb_incr_checksum_unnecessary(skb);
+
+ return 0;
+ }
+
th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
&iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
@@ -40,6 +93,70 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
return 0;
}
+static void __tcpv6_gso_segment_csum(struct sk_buff *seg,
+ struct in6_addr *oldip,
+ const struct in6_addr *newip,
+ __be16 *oldport, __be16 newport)
+{
+ struct tcphdr *th = tcp_hdr(seg);
+
+ if (!ipv6_addr_equal(oldip, newip)) {
+ inet_proto_csum_replace16(&th->check, seg,
+ oldip->s6_addr32,
+ newip->s6_addr32,
+ true);
+ *oldip = *newip;
+ }
+
+ if (*oldport == newport)
+ return;
+
+ inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
+ *oldport = newport;
+}
+
+static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs)
+{
+ const struct tcphdr *th;
+ const struct ipv6hdr *iph;
+ struct sk_buff *seg;
+ struct tcphdr *th2;
+ struct ipv6hdr *iph2;
+
+ seg = segs;
+ th = tcp_hdr(seg);
+ iph = ipv6_hdr(seg);
+ th2 = tcp_hdr(seg->next);
+ iph2 = ipv6_hdr(seg->next);
+
+ if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
+ ipv6_addr_equal(&iph->saddr, &iph2->saddr) &&
+ ipv6_addr_equal(&iph->daddr, &iph2->daddr))
+ return segs;
+
+ while ((seg = seg->next)) {
+ th2 = tcp_hdr(seg);
+ iph2 = ipv6_hdr(seg);
+
+ __tcpv6_gso_segment_csum(seg, &iph2->saddr, &iph->saddr,
+ &th2->source, th->source);
+ __tcpv6_gso_segment_csum(seg, &iph2->daddr, &iph->daddr,
+ &th2->dest, th->dest);
+ }
+
+ return segs;
+}
+
+static struct sk_buff *__tcp6_gso_segment_list(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+ if (IS_ERR(skb))
+ return skb;
+
+ return __tcpv6_gso_segment_list_csum(skb);
+}
+
static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -51,6 +168,15 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) {
+ struct tcphdr *th = tcp_hdr(skb);
+
+ if (skb_pagelen(skb) - th->doff * 4 == skb_shinfo(skb)->gso_size)
+ return __tcp6_gso_segment_list(skb, features);
+
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
@@ -66,15 +192,15 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
return tcp_gso_segment(skb, features);
}
-static const struct net_offload tcpv6_offload = {
- .callbacks = {
- .gso_segment = tcp6_gso_segment,
- .gro_receive = tcp6_gro_receive,
- .gro_complete = tcp6_gro_complete,
- },
-};
int __init tcpv6_offload_init(void)
{
- return inet6_add_offload(&tcpv6_offload, IPPROTO_TCP);
+ net_hotdata.tcpv6_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = tcp6_gso_segment,
+ .gro_receive = tcp6_gro_receive,
+ .gro_complete = tcp6_gro_complete,
+ },
+ };
+ return inet6_add_offload(&net_hotdata.tcpv6_offload, IPPROTO_TCP);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3f2249b4cd5f..c6ea438b5c75 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -34,6 +34,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/indirect_call_wrapper.h>
+#include <trace/events/udp.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
@@ -45,7 +46,6 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
-#include <trace/events/udp.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
@@ -79,9 +79,6 @@ u32 udp6_ehashfn(const struct net *net,
const struct in6_addr *faddr,
const __be16 fport)
{
- static u32 udp6_ehash_secret __read_mostly;
- static u32 udp_ipv6_hash_secret __read_mostly;
-
u32 lhash, fhash;
net_get_random_once(&udp6_ehash_secret,
@@ -113,11 +110,22 @@ void udp_v6_rehash(struct sock *sk)
u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
&sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
+ u16 new_hash4;
- udp_lib_rehash(sk, new_hash);
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
+ new_hash4 = udp_ehashfn(sock_net(sk),
+ sk->sk_rcv_saddr, sk->sk_num,
+ sk->sk_daddr, sk->sk_dport);
+ } else {
+ new_hash4 = udp6_ehashfn(sock_net(sk),
+ &sk->sk_v6_rcv_saddr, sk->sk_num,
+ &sk->sk_v6_daddr, sk->sk_dport);
+ }
+
+ udp_lib_rehash(sk, new_hash, new_hash4);
}
-static int compute_score(struct sock *sk, struct net *net,
+static int compute_score(struct sock *sk, const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned short hnum,
int dif, int sdif)
@@ -162,8 +170,51 @@ static int compute_score(struct sock *sk, struct net *net,
return score;
}
+/**
+ * udp6_lib_lookup1() - Simplified lookup using primary hash (destination port)
+ * @net: Network namespace
+ * @saddr: Source address, network order
+ * @sport: Source port, network order
+ * @daddr: Destination address, network order
+ * @hnum: Destination port, host order
+ * @dif: Destination interface index
+ * @sdif: Destination bridge port index, if relevant
+ * @udptable: Set of UDP hash tables
+ *
+ * Simplified lookup to be used as fallback if no sockets are found due to a
+ * potential race between (receive) address change, and lookup happening before
+ * the rehash operation. This function ignores SO_REUSEPORT groups while scoring
+ * result sockets, because if we have one, we don't need the fallback at all.
+ *
+ * Called under rcu_read_lock().
+ *
+ * Return: socket with highest matching score if any, NULL if none
+ */
+static struct sock *udp6_lib_lookup1(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ const struct udp_table *udptable)
+{
+ unsigned int slot = udp_hashfn(net, hnum, udptable->mask);
+ struct udp_hslot *hslot = &udptable->hash[slot];
+ struct sock *sk, *result = NULL;
+ int score, badness = 0;
+
+ sk_for_each_rcu(sk, &hslot->head) {
+ score = compute_score(sk, net,
+ saddr, sport, daddr, hnum, dif, sdif);
+ if (score > badness) {
+ result = sk;
+ badness = score;
+ }
+ }
+
+ return result;
+}
+
/* called with rcu_read_lock() */
-static struct sock *udp6_lib_lookup2(struct net *net,
+static struct sock *udp6_lib_lookup2(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, unsigned int hnum,
int dif, int sdif, struct udp_hslot *hslot2,
@@ -171,15 +222,21 @@ static struct sock *udp6_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
+ bool need_rescore;
result = NULL;
badness = -1;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
- score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif);
+ need_rescore = false;
+rescore:
+ score = compute_score(need_rescore ? result : sk, net, saddr,
+ sport, daddr, hnum, dif, sdif);
if (score > badness) {
badness = score;
+ if (need_rescore)
+ continue;
+
if (sk->sk_state == TCP_ESTABLISHED) {
result = sk;
continue;
@@ -200,28 +257,108 @@ static struct sock *udp6_lib_lookup2(struct net *net,
if (IS_ERR(result))
continue;
- badness = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif);
+ /* compute_score is too long of a function to be
+ * inlined, and calling it again here yields
+ * measureable overhead for some
+ * workloads. Work around it by jumping
+ * backwards to rescore 'result'.
+ */
+ need_rescore = true;
+ goto rescore;
}
}
return result;
}
+#if IS_ENABLED(CONFIG_BASE_SMALL)
+static struct sock *udp6_lib_lookup4(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ struct udp_table *udptable)
+{
+ return NULL;
+}
+
+static void udp6_hash4(struct sock *sk)
+{
+}
+#else /* !CONFIG_BASE_SMALL */
+static struct sock *udp6_lib_lookup4(const struct net *net,
+ const struct in6_addr *saddr, __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum, int dif, int sdif,
+ struct udp_table *udptable)
+{
+ const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+ const struct hlist_nulls_node *node;
+ struct udp_hslot *hslot4;
+ unsigned int hash4, slot;
+ struct udp_sock *up;
+ struct sock *sk;
+
+ hash4 = udp6_ehashfn(net, daddr, hnum, saddr, sport);
+ slot = hash4 & udptable->mask;
+ hslot4 = &udptable->hash4[slot];
+
+begin:
+ udp_lrpa_for_each_entry_rcu(up, node, &hslot4->nulls_head) {
+ sk = (struct sock *)up;
+ if (inet6_match(net, sk, saddr, daddr, ports, dif, sdif))
+ return sk;
+ }
+
+ /* if the nulls value we got at the end of this lookup is not the
+ * expected one, we must restart lookup. We probably met an item that
+ * was moved to another chain due to rehash.
+ */
+ if (get_nulls_value(node) != slot)
+ goto begin;
+
+ return NULL;
+}
+
+static void udp6_hash4(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ unsigned int hash;
+
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) {
+ udp4_hash4(sk);
+ return;
+ }
+
+ if (sk_unhashed(sk) || ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ return;
+
+ hash = udp6_ehashfn(net, &sk->sk_v6_rcv_saddr, sk->sk_num,
+ &sk->sk_v6_daddr, sk->sk_dport);
+
+ udp_lib_hash4(sk, hash);
+}
+#endif /* CONFIG_BASE_SMALL */
+
/* rcu_read_lock() must be held */
-struct sock *__udp6_lib_lookup(struct net *net,
+struct sock *__udp6_lib_lookup(const struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport,
int dif, int sdif, struct udp_table *udptable,
struct sk_buff *skb)
{
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
struct sock *result, *sk;
+ unsigned int hash2;
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
+
+ if (udp_has_hash4(hslot2)) {
+ result = udp6_lib_lookup4(net, saddr, sport, daddr, hnum,
+ dif, sdif, udptable);
+ if (result) /* udp6_lib_lookup4 return sk or NULL */
+ return result;
+ }
/* Lookup connected or non-wildcard sockets */
result = udp6_lib_lookup2(net, saddr, sport,
@@ -248,12 +385,18 @@ struct sock *__udp6_lib_lookup(struct net *net,
/* Lookup wildcard sockets */
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
result = udp6_lib_lookup2(net, saddr, sport,
&in6addr_any, hnum, dif, sdif,
hslot2, skb);
+ if (!IS_ERR_OR_NULL(result))
+ goto done;
+
+ /* Cover address change/lookup/rehash race: see __udp4_lib_lookup() */
+ result = udp6_lib_lookup1(net, saddr, sport, daddr, hnum, dif, sdif,
+ udptable);
+
done:
if (IS_ERR(result))
return NULL;
@@ -275,7 +418,8 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
struct net *net = dev_net(skb->dev);
int iif, sdif;
@@ -290,7 +434,7 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
* Does increment socket refcount.
*/
#if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
-struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
+struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
struct sock *sk;
@@ -450,7 +594,7 @@ csum_copy_err:
goto try_again;
}
-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
static_branch_inc(&udpv6_encap_needed_key);
@@ -661,8 +805,8 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
drop_reason = SKB_DROP_REASON_PROTO_MEM;
}
UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
- kfree_skb_reason(skb, drop_reason);
- trace_udp_fail_queue_rcv_skb(rc, sk);
+ trace_udp_fail_queue_rcv_skb(rc, sk, skb);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return -1;
}
@@ -765,7 +909,7 @@ csum_error:
drop:
__UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
atomic_inc(&sk->sk_drops);
- kfree_skb_reason(skb, drop_reason);
+ sk_skb_reason_drop(sk, skb, drop_reason);
return -1;
}
@@ -849,7 +993,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
udptable->mask;
hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
- hslot = &udptable->hash2[hash2];
+ hslot = &udptable->hash2[hash2].hslot;
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
}
@@ -900,11 +1044,8 @@ start_lookup:
static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
{
- if (udp_sk_rx_dst_set(sk, dst)) {
- const struct rt6_info *rt = (const struct rt6_info *)dst;
-
- sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
- }
+ if (udp_sk_rx_dst_set(sk, dst))
+ sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst));
}
/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
@@ -932,8 +1073,8 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
const struct in6_addr *saddr, *daddr;
struct net *net = dev_net(skb->dev);
+ struct sock *sk = NULL;
struct udphdr *uh;
- struct sock *sk;
bool refcounted;
u32 ulen = 0;
@@ -1025,7 +1166,7 @@ no_sk:
__UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return 0;
short_packet:
@@ -1046,7 +1187,7 @@ csum_error:
__UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
discard:
__UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
- kfree_skb_reason(skb, reason);
+ sk_skb_reason_drop(sk, skb, reason);
return 0;
}
@@ -1058,14 +1199,13 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
{
struct udp_table *udptable = net->ipv4.udp_table;
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2, slot2;
struct udp_hslot *hslot2;
+ unsigned int hash2;
__portpair ports;
struct sock *sk;
hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
- slot2 = hash2 & udptable->mask;
- hslot2 = &udptable->hash2[slot2];
+ hslot2 = udp_hashslot2(udptable, hash2);
ports = INET_COMBINED_PORTS(rmt_port, hnum);
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
@@ -1101,11 +1241,12 @@ void udp_v6_early_demux(struct sk_buff *skb)
else
return;
- if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
+ if (!sk)
return;
skb->sk = sk;
- skb->destructor = sock_efree;
+ DEBUG_NET_WARN_ON_ONCE(sk_is_refcounted(sk));
+ skb->destructor = sock_pfree;
dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
@@ -1161,6 +1302,18 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
+static int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ int res;
+
+ lock_sock(sk);
+ res = __ip6_datagram_connect(sk, uaddr, addr_len);
+ if (!res)
+ udp6_hash4(sk);
+ release_sock(sk);
+ return res;
+}
+
/**
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
@@ -1236,9 +1389,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
const int hlen = skb_network_header_len(skb) +
sizeof(struct udphdr);
- if (hlen + cork->gso_size > cork->fragsize) {
+ if (hlen + min(datalen, cork->gso_size) > cork->fragsize) {
kfree_skb(skb);
- return -EINVAL;
+ return -EMSGSIZE;
}
if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
kfree_skb(skb);
@@ -1248,8 +1401,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
- dst_xfrm(skb_dst(skb))) {
+ if (is_udplite || dst_xfrm(skb_dst(skb))) {
kfree_skb(skb);
return -EIO;
}
@@ -1259,8 +1411,10 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
cork->gso_size);
+
+ /* Don't checksum the payload, skb will get segmented */
+ goto csum_partial;
}
- goto csum_partial;
}
if (is_udplite)
@@ -1344,6 +1498,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.gso_size = READ_ONCE(up->gso_size);
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
+ ipc6.sockc.priority = READ_ONCE(sk->sk_priority);
/* destination address check */
if (sin6) {
@@ -1476,9 +1631,11 @@ do_udp_sendmsg:
ipc6.opt = opt;
err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
- if (err > 0)
+ if (err > 0) {
err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6,
&ipc6);
+ connected = false;
+ }
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -1490,7 +1647,6 @@ do_udp_sendmsg:
}
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
- connected = false;
}
if (!opt) {
opt = txopt_get(np);
@@ -1573,7 +1729,7 @@ back_from_confirm:
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc6,
- (struct rt6_info *)dst,
+ dst_rt6_info(dst),
msg->msg_flags, &cork);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
@@ -1600,7 +1756,7 @@ do_append_data:
ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
- &ipc6, fl6, (struct rt6_info *)dst,
+ &ipc6, fl6, dst_rt6_info(dst),
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_v6_flush_pending_frames(sk);
@@ -1702,11 +1858,6 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
-static const struct inet6_protocol udpv6_protocol = {
- .handler = udpv6_rcv,
- .err_handler = udpv6_err,
- .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
-};
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
@@ -1759,7 +1910,7 @@ struct proto udpv6_prot = {
.owner = THIS_MODULE,
.close = udp_lib_close,
.pre_connect = udpv6_pre_connect,
- .connect = ip6_datagram_connect,
+ .connect = udpv6_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
.init = udpv6_init_sock,
@@ -1803,7 +1954,12 @@ int __init udpv6_init(void)
{
int ret;
- ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
+ net_hotdata.udpv6_protocol = (struct inet6_protocol) {
+ .handler = udpv6_rcv,
+ .err_handler = udpv6_err,
+ .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+ };
+ ret = inet6_add_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
if (ret)
goto out;
@@ -1814,12 +1970,12 @@ out:
return ret;
out_udpv6_protocol:
- inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+ inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
goto out;
}
void udpv6_exit(void)
{
inet6_unregister_protosw(&udpv6_protosw);
- inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
+ inet6_del_protocol(&net_hotdata.udpv6_protocol, IPPROTO_UDP);
}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 6b95ba241ebe..b41152dd4246 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -164,7 +164,8 @@ flush:
INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
{
- const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
/* do fraglist only if there is no outer UDP encap (or we already processed it) */
@@ -174,13 +175,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);
return 0;
}
@@ -192,20 +187,19 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
}
-static const struct net_offload udpv6_offload = {
- .callbacks = {
- .gso_segment = udp6_ufo_fragment,
- .gro_receive = udp6_gro_receive,
- .gro_complete = udp6_gro_complete,
- },
-};
-
-int udpv6_offload_init(void)
+int __init udpv6_offload_init(void)
{
- return inet6_add_offload(&udpv6_offload, IPPROTO_UDP);
+ net_hotdata.udpv6_offload = (struct net_offload) {
+ .callbacks = {
+ .gso_segment = udp6_ufo_fragment,
+ .gro_receive = udp6_gro_receive,
+ .gro_complete = udp6_gro_complete,
+ },
+ };
+ return inet6_add_offload(&net_hotdata.udpv6_offload, IPPROTO_UDP);
}
int udpv6_offload_exit(void)
{
- return inet6_del_offload(&udpv6_offload, IPPROTO_UDP);
+ return inet6_del_offload(&net_hotdata.udpv6_offload, IPPROTO_UDP);
}
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 6e36e5047fba..4abc5e9d6322 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -43,7 +43,7 @@ static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
int xfrm6_transport_finish(struct sk_buff *skb, int async)
{
struct xfrm_offload *xo = xfrm_offload(skb);
- int nhlen = skb->data - skb_network_header(skb);
+ int nhlen = -skb_network_offset(skb);
skb_network_header(skb)[IP6CB(skb)->nhoff] =
XFRM_MODE_SKB_CB(skb)->protocol;
@@ -58,7 +58,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
skb_postpush_rcsum(skb, skb_network_header(skb), nhlen);
if (xo && (xo->flags & XFRM_GRO)) {
- skb_mac_header_rebuild(skb);
+ /* The full l2 header needs to be preserved so that re-injecting the packet at l2
+ * works correctly in the presence of vlan tags.
+ */
+ skb_mac_header_rebuild_full(skb, xo->orig_mac_len);
+ skb_reset_network_header(skb);
skb_reset_transport_header(skb);
return 0;
}
@@ -109,19 +113,6 @@ static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull
/* Must be an IKE packet.. pass it through */
return 1;
break;
- case UDP_ENCAP_ESPINUDP_NON_IKE:
- /* Check if this is a keepalive packet. If so, eat it. */
- if (len == 1 && udpdata[0] == 0xff) {
- return -EINVAL;
- } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
- udpdata32[0] == 0 && udpdata32[1] == 0) {
-
- /* ESP Packet with Non-IKE marker */
- len = sizeof(struct udphdr) + 2 * sizeof(u32);
- } else
- /* Must be an IKE packet.. pass it through */
- return 1;
- break;
}
/* At this point we are sure that this is an ESPinUDP packet,
@@ -279,6 +270,13 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
if (!x)
continue;
+ if (unlikely(x->dir && x->dir != XFRM_SA_DIR_IN)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEDIRERROR);
+ xfrm_state_put(x);
+ x = NULL;
+ continue;
+ }
+
spin_lock(&x->lock);
if ((!i || (x->props.flags & XFRM_STATE_WILDRECV)) &&
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 5f7b1fdbffe6..b3d5d1f266ee 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -82,14 +82,14 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
toobig = skb->len > mtu && !skb_is_gso(skb);
- if (toobig && xfrm6_local_dontfrag(skb->sk)) {
+ if (toobig && xfrm6_local_dontfrag(sk)) {
xfrm6_local_rxpmtu(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
} else if (toobig && xfrm6_noneed_fragment(skb)) {
skb->ignore_df = 1;
goto skip_frag;
- } else if (!skb->ignore_df && toobig && skb->sk) {
+ } else if (!skb->ignore_df && toobig && sk) {
xfrm_local_error(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 42fb6996b077..1f19b6f14484 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -23,23 +23,24 @@
#include <net/ip6_route.h>
#include <net/l3mdev.h>
-static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- u32 mark)
+static struct dst_entry *xfrm6_dst_lookup(const struct xfrm_dst_lookup_params *params)
{
struct flowi6 fl6;
struct dst_entry *dst;
int err;
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
- fl6.flowi6_mark = mark;
- memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
- if (saddr)
- memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
+ fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(params->net,
+ params->oif);
+ fl6.flowi6_mark = params->mark;
+ memcpy(&fl6.daddr, params->daddr, sizeof(fl6.daddr));
+ if (params->saddr)
+ memcpy(&fl6.saddr, params->saddr, sizeof(fl6.saddr));
- dst = ip6_route_output(net, NULL, &fl6);
+ fl6.flowi4_proto = params->ipproto;
+ fl6.uli = params->uli;
+
+ dst = ip6_route_output(params->net, NULL, &fl6);
err = dst->error;
if (dst->error) {
@@ -50,19 +51,25 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
return dst;
}
-static int xfrm6_get_saddr(struct net *net, int oif,
- xfrm_address_t *saddr, xfrm_address_t *daddr,
- u32 mark)
+static int xfrm6_get_saddr(xfrm_address_t *saddr,
+ const struct xfrm_dst_lookup_params *params)
{
struct dst_entry *dst;
struct net_device *dev;
+ struct inet6_dev *idev;
- dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark);
+ dst = xfrm6_dst_lookup(params);
if (IS_ERR(dst))
return -EHOSTUNREACH;
- dev = ip6_dst_idev(dst)->dev;
- ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
+ idev = ip6_dst_idev(dst);
+ if (!idev) {
+ dst_release(dst);
+ return -EHOSTUNREACH;
+ }
+ dev = idev->dev;
+ ipv6_dev_get_saddr(dev_net(dev), dev, &params->daddr->in6, 0,
+ &saddr->in6);
dst_release(dst);
return 0;
}
@@ -70,7 +77,7 @@ static int xfrm6_get_saddr(struct net *net, int oif,
static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
- struct rt6_info *rt = (struct rt6_info *)xdst->route;
+ struct rt6_info *rt = dst_rt6_info(xdst->route);
xdst->u.dst.dev = dev;
netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
@@ -184,7 +191,6 @@ static struct ctl_table xfrm6_policy_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- { }
};
static int __net_init xfrm6_net_sysctl_init(struct net *net)
@@ -218,7 +224,7 @@ err_alloc:
static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
{
- struct ctl_table *table;
+ const struct ctl_table *table;
if (!net->ipv6.sysctl.xfrm6_hdr)
return;
@@ -285,8 +291,14 @@ int __init xfrm6_init(void)
ret = register_pernet_subsys(&xfrm6_net_ops);
if (ret)
goto out_protocol;
+
+ ret = xfrm_nat_keepalive_init(AF_INET6);
+ if (ret)
+ goto out_nat_keepalive;
out:
return ret;
+out_nat_keepalive:
+ unregister_pernet_subsys(&xfrm6_net_ops);
out_protocol:
xfrm6_protocol_fini();
out_state:
@@ -298,6 +310,7 @@ out_policy:
void xfrm6_fini(void)
{
+ xfrm_nat_keepalive_fini(AF_INET6);
unregister_pernet_subsys(&xfrm6_net_ops);
xfrm6_protocol_fini();
xfrm6_policy_fini();
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f6cb94f82cc3..bf140ef781c1 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -355,10 +355,7 @@ static int __init xfrm6_tunnel_init(void)
{
int rv;
- xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi",
- sizeof(struct xfrm6_tunnel_spi),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
+ xfrm6_tunnel_spi_kmem = KMEM_CACHE(xfrm6_tunnel_spi, SLAB_HWCACHE_ALIGN);
if (!xfrm6_tunnel_spi_kmem)
return -ENOMEM;
rv = register_pernet_subsys(&xfrm6_tunnel_net_ops);