From 1b227e536662eae4b7beee3037edbecdc577495f Mon Sep 17 00:00:00 2001
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Sat, 21 May 2016 18:17:34 +0800
Subject: ip6_gre: Fix MTU setting for ip6gretap

When creat an ip6gretap interface with an unreachable route,
the MTU is about 14 bytes larger than what was needed.

If the remote address is reachable:
ping6 2001:0:130::1 -c 2
PING 2001:0:130::1(2001:0:130::1) 56 data bytes
64 bytes from 2001:0:130::1: icmp_seq=1 ttl=64 time=1.46 ms
64 bytes from 2001:0:130::1: icmp_seq=2 ttl=64 time=81.1 ms

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index af503f518278..a6fe3397728c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1027,6 +1027,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
 
 	dev->hard_header_len = LL_MAX_HEADER + t_hlen;
 	dev->mtu = ETH_DATA_LEN - t_hlen;
+	if (dev->type == ARPHRD_ETHER)
+		dev->mtu -= ETH_HLEN;
 	if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 		dev->mtu -= 8;
 
-- 
cgit 


From 252f3f5a1189a7f6c309d8e4ff1c4c1888a27f13 Mon Sep 17 00:00:00 2001
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Sat, 21 May 2016 18:17:35 +0800
Subject: ip6_gre: Set flowi6_proto as IPPROTO_GRE in xmit path.

In gre6 xmit path, we are sending a GRE packet, so set fl6 proto
to IPPROTO_GRE properly.

Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index a6fe3397728c..f4ac2842d4d9 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -712,6 +712,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
 	fl6->daddr = p->raddr;
 	fl6->flowi6_oif = p->link;
 	fl6->flowlabel = 0;
+	fl6->flowi6_proto = IPPROTO_GRE;
 
 	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
 		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
-- 
cgit 


From fabb13db448efc23c47851550867fc46519de97e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 25 May 2016 16:50:46 +0200
Subject: fou: add Kconfig options for IPv6 support

A previous patch added the fou6.ko module, but that failed to link
in a couple of configurations:

net/built-in.o: In function `ip6_tnl_encap_add_fou_ops':
net/ipv6/fou6.c:88: undefined reference to `ip6_tnl_encap_add_ops'
net/ipv6/fou6.c:94: undefined reference to `ip6_tnl_encap_add_ops'
net/ipv6/fou6.c:97: undefined reference to `ip6_tnl_encap_del_ops'
net/built-in.o: In function `ip6_tnl_encap_del_fou_ops':
net/ipv6/fou6.c:106: undefined reference to `ip6_tnl_encap_del_ops'
net/ipv6/fou6.c:107: undefined reference to `ip6_tnl_encap_del_ops'

If CONFIG_IPV6=m, ip6_tnl_encap_add_ops/ip6_tnl_encap_del_ops
are in a module, but fou6.c can still be built-in, and that
obviously fails to link.

Also, if CONFIG_IPV6=y, but CONFIG_IPV6_TUNNEL=m or
CONFIG_IPV6_TUNNEL=n, the same problem happens for a different
reason.

This adds two new silent Kconfig symbols to work around both
problems:

- CONFIG_IPV6_FOU is now always set to 'm' if either CONFIG_NET_FOU=m
  or CONFIG_IPV6=m
- CONFIG_IPV6_FOU_TUNNEL is set implicitly when IPV6_FOU is enabled
  and NET_FOU_IP_TUNNELS is also turned out, and it will ensure
  that CONFIG_IPV6_TUNNEL is also available.

The options could be made user-visible as well, to give additional
room for configuration, but it seems easier not to bother users
with more choice here.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: aa3463d65e7b ("fou: Add encap ops for IPv6 tunnels")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/Kconfig  | 9 +++++++++
 net/ipv6/Makefile | 2 +-
 net/ipv6/fou6.c   | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 3f8411328de5..994608263260 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -232,6 +232,15 @@ config IPV6_GRE
 
 	  Saying M here will produce a module called ip6_gre. If unsure, say N.
 
+config IPV6_FOU
+	tristate
+	default NET_FOU && IPV6
+
+config IPV6_FOU_TUNNEL
+	tristate
+	default NET_FOU_IP_TUNNELS && IPV6_FOU
+	select INET6_TUNNEL
+
 config IPV6_MULTIPLE_TABLES
 	bool "IPv6: Multiple Routing Tables"
 	select FIB_RULES
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 7ec3129c9ace..d42ca3d1197f 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -42,7 +42,7 @@ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
 obj-$(CONFIG_IPV6_SIT) += sit.o
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
 obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
-obj-$(CONFIG_NET_FOU) += fou6.o
+obj-$(CONFIG_NET_FOU_IPV6_TUNNELS) += fou6.o
 
 obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
 obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index c972d0b52579..9ea249b9451e 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -69,7 +69,7 @@ int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 }
 EXPORT_SYMBOL(gue6_build_header);
 
-#ifdef CONFIG_NET_FOU_IP_TUNNELS
+#if IS_ENABLED(CONFIG_IPV6_FOU_TUNNEL)
 
 static const struct ip6_tnl_encap_ops fou_ip6tun_ops = {
 	.encap_hlen = fou_encap_hlen,
-- 
cgit 


From 83170f3beccccd7ceb4f9a0ac0c4dc736afde90c Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 26 May 2016 19:08:10 +0200
Subject: netfilter: nf_dup_ipv6: set again FLOWI_FLAG_KNOWN_NH at flowi6_flags

With the commit 48e8aa6e3137 ("ipv6: Set FLOWI_FLAG_KNOWN_NH at
flowi6_flags") ip6_pol_route() callers were asked to to set the
FLOWI_FLAG_KNOWN_NH properly and xt_TEE was updated accordingly,
but with the later refactor in commit bbde9fc1824a ("netfilter:
factor out packet duplication for IPv4/IPv6") the flowi6_flags
update was lost.
This commit re-add it just before the routing decision.

Fixes: bbde9fc1824a ("netfilter: factor out packet duplication for IPv4/IPv6")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/nf_dup_ipv6.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index 6989c70ae29f..4a84b5ad9ecb 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -33,6 +33,7 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb,
 	fl6.daddr = *gw;
 	fl6.flowlabel = (__force __be32)(((iph->flow_lbl[0] & 0xF) << 16) |
 			(iph->flow_lbl[1] << 8) | iph->flow_lbl[2]);
+	fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (dst->error) {
 		dst_release(dst);
-- 
cgit 


From 95e4daa82086e2bd7b7163f33a4f455bf8ecdf48 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 31 May 2016 22:42:11 +0200
Subject: fou: fix IPv6 Kconfig options

The Kconfig options I added to work around broken compilation ended
up screwing up things more, as I used the wrong symbol to control
compilation of the file, resulting in IPv6 fou support to never be built
into the kernel.

Changing CONFIG_NET_FOU_IPV6_TUNNELS to CONFIG_IPV6_FOU fixes that
problem, I had renamed the symbol in one location but not the other,
and as the file is never being used by other kernel code, this did not
lead to a build failure that I would have caught.

After that fix, another issue with the same patch becomes obvious, as we
'select INET6_TUNNEL', which is related to IPV6_TUNNEL, but not the same,
and this can still cause the original build failure when IPV6_TUNNEL is
not built-in but IPV6_FOU is. The fix is equally trivial, we just need
to select the right symbol.

I have successfully build 350 randconfig kernels with this patch
and verified that the driver is now being built.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: Valentin Rothberg <valentinrothberg@gmail.com>
Fixes: fabb13db448e ("fou: add Kconfig options for IPv6 support")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/Kconfig  | 2 +-
 net/ipv6/Makefile | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 994608263260..2343e4f2e0bf 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -239,7 +239,7 @@ config IPV6_FOU
 config IPV6_FOU_TUNNEL
 	tristate
 	default NET_FOU_IP_TUNNELS && IPV6_FOU
-	select INET6_TUNNEL
+	select IPV6_TUNNEL
 
 config IPV6_MULTIPLE_TABLES
 	bool "IPv6: Multiple Routing Tables"
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index d42ca3d1197f..6d8ea099213e 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -42,7 +42,7 @@ obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
 obj-$(CONFIG_IPV6_SIT) += sit.o
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
 obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
-obj-$(CONFIG_NET_FOU_IPV6_TUNNELS) += fou6.o
+obj-$(CONFIG_IPV6_FOU) += fou6.o
 
 obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
 obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
-- 
cgit 


From ce25d66ad5f8d921bac5fe2d32d62fa30c0f9a70 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 2 Jun 2016 14:52:43 -0700
Subject: Possible problem with e6afc8ac ("udp: remove headers from UDP packets
 before queueing")

Paul Moore tracked a regression caused by a recent commit, which
mistakenly assumed that sk_filter() could be avoided if socket
had no current BPF filter.

The intent was to avoid udp_lib_checksum_complete() overhead.

But sk_filter() also checks skb_pfmemalloc() and
security_sock_rcv_skb(), so better call it.

Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Paul Moore <paul@paul-moore.com>
Tested-by: Paul Moore <paul@paul-moore.com>
Tested-by: Stephen Smalley <sds@tycho.nsa.gov>
Cc: samanthakumar <samanthakumar@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 2da1896af934..f421c9f23c5b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -653,12 +653,12 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		}
 	}
 
-	if (rcu_access_pointer(sk->sk_filter)) {
-		if (udp_lib_checksum_complete(skb))
-			goto csum_error;
-		if (sk_filter(sk, skb))
-			goto drop;
-	}
+	if (rcu_access_pointer(sk->sk_filter) &&
+	    udp_lib_checksum_complete(skb))
+		goto csum_error;
+
+	if (sk_filter(sk, skb))
+		goto drop;
 
 	udp_csum_pull_header(skb);
 	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
-- 
cgit 


From ce3cf4ec0305919fc69a972f6c2b2efd35d36abc Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 6 Jun 2016 15:07:18 -0700
Subject: tcp: record TLP and ER timer stats in v6 stats

The v6 tcp stats scan do not provide TLP and ER timer information
correctly like the v4 version . This patch fixes that.

Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)")
Fixes: eed530b6c676 ("tcp: early retransmit")
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 79e33e02f11a..f36c2d076fce 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1721,7 +1721,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 	destp = ntohs(inet->inet_dport);
 	srcp  = ntohs(inet->inet_sport);
 
-	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		timer_active	= 1;
 		timer_expires	= icsk->icsk_timeout;
 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
-- 
cgit 


From 00bc0ef5880dc7b82f9c320dead4afaad48e47be Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jkbs@redhat.com>
Date: Wed, 8 Jun 2016 15:13:34 +0200
Subject: ipv6: Skip XFRM lookup if dst_entry in socket cache is valid

At present we perform an xfrm_lookup() for each UDPv6 message we
send. The lookup involves querying the flow cache (flow_cache_lookup)
and, in case of a cache miss, creating an XFRM bundle.

If we miss the flow cache, we can end up creating a new bundle and
deriving the path MTU (xfrm_init_pmtu) from on an already transformed
dst_entry, which we pass from the socket cache (sk->sk_dst_cache) down
to xfrm_lookup(). This can happen only if we're caching the dst_entry
in the socket, that is when we're using a connected UDP socket.

To put it another way, the path MTU shrinks each time we miss the flow
cache, which later on leads to incorrectly fragmented payload. It can
be observed with ESPv6 in transport mode:

  1) Set up a transformation and lower the MTU to trigger fragmentation
    # ip xfrm policy add dir out src ::1 dst ::1 \
      tmpl src ::1 dst ::1 proto esp spi 1
    # ip xfrm state add src ::1 dst ::1 \
      proto esp spi 1 enc 'aes' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b
    # ip link set dev lo mtu 1500

  2) Monitor the packet flow and set up an UDP sink
    # tcpdump -ni lo -ttt &
    # socat udp6-listen:12345,fork /dev/null &

  3) Send a datagram that needs fragmentation with a connected socket
    # perl -e 'print "@" x 1470 | socat - udp6:[::1]:12345
    2016/06/07 18:52:52 socat[724] E read(3, 0x555bb3d5ba00, 8192): Protocol error
    00:00:00.000000 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x2), length 1448
    00:00:00.000014 IP6 ::1 > ::1: frag (1448|32)
    00:00:00.000050 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x3), length 1272
    (^ ICMPv6 Parameter Problem)
    00:00:00.000022 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x5), length 136

  4) Compare it to a non-connected socket
    # perl -e 'print "@" x 1500' | socat - udp6-sendto:[::1]:12345
    00:00:40.535488 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x6), length 1448
    00:00:00.000010 IP6 ::1 > ::1: frag (1448|64)

What happens in step (3) is:

  1) when connecting the socket in __ip6_datagram_connect(), we
     perform an XFRM lookup, miss the flow cache, create an XFRM
     bundle, and cache the destination,

  2) afterwards, when sending the datagram, we perform an XFRM lookup,
     again, miss the flow cache (due to mismatch of flowi6_iif and
     flowi6_oif, which is an issue of its own), and recreate an XFRM
     bundle based on the cached (and already transformed) destination.

To prevent the recreation of an XFRM bundle, avoid an XFRM lookup
altogether whenever we already have a destination entry cached in the
socket. This prevents the path MTU shrinkage and brings us on par with
UDPv4.

The fix also benefits connected PINGv6 sockets, another user of
ip6_sk_dst_lookup_flow(), who also suffer messages being transformed
twice.

Joint work with Hannes Frederic Sowa.

Reported-by: Jan Tluka <jtluka@redhat.com>
Signed-off-by: Jakub Sitnicki <jkbs@redhat.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index cbf127ae7c67..635b8d340cdb 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1071,17 +1071,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 					 const struct in6_addr *final_dst)
 {
 	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
-	int err;
 
 	dst = ip6_sk_dst_check(sk, dst, fl6);
+	if (!dst)
+		dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
 
-	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
-	if (err)
-		return ERR_PTR(err);
-	if (final_dst)
-		fl6->daddr = *final_dst;
-
-	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+	return dst;
 }
 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
 
-- 
cgit 


From 0a46baaf634663d28038fc137239b71bf5385e5a Mon Sep 17 00:00:00 2001
From: Shweta Choudaha <schoudah@brocade.com>
Date: Wed, 8 Jun 2016 20:15:43 +0100
Subject: ip6gre: Allow live link address change

The ip6 GRE tap device should not be forced to down state to change
the mac address and should allow live address change for tap device
similar to ipv4 gre.

Signed-off-by: Shweta Choudaha <schoudah@brocade.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f4ac2842d4d9..fdc9de276ab1 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1256,6 +1256,8 @@ static int ip6gre_tap_init(struct net_device *dev)
 	if (ret)
 		return ret;
 
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
 	tunnel = netdev_priv(dev);
 
 	ip6gre_tnl_link_config(tunnel, 1);
@@ -1289,6 +1291,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
 
 	dev->features |= NETIF_F_NETNS_LOCAL;
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 }
 
 static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
-- 
cgit 


From dcb94b88c09ce82a80e188d49bcffdc83ba215a6 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 11 Jun 2016 20:32:06 +0200
Subject: ipv6: fix endianness error in icmpv6_err

IPv6 ping socket error handler doesn't correctly convert the new 32 bit
mtu to host endianness before using.

Cc: Lorenzo Colitti <lorenzo@google.com>
Fixes: 6d0bfe22611602f ("net: ipv6: Add IPv6 support to the ping socket.")
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4527285fcaa2..a4fa84076969 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -98,7 +98,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	if (!(type & ICMPV6_INFOMSG_MASK))
 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
-			ping_err(skb, offset, info);
+			ping_err(skb, offset, ntohl(info));
 }
 
 static int icmpv6_rcv(struct sk_buff *skb);
-- 
cgit 


From 5119bd16815d3f0364390a1369392dcc036790e7 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 11 Jun 2016 20:41:38 +0200
Subject: ipv6: tcp: fix endianness annotation in tcp_v6_send_response

Cc: Florent Fourcot <florent.fourcot@enst-bretagne.fr>
Fixes: 1d13a96c74fc ("ipv6: tcp: fix flowlabel value in ACK messages send from TIME_WAIT")
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f36c2d076fce..2255d2bf5f6b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -738,7 +738,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
 				 int oif, struct tcp_md5sig_key *key, int rst,
-				 u8 tclass, u32 label)
+				 u8 tclass, __be32 label)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct tcphdr *t1;
@@ -911,7 +911,7 @@ out:
 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
 			    struct tcp_md5sig_key *key, u8 tclass,
-			    u32 label)
+			    __be32 label)
 {
 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
 			     tclass, label);
-- 
cgit 


From c148d16369ff0095eca950d17968ba1d56a47b53 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 11 Jun 2016 21:15:37 +0200
Subject: ipv6: fix checksum annotation in udp6_csum_init

Cc: Tom Herbert <tom@herbertland.com>
Fixes: 4068579e1e098fa ("net: Implmement RFC 6936 (zero RX csums for UDP/IPv6")
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_checksum.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index b2025bf3da4a..c0cbcb259f5a 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -78,9 +78,12 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
 	 * we accept a checksum of zero here. When we find the socket
 	 * for the UDP packet we'll check if that socket allows zero checksum
 	 * for IPv6 (set by socket option).
+	 *
+	 * Note, we are only interested in != 0 or == 0, thus the
+	 * force to int.
 	 */
-	return skb_checksum_init_zero_check(skb, proto, uh->check,
-					   ip6_compute_pseudo);
+	return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+							 ip6_compute_pseudo);
 }
 EXPORT_SYMBOL(udp6_csum_init);
 
-- 
cgit 


From d1e37288c9146dccff830e3253e403af8705b51f Mon Sep 17 00:00:00 2001
From: "Su, Xuemin" <suxm@chinanetcenter.com>
Date: Mon, 13 Jun 2016 11:02:50 +0800
Subject: udp reuseport: fix packet of same flow hashed to different socket

There is a corner case in which udp packets belonging to a same
flow are hashed to different socket when hslot->count changes from 10
to 11:

1) When hslot->count <= 10, __udp_lib_lookup() searches udp_table->hash,
and always passes 'daddr' to udp_ehashfn().

2) When hslot->count > 10, __udp_lib_lookup() searches udp_table->hash2,
but may pass 'INADDR_ANY' to udp_ehashfn() if the sockets are bound to
INADDR_ANY instead of some specific addr.

That means when hslot->count changes from 10 to 11, the hash calculated by
udp_ehashfn() is also changed, and the udp packets belonging to a same
flow will be hashed to different socket.

This is easily reproduced:
1) Create 10 udp sockets and bind all of them to 0.0.0.0:40000.
2) From the same host send udp packets to 127.0.0.1:40000, record the
socket index which receives the packets.
3) Create 1 more udp socket and bind it to 0.0.0.0:44096. The number 44096
is 40000 + UDP_HASH_SIZE(4096), this makes the new socket put into the
same hslot as the aformentioned 10 sockets, and makes the hslot->count
change from 10 to 11.
4) From the same host send udp packets to 127.0.0.1:40000, and the socket
index which receives the packets will be different from the one received
in step 2.
This should not happen as the socket bound to 0.0.0.0:44096 should not
change the behavior of the sockets bound to 0.0.0.0:40000.

It's the same case for IPv6, and this patch also fixes that.

Signed-off-by: Su, Xuemin <suxm@chinanetcenter.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 71 +++++++++++++---------------------------------------------
 1 file changed, 16 insertions(+), 55 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f421c9f23c5b..005dc82c2138 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk)
 	udp_lib_rehash(sk, new_hash);
 }
 
-static inline int compute_score(struct sock *sk, struct net *net,
-				unsigned short hnum,
-				const struct in6_addr *saddr, __be16 sport,
-				const struct in6_addr *daddr, __be16 dport,
-				int dif)
+static int compute_score(struct sock *sk, struct net *net,
+			 const struct in6_addr *saddr, __be16 sport,
+			 const struct in6_addr *daddr, unsigned short hnum,
+			 int dif)
 {
 	int score;
 	struct inet_sock *inet;
@@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net,
 	return score;
 }
 
-static inline int compute_score2(struct sock *sk, struct net *net,
-				 const struct in6_addr *saddr, __be16 sport,
-				 const struct in6_addr *daddr,
-				 unsigned short hnum, int dif)
-{
-	int score;
-	struct inet_sock *inet;
-
-	if (!net_eq(sock_net(sk), net) ||
-	    udp_sk(sk)->udp_port_hash != hnum ||
-	    sk->sk_family != PF_INET6)
-		return -1;
-
-	if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
-		return -1;
-
-	score = 0;
-	inet = inet_sk(sk);
-
-	if (inet->inet_dport) {
-		if (inet->inet_dport != sport)
-			return -1;
-		score++;
-	}
-
-	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
-		if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
-			return -1;
-		score++;
-	}
-
-	if (sk->sk_bound_dev_if) {
-		if (sk->sk_bound_dev_if != dif)
-			return -1;
-		score++;
-	}
-
-	if (sk->sk_incoming_cpu == raw_smp_processor_id())
-		score++;
-
-	return score;
-}
-
-/* called with read_rcu_lock() */
+/* called with rcu_read_lock() */
 static struct sock *udp6_lib_lookup2(struct net *net,
 		const struct in6_addr *saddr, __be16 sport,
 		const struct in6_addr *daddr, unsigned int hnum, int dif,
-		struct udp_hslot *hslot2, unsigned int slot2,
+		struct udp_hslot *hslot2,
 		struct sk_buff *skb)
 {
 	struct sock *sk, *result;
@@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 	result = NULL;
 	badness = -1;
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-		score = compute_score2(sk, net, saddr, sport,
+		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
@@ -268,17 +224,22 @@ struct sock *__udp6_lib_lookup(struct net *net,
 
 		result = udp6_lib_lookup2(net, saddr, sport,
 					  daddr, hnum, dif,
-					  hslot2, slot2, skb);
+					  hslot2, skb);
 		if (!result) {
+			unsigned int old_slot2 = slot2;
 			hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
 			slot2 = hash2 & udptable->mask;
+			/* avoid searching the same slot again. */
+			if (unlikely(slot2 == old_slot2))
+				return result;
+
 			hslot2 = &udptable->hash2[slot2];
 			if (hslot->count < hslot2->count)
 				goto begin;
 
 			result = udp6_lib_lookup2(net, saddr, sport,
-						  &in6addr_any, hnum, dif,
-						  hslot2, slot2, skb);
+						  daddr, hnum, dif,
+						  hslot2, skb);
 		}
 		return result;
 	}
@@ -286,7 +247,7 @@ begin:
 	result = NULL;
 	badness = -1;
 	sk_for_each_rcu(sk, &hslot->head) {
-		score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
+		score = compute_score(sk, net, saddr, sport, daddr, hnum, dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
-- 
cgit 


From e582615ad33dbd39623084a02e95567b116e1eea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 15 Jun 2016 06:24:00 -0700
Subject: gre: fix error handler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1) gre_parse_header() can be called from gre_err()

   At this point transport header points to ICMP header, not the inner
header.

2) We can not really change transport header as ipgre_err() will later
assume transport header still points to ICMP header (using icmp_hdr())

3) pskb_may_pull() logic in gre_parse_header() really works
  if we are interested at zone pointed by skb->data

4) As Jiri explained in commit b7f8fe251e46 ("gre: do not pull header in
ICMP error processing") we should not pull headers in error handler.

So this fix :

A) changes gre_parse_header() to use skb->data instead of
skb_transport_header()

B) Adds a nhs parameter to gre_parse_header() so that we can skip the
not pulled IP header from error path.
  This offset is 0 for normal receive path.

C) remove obsolete IPV6 includes

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <tom@herbertland.com>
Cc: Maciej Żenczykowski <maze@google.com>
Cc: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_gre.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index fdc9de276ab1..776d145113e1 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb)
 	bool csum_err = false;
 	int hdr_len;
 
-	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6));
+	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0);
 	if (hdr_len < 0)
 		goto drop;
 
-- 
cgit 


From d5d8760b78d0cfafe292f965f599988138b06a70 Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@netronome.com>
Date: Thu, 16 Jun 2016 17:06:19 +0900
Subject: sit: correct IP protocol used in ipip6_err

Since 32b8a8e59c9c ("sit: add IPv4 over IPv4 support")
ipip6_err() may be called for packets whose IP protocol is
IPPROTO_IPIP as well as those whose IP protocol is IPPROTO_IPV6.

In the case of IPPROTO_IPIP packets the correct protocol value is not
passed to ipv4_update_pmtu() or ipv4_redirect().

This patch resolves this problem by using the IP protocol of the packet
rather than a hard-coded value. This appears to be consistent
with the usage of the protocol of a packet by icmp_socket_deliver()
the caller of ipip6_err().

I was able to exercise the redirect case by using a setup where an ICMP
redirect was received for the destination of the encapsulated packet.
However, it appears that although incorrect the protocol field is not used
in this case and thus no problem manifests.  On inspection it does not
appear that a problem will manifest in the fragmentation needed/update pmtu
case either.

In short I believe this is a cosmetic fix. None the less, the use of
IPPROTO_IPV6 seems wrong and confusing.

Reviewed-by: Dinan Gunawardena <dinan.gunawardena@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0a5a255277e5..0619ac70836d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -560,13 +560,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 t->parms.link, 0, IPPROTO_IPV6, 0);
+				 t->parms.link, 0, iph->protocol, 0);
 		err = 0;
 		goto out;
 	}
 	if (type == ICMP_REDIRECT) {
 		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
-			      IPPROTO_IPV6, 0);
+			      iph->protocol, 0);
 		err = 0;
 		goto out;
 	}
-- 
cgit 


From 48f1dcb55a7d29aeb8965c567660c14d0dfd1a42 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 23 Jun 2016 15:25:09 +0200
Subject: ipv6: enforce egress device match in per table nexthop lookups

with the commit 8c14586fc320 ("net: ipv6: Use passed in table for
nexthop lookups"), net hop lookup is first performed on route creation
in the passed-in table.
However device match is not enforced in table lookup, so the found
route can be later discarded due to egress device mismatch and no
global lookup will be performed.
This cause the following to fail:

ip link add dummy1 type dummy
ip link add dummy2 type dummy
ip link set dummy1 up
ip link set dummy2 up
ip route add 2001:db8:8086::/48 dev dummy1 metric 20
ip route add 2001:db8:d34d::/64 via 2001:db8:8086::2 dev dummy1 metric 20
ip route add 2001:db8:8086::/48 dev dummy2 metric 21
ip route add 2001:db8:d34d::/64 via 2001:db8:8086::2 dev dummy2 metric 21
RTNETLINK answers: No route to host

This change fixes the issue enforcing device lookup in
ip6_nh_lookup_table()

v1->v2: updated commit message title

Fixes: 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups")
Reported-and-tested-by: Beniamino Galvani <bgalvani@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 969913da494f..520b7884d0c2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1782,7 +1782,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
 	};
 	struct fib6_table *table;
 	struct rt6_info *rt;
-	int flags = 0;
+	int flags = RT6_LOOKUP_F_IFACE;
 
 	table = fib6_get_table(net, cfg->fc_table);
 	if (!table)
-- 
cgit 


From 70a0dec45174c976c64b4c8c1d0898581f759948 Mon Sep 17 00:00:00 2001
From: Tom Goff <thomas.goff@ll.mit.edu>
Date: Thu, 23 Jun 2016 16:11:57 -0400
Subject: ipmr/ip6mr: Initialize the last assert time of mfc entries.

This fixes wrong-interface signaling on 32-bit platforms for entries
created when jiffies > 2^31 + MFC_ASSERT_THRESH.

Signed-off-by: Tom Goff <thomas.goff@ll.mit.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index f2e2013f8346..487ef3bc7bbc 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void)
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 	if (!c)
 		return NULL;
+	c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 	c->mfc_un.res.minvif = MAXMIFS;
 	return c;
 }
-- 
cgit