From 4832c30d5458387ff2533ff66fbde26ad8bb5a2d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 17 Aug 2017 12:17:20 -0700 Subject: net: ipv6: put host and anycast routes on device with address One nagging difference between ipv4 and ipv6 is host routes for ipv6 addresses are installed using the loopback device or VRF / L3 Master device. e.g., 2001:db8:1::/120 dev veth0 proto kernel metric 256 pref medium local 2001:db8:1::1 dev lo table local proto kernel metric 0 pref medium Using the loopback device is convenient -- necessary for local tx, but has some nasty side effects, most notably setting the 'lo' device down causes all host routes for all local IPv6 address to be removed from the FIB and completely breaks IPv6 networking across all interfaces. This patch puts FIB entries for IPv6 routes against the device. This simplifies the routes in the FIB, for example by making dst->dev and rt6i_idev->dev the same (a future patch can look at removing the device reference taken for rt6i_idev for FIB entries). When copies are made on FIB lookups, the cloned route has dst->dev set to loopback (or the L3 master device). This is needed for the local Tx of packets to local addresses. With fib entries allocated against the real network device, the addrconf code that reinserts host routes on admin up of 'lo' is no longer needed. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'net/ipv6/icmp.c') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 8d7b113958b1..4f82830fc068 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -459,9 +459,20 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, * Source addr check */ - if (__ipv6_addr_needs_scope_id(addr_type)) + if (__ipv6_addr_needs_scope_id(addr_type)) { iif = skb->dev->ifindex; - else { + + /* for local packets, get the real device index */ + if (iif == LOOPBACK_IFINDEX) { + dst = skb_dst(skb); + if (dst) { + struct rt6_info *rt; + + rt = container_of(dst, struct rt6_info, dst); + iif = rt->rt6i_idev->dev->ifindex; + } + } + } else { dst = skb_dst(skb); iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev); } -- cgit From 23aebdacb05dab9efdf22b9e0413491cbd5f128f Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 23 Aug 2017 09:58:29 +0200 Subject: ipv6: Compute multipath hash for ICMP errors from offending packet When forwarding or sending out an ICMPv6 error, look at the embedded packet that triggered the error and compute a flow hash over its headers. This let's us route the ICMP error together with the flow it belongs to when multipath (ECMP) routing is in use, which in turn makes Path MTU Discovery work in ECMP load-balanced or anycast setups (RFC 7690). Granted, end-hosts behind the ECMP router (aka servers) need to reflect the IPv6 Flow Label for PMTUD to work. The code is organized to be in parallel with ipv4 stack: ip_multipath_l3_keys -> ip6_multipath_l3_keys fib_multipath_hash -> rt6_multipath_hash Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6/icmp.c') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4f82830fc068..dd7608cf1d72 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -519,6 +519,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); + fl6.mp_hash = rt6_multipath_hash(&fl6, skb); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); -- cgit From 1b70d792cf6775fb5d0737524387893daeb5374a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 28 Aug 2017 13:53:34 -0700 Subject: ipv6: Use rt6i_idev index for echo replies to a local address Tariq repored local pings to linklocal address is failing: $ ifconfig ens8 ens8: flags=4163 mtu 1500 inet 11.141.16.6 netmask 255.255.0.0 broadcast 11.141.255.255 inet6 fe80::7efe:90ff:fecb:7502 prefixlen 64 scopeid 0x20 ether 7c:fe:90:cb:75:02 txqueuelen 1000 (Ethernet) RX packets 12 bytes 1164 (1.1 KiB) RX errors 0 dropped 0 overruns 0 frame 0 TX packets 30 bytes 2484 (2.4 KiB) TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0 $ /bin/ping6 -c 3 fe80::7efe:90ff:fecb:7502%ens8 PING fe80::7efe:90ff:fecb:7502%ens8(fe80::7efe:90ff:fecb:7502) 56 data bytes Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'net/ipv6/icmp.c') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index dd7608cf1d72..5acb54405b10 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -399,6 +399,24 @@ relookup_failed: return ERR_PTR(err); } +static int icmp6_iif(const struct sk_buff *skb) +{ + int iif = skb->dev->ifindex; + + /* for local traffic to local address, skb dev is the loopback + * device. Check if there is a dst attached to the skb and if so + * get the real device index. + */ + if (unlikely(iif == LOOPBACK_IFINDEX)) { + const struct rt6_info *rt6 = skb_rt6_info(skb); + + if (rt6) + iif = rt6->rt6i_idev->dev->ifindex; + } + + return iif; +} + /* * Send an ICMP message in response to a packet in error */ @@ -460,18 +478,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, */ if (__ipv6_addr_needs_scope_id(addr_type)) { - iif = skb->dev->ifindex; - - /* for local packets, get the real device index */ - if (iif == LOOPBACK_IFINDEX) { - dst = skb_dst(skb); - if (dst) { - struct rt6_info *rt; - - rt = container_of(dst, struct rt6_info, dst); - iif = rt->rt6i_idev->dev->ifindex; - } - } + iif = icmp6_iif(skb); } else { dst = skb_dst(skb); iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev); @@ -694,7 +701,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; - fl6.flowi6_oif = skb->dev->ifindex; + fl6.flowi6_oif = icmp6_iif(skb); fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; fl6.flowi6_uid = sock_net_uid(net, NULL); -- cgit