diff options
Diffstat (limited to 'net/ipv6/ip6_input.c')
| -rw-r--r-- | net/ipv6/ip6_input.c | 334 |
1 files changed, 260 insertions, 74 deletions
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 9ee208a348f5..168ec07e31cc 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 input * Linux INET6 implementation @@ -7,11 +8,6 @@ * Ian P. Morris <I.P.Morris@soton.ac.uk> * * Based in linux/net/ipv4/ip_input.c - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ /* Changes * @@ -29,12 +25,14 @@ #include <linux/icmpv6.h> #include <linux/mroute6.h> #include <linux/slab.h> +#include <linux/indirect_call_wrapper.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <net/sock.h> #include <net/snmp.h> +#include <net/udp.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -47,40 +45,118 @@ #include <net/inet_ecn.h> #include <net/dst_metadata.h> -int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +static void ip6_rcv_finish_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { - void (*edemux)(struct sk_buff *skb); + if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && + !skb_dst(skb) && !skb->sk) { + switch (ipv6_hdr(skb)->nexthdr) { + case IPPROTO_TCP: + if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) + tcp_v6_early_demux(skb); + break; + case IPPROTO_UDP: + if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) + udp_v6_early_demux(skb); + break; + } + } + if (!skb_valid_dst(skb)) + ip6_route_input(skb); +} + +int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing */ skb = l3mdev_ip6_rcv(skb); if (!skb) return NET_RX_SUCCESS; + ip6_rcv_finish_core(net, sk, skb); - if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { - const struct inet6_protocol *ipprot; + return dst_input(skb); +} - ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); - if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) - edemux(skb); +static void ip6_sublist_rcv_finish(struct list_head *head) +{ + struct sk_buff *skb, *next; + + list_for_each_entry_safe(skb, next, head, list) { + skb_list_del_init(skb); + dst_input(skb); } - if (!skb_valid_dst(skb)) - ip6_route_input(skb); +} - return dst_input(skb); +static bool ip6_can_use_hint(const struct sk_buff *skb, + const struct sk_buff *hint) +{ + return hint && !skb_dst(skb) && + ipv6_addr_equal(&ipv6_hdr(hint)->daddr, &ipv6_hdr(skb)->daddr); } -int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +static struct sk_buff *ip6_extract_route_hint(const struct net *net, + struct sk_buff *skb) +{ + if (fib6_routes_require_src(net) || fib6_has_custom_rules(net) || + IP6CB(skb)->flags & IP6SKB_MULTIPATH) + return NULL; + + return skb; +} + +static void ip6_list_rcv_finish(struct net *net, struct sock *sk, + struct list_head *head) +{ + struct sk_buff *skb, *next, *hint = NULL; + struct dst_entry *curr_dst = NULL; + LIST_HEAD(sublist); + + list_for_each_entry_safe(skb, next, head, list) { + struct dst_entry *dst; + + skb_list_del_init(skb); + /* if ingress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_rcv(skb); + if (!skb) + continue; + + if (ip6_can_use_hint(skb, hint)) + skb_dst_copy(skb, hint); + else + ip6_rcv_finish_core(net, sk, skb); + dst = skb_dst(skb); + if (curr_dst != dst) { + hint = ip6_extract_route_hint(net, skb); + + /* dispatch old sublist */ + if (!list_empty(&sublist)) + ip6_sublist_rcv_finish(&sublist); + /* start new sublist */ + INIT_LIST_HEAD(&sublist); + curr_dst = dst; + } + list_add_tail(&skb->list, &sublist); + } + /* dispatch final sublist */ + ip6_sublist_rcv_finish(&sublist); +} + +static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, + struct net *net) { + enum skb_drop_reason reason; const struct ipv6hdr *hdr; u32 pkt_len; struct inet6_dev *idev; - struct net *net = dev_net(skb->dev); if (skb->pkt_type == PACKET_OTHERHOST) { - kfree_skb(skb); - return NET_RX_DROP; + dev_core_stats_rx_otherhost_dropped_inc(skb->dev); + kfree_skb_reason(skb, SKB_DROP_REASON_OTHERHOST); + return NULL; } rcu_read_lock(); @@ -89,9 +165,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt __IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_IN, skb->len); + SKB_DR_SET(reason, NOT_SPECIFIED); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || - !idev || unlikely(idev->cnf.disable_ipv6)) { + !idev || unlikely(READ_ONCE(idev->cnf.disable_ipv6))) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); + if (idev && unlikely(READ_ONCE(idev->cnf.disable_ipv6))) + SKB_DR_SET(reason, IPV6DISABLED); goto drop; } @@ -108,15 +187,19 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt * arrived via the sending interface (ethX), because of the * nature of scoping architecture. --yoshfuji */ - IP6CB(skb)->iif = skb_valid_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex; + IP6CB(skb)->iif = skb_valid_dst(skb) ? + ip6_dst_idev(skb_dst(skb))->dev->ifindex : + dev->ifindex; if (unlikely(!pskb_may_pull(skb, sizeof(*hdr)))) goto err; hdr = ipv6_hdr(skb); - if (hdr->version != 6) + if (hdr->version != 6) { + SKB_DR_SET(reason, UNHANDLED_PROTO); goto err; + } __IP6_ADD_STATS(net, idev, IPSTATS_MIB_NOECTPKTS + @@ -131,7 +214,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt */ if ((ipv6_addr_loopback(&hdr->saddr) || ipv6_addr_loopback(&hdr->daddr)) && - !(dev->flags & IFF_LOOPBACK)) + !(dev->flags & IFF_LOOPBACK) && + !netif_is_l3_master(dev)) goto err; /* RFC4291 Errata ID: 3480 @@ -153,8 +237,10 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (!ipv6_addr_is_multicast(&hdr->daddr) && (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) && - idev->cnf.drop_unicast_in_l2_multicast) + READ_ONCE(idev->cnf.drop_unicast_in_l2_multicast)) { + SKB_DR_SET(reason, UNICAST_IN_L2_MULTICAST); goto err; + } /* RFC4291 2.7 * Nodes must not originate a packet to a multicast address whose scope @@ -183,12 +269,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS); + SKB_DR_SET(reason, PKT_TOO_SMALL); goto drop; } - if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { - __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); - goto drop; - } + if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) + goto err; hdr = ipv6_hdr(skb); } @@ -196,51 +281,106 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (ipv6_parse_hopopts(skb) < 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); rcu_read_unlock(); - return NET_RX_DROP; + return NULL; } } rcu_read_unlock(); /* Must drop socket now because of tproxy. */ - skb_orphan(skb); + if (!skb_sk_is_prefetched(skb)) + skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, - net, NULL, skb, dev, NULL, - ip6_rcv_finish); + return skb; err: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); + SKB_DR_OR(reason, IP_INHDR); drop: rcu_read_unlock(); - kfree_skb(skb); - return NET_RX_DROP; + kfree_skb_reason(skb, reason); + return NULL; +} + +int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) +{ + struct net *net = dev_net(skb->dev); + + skb = ip6_rcv_core(skb, dev, net); + if (skb == NULL) + return NET_RX_DROP; + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, + net, NULL, skb, dev, NULL, + ip6_rcv_finish); } +static void ip6_sublist_rcv(struct list_head *head, struct net_device *dev, + struct net *net) +{ + NF_HOOK_LIST(NFPROTO_IPV6, NF_INET_PRE_ROUTING, net, NULL, + head, dev, NULL, ip6_rcv_finish); + ip6_list_rcv_finish(net, NULL, head); +} + +/* Receive a list of IPv6 packets */ +void ipv6_list_rcv(struct list_head *head, struct packet_type *pt, + struct net_device *orig_dev) +{ + struct net_device *curr_dev = NULL; + struct net *curr_net = NULL; + struct sk_buff *skb, *next; + LIST_HEAD(sublist); + + list_for_each_entry_safe(skb, next, head, list) { + struct net_device *dev = skb->dev; + struct net *net = dev_net(dev); + + skb_list_del_init(skb); + skb = ip6_rcv_core(skb, dev, net); + if (skb == NULL) + continue; + + if (curr_dev != dev || curr_net != net) { + /* dispatch old sublist */ + if (!list_empty(&sublist)) + ip6_sublist_rcv(&sublist, curr_dev, curr_net); + /* start new sublist */ + INIT_LIST_HEAD(&sublist); + curr_dev = dev; + curr_net = net; + } + list_add_tail(&skb->list, &sublist); + } + /* dispatch final sublist */ + if (!list_empty(&sublist)) + ip6_sublist_rcv(&sublist, curr_dev, curr_net); +} + +INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *)); + /* * Deliver the packet to the host */ - - -static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, + bool have_final) { const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; - int nexthdr; + SKB_DR(reason); bool raw; - bool have_final = false; /* * Parse extension headers */ - rcu_read_lock(); resubmit: idev = ip6_dst_idev(skb_dst(skb)); - if (!pskb_pull(skb, skb_transport_offset(skb))) - goto discard; nhoff = IP6CB(skb)->nhoff; - nexthdr = skb_network_header(skb)[nhoff]; + if (!have_final) { + if (!pskb_pull(skb, skb_transport_offset(skb))) + goto discard; + nexthdr = skb_network_header(skb)[nhoff]; + } resubmit_final: raw = raw6_local_deliver(skb, nexthdr); @@ -259,29 +399,44 @@ resubmit_final: } } else if (ipprot->flags & INET6_PROTO_FINAL) { const struct ipv6hdr *hdr; + int sdif = inet6_sdif(skb); + struct net_device *dev; /* Only do this once for first final protocol */ have_final = true; - /* Free reference early: we don't need it any more, - and it may hold ip_conntrack module loaded - indefinitely. */ - nf_reset(skb); skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); hdr = ipv6_hdr(skb); + + /* skb->dev passed may be master dev for vrfs. */ + if (sdif) { + dev = dev_get_by_index_rcu(net, sdif); + if (!dev) + goto discard; + } else { + dev = skb->dev; + } + if (ipv6_addr_is_multicast(&hdr->daddr) && - !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, - &hdr->saddr) && - !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) + !ipv6_chk_mcast_addr(dev, &hdr->daddr, + &hdr->saddr) && + !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) { + SKB_DR_SET(reason, IP_INADDRERRORS); goto discard; + } + } + if (!(ipprot->flags & INET6_PROTO_NOPOLICY)) { + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + SKB_DR_SET(reason, XFRM_POLICY); + goto discard; + } + nf_reset_ct(skb); } - if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && - !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto discard; - ret = ipprot->handler(skb); + ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv, + skb); if (ret > 0) { if (ipprot->flags & INET6_PROTO_FINAL) { /* Not an extension header, most likely UDP @@ -304,49 +459,81 @@ resubmit_final: IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff); + SKB_DR_SET(reason, IP_NOPROTO); + } else { + SKB_DR_SET(reason, XFRM_POLICY); } - kfree_skb(skb); + kfree_skb_reason(skb, reason); } else { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); consume_skb(skb); } } - rcu_read_unlock(); - return 0; + return; discard: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); - rcu_read_unlock(); - kfree_skb(skb); + kfree_skb_reason(skb, reason); +} + +static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INDISCARDS); + kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM); + return 0; + } + + skb_clear_delivery_time(skb); + ip6_protocol_deliver_rcu(net, skb, 0, false); + return 0; } int ip6_input(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, - dev_net(skb->dev), NULL, skb, skb->dev, NULL, - ip6_input_finish); + int res; + + rcu_read_lock(); + res = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, + dev_net_rcu(skb->dev), NULL, skb, skb->dev, NULL, + ip6_input_finish); + rcu_read_unlock(); + + return res; } EXPORT_SYMBOL_GPL(ip6_input); int ip6_mc_input(struct sk_buff *skb) { + struct net_device *dev = skb->dev; + int sdif = inet6_sdif(skb); const struct ipv6hdr *hdr; bool deliver; - __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev), - ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INMCAST, - skb->len); + __IP6_UPD_PO_STATS(skb_dst_dev_net_rcu(skb), + __in6_dev_get_safely(dev), IPSTATS_MIB_INMCAST, + skb->len); + + /* skb->dev passed may be master dev for vrfs. */ + if (sdif) { + dev = dev_get_by_index_rcu(dev_net_rcu(dev), sdif); + if (!dev) { + kfree_skb(skb); + return -ENODEV; + } + } hdr = ipv6_hdr(skb); - deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL); + deliver = ipv6_chk_mcast_addr(dev, &hdr->daddr, NULL); #ifdef CONFIG_IPV6_MROUTE /* * IPv6 multicast router mode is now supported ;) */ - if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && + if (atomic_read(&dev_net_rcu(skb->dev)->ipv6.devconf_all->mc_forwarding) && !(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { @@ -387,22 +574,21 @@ int ip6_mc_input(struct sk_buff *skb) /* unknown RA - process it normally */ } - if (deliver) + if (deliver) { skb2 = skb_clone(skb, GFP_ATOMIC); - else { + } else { skb2 = skb; skb = NULL; } - if (skb2) { + if (skb2) ip6_mr_input(skb2); - } } out: #endif - if (likely(deliver)) + if (likely(deliver)) { ip6_input(skb); - else { + } else { /* discard */ kfree_skb(skb); } |
