diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/cipso_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 35 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 21 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 6 | ||||
-rw-r--r-- | net/ipv4/ip_options.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_dup_ipv4.c | 7 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_fib_ipv4.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_bpf.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 44 | ||||
-rw-r--r-- | net/ipv4/tcp_offload.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 5 | ||||
-rw-r--r-- | net/ipv4/udp.c | 4 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 22 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 40 |
15 files changed, 146 insertions, 65 deletions
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 8cc0e2f4159d..740af8541d2f 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -37,7 +37,7 @@ #include <net/cipso_ipv4.h> #include <linux/atomic.h> #include <linux/bug.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> /* List of available DOI definitions */ /* XXX - This currently assumes a minimal number of different DOIs in use, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ab76744383cf..7cf5f7d0d0de 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -298,17 +298,19 @@ static struct in_device *inetdev_init(struct net_device *dev) /* Account for reference dev->ip_ptr (below) */ refcount_set(&in_dev->refcnt, 1); - err = devinet_sysctl_register(in_dev); - if (err) { - in_dev->dead = 1; - neigh_parms_release(&arp_tbl, in_dev->arp_parms); - in_dev_put(in_dev); - in_dev = NULL; - goto out; + if (dev != blackhole_netdev) { + err = devinet_sysctl_register(in_dev); + if (err) { + in_dev->dead = 1; + neigh_parms_release(&arp_tbl, in_dev->arp_parms); + in_dev_put(in_dev); + in_dev = NULL; + goto out; + } + ip_mc_init_dev(in_dev); + if (dev->flags & IFF_UP) + ip_mc_up(in_dev); } - ip_mc_init_dev(in_dev); - if (dev->flags & IFF_UP) - ip_mc_up(in_dev); /* we can receive as soon as ip_ptr is set -- do this last */ rcu_assign_pointer(dev->ip_ptr, in_dev); @@ -347,6 +349,19 @@ static void inetdev_destroy(struct in_device *in_dev) in_dev_put(in_dev); } +static int __init inet_blackhole_dev_init(void) +{ + int err = 0; + + rtnl_lock(); + if (!inetdev_init(blackhole_netdev)) + err = -ENOMEM; + rtnl_unlock(); + + return err; +} +late_initcall(inet_blackhole_dev_init); + int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) { const struct in_ifaddr *ifa; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 2c5632d4fddb..2b698f8419fe 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1045,21 +1045,31 @@ static bool reqsk_queue_unlink(struct request_sock *req) found = __sk_nulls_del_node_init_rcu(sk); spin_unlock(lock); } - if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) - reqsk_put(req); + return found; } -bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +static bool __inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + bool from_timer) { bool unlinked = reqsk_queue_unlink(req); + if (!from_timer && timer_delete_sync(&req->rsk_timer)) + reqsk_put(req); + if (unlinked) { reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); reqsk_put(req); } + return unlinked; } + +bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +{ + return __inet_csk_reqsk_queue_drop(sk, req, false); +} EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) @@ -1152,7 +1162,7 @@ static void reqsk_timer_handler(struct timer_list *t) if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) { /* delete timer */ - inet_csk_reqsk_queue_drop(sk_listener, nreq); + __inet_csk_reqsk_queue_drop(sk_listener, nreq, true); goto no_ownership; } @@ -1178,7 +1188,8 @@ no_ownership: } drop: - inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); + __inet_csk_reqsk_queue_drop(sk_listener, oreq, true); + reqsk_put(req); } static bool reqsk_queue_hash_req(struct request_sock *req, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5f6fd382af38..f1f31ebfc793 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -662,11 +662,11 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, if (skb_cow_head(skb, 0)) goto free_skb; - tnl_params = (const struct iphdr *)skb->data; - - if (!pskb_network_may_pull(skb, pull_len)) + if (!pskb_may_pull(skb, pull_len)) goto free_skb; + tnl_params = (const struct iphdr *)skb->data; + /* ip_tunnel_xmit() needs skb->data pointing to gre header. */ skb_pull(skb, pull_len); skb_reset_mac_header(skb); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index a9e22a098872..68aedb8877b9 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -17,7 +17,7 @@ #include <linux/slab.h> #include <linux/types.h> #include <linux/uaccess.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/icmp.h> diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index d591c73e2c0e..25505f9b724c 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -218,7 +218,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, ip_tunnel_flags_copy(flags, parms->i_flags); - hlist_for_each_entry_rcu(t, head, hash_node) { + hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && link == READ_ONCE(t->parms.link) && diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index f4aed0789d69..ec94ee1051c7 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -53,8 +53,9 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, { struct iphdr *iph; + local_bh_disable(); if (this_cpu_read(nf_skb_duplicated)) - return; + goto out; /* * Copy the skb, and route the copy. Will later return %XT_CONTINUE for * the original skb, which should continue on its way as if nothing has @@ -62,7 +63,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, */ skb = pskb_copy(skb, GFP_ATOMIC); if (skb == NULL) - return; + goto out; #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Avoid counting cloned packets towards the original connection. */ @@ -91,6 +92,8 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, } else { kfree_skb(skb); } +out: + local_bh_enable(); } EXPORT_SYMBOL_GPL(nf_dup_ipv4); diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 00da1332bbf1..09fff5d424ef 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -65,6 +65,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, .flowi4_scope = RT_SCOPE_UNIVERSE, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_uid = sock_net_uid(nft_net(pkt), NULL), + .flowi4_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)), }; const struct net_device *oif; const struct net_device *found; @@ -83,9 +84,6 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, else oif = NULL; - if (priv->flags & NFTA_FIB_F_IIF) - fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(oif); - if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { nft_fib_store_result(dest, priv, nft_in(pkt)); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index e7658c5d6b79..370993c03d31 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -221,11 +221,11 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, int flags, int *addr_len) { - struct tcp_sock *tcp = tcp_sk(sk); int peek = flags & MSG_PEEK; - u32 seq = tcp->copied_seq; struct sk_psock *psock; + struct tcp_sock *tcp; int copied = 0; + u32 seq; if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); @@ -238,7 +238,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, return tcp_recvmsg(sk, msg, len, flags, addr_len); lock_sock(sk); - + tcp = tcp_sk(sk); + seq = tcp->copied_seq; /* We may have received data on the sk_receive_queue pre-accept and * then we can not use read_skb in this context because we haven't * assigned a sk_socket yet so have no link to the ops. The work-around diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9f314dfa1490..2d844e1f867f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -75,7 +75,7 @@ #include <net/proto_memory.h> #include <net/inet_common.h> #include <linux/ipsec.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <linux/errqueue.h> #include <trace/events/tcp.h> #include <linux/jump_label_ratelimit.h> @@ -2473,8 +2473,22 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, */ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { - return tp->retrans_stamp && - tcp_tsopt_ecr_before(tp, tp->retrans_stamp); + const struct sock *sk = (const struct sock *)tp; + + if (tp->retrans_stamp && + tcp_tsopt_ecr_before(tp, tp->retrans_stamp)) + return true; /* got echoed TS before first retransmission */ + + /* Check if nothing was retransmitted (retrans_stamp==0), which may + * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp + * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear + * retrans_stamp even if we had retransmitted the SYN. + */ + if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */ + sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */ + return true; /* nothing was retransmitted */ + + return false; } /* Undo procedures. */ @@ -2508,6 +2522,16 @@ static bool tcp_any_retrans_done(const struct sock *sk) return false; } +/* If loss recovery is finished and there are no retransmits out in the + * network, then we clear retrans_stamp so that upon the next loss recovery + * retransmits_timed_out() and timestamp-undo are using the correct value. + */ +static void tcp_retrans_stamp_cleanup(struct sock *sk) +{ + if (!tcp_any_retrans_done(sk)) + tcp_sk(sk)->retrans_stamp = 0; +} + static void DBGUNDO(struct sock *sk, const char *msg) { #if FASTRETRANS_DEBUG > 1 @@ -2875,6 +2899,9 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack) struct tcp_sock *tp = tcp_sk(sk); int mib_idx; + /* Start the clock with our fast retransmit, for undo and ETIMEDOUT. */ + tcp_retrans_stamp_cleanup(sk); + if (tcp_is_reno(tp)) mib_idx = LINUX_MIB_TCPRENORECOVERY; else @@ -6657,10 +6684,17 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out) tcp_try_undo_recovery(sk); - /* Reset rtx states to prevent spurious retransmits_timed_out() */ tcp_update_rto_time(tp); - tp->retrans_stamp = 0; inet_csk(sk)->icsk_retransmits = 0; + /* In tcp_fastopen_synack_timer() on the first SYNACK RTO we set + * retrans_stamp but don't enter CA_Loss, so in case that happened we + * need to zero retrans_stamp here to prevent spurious + * retransmits_timed_out(). However, if the ACK of our SYNACK caused us + * to enter CA_Recovery then we need to leave retrans_stamp as it was + * set entering CA_Recovery, for correct retransmits_timed_out() and + * undo behavior. + */ + tcp_retrans_stamp_cleanup(sk); /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1, * we no longer need req so release it. diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index e4ad3311e148..2308665b51c5 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -101,8 +101,14 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); - if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) - return __tcp4_gso_segment_list(skb, features); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) { + struct tcphdr *th = tcp_hdr(skb); + + if (skb_pagelen(skb) - th->doff * 4 == skb_shinfo(skb)->gso_size) + return __tcp4_gso_segment_list(skb, features); + + skb->ip_summed = CHECKSUM_NONE; + } if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct iphdr *iph = ip_hdr(skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4fd746bd4d54..68804fd01daf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2342,10 +2342,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) if (len <= skb->len) break; - if (unlikely(TCP_SKB_CB(skb)->eor) || - tcp_has_tx_tstamp(skb) || - !skb_pure_zcopy_same(skb, next) || - skb_frags_readable(skb) != skb_frags_readable(next)) + if (tcp_has_tx_tstamp(skb) || !tcp_skb_can_collapse(skb, next)) return false; len -= skb->len; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8accbf4cb295..2849b273b131 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -951,8 +951,10 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen, cork->gso_size); + + /* Don't checksum the payload, skb will get segmented */ + goto csum_partial; } - goto csum_partial; } if (is_udplite) /* UDP-Lite */ diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d842303587af..a5be6e4ed326 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -296,8 +296,26 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, return NULL; } - if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) - return __udp_gso_segment_list(gso_skb, features, is_ipv6); + if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) { + /* Detect modified geometry and pass those to skb_segment. */ + if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size) + return __udp_gso_segment_list(gso_skb, features, is_ipv6); + + /* Setup csum, as fraglist skips this in udp4_gro_receive. */ + gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head; + gso_skb->csum_offset = offsetof(struct udphdr, check); + gso_skb->ip_summed = CHECKSUM_PARTIAL; + + uh = udp_hdr(gso_skb); + if (is_ipv6) + uh->check = ~udp_v6_check(gso_skb->len, + &ipv6_hdr(gso_skb)->saddr, + &ipv6_hdr(gso_skb)->daddr, 0); + else + uh->check = ~udp_v4_check(gso_skb->len, + ip_hdr(gso_skb)->saddr, + ip_hdr(gso_skb)->daddr, 0); + } skb_pull(gso_skb, sizeof(*uh)); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0294fef577fa..7e1c2faed1ff 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -17,47 +17,43 @@ #include <net/ip.h> #include <net/l3mdev.h> -static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, - int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - u32 mark) +static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4, + const struct xfrm_dst_lookup_params *params) { struct rtable *rt; memset(fl4, 0, sizeof(*fl4)); - fl4->daddr = daddr->a4; - fl4->flowi4_tos = tos; - fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); - fl4->flowi4_mark = mark; - if (saddr) - fl4->saddr = saddr->a4; - - rt = __ip_route_output_key(net, fl4); + fl4->daddr = params->daddr->a4; + fl4->flowi4_tos = params->tos; + fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(params->net, + params->oif); + fl4->flowi4_mark = params->mark; + if (params->saddr) + fl4->saddr = params->saddr->a4; + fl4->flowi4_proto = params->ipproto; + fl4->uli = params->uli; + + rt = __ip_route_output_key(params->net, fl4); if (!IS_ERR(rt)) return &rt->dst; return ERR_CAST(rt); } -static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - u32 mark) +static struct dst_entry *xfrm4_dst_lookup(const struct xfrm_dst_lookup_params *params) { struct flowi4 fl4; - return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark); + return __xfrm4_dst_lookup(&fl4, params); } -static int xfrm4_get_saddr(struct net *net, int oif, - xfrm_address_t *saddr, xfrm_address_t *daddr, - u32 mark) +static int xfrm4_get_saddr(xfrm_address_t *saddr, + const struct xfrm_dst_lookup_params *params) { struct dst_entry *dst; struct flowi4 fl4; - dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark); + dst = __xfrm4_dst_lookup(&fl4, params); if (IS_ERR(dst)) return -EHOSTUNREACH; |