summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/cipso_ipv4.c2
-rw-r--r--net/ipv4/devinet.c35
-rw-r--r--net/ipv4/inet_connection_sock.c21
-rw-r--r--net/ipv4/ip_gre.c6
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/ip_tunnel.c2
-rw-r--r--net/ipv4/netfilter/nf_dup_ipv4.c7
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c4
-rw-r--r--net/ipv4/tcp_bpf.c7
-rw-r--r--net/ipv4/tcp_input.c44
-rw-r--r--net/ipv4/tcp_offload.c10
-rw-r--r--net/ipv4/tcp_output.c5
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/udp_offload.c22
-rw-r--r--net/ipv4/xfrm4_policy.c40
15 files changed, 146 insertions, 65 deletions
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 8cc0e2f4159d..740af8541d2f 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -37,7 +37,7 @@
#include <net/cipso_ipv4.h>
#include <linux/atomic.h>
#include <linux/bug.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/* List of available DOI definitions */
/* XXX - This currently assumes a minimal number of different DOIs in use,
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ab76744383cf..7cf5f7d0d0de 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -298,17 +298,19 @@ static struct in_device *inetdev_init(struct net_device *dev)
/* Account for reference dev->ip_ptr (below) */
refcount_set(&in_dev->refcnt, 1);
- err = devinet_sysctl_register(in_dev);
- if (err) {
- in_dev->dead = 1;
- neigh_parms_release(&arp_tbl, in_dev->arp_parms);
- in_dev_put(in_dev);
- in_dev = NULL;
- goto out;
+ if (dev != blackhole_netdev) {
+ err = devinet_sysctl_register(in_dev);
+ if (err) {
+ in_dev->dead = 1;
+ neigh_parms_release(&arp_tbl, in_dev->arp_parms);
+ in_dev_put(in_dev);
+ in_dev = NULL;
+ goto out;
+ }
+ ip_mc_init_dev(in_dev);
+ if (dev->flags & IFF_UP)
+ ip_mc_up(in_dev);
}
- ip_mc_init_dev(in_dev);
- if (dev->flags & IFF_UP)
- ip_mc_up(in_dev);
/* we can receive as soon as ip_ptr is set -- do this last */
rcu_assign_pointer(dev->ip_ptr, in_dev);
@@ -347,6 +349,19 @@ static void inetdev_destroy(struct in_device *in_dev)
in_dev_put(in_dev);
}
+static int __init inet_blackhole_dev_init(void)
+{
+ int err = 0;
+
+ rtnl_lock();
+ if (!inetdev_init(blackhole_netdev))
+ err = -ENOMEM;
+ rtnl_unlock();
+
+ return err;
+}
+late_initcall(inet_blackhole_dev_init);
+
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
{
const struct in_ifaddr *ifa;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 2c5632d4fddb..2b698f8419fe 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1045,21 +1045,31 @@ static bool reqsk_queue_unlink(struct request_sock *req)
found = __sk_nulls_del_node_init_rcu(sk);
spin_unlock(lock);
}
- if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
- reqsk_put(req);
+
return found;
}
-bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+static bool __inet_csk_reqsk_queue_drop(struct sock *sk,
+ struct request_sock *req,
+ bool from_timer)
{
bool unlinked = reqsk_queue_unlink(req);
+ if (!from_timer && timer_delete_sync(&req->rsk_timer))
+ reqsk_put(req);
+
if (unlinked) {
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
reqsk_put(req);
}
+
return unlinked;
}
+
+bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+{
+ return __inet_csk_reqsk_queue_drop(sk, req, false);
+}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
@@ -1152,7 +1162,7 @@ static void reqsk_timer_handler(struct timer_list *t)
if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
/* delete timer */
- inet_csk_reqsk_queue_drop(sk_listener, nreq);
+ __inet_csk_reqsk_queue_drop(sk_listener, nreq, true);
goto no_ownership;
}
@@ -1178,7 +1188,8 @@ no_ownership:
}
drop:
- inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
+ __inet_csk_reqsk_queue_drop(sk_listener, oreq, true);
+ reqsk_put(req);
}
static bool reqsk_queue_hash_req(struct request_sock *req,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5f6fd382af38..f1f31ebfc793 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -662,11 +662,11 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
if (skb_cow_head(skb, 0))
goto free_skb;
- tnl_params = (const struct iphdr *)skb->data;
-
- if (!pskb_network_may_pull(skb, pull_len))
+ if (!pskb_may_pull(skb, pull_len))
goto free_skb;
+ tnl_params = (const struct iphdr *)skb->data;
+
/* ip_tunnel_xmit() needs skb->data pointing to gre header. */
skb_pull(skb, pull_len);
skb_reset_mac_header(skb);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index a9e22a098872..68aedb8877b9 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -17,7 +17,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/icmp.h>
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d591c73e2c0e..25505f9b724c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -218,7 +218,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
ip_tunnel_flags_copy(flags, parms->i_flags);
- hlist_for_each_entry_rcu(t, head, hash_node) {
+ hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
link == READ_ONCE(t->parms.link) &&
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index f4aed0789d69..ec94ee1051c7 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -53,8 +53,9 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
{
struct iphdr *iph;
+ local_bh_disable();
if (this_cpu_read(nf_skb_duplicated))
- return;
+ goto out;
/*
* Copy the skb, and route the copy. Will later return %XT_CONTINUE for
* the original skb, which should continue on its way as if nothing has
@@ -62,7 +63,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
*/
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
- return;
+ goto out;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* Avoid counting cloned packets towards the original connection. */
@@ -91,6 +92,8 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
} else {
kfree_skb(skb);
}
+out:
+ local_bh_enable();
}
EXPORT_SYMBOL_GPL(nf_dup_ipv4);
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 00da1332bbf1..09fff5d424ef 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -65,6 +65,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
.flowi4_scope = RT_SCOPE_UNIVERSE,
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_uid = sock_net_uid(nft_net(pkt), NULL),
+ .flowi4_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
};
const struct net_device *oif;
const struct net_device *found;
@@ -83,9 +84,6 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
oif = NULL;
- if (priv->flags & NFTA_FIB_F_IIF)
- fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(oif);
-
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
nft_fib_store_result(dest, priv, nft_in(pkt));
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index e7658c5d6b79..370993c03d31 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -221,11 +221,11 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
int flags,
int *addr_len)
{
- struct tcp_sock *tcp = tcp_sk(sk);
int peek = flags & MSG_PEEK;
- u32 seq = tcp->copied_seq;
struct sk_psock *psock;
+ struct tcp_sock *tcp;
int copied = 0;
+ u32 seq;
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
@@ -238,7 +238,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
return tcp_recvmsg(sk, msg, len, flags, addr_len);
lock_sock(sk);
-
+ tcp = tcp_sk(sk);
+ seq = tcp->copied_seq;
/* We may have received data on the sk_receive_queue pre-accept and
* then we can not use read_skb in this context because we haven't
* assigned a sk_socket yet so have no link to the ops. The work-around
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9f314dfa1490..2d844e1f867f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -75,7 +75,7 @@
#include <net/proto_memory.h>
#include <net/inet_common.h>
#include <linux/ipsec.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/errqueue.h>
#include <trace/events/tcp.h>
#include <linux/jump_label_ratelimit.h>
@@ -2473,8 +2473,22 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
*/
static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
{
- return tp->retrans_stamp &&
- tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
+ const struct sock *sk = (const struct sock *)tp;
+
+ if (tp->retrans_stamp &&
+ tcp_tsopt_ecr_before(tp, tp->retrans_stamp))
+ return true; /* got echoed TS before first retransmission */
+
+ /* Check if nothing was retransmitted (retrans_stamp==0), which may
+ * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp
+ * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear
+ * retrans_stamp even if we had retransmitted the SYN.
+ */
+ if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */
+ sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */
+ return true; /* nothing was retransmitted */
+
+ return false;
}
/* Undo procedures. */
@@ -2508,6 +2522,16 @@ static bool tcp_any_retrans_done(const struct sock *sk)
return false;
}
+/* If loss recovery is finished and there are no retransmits out in the
+ * network, then we clear retrans_stamp so that upon the next loss recovery
+ * retransmits_timed_out() and timestamp-undo are using the correct value.
+ */
+static void tcp_retrans_stamp_cleanup(struct sock *sk)
+{
+ if (!tcp_any_retrans_done(sk))
+ tcp_sk(sk)->retrans_stamp = 0;
+}
+
static void DBGUNDO(struct sock *sk, const char *msg)
{
#if FASTRETRANS_DEBUG > 1
@@ -2875,6 +2899,9 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
struct tcp_sock *tp = tcp_sk(sk);
int mib_idx;
+ /* Start the clock with our fast retransmit, for undo and ETIMEDOUT. */
+ tcp_retrans_stamp_cleanup(sk);
+
if (tcp_is_reno(tp))
mib_idx = LINUX_MIB_TCPRENORECOVERY;
else
@@ -6657,10 +6684,17 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out)
tcp_try_undo_recovery(sk);
- /* Reset rtx states to prevent spurious retransmits_timed_out() */
tcp_update_rto_time(tp);
- tp->retrans_stamp = 0;
inet_csk(sk)->icsk_retransmits = 0;
+ /* In tcp_fastopen_synack_timer() on the first SYNACK RTO we set
+ * retrans_stamp but don't enter CA_Loss, so in case that happened we
+ * need to zero retrans_stamp here to prevent spurious
+ * retransmits_timed_out(). However, if the ACK of our SYNACK caused us
+ * to enter CA_Recovery then we need to leave retrans_stamp as it was
+ * set entering CA_Recovery, for correct retransmits_timed_out() and
+ * undo behavior.
+ */
+ tcp_retrans_stamp_cleanup(sk);
/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
* we no longer need req so release it.
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index e4ad3311e148..2308665b51c5 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -101,8 +101,14 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
- if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
- return __tcp4_gso_segment_list(skb, features);
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) {
+ struct tcphdr *th = tcp_hdr(skb);
+
+ if (skb_pagelen(skb) - th->doff * 4 == skb_shinfo(skb)->gso_size)
+ return __tcp4_gso_segment_list(skb, features);
+
+ skb->ip_summed = CHECKSUM_NONE;
+ }
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
const struct iphdr *iph = ip_hdr(skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4fd746bd4d54..68804fd01daf 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2342,10 +2342,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
if (len <= skb->len)
break;
- if (unlikely(TCP_SKB_CB(skb)->eor) ||
- tcp_has_tx_tstamp(skb) ||
- !skb_pure_zcopy_same(skb, next) ||
- skb_frags_readable(skb) != skb_frags_readable(next))
+ if (tcp_has_tx_tstamp(skb) || !tcp_skb_can_collapse(skb, next))
return false;
len -= skb->len;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8accbf4cb295..2849b273b131 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -951,8 +951,10 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
cork->gso_size);
+
+ /* Don't checksum the payload, skb will get segmented */
+ goto csum_partial;
}
- goto csum_partial;
}
if (is_udplite) /* UDP-Lite */
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index d842303587af..a5be6e4ed326 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -296,8 +296,26 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
return NULL;
}
- if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
- return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+ if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) {
+ /* Detect modified geometry and pass those to skb_segment. */
+ if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size)
+ return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+
+ /* Setup csum, as fraglist skips this in udp4_gro_receive. */
+ gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head;
+ gso_skb->csum_offset = offsetof(struct udphdr, check);
+ gso_skb->ip_summed = CHECKSUM_PARTIAL;
+
+ uh = udp_hdr(gso_skb);
+ if (is_ipv6)
+ uh->check = ~udp_v6_check(gso_skb->len,
+ &ipv6_hdr(gso_skb)->saddr,
+ &ipv6_hdr(gso_skb)->daddr, 0);
+ else
+ uh->check = ~udp_v4_check(gso_skb->len,
+ ip_hdr(gso_skb)->saddr,
+ ip_hdr(gso_skb)->daddr, 0);
+ }
skb_pull(gso_skb, sizeof(*uh));
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 0294fef577fa..7e1c2faed1ff 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -17,47 +17,43 @@
#include <net/ip.h>
#include <net/l3mdev.h>
-static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
- int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- u32 mark)
+static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4,
+ const struct xfrm_dst_lookup_params *params)
{
struct rtable *rt;
memset(fl4, 0, sizeof(*fl4));
- fl4->daddr = daddr->a4;
- fl4->flowi4_tos = tos;
- fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
- fl4->flowi4_mark = mark;
- if (saddr)
- fl4->saddr = saddr->a4;
-
- rt = __ip_route_output_key(net, fl4);
+ fl4->daddr = params->daddr->a4;
+ fl4->flowi4_tos = params->tos;
+ fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(params->net,
+ params->oif);
+ fl4->flowi4_mark = params->mark;
+ if (params->saddr)
+ fl4->saddr = params->saddr->a4;
+ fl4->flowi4_proto = params->ipproto;
+ fl4->uli = params->uli;
+
+ rt = __ip_route_output_key(params->net, fl4);
if (!IS_ERR(rt))
return &rt->dst;
return ERR_CAST(rt);
}
-static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
- const xfrm_address_t *saddr,
- const xfrm_address_t *daddr,
- u32 mark)
+static struct dst_entry *xfrm4_dst_lookup(const struct xfrm_dst_lookup_params *params)
{
struct flowi4 fl4;
- return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark);
+ return __xfrm4_dst_lookup(&fl4, params);
}
-static int xfrm4_get_saddr(struct net *net, int oif,
- xfrm_address_t *saddr, xfrm_address_t *daddr,
- u32 mark)
+static int xfrm4_get_saddr(xfrm_address_t *saddr,
+ const struct xfrm_dst_lookup_params *params)
{
struct dst_entry *dst;
struct flowi4 fl4;
- dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark);
+ dst = __xfrm4_dst_lookup(&fl4, params);
if (IS_ERR(dst))
return -EHOSTUNREACH;