summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c10
-rw-r--r--net/ipv4/esp4.c4
-rw-r--r--net/ipv4/fib_semantics.c14
-rw-r--r--net/ipv4/inet_connection_sock.c1
-rw-r--r--net/ipv4/inet_hashtables.c24
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_bpf.c12
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/tcp_output.c25
-rw-r--r--net/ipv4/tcp_recovery.c2
10 files changed, 70 insertions, 39 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3d2e30e20473..2713c9b06c4c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -597,7 +597,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
- sk->sk_wait_pending++;
/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
@@ -613,7 +612,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
}
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
- sk->sk_wait_pending--;
return timeo;
}
@@ -642,6 +640,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
return -EINVAL;
if (uaddr->sa_family == AF_UNSPEC) {
+ sk->sk_disconnects++;
err = sk->sk_prot->disconnect(sk, flags);
sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
goto out;
@@ -696,6 +695,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int writebias = (sk->sk_protocol == IPPROTO_TCP) &&
tcp_sk(sk)->fastopen_req &&
tcp_sk(sk)->fastopen_req->data ? 1 : 0;
+ int dis = sk->sk_disconnects;
/* Error code is set above */
if (!timeo || !inet_wait_for_connect(sk, timeo, writebias))
@@ -704,6 +704,11 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
err = sock_intr_errno(timeo);
if (signal_pending(current))
goto out;
+
+ if (dis != sk->sk_disconnects) {
+ err = -EPIPE;
+ goto out;
+ }
}
/* Connection was closed by RST, timeout, ICMP error
@@ -725,6 +730,7 @@ out:
sock_error:
err = sock_error(sk) ? : -ECONNABORTED;
sock->state = SS_UNCONNECTED;
+ sk->sk_disconnects++;
if (sk->sk_prot->disconnect(sk, flags))
sock->state = SS_DISCONNECTING;
goto out;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 2be2d4922557..d18f0f092fe7 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -732,7 +732,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
skb->csum = csum_block_sub(skb->csum, csumdiff,
skb->len - trimlen);
}
- pskb_trim(skb, skb->len - trimlen);
+ ret = pskb_trim(skb, skb->len - trimlen);
+ if (unlikely(ret))
+ return ret;
ret = nexthdr[1];
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 1ea82bc33ef1..5eb1b8d302bb 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1325,15 +1325,18 @@ __be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
unsigned char scope)
{
struct fib_nh *nh;
+ __be32 saddr;
if (nhc->nhc_family != AF_INET)
return inet_select_addr(nhc->nhc_dev, 0, scope);
nh = container_of(nhc, struct fib_nh, nh_common);
- nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
- nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
+ saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
- return nh->nh_saddr;
+ WRITE_ONCE(nh->nh_saddr, saddr);
+ WRITE_ONCE(nh->nh_saddr_genid, atomic_read(&net->ipv4.dev_addr_genid));
+
+ return saddr;
}
__be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
@@ -1347,8 +1350,9 @@ __be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
struct fib_nh *nh;
nh = container_of(nhc, struct fib_nh, nh_common);
- if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
- return nh->nh_saddr;
+ if (READ_ONCE(nh->nh_saddr_genid) ==
+ atomic_read(&net->ipv4.dev_addr_genid))
+ return READ_ONCE(nh->nh_saddr);
}
return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index aeebe8816689..394a498c2823 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1145,7 +1145,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);
- newsk->sk_wait_pending = 0;
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
newicsk->icsk_bind2_hash = NULL;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index c32f5e28758b..598c1b114d2c 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -149,8 +149,14 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2,
const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family != tb2->family)
- return false;
+ if (sk->sk_family != tb2->family) {
+ if (sk->sk_family == AF_INET)
+ return ipv6_addr_v4mapped(&tb2->v6_rcv_saddr) &&
+ tb2->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
+
+ return ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr) &&
+ sk->sk_v6_rcv_saddr.s6_addr32[3] == tb2->rcv_saddr;
+ }
if (sk->sk_family == AF_INET6)
return ipv6_addr_equal(&tb2->v6_rcv_saddr,
@@ -819,19 +825,7 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
tb->l3mdev != l3mdev)
return false;
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family != tb->family) {
- if (sk->sk_family == AF_INET)
- return ipv6_addr_v4mapped(&tb->v6_rcv_saddr) &&
- tb->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
-
- return false;
- }
-
- if (sk->sk_family == AF_INET6)
- return ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
-#endif
- return tb->rcv_saddr == sk->sk_rcv_saddr;
+ return inet_bind2_bucket_addr_match(tb, sk);
}
bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3f66cdeef7de..d3456cf840de 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -831,7 +831,9 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
*/
if (!skb_queue_empty(&sk->sk_receive_queue))
break;
- sk_wait_data(sk, &timeo, NULL);
+ ret = sk_wait_data(sk, &timeo, NULL);
+ if (ret < 0)
+ break;
if (signal_pending(current)) {
ret = sock_intr_errno(timeo);
break;
@@ -2442,7 +2444,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
__sk_flush_backlog(sk);
} else {
tcp_cleanup_rbuf(sk, copied);
- sk_wait_data(sk, &timeo, last);
+ err = sk_wait_data(sk, &timeo, last);
+ if (err < 0) {
+ err = copied ? : err;
+ goto out;
+ }
}
if ((flags & MSG_PEEK) &&
@@ -2966,12 +2972,6 @@ int tcp_disconnect(struct sock *sk, int flags)
int old_state = sk->sk_state;
u32 seq;
- /* Deny disconnect if other threads are blocked in sk_wait_event()
- * or inet_wait_for_connect().
- */
- if (sk->sk_wait_pending)
- return -EBUSY;
-
if (old_state != TCP_CLOSE)
tcp_set_state(sk, TCP_CLOSE);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 327268203001..53b0d62fd2c2 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -307,6 +307,10 @@ msg_bytes_ready:
}
data = tcp_msg_wait_data(sk, psock, timeo);
+ if (data < 0) {
+ copied = data;
+ goto unlock;
+ }
if (data && !sk_psock_queue_empty(psock))
goto msg_bytes_ready;
copied = -EAGAIN;
@@ -317,6 +321,8 @@ out:
tcp_rcv_space_adjust(sk);
if (copied > 0)
__tcp_cleanup_rbuf(sk, copied);
+
+unlock:
release_sock(sk);
sk_psock_put(sk, psock);
return copied;
@@ -351,6 +357,10 @@ msg_bytes_ready:
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
data = tcp_msg_wait_data(sk, psock, timeo);
+ if (data < 0) {
+ ret = data;
+ goto unlock;
+ }
if (data) {
if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
@@ -361,6 +371,8 @@ msg_bytes_ready:
copied = -EAGAIN;
}
ret = copied;
+
+unlock:
release_sock(sk);
sk_psock_put(sk, psock);
return ret;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 27140e5cdc06..4167e8a48b60 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1869,6 +1869,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
#ifdef CONFIG_TLS_DEVICE
tail->decrypted != skb->decrypted ||
#endif
+ !mptcp_skb_can_collapse(tail, skb) ||
thtail->doff != th->doff ||
memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
goto no_coalesce;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9c8c42c280b7..f0723460753c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2542,6 +2542,18 @@ static bool tcp_pacing_check(struct sock *sk)
return true;
}
+static bool tcp_rtx_queue_empty_or_single_skb(const struct sock *sk)
+{
+ const struct rb_node *node = sk->tcp_rtx_queue.rb_node;
+
+ /* No skb in the rtx queue. */
+ if (!node)
+ return true;
+
+ /* Only one skb in rtx queue. */
+ return !node->rb_left && !node->rb_right;
+}
+
/* TCP Small Queues :
* Control number of packets in qdisc/devices to two packets / or ~1 ms.
* (These limits are doubled for retransmits)
@@ -2579,12 +2591,12 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit += extra_bytes;
}
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
- /* Always send skb if rtx queue is empty.
+ /* Always send skb if rtx queue is empty or has one skb.
* No need to wait for TX completion to call us back,
* after softirq/tasklet schedule.
* This helps when TX completions are delayed too much.
*/
- if (tcp_rtx_queue_empty(sk))
+ if (tcp_rtx_queue_empty_or_single_skb(sk))
return false;
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@ -2788,7 +2800,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- u32 timeout, rto_delta_us;
+ u32 timeout, timeout_us, rto_delta_us;
int early_retrans;
/* Don't do any loss probe on a Fast Open connection before 3WHS
@@ -2812,11 +2824,12 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
* sample is available then probe after TCP_TIMEOUT_INIT.
*/
if (tp->srtt_us) {
- timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+ timeout_us = tp->srtt_us >> 2;
if (tp->packets_out == 1)
- timeout += TCP_RTO_MIN;
+ timeout_us += tcp_rto_min_us(sk);
else
- timeout += TCP_TIMEOUT_MIN;
+ timeout_us += TCP_TIMEOUT_MIN_US;
+ timeout = usecs_to_jiffies(timeout_us);
} else {
timeout = TCP_TIMEOUT_INIT;
}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index acf4869c5d3b..bba10110fbbc 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -104,7 +104,7 @@ bool tcp_rack_mark_lost(struct sock *sk)
tp->rack.advanced = 0;
tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
- timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
+ timeout = usecs_to_jiffies(timeout + TCP_TIMEOUT_MIN_US);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
timeout, inet_csk(sk)->icsk_rto);
}