summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- net/ipv4/tcp_input.c 51
1 files changed, 31 insertions, 20 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0640453fce54..d764b5854dfc 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4764,8 +4764,8 @@ static void tcp_ofo_queue(struct sock *sk)
}
}
-static bool tcp_prune_ofo_queue(struct sock *sk);
-static int tcp_prune_queue(struct sock *sk);
+static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb);
+static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb);
static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
unsigned int size)
@@ -4773,11 +4773,11 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
!sk_rmem_schedule(sk, skb, size)) {
- if (tcp_prune_queue(sk) < 0)
+ if (tcp_prune_queue(sk, skb) < 0)
return -1;
while (!sk_rmem_schedule(sk, skb, size)) {
- if (!tcp_prune_ofo_queue(sk))
+ if (!tcp_prune_ofo_queue(sk, skb))
return -1;
}
}
@@ -5329,6 +5329,8 @@ new_range:
* Clean the out-of-order queue to make room.
* We drop high sequences packets to :
* 1) Let a chance for holes to be filled.
+ * This means we do not drop packets from the ooo queue if their sequence
+ * is before the incoming packet's sequence.
* 2) not add too big latencies if thousands of packets sit there.
* (But if application shrinks SO_RCVBUF, we could still end up
* freeing whole queue here)
@@ -5336,24 +5338,31 @@ new_range:
*
* Return true if queue has shrunk.
*/
-static bool tcp_prune_ofo_queue(struct sock *sk)
+static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct rb_node *node, *prev;
+ bool pruned = false;
int goal;
if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
return false;
- NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
goal = sk->sk_rcvbuf >> 3;
node = &tp->ooo_last_skb->rbnode;
+
do {
+ struct sk_buff *skb = rb_to_skb(node);
+
+ /* If incoming skb would land last in ofo queue, stop pruning. */
+ if (after(TCP_SKB_CB(in_skb)->seq, TCP_SKB_CB(skb)->seq))
+ break;
+ pruned = true;
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
- goal -= rb_to_skb(node)->truesize;
- tcp_drop_reason(sk, rb_to_skb(node),
- SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
+ goal -= skb->truesize;
+ tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
+ tp->ooo_last_skb = rb_to_skb(prev);
if (!prev || goal <= 0) {
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
!tcp_under_memory_pressure(sk))
@@ -5362,16 +5371,18 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
}
node = prev;
} while (node);
- tp->ooo_last_skb = rb_to_skb(prev);
- /* Reset SACK state. A conforming SACK implementation will
- * do the same at a timeout based retransmit. When a connection
- * is in a sad state like this, we care only about integrity
- * of the connection not performance.
- */
- if (tp->rx_opt.sack_ok)
- tcp_sack_reset(&tp->rx_opt);
- return true;
+ if (pruned) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+ /* Reset SACK state. A conforming SACK implementation will
+ * do the same at a timeout based retransmit. When a connection
+ * is in a sad state like this, we care only about integrity
+ * of the connection not performance.
+ */
+ if (tp->rx_opt.sack_ok)
+ tcp_sack_reset(&tp->rx_opt);
+ }
+ return pruned;
}
/* Reduce allocated memory if we can, trying to get
@@ -5381,7 +5392,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
* until the socket owning process reads some of the data
* to stabilize the situation.
*/
-static int tcp_prune_queue(struct sock *sk)
+static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -5408,7 +5419,7 @@ static int tcp_prune_queue(struct sock *sk)
/* Collapsing did not help, destructive actions follow.
* This must not ever occur. */
- tcp_prune_ofo_queue(sk);
+ tcp_prune_ofo_queue(sk, in_skb);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0;