Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	219
1 files changed, 88 insertions, 131 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fe3b4bdfd251..557fe16cbfb0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -42,7 +42,7 @@
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse __read_mostly = 1;
 
-/* People can turn this on to  work with those rare, broken TCPs that
+/* People can turn this on to work with those rare, broken TCPs that
  * interpret the window field as a signed quantity.
  */
 int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
@@ -484,7 +484,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	}
 	if (likely(sysctl_tcp_window_scaling)) {
 		opts->ws = tp->rx_opt.rcv_wscale;
-		if(likely(opts->ws))
+		if (likely(opts->ws))
 			size += TCPOLEN_WSCALE_ALIGNED;
 	}
 	if (likely(sysctl_tcp_sack)) {
@@ -526,7 +526,7 @@ static unsigned tcp_synack_options(struct sock *sk,
 
 	if (likely(ireq->wscale_ok)) {
 		opts->ws = ireq->rcv_wscale;
-		if(likely(opts->ws))
+		if (likely(opts->ws))
 			size += TCPOLEN_WSCALE_ALIGNED;
 	}
 	if (likely(doing_ts)) {
@@ -663,10 +663,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->urg_ptr		= 0;
 
 	/* The urg_mode check is necessary during a below snd_una win probe */
-	if (unlikely(tcp_urg_mode(tp) &&
-		     between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
-		th->urg_ptr		= htons(tp->snd_up - tcb->seq);
-		th->urg			= 1;
+	if (unlikely(tcp_urg_mode(tp))) {
+		if (between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF)) {
+			th->urg_ptr = htons(tp->snd_up - tcb->seq);
+			th->urg = 1;
+		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
+			th->urg_ptr = 0xFFFF;
+			th->urg = 1;
+		}
 	}
 
 	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
@@ -1168,7 +1172,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
 
 static inline int tcp_minshall_check(const struct tcp_sock *tp)
 {
-	return after(tp->snd_sml,tp->snd_una) &&
+	return after(tp->snd_sml, tp->snd_una) &&
 		!after(tp->snd_sml, tp->snd_nxt);
 }
 
@@ -1334,7 +1338,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 
 	/* Defer for less than two clock ticks. */
 	if (tp->tso_deferred &&
-	    ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
+	    (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
 		goto send_now;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -1519,7 +1523,8 @@ static int tcp_mtu_probe(struct sock *sk)
  * Returns 1, if no segments are in flight and we have queued segments, but
  * cannot send anything now because of SWS or another problem.
  */
-static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
+static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
+			  int push_one, gfp_t gfp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -1527,20 +1532,16 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 	int cwnd_quota;
 	int result;
 
-	/* If we are closed, the bytes will have to remain here.
-	 * In time closedown will finish, we empty the write queue and all
-	 * will be happy.
-	 */
-	if (unlikely(sk->sk_state == TCP_CLOSE))
-		return 0;
-
 	sent_pkts = 0;
 
-	/* Do MTU probing. */
-	if ((result = tcp_mtu_probe(sk)) == 0) {
-		return 0;
-	} else if (result > 0) {
-		sent_pkts = 1;
+	if (!push_one) {
+		/* Do MTU probing. */
+		result = tcp_mtu_probe(sk);
+		if (!result) {
+			return 0;
+		} else if (result > 0) {
+			sent_pkts = 1;
+		}
 	}
 
 	while ((skb = tcp_send_head(sk))) {
@@ -1562,7 +1563,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 						      nonagle : TCP_NAGLE_PUSH))))
 				break;
 		} else {
-			if (tcp_tso_should_defer(sk, skb))
+			if (!push_one && tcp_tso_should_defer(sk, skb))
 				break;
 		}
 
@@ -1577,7 +1578,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-		if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))
+		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
 			break;
 
 		/* Advance the send_head.  This one is sent out.
@@ -1587,6 +1588,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 
 		tcp_minshall_update(tp, mss_now, skb);
 		sent_pkts++;
+
+		if (push_one)
+			break;
 	}
 
 	if (likely(sent_pkts)) {
@@ -1605,10 +1609,18 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 {
 	struct sk_buff *skb = tcp_send_head(sk);
 
-	if (skb) {
-		if (tcp_write_xmit(sk, cur_mss, nonagle))
-			tcp_check_probe_timer(sk);
-	}
+	if (!skb)
+		return;
+
+	/* If we are closed, the bytes will have to remain here.
+	 * In time closedown will finish, we empty the write queue and
+	 * all will be happy.
+	 */
+	if (unlikely(sk->sk_state == TCP_CLOSE))
+		return;
+
+	if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC))
+		tcp_check_probe_timer(sk);
 }
 
 /* Send _single_ skb sitting at the send head. This function requires
@@ -1616,38 +1628,11 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
  */
 void tcp_push_one(struct sock *sk, unsigned int mss_now)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
-	unsigned int tso_segs, cwnd_quota;
 
 	BUG_ON(!skb || skb->len < mss_now);
 
-	tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
-	cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
-
-	if (likely(cwnd_quota)) {
-		unsigned int limit;
-
-		BUG_ON(!tso_segs);
-
-		limit = mss_now;
-		if (tso_segs > 1 && !tcp_urg_mode(tp))
-			limit = tcp_mss_split_point(sk, skb, mss_now,
-						    cwnd_quota);
-
-		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
-			return;
-
-		/* Send it out now. */
-		TCP_SKB_CB(skb)->when = tcp_time_stamp;
-
-		if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
-			tcp_event_new_data_sent(sk, skb);
-			tcp_cwnd_validate(sk);
-			return;
-		}
-	}
+	tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
 }
 
 /* This function returns the amount that we can raise the
@@ -1767,46 +1752,22 @@ u32 __tcp_select_window(struct sock *sk)
 	return window;
 }
 
-/* Attempt to collapse two adjacent SKB's during retransmission. */
-static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
-				     int mss_now)
+/* Collapses two adjacent SKB's during retransmission. */
+static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
 	int skb_size, next_skb_size;
 	u16 flags;
 
-	/* The first test we must make is that neither of these two
-	 * SKB's are still referenced by someone else.
-	 */
-	if (skb_cloned(skb) || skb_cloned(next_skb))
-		return;
-
 	skb_size = skb->len;
 	next_skb_size = next_skb->len;
 	flags = TCP_SKB_CB(skb)->flags;
 
-	/* Also punt if next skb has been SACK'd. */
-	if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
-		return;
-
-	/* Next skb is out of window. */
-	if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp)))
-		return;
-
-	/* Punt if not enough space exists in the first SKB for
-	 * the data in the second, or the total combined payload
-	 * would exceed the MSS.
-	 */
-	if ((next_skb_size > skb_tailroom(skb)) ||
-	    ((skb_size + next_skb_size) > mss_now))
-		return;
-
 	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
 
 	tcp_highest_sack_combine(sk, next_skb, skb);
 
-	/* Ok.	We will be able to collapse the packet. */
 	tcp_unlink_write_queue(next_skb, sk);
 
 	skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
@@ -1848,54 +1809,60 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
 	sk_wmem_free_skb(sk, next_skb);
 }
 
-/* Do a simple retransmit without using the backoff mechanisms in
- * tcp_timer. This is used for path mtu discovery.
- * The socket is already locked here.
- */
-void tcp_simple_retransmit(struct sock *sk)
+static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
+{
+	if (tcp_skb_pcount(skb) > 1)
+		return 0;
+	/* TODO: SACK collapsing could be used to remove this condition */
+	if (skb_shinfo(skb)->nr_frags != 0)
+		return 0;
+	if (skb_cloned(skb))
+		return 0;
+	if (skb == tcp_send_head(sk))
+		return 0;
+	/* Some heurestics for collapsing over SACK'd could be invented */
+	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+		return 0;
+
+	return 1;
+}
+
+static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
+				     int space)
 {
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-	unsigned int mss = tcp_current_mss(sk, 0);
-	u32 prior_lost = tp->lost_out;
+	struct sk_buff *skb = to, *tmp;
+	int first = 1;
 
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
+	if (!sysctl_tcp_retrans_collapse)
+		return;
+	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)
+		return;
+
+	tcp_for_write_queue_from_safe(skb, tmp, sk) {
+		if (!tcp_can_collapse(sk, skb))
 			break;
-		if (skb->len > mss &&
-		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
-			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
-				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-				tp->retrans_out -= tcp_skb_pcount(skb);
-			}
-			tcp_skb_mark_lost_uncond_verify(tp, skb);
-		}
-	}
 
-	tcp_clear_retrans_hints_partial(tp);
+		space -= skb->len;
 
-	if (prior_lost == tp->lost_out)
-		return;
+		if (first) {
+			first = 0;
+			continue;
+		}
 
-	if (tcp_is_reno(tp))
-		tcp_limit_reno_sacked(tp);
+		if (space < 0)
+			break;
+		/* Punt if not enough space exists in the first SKB for
+		 * the data in the second
+		 */
+		if (skb->len > skb_tailroom(to))
+			break;
 
-	tcp_verify_left_out(tp);
+		if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
+			break;
 
-	/* Don't muck with the congestion window here.
-	 * Reason is that we do not increase amount of _data_
-	 * in network, but units changed and effective
-	 * cwnd/ssthresh really reduced now.
-	 */
-	if (icsk->icsk_ca_state != TCP_CA_Loss) {
-		tp->high_seq = tp->snd_nxt;
-		tp->snd_ssthresh = tcp_current_ssthresh(sk);
-		tp->prior_ssthresh = 0;
-		tp->undo_marker = 0;
-		tcp_set_ca_state(sk, TCP_CA_Loss);
+		tcp_collapse_retrans(sk, to);
 	}
-	tcp_xmit_retransmit_queue(sk);
 }
 
 /* This retransmits one SKB.  Policy decisions and retransmit queue
@@ -1947,17 +1914,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 			return -ENOMEM; /* We'll try again later. */
 	}
 
-	/* Collapse two adjacent packets if worthwhile and we can. */
-	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
-	    (skb->len < (cur_mss >> 1)) &&
-	    (!tcp_skb_is_last(sk, skb)) &&
-	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
-	    (skb_shinfo(skb)->nr_frags == 0 &&
-	     skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
-	    (tcp_skb_pcount(skb) == 1 &&
-	     tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
-	    (sysctl_tcp_retrans_collapse != 0))
-		tcp_retrans_try_collapse(sk, skb, cur_mss);
+	tcp_retrans_try_collapse(sk, skb, cur_mss);
 
 	/* Some Solaris stacks overoptimize and ignore the FIN on a
 	 * retransmit when old data is attached.  So strip it off
