summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--net/ipv4/tcp_output.c282
1 files changed, 106 insertions, 176 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 699fb102e971..d46f2143305c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -86,9 +86,6 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
tcp_rearm_rto(sk);
}
-
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
- tcp_skb_pcount(skb));
}
/* SND.NXT, if window was not shrunk.
@@ -272,7 +269,6 @@ EXPORT_SYMBOL(tcp_select_initial_window);
static u16 tcp_select_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 old_win = tp->rcv_wnd;
u32 cur_win = tcp_receive_window(tp);
u32 new_win = __tcp_select_window(sk);
@@ -285,9 +281,6 @@ static u16 tcp_select_window(struct sock *sk)
*
* Relax Will Robinson.
*/
- if (new_win == 0)
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPWANTZEROWINDOWADV);
new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
}
tp->rcv_wnd = new_win;
@@ -305,14 +298,8 @@ static u16 tcp_select_window(struct sock *sk)
new_win >>= tp->rx_opt.rcv_wscale;
/* If we advertise zero window, disable fast path. */
- if (new_win == 0) {
+ if (new_win == 0)
tp->pred_flags = 0;
- if (old_win)
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPTOZEROWINDOWADV);
- } else if (old_win == 0) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
- }
return new_win;
}
@@ -376,17 +363,15 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
*/
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
- struct skb_shared_info *shinfo = skb_shinfo(skb);
-
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
TCP_SKB_CB(skb)->tcp_flags = flags;
TCP_SKB_CB(skb)->sacked = 0;
- shinfo->gso_segs = 1;
- shinfo->gso_size = 0;
- shinfo->gso_type = 0;
+ skb_shinfo(skb)->gso_segs = 1;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_type = 0;
TCP_SKB_CB(skb)->seq = seq;
if (flags & (TCPHDR_SYN | TCPHDR_FIN))
@@ -421,7 +406,7 @@ struct tcp_out_options {
* Beware: Something in the Internet is very sensitive to the ordering of
* TCP options, we learned this through the hard way, so be careful here.
* Luckily we can at least blame others for their non-compliance but from
- * inter-operability perspective it seems that we're somewhat stuck with
+ * inter-operatibility perspective it seems that we're somewhat stuck with
* the ordering which we have been using if we want to keep working with
* those broken things (not that it currently hurts anybody as there isn't
* particular reason why the ordering would need to be changed).
@@ -694,7 +679,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
*
* Its important tcp_wfree() can be replaced by sock_wfree() in the event skb
* needs to be reallocated in a driver.
- * The invariant being skb->truesize subtracted from sk->sk_wmem_alloc
+ * The invariant being skb->truesize substracted from sk->sk_wmem_alloc
*
* Since transmit from skb destructor is forbidden, we use a tasklet
* to process all sockets that eventually need to send more skbs.
@@ -711,13 +696,12 @@ static void tcp_tsq_handler(struct sock *sk)
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
- tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
- 0, GFP_ATOMIC);
+ tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
}
/*
- * One tasklet per cpu tries to send more skbs.
+ * One tasklest per cpu tries to send more skbs.
* We run in tasklet context but need to disable irqs when
- * transferring tsq->head because tcp_wfree() might
+ * transfering tsq->head because tcp_wfree() might
* interrupt us (non NAPI drivers)
*/
static void tcp_tasklet_func(unsigned long data)
@@ -780,17 +764,6 @@ void tcp_release_cb(struct sock *sk)
if (flags & (1UL << TCP_TSQ_DEFERRED))
tcp_tsq_handler(sk);
- /* Here begins the tricky part :
- * We are called from release_sock() with :
- * 1) BH disabled
- * 2) sk_lock.slock spinlock held
- * 3) socket owned by us (sk->sk_lock.owned == 1)
- *
- * But following code is meant to be called from BH handlers,
- * so we should keep BH disabled, but early release socket ownership
- */
- sock_release_ownership(sk);
-
if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
tcp_write_timer_handler(sk);
__sock_put(sk);
@@ -822,7 +795,7 @@ void __init tcp_tasklet_init(void)
/*
* Write buffer destructor automatically called from kfree_skb.
- * We can't xmit new skbs from this context, as we might already
+ * We cant xmit new skbs from this context, as we might already
* hold qdisc lock.
*/
void tcp_wfree(struct sk_buff *skb)
@@ -877,15 +850,19 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
BUG_ON(!skb || !tcp_skb_pcount(skb));
- if (clone_it) {
- const struct sk_buff *fclone = skb + 1;
+ /* If congestion control is doing timestamping, we must
+ * take such a timestamp before we potentially clone/copy.
+ */
+ if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
+ __net_timestamp(skb);
- skb_mstamp_get(&skb->skb_mstamp);
+ if (likely(clone_it)) {
+ const struct sk_buff *fclone = skb + 1;
if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
fclone->fclone == SKB_FCLONE_CLONE))
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
@@ -893,8 +870,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb = skb_clone(skb, gfp_mask);
if (unlikely(!skb))
return -ENOBUFS;
- /* Our usage of tstamp should remain private */
- skb->tstamp.tv64 = 0;
}
inet = inet_sk(sk);
@@ -1011,8 +986,6 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
- struct skb_shared_info *shinfo = skb_shinfo(skb);
-
/* Make sure we own this skb before messing gso_size/gso_segs */
WARN_ON_ONCE(skb_cloned(skb));
@@ -1020,13 +993,13 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
/* Avoid the costly divide in the normal
* non-TSO case.
*/
- shinfo->gso_segs = 1;
- shinfo->gso_size = 0;
- shinfo->gso_type = 0;
+ skb_shinfo(skb)->gso_segs = 1;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_type = 0;
} else {
- shinfo->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
- shinfo->gso_size = mss_now;
- shinfo->gso_type = sk->sk_gso_type;
+ skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
+ skb_shinfo(skb)->gso_size = mss_now;
+ skb_shinfo(skb)->gso_type = sk->sk_gso_type;
}
}
@@ -1173,7 +1146,6 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
*/
static void __pskb_trim_head(struct sk_buff *skb, int len)
{
- struct skb_shared_info *shinfo;
int i, k, eat;
eat = min_t(int, len, skb_headlen(skb));
@@ -1185,24 +1157,23 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
}
eat = len;
k = 0;
- shinfo = skb_shinfo(skb);
- for (i = 0; i < shinfo->nr_frags; i++) {
- int size = skb_frag_size(&shinfo->frags[i]);
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
if (size <= eat) {
skb_frag_unref(skb, i);
eat -= size;
} else {
- shinfo->frags[k] = shinfo->frags[i];
+ skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
if (eat) {
- shinfo->frags[k].page_offset += eat;
- skb_frag_size_sub(&shinfo->frags[k], eat);
+ skb_shinfo(skb)->frags[k].page_offset += eat;
+ skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
eat = 0;
}
k++;
}
}
- shinfo->nr_frags = k;
+ skb_shinfo(skb)->nr_frags = k;
skb_reset_tail_pointer(skb);
skb->data_len -= len;
@@ -1407,51 +1378,23 @@ static void tcp_cwnd_validate(struct sock *sk)
}
}
-/* Minshall's variant of the Nagle send check. */
-static bool tcp_minshall_check(const struct tcp_sock *tp)
-{
- return after(tp->snd_sml, tp->snd_una) &&
- !after(tp->snd_sml, tp->snd_nxt);
-}
-
-/* Update snd_sml if this skb is under mss
- * Note that a TSO packet might end with a sub-mss segment
- * The test is really :
- * if ((skb->len % mss) != 0)
- * tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
- * But we can avoid doing the divide again given we already have
- * skb_pcount = skb->len / mss_now
- */
-static void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss_now,
- const struct sk_buff *skb)
-{
- if (skb->len < tcp_skb_pcount(skb) * mss_now)
- tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
-}
-
-/* Return false, if packet can be sent now without violation Nagle's rules:
- * 1. It is full sized. (provided by caller in %partial bool)
- * 2. Or it contains FIN. (already checked by caller)
- * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
- * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
- * With Minshall's modification: all sent small packets are ACKed.
+/* Returns the portion of skb which can be sent right away without
+ * introducing MSS oddities to segment boundaries. In rare cases where
+ * mss_now != mss_cache, we will request caller to create a small skb
+ * per input skb which could be mostly avoided here (if desired).
+ *
+ * We explicitly want to create a request for splitting write queue tail
+ * to a small skb for Nagle purposes while avoiding unnecessary modulos,
+ * thus all the complexity (cwnd_len is always MSS multiple which we
+ * return whenever allowed by the other factors). Basically we need the
+ * modulo only when the receiver window alone is the limiting factor or
+ * when we would be allowed to send the split-due-to-Nagle skb fully.
*/
-static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
- int nonagle)
-{
- return partial &&
- ((nonagle & TCP_NAGLE_CORK) ||
- (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
-}
-/* Returns the portion of skb which can be sent right away */
-static unsigned int tcp_mss_split_point(const struct sock *sk,
- const struct sk_buff *skb,
- unsigned int mss_now,
- unsigned int max_segs,
- int nonagle)
+static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
+ unsigned int mss_now, unsigned int max_segs)
{
const struct tcp_sock *tp = tcp_sk(sk);
- u32 partial, needed, window, max_len;
+ u32 needed, window, max_len;
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
max_len = mss_now * max_segs;
@@ -1464,15 +1407,7 @@ static unsigned int tcp_mss_split_point(const struct sock *sk,
if (max_len <= needed)
return max_len;
- partial = needed % mss_now;
- /* If last segment is not a full MSS, check if Nagle rules allow us
- * to include this last segment in this skb.
- * Otherwise, we'll split the skb at last MSS boundary
- */
- if (tcp_nagle_check(partial != 0, tp, nonagle))
- return needed - partial;
-
- return needed;
+ return needed - needed % mss_now;
}
/* Can at least one segment of SKB be sent right now, according to the
@@ -1512,6 +1447,28 @@ static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
return tso_segs;
}
+/* Minshall's variant of the Nagle send check. */
+static inline bool tcp_minshall_check(const struct tcp_sock *tp)
+{
+ return after(tp->snd_sml, tp->snd_una) &&
+ !after(tp->snd_sml, tp->snd_nxt);
+}
+
+/* Return false, if packet can be sent now without violation Nagle's rules:
+ * 1. It is full sized.
+ * 2. Or it contains FIN. (already checked by caller)
+ * 3. Or TCP_CORK is not set, and TCP_NODELAY is set.
+ * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
+ * With Minshall's modification: all sent small packets are ACKed.
+ */
+static inline bool tcp_nagle_check(const struct tcp_sock *tp,
+ const struct sk_buff *skb,
+ unsigned int mss_now, int nonagle)
+{
+ return skb->len < mss_now &&
+ ((nonagle & TCP_NAGLE_CORK) ||
+ (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
+}
/* Return true if the Nagle test allows this packet to be
* sent now.
@@ -1532,7 +1489,7 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
return true;
- if (!tcp_nagle_check(skb->len < cur_mss, tp, nonagle))
+ if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
return true;
return false;
@@ -1918,24 +1875,12 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
* - better RTT estimation and ACK scheduling
* - faster recovery
* - high rates
- * Alas, some drivers / subsystems require a fair amount
- * of queued bytes to ensure line rate.
- * One example is wifi aggregation (802.11 AMPDU)
*/
- limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
- sk->sk_pacing_rate >> 10);
+ limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
- /* It is possible TX completion already happened
- * before we set TSQ_THROTTLED, so we must
- * test again the condition.
- * We abuse smp_mb__after_clear_bit() because
- * there is no smp_mb__after_set_bit() yet
- */
- smp_mb__after_clear_bit();
- if (atomic_read(&sk->sk_wmem_alloc) > limit)
- break;
+ break;
}
limit = mss_now;
@@ -1943,8 +1888,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
limit = tcp_mss_split_point(sk, skb, mss_now,
min_t(unsigned int,
cwnd_quota,
- sk->sk_gso_max_segs),
- nonagle);
+ sk->sk_gso_max_segs));
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
@@ -1986,7 +1930,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, tlp_time_stamp, rto_time_stamp;
- u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
+ u32 rtt = tp->srtt >> 3;
if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
return false;
@@ -2008,7 +1952,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
- if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
+ if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
!tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
return false;
@@ -2093,6 +2037,7 @@ rearm_timer:
if (likely(!err))
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPLOSSPROBES);
+ return;
}
/* Push out any pending frames which were held back due to
@@ -2190,8 +2135,7 @@ u32 __tcp_select_window(struct sock *sk)
*/
int mss = icsk->icsk_ack.rcv_mss;
int free_space = tcp_space(sk);
- int allowed_space = tcp_full_space(sk);
- int full_space = min_t(int, tp->window_clamp, allowed_space);
+ int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
int window;
if (mss > full_space)
@@ -2204,19 +2148,7 @@ u32 __tcp_select_window(struct sock *sk)
tp->rcv_ssthresh = min(tp->rcv_ssthresh,
4U * tp->advmss);
- /* free_space might become our new window, make sure we don't
- * increase it due to wscale.
- */
- free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
-
- /* if free space is less than mss estimate, or is below 1/16th
- * of the maximum allowed, try to move to zero-window, else
- * tcp_clamp_window() will grow rcv buf up to tcp_rmem[2], and
- * new incoming data is dropped due to memory limits.
- * With large window, mss test triggers way too late in order
- * to announce zero window in time before rmem limit kicks in.
- */
- if (free_space < (allowed_space >> 4) || free_space < mss)
+ if (free_space < mss)
return 0;
}
@@ -2371,7 +2303,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
unsigned int cur_mss;
- int err;
/* Inconslusive MTU probe */
if (icsk->icsk_mtup.probe_size) {
@@ -2422,6 +2353,21 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
tcp_retrans_try_collapse(sk, skb, cur_mss);
+ /* Some Solaris stacks overoptimize and ignore the FIN on a
+ * retransmit when old data is attached. So strip it off
+ * since it is cheap to do so and saves bytes on the network.
+ */
+ if (skb->len > 0 &&
+ (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
+ tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
+ if (!pskb_trim(skb, 0)) {
+ /* Reuse, even though it does some unnecessary work */
+ tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
+ TCP_SKB_CB(skb)->tcp_flags);
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+ }
+
/* Make a copy, if the first transmission SKB clone we made
* is still in somebody's hands, else make a clone.
*/
@@ -2435,15 +2381,11 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
GFP_ATOMIC);
- err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
- -ENOBUFS;
+ return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+ -ENOBUFS;
} else {
- err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+ return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
-
- if (likely(!err))
- TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
- return err;
}
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -2454,8 +2396,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (err == 0) {
/* Update global TCP statistics. */
TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+
tp->total_retrans++;
#if FASTRETRANS_DEBUG > 0
@@ -2741,7 +2682,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
int tcp_header_size;
int mss;
- skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC);
+ skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
if (unlikely(!skb)) {
dst_release(dst);
return NULL;
@@ -2795,8 +2736,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
th->syn = 1;
th->ack = 1;
TCP_ECN_make_synack(req, th);
- th->source = htons(ireq->ir_num);
- th->dest = ireq->ir_rmt_port;
+ th->source = ireq->loc_port;
+ th->dest = ireq->rmt_port;
/* Setting of flags are superfluous here for callers (and ECE is
* not even correctly set)
*/
@@ -2811,7 +2752,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
th->window = htons(min(req->rcv_wnd, 65535U));
tcp_options_write((__be32 *)(th + 1), tp, &opts);
th->doff = (tcp_header_size >> 2);
- TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
+ TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */
@@ -2826,7 +2767,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
EXPORT_SYMBOL(tcp_make_synack);
/* Do all connect socket setups that can be done AF independent. */
-static void tcp_connect_init(struct sock *sk)
+void tcp_connect_init(struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -2948,12 +2889,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
- space = min_t(size_t, space, fo->size);
-
- /* limit to order-0 allocations */
- space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
-
- syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
+ syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
sk->sk_allocation);
if (syn_data == NULL)
goto fallback;
@@ -2983,15 +2919,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
tcp_connect_queue_skb(sk, data);
fo->copied = data->len;
- /* syn_data is about to be sent, we need to take current time stamps
- * for the packets that are in write queue : SYN packet and DATA
- */
- skb_mstamp_get(&syn->skb_mstamp);
- data->skb_mstamp = syn->skb_mstamp;
-
if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
tp->syn_data = (fo->copied > 0);
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
goto done;
}
syn_data = NULL;
@@ -3079,9 +3009,8 @@ void tcp_send_delayed_ack(struct sock *sk)
* Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
* directly.
*/
- if (tp->srtt_us) {
- int rtt = max_t(int, usecs_to_jiffies(tp->srtt_us >> 3),
- TCP_DELACK_MIN);
+ if (tp->srtt) {
+ int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
if (rtt < max_ato)
max_ato = rtt;
@@ -3179,6 +3108,7 @@ void tcp_send_window_probe(struct sock *sk)
{
if (sk->sk_state == TCP_ESTABLISHED) {
tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
+ tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
tcp_xmit_probe_skb(sk, 0);
}
}