diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 207 |
1 files changed, 85 insertions, 122 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2920e0cb09f8..c5d7656beeee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -107,6 +107,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ +#define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ @@ -1951,6 +1952,7 @@ void tcp_enter_loss(struct sock *sk) !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->prior_cwnd = tp->snd_cwnd; tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); tcp_init_undo(tp); @@ -2520,8 +2522,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) return; /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || - (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { + if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && + (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_jiffies32; } @@ -3004,21 +3006,24 @@ void tcp_rearm_rto(struct sock *sk) /* Offset the time elapsed after installing regular RTO */ if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { - struct sk_buff *skb = tcp_write_queue_head(sk); - u64 rto_time_stamp = skb->skb_mstamp + - jiffies_to_usecs(rto); - s64 delta_us = rto_time_stamp - tp->tcp_mstamp; + s64 delta_us = tcp_rto_delta_us(sk); /* delta_us may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ - if (delta_us > 0) - rto = usecs_to_jiffies(delta_us); + rto = usecs_to_jiffies(max_t(int, delta_us, 1)); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); } } +/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */ +static void tcp_set_xmit_timer(struct sock *sk) +{ + if (!tcp_schedule_loss_probe(sk)) + tcp_rearm_rto(sk); +} + /* If we get here, the whole TSO packet has not been acked. */ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) { @@ -3180,7 +3185,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ca_rtt_us, sack->rate); if (flag & FLAG_ACKED) { - tcp_rearm_rto(sk); + flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */ if (unlikely(icsk->icsk_mtup.probe_size && !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { tcp_mtup_probe_success(sk); @@ -3208,7 +3213,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. */ - tcp_rearm_rto(sk); + flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */ } if (icsk->icsk_ca_ops->pkts_acked) { @@ -3580,9 +3585,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) - tcp_rearm_rto(sk); - if (after(ack, prior_snd_una)) { flag |= FLAG_SND_UNA_ADVANCED; icsk->icsk_retransmits = 0; @@ -3647,18 +3649,20 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, &sack_state); + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); + /* If needed, reset TLP/RTO timer; RACK may later override this. */ + if (flag & FLAG_SET_XMIT_TIMER) + tcp_set_xmit_timer(sk); + if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); } - if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag); if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) sk_dst_confirm(sk); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) - tcp_schedule_loss_probe(sk); delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ tcp_rate_gen(sk, delivered, lost, sack_state.rate); @@ -4264,9 +4268,15 @@ static void tcp_sack_remove(struct tcp_sock *tp) tp->rx_opt.num_sacks = num_sacks; } +enum tcp_queue { + OOO_QUEUE, + RCV_QUEUE, +}; + /** * tcp_try_coalesce - try to merge skb to prior one * @sk: socket + * @dest: destination queue * @to: prior buffer * @from: buffer to add in queue * @fragstolen: pointer to boolean @@ -4278,6 +4288,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) * Returns true if caller should free @from instead of queueing it */ static bool tcp_try_coalesce(struct sock *sk, + enum tcp_queue dest, struct sk_buff *to, struct sk_buff *from, bool *fragstolen) @@ -4299,6 +4310,15 @@ static bool tcp_try_coalesce(struct sock *sk, TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; + + if (TCP_SKB_CB(from)->has_rxtstamp) { + TCP_SKB_CB(to)->has_rxtstamp = true; + if (dest == OOO_QUEUE) + TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp; + else + to->tstamp = from->tstamp; + } + return true; } @@ -4333,6 +4353,9 @@ static void tcp_ofo_queue(struct sock *sk) } p = rb_next(p); rb_erase(&skb->rbnode, &tp->out_of_order_queue); + /* Replace tstamp which was stomped by rbnode */ + if (TCP_SKB_CB(skb)->has_rxtstamp) + skb->tstamp = TCP_SKB_CB(skb)->swtstamp; if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { SOCK_DEBUG(sk, "ofo packet was already received\n"); @@ -4344,7 +4367,8 @@ static void tcp_ofo_queue(struct sock *sk) TCP_SKB_CB(skb)->end_seq); tail = skb_peek_tail(&sk->sk_receive_queue); - eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); + eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE, + tail, skb, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) @@ -4398,6 +4422,10 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) return; } + /* Stash tstamp to avoid being stomped on by rbnode */ + if (TCP_SKB_CB(skb)->has_rxtstamp) + TCP_SKB_CB(skb)->swtstamp = skb->tstamp; + /* Disable header prediction. */ tp->pred_flags = 0; inet_csk_schedule_ack(sk); @@ -4425,7 +4453,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) /* In the typical case, we are adding an skb to the end of the list. * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. */ - if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { + if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb, + skb, &fragstolen)) { coalesce_done: tcp_grow_window(sk, skb); kfree_skb_partial(skb, fragstolen); @@ -4475,7 +4504,8 @@ coalesce_done: __kfree_skb(skb1); goto merge_right; } - } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { + } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1, + skb, &fragstolen)) { goto coalesce_done; } p = &parent->rb_right; @@ -4526,7 +4556,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int __skb_pull(skb, hdrlen); eaten = (tail && - tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; + tcp_try_coalesce(sk, RCV_QUEUE, tail, + skb, fragstolen)) ? 1 : 0; tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { __skb_queue_tail(&sk->sk_receive_queue, skb); @@ -4588,8 +4619,8 @@ err: static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - bool fragstolen = false; - int eaten = -1; + bool fragstolen; + int eaten; if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { __kfree_skb(skb); @@ -4611,32 +4642,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) goto out_of_window; /* Ok. In sequence. In window. */ - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && tp->ucopy.len && - sock_owned_by_user(sk) && !tp->urg_data) { - int chunk = min_t(unsigned int, skb->len, - tp->ucopy.len); - - __set_current_state(TASK_RUNNING); - - if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - eaten = (chunk == skb->len); - tcp_rcv_space_adjust(sk); - } - } - - if (eaten <= 0) { queue_and_out: - if (eaten < 0) { - if (skb_queue_len(&sk->sk_receive_queue) == 0) - sk_forced_mem_schedule(sk, skb->truesize); - else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) - goto drop; - } - eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); - } + if (skb_queue_len(&sk->sk_receive_queue) == 0) + sk_forced_mem_schedule(sk, skb->truesize); + else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) + goto drop; + + eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); if (skb->len) tcp_event_data_recv(sk, skb); @@ -5186,26 +5198,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t } } -static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) -{ - struct tcp_sock *tp = tcp_sk(sk); - int chunk = skb->len - hlen; - int err; - - if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk); - else - err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg); - - if (!err) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - tcp_rcv_space_adjust(sk); - } - - return err; -} - /* Accept RST for rcv_nxt - 1 after a FIN. * When tcp connections are abruptly terminated from Mac OSX (via ^C), a * FIN is sent followed by a RST packet. The RST is sent with the same @@ -5358,8 +5350,9 @@ discard: * tcp_data_queue when everything is OK. */ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) + const struct tcphdr *th) { + unsigned int len = skb->len; struct tcp_sock *tp = tcp_sk(sk); tcp_mstamp_refresh(tp); @@ -5445,56 +5438,28 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, int eaten = 0; bool fragstolen = false; - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && - len - tcp_header_len <= tp->ucopy.len && - sock_owned_by_user(sk)) { - __set_current_state(TASK_RUNNING); - - if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + - TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - tcp_rcv_rtt_measure_ts(sk, skb); - - __skb_pull(skb, tcp_header_len); - tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPHPHITSTOUSER); - eaten = 1; - } - } - if (!eaten) { - if (tcp_checksum_complete(skb)) - goto csum_error; + if (tcp_checksum_complete(skb)) + goto csum_error; - if ((int)skb->truesize > sk->sk_forward_alloc) - goto step5; + if ((int)skb->truesize > sk->sk_forward_alloc) + goto step5; - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); + /* Predicted packet is in window by definition. + * seq == rcv_nxt and rcv_wup <= rcv_nxt. + * Hence, check seq<=rcv_wup reduces to: + */ + if (tcp_header_len == + (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && + tp->rcv_nxt == tp->rcv_wup) + tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(sk, skb); + tcp_rcv_rtt_measure_ts(sk, skb); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); - /* Bulk data transfer: receiver */ - eaten = tcp_queue_rcv(sk, skb, tcp_header_len, - &fragstolen); - } + /* Bulk data transfer: receiver */ + eaten = tcp_queue_rcv(sk, skb, tcp_header_len, + &fragstolen); tcp_event_data_recv(sk, skb); @@ -5588,7 +5553,6 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) __tcp_fast_path_on(tp, tp->snd_wnd); else tp->pred_flags = 0; - } static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, @@ -6303,9 +6267,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sock *fastopen_sk = NULL; - struct dst_entry *dst = NULL; struct request_sock *req; bool want_cookie = false; + struct dst_entry *dst; struct flowi fl; /* TW buckets are converted to open requests without @@ -6355,6 +6319,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (tmp_opt.tstamp_ok) tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); + dst = af_ops->route_req(sk, &fl, req); + if (!dst) + goto drop_and_free; + if (!want_cookie && !isn) { /* Kill the following clause, if you dislike this way. */ if (!net->ipv4.sysctl_tcp_syncookies && @@ -6375,11 +6343,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, isn = af_ops->init_seq(skb); } - if (!dst) { - dst = af_ops->route_req(sk, &fl, req); - if (!dst) - goto drop_and_free; - } tcp_ecn_create_request(req, skb, sk, dst); @@ -6395,7 +6358,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_openreq_init_rwin(req, sk, dst); if (!want_cookie) { tcp_reqsk_record_syn(sk, req, skb); - fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); + fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc); } if (fastopen_sk) { af_ops->send_synack(fastopen_sk, dst, &fl, req, |