summary | refs | log | tree | commit | diff
path: root/net/ipv4/tcp_output.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- net/ipv4/tcp_output.c 316
1 files changed, 222 insertions, 94 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f0723460753c..f558c054cf6e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -170,10 +170,10 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
tp->lsndtime = now;
/* If it is a reply for ato after last received
- * packet, enter pingpong mode.
+ * packet, increase pingpong count.
*/
if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
- inet_csk_enter_pingpong_mode(sk);
+ inet_csk_inc_pingpong_cnt(sk);
}
/* Account for an ACK we sent. */
@@ -422,6 +422,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_FAST_OPEN_COOKIE BIT(8)
#define OPTION_SMC BIT(9)
#define OPTION_MPTCP BIT(10)
+#define OPTION_AO BIT(11)
static void smc_options_write(__be32 *ptr, u16 *options)
{
@@ -614,19 +615,52 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb,
* (but it may well be that other scenarios fail similarly).
*/
static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
- struct tcp_out_options *opts)
+ const struct tcp_request_sock *tcprsk,
+ struct tcp_out_options *opts,
+ struct tcp_key *key)
{
__be32 *ptr = (__be32 *)(th + 1);
u16 options = opts->options; /* mungable copy */
- if (unlikely(OPTION_MD5 & options)) {
+ if (tcp_key_is_md5(key)) {
*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
/* overload cookie hash location */
opts->hash_location = (__u8 *)ptr;
ptr += 4;
- }
+ } else if (tcp_key_is_ao(key)) {
+#ifdef CONFIG_TCP_AO
+ u8 maclen = tcp_ao_maclen(key->ao_key);
+
+ if (tcprsk) {
+ u8 aolen = maclen + sizeof(struct tcp_ao_hdr);
+ *ptr++ = htonl((TCPOPT_AO << 24) | (aolen << 16) |
+ (tcprsk->ao_keyid << 8) |
+ (tcprsk->ao_rcv_next));
+ } else {
+ struct tcp_ao_key *rnext_key;
+ struct tcp_ao_info *ao_info;
+
+ ao_info = rcu_dereference_check(tp->ao_info,
+ lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk));
+ rnext_key = READ_ONCE(ao_info->rnext_key);
+ if (WARN_ON_ONCE(!rnext_key))
+ goto out_ao;
+ *ptr++ = htonl((TCPOPT_AO << 24) |
+ (tcp_ao_len(key->ao_key) << 16) |
+ (key->ao_key->sndid << 8) |
+ (rnext_key->rcvid));
+ }
+ opts->hash_location = (__u8 *)ptr;
+ ptr += maclen / sizeof(*ptr);
+ if (unlikely(maclen % sizeof(*ptr))) {
+ memset(ptr, TCPOPT_NOP, sizeof(*ptr));
+ ptr++;
+ }
+out_ao:
+#endif
+ }
if (unlikely(opts->mss)) {
*ptr++ = htonl((TCPOPT_MSS << 24) |
(TCPOLEN_MSS << 16) |
@@ -767,23 +801,25 @@ static void mptcp_set_option_cond(const struct request_sock *req,
*/
static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
- struct tcp_md5sig_key **md5)
+ struct tcp_key *key)
{
struct tcp_sock *tp = tcp_sk(sk);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
+ bool timestamps;
- *md5 = NULL;
-#ifdef CONFIG_TCP_MD5SIG
- if (static_branch_unlikely(&tcp_md5_needed.key) &&
- rcu_access_pointer(tp->md5sig_info)) {
- *md5 = tp->af_specific->md5_lookup(sk, sk);
- if (*md5) {
- opts->options |= OPTION_MD5;
- remaining -= TCPOLEN_MD5SIG_ALIGNED;
+ /* Better than switch (key.type) as it has static branches */
+ if (tcp_key_is_md5(key)) {
+ timestamps = false;
+ opts->options |= OPTION_MD5;
+ remaining -= TCPOLEN_MD5SIG_ALIGNED;
+ } else {
+ timestamps = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps);
+ if (tcp_key_is_ao(key)) {
+ opts->options |= OPTION_AO;
+ remaining -= tcp_ao_len(key->ao_key);
}
}
-#endif
/* We always get an MSS option. The option bytes which will be seen in
* normal data packets should timestamps be used, must be in the MSS
@@ -797,9 +833,9 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
+ if (likely(timestamps)) {
opts->options |= OPTION_TS;
- opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
+ opts->tsval = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
@@ -850,7 +886,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
struct request_sock *req,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
- const struct tcp_md5sig_key *md5,
+ const struct tcp_key *key,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type,
struct sk_buff *syn_skb)
@@ -858,8 +894,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
-#ifdef CONFIG_TCP_MD5SIG
- if (md5) {
+ if (tcp_key_is_md5(key)) {
opts->options |= OPTION_MD5;
remaining -= TCPOLEN_MD5SIG_ALIGNED;
@@ -870,8 +905,11 @@ static unsigned int tcp_synack_options(const struct sock *sk,
*/
if (synack_type != TCP_SYNACK_COOKIE)
ireq->tstamp_ok &= !ireq->sack_ok;
+ } else if (tcp_key_is_ao(key)) {
+ opts->options |= OPTION_AO;
+ remaining -= tcp_ao_len(key->ao_key);
+ ireq->tstamp_ok &= !ireq->sack_ok;
}
-#endif
/* We always send an MSS option. */
opts->mss = mss;
@@ -884,7 +922,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
}
if (likely(ireq->tstamp_ok)) {
opts->options |= OPTION_TS;
- opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
+ opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) +
+ tcp_rsk(req)->ts_off;
opts->tsecr = READ_ONCE(req->ts_recent);
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
@@ -921,7 +960,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
*/
static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
- struct tcp_md5sig_key **md5)
+ struct tcp_key *key)
{
struct tcp_sock *tp = tcp_sk(sk);
unsigned int size = 0;
@@ -929,21 +968,19 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
opts->options = 0;
- *md5 = NULL;
-#ifdef CONFIG_TCP_MD5SIG
- if (static_branch_unlikely(&tcp_md5_needed.key) &&
- rcu_access_pointer(tp->md5sig_info)) {
- *md5 = tp->af_specific->md5_lookup(sk, sk);
- if (*md5) {
- opts->options |= OPTION_MD5;
- size += TCPOLEN_MD5SIG_ALIGNED;
- }
+ /* Better than switch (key.type) as it has static branches */
+ if (tcp_key_is_md5(key)) {
+ opts->options |= OPTION_MD5;
+ size += TCPOLEN_MD5SIG_ALIGNED;
+ } else if (tcp_key_is_ao(key)) {
+ opts->options |= OPTION_AO;
+ size += tcp_ao_len(key->ao_key);
}
-#endif
if (likely(tp->rx_opt.tstamp_ok)) {
opts->options |= OPTION_TS;
- opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
+ opts->tsval = skb ? tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) +
+ tp->tsoffset : 0;
opts->tsecr = tp->rx_opt.ts_recent;
size += TCPOLEN_TSTAMP_ALIGNED;
}
@@ -1076,7 +1113,8 @@ static void tcp_tasklet_func(struct tasklet_struct *t)
#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \
TCPF_WRITE_TIMER_DEFERRED | \
TCPF_DELACK_TIMER_DEFERRED | \
- TCPF_MTU_REDUCED_DEFERRED)
+ TCPF_MTU_REDUCED_DEFERRED | \
+ TCPF_ACK_DEFERRED)
/**
* tcp_release_cb - tcp release_sock() callback
* @sk: socket
@@ -1100,16 +1138,6 @@ void tcp_release_cb(struct sock *sk)
tcp_tsq_write(sk);
__sock_put(sk);
}
- /* Here begins the tricky part :
- * We are called from release_sock() with :
- * 1) BH disabled
- * 2) sk_lock.slock spinlock held
- * 3) socket owned by us (sk->sk_lock.owned == 1)
- *
- * But following code is meant to be called from BH handlers,
- * so we should keep BH disabled, but early release socket ownership
- */
- sock_release_ownership(sk);
if (flags & TCPF_WRITE_TIMER_DEFERRED) {
tcp_write_timer_handler(sk);
@@ -1123,6 +1151,8 @@ void tcp_release_cb(struct sock *sk)
inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
__sock_put(sk);
}
+ if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk))
+ tcp_send_ack(sk);
}
EXPORT_SYMBOL(tcp_release_cb);
@@ -1207,7 +1237,7 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
if (sk->sk_pacing_status != SK_PACING_NONE) {
- unsigned long rate = sk->sk_pacing_rate;
+ unsigned long rate = READ_ONCE(sk->sk_pacing_rate);
/* Original sch_fq does not pace first 10 MSS
* Note that tp->data_segs_out overflows after 2^32 packets,
@@ -1250,7 +1280,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options opts;
unsigned int tcp_options_size, tcp_header_size;
struct sk_buff *oskb = NULL;
- struct tcp_md5sig_key *md5;
+ struct tcp_key key;
struct tcphdr *th;
u64 prior_wstamp;
int err;
@@ -1282,11 +1312,11 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tcb = TCP_SKB_CB(skb);
memset(&opts, 0, sizeof(opts));
+ tcp_get_current_key(sk, &key);
if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
- tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
+ tcp_options_size = tcp_syn_options(sk, skb, &opts, &key);
} else {
- tcp_options_size = tcp_established_options(sk, skb, &opts,
- &md5);
+ tcp_options_size = tcp_established_options(sk, skb, &opts, &key);
/* Force a PSH flag on all (GSO) packets to expedite GRO flush
* at receiver : This slightly improve GRO performance.
* Note that we do not force the PSH flag for non GSO packets,
@@ -1331,7 +1361,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
- skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
+ skb_set_dst_pending_confirm(skb, READ_ONCE(sk->sk_dst_pending_confirm));
/* Build TCP header and checksum it. */
th = (struct tcphdr *)skb->data;
@@ -1367,16 +1397,25 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
th->window = htons(min(tp->rcv_wnd, 65535U));
}
- tcp_options_write(th, tp, &opts);
+ tcp_options_write(th, tp, NULL, &opts, &key);
+ if (tcp_key_is_md5(&key)) {
#ifdef CONFIG_TCP_MD5SIG
- /* Calculate the MD5 hash, as we have all we need now */
- if (md5) {
+ /* Calculate the MD5 hash, as we have all we need now */
sk_gso_disable(sk);
tp->af_specific->calc_md5_hash(opts.hash_location,
- md5, sk, skb);
- }
+ key.md5_key, sk, skb);
#endif
+ } else if (tcp_key_is_ao(&key)) {
+ int err;
+
+ err = tcp_ao_transmit_skb(sk, skb, key.ao_key, th,
+ opts.hash_location);
+ if (err) {
+ kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED);
+ return -ENOMEM;
+ }
+ }
/* BPF prog is the last one writing header option */
bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts);
@@ -1703,14 +1742,6 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
*/
mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
- /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
- if (icsk->icsk_af_ops->net_frag_header_len) {
- const struct dst_entry *dst = __sk_dst_get(sk);
-
- if (dst && dst_allfrag(dst))
- mss_now -= icsk->icsk_af_ops->net_frag_header_len;
- }
-
/* Clamp it (mss_clamp does not include tcp options) */
if (mss_now > tp->rx_opt.mss_clamp)
mss_now = tp->rx_opt.mss_clamp;
@@ -1738,21 +1769,11 @@ int tcp_mss_to_mtu(struct sock *sk, int mss)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
- int mtu;
- mtu = mss +
+ return mss +
tp->tcp_header_len +
icsk->icsk_ext_hdr_len +
icsk->icsk_af_ops->net_header_len;
-
- /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
- if (icsk->icsk_af_ops->net_frag_header_len) {
- const struct dst_entry *dst = __sk_dst_get(sk);
-
- if (dst && dst_allfrag(dst))
- mtu += icsk->icsk_af_ops->net_frag_header_len;
- }
- return mtu;
}
EXPORT_SYMBOL(tcp_mss_to_mtu);
@@ -1827,7 +1848,7 @@ unsigned int tcp_current_mss(struct sock *sk)
u32 mss_now;
unsigned int header_len;
struct tcp_out_options opts;
- struct tcp_md5sig_key *md5;
+ struct tcp_key key;
mss_now = tp->mss_cache;
@@ -1836,8 +1857,8 @@ unsigned int tcp_current_mss(struct sock *sk)
if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
mss_now = tcp_sync_mss(sk, mtu);
}
-
- header_len = tcp_established_options(sk, NULL, &opts, &md5) +
+ tcp_get_current_key(sk, &key);
+ header_len = tcp_established_options(sk, NULL, &opts, &key) +
sizeof(struct tcphdr);
/* The mss_cache is sized based on tp->tcp_header_len, which assumes
* some common options. If this is an odd packet (because we have SACK
@@ -1979,7 +2000,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
unsigned long bytes;
u32 r;
- bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift);
+ bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift);
r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log);
if (r < BITS_PER_TYPE(sk->sk_gso_max_size))
@@ -2572,7 +2593,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit = max_t(unsigned long,
2 * skb->truesize,
- sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
+ READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
@@ -2580,7 +2601,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
tcp_sk(sk)->tcp_tx_delay) {
- u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay;
+ u64 extra_bytes = (u64)READ_ONCE(sk->sk_pacing_rate) *
+ tcp_sk(sk)->tcp_tx_delay;
/* TSQ is based on skb truesize sum (sk_wmem_alloc), so we
* approximate our needs assuming an ~100% skb->truesize overhead.
@@ -3385,7 +3407,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
/* Save stamp of the first (attempted) retransmit. */
if (!tp->retrans_stamp)
- tp->retrans_stamp = tcp_skb_timestamp(skb);
+ tp->retrans_stamp = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb);
if (tp->undo_retrans < 0)
tp->undo_retrans = 0;
@@ -3633,8 +3655,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
{
struct inet_request_sock *ireq = inet_rsk(req);
const struct tcp_sock *tp = tcp_sk(sk);
- struct tcp_md5sig_key *md5 = NULL;
struct tcp_out_options opts;
+ struct tcp_key key = {};
struct sk_buff *skb;
int tcp_header_size;
struct tcphdr *th;
@@ -3671,6 +3693,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
memset(&opts, 0, sizeof(opts));
+ if (tcp_rsk(req)->req_usec_ts < 0)
+ tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst);
now = tcp_clock_ns();
#ifdef CONFIG_SYN_COOKIES
if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
@@ -3684,16 +3708,48 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
}
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
rcu_read_lock();
- md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
+ if (tcp_rsk_used_ao(req)) {
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_key *ao_key = NULL;
+ u8 maclen = tcp_rsk(req)->maclen;
+ u8 keyid = tcp_rsk(req)->ao_keyid;
+
+ ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req),
+ keyid, -1);
+ /* If there is no matching key - avoid sending anything,
+ * especially unsigned segments. It could try harder and look up
+ * another peer-matching key, but the peer has requested
+ * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here.
+ */
+ if (unlikely(!ao_key || tcp_ao_maclen(ao_key) != maclen)) {
+ u8 key_maclen = ao_key ? tcp_ao_maclen(ao_key) : 0;
+
+ rcu_read_unlock();
+ kfree_skb(skb);
+ net_warn_ratelimited("TCP-AO: the keyid %u with maclen %u|%u from SYN packet is not present - not sending SYNACK\n",
+ keyid, maclen, key_maclen);
+ return NULL;
+ }
+ key.ao_key = ao_key;
+ key.type = TCP_KEY_AO;
+#endif
+ } else {
+#ifdef CONFIG_TCP_MD5SIG
+ key.md5_key = tcp_rsk(req)->af_specific->req_md5_lookup(sk,
+ req_to_sk(req));
+ if (key.md5_key)
+ key.type = TCP_KEY_MD5;
+#endif
+ }
skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4);
/* bpf program will be interested in the tcp_flags */
TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK;
- tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
- foc, synack_type,
- syn_skb) + sizeof(*th);
+ tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts,
+ &key, foc, synack_type, syn_skb)
+ + sizeof(*th);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
@@ -3713,15 +3769,24 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
- tcp_options_write(th, NULL, &opts);
+ tcp_options_write(th, NULL, tcp_rsk(req), &opts, &key);
th->doff = (tcp_header_size >> 2);
TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
-#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */
- if (md5)
+ if (tcp_key_is_md5(&key)) {
+#ifdef CONFIG_TCP_MD5SIG
tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
- md5, req_to_sk(req), skb);
+ key.md5_key, req_to_sk(req), skb);
+#endif
+ } else if (tcp_key_is_ao(&key)) {
+#ifdef CONFIG_TCP_AO
+ tcp_rsk(req)->af_specific->ao_synack_hash(opts.hash_location,
+ key.ao_key, req, skb,
+ opts.hash_location - (u8 *)th, 0);
+#endif
+ }
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
rcu_read_unlock();
#endif
@@ -3769,6 +3834,8 @@ static void tcp_connect_init(struct sock *sk)
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
+ tcp_ao_connect_init(sk);
+
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
@@ -3951,6 +4018,53 @@ int tcp_connect(struct sock *sk)
tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
+#if defined(CONFIG_TCP_MD5SIG) && defined(CONFIG_TCP_AO)
+ /* Has to be checked late, after setting daddr/saddr/ops.
+ * Return error if the peer has both a md5 and a tcp-ao key
+ * configured as this is ambiguous.
+ */
+ if (unlikely(rcu_dereference_protected(tp->md5sig_info,
+ lockdep_sock_is_held(sk)))) {
+ bool needs_ao = !!tp->af_specific->ao_lookup(sk, sk, -1, -1);
+ bool needs_md5 = !!tp->af_specific->md5_lookup(sk, sk);
+ struct tcp_ao_info *ao_info;
+
+ ao_info = rcu_dereference_check(tp->ao_info,
+ lockdep_sock_is_held(sk));
+ if (ao_info) {
+ /* This is an extra check: tcp_ao_required() in
+ * tcp_v{4,6}_parse_md5_keys() should prevent adding
+ * md5 keys on ao_required socket.
+ */
+ needs_ao |= ao_info->ao_required;
+ WARN_ON_ONCE(ao_info->ao_required && needs_md5);
+ }
+ if (needs_md5 && needs_ao)
+ return -EKEYREJECTED;
+
+ /* If we have a matching md5 key and no matching tcp-ao key
+ * then free up ao_info if allocated.
+ */
+ if (needs_md5) {
+ tcp_ao_destroy_sock(sk, false);
+ } else if (needs_ao) {
+ tcp_clear_md5_list(sk);
+ kfree(rcu_replace_pointer(tp->md5sig_info, NULL,
+ lockdep_sock_is_held(sk)));
+ }
+ }
+#endif
+#ifdef CONFIG_TCP_AO
+ if (unlikely(rcu_dereference_protected(tp->ao_info,
+ lockdep_sock_is_held(sk)))) {
+ /* Don't allow connecting if ao is configured but no
+ * matching key is found.
+ */
+ if (!tp->af_specific->ao_lookup(sk, sk, -1, -1))
+ return -EKEYREJECTED;
+ }
+#endif
+
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
return -EHOSTUNREACH; /* Routing failure or similar. */
@@ -3967,7 +4081,7 @@ int tcp_connect(struct sock *sk)
tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
tcp_mstamp_refresh(tp);
- tp->retrans_stamp = tcp_time_stamp(tp);
+ tp->retrans_stamp = tcp_time_stamp_ts(tp);
tcp_connect_queue_skb(sk, buff);
tcp_ecn_send_syn(sk, buff);
tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
@@ -3997,6 +4111,20 @@ int tcp_connect(struct sock *sk)
}
EXPORT_SYMBOL(tcp_connect);
+u32 tcp_delack_max(const struct sock *sk) /* Upper bound that tcp_send_delayed_ack() applies to its ato for @sk. */
+{
+ const struct dst_entry *dst = __sk_dst_get(sk);
+ u32 delack_max = inet_csk(sk)->icsk_delack_max; /* start from the socket's own limit */
+
+ if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) { /* only when a dst exists and reports RTAX_RTO_MIN as locked */
+ u32 rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
+ u32 delack_from_rto_min = max_t(int, 1, rto_min - 1); /* rto_min - 1 floored at 1; compared as int, so rto_min == 0 also yields 1 */
+
+ delack_max = min_t(u32, delack_max, delack_from_rto_min); /* the route metric can only lower the limit, never raise it */
+ }
+ return delack_max;
+}
+
/* Send out a delayed ack, the caller does the policy checking
* to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
* for details.
@@ -4032,7 +4160,7 @@ void tcp_send_delayed_ack(struct sock *sk)
ato = min(ato, max_ato);
}
- ato = min_t(u32, ato, inet_csk(sk)->icsk_delack_max);
+ ato = min_t(u32, ato, tcp_delack_max(sk));
/* Stay within the limit we were given */
timeout = jiffies + ato;