summary | refs | log | tree | commit | diff
path: root/include/net/inet_connection_sock.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/inet_connection_sock.h')
-rw-r--r-- include/net/inet_connection_sock.h 320
1 files changed, 166 insertions, 154 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index de2c78529afa..ecb362025c4e 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* NET Generic infrastructure for INET connection oriented protocols.
*
@@ -6,11 +7,6 @@
* Authors: Many people, see the TCP sources
*
* From code originally in TCP
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _INET_CONNECTION_SOCK_H
#define _INET_CONNECTION_SOCK_H
@@ -19,16 +15,17 @@
#include <linux/string.h>
#include <linux/timer.h>
#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/sockptr.h>
#include <net/inet_sock.h>
#include <net/request_sock.h>
-#define INET_CSK_DEBUG 1
-
/* Cancel timers, when they are not required. */
#undef INET_CSK_CLEAR_TIMERS
struct inet_bind_bucket;
+struct inet_bind2_bucket;
struct tcp_congestion_ops;
/*
@@ -36,44 +33,38 @@ struct tcp_congestion_ops;
* (i.e. things that depend on the address family)
*/
struct inet_connection_sock_af_ops {
- int (*queue_xmit)(struct sk_buff *skb, struct flowi *fl);
+ int (*queue_xmit)(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
void (*send_check)(struct sock *sk, struct sk_buff *skb);
int (*rebuild_header)(struct sock *sk);
void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb);
int (*conn_request)(struct sock *sk, struct sk_buff *skb);
- struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
+ struct sock *(*syn_recv_sock)(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst);
+ struct dst_entry *dst,
+ struct request_sock *req_unhash,
+ bool *own_req);
u16 net_header_len;
- u16 net_frag_header_len;
- u16 sockaddr_len;
- int (*setsockopt)(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
- int (*getsockopt)(struct sock *sk, int level, int optname,
+ int (*setsockopt)(struct sock *sk, int level, int optname,
+ sockptr_t optval, unsigned int optlen);
+ int (*getsockopt)(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
-#ifdef CONFIG_COMPAT
- int (*compat_setsockopt)(struct sock *sk,
- int level, int optname,
- char __user *optval, unsigned int optlen);
- int (*compat_getsockopt)(struct sock *sk,
- int level, int optname,
- char __user *optval, int __user *optlen);
-#endif
- void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
- int (*bind_conflict)(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax);
+ void (*mtu_reduced)(struct sock *sk);
};
/** inet_connection_sock - INET connection oriented sock
*
- * @icsk_accept_queue: FIFO of established children
+ * @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
- * @icsk_timeout: Timeout
- * @icsk_retransmit_timer: Resend (no ack)
+ * @icsk_bind2_hash: Bind node in the bhash2 table
+ * @icsk_delack_timer: Delayed ACK timer
+ * @icsk_keepalive_timer: Keepalive timer
+ * @mptcp_tout_timer: mptcp timer
* @icsk_rto: Retransmit timeout
* @icsk_pmtu_cookie Last pmtu seen by socket
* @icsk_ca_ops Pluggable congestion control hook
* @icsk_af_ops Operations which are AF_INET{4,6} specific
+ * @icsk_ulp_ops Pluggable ULP control hook
+ * @icsk_ulp_data ULP private data
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
* @icsk_pending: Scheduled timer event
@@ -83,21 +74,34 @@ struct inet_connection_sock_af_ops {
* @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options)
* @icsk_ack: Delayed ACK control data
* @icsk_mtup; MTU probing control data
+ * @icsk_probes_tstamp: Probe timestamp (cleared by non-zero window ack)
+ * @icsk_user_timeout: TCP_USER_TIMEOUT value
*/
struct inet_connection_sock {
/* inet_sock has to be the first member! */
struct inet_sock icsk_inet;
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
- unsigned long icsk_timeout;
- struct timer_list icsk_retransmit_timer;
- struct timer_list icsk_delack_timer;
+ struct inet_bind2_bucket *icsk_bind2_hash;
+ struct timer_list icsk_delack_timer;
+ union {
+ struct timer_list icsk_keepalive_timer;
+ struct timer_list mptcp_tout_timer;
+ };
__u32 icsk_rto;
+ __u32 icsk_rto_min;
+ u32 icsk_rto_max;
+ __u32 icsk_delack_max;
__u32 icsk_pmtu_cookie;
const struct tcp_congestion_ops *icsk_ca_ops;
const struct inet_connection_sock_af_ops *icsk_af_ops;
+ const struct tcp_ulp_ops *icsk_ulp_ops;
+ void __rcu *icsk_ulp_data;
unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
- __u8 icsk_ca_state;
+ __u8 icsk_ca_state:5,
+ icsk_ca_initialized:1,
+ icsk_ca_setsockopt:1,
+ icsk_ca_dst_locked:1;
__u8 icsk_retransmits;
__u8 icsk_pending;
__u8 icsk_backoff;
@@ -108,60 +112,67 @@ struct inet_connection_sock {
__u8 pending; /* ACK is pending */
__u8 quick; /* Scheduled number of quick acks */
__u8 pingpong; /* The session is interactive */
- __u8 blocked; /* Delayed ACK was blocked by socket lock */
- __u32 ato; /* Predicted tick of soft clock */
- unsigned long timeout; /* Currently scheduled timeout */
+ __u8 retry; /* Number of attempts */
+ #define ATO_BITS 8
+ __u32 ato:ATO_BITS, /* Predicted tick of soft clock */
+ lrcv_flowlabel:20, /* last received ipv6 flowlabel */
+ dst_quick_ack:1, /* cache dst RTAX_QUICKACK */
+ unused:3;
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
- __u16 rcv_mss; /* MSS used for delayed ACK decisions */
+ __u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack;
struct {
- int enabled;
-
/* Range of MTUs to search */
int search_high;
int search_low;
/* Information on the current probe. */
- int probe_size;
+ u32 probe_size:31,
+ /* Is the MTUP feature enabled for this connection? */
+ enabled:1;
+
+ u32 probe_timestamp;
} icsk_mtup;
- u32 icsk_ca_priv[16];
+ u32 icsk_probes_tstamp;
u32 icsk_user_timeout;
-#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32))
+
+ u64 icsk_ca_priv[104 / sizeof(u64)];
+#define ICSK_CA_PRIV_SIZE sizeof_field(struct inet_connection_sock, icsk_ca_priv)
};
#define ICSK_TIME_RETRANS 1 /* Retransmit timer */
#define ICSK_TIME_DACK 2 /* Delayed ack timer */
#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */
-#define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */
#define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */
+#define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */
-static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
-{
- return (struct inet_connection_sock *)sk;
-}
+#define inet_csk(ptr) container_of_const(ptr, struct inet_connection_sock, icsk_inet.sk)
static inline void *inet_csk_ca(const struct sock *sk)
{
return (void *)inet_csk(sk)->icsk_ca_priv;
}
-extern struct sock *inet_csk_clone_lock(const struct sock *sk,
- const struct request_sock *req,
- const gfp_t priority);
+struct sock *inet_csk_clone_lock(const struct sock *sk,
+ const struct request_sock *req,
+ const gfp_t priority);
enum inet_csk_ack_state_t {
ICSK_ACK_SCHED = 1,
ICSK_ACK_TIMER = 2,
ICSK_ACK_PUSHED = 4,
- ICSK_ACK_PUSHED2 = 8
+ ICSK_ACK_PUSHED2 = 8,
+ ICSK_ACK_NOW = 16, /* Send the next ACK immediately (once) */
+ ICSK_ACK_NOMEM = 32,
};
-extern void inet_csk_init_xmit_timers(struct sock *sk,
- void (*retransmit_handler)(unsigned long),
- void (*delack_handler)(unsigned long),
- void (*keepalive_handler)(unsigned long));
-extern void inet_csk_clear_xmit_timers(struct sock *sk);
+void inet_csk_init_xmit_timers(struct sock *sk,
+ void (*retransmit_handler)(struct timer_list *),
+ void (*delack_handler)(struct timer_list *),
+ void (*keepalive_handler)(struct timer_list *));
+void inet_csk_clear_xmit_timers(struct sock *sk);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk);
static inline void inet_csk_schedule_ack(struct sock *sk)
{
@@ -178,33 +189,35 @@ static inline void inet_csk_delack_init(struct sock *sk)
memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
}
-extern void inet_csk_delete_keepalive_timer(struct sock *sk);
-extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
+static inline unsigned long tcp_timeout_expires(const struct sock *sk)
+{
+ return READ_ONCE(sk->tcp_retransmit_timer.expires);
+}
-#ifdef INET_CSK_DEBUG
-extern const char inet_csk_timer_bug_msg[];
-#endif
+static inline unsigned long
+icsk_delack_timeout(const struct inet_connection_sock *icsk)
+{
+ return READ_ONCE(icsk->icsk_delack_timer.expires);
+}
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
struct inet_connection_sock *icsk = inet_csk(sk);
-
+
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
- icsk->icsk_pending = 0;
+ smp_store_release(&icsk->icsk_pending, 0);
#ifdef INET_CSK_CLEAR_TIMERS
- sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer(sk, &sk->tcp_retransmit_timer);
#endif
} else if (what == ICSK_TIME_DACK) {
- icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
+ smp_store_release(&icsk->icsk_ack.pending, 0);
+ icsk->icsk_ack.retry = 0;
#ifdef INET_CSK_CLEAR_TIMERS
sk_stop_timer(sk, &icsk->icsk_delack_timer);
#endif
+ } else {
+ pr_debug("inet_csk BUG: unknown timer value\n");
}
-#ifdef INET_CSK_DEBUG
- else {
- pr_debug("%s", inet_csk_timer_bug_msg);
- }
-#endif
}
/*
@@ -217,71 +230,55 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
struct inet_connection_sock *icsk = inet_csk(sk);
if (when > max_when) {
-#ifdef INET_CSK_DEBUG
pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
- sk, what, when, current_text_addr());
-#endif
+ sk, what, when, (void *)_THIS_IP_);
when = max_when;
}
+ when += jiffies;
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 ||
- what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE) {
- icsk->icsk_pending = what;
- icsk->icsk_timeout = jiffies + when;
- sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
+ what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) {
+ smp_store_release(&icsk->icsk_pending, what);
+ sk_reset_timer(sk, &sk->tcp_retransmit_timer, when);
} else if (what == ICSK_TIME_DACK) {
- icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
- icsk->icsk_ack.timeout = jiffies + when;
- sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
- }
-#ifdef INET_CSK_DEBUG
- else {
- pr_debug("%s", inet_csk_timer_bug_msg);
+ smp_store_release(&icsk->icsk_ack.pending,
+ icsk->icsk_ack.pending | ICSK_ACK_TIMER);
+ sk_reset_timer(sk, &icsk->icsk_delack_timer, when);
+ } else {
+ pr_debug("inet_csk BUG: unknown timer value\n");
}
-#endif
}
-extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
+static inline unsigned long
+inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
+ unsigned long max_when)
+{
+ u64 when = (u64)icsk->icsk_rto << icsk->icsk_backoff;
-extern struct request_sock *inet_csk_search_req(const struct sock *sk,
- struct request_sock ***prevp,
- const __be16 rport,
- const __be32 raddr,
- const __be32 laddr);
-extern int inet_csk_bind_conflict(const struct sock *sk,
- const struct inet_bind_bucket *tb, bool relax);
-extern int inet_csk_get_port(struct sock *sk, unsigned short snum);
+ return (unsigned long)min_t(u64, when, max_when);
+}
-extern struct dst_entry* inet_csk_route_req(struct sock *sk,
- struct flowi4 *fl4,
- const struct request_sock *req);
-extern struct dst_entry* inet_csk_route_child_sock(struct sock *sk,
- struct sock *newsk,
- const struct request_sock *req);
+struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg);
-static inline void inet_csk_reqsk_queue_add(struct sock *sk,
- struct request_sock *req,
- struct sock *child)
-{
- reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
-}
+int inet_csk_get_port(struct sock *sk, unsigned short snum);
-extern void inet_csk_reqsk_queue_hash_add(struct sock *sk,
- struct request_sock *req,
- unsigned long timeout);
+struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4,
+ const struct request_sock *req);
+struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
+ struct sock *newsk,
+ const struct request_sock *req);
-static inline void inet_csk_reqsk_queue_removed(struct sock *sk,
- struct request_sock *req)
-{
- if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0)
- inet_csk_delete_keepalive_timer(sk);
-}
+struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
+ struct request_sock *req,
+ struct sock *child);
+bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req);
+struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
+ struct request_sock *req,
+ bool own_req);
-static inline void inet_csk_reqsk_queue_added(struct sock *sk,
- const unsigned long timeout)
+static inline void inet_csk_reqsk_queue_added(struct sock *sk)
{
- if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0)
- inet_csk_reset_keepalive_timer(sk, timeout);
+ reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue);
}
static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
@@ -289,58 +286,73 @@ static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue);
}
-static inline int inet_csk_reqsk_queue_young(const struct sock *sk)
+static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
{
- return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue);
+ return inet_csk_reqsk_queue_len(sk) > READ_ONCE(sk->sk_max_ack_backlog);
}
-static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
+bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
+void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req);
+
+void inet_csk_destroy_sock(struct sock *sk);
+void inet_csk_prepare_for_destroy_sock(struct sock *sk);
+void inet_csk_prepare_forced_close(struct sock *sk);
+
+/*
+ * LISTEN is a special case for poll..
+ */
+static inline __poll_t inet_csk_listen_poll(const struct sock *sk)
{
- return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);
+ return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ?
+ (EPOLLIN | EPOLLRDNORM) : 0;
}
-static inline void inet_csk_reqsk_queue_unlink(struct sock *sk,
- struct request_sock *req,
- struct request_sock **prev)
+int inet_csk_listen_start(struct sock *sk);
+void inet_csk_listen_stop(struct sock *sk);
+
+/* update the fast reuse flag when adding a socket */
+void inet_csk_update_fastreuse(const struct sock *sk,
+ struct inet_bind_bucket *tb,
+ struct inet_bind2_bucket *tb2);
+
+struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
+
+static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
{
- reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev);
+ inet_csk(sk)->icsk_ack.pingpong =
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
}
-static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
- struct request_sock *req,
- struct request_sock **prev)
+static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
{
- inet_csk_reqsk_queue_unlink(sk, req, prev);
- inet_csk_reqsk_queue_removed(sk, req);
- reqsk_free(req);
+ inet_csk(sk)->icsk_ack.pingpong = 0;
}
-extern void inet_csk_reqsk_queue_prune(struct sock *parent,
- const unsigned long interval,
- const unsigned long timeout,
- const unsigned long max_rto);
-
-extern void inet_csk_destroy_sock(struct sock *sk);
-extern void inet_csk_prepare_forced_close(struct sock *sk);
+static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
+{
+ return inet_csk(sk)->icsk_ack.pingpong >=
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pingpong_thresh);
+}
-/*
- * LISTEN is a special case for poll..
- */
-static inline unsigned int inet_csk_listen_poll(const struct sock *sk)
+static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
{
- return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ?
- (POLLIN | POLLRDNORM) : 0;
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ack.pingpong < U8_MAX)
+ icsk->icsk_ack.pingpong++;
}
-extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries);
-extern void inet_csk_listen_stop(struct sock *sk);
+static inline bool inet_csk_has_ulp(const struct sock *sk)
+{
+ return inet_test_bit(IS_ICSK, sk) && !!inet_csk(sk)->icsk_ulp_ops;
+}
-extern void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
+static inline void inet_init_csk_locks(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
-extern int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-extern int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, unsigned int optlen);
+ spin_lock_init(&icsk->icsk_accept_queue.rskq_lock);
+ spin_lock_init(&icsk->icsk_accept_queue.fastopenq.lock);
+}
-extern struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
#endif /* _INET_CONNECTION_SOCK_H */