summaryrefslogtreecommitdiff
path: root/include/net/sock.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/net/sock.h')
-rw-r--r--include/net/sock.h397
1 files changed, 171 insertions, 226 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 54ca8dcbfb43..7ef728324e4e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -337,6 +337,7 @@ struct sk_filter;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
+ * @sk_user_frags: xarray of pages the user is holding a reference on.
*/
struct sock {
/*
@@ -378,14 +379,10 @@ struct sock {
#define sk_flags __sk_common.skc_flags
#define sk_rxhash __sk_common.skc_rxhash
- /* early demux fields */
- struct dst_entry __rcu *sk_rx_dst;
- int sk_rx_dst_ifindex;
- u32 sk_rx_dst_cookie;
+ __cacheline_group_begin(sock_write_rx);
- socket_lock_t sk_lock;
atomic_t sk_drops;
- int sk_rcvlowat;
+ __s32 sk_peek_off;
struct sk_buff_head sk_error_queue;
struct sk_buff_head sk_receive_queue;
/*
@@ -402,18 +399,24 @@ struct sock {
struct sk_buff *head;
struct sk_buff *tail;
} sk_backlog;
-
#define sk_rmem_alloc sk_backlog.rmem_alloc
- int sk_forward_alloc;
- u32 sk_reserved_mem;
+ __cacheline_group_end(sock_write_rx);
+
+ __cacheline_group_begin(sock_read_rx);
+ /* early demux fields */
+ struct dst_entry __rcu *sk_rx_dst;
+ int sk_rx_dst_ifindex;
+ u32 sk_rx_dst_cookie;
+
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sk_ll_usec;
- /* ===== mostly read cache line ===== */
unsigned int sk_napi_id;
+ u16 sk_busy_poll_budget;
+ u8 sk_prefer_busy_poll;
#endif
+ u8 sk_userlocks;
int sk_rcvbuf;
- int sk_disconnects;
struct sk_filter __rcu *sk_filter;
union {
@@ -422,15 +425,33 @@ struct sock {
struct socket_wq *sk_wq_raw;
/* public: */
};
+
+ void (*sk_data_ready)(struct sock *sk);
+ long sk_rcvtimeo;
+ int sk_rcvlowat;
+ __cacheline_group_end(sock_read_rx);
+
+ __cacheline_group_begin(sock_read_rxtx);
+ int sk_err;
+ struct socket *sk_socket;
+ struct mem_cgroup *sk_memcg;
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
#endif
+ __cacheline_group_end(sock_read_rxtx);
- struct dst_entry __rcu *sk_dst_cache;
+ __cacheline_group_begin(sock_write_rxtx);
+ socket_lock_t sk_lock;
+ u32 sk_reserved_mem;
+ int sk_forward_alloc;
+ u32 sk_tsflags;
+ __cacheline_group_end(sock_write_rxtx);
+
+ __cacheline_group_begin(sock_write_tx);
+ int sk_write_pending;
atomic_t sk_omem_alloc;
int sk_sndbuf;
- /* ===== cache line for TX ===== */
int sk_wmem_queued;
refcount_t sk_wmem_alloc;
unsigned long sk_tsq_flags;
@@ -439,22 +460,36 @@ struct sock {
struct rb_root tcp_rtx_queue;
};
struct sk_buff_head sk_write_queue;
- __s32 sk_peek_off;
- int sk_write_pending;
- __u32 sk_dst_pending_confirm;
+ u32 sk_dst_pending_confirm;
u32 sk_pacing_status; /* see enum sk_pacing */
- long sk_sndtimeo;
+ struct page_frag sk_frag;
struct timer_list sk_timer;
- __u32 sk_priority;
- __u32 sk_mark;
+
unsigned long sk_pacing_rate; /* bytes per second */
+ atomic_t sk_zckey;
+ atomic_t sk_tskey;
+ __cacheline_group_end(sock_write_tx);
+
+ __cacheline_group_begin(sock_read_tx);
unsigned long sk_max_pacing_rate;
- struct page_frag sk_frag;
+ long sk_sndtimeo;
+ u32 sk_priority;
+ u32 sk_mark;
+ struct dst_entry __rcu *sk_dst_cache;
netdev_features_t sk_route_caps;
- int sk_gso_type;
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+ struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk,
+ struct net_device *dev,
+ struct sk_buff *skb);
+#endif
+ u16 sk_gso_type;
+ u16 sk_gso_max_segs;
unsigned int sk_gso_max_size;
gfp_t sk_allocation;
- __u32 sk_txhash;
+ u32 sk_txhash;
+ u8 sk_pacing_shift;
+ bool sk_use_task_frag;
+ __cacheline_group_end(sock_read_tx);
/*
* Because of non atomicity rules, all
@@ -463,64 +498,44 @@ struct sock {
u8 sk_gso_disabled : 1,
sk_kern_sock : 1,
sk_no_check_tx : 1,
- sk_no_check_rx : 1,
- sk_userlocks : 4;
- u8 sk_pacing_shift;
+ sk_no_check_rx : 1;
+ u8 sk_shutdown;
u16 sk_type;
u16 sk_protocol;
- u16 sk_gso_max_segs;
unsigned long sk_lingertime;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
- int sk_err,
- sk_err_soft;
+ int sk_err_soft;
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
kuid_t sk_uid;
- u8 sk_txrehash;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- u8 sk_prefer_busy_poll;
- u16 sk_busy_poll_budget;
-#endif
spinlock_t sk_peer_lock;
int sk_bind_phc;
struct pid *sk_peer_pid;
const struct cred *sk_peer_cred;
- long sk_rcvtimeo;
ktime_t sk_stamp;
#if BITS_PER_LONG==32
seqlock_t sk_stamp_seq;
#endif
- atomic_t sk_tskey;
- atomic_t sk_zckey;
- u32 sk_tsflags;
- u8 sk_shutdown;
+ int sk_disconnects;
+ u8 sk_txrehash;
u8 sk_clockid;
u8 sk_txtime_deadline_mode : 1,
sk_txtime_report_errors : 1,
sk_txtime_unused : 6;
- bool sk_use_task_frag;
- struct socket *sk_socket;
void *sk_user_data;
#ifdef CONFIG_SECURITY
void *sk_security;
#endif
struct sock_cgroup_data sk_cgrp_data;
- struct mem_cgroup *sk_memcg;
void (*sk_state_change)(struct sock *sk);
- void (*sk_data_ready)(struct sock *sk);
void (*sk_write_space)(struct sock *sk);
void (*sk_error_report)(struct sock *sk);
int (*sk_backlog_rcv)(struct sock *sk,
struct sk_buff *skb);
-#ifdef CONFIG_SOCK_VALIDATE_XMIT
- struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk,
- struct net_device *dev,
- struct sk_buff *skb);
-#endif
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
#ifdef CONFIG_BPF_SYSCALL
@@ -528,6 +543,12 @@ struct sock {
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
+ struct xarray sk_user_frags;
+};
+
+struct sock_bh_locked {
+ struct sock *sock;
+ local_lock_t bh_lock;
};
enum sk_pacing {
@@ -873,6 +894,8 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
+#define sk_for_each_bound_safe(__sk, tmp, list) \
+ hlist_for_each_entry_safe(__sk, tmp, list, sk_bind_node)
/**
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
@@ -930,9 +953,16 @@ enum sock_flags {
SOCK_XDP, /* XDP is attached */
SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */
SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */
+ SOCK_RCVPRIORITY, /* Receive SO_PRIORITY ancillary data with packet */
};
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
+/*
+ * The highest bit of sk_tsflags is reserved for kernel-internal
+ * SOCKCM_FLAG_TS_OPT_ID. There is a check in core/sock.c to control that
+ * SOF_TIMESTAMPING* values do not reach this reserved area
+ */
+#define SOCKCM_FLAG_TS_OPT_ID BIT(31)
static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk)
{
@@ -1103,41 +1133,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
WRITE_ONCE(sk->sk_incoming_cpu, cpu);
}
-static inline void sock_rps_record_flow_hash(__u32 hash)
-{
-#ifdef CONFIG_RPS
- struct rps_sock_flow_table *sock_flow_table;
-
- rcu_read_lock();
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
- rps_record_sock_flow(sock_flow_table, hash);
- rcu_read_unlock();
-#endif
-}
-
-static inline void sock_rps_record_flow(const struct sock *sk)
-{
-#ifdef CONFIG_RPS
- if (static_branch_unlikely(&rfs_needed)) {
- /* Reading sk->sk_rxhash might incur an expensive cache line
- * miss.
- *
- * TCP_ESTABLISHED does cover almost all states where RFS
- * might be useful, and is cheaper [1] than testing :
- * IPv4: inet_sk(sk)->inet_daddr
- * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
- * OR an additional socket flag
- * [1] : sk_state and sk_prot are in the same cache line.
- */
- if (sk->sk_state == TCP_ESTABLISHED) {
- /* This READ_ONCE() is paired with the WRITE_ONCE()
- * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
- */
- sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
- }
- }
-#endif
-}
static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
@@ -1215,6 +1210,13 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size)
size - offsetof(struct sock, sk_node.pprev));
}
+struct proto_accept_arg {
+ int flags;
+ int err;
+ int is_empty;
+ bool kern;
+};
+
/* Networking protocol blocks we attach to sockets.
* socket layer -> transport layer interface
*/
@@ -1229,8 +1231,8 @@ struct proto {
int addr_len);
int (*disconnect)(struct sock *sk, int flags);
- struct sock * (*accept)(struct sock *sk, int flags, int *err,
- bool kern);
+ struct sock * (*accept)(struct sock *sk,
+ struct proto_accept_arg *arg);
int (*ioctl)(struct sock *sk, int cmd,
int *karg);
@@ -1392,72 +1394,6 @@ static inline int sk_under_cgroup_hierarchy(struct sock *sk,
#endif
}
-static inline bool sk_has_memory_pressure(const struct sock *sk)
-{
- return sk->sk_prot->memory_pressure != NULL;
-}
-
-static inline bool sk_under_global_memory_pressure(const struct sock *sk)
-{
- return sk->sk_prot->memory_pressure &&
- !!READ_ONCE(*sk->sk_prot->memory_pressure);
-}
-
-static inline bool sk_under_memory_pressure(const struct sock *sk)
-{
- if (!sk->sk_prot->memory_pressure)
- return false;
-
- if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
- mem_cgroup_under_socket_pressure(sk->sk_memcg))
- return true;
-
- return !!READ_ONCE(*sk->sk_prot->memory_pressure);
-}
-
-static inline long
-proto_memory_allocated(const struct proto *prot)
-{
- return max(0L, atomic_long_read(prot->memory_allocated));
-}
-
-static inline long
-sk_memory_allocated(const struct sock *sk)
-{
- return proto_memory_allocated(sk->sk_prot);
-}
-
-/* 1 MB per cpu, in page units */
-#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
-
-static inline void
-sk_memory_allocated_add(struct sock *sk, int amt)
-{
- int local_reserve;
-
- preempt_disable();
- local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
- __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
- atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
- }
- preempt_enable();
-}
-
-static inline void
-sk_memory_allocated_sub(struct sock *sk, int amt)
-{
- int local_reserve;
-
- preempt_disable();
- local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
- __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
- atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
- }
- preempt_enable();
-}
-
#define SK_ALLOC_PERCPU_COUNTER_BATCH 16
static inline void sk_sockets_allocated_dec(struct sock *sk)
@@ -1484,15 +1420,6 @@ proto_sockets_allocated_sum_positive(struct proto *prot)
return percpu_counter_sum_positive(prot->sockets_allocated);
}
-static inline bool
-proto_memory_pressure(struct proto *prot)
-{
- if (!prot->memory_pressure)
- return false;
- return !!READ_ONCE(*prot->memory_pressure);
-}
-
-
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR 64 /* should be enough for the first time */
struct prot_inuse {
@@ -1601,7 +1528,7 @@ static inline bool sk_wmem_schedule(struct sock *sk, int size)
}
static inline bool
-sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
+__sk_rmem_schedule(struct sock *sk, int size, bool pfmemalloc)
{
int delta;
@@ -1609,7 +1536,13 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
return true;
delta = size - sk->sk_forward_alloc;
return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) ||
- skb_pfmemalloc(skb);
+ pfmemalloc;
+}
+
+static inline bool
+sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
+{
+ return __sk_rmem_schedule(sk, size, skb_pfmemalloc(skb));
}
static inline int sk_unused_reserved_mem(const struct sock *sk)
@@ -1708,7 +1641,7 @@ bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock);
* lock_sock_fast - fast version of lock_sock
* @sk: socket
*
- * This version should be used for very small section, where process wont block
+ * This version should be used for very small section, where process won't block
* return false if fast path is taken:
*
* sk_lock.slock locked, owned = 0, BH disabled
@@ -1779,6 +1712,13 @@ static inline void sock_owned_by_me(const struct sock *sk)
#endif
}
+static inline void sock_not_owned_by_me(const struct sock *sk)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks);
+#endif
+}
+
static inline bool sock_owned_by_user(const struct sock *sk)
{
sock_owned_by_me(sk);
@@ -1811,6 +1751,7 @@ static inline bool sock_allow_reclassification(const struct sock *csk)
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot, int kern);
void sk_free(struct sock *sk);
+void sk_net_refcnt_upgrade(struct sock *sk);
void sk_destruct(struct sock *sk);
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
void sk_free_unlock_clone(struct sock *sk);
@@ -1827,6 +1768,15 @@ void sock_efree(struct sk_buff *skb);
#ifdef CONFIG_INET
void sock_edemux(struct sk_buff *skb);
void sock_pfree(struct sk_buff *skb);
+
+static inline void skb_set_owner_edemux(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb);
+ if (refcount_inc_not_zero(&sk->sk_refcnt)) {
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+ }
+}
#else
#define sock_edemux sock_efree
#endif
@@ -1871,13 +1821,16 @@ struct sockcm_cookie {
u64 transmit_time;
u32 mark;
u32 tsflags;
+ u32 ts_opt_id;
+ u32 priority;
};
static inline void sockcm_init(struct sockcm_cookie *sockc,
const struct sock *sk)
{
*sockc = (struct sockcm_cookie) {
- .tsflags = READ_ONCE(sk->sk_tsflags)
+ .tsflags = READ_ONCE(sk->sk_tsflags),
+ .priority = READ_ONCE(sk->sk_priority),
};
}
@@ -1893,7 +1846,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
int sock_no_bind(struct socket *, struct sockaddr *, int);
int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
-int sock_no_accept(struct socket *, struct socket *, int, bool);
+int sock_no_accept(struct socket *, struct socket *, struct proto_accept_arg *);
int sock_no_getname(struct socket *, struct sockaddr *, int);
int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
int sock_no_listen(struct socket *, int);
@@ -2145,17 +2098,10 @@ sk_dst_get(const struct sock *sk)
static inline void __dst_negative_advice(struct sock *sk)
{
- struct dst_entry *ndst, *dst = __sk_dst_get(sk);
+ struct dst_entry *dst = __sk_dst_get(sk);
- if (dst && dst->ops->negative_advice) {
- ndst = dst->ops->negative_advice(dst);
-
- if (ndst != dst) {
- rcu_assign_pointer(sk->sk_dst_cache, ndst);
- sk_tx_queue_clear(sk);
- WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
- }
- }
+ if (dst && dst->ops->negative_advice)
+ dst->ops->negative_advice(sk, dst);
}
static inline void dst_negative_advice(struct sock *sk)
@@ -2184,7 +2130,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst)
sk_tx_queue_clear(sk);
WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
- old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);
+ old_dst = unrcu_pointer(xchg(&sk->sk_dst_cache, RCU_INITIALIZER(dst)));
dst_release(old_dst);
}
@@ -2355,7 +2301,7 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
}
/**
- * sock_poll_wait - place memory barrier behind the poll_wait call.
+ * sock_poll_wait - wrapper for the poll_wait call.
* @filp: file
* @sock: socket to wait on
* @p: poll_table
@@ -2365,15 +2311,12 @@ static inline bool skwq_has_sleeper(struct socket_wq *wq)
static inline void sock_poll_wait(struct file *filp, struct socket *sock,
poll_table *p)
{
- if (!poll_does_not_wait(p)) {
- poll_wait(filp, &sock->wq.wait, p);
- /* We need to be sure we are in sync with the
- * socket flags modification.
- *
- * This memory barrier is paired in the wq_has_sleeper.
- */
- smp_mb();
- }
+ /* Provides a barrier we need to be sure we are in sync
+ * with the socket flags modification.
+ *
+ * This memory barrier is paired in the wq_has_sleeper.
+ */
+ poll_wait(filp, &sock->wq.wait, p);
}
static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
@@ -2526,6 +2469,12 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band)
}
}
+static inline void sk_wake_async_rcu(const struct sock *sk, int how, int band)
+{
+ if (unlikely(sock_flag(sk, SOCK_FASYNC)))
+ sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
+}
+
/* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
* need sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak.
* Note: for send buffers, TCP works better if we can build two skbs at
@@ -2624,7 +2573,7 @@ struct sock_skb_cb {
/* Store sock_skb_cb at the end of skb->cb[] so protocol families
* using skb->cb[] would keep using it directly and utilize its
- * alignement guarantee.
+ * alignment guarantee.
*/
#define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \
sizeof(struct sock_skb_cb)))
@@ -2716,7 +2665,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
{
#define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \
(1UL << SOCK_RCVTSTAMP) | \
- (1UL << SOCK_RCVMARK))
+ (1UL << SOCK_RCVMARK) |\
+ (1UL << SOCK_RCVPRIORITY))
#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
SOF_TIMESTAMPING_RAW_HARDWARE)
@@ -2729,39 +2679,48 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
sock_write_timestamp(sk, 0);
}
-void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags);
+void __sock_tx_timestamp(__u32 tsflags, __u8 *tx_flags);
/**
* _sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
* @sk: socket sending this packet
- * @tsflags: timestamping flags to use
+ * @sockc: pointer to socket cmsg cookie to get timestamping info
* @tx_flags: completed with instructions for time stamping
* @tskey: filled in with next sk_tskey (not for TCP, which uses seqno)
*
* Note: callers should take care of initial ``*tx_flags`` value (usually 0)
*/
-static inline void _sock_tx_timestamp(struct sock *sk, __u16 tsflags,
+static inline void _sock_tx_timestamp(struct sock *sk,
+ const struct sockcm_cookie *sockc,
__u8 *tx_flags, __u32 *tskey)
{
+ __u32 tsflags = sockc->tsflags;
+
if (unlikely(tsflags)) {
__sock_tx_timestamp(tsflags, tx_flags);
if (tsflags & SOF_TIMESTAMPING_OPT_ID && tskey &&
- tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
- *tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) {
+ if (tsflags & SOCKCM_FLAG_TS_OPT_ID)
+ *tskey = sockc->ts_opt_id;
+ else
+ *tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+ }
}
if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
*tx_flags |= SKBTX_WIFI_STATUS;
}
-static inline void sock_tx_timestamp(struct sock *sk, __u16 tsflags,
+static inline void sock_tx_timestamp(struct sock *sk,
+ const struct sockcm_cookie *sockc,
__u8 *tx_flags)
{
- _sock_tx_timestamp(sk, tsflags, tx_flags, NULL);
+ _sock_tx_timestamp(sk, sockc, tx_flags, NULL);
}
-static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
+static inline void skb_setup_tx_timestamp(struct sk_buff *skb,
+ const struct sockcm_cookie *sockc)
{
- _sock_tx_timestamp(skb->sk, tsflags, &skb_shinfo(skb)->tx_flags,
+ _sock_tx_timestamp(skb->sk, sockc, &skb_shinfo(skb)->tx_flags,
&skb_shinfo(skb)->tskey);
}
@@ -2791,6 +2750,11 @@ static inline bool sk_is_stream_unix(const struct sock *sk)
return sk->sk_family == AF_UNIX && sk->sk_type == SOCK_STREAM;
}
+static inline bool sk_is_vsock(const struct sock *sk)
+{
+ return sk->sk_family == AF_VSOCK;
+}
+
/**
* sk_eat_skb - Release a skb if it is no longer needed
* @sk: socket to eat this skb from
@@ -2830,31 +2794,6 @@ sk_is_refcounted(struct sock *sk)
return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE);
}
-/**
- * skb_steal_sock - steal a socket from an sk_buff
- * @skb: sk_buff to steal the socket from
- * @refcounted: is set to true if the socket is reference-counted
- * @prefetched: is set to true if the socket was assigned from bpf
- */
-static inline struct sock *
-skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched)
-{
- if (skb->sk) {
- struct sock *sk = skb->sk;
-
- *refcounted = true;
- *prefetched = skb_sk_is_prefetched(skb);
- if (*prefetched)
- *refcounted = sk_is_refcounted(sk);
- skb->destructor = NULL;
- skb->sk = NULL;
- return sk;
- }
- *prefetched = false;
- *refcounted = false;
- return NULL;
-}
-
/* Checks if this SKB belongs to an HW offloaded socket
* and whether any SW fallbacks are required based on dev.
* Check decrypted mark in case skb_orphan() cleared socket.
@@ -2867,12 +2806,10 @@ static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) {
skb = sk->sk_validate_xmit_skb(sk, dev, skb);
-#ifdef CONFIG_TLS_DEVICE
- } else if (unlikely(skb->decrypted)) {
+ } else if (unlikely(skb_is_decrypted(skb))) {
pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n");
kfree_skb(skb);
skb = NULL;
-#endif
}
#endif
@@ -2887,6 +2824,16 @@ static inline bool sk_listener(const struct sock *sk)
return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
}
+/* This helper checks if a socket is a LISTEN or NEW_SYN_RECV or TIME_WAIT
+ * TCP SYNACK messages can be attached to LISTEN or NEW_SYN_RECV (depending on SYNCOOKIE)
+ * TCP RST and ACK can be attached to TIME_WAIT.
+ */
+static inline bool sk_listener_or_tw(const struct sock *sk)
+{
+ return (1 << READ_ONCE(sk->sk_state)) &
+ (TCPF_LISTEN | TCPF_NEW_SYN_RECV | TCPF_TIME_WAIT);
+}
+
void sock_enable_timestamp(struct sock *sk, enum sock_flags flag);
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
int type);
@@ -2911,8 +2858,6 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
-extern int sysctl_tstamp_allow_data;
-
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;