summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/linux/mlx5/mlx5_ifc.h5
-rw-r--r--include/linux/netdevice.h7
-rw-r--r--include/linux/rculist_nulls.h7
-rw-r--r--include/linux/skbuff.h30
-rw-r--r--include/net/flow_dissector.h9
-rw-r--r--include/net/sock.h38
-rw-r--r--include/uapi/linux/bpf.h16
-rw-r--r--include/uapi/linux/netfilter/nf_conntrack_common.h12
8 files changed, 104 insertions, 20 deletions
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ff8c9d527bb4..bfdf41537cf1 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -688,7 +688,10 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
u8 nic_rx_multi_path_tirs[0x1];
u8 nic_rx_multi_path_tirs_fts[0x1];
u8 allow_sniffer_and_nic_rx_shared_tir[0x1];
- u8 reserved_at_3[0x1d];
+ u8 reserved_at_3[0x4];
+ u8 sw_owner_reformat_supported[0x1];
+ u8 reserved_at_8[0x18];
+
u8 encap_general_header[0x1];
u8 reserved_at_21[0xa];
u8 log_max_packet_reformat_context[0x5];
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9f1f633235f6..6c3f7032e8d9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -72,6 +72,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev,
#define NET_RX_SUCCESS 0 /* keep 'em coming, baby */
#define NET_RX_DROP 1 /* packet dropped */
+#define MAX_NEST_DEV 8
+
/*
* Transmit return codes: transmit return codes originate from three different
* namespaces:
@@ -4389,11 +4391,8 @@ void *netdev_lower_get_next(struct net_device *dev,
ldev; \
ldev = netdev_lower_get_next(dev, &(iter)))
-struct net_device *netdev_all_lower_get_next(struct net_device *dev,
+struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
struct list_head **iter);
-struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
- struct list_head **iter);
-
int netdev_walk_all_lower_dev(struct net_device *dev,
int (*fn)(struct net_device *lower_dev,
void *data),
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index e5b752027a03..9670b54b484a 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -145,6 +145,13 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
}
}
+/*
+ * Point @n->pprev at the node's own ->next and store a nulls marker in
+ * ->next; after that hlist_nulls_del will work on @n.
+ */
+static inline void hlist_nulls_add_fake(struct hlist_nulls_node *n)
+{
+ n->pprev = &n->next;
+ n->next = (struct hlist_nulls_node *)NULLS_MARKER(NULL);
+}
+
/**
* hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ca8806b69388..5b50278c4bc8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -611,9 +611,15 @@ typedef unsigned char *sk_buff_data_t;
* @next: Next buffer in list
* @prev: Previous buffer in list
* @tstamp: Time we arrived/left
+ * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point
+ * for retransmit timer
* @rbnode: RB tree node, alternative to next/prev for netem/tcp
+ * @list: queue head
* @sk: Socket we are owned by
+ * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in
+ * fragmentation management
* @dev: Device we arrived on/are leaving by
+ * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL
* @cb: Control buffer. Free for use by every layer. Put private vars here
* @_skb_refdst: destination entry (with norefcount bit)
* @sp: the security path, used for xfrm
@@ -632,6 +638,9 @@ typedef unsigned char *sk_buff_data_t;
* @pkt_type: Packet class
* @fclone: skbuff clone status
* @ipvs_property: skbuff is owned by ipvs
+ * @inner_protocol_type: whether the inner protocol is
+ * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO
+ * @remcsum_offload: remote checksum offload is enabled
* @offload_fwd_mark: Packet was L2-forwarded in hardware
* @offload_l3_fwd_mark: Packet was L3-forwarded in hardware
* @tc_skip_classify: do not classify packet. set by IFB device
@@ -650,6 +659,8 @@ typedef unsigned char *sk_buff_data_t;
* @tc_index: Traffic control index
* @hash: the packet hash
* @queue_mapping: Queue mapping for multiqueue devices
+ * @head_frag: skb was allocated from page fragments,
+ * not allocated by kmalloc() or vmalloc().
* @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
* @active_extensions: active extensions (skb_ext_id types)
* @ndisc_nodetype: router type (from link layer)
@@ -660,15 +671,28 @@ typedef unsigned char *sk_buff_data_t;
* @wifi_acked_valid: wifi_acked was set
* @wifi_acked: whether frame was acked on wifi or not
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
+ * @encapsulation: indicates the inner headers in the skbuff are valid
+ * @encap_hdr_csum: software checksum is needed
+ * @csum_valid: checksum is already valid
* @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
+ * @csum_complete_sw: checksum was completed by software
+ * @csum_level: indicates the number of consecutive checksums found in
+ * the packet minus one that have been verified as
+ * CHECKSUM_UNNECESSARY (max 3)
* @dst_pending_confirm: need to confirm neighbour
* @decrypted: Decrypted SKB
* @napi_id: id of the NAPI struct this skb came from
+ * @sender_cpu: (aka @napi_id) source CPU in XPS
* @secmark: security marking
* @mark: Generic packet mark
+ * @reserved_tailroom: (aka @mark) number of bytes of free space available
+ * at the tail of an sk_buff
+ * @vlan_present: VLAN tag is present
* @vlan_proto: vlan encapsulation protocol
* @vlan_tci: vlan tag control information
* @inner_protocol: Protocol (encapsulation)
+ * @inner_ipproto: (aka @inner_protocol) stores ipproto when
+ * skb->inner_protocol_type == ENCAP_TYPE_IPPROTO;
* @inner_transport_header: Inner transport layer header (encapsulation)
* @inner_network_header: Network layer header (encapsulation)
* @inner_mac_header: Link layer header (encapsulation)
@@ -750,7 +774,9 @@ struct sk_buff {
#endif
#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset)
+ /* private: */
__u8 __cloned_offset[0];
+ /* public: */
__u8 cloned:1,
nohdr:1,
fclone:2,
@@ -775,7 +801,9 @@ struct sk_buff {
#endif
#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset)
+ /* private: */
__u8 __pkt_type_offset[0];
+ /* public: */
__u8 pkt_type:3;
__u8 ignore_df:1;
__u8 nf_trace:1;
@@ -798,7 +826,9 @@ struct sk_buff {
#define PKT_VLAN_PRESENT_BIT 0
#endif
#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset)
+ /* private: */
__u8 __pkt_vlan_present_offset[0];
+ /* public: */
__u8 vlan_present:1;
__u8 csum_complete_sw:1;
__u8 csum_level:2;
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index e9391e877f9a..628383915827 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/in6.h>
#include <linux/siphash.h>
+#include <linux/string.h>
#include <uapi/linux/if_ether.h>
struct sk_buff;
@@ -348,4 +349,12 @@ struct bpf_flow_dissector {
void *data_end;
};
+static inline void
+flow_dissector_init_keys(struct flow_dissector_key_control *key_control,
+ struct flow_dissector_key_basic *key_basic)
+{
+ memset(key_control, 0, sizeof(*key_control)); /* clear every byte of the control key */
+ memset(key_basic, 0, sizeof(*key_basic)); /* clear every byte of the basic key */
+}
+
#endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 02162b0378f7..328564525526 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -117,19 +117,26 @@ typedef __u64 __bitwise __addrpair;
* struct sock_common - minimal network layer representation of sockets
* @skc_daddr: Foreign IPv4 addr
* @skc_rcv_saddr: Bound local IPv4 addr
+ * @skc_addrpair: 8-byte-aligned __u64 union of @skc_daddr & @skc_rcv_saddr
* @skc_hash: hash value used with various protocol lookup tables
* @skc_u16hashes: two u16 hash values used by UDP lookup tables
* @skc_dport: placeholder for inet_dport/tw_dport
* @skc_num: placeholder for inet_num/tw_num
+ * @skc_portpair: __u32 union of @skc_dport & @skc_num
* @skc_family: network address family
* @skc_state: Connection state
* @skc_reuse: %SO_REUSEADDR setting
* @skc_reuseport: %SO_REUSEPORT setting
+ * @skc_ipv6only: socket is IPV6 only
+ * @skc_net_refcnt: socket is using net ref counting
* @skc_bound_dev_if: bound device index if != 0
* @skc_bind_node: bind hash linkage for various protocol lookup tables
* @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
* @skc_prot: protocol handlers inside a network family
* @skc_net: reference to the network namespace of this socket
+ * @skc_v6_daddr: IPV6 destination address
+ * @skc_v6_rcv_saddr: IPV6 source address
+ * @skc_cookie: socket's cookie value
* @skc_node: main hash linkage for various protocol lookup tables
* @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
* @skc_tx_queue_mapping: tx queue number for this connection
@@ -137,7 +144,15 @@ typedef __u64 __bitwise __addrpair;
* @skc_flags: place holder for sk_flags
* %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
* %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
+ * @skc_listener: connection request listener socket (aka rsk_listener)
+ * [union with @skc_flags]
+ * @skc_tw_dr: (aka tw_dr) ptr to &struct inet_timewait_death_row
+ * [union with @skc_flags]
* @skc_incoming_cpu: record/match cpu processing incoming packets
+ * @skc_rcv_wnd: (aka rsk_rcv_wnd) TCP receive window size (possibly scaled)
+ * [union with @skc_incoming_cpu]
+ * @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number
+ * [union with @skc_incoming_cpu]
* @skc_refcnt: reference count
*
* This is the minimal network layer representation of sockets, the header
@@ -245,6 +260,7 @@ struct bpf_sk_storage;
* @sk_dst_cache: destination cache
* @sk_dst_pending_confirm: need to confirm neighbour
* @sk_policy: flow policy
+ * @sk_rx_skb_cache: cache copy of recently accessed RX skb
* @sk_receive_queue: incoming packets
* @sk_wmem_alloc: transmit queue bytes committed
* @sk_tsq_flags: TCP Small Queues flags
@@ -265,6 +281,8 @@ struct bpf_sk_storage;
* @sk_no_check_rx: allow zero checksum in RX packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
* @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
+ * @sk_route_forced_caps: static, forced route capabilities
+ * (set in tcp_init_sock())
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
* @sk_gso_max_size: Maximum GSO segment size to build
* @sk_gso_max_segs: Maximum number of GSO segments
@@ -303,6 +321,8 @@ struct bpf_sk_storage;
* @sk_frag: cached page frag
* @sk_peek_off: current peek_offset value
* @sk_send_head: front of stuff to transmit
+ * @tcp_rtx_queue: TCP re-transmit queue [union with @sk_send_head]
+ * @sk_tx_skb_cache: cache copy of recently accessed TX skb
* @sk_security: used by security modules
* @sk_mark: generic packet mark
* @sk_cgrp_data: cgroup data for this cgroup
@@ -313,11 +333,14 @@ struct bpf_sk_storage;
* @sk_write_space: callback to indicate there is bf sending space available
* @sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
* @sk_backlog_rcv: callback to process the backlog
+ * @sk_validate_xmit_skb: ptr to an optional validate function
* @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
* @sk_reuseport_cb: reuseport group container
+ * @sk_bpf_storage: ptr to cache and control for bpf_sk_storage
* @sk_rcu: used during RCU grace period
* @sk_clockid: clockid used by time-based scheduling (SO_TXTIME)
* @sk_txtime_deadline_mode: set deadline mode for SO_TXTIME
+ * @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
*/
struct sock {
@@ -393,7 +416,9 @@ struct sock {
struct sk_filter __rcu *sk_filter;
union {
struct socket_wq __rcu *sk_wq;
+ /* private: */
struct socket_wq *sk_wq_raw;
+ /* public: */
};
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
@@ -2017,7 +2042,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
* sk_wmem_alloc_get - returns write allocations
* @sk: socket
*
- * Returns sk_wmem_alloc minus initial offset of one
+ * Return: sk_wmem_alloc minus initial offset of one
*/
static inline int sk_wmem_alloc_get(const struct sock *sk)
{
@@ -2028,7 +2053,7 @@ static inline int sk_wmem_alloc_get(const struct sock *sk)
* sk_rmem_alloc_get - returns read allocations
* @sk: socket
*
- * Returns sk_rmem_alloc
+ * Return: sk_rmem_alloc
*/
static inline int sk_rmem_alloc_get(const struct sock *sk)
{
@@ -2039,7 +2064,7 @@ static inline int sk_rmem_alloc_get(const struct sock *sk)
* sk_has_allocations - check if allocations are outstanding
* @sk: socket
*
- * Returns true if socket has write or read allocations
+ * Return: true if socket has write or read allocations
*/
static inline bool sk_has_allocations(const struct sock *sk)
{
@@ -2050,7 +2075,7 @@ static inline bool sk_has_allocations(const struct sock *sk)
* skwq_has_sleeper - check if there are any waiting processes
* @wq: struct socket_wq
*
- * Returns true if socket_wq has waiting processes
+ * Return: true if socket_wq has waiting processes
*
* The purpose of the skwq_has_sleeper and sock_poll_wait is to wrap the memory
* barrier call. They were added due to the race found within the tcp code.
@@ -2238,6 +2263,9 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
* gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
* inside other socket operations and end up recursing into sk_page_frag()
* while it's already in use.
+ *
+ * Return: a per task page_frag if context allows that,
+ * otherwise a per socket one.
*/
static inline struct page_frag *sk_page_frag(struct sock *sk)
{
@@ -2432,6 +2460,7 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
&skb_shinfo(skb)->tskey);
}
+DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
/**
* sk_eat_skb - Release a skb if it is no longer needed
* @sk: socket to eat this skb from
@@ -2440,7 +2469,6 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
* This routine must be called with interrupts disabled or with the socket
* locked so that the sk_buff queue operation is ok.
*/
-DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f1d74a2bd234..22f235260a3a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1045,9 +1045,9 @@ union bpf_attr {
* supports redirection to the egress interface, and accepts no
* flag at all.
*
- * The same effect can be attained with the more generic
- * **bpf_redirect_map**\ (), which requires specific maps to be
- * used but offers better performance.
+ * The same effect can also be attained with the more generic
+ * **bpf_redirect_map**\ (), which uses a BPF map to store the
+ * redirect target instead of providing it directly to the helper.
* Return
* For XDP, the helper returns **XDP_REDIRECT** on success or
* **XDP_ABORTED** on error. For other program types, the values
@@ -1611,13 +1611,11 @@ union bpf_attr {
* the caller. Any higher bits in the *flags* argument must be
* unset.
*
- * When used to redirect packets to net devices, this helper
- * provides a high performance increase over **bpf_redirect**\ ().
- * This is due to various implementation details of the underlying
- * mechanisms, one of which is the fact that **bpf_redirect_map**\
- * () tries to send packet as a "bulk" to the device.
+ * See also bpf_redirect(), which only supports redirecting to an
+ * ifindex, but doesn't require a map to do so.
* Return
- * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ * **XDP_REDIRECT** on success, or the value of the two lower bits
+ * of the *flags* argument on error.
*
* int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 336014bf8868..b6f0bb1dc799 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -97,6 +97,15 @@ enum ip_conntrack_status {
IPS_UNTRACKED_BIT = 12,
IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
+#ifdef __KERNEL__
+ /* Re-purposed for in-kernel use:
+ * Tags a conntrack entry that clashed with an existing entry
+ * on insert.
+ */
+ IPS_NAT_CLASH_BIT = IPS_UNTRACKED_BIT,
+ IPS_NAT_CLASH = IPS_UNTRACKED,
+#endif
+
/* Conntrack got a helper explicitly attached via CT target. */
IPS_HELPER_BIT = 13,
IPS_HELPER = (1 << IPS_HELPER_BIT),
@@ -110,7 +119,8 @@ enum ip_conntrack_status {
*/
IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
+ IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_UNTRACKED |
+ IPS_OFFLOAD),
__IPS_MAX_BIT = 15,
};