Diffstat (limited to 'include/net')
 include/net/act_api.h                    |   8
 include/net/addrconf.h                   |  19
 include/net/af_rxrpc.h                   |  15
 include/net/af_unix.h                    |  43
 include/net/af_vsock.h                   |   2
 include/net/bluetooth/hci_core.h         |  35
 include/net/bond_3ad.h                   |   2
 include/net/bond_options.h               |   1
 include/net/bonding.h                    |  23
 include/net/busy_poll.h                  |   4
 include/net/cfg80211.h                   | 328
 include/net/dropreason-core.h            |  50
 include/net/dsa.h                        |   4
 include/net/dst.h                        |   1
 include/net/eee.h                        |  38
 include/net/fib_rules.h                  |   3
 include/net/genetlink.h                  |  59
 include/net/gro.h                        |  39
 include/net/hotdata.h                    |  52
 include/net/if_inet6.h                   |   8
 include/net/inet_connection_sock.h       |   8
 include/net/inet_hashtables.h            |  21
 include/net/inet_sock.h                  |  11
 include/net/inet_timewait_sock.h         |   4
 include/net/ioam6.h                      |   4
 include/net/ip.h                         |  12
 include/net/ip6_fib.h                    |  88
 include/net/ip6_route.h                  |   5
 include/net/ip_fib.h                     |   1
 include/net/ip_tunnels.h                 |  14
 include/net/ipv6.h                       |  13
 include/net/iucv/iucv.h                  |   4
 include/net/llc_pdu.h                    |   6
 include/net/mac80211.h                   | 224
 include/net/macsec.h                     |  54
 include/net/mana/gdma.h                  |  12
 include/net/mana/mana.h                  |  46
 include/net/mctp.h                       |   7
 include/net/neighbour.h                  |   2
 include/net/net_namespace.h              |   5
 include/net/netdev_queues.h              |  58
 include/net/netdev_rx_queue.h            |   4
 include/net/netfilter/nf_flow_table.h    |  21
 include/net/netfilter/nf_queue.h         |   1
 include/net/netfilter/nf_tables.h        |  77
 include/net/netfilter/nf_tables_ipv4.h   |   2
 include/net/netkit.h                     |   6
 include/net/netlabel.h                   |   7
 include/net/netlink.h                    |  47
 include/net/netmem.h                     |  41
 include/net/netns/core.h                 |   1
 include/net/netns/ipv4.h                 |  50
 include/net/netns/smc.h                  |   2
 include/net/nexthop.h                    |  32
 include/net/nfc/nfc.h                    |   2
 include/net/page_pool/helpers.h          |  85
 include/net/page_pool/types.h            |  62
 include/net/pkt_cls.h                    |   8
 include/net/pkt_sched.h                  |  20
 include/net/protocol.h                   |   3
 include/net/request_sock.h               |  39
 include/net/route.h                      |   7
 include/net/rps.h                        | 125
 include/net/rtnetlink.h                  |   1
 include/net/sch_generic.h                |  47
 include/net/scm.h                        |  10
 include/net/sctp/structs.h               |   5
 include/net/smc.h                        |  16
 include/net/sock.h                       | 226
 include/net/switchdev.h                  |   3
 include/net/tc_act/tc_ct.h               |   9
 include/net/tc_act/tc_ipt.h              |  17
 include/net/tc_act/tc_mirred.h           |   1
 include/net/tc_wrapper.h                 |   4
 include/net/tcp.h                        |  86
 include/net/tcp_ao.h                     |  38
 include/net/tcp_states.h                 |   2
 include/net/tls.h                        |   5
 include/net/vxlan.h                      |  33
 include/net/xdp.h                        |  20
 include/net/xdp_sock.h                   | 111
 include/net/xdp_sock_drv.h               |  78
 include/net/xfrm.h                       |  23
 include/net/xsk_buff_pool.h              |  10
 84 files changed, 2058 insertions(+), 662 deletions(-)
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 4ae0580b63ca..77ee0c657e2c 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -137,6 +137,7 @@ struct tc_action_ops {
#ifdef CONFIG_NET_CLS_ACT
+#define ACT_P_BOUND 0
#define ACT_P_CREATED 1
#define ACT_P_DELETED 1
@@ -191,7 +192,7 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
struct nlattr *est, struct tc_action **a,
const struct tc_action_ops *ops, int bind,
u32 flags);
-void tcf_idr_insert_many(struct tc_action *actions[]);
+void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]);
void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tc_action **a, int bind);
@@ -200,6 +201,8 @@ int tcf_idr_release(struct tc_action *a, bool bind);
int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
int tcf_unregister_action(struct tc_action_ops *a,
struct pernet_operations *ops);
+#define NET_ACT_ALIAS_PREFIX "net-act-"
+#define MODULE_ALIAS_NET_ACT(kind) MODULE_ALIAS(NET_ACT_ALIAS_PREFIX kind)
int tcf_action_destroy(struct tc_action *actions[], int bind);
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res);
@@ -207,8 +210,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est,
struct tc_action *actions[], int init_res[], size_t *attr_size,
u32 flags, u32 fl_flags, struct netlink_ext_ack *extack);
-struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
- bool rtnl_held,
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags,
struct netlink_ext_ack *extack);
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 82da55101b5a..9d06eb945509 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -8,8 +8,9 @@
#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */
-#define TEMP_VALID_LIFETIME (7*86400)
-#define TEMP_PREFERRED_LIFETIME (86400)
+#define TEMP_VALID_LIFETIME (7*86400) /* 1 week */
+#define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */
+#define REGEN_MIN_ADVANCE (2) /* 2 seconds */
#define REGEN_MAX_RETRY (3)
#define MAX_DESYNC_FACTOR (600)
@@ -31,17 +32,22 @@ struct prefix_info {
__u8 length;
__u8 prefix_len;
+ union __packed {
+ __u8 flags;
+ struct __packed {
#if defined(__BIG_ENDIAN_BITFIELD)
- __u8 onlink : 1,
+ __u8 onlink : 1,
autoconf : 1,
reserved : 6;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
- __u8 reserved : 6,
+ __u8 reserved : 6,
autoconf : 1,
onlink : 1;
#else
#error "Please fix <asm/byteorder.h>"
#endif
+ };
+ };
__be32 valid;
__be32 prefered;
__be32 reserved2;
@@ -49,6 +55,9 @@ struct prefix_info {
struct in6_addr prefix;
};
+/* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */
+static_assert(sizeof(struct prefix_info) == 32);
+
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <net/if_inet6.h>
@@ -408,7 +417,7 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
if (unlikely(!idev))
return true;
- return !!idev->cnf.ignore_routes_with_linkdown;
+ return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown);
}
void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp);
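
The anonymous union added above keeps the existing bitfield names while also exposing the whole PIO flags octet; a sketch of both access styles (the function name is illustrative):

    static void pio_flags_example(const struct prefix_info *pinfo)
    {
            u8 raw = pinfo->flags;          /* whole flags octet at once */

            if (pinfo->onlink && pinfo->autoconf)   /* bitfields unchanged */
                    pr_debug("PIO flags 0x%02x\n", raw);
    }
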
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 5531dd08061e..0754c463224a 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -15,6 +15,7 @@ struct key;
struct sock;
struct socket;
struct rxrpc_call;
+struct rxrpc_peer;
enum rxrpc_abort_reason;
enum rxrpc_interruptibility {
@@ -41,13 +42,14 @@ void rxrpc_kernel_new_call_notification(struct socket *,
rxrpc_notify_new_call_t,
rxrpc_discard_new_call_t);
struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
- struct sockaddr_rxrpc *srx,
+ struct rxrpc_peer *peer,
struct key *key,
unsigned long user_call_ID,
s64 tx_total_len,
u32 hard_timeout,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
+ u16 service_id,
bool upgrade,
enum rxrpc_interruptibility interruptibility,
unsigned int debug_id);
@@ -60,9 +62,14 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
u32, int, enum rxrpc_abort_reason);
void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call);
void rxrpc_kernel_put_call(struct socket *sock, struct rxrpc_call *call);
-void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
- struct sockaddr_rxrpc *);
-bool rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *, u32 *);
+struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock,
+ struct sockaddr_rxrpc *srx, gfp_t gfp);
+void rxrpc_kernel_put_peer(struct rxrpc_peer *peer);
+struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer);
+struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call);
+const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer);
+const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer);
+unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
unsigned int);
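
Taken together, these prototypes imply a two-step pattern for kernel users such as AFS: look up (and pin) a peer first, then begin calls against it. A hedged sketch; the surrounding variables are assumptions:

    peer = rxrpc_kernel_lookup_peer(sock, &srx, GFP_KERNEL);
    if (!peer)
            return -ENOMEM;

    call = rxrpc_kernel_begin_call(sock, peer, key, user_call_ID,
                                   tx_total_len, hard_timeout, GFP_KERNEL,
                                   notify_rx, service_id, false /* upgrade */,
                                   RXRPC_PREINTERRUPTIBLE, debug_id);
    /* ... use the call ... */
    rxrpc_kernel_put_peer(peer);    /* drop the pinned peer reference */
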
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 824c258143a3..627ea8e2d915 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -8,21 +8,29 @@
#include <linux/refcount.h>
#include <net/sock.h>
+#if IS_ENABLED(CONFIG_UNIX)
+struct unix_sock *unix_get_socket(struct file *filp);
+#else
+static inline struct unix_sock *unix_get_socket(struct file *filp)
+{
+ return NULL;
+}
+#endif
+
+extern spinlock_t unix_gc_lock;
+extern unsigned int unix_tot_inflight;
+
void unix_inflight(struct user_struct *user, struct file *fp);
void unix_notinflight(struct user_struct *user, struct file *fp);
-void unix_destruct_scm(struct sk_buff *skb);
-void io_uring_destruct_scm(struct sk_buff *skb);
void unix_gc(void);
-void wait_for_unix_gc(void);
-struct sock *unix_get_socket(struct file *filp);
+void wait_for_unix_gc(struct scm_fp_list *fpl);
+
struct sock *unix_peer_get(struct sock *sk);
#define UNIX_HASH_MOD (256 - 1)
#define UNIX_HASH_SIZE (256 * 2)
#define UNIX_HASH_BITS 8
-extern unsigned int unix_tot_inflight;
-
struct unix_address {
refcount_t refcnt;
int len;
@@ -46,12 +54,6 @@ struct scm_stat {
#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
-#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
-#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
-#define unix_state_lock_nested(s) \
- spin_lock_nested(&unix_sk(s)->lock, \
- SINGLE_DEPTH_NESTING)
-
/* The AF_UNIX socket */
struct unix_sock {
/* WARNING: sk has to be the first member */
@@ -61,7 +63,7 @@ struct unix_sock {
struct mutex iolock, bindlock;
struct sock *peer;
struct list_head link;
- atomic_long_t inflight;
+ unsigned long inflight;
spinlock_t lock;
unsigned long gc_flags;
#define UNIX_GC_CANDIDATE 0
@@ -75,6 +77,21 @@ struct unix_sock {
};
#define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk)
+#define unix_peer(sk) (unix_sk(sk)->peer)
+
+#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
+#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
+enum unix_socket_lock_class {
+ U_LOCK_NORMAL,
+ U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */
+ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */
+};
+
+static inline void unix_state_lock_nested(struct sock *sk,
+ enum unix_socket_lock_class subclass)
+{
+ spin_lock_nested(&unix_sk(sk)->lock, subclass);
+}
#define peer_wait peer_wq.wait
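
With the lockdep subclass now an explicit argument, double-locking two peers looks roughly like this (a sketch in the spirit of unix_state_double_lock(), not its exact code):

    static void unix_lock_pair_sketch(struct sock *sk1, struct sock *sk2)
    {
            if (sk1 > sk2)
                    swap(sk1, sk2); /* stable order avoids ABBA deadlock */

            unix_state_lock(sk1);
            unix_state_lock_nested(sk2, U_LOCK_SECOND);
    }
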
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index e302c0e804d0..535701efc1e5 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -137,7 +137,6 @@ struct vsock_transport {
u64 (*stream_rcvhiwat)(struct vsock_sock *);
bool (*stream_is_active)(struct vsock_sock *);
bool (*stream_allow)(u32 cid, u32 port);
- int (*set_rcvlowat)(struct vsock_sock *vsk, int val);
/* SEQ_PACKET. */
ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
@@ -168,6 +167,7 @@ struct vsock_transport {
struct vsock_transport_send_notify_data *);
/* sk_lock held by the caller */
void (*notify_buffer_size)(struct vsock_sock *, u64 *);
+ int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val);
/* Shutdown. */
int (*shutdown)(struct vsock_sock *, int);
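
Since the callback moved into the notification group, a transport now registers it alongside the other notify_* hooks; a sketch with assumed names:

    static int sketch_notify_set_rcvlowat(struct vsock_sock *vsk, int val)
    {
            /* e.g. tell the device/peer about the new low watermark */
            return 0;
    }

    static struct vsock_transport sketch_transport = {
            /* other callbacks omitted */
            .notify_set_rcvlowat = sketch_notify_set_rcvlowat,
    };
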
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 20988623c5cc..8f8dd9173714 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -189,6 +189,7 @@ struct blocked_key {
struct smp_csrk {
bdaddr_t bdaddr;
u8 bdaddr_type;
+ u8 link_type;
u8 type;
u8 val[16];
};
@@ -198,6 +199,7 @@ struct smp_ltk {
struct rcu_head rcu;
bdaddr_t bdaddr;
u8 bdaddr_type;
+ u8 link_type;
u8 authenticated;
u8 type;
u8 enc_size;
@@ -212,6 +214,7 @@ struct smp_irk {
bdaddr_t rpa;
bdaddr_t bdaddr;
u8 addr_type;
+ u8 link_type;
u8 val[16];
};
@@ -219,6 +222,8 @@ struct link_key {
struct list_head list;
struct rcu_head rcu;
bdaddr_t bdaddr;
+ u8 bdaddr_type;
+ u8 link_type;
u8 type;
u8 val[HCI_LINK_KEY_SIZE];
u8 pin_len;
@@ -534,7 +539,6 @@ struct hci_dev {
struct work_struct tx_work;
struct delayed_work le_scan_disable;
- struct delayed_work le_scan_restart;
struct sk_buff_head rx_q;
struct sk_buff_head raw_q;
@@ -952,7 +956,6 @@ void hci_inquiry_cache_flush(struct hci_dev *hdev);
/* ----- HCI Connections ----- */
enum {
HCI_CONN_AUTH_PEND,
- HCI_CONN_REAUTH_PEND,
HCI_CONN_ENCRYPT_PEND,
HCI_CONN_RSWITCH_PEND,
HCI_CONN_MODE_CHANGE_PEND,
@@ -1227,11 +1230,11 @@ static inline struct hci_conn *hci_conn_hash_lookup_cis(struct hci_dev *hdev,
continue;
/* Match CIG ID if set */
- if (cig != BT_ISO_QOS_CIG_UNSET && cig != c->iso_qos.ucast.cig)
+ if (cig != c->iso_qos.ucast.cig)
continue;
/* Match CIS ID if set */
- if (id != BT_ISO_QOS_CIS_UNSET && id != c->iso_qos.ucast.cis)
+ if (id != c->iso_qos.ucast.cis)
continue;
/* Match destination address if set */
@@ -1293,6 +1296,30 @@ static inline struct hci_conn *hci_conn_hash_lookup_big(struct hci_dev *hdev,
}
static inline struct hci_conn *
+hci_conn_hash_lookup_big_state(struct hci_dev *hdev, __u8 handle, __u16 state)
+{
+ struct hci_conn_hash *h = &hdev->conn_hash;
+ struct hci_conn *c;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(c, &h->list, list) {
+ if (bacmp(&c->dst, BDADDR_ANY) || c->type != ISO_LINK ||
+ c->state != state)
+ continue;
+
+ if (handle == c->iso_qos.bcast.big) {
+ rcu_read_unlock();
+ return c;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static inline struct hci_conn *
hci_conn_hash_lookup_pa_sync_big_handle(struct hci_dev *hdev, __u8 big)
{
struct hci_conn_hash *h = &hdev->conn_hash;
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index c5e57c6bd873..9ce5ac2bfbad 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -54,6 +54,8 @@ typedef enum {
AD_MUX_DETACHED, /* mux machine */
AD_MUX_WAITING, /* mux machine */
AD_MUX_ATTACHED, /* mux machine */
+ AD_MUX_COLLECTING, /* mux machine */
+ AD_MUX_DISTRIBUTING, /* mux machine */
AD_MUX_COLLECTING_DISTRIBUTING /* mux machine */
} mux_states_t;
diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index 69292ecc0325..473a0147769e 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -76,6 +76,7 @@ enum {
BOND_OPT_MISSED_MAX,
BOND_OPT_NS_TARGETS,
BOND_OPT_PRIO,
+ BOND_OPT_COUPLED_CONTROL,
BOND_OPT_LAST
};
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 5b8b1b644a2d..b61fb1aa3a56 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -148,6 +148,7 @@ struct bond_params {
#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr ns_targets[BOND_MAX_NS_TARGETS];
#endif
+ int coupled_control;
/* 2 bytes of padding : see ether_addr_equal_64bits() */
u8 ad_actor_system[ETH_ALEN + 2];
@@ -167,6 +168,7 @@ struct slave {
u8 backup:1, /* indicates backup slave. Value corresponds with
BOND_STATE_ACTIVE and BOND_STATE_BACKUP */
inactive:1, /* indicates inactive slave */
+ rx_disabled:1, /* indicates whether slave's Rx is disabled */
should_notify:1, /* indicates whether the state changed */
should_notify_link:1; /* indicates whether the link changed */
u8 duplex;
@@ -568,6 +570,14 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave,
bond_set_slave_state(slave, BOND_STATE_BACKUP, notify);
if (!slave->bond->params.all_slaves_active)
slave->inactive = 1;
+ if (BOND_MODE(slave->bond) == BOND_MODE_8023AD)
+ slave->rx_disabled = 1;
+}
+
+static inline void bond_set_slave_tx_disabled_flags(struct slave *slave,
+ bool notify)
+{
+ bond_set_slave_state(slave, BOND_STATE_BACKUP, notify);
}
static inline void bond_set_slave_active_flags(struct slave *slave,
@@ -575,6 +585,14 @@ static inline void bond_set_slave_active_flags(struct slave *slave,
{
bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify);
slave->inactive = 0;
+ if (BOND_MODE(slave->bond) == BOND_MODE_8023AD)
+ slave->rx_disabled = 0;
+}
+
+static inline void bond_set_slave_rx_enabled_flags(struct slave *slave,
+ bool notify)
+{
+ slave->rx_disabled = 0;
}
static inline bool bond_is_slave_inactive(struct slave *slave)
@@ -582,6 +600,11 @@ static inline bool bond_is_slave_inactive(struct slave *slave)
return slave->inactive;
}
+static inline bool bond_is_slave_rx_disabled(struct slave *slave)
+{
+ return slave->rx_disabled;
+}
+
static inline void bond_propose_link_state(struct slave *slave, int state)
{
slave->link_new_state = state;
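
These helpers let 802.3ad gate a slave's Rx and Tx independently when coupled_control is off (matching the new AD_MUX_COLLECTING/DISTRIBUTING split); an illustrative call site, not the bonding driver's exact code:

    if (!bond->params.coupled_control) {
            /* block Tx only; Rx is controlled separately */
            bond_set_slave_tx_disabled_flags(slave, BOND_SLAVE_NOTIFY_NOW);
    } else {
            bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW);
    }
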
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 4dabeb6c76d3..9b09acac538e 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -48,6 +48,10 @@ void napi_busy_loop(unsigned int napi_id,
bool (*loop_end)(void *, unsigned long),
void *loop_end_arg, bool prefer_busy_poll, u16 budget);
+void napi_busy_loop_rcu(unsigned int napi_id,
+ bool (*loop_end)(void *, unsigned long),
+ void *loop_end_arg, bool prefer_busy_poll, u16 budget);
+
#else /* CONFIG_NET_RX_BUSY_POLL */
static inline unsigned long net_busy_loop_on(void)
{
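
napi_busy_loop_rcu() serves callers that already hold rcu_read_lock(); a hedged fragment (the predicate, its argument, and the budget are illustrative):

    static bool sketch_loop_end(void *arg, unsigned long start_time)
    {
            return !list_empty(arg);        /* stop once work has arrived */
    }

    rcu_read_lock();
    napi_busy_loop_rcu(napi_id, sketch_loop_end, &pending, false, 8);
    rcu_read_unlock();
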
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b137a33a1b68..2e2be4fd2bb6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7,7 +7,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021, 2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/ethtool.h>
@@ -52,7 +52,7 @@
* such wiphy can have zero, one, or many virtual interfaces associated with
* it, which need to be identified as such by pointing the network interface's
* @ieee80211_ptr pointer to a &struct wireless_dev which further describes
- * the wireless part of the interface, normally this struct is embedded in the
+ * the wireless part of the interface. Normally this struct is embedded in the
* network interface's private data area. Drivers can optionally allow creating
* or destroying virtual interfaces on the fly, but without at least one or the
* ability to create some the wireless device isn't useful.
@@ -117,6 +117,14 @@ struct wiphy;
* This may be due to the driver or due to regulatory bandwidth
* restrictions.
* @IEEE80211_CHAN_NO_EHT: EHT operation is not permitted on this channel.
+ * @IEEE80211_CHAN_DFS_CONCURRENT: See %NL80211_RRF_DFS_CONCURRENT
+ * @IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT: Client connection with VLP AP
+ * not permitted using this channel
+ * @IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT: Client connection with AFC AP
+ * not permitted using this channel
+ * @IEEE80211_CHAN_CAN_MONITOR: This channel can be used for monitor
+ * mode even in the presence of other (regulatory) restrictions,
+ * even if it is otherwise disabled.
*/
enum ieee80211_channel_flags {
IEEE80211_CHAN_DISABLED = 1<<0,
@@ -140,6 +148,10 @@ enum ieee80211_channel_flags {
IEEE80211_CHAN_16MHZ = 1<<18,
IEEE80211_CHAN_NO_320MHZ = 1<<19,
IEEE80211_CHAN_NO_EHT = 1<<20,
+ IEEE80211_CHAN_DFS_CONCURRENT = 1<<21,
+ IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = 1<<22,
+ IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = 1<<23,
+ IEEE80211_CHAN_CAN_MONITOR = 1<<24,
};
#define IEEE80211_CHAN_NO_HT40 \
@@ -800,6 +812,9 @@ struct key_params {
* chan will define the primary channel and all other
* parameters are ignored.
* @freq1_offset: offset from @center_freq1, in KHz
+ * @punctured: mask of the punctured 20 MHz subchannels, with
+ * bits turned on being disabled (punctured); numbered
+ * from lower to higher frequency (like in the spec)
*/
struct cfg80211_chan_def {
struct ieee80211_channel *chan;
@@ -808,6 +823,7 @@ struct cfg80211_chan_def {
u32 center_freq2;
struct ieee80211_edmg edmg;
u16 freq1_offset;
+ u16 punctured;
};
/*
@@ -948,7 +964,8 @@ cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1,
chandef1->width == chandef2->width &&
chandef1->center_freq1 == chandef2->center_freq1 &&
chandef1->freq1_offset == chandef2->freq1_offset &&
- chandef1->center_freq2 == chandef2->center_freq2);
+ chandef1->center_freq2 == chandef2->center_freq2 &&
+ chandef1->punctured == chandef2->punctured);
}
/**
@@ -977,6 +994,15 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1,
const struct cfg80211_chan_def *chandef2);
/**
+ * nl80211_chan_width_to_mhz - get the channel width in MHz
+ * @chan_width: the channel width from &enum nl80211_chan_width
+ *
+ * Return: channel width in MHz if the chan_width from &enum nl80211_chan_width
+ * is valid. -1 otherwise.
+ */
+int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width);
+
+/**
* cfg80211_chandef_valid - check if a channel definition is valid
* @chandef: the channel definition to check
* Return: %true if the channel definition is valid. %false otherwise.
@@ -1031,6 +1057,20 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef);
/**
+ * cfg80211_chandef_primary - calculate primary 40/80/160 MHz freq
+ * @chandef: chandef to calculate for
+ * @primary_chan_width: primary channel width to calculate center for
+ * @punctured: punctured sub-channel bitmap, will be recalculated
+ * according to the new bandwidth, can be %NULL
+ *
+ * Returns: the primary 40/80/160 MHz channel center frequency, or -1
+ * for errors, updating the punctured bitmap
+ */
+int cfg80211_chandef_primary(const struct cfg80211_chan_def *chandef,
+ enum nl80211_chan_width primary_chan_width,
+ u16 *punctured);
+
+/**
* nl80211_send_chandef - sends the channel definition.
* @msg: the msg to send channel definition
* @chandef: the channel definition to check
@@ -1440,9 +1480,6 @@ struct cfg80211_unsol_bcast_probe_resp {
* @fils_discovery: FILS discovery transmission parameters
* @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters
* @mbssid_config: AP settings for multiple bssid
- * @punct_bitmap: Preamble puncturing bitmap. Each bit represents
- * a 20 MHz channel, lowest bit corresponding to the lowest channel.
- * Bit set to 1 indicates that the channel is punctured.
*/
struct cfg80211_ap_settings {
struct cfg80211_chan_def chandef;
@@ -1477,7 +1514,6 @@ struct cfg80211_ap_settings {
struct cfg80211_fils_discovery fils_discovery;
struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp;
struct cfg80211_mbssid_config mbssid_config;
- u16 punct_bitmap;
};
@@ -1511,9 +1547,8 @@ struct cfg80211_ap_update {
* @radar_required: whether radar detection is required on the new channel
* @block_tx: whether transmissions should be blocked while changing
* @count: number of beacons until switch
- * @punct_bitmap: Preamble puncturing bitmap. Each bit represents
- * a 20 MHz channel, lowest bit corresponding to the lowest channel.
- * Bit set to 1 indicates that the channel is punctured.
+ * @link_id: defines the link on which channel switch is expected during
+ * MLO. 0 in case of non-MLO.
*/
struct cfg80211_csa_settings {
struct cfg80211_chan_def chandef;
@@ -1526,7 +1561,7 @@ struct cfg80211_csa_settings {
bool radar_required;
bool block_tx;
u8 count;
- u16 punct_bitmap;
+ u8 link_id;
};
/**
@@ -1665,6 +1700,21 @@ struct link_station_del_parameters {
};
/**
+ * struct cfg80211_ttlm_params: TID to link mapping parameters
+ *
+ * Used for setting a TID to link mapping.
+ *
+ * @dlink: Downlink TID to link mapping, as defined in section 9.4.2.314
+ * (TID-To-Link Mapping element) in Draft P802.11be_D4.0.
+ * @ulink: Uplink TID to link mapping, as defined in section 9.4.2.314
+ * (TID-To-Link Mapping element) in Draft P802.11be_D4.0.
+ */
+struct cfg80211_ttlm_params {
+ u16 dlink[8];
+ u16 ulink[8];
+};
+
+/**
* struct station_parameters - station parameters
*
* Used to change and create a new station.
@@ -1734,11 +1784,15 @@ struct station_parameters {
* @subtype: Management frame subtype to use for indicating removal
* (10 = Disassociation, 12 = Deauthentication)
* @reason_code: Reason code for the Disassociation/Deauthentication frame
+ * @link_id: Link ID indicating a link that stations to be flushed must be
+ * using; valid only for MLO, but can also be -1 for MLO to really
+ * remove all stations.
*/
struct station_del_parameters {
const u8 *mac;
u8 subtype;
u16 reason_code;
+ int link_id;
};
/**
@@ -2560,7 +2614,7 @@ struct cfg80211_scan_info {
* @short_ssid: short ssid to scan for
* @bssid: bssid to scan for
* @channel_idx: idx of the channel in the channel array in the scan request
- * which the above info relvant to
+ * which the above info is relevant to
* @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU
* @short_ssid_valid: @short_ssid is valid and can be used
* @psc_no_listen: when set, and the channel is a PSC channel, no need to wait
@@ -2608,6 +2662,8 @@ struct cfg80211_scan_6ghz_params {
* @n_6ghz_params: number of 6 GHz params
* @scan_6ghz_params: 6 GHz params
* @bssid: BSSID to scan for (most commonly, the wildcard BSSID)
+ * @tsf_report_link_id: for MLO, indicates the link ID of the BSS that should be
+ * used for TSF reporting. Can be set to -1 to indicate no preference.
*/
struct cfg80211_scan_request {
struct cfg80211_ssid *ssids;
@@ -2636,6 +2692,7 @@ struct cfg80211_scan_request {
bool scan_6ghz;
u32 n_6ghz_params;
struct cfg80211_scan_6ghz_params *scan_6ghz_params;
+ s8 tsf_report_link_id;
/* keep last */
struct ieee80211_channel *channels[] __counted_by(n_channels);
@@ -2660,19 +2717,11 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
* @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match
* or no match (RSSI only)
* @rssi_thold: don't report scan results below this threshold (in s32 dBm)
- * @per_band_rssi_thold: Minimum rssi threshold for each band to be applied
- * for filtering out scan results received. Drivers advertise this support
- * of band specific rssi based filtering through the feature capability
- * %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band
- * specific rssi thresholds take precedence over rssi_thold, if specified.
- * If not specified for any band, it will be assigned with rssi_thold of
- * corresponding matchset.
*/
struct cfg80211_match_set {
struct cfg80211_ssid ssid;
u8 bssid[ETH_ALEN];
s32 rssi_thold;
- s32 per_band_rssi_thold[NUM_NL80211_BANDS];
};
/**
@@ -2816,6 +2865,13 @@ enum cfg80211_signal_type {
* the BSS that requested the scan in which the beacon/probe was received.
* @chains: bitmask for filled values in @chain_signal.
* @chain_signal: per-chain signal strength of last received BSS in dBm.
+ * @restrict_use: restrict usage, if not set, assume @use_for is
+ * %NL80211_BSS_USE_FOR_NORMAL.
+ * @use_for: bitmap of possible usage for this BSS, see
+ * &enum nl80211_bss_use_for
+ * @cannot_use_reasons: the reasons (bitmap) for not being able to connect,
+ * if @restrict_use is set and @use_for is zero (empty); may be 0 for
+ * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons
* @drv_data: Data to be passed through to @inform_bss
*/
struct cfg80211_inform_bss {
@@ -2827,6 +2883,9 @@ struct cfg80211_inform_bss {
u8 chains;
s8 chain_signal[IEEE80211_MAX_CHAINS];
+ u8 restrict_use:1, use_for:7;
+ u8 cannot_use_reasons;
+
void *drv_data;
};
@@ -2865,6 +2924,8 @@ struct cfg80211_bss_ies {
* own the beacon_ies, but they're just pointers to the ones from the
* @hidden_beacon_bss struct)
* @proberesp_ies: the information elements from the last Probe Response frame
+ * @proberesp_ecsa_stuck: ECSA element is stuck in the Probe Response frame,
+ * cannot rely on it having valid data
* @hidden_beacon_bss: in case this BSS struct represents a probe response from
* a BSS that hides the SSID in its beacon, this points to the BSS struct
* that holds the beacon data. @beacon_ies is still valid, of course, and
@@ -2878,6 +2939,11 @@ struct cfg80211_bss_ies {
* @chain_signal: per-chain signal strength of last received BSS in dBm.
* @bssid_index: index in the multiple BSS set
* @max_bssid_indicator: max number of members in the BSS set
+ * @use_for: bitmap of possible usage for this BSS, see
+ * &enum nl80211_bss_use_for
+ * @cannot_use_reasons: the reasons (bitmap) for not being able to connect,
+ * if @restrict_use is set and @use_for is zero (empty); may be 0 for
+ * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons
* @priv: private area for driver use, has at least wiphy->bss_priv_size bytes
*/
struct cfg80211_bss {
@@ -2900,9 +2966,14 @@ struct cfg80211_bss {
u8 chains;
s8 chain_signal[IEEE80211_MAX_CHAINS];
+ u8 proberesp_ecsa_stuck:1;
+
u8 bssid_index;
u8 max_bssid_indicator;
+ u8 use_for;
+ u8 cannot_use_reasons;
+
u8 priv[] __aligned(sizeof(void *));
};
@@ -3006,6 +3077,7 @@ struct cfg80211_assoc_link {
* @CONNECT_REQ_MLO_SUPPORT: Userspace indicates support for handling MLD links.
* Drivers shall disable MLO features for the current association if this
* flag is not set.
+ * @ASSOC_REQ_SPP_AMSDU: SPP A-MSDUs will be used on this connection (if any)
*/
enum cfg80211_assoc_req_flags {
ASSOC_REQ_DISABLE_HT = BIT(0),
@@ -3015,6 +3087,7 @@ enum cfg80211_assoc_req_flags {
ASSOC_REQ_DISABLE_HE = BIT(4),
ASSOC_REQ_DISABLE_EHT = BIT(5),
CONNECT_REQ_MLO_SUPPORT = BIT(6),
+ ASSOC_REQ_SPP_AMSDU = BIT(7),
};
/**
@@ -3180,8 +3253,8 @@ struct cfg80211_ibss_params {
*
* @behaviour: requested BSS selection behaviour.
* @param: parameters for requested behaviour.
- * @band_pref: preferred band for %NL80211_BSS_SELECT_ATTR_BAND_PREF.
- * @adjust: parameters for %NL80211_BSS_SELECT_ATTR_RSSI_ADJUST.
+ * @param.band_pref: preferred band for %NL80211_BSS_SELECT_ATTR_BAND_PREF.
+ * @param.adjust: parameters for %NL80211_BSS_SELECT_ATTR_RSSI_ADJUST.
*/
struct cfg80211_bss_selection {
enum nl80211_bss_select_attr behaviour;
@@ -3539,12 +3612,15 @@ struct cfg80211_wowlan_nd_info {
* @tcp_connlost: TCP connection lost or failed to establish
* @tcp_nomoretokens: TCP data ran out of tokens
* @net_detect: if not %NULL, woke up because of net detect
+ * @unprot_deauth_disassoc: woke up due to unprotected deauth or
+ * disassoc frame (in MFP).
*/
struct cfg80211_wowlan_wakeup {
bool disconnect, magic_pkt, gtk_rekey_failure,
eap_identity_req, four_way_handshake,
rfkill_release, packet_80211,
- tcp_match, tcp_connlost, tcp_nomoretokens;
+ tcp_match, tcp_connlost, tcp_nomoretokens,
+ unprot_deauth_disassoc;
s32 pattern_idx;
u32 packet_present_len, packet_len;
const void *packet;
@@ -4493,6 +4569,7 @@ struct mgmt_frame_regs {
* @del_link_station: Remove a link of a station.
*
* @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames.
+ * @set_ttlm: set the TID to link mapping.
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4852,6 +4929,8 @@ struct cfg80211_ops {
struct link_station_del_parameters *params);
int (*set_hw_timestamp)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_set_hw_timestamp *hwts);
+ int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_ttlm_params *params);
};
/*
@@ -4863,7 +4942,7 @@ struct cfg80211_ops {
* enum wiphy_flags - wiphy capability flags
*
* @WIPHY_FLAG_SPLIT_SCAN_6GHZ: if set to true, the scan request will be split
- * into two, first for legacy bands and second for UHB.
+ * into two, first for legacy bands and second for 6 GHz.
* @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this
* wiphy at all
* @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled
@@ -4910,6 +4989,8 @@ struct cfg80211_ops {
* NL80211_REGDOM_SET_BY_DRIVER.
* @WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON: reg_call_notifier() is called if driver
* set this flag to update channels on beacon hints.
+ * @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link
+ * of an NSTR mobile AP MLD.
*/
enum wiphy_flags {
WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0),
@@ -4923,7 +5004,7 @@ enum wiphy_flags {
WIPHY_FLAG_IBSS_RSN = BIT(8),
WIPHY_FLAG_MESH_AUTH = BIT(10),
WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11),
- /* use hole at 12 */
+ WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12),
WIPHY_FLAG_SUPPORTS_FW_ROAM = BIT(13),
WIPHY_FLAG_AP_UAPSD = BIT(14),
WIPHY_FLAG_SUPPORTS_TDLS = BIT(15),
@@ -6013,7 +6094,6 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy,
* wireless device if it has no netdev
* @u: union containing data specific to @iftype
* @connected: indicates if connected or not (STA mode)
- * @bssid: (private) Used by the internal configuration code
* @wext: (private) Used by the internal wireless extensions compat code
* @wext.ibss: (private) IBSS data part of wext handling
* @wext.connect: (private) connection handling data
@@ -6033,8 +6113,6 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy,
* @mgmt_registrations: list of registrations for management frames
* @mgmt_registrations_need_update: mgmt registrations were updated,
* need to propagate the update to the driver
- * @beacon_interval: beacon interval used on this device for transmitting
- * beacons, 0 when not valid
* @address: The address for this device, valid only if @netdev is %NULL
* @is_running: true if this is a non-netdev device that has been started, e.g.
* the P2P Device.
@@ -6145,7 +6223,7 @@ struct wireless_dev {
int beacon_interval;
struct cfg80211_chan_def preset_chandef;
struct cfg80211_chan_def chandef;
- u8 id[IEEE80211_MAX_SSID_LEN];
+ u8 id[IEEE80211_MAX_MESH_ID_LEN];
u8 id_len, id_up_len;
} mesh;
struct {
@@ -6793,13 +6871,45 @@ cfg80211_find_vendor_ie(unsigned int oui, int oui_type,
}
/**
+ * enum cfg80211_rnr_iter_ret - reduced neighbor report iteration state
+ * @RNR_ITER_CONTINUE: continue iterating with the next entry
+ * @RNR_ITER_BREAK: break iteration and return success
+ * @RNR_ITER_ERROR: break iteration and return error
+ */
+enum cfg80211_rnr_iter_ret {
+ RNR_ITER_CONTINUE,
+ RNR_ITER_BREAK,
+ RNR_ITER_ERROR,
+};
+
+/**
+ * cfg80211_iter_rnr - iterate reduced neighbor report entries
+ * @elems: the frame elements to iterate RNR elements and then
+ * their entries in
+ * @elems_len: length of the elements
+ * @iter: iteration function, see also &enum cfg80211_rnr_iter_ret
+ * for the return value
+ * @iter_data: additional data passed to the iteration function
+ * Return: %true on success (after successfully iterating all entries
+ * or if the iteration function returned %RNR_ITER_BREAK),
+ * %false on error (iteration function returned %RNR_ITER_ERROR
+ * or elements were malformed.)
+ */
+bool cfg80211_iter_rnr(const u8 *elems, size_t elems_len,
+ enum cfg80211_rnr_iter_ret
+ (*iter)(void *data, u8 type,
+ const struct ieee80211_neighbor_ap_info *info,
+ const u8 *tbtt_info, u8 tbtt_info_len),
+ void *iter_data);
+
+/**
* cfg80211_defragment_element - Defrag the given element data into a buffer
*
* @elem: the element to defragment
* @ies: elements where @elem is contained
* @ieslen: length of @ies
- * @data: buffer to store element data
- * @data_len: length of @data
+ * @data: buffer to store element data, or %NULL to just determine size
+ * @data_len: length of @data, or 0
* @frag_id: the element ID of fragments
*
* Return: length of @data, or -EINVAL on error
@@ -7097,11 +7207,13 @@ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen,
* from a beacon or probe response
* @CFG80211_BSS_FTYPE_BEACON: data comes from a beacon
* @CFG80211_BSS_FTYPE_PRESP: data comes from a probe response
+ * @CFG80211_BSS_FTYPE_S1G_BEACON: data comes from an S1G beacon
*/
enum cfg80211_bss_frame_type {
CFG80211_BSS_FTYPE_UNKNOWN,
CFG80211_BSS_FTYPE_BEACON,
CFG80211_BSS_FTYPE_PRESP,
+ CFG80211_BSS_FTYPE_S1G_BEACON,
};
/**
@@ -7116,6 +7228,23 @@ int cfg80211_get_ies_channel_number(const u8 *ie, size_t ielen,
enum nl80211_band band);
/**
+ * cfg80211_ssid_eq - compare two SSIDs
+ * @a: first SSID
+ * @b: second SSID
+ *
+ * Return: %true if SSIDs are equal, %false otherwise.
+ */
+static inline bool
+cfg80211_ssid_eq(struct cfg80211_ssid *a, struct cfg80211_ssid *b)
+{
+ if (WARN_ON(!a || !b))
+ return false;
+ if (a->ssid_len != b->ssid_len)
+ return false;
+ return memcmp(a->ssid, b->ssid, a->ssid_len) ? false : true;
+}
+
+/**
* cfg80211_inform_bss_data - inform cfg80211 of a new BSS
*
* @wiphy: the wiphy reporting the BSS
@@ -7162,6 +7291,25 @@ cfg80211_inform_bss(struct wiphy *wiphy,
}
/**
+ * __cfg80211_get_bss - get a BSS reference
+ * @wiphy: the wiphy this BSS struct belongs to
+ * @channel: the channel to search on (or %NULL)
+ * @bssid: the desired BSSID (or %NULL)
+ * @ssid: the desired SSID (or %NULL)
+ * @ssid_len: length of the SSID (or 0)
+ * @bss_type: type of BSS, see &enum ieee80211_bss_type
+ * @privacy: privacy filter, see &enum ieee80211_privacy
+ * @use_for: indicates which use is intended
+ */
+struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy,
+ struct ieee80211_channel *channel,
+ const u8 *bssid,
+ const u8 *ssid, size_t ssid_len,
+ enum ieee80211_bss_type bss_type,
+ enum ieee80211_privacy privacy,
+ u32 use_for);
+
+/**
* cfg80211_get_bss - get a BSS reference
* @wiphy: the wiphy this BSS struct belongs to
* @channel: the channel to search on (or %NULL)
@@ -7170,13 +7318,20 @@ cfg80211_inform_bss(struct wiphy *wiphy,
* @ssid_len: length of the SSID (or 0)
* @bss_type: type of BSS, see &enum ieee80211_bss_type
* @privacy: privacy filter, see &enum ieee80211_privacy
+ *
+ * This version implies regular usage, %NL80211_BSS_USE_FOR_NORMAL.
*/
-struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
- struct ieee80211_channel *channel,
- const u8 *bssid,
- const u8 *ssid, size_t ssid_len,
- enum ieee80211_bss_type bss_type,
- enum ieee80211_privacy privacy);
+static inline struct cfg80211_bss *
+cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel,
+ const u8 *bssid, const u8 *ssid, size_t ssid_len,
+ enum ieee80211_bss_type bss_type,
+ enum ieee80211_privacy privacy)
+{
+ return __cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len,
+ bss_type, privacy,
+ NL80211_BSS_USE_FOR_NORMAL);
+}
+
static inline struct cfg80211_bss *
cfg80211_get_ibss(struct wiphy *wiphy,
struct ieee80211_channel *channel,
@@ -7270,8 +7425,6 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
/**
* struct cfg80211_rx_assoc_resp_data - association response data
- * @bss: the BSS that association was requested with, ownership of the pointer
- * moves to cfg80211 in the call to cfg80211_rx_assoc_resp()
* @buf: (Re)Association Response frame (header + body)
* @len: length of the frame data
* @uapsd_queues: bitmap of queues configured for uapsd. Same format
@@ -7281,6 +7434,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
* @ap_mld_addr: AP MLD address (in case of MLO)
* @links: per-link information indexed by link ID, use links[0] for
* non-MLO connections
+ * @links.bss: the BSS that association was requested with, ownership of the
+ * pointer moves to cfg80211 in the call to cfg80211_rx_assoc_resp()
* @links.status: Set this (along with a BSS pointer) for links that
* were rejected by the AP.
*/
@@ -7309,7 +7464,7 @@ struct cfg80211_rx_assoc_resp_data {
* This function may sleep. The caller must hold the corresponding wdev's mutex.
*/
void cfg80211_rx_assoc_resp(struct net_device *dev,
- struct cfg80211_rx_assoc_resp_data *data);
+ const struct cfg80211_rx_assoc_resp_data *data);
/**
* struct cfg80211_assoc_failure - association failure data
@@ -7428,7 +7583,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev,
* RFkill integration in cfg80211 is almost invisible to drivers,
* as cfg80211 automatically registers an rfkill instance for each
* wireless device it knows about. Soft kill is also translated
- * into disconnecting and turning all interfaces off, drivers are
+ * into disconnecting and turning all interfaces off. Drivers are
* expected to turn off the device when all interfaces are down.
*
* However, devices may have a hard RFkill line, in which case they
@@ -7476,7 +7631,7 @@ static inline void wiphy_rfkill_stop_polling(struct wiphy *wiphy)
* the configuration mechanism.
*
* A driver supporting vendor commands must register them as an array
- * in struct wiphy, with handlers for each one, each command has an
+ * in struct wiphy, with handlers for each one. Each command has an
* OUI and sub command ID to identify it.
*
* Note that this feature should not be (ab)used to implement protocol
@@ -7640,7 +7795,7 @@ static inline void cfg80211_vendor_event(struct sk_buff *skb, gfp_t gfp)
* interact with driver-specific tools to aid, for instance,
* factory programming.
*
- * This chapter describes how drivers interact with it, for more
+ * This chapter describes how drivers interact with it. For more
* information see the nl80211 book's chapter on it.
*/
@@ -8631,14 +8786,13 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy,
* @dev: the device which switched channels
* @chandef: the new channel definition
* @link_id: the link ID for MLO, must be 0 for non-MLO
- * @punct_bitmap: the new puncturing bitmap
*
* Caller must hold wiphy mutex, therefore must only be called from sleepable
* driver context!
*/
void cfg80211_ch_switch_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id, u16 punct_bitmap);
+ unsigned int link_id);
/*
* cfg80211_ch_switch_started_notify - notify channel switch start
@@ -8647,7 +8801,6 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
* @link_id: the link ID for MLO, must be 0 for non-MLO
* @count: the number of TBTTs until the channel switch happens
* @quiet: whether or not immediate quiet was requested by the AP
- * @punct_bitmap: the future puncturing bitmap
*
* Inform the userspace about the channel switch that has just
* started, so that it can take appropriate actions (eg. starting
@@ -8656,7 +8809,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
void cfg80211_ch_switch_started_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
unsigned int link_id, u8 count,
- bool quiet, u16 punct_bitmap);
+ bool quiet);
/**
* ieee80211_operating_class_to_band - convert operating class to band
@@ -8670,6 +8823,19 @@ bool ieee80211_operating_class_to_band(u8 operating_class,
enum nl80211_band *band);
/**
+ * ieee80211_operating_class_to_chandef - convert operating class to chandef
+ *
+ * @operating_class: the operating class to convert
+ * @chan: the ieee80211_channel to convert
+ * @chandef: a pointer to the resulting chandef
+ *
+ * Returns %true if the conversion was successful, %false otherwise.
+ */
+bool ieee80211_operating_class_to_chandef(u8 operating_class,
+ struct ieee80211_channel *chan,
+ struct cfg80211_chan_def *chandef);
+
+/**
* ieee80211_chandef_to_operating_class - convert chandef to operation class
*
* @chandef: the chandef to convert
@@ -9275,18 +9441,6 @@ static inline int cfg80211_color_change_notify(struct net_device *dev)
}
/**
- * cfg80211_valid_disable_subchannel_bitmap - validate puncturing bitmap
- * @bitmap: bitmap to be validated
- * @chandef: channel definition
- *
- * Validate the puncturing bitmap.
- *
- * Return: %true if the bitmap is valid. %false otherwise.
- */
-bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
- const struct cfg80211_chan_def *chandef);
-
-/**
* cfg80211_links_removed - Notify about removed STA MLD setup links.
* @dev: network device.
* @link_mask: BIT mask of removed STA MLD setup link IDs.
@@ -9299,4 +9453,60 @@ bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
*/
void cfg80211_links_removed(struct net_device *dev, u16 link_mask);
+/**
+ * cfg80211_schedule_channels_check - schedule regulatory check if needed
+ * @wdev: the wireless device to check
+ *
+ * In case the device supports NO_IR or DFS relaxations, schedule regulatory
+ * channels check, as previous concurrent operation conditions may not
+ * hold anymore.
+ */
+void cfg80211_schedule_channels_check(struct wireless_dev *wdev);
+
+#ifdef CONFIG_CFG80211_DEBUGFS
+/**
+ * wiphy_locked_debugfs_read - do a locked read in debugfs
+ * @wiphy: the wiphy to use
+ * @file: the file being read
+ * @buf: the buffer to fill and then read from
+ * @bufsize: size of the buffer
+ * @userbuf: the user buffer to copy to
+ * @count: read count
+ * @ppos: read position
+ * @handler: the read handler to call (under wiphy lock)
+ * @data: additional data to pass to the read handler
+ */
+ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file,
+ char *buf, size_t bufsize,
+ char __user *userbuf, size_t count,
+ loff_t *ppos,
+ ssize_t (*handler)(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t bufsize,
+ void *data),
+ void *data);
+
+/**
+ * wiphy_locked_debugfs_write - do a locked write in debugfs
+ * @wiphy: the wiphy to use
+ * @file: the file being written to
+ * @buf: the buffer to copy the user data to
+ * @bufsize: size of the buffer
+ * @userbuf: the user buffer to copy from
+ * @count: read count
+ * @handler: the write handler to call (under wiphy lock)
+ * @data: additional data to pass to the write handler
+ */
+ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, struct file *file,
+ char *buf, size_t bufsize,
+ const char __user *userbuf, size_t count,
+ ssize_t (*handler)(struct wiphy *wiphy,
+ struct file *file,
+ char *buf,
+ size_t count,
+ void *data),
+ void *data);
+#endif
+
#endif /* __NET_CFG80211_H */
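
Among the cfg80211 changes above, cfg80211_defragment_element() now accepts a %NULL buffer to report the required size, which enables a two-pass pattern; a hedged sketch with abbreviated error handling:

    ssize_t len;
    u8 *data;

    len = cfg80211_defragment_element(elem, ies, ieslen, NULL, 0, frag_id);
    if (len < 0)
            return len;

    data = kmalloc(len, GFP_KERNEL);
    if (!data)
            return -ENOMEM;

    len = cfg80211_defragment_element(elem, ies, ieslen, data, len, frag_id);
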
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 3c70ad53a49c..9707ab54fdd5 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -30,6 +30,7 @@
FN(TCP_AOFAILURE) \
FN(SOCKET_BACKLOG) \
FN(TCP_FLAGS) \
+ FN(TCP_ABORT_ON_DATA) \
FN(TCP_ZEROWINDOW) \
FN(TCP_OLD_DATA) \
FN(TCP_OVERWINDOW) \
@@ -37,6 +38,7 @@
FN(TCP_RFC7323_PAWS) \
FN(TCP_OLD_SEQUENCE) \
FN(TCP_INVALID_SEQUENCE) \
+ FN(TCP_INVALID_ACK_SEQUENCE) \
FN(TCP_RESET) \
FN(TCP_INVALID_SYN) \
FN(TCP_CLOSE) \
@@ -54,6 +56,7 @@
FN(NEIGH_QUEUEFULL) \
FN(NEIGH_DEAD) \
FN(TC_EGRESS) \
+ FN(SECURITY_HOOK) \
FN(QDISC_DROP) \
FN(CPU_BACKLOG) \
FN(XDP) \
@@ -85,7 +88,10 @@
FN(IPV6_NDISC_BAD_OPTIONS) \
FN(IPV6_NDISC_NS_OTHERHOST) \
FN(QUEUE_PURGE) \
- FN(TC_ERROR) \
+ FN(TC_COOKIE_ERROR) \
+ FN(PACKET_SOCK_ERROR) \
+ FN(TC_CHAIN_NOTFOUND) \
+ FN(TC_RECLASSIFY_LOOP) \
FNe(MAX)
/**
@@ -102,7 +108,13 @@ enum skb_drop_reason {
SKB_CONSUMED,
/** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */
SKB_DROP_REASON_NOT_SPECIFIED,
- /** @SKB_DROP_REASON_NO_SOCKET: socket not found */
+ /**
+ * @SKB_DROP_REASON_NO_SOCKET: no valid socket that can be used.
+ * Reason could be one of three cases:
+ * 1) no established/listening socket found during lookup process
+ * 2) no valid request socket during 3WHS process
+ * 3) no valid child socket during 3WHS process
+ */
SKB_DROP_REASON_NO_SOCKET,
/** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */
SKB_DROP_REASON_PKT_TOO_SMALL,
@@ -195,6 +207,11 @@ enum skb_drop_reason {
/** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */
SKB_DROP_REASON_TCP_FLAGS,
/**
+ * @SKB_DROP_REASON_TCP_ABORT_ON_DATA: abort on data, corresponding to
+ * LINUX_MIB_TCPABORTONDATA
+ */
+ SKB_DROP_REASON_TCP_ABORT_ON_DATA,
+ /**
* @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero,
* see LINUX_MIB_TCPZEROWINDOWDROP
*/
@@ -218,13 +235,19 @@ enum skb_drop_reason {
SKB_DROP_REASON_TCP_OFOMERGE,
/**
* @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to
- * LINUX_MIB_PAWSESTABREJECTED
+ * LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED
*/
SKB_DROP_REASON_TCP_RFC7323_PAWS,
/** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
SKB_DROP_REASON_TCP_OLD_SEQUENCE,
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
+ /**
+ * @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ
+ * field because ack sequence is not in the window between snd_una
+ * and snd_nxt
+ */
+ SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE,
/** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */
SKB_DROP_REASON_TCP_RESET,
/**
@@ -268,6 +291,8 @@ enum skb_drop_reason {
SKB_DROP_REASON_NEIGH_DEAD,
/** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */
SKB_DROP_REASON_TC_EGRESS,
+ /** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */
+ SKB_DROP_REASON_SECURITY_HOOK,
/**
* @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting (
* failed to enqueue to current qdisc)
@@ -376,8 +401,23 @@ enum skb_drop_reason {
SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
/** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */
SKB_DROP_REASON_QUEUE_PURGE,
- /** @SKB_DROP_REASON_TC_ERROR: generic internal tc error. */
- SKB_DROP_REASON_TC_ERROR,
+ /**
+ * @SKB_DROP_REASON_TC_COOKIE_ERROR: An error occurred whilst
+ * processing a tc ext cookie.
+ */
+ SKB_DROP_REASON_TC_COOKIE_ERROR,
+ /**
+ * @SKB_DROP_REASON_PACKET_SOCK_ERROR: generic packet socket errors
+ * after its filter matches an incoming packet.
+ */
+ SKB_DROP_REASON_PACKET_SOCK_ERROR,
+ /** @SKB_DROP_REASON_TC_CHAIN_NOTFOUND: tc chain lookup failed. */
+ SKB_DROP_REASON_TC_CHAIN_NOTFOUND,
+ /**
+ * @SKB_DROP_REASON_TC_RECLASSIFY_LOOP: tc exceeded max reclassify loop
+ * iterations.
+ */
+ SKB_DROP_REASON_TC_RECLASSIFY_LOOP,
/**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
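
The blanket TC_ERROR reason is split into specific ones so each drop site can report what actually failed; an illustrative call site:

    /* e.g. in the classifier path when a goto_chain target is missing */
    kfree_skb_reason(skb, SKB_DROP_REASON_TC_CHAIN_NOTFOUND);
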
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 82135fbdb1e6..7c0da9effe4e 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -991,9 +991,9 @@ struct dsa_switch_ops {
* Port's MAC EEE settings
*/
int (*set_mac_eee)(struct dsa_switch *ds, int port,
- struct ethtool_eee *e);
+ struct ethtool_keee *e);
int (*get_mac_eee)(struct dsa_switch *ds, int port,
- struct ethtool_eee *e);
+ struct ethtool_keee *e);
/* EEPROM access */
int (*get_eeprom_len)(struct dsa_switch *ds);
diff --git a/include/net/dst.h b/include/net/dst.h
index f5dfc8fb7b37..0aa331bd2fdb 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -390,7 +390,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
struct net_device *dev, int initial_obsolete,
unsigned short flags);
-struct dst_entry *dst_destroy(struct dst_entry *dst);
void dst_dev_put(struct dst_entry *dst);
static inline void dst_confirm(struct dst_entry *dst)
diff --git a/include/net/eee.h b/include/net/eee.h
new file mode 100644
index 000000000000..84837aba3cd9
--- /dev/null
+++ b/include/net/eee.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _EEE_H
+#define _EEE_H
+
+#include <linux/types.h>
+
+struct eee_config {
+ u32 tx_lpi_timer;
+ bool tx_lpi_enabled;
+ bool eee_enabled;
+};
+
+static inline bool eeecfg_mac_can_tx_lpi(const struct eee_config *eeecfg)
+{
+ /* eee_enabled is the master on/off */
+ if (!eeecfg->eee_enabled || !eeecfg->tx_lpi_enabled)
+ return false;
+
+ return true;
+}
+
+static inline void eeecfg_to_eee(struct ethtool_keee *eee,
+ const struct eee_config *eeecfg)
+{
+ eee->tx_lpi_timer = eeecfg->tx_lpi_timer;
+ eee->tx_lpi_enabled = eeecfg->tx_lpi_enabled;
+ eee->eee_enabled = eeecfg->eee_enabled;
+}
+
+static inline void eee_to_eeecfg(struct eee_config *eeecfg,
+ const struct ethtool_keee *eee)
+{
+ eeecfg->tx_lpi_timer = eee->tx_lpi_timer;
+ eeecfg->tx_lpi_enabled = eee->tx_lpi_enabled;
+ eeecfg->eee_enabled = eee->eee_enabled;
+}
+
+#endif
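
A sketch of a MAC driver using the new helpers; struct my_mac and the my_mac_*_lpi calls are assumptions:

    static void sketch_apply_eee(struct my_mac *mac,
                                 const struct ethtool_keee *eee)
    {
            eee_to_eeecfg(&mac->eee_cfg, eee);      /* persist the request */

            if (eeecfg_mac_can_tx_lpi(&mac->eee_cfg))
                    my_mac_enable_lpi(mac, mac->eee_cfg.tx_lpi_timer);
            else
                    my_mac_disable_lpi(mac);
    }
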
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 82da359bca03..d17855c52ef9 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -172,8 +172,7 @@ void fib_rules_unregister(struct fib_rules_ops *);
int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
struct fib_lookup_arg *);
-int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table,
- u32 flags);
+int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table);
bool fib_rule_matchall(const struct fib_rule *rule);
int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
struct netlink_ext_ack *extack);
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index e18a4c0d69ee..9ece6e5a3ea8 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -8,10 +8,15 @@
#define GENLMSG_DEFAULT_SIZE (NLMSG_DEFAULT_SIZE - GENL_HDRLEN)
+/* Binding to multicast group requires %CAP_NET_ADMIN */
+#define GENL_MCAST_CAP_NET_ADMIN BIT(0)
+/* Binding to multicast group requires %CAP_SYS_ADMIN */
+#define GENL_MCAST_CAP_SYS_ADMIN BIT(1)
+
/**
* struct genl_multicast_group - generic netlink multicast group
* @name: name of the multicast group, names are per-family
- * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM)
+ * @flags: GENL_MCAST_* flags
*/
struct genl_multicast_group {
char name[GENL_NAMSIZ];
@@ -36,6 +41,8 @@ struct genl_info;
* do additional, common, filtering and return an error
* @post_doit: called after an operation's doit callback, it may
* undo operations done by pre_doit, for example release locks
+ * @bind: called when family multicast group is added to a netlink socket
+ * @unbind: called when family multicast group is removed from a netlink socket
* @module: pointer to the owning module (set to THIS_MODULE)
* @mcgrps: multicast groups used by this family
* @n_mcgrps: number of multicast groups
@@ -49,6 +56,9 @@ struct genl_info;
* @split_ops: the split do/dump form of operation definition
* @n_split_ops: number of entries in @split_ops, note that with split do/dump
* ops the number of entries is not the same as number of commands
+ * @sock_priv_size: the size of per-socket private memory
+ * @sock_priv_init: the per-socket private memory initializer
+ * @sock_priv_destroy: the per-socket private memory destructor
*
* Attribute policies (the combination of @policy and @maxattr fields)
* can be attached at the family level or at the operation level.
@@ -76,17 +86,25 @@ struct genl_family {
void (*post_doit)(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
+ int (*bind)(int mcgrp);
+ void (*unbind)(int mcgrp);
const struct genl_ops * ops;
const struct genl_small_ops *small_ops;
const struct genl_split_ops *split_ops;
const struct genl_multicast_group *mcgrps;
struct module *module;
+ size_t sock_priv_size;
+ void (*sock_priv_init)(void *priv);
+ void (*sock_priv_destroy)(void *priv);
+
/* private: internal use only */
/* protocol family identifier */
int id;
/* starting number of multicast group IDs in this family */
unsigned int mcgrp_offset;
+ /* list of per-socket privs */
+ struct xarray *sock_privs;
};
/**
@@ -135,7 +153,7 @@ static inline void *genl_info_userhdr(const struct genl_info *info)
/* Report that a root attribute is missing */
#define GENL_REQ_ATTR_CHECK(info, attr) ({ \
- struct genl_info *__info = (info); \
+ const struct genl_info *__info = (info); \
\
NL_REQ_ATTR_CHECK(__info->extack, NULL, __info->attrs, (attr)); \
})
@@ -296,6 +314,8 @@ static inline bool genl_info_is_ntf(const struct genl_info *info)
return !info->nlhdr;
}
+void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk);
+void *genl_sk_priv_get(struct genl_family *family, struct sock *sk);
int genl_register_family(struct genl_family *family);
int genl_unregister_family(const struct genl_family *family);
void genl_notify(const struct genl_family *family, struct sk_buff *skb,
@@ -436,6 +456,35 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr)
}
/**
+ * genlmsg_multicast_netns_filtered - multicast a netlink message
+ * to a specific netns with filter
+ * function
+ * @family: the generic netlink family
+ * @net: the net namespace
+ * @skb: netlink message as socket buffer
+ * @portid: own netlink portid to avoid sending to yourself
+ * @group: offset of multicast group in groups array
+ * @flags: allocation flags
+ * @filter: filter function
+ * @filter_data: filter function private data
+ *
+ * Return: 0 on success, negative error code for failure.
+ */
+static inline int
+genlmsg_multicast_netns_filtered(const struct genl_family *family,
+ struct net *net, struct sk_buff *skb,
+ u32 portid, unsigned int group, gfp_t flags,
+ netlink_filter_fn filter,
+ void *filter_data)
+{
+ if (WARN_ON_ONCE(group >= family->n_mcgrps))
+ return -EINVAL;
+ group = family->mcgrp_offset + group;
+ return nlmsg_multicast_filtered(net->genl_sock, skb, portid, group,
+ flags, filter, filter_data);
+}
+
+/**
* genlmsg_multicast_netns - multicast a netlink message to a specific netns
* @family: the generic netlink family
* @net: the net namespace
@@ -448,10 +497,8 @@ static inline int genlmsg_multicast_netns(const struct genl_family *family,
struct net *net, struct sk_buff *skb,
u32 portid, unsigned int group, gfp_t flags)
{
- if (WARN_ON_ONCE(group >= family->n_mcgrps))
- return -EINVAL;
- group = family->mcgrp_offset + group;
- return nlmsg_multicast(net->genl_sock, skb, portid, group, flags);
+ return genlmsg_multicast_netns_filtered(family, net, skb, portid,
+ group, flags, NULL, NULL);
}
/**
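
Usage-wise, the filtered variant lets a notification sender consult a
per-destination-socket predicate. A hedged sketch (foo_family,
FOO_MCGRP_MONITOR and the predicate are illustrative, and the filter's
return-value polarity should be checked against netlink_filter_fn's
callers in net/netlink/af_netlink.c):

    static int foo_mcast_filter(struct sock *dsk, struct sk_buff *skb,
                                void *data)
    {
            /* decide per destination socket whether @skb is delivered;
             * @data is the filter_data pointer supplied by the sender
             */
            return foo_sock_wants_event(dsk, data);    /* hypothetical */
    }

    /* sender side: */
    err = genlmsg_multicast_netns_filtered(&foo_family, net, msg, 0,
                                           FOO_MCGRP_MONITOR, GFP_KERNEL,
                                           foo_mcast_filter, event_ctx);
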
diff --git a/include/net/gro.h b/include/net/gro.h
index b435f0ddbf64..d6fc8fbd3730 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -9,6 +9,7 @@
#include <net/ip6_checksum.h>
#include <linux/skbuff.h>
#include <net/udp.h>
+#include <net/hotdata.h>
struct napi_gro_cb {
union {
@@ -139,21 +140,16 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len)
NAPI_GRO_CB(skb)->data_offset += len;
}
-static inline void *skb_gro_header_fast(struct sk_buff *skb,
+static inline void *skb_gro_header_fast(const struct sk_buff *skb,
unsigned int offset)
{
return NAPI_GRO_CB(skb)->frag0 + offset;
}
-static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen)
+static inline bool skb_gro_may_pull(const struct sk_buff *skb,
+ unsigned int hlen)
{
- return NAPI_GRO_CB(skb)->frag0_len < hlen;
-}
-
-static inline void skb_gro_frag0_invalidate(struct sk_buff *skb)
-{
- NAPI_GRO_CB(skb)->frag0 = NULL;
- NAPI_GRO_CB(skb)->frag0_len = 0;
+ return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len);
}
static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
@@ -162,28 +158,30 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
if (!pskb_may_pull(skb, hlen))
return NULL;
- skb_gro_frag0_invalidate(skb);
return skb->data + offset;
}
-static inline void *skb_gro_header(struct sk_buff *skb,
- unsigned int hlen, unsigned int offset)
+static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen,
+ unsigned int offset)
{
void *ptr;
ptr = skb_gro_header_fast(skb, offset);
- if (skb_gro_header_hard(skb, hlen))
+ if (!skb_gro_may_pull(skb, hlen))
ptr = skb_gro_header_slow(skb, hlen, offset);
return ptr;
}
-static inline void *skb_gro_network_header(struct sk_buff *skb)
+static inline void *skb_gro_network_header(const struct sk_buff *skb)
{
- return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
- skb_network_offset(skb);
+ if (skb_gro_may_pull(skb, skb_gro_offset(skb)))
+ return skb_gro_header_fast(skb, skb_network_offset(skb));
+
+ return skb_network_header(skb);
}
-static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
+static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb,
+ int proto)
{
const struct iphdr *iph = skb_gro_network_header(skb);
@@ -421,7 +419,8 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb)
return uh;
}
-static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto)
+static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
+ int proto)
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
@@ -448,7 +447,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
{
list_add_tail(&skb->list, &napi->rx_list);
napi->rx_count += segs;
- if (napi->rx_count >= READ_ONCE(gro_normal_batch))
+ if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
gro_normal_list(napi);
}
@@ -495,6 +494,4 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *
#endif
}
-extern struct list_head offload_base;
-
#endif /* _NET_IPV6_GRO_H */
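
The gro.h changes above replace the double-negative skb_gro_header_hard()
("is the header *not* in frag0?") with skb_gro_may_pull(), and drop the
explicit frag0 invalidation. A GRO receive callback keeps its usual shape;
a hedged sketch with a hypothetical foohdr:

    static struct sk_buff *foo_gro_receive(struct list_head *head,
                                           struct sk_buff *skb)
    {
            unsigned int off = skb_gro_offset(skb);
            unsigned int hlen = off + sizeof(struct foohdr);
            struct foohdr *fh;

            /* fast path from frag0, falling back to pskb_may_pull() */
            fh = skb_gro_header(skb, hlen, off);
            if (unlikely(!fh))
                    return NULL;    /* header unavailable, skip GRO */

            /* ... match @skb against held packets on @head ... */
            return NULL;
    }
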
diff --git a/include/net/hotdata.h b/include/net/hotdata.h
new file mode 100644
index 000000000000..003667a1efd6
--- /dev/null
+++ b/include/net/hotdata.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_HOTDATA_H
+#define _NET_HOTDATA_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <net/protocol.h>
+
+/* Read mostly data used in network fast paths. */
+struct net_hotdata {
+#if IS_ENABLED(CONFIG_INET)
+ struct packet_offload ip_packet_offload;
+ struct net_offload tcpv4_offload;
+ struct net_protocol tcp_protocol;
+ struct net_offload udpv4_offload;
+ struct net_protocol udp_protocol;
+ struct packet_offload ipv6_packet_offload;
+ struct net_offload tcpv6_offload;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_protocol tcpv6_protocol;
+ struct inet6_protocol udpv6_protocol;
+#endif
+ struct net_offload udpv6_offload;
+#endif
+ struct list_head offload_base;
+ struct list_head ptype_all;
+ struct kmem_cache *skbuff_cache;
+ struct kmem_cache *skbuff_fclone_cache;
+ struct kmem_cache *skb_small_head_cache;
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table __rcu *rps_sock_flow_table;
+ u32 rps_cpu_mask;
+#endif
+ int gro_normal_batch;
+ int netdev_budget;
+ int netdev_budget_usecs;
+ int tstamp_prequeue;
+ int max_backlog;
+ int dev_tx_weight;
+ int dev_rx_weight;
+};
+
+#define inet_ehash_secret net_hotdata.tcp_protocol.secret
+#define udp_ehash_secret net_hotdata.udp_protocol.secret
+#define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret
+#define tcp_ipv6_hash_secret net_hotdata.tcpv6_offload.secret
+#define udp6_ehash_secret net_hotdata.udpv6_protocol.secret
+#define udp_ipv6_hash_secret net_hotdata.udpv6_offload.secret
+
+extern struct net_hotdata net_hotdata;
+
+#endif /* _NET_HOTDATA_H */
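
The idea behind this new header is cache locality: tunables and tables that
the packet fast paths read constantly (GRO batching, backlog limits, the
RPS flow table, skb caches, protocol offload hooks) are collected into one
read-mostly structure instead of being scattered as standalone globals
across different cache lines. Call sites convert mechanically, as the
gro.h hunk above already shows:

    /* before: standalone global */
    if (napi->rx_count >= READ_ONCE(gro_normal_batch))
            gro_normal_list(napi);

    /* after: same logic, load served from the shared hot structure */
    if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
            gro_normal_list(napi);
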
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 3e454c4d7ba6..238ad3349456 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -22,10 +22,6 @@
#define IF_RS_SENT 0x10
#define IF_READY 0x80000000
-/* prefix flags */
-#define IF_PREFIX_ONLINK 0x01
-#define IF_PREFIX_AUTOCONF 0x02
-
enum {
INET6_IFADDR_STATE_PREDAD,
INET6_IFADDR_STATE_DAD,
@@ -148,7 +144,7 @@ struct ipv6_ac_socklist {
struct ifacaddr6 {
struct in6_addr aca_addr;
struct fib6_info *aca_rt;
- struct ifacaddr6 *aca_next;
+ struct ifacaddr6 __rcu *aca_next;
struct hlist_node aca_addr_lst;
int aca_users;
refcount_t aca_refcnt;
@@ -200,7 +196,7 @@ struct inet6_dev {
spinlock_t mc_report_lock; /* mld query report lock */
struct mutex mc_lock; /* mld global lock */
- struct ifacaddr6 *ac_list;
+ struct ifacaddr6 __rcu *ac_list;
rwlock_t lock;
refcount_t refcnt;
__u32 if_flags;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index d0a2f827d5f2..9ab4bf704e86 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -357,4 +357,12 @@ static inline bool inet_csk_has_ulp(const struct sock *sk)
return inet_test_bit(IS_ICSK, sk) && !!inet_csk(sk)->icsk_ulp_ops;
}
+static inline void inet_init_csk_locks(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ spin_lock_init(&icsk->icsk_accept_queue.rskq_lock);
+ spin_lock_init(&icsk->icsk_accept_queue.fastopenq.lock);
+}
+
#endif /* _INET_CONNECTION_SOCK_H */
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 3ecfeadbfa06..7f1b38458743 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -88,7 +88,7 @@ struct inet_bind_bucket {
unsigned short fast_sk_family;
bool fast_ipv6_only;
struct hlist_node node;
- struct hlist_head owners;
+ struct hlist_head bhash2;
};
struct inet_bind2_bucket {
@@ -96,22 +96,17 @@ struct inet_bind2_bucket {
int l3mdev;
unsigned short port;
#if IS_ENABLED(CONFIG_IPV6)
- unsigned short family;
-#endif
- union {
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr v6_rcv_saddr;
+ unsigned short addr_type;
+ struct in6_addr v6_rcv_saddr;
+#define rcv_saddr v6_rcv_saddr.s6_addr32[3]
+#else
+ __be32 rcv_saddr;
#endif
- __be32 rcv_saddr;
- };
/* Node in the bhash2 inet_bind_hashbucket chain */
struct hlist_node node;
+ struct hlist_node bhash_node;
/* List of sockets hashed to this bucket */
struct hlist_head owners;
- /* bhash has twsk in owners, but bhash2 has twsk in
- * deathrow not to add a member in struct sock_common.
- */
- struct hlist_head deathrow;
};
static inline struct net *ib_net(const struct inet_bind_bucket *ib)
@@ -241,7 +236,7 @@ bool inet_bind_bucket_match(const struct inet_bind_bucket *tb,
struct inet_bind2_bucket *
inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
struct inet_bind_hashbucket *head,
- unsigned short port, int l3mdev,
+ struct inet_bind_bucket *tb,
const struct sock *sk);
void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 74db6d97cae1..f9ddd47dc4f8 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -234,10 +234,7 @@ struct inet_sock {
int uc_index;
int mc_index;
__be32 mc_addr;
- struct {
- __u16 lo;
- __u16 hi;
- } local_port_range;
+ u32 local_port_range; /* high << 16 | low */
struct ip_mc_socklist __rcu *mc_list;
struct inet_cork_full cork;
@@ -277,6 +274,7 @@ enum {
INET_FLAGS_REPFLOW = 27,
INET_FLAGS_RTALERT_ISOLATE = 28,
INET_FLAGS_SNDFLOW = 29,
+ INET_FLAGS_RTALERT = 30,
};
/* cmsg flags for inet */
@@ -310,11 +308,6 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
#define inet_assign_bit(nr, sk, val) \
assign_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags, val)
-static inline bool sk_is_inet(struct sock *sk)
-{
- return sk->sk_family == AF_INET || sk->sk_family == AF_INET6;
-}
-
/**
* sk_to_full_sk - Access to a full socket
* @sk: pointer to a socket
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index b14999ff55db..f28da08a37b4 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -75,13 +75,9 @@ struct inet_timewait_sock {
struct timer_list tw_timer;
struct inet_bind_bucket *tw_tb;
struct inet_bind2_bucket *tw_tb2;
- struct hlist_node tw_bind2_node;
};
#define tw_tclass tw_tos
-#define twsk_for_each_bound_bhash2(__tw, list) \
- hlist_for_each_entry(__tw, list, tw_bind2_node)
-
static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
{
return (struct inet_timewait_sock *)sk;
diff --git a/include/net/ioam6.h b/include/net/ioam6.h
index 781d2d8b2f29..2cbbee6e806a 100644
--- a/include/net/ioam6.h
+++ b/include/net/ioam6.h
@@ -12,6 +12,7 @@
#include <linux/net.h>
#include <linux/ipv6.h>
#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
#include <linux/rhashtable-types.h>
struct ioam6_namespace {
@@ -65,4 +66,7 @@ void ioam6_exit(void);
int ioam6_iptunnel_init(void);
void ioam6_iptunnel_exit(void);
+void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp,
+ void *opt, unsigned int opt_len);
+
#endif /* _NET_IOAM6_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 1fc4c8d69e33..25cb688bdc62 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -349,8 +349,14 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
} \
}
-void inet_get_local_port_range(const struct net *net, int *low, int *high);
-void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
+static inline void inet_get_local_port_range(const struct net *net, int *low, int *high)
+{
+ u32 range = READ_ONCE(net->ipv4.ip_local_ports.range);
+
+ *low = range & 0xffff;
+ *high = range >> 16;
+}
+bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
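
inet_get_local_port_range() can become an inline because both bounds now
live in a single u32 (matching the inet_sock change below), so one
READ_ONCE yields a consistent pair. The packing is high << 16 | low, e.g.
the conventional default range 32768..60999 is stored as
(60999 << 16) | 32768 = 0xee478000. The writer's side must publish the
pair with one store; a sketch of that discipline (the helper name is
illustrative, the real writer lives in the sysctl code):

    static void set_local_port_range(struct net *net, int low, int high)
    {
            u32 range = (u32)high << 16 | (u32)low;

            /* single store: readers never observe a torn range */
            WRITE_ONCE(net->ipv4.ip_local_ports.range, range);
    }
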
#ifdef CONFIG_SYSCTL
static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port)
@@ -761,7 +767,7 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
* Functions provided by ip_sockglue.c
*/
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb);
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst);
void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, int tlen, int offset);
int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 1ba9f4ddf2f6..323c94f1845b 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -30,12 +30,6 @@
#define RT6_DEBUG 2
-#if RT6_DEBUG >= 3
-#define RT6_TRACE(x...) pr_debug(x)
-#else
-#define RT6_TRACE(x...) do { ; } while (0)
-#endif
-
struct rt6_info;
struct fib6_info;
@@ -250,6 +244,25 @@ static inline bool fib6_requires_src(const struct fib6_info *rt)
return rt->fib6_src.plen > 0;
}
+/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
+ * been added to a table before.
+ */
+static inline void fib6_clean_expires(struct fib6_info *f6i)
+{
+ f6i->fib6_flags &= ~RTF_EXPIRES;
+ f6i->expires = 0;
+}
+
+/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
+ * been added to a table before.
+ */
+static inline void fib6_set_expires(struct fib6_info *f6i,
+ unsigned long expires)
+{
+ f6i->expires = expires;
+ f6i->fib6_flags |= RTF_EXPIRES;
+}
+
static inline bool fib6_check_expired(const struct fib6_info *f6i)
{
if (f6i->fib6_flags & RTF_EXPIRES)
@@ -257,11 +270,6 @@ static inline bool fib6_check_expired(const struct fib6_info *f6i)
return false;
}
-static inline bool fib6_has_expires(const struct fib6_info *f6i)
-{
- return f6i->fib6_flags & RTF_EXPIRES;
-}
-
/* Function to safely get fn->fn_sernum for passed in rt
* and store result in passed in cookie.
* Return true if we can get cookie safely
@@ -328,8 +336,10 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i)
static inline void fib6_info_release(struct fib6_info *f6i)
{
- if (f6i && refcount_dec_and_test(&f6i->fib6_ref))
+ if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) {
+ DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link));
call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
+ }
}
enum fib6_walk_state {
@@ -500,46 +510,36 @@ void fib6_gc_cleanup(void);
int fib6_init(void);
-/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be
- * NULL.
+/* Add the route to the gc list if it is not already there.
+ *
+ * The callers should hold f6i->fib6_table->tb6_lock.
*/
-static inline void fib6_set_expires_locked(struct fib6_info *f6i,
- unsigned long expires)
+static inline void fib6_add_gc_list(struct fib6_info *f6i)
{
- struct fib6_table *tb6;
+ /* If fib6_node is NULL, the f6i is not in the table (or has been
+ * removed from it).
+ *
+ * There is a gap between finding the f6i in the table and
+ * calling this function without the protection of the tb6_lock.
+ * This check makes sure the f6i is not added to the gc list when
+ * it is not in the table.
+ */
+ if (!rcu_dereference_protected(f6i->fib6_node,
+ lockdep_is_held(&f6i->fib6_table->tb6_lock)))
+ return;
- tb6 = f6i->fib6_table;
- f6i->expires = expires;
- if (tb6 && !fib6_has_expires(f6i))
- hlist_add_head(&f6i->gc_link, &tb6->tb6_gc_hlist);
- f6i->fib6_flags |= RTF_EXPIRES;
+ if (hlist_unhashed(&f6i->gc_link))
+ hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist);
}
-/* fib6_info must be locked by the caller, and fib6_info->fib6_table can be
- * NULL. If fib6_table is NULL, the fib6_info will no be inserted into the
- * list of GC candidates until it is inserted into a table.
+/* Remove the route from the gc list if it is on the list.
+ *
+ * The callers should hold f6i->fib6_table->tb6_lock.
*/
-static inline void fib6_set_expires(struct fib6_info *f6i,
- unsigned long expires)
+static inline void fib6_remove_gc_list(struct fib6_info *f6i)
{
- spin_lock_bh(&f6i->fib6_table->tb6_lock);
- fib6_set_expires_locked(f6i, expires);
- spin_unlock_bh(&f6i->fib6_table->tb6_lock);
-}
-
-static inline void fib6_clean_expires_locked(struct fib6_info *f6i)
-{
- if (fib6_has_expires(f6i))
+ if (!hlist_unhashed(&f6i->gc_link))
hlist_del_init(&f6i->gc_link);
- f6i->fib6_flags &= ~RTF_EXPIRES;
- f6i->expires = 0;
-}
-
-static inline void fib6_clean_expires(struct fib6_info *f6i)
-{
- spin_lock_bh(&f6i->fib6_table->tb6_lock);
- fib6_clean_expires_locked(f6i);
- spin_unlock_bh(&f6i->fib6_table->tb6_lock);
}
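
Expiry flagging and GC-list membership are now two explicit steps under one
lock, replacing the old _locked/unlocked helper pairs that took the lock
themselves. A hedged sketch of the caller pattern this expects, mirroring
how RA-derived lifetimes are applied (the lifetime variable is
illustrative):

    spin_lock_bh(&f6i->fib6_table->tb6_lock);
    if (lifetime) {
            fib6_set_expires(f6i, jiffies + HZ * lifetime);
            fib6_add_gc_list(f6i);      /* make the GC walker see it */
    } else {
            fib6_clean_expires(f6i);
            fib6_remove_gc_list(f6i);
    }
    spin_unlock_bh(&f6i->fib6_table->tb6_lock);
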
struct ipv6_route_iter {
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 28b065790261..a30c6aa9e5cf 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -170,7 +170,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
struct fib6_info *rt6_add_dflt_router(struct net *net,
const struct in6_addr *gwaddr,
struct net_device *dev, unsigned int pref,
- u32 defrtr_usr_metric);
+ u32 defrtr_usr_metric,
+ int lifetime);
void rt6_purge_dflt_routers(struct net *net);
@@ -331,7 +332,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
if (idev)
- mtu = idev->cnf.mtu6;
+ mtu = READ_ONCE(idev->cnf.mtu6);
rcu_read_unlock();
out:
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index d4667b7797e3..9b2f69ba5e49 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -264,6 +264,7 @@ struct fib_dump_filter {
bool filter_set;
bool dump_routes;
bool dump_exceptions;
+ bool rtnl_held;
unsigned char protocol;
unsigned char rt_type;
unsigned int flags;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index f346b4efbc30..5cd64bb2104d 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -284,7 +284,8 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname);
void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
- struct rtnl_link_ops *ops);
+ struct rtnl_link_ops *ops,
+ struct list_head *dev_to_kill);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
@@ -416,6 +417,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
return 0;
}
+static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph,
+ const struct sk_buff *skb)
+{
+ __be16 payload_protocol = skb_protocol(skb, true);
+
+ if (payload_protocol == htons(ETH_P_IPV6))
+ return ip6_flowlabel((const struct ipv6hdr *)iph);
+ else
+ return 0;
+}
+
static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
const struct sk_buff *skb)
{
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 78d38dd88aba..88a8e554f7a1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -534,13 +534,15 @@ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb)
return 0;
}
-static inline bool ipv6_accept_ra(struct inet6_dev *idev)
+static inline bool ipv6_accept_ra(const struct inet6_dev *idev)
{
+ s32 accept_ra = READ_ONCE(idev->cnf.accept_ra);
+
/* If forwarding is enabled, RA are not accepted unless the special
* hybrid mode (accept_ra=2) is enabled.
*/
- return idev->cnf.forwarding ? idev->cnf.accept_ra == 2 :
- idev->cnf.accept_ra;
+ return READ_ONCE(idev->cnf.forwarding) ? accept_ra == 2 :
+ accept_ra;
}
#define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */
@@ -784,11 +786,6 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
cpu_to_be32(0x0000ffff))) == 0UL;
}
-static inline bool ipv6_addr_v4mapped_any(const struct in6_addr *a)
-{
- return ipv6_addr_v4mapped(a) && ipv4_is_zeronet(a->s6_addr32[3]);
-}
-
static inline bool ipv6_addr_v4mapped_loopback(const struct in6_addr *a)
{
return ipv6_addr_v4mapped(a) && ipv4_is_loopback(a->s6_addr32[3]);
diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h
index f9e88401d7da..8b2055d64a6b 100644
--- a/include/net/iucv/iucv.h
+++ b/include/net/iucv/iucv.h
@@ -80,7 +80,7 @@ struct iucv_array {
u32 length;
} __attribute__ ((aligned (8)));
-extern struct bus_type iucv_bus;
+extern const struct bus_type iucv_bus;
extern struct device *iucv_root;
/*
@@ -489,7 +489,7 @@ struct iucv_interface {
int (*path_sever)(struct iucv_path *path, u8 userdata[16]);
int (*iucv_register)(struct iucv_handler *handler, int smp);
void (*iucv_unregister)(struct iucv_handler *handler, int smp);
- struct bus_type *bus;
+ const struct bus_type *bus;
struct device *root;
};
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index 7e73f8e5e497..1d55ba7c45be 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -262,8 +262,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
*/
static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa)
{
- if (skb->protocol == htons(ETH_P_802_2))
- memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN);
+ memcpy(sa, eth_hdr(skb)->h_source, ETH_ALEN);
}
/**
@@ -275,8 +274,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa)
*/
static inline void llc_pdu_decode_da(struct sk_buff *skb, u8 *da)
{
- if (skb->protocol == htons(ETH_P_802_2))
- memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN);
+ memcpy(da, eth_hdr(skb)->h_dest, ETH_ALEN);
}
/**
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 580781ff9dcf..353488ab94a2 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7,7 +7,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*/
#ifndef MAC80211_H
@@ -214,6 +214,10 @@ struct ieee80211_low_level_stats {
* @IEEE80211_CHANCTX_CHANGE_CHANNEL: switched to another operating channel,
* this is used only with channel switching with CSA
* @IEEE80211_CHANCTX_CHANGE_MIN_WIDTH: The min required channel width changed
+ * @IEEE80211_CHANCTX_CHANGE_AP: The AP channel definition changed, so (wider
+ * bandwidth) OFDMA settings need to be changed
+ * @IEEE80211_CHANCTX_CHANGE_PUNCTURING: The punctured channel(s) bitmap
+ * was changed.
*/
enum ieee80211_chanctx_change {
IEEE80211_CHANCTX_CHANGE_WIDTH = BIT(0),
@@ -221,6 +225,19 @@ enum ieee80211_chanctx_change {
IEEE80211_CHANCTX_CHANGE_RADAR = BIT(2),
IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(3),
IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4),
+ IEEE80211_CHANCTX_CHANGE_AP = BIT(5),
+ IEEE80211_CHANCTX_CHANGE_PUNCTURING = BIT(6),
+};
+
+/**
+ * struct ieee80211_chan_req - A channel "request"
+ * @oper: channel definition to use for operation
+ * @ap: the channel definition of the AP, if any
+ * (otherwise the chan member is %NULL)
+ */
+struct ieee80211_chan_req {
+ struct cfg80211_chan_def oper;
+ struct cfg80211_chan_def ap;
};
/**
@@ -231,6 +248,8 @@ enum ieee80211_chanctx_change {
*
* @def: the channel definition
* @min_def: the minimum channel definition currently required.
 * @ap: the channel definition the AP is actually operating with,
+ * for use with (wider bandwidth) OFDMA
* @rx_chains_static: The number of RX chains that must always be
* active on the channel to receive MIMO transmissions
* @rx_chains_dynamic: The number of RX chains that must be enabled
@@ -243,6 +262,7 @@ enum ieee80211_chanctx_change {
struct ieee80211_chanctx_conf {
struct cfg80211_chan_def def;
struct cfg80211_chan_def min_def;
+ struct cfg80211_chan_def ap;
u8 rx_chains_static, rx_chains_dynamic;
@@ -340,8 +360,8 @@ struct ieee80211_vif_chanctx_switch {
* @BSS_CHANGED_FILS_DISCOVERY: FILS discovery status changed.
* @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response
* status changed.
- * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed.
* @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed.
+ * @BSS_CHANGED_MLD_TTLM: TID to link mapping was changed
*/
enum ieee80211_bss_change {
BSS_CHANGED_ASSOC = 1<<0,
@@ -376,8 +396,8 @@ enum ieee80211_bss_change {
BSS_CHANGED_HE_BSS_COLOR = 1<<29,
BSS_CHANGED_FILS_DISCOVERY = 1<<30,
BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31,
- BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32),
BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33),
+ BSS_CHANGED_MLD_TTLM = BIT_ULL(34),
/* when adding here, make sure to change ieee80211_reconfig */
};
@@ -476,9 +496,9 @@ struct ieee80211_ba_event {
/**
* struct ieee80211_event - event to be sent to the driver
* @type: The event itself. See &enum ieee80211_event_type.
- * @rssi: relevant if &type is %RSSI_EVENT
- * @mlme: relevant if &type is %AUTH_EVENT
- * @ba: relevant if &type is %BAR_RX_EVENT or %BA_FRAME_TIMEOUT
+ * @u.rssi: relevant if &type is %RSSI_EVENT
+ * @u.mlme: relevant if &type is %AUTH_EVENT
+ * @u.ba: relevant if &type is %BAR_RX_EVENT or %BA_FRAME_TIMEOUT
+ * @u: union holding the fields above
*/
struct ieee80211_event {
@@ -537,12 +557,14 @@ struct ieee80211_fils_discovery {
* to that BSS) that can change during the lifetime of the BSS.
*
* @vif: reference to owning VIF
+ * @bss: the cfg80211 bss descriptor. Valid only for a station, and only
+ * when associated. Note: This contains information which is not
+ * necessarily authenticated. For example, information coming from probe
+ * responses.
* @addr: (link) address used locally
* @link_id: link ID, or 0 for non-MLO
* @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE
* @uora_exists: is the UORA element advertised by AP
- * @ack_enabled: indicates support to receive a multi-TID that solicits either
- * ACK, BACK or both
* @uora_ocw_range: UORA element's OCW Range field
* @frame_time_rts_th: HE duration RTS threshold, in units of 32us
* @he_support: does this BSS support HE
@@ -583,7 +605,7 @@ struct ieee80211_fils_discovery {
* @mcast_rate: per-band multicast rate index + 1 (0: disabled)
* @bssid: The BSSID for this BSS
* @enable_beacon: whether beaconing should be enabled or not
- * @chandef: Channel definition for this BSS -- the hardware might be
+ * @chanreq: Channel request for this BSS -- the hardware might be
* configured a higher bandwidth than this BSS uses, for example.
* @mu_group: VHT MU-MIMO group membership data
* @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation.
@@ -644,9 +666,7 @@ struct ieee80211_fils_discovery {
* @tx_pwr_env_num: number of @tx_pwr_env.
* @pwr_reduction: power constraint of BSS.
* @eht_support: does this BSS support EHT
- * @eht_puncturing: bitmap to indicate which channels are punctured in this BSS
* @csa_active: marks whether a channel switch is going on.
- * @csa_punct_bitmap: new puncturing bitmap for channel switch
* @mu_mimo_owner: indicates interface owns MU-MIMO capability
* @chanctx_conf: The channel context this interface is assigned to, or %NULL
* when it is not assigned. This pointer is RCU-protected due to the TX
@@ -684,6 +704,7 @@ struct ieee80211_fils_discovery {
*/
struct ieee80211_bss_conf {
struct ieee80211_vif *vif;
+ struct cfg80211_bss *bss;
const u8 *bssid;
unsigned int link_id;
@@ -716,7 +737,7 @@ struct ieee80211_bss_conf {
u32 cqm_rssi_hyst;
s32 cqm_rssi_low;
s32 cqm_rssi_high;
- struct cfg80211_chan_def chandef;
+ struct ieee80211_chan_req chanreq;
struct ieee80211_mu_group_data mu_group;
bool qos;
bool hidden_ssid;
@@ -749,10 +770,8 @@ struct ieee80211_bss_conf {
u8 tx_pwr_env_num;
u8 pwr_reduction;
bool eht_support;
- u16 eht_puncturing;
bool csa_active;
- u16 csa_punct_bitmap;
bool mu_mimo_owner;
struct ieee80211_chanctx_conf __rcu *chanctx_conf;
@@ -1150,11 +1169,6 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate)
* @ack: union part for pure ACK data
* @ack.cookie: cookie for the ACK
* @driver_data: array of driver_data pointers
- * @ampdu_ack_len: number of acked aggregated frames.
- * relevant only if IEEE80211_TX_STAT_AMPDU was set.
- * @ampdu_len: number of aggregated frames.
- * relevant only if IEEE80211_TX_STAT_AMPDU was set.
- * @ack_signal: signal strength of the ACK frame
*/
struct ieee80211_tx_info {
/* common information */
@@ -1362,6 +1376,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
* the frame.
 * @RX_FLAG_FAILED_PLCP_CRC: Set this flag if the PLCP check failed on
* the frame.
+ * @RX_FLAG_MACTIME: The timestamp passed in the RX status (@mactime
+ * field) is valid if this field is non-zero, and the position
+ * where the timestamp was sampled depends on the value.
* @RX_FLAG_MACTIME_START: The timestamp passed in the RX status (@mactime
* field) is valid and contains the time the first symbol of the MPDU
* was received. This is useful in monitor mode and for proper IBSS
@@ -1371,6 +1388,11 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
* (including FCS) was received.
* @RX_FLAG_MACTIME_PLCP_START: The timestamp passed in the RX status (@mactime
* field) is valid and contains the time the SYNC preamble was received.
+ * @RX_FLAG_MACTIME_IS_RTAP_TS64: The timestamp passed in the RX status @mactime
+ * is only for use in the radiotap timestamp header, not otherwise a valid
+ * @mactime value. Note this is a separate flag so that we continue to see
+ * %RX_FLAG_MACTIME as unset. Also note that in this case the timestamp is
+ * reported to be 64 bits wide, not just 32.
* @RX_FLAG_NO_SIGNAL_VAL: The signal strength value is not present.
* Valid only for data frames (mainly A-MPDU)
* @RX_FLAG_AMPDU_DETAILS: A-MPDU details are known, in particular the reference
@@ -1441,12 +1463,12 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
enum mac80211_rx_flags {
RX_FLAG_MMIC_ERROR = BIT(0),
RX_FLAG_DECRYPTED = BIT(1),
- RX_FLAG_MACTIME_PLCP_START = BIT(2),
+ RX_FLAG_ONLY_MONITOR = BIT(2),
RX_FLAG_MMIC_STRIPPED = BIT(3),
RX_FLAG_IV_STRIPPED = BIT(4),
RX_FLAG_FAILED_FCS_CRC = BIT(5),
RX_FLAG_FAILED_PLCP_CRC = BIT(6),
- RX_FLAG_MACTIME_START = BIT(7),
+ RX_FLAG_MACTIME_IS_RTAP_TS64 = BIT(7),
RX_FLAG_NO_SIGNAL_VAL = BIT(8),
RX_FLAG_AMPDU_DETAILS = BIT(9),
RX_FLAG_PN_VALIDATED = BIT(10),
@@ -1455,8 +1477,10 @@ enum mac80211_rx_flags {
RX_FLAG_AMPDU_IS_LAST = BIT(13),
RX_FLAG_AMPDU_DELIM_CRC_ERROR = BIT(14),
RX_FLAG_AMPDU_DELIM_CRC_KNOWN = BIT(15),
- RX_FLAG_MACTIME_END = BIT(16),
- RX_FLAG_ONLY_MONITOR = BIT(17),
+ RX_FLAG_MACTIME = BIT(16) | BIT(17),
+ RX_FLAG_MACTIME_PLCP_START = 1 << 16,
+ RX_FLAG_MACTIME_START = 2 << 16,
+ RX_FLAG_MACTIME_END = 3 << 16,
RX_FLAG_SKIP_MONITOR = BIT(18),
RX_FLAG_AMSDU_MORE = BIT(19),
RX_FLAG_RADIOTAP_TLV_AT_END = BIT(20),
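
Note the repacking here: bits 16 and 17 are no longer independent flags but
a two-bit field whose mask is RX_FLAG_MACTIME, with three non-zero values
encoding where the timestamp was sampled (and the freed BIT(7) reused for
the radiotap-only 64-bit case). Consumers mask first, then compare;
roughly:

    switch (status->flag & RX_FLAG_MACTIME) {
    case RX_FLAG_MACTIME_PLCP_START:    /* 1 << 16 */
    case RX_FLAG_MACTIME_START:         /* 2 << 16 */
    case RX_FLAG_MACTIME_END:           /* 3 << 16 */
            /* @mactime is valid; its sampling point depends on value */
            break;
    case 0:
            /* no valid MAC timestamp */
            break;
    }
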
@@ -1739,8 +1763,9 @@ struct ieee80211_conf {
* @chandef: the new channel to switch to
* @count: the number of TBTT's until the channel switch event
* @delay: maximum delay between the time the AP transmitted the last beacon in
- * current channel and the expected time of the first beacon in the new
- * channel, expressed in TU.
+ * current channel and the expected time of the first beacon in the new
+ * channel, expressed in TU.
+ * @link_id: the link ID of the link doing the channel switch, 0 for non-MLO
*/
struct ieee80211_channel_switch {
u64 timestamp;
@@ -1748,6 +1773,7 @@ struct ieee80211_channel_switch {
bool block_tx;
struct cfg80211_chan_def chandef;
u8 count;
+ u8 link_id;
u32 delay;
};
@@ -1769,6 +1795,10 @@ struct ieee80211_channel_switch {
* this is not pure P2P vif.
* @IEEE80211_VIF_EML_ACTIVE: The driver indicates that EML operation is
* enabled for the interface.
+ * @IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW: Ignore wider bandwidth OFDMA
+ * operation on this interface and request a channel context without
+ * the AP definition. Use this e.g. because the device is able to
+ * handle OFDMA (downlink and trigger for uplink) on a per-AP basis.
*/
enum ieee80211_vif_flags {
IEEE80211_VIF_BEACON_FILTER = BIT(0),
@@ -1776,6 +1806,7 @@ enum ieee80211_vif_flags {
IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2),
IEEE80211_VIF_GET_NOA_UPDATE = BIT(3),
IEEE80211_VIF_EML_ACTIVE = BIT(4),
+ IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW = BIT(5),
};
@@ -1805,9 +1836,11 @@ enum ieee80211_offload_flags {
* @ps: power-save mode (STA only). This flag is NOT affected by
* offchannel/dynamic_ps operations.
* @aid: association ID number, valid only when @assoc is true
- * @eml_cap: EML capabilities as described in P802.11be_D2.2 Figure 9-1002k.
+ * @eml_cap: EML capabilities as described in P802.11be_D4.1 Figure 9-1001j.
* @eml_med_sync_delay: Medium Synchronization delay as described in
- * P802.11be_D2.2 Figure 9-1002j.
+ * P802.11be_D4.1 Figure 9-1001i.
+ * @mld_capa_op: MLD Capabilities and Operations per P802.11be_D4.1
+ * Figure 9-1001k
 * @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The
 * hardware may filter ARP queries targeted for other addresses than listed
 * here. The driver must allow ARP queries targeted for all addresses listed here
@@ -1832,6 +1865,7 @@ struct ieee80211_vif_cfg {
u16 aid;
u16 eml_cap;
u16 eml_med_sync_delay;
+ u16 mld_capa_op;
__be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN];
int arp_addr_cnt;
@@ -1842,6 +1876,35 @@ struct ieee80211_vif_cfg {
u8 ap_addr[ETH_ALEN] __aligned(2);
};
+#define IEEE80211_TTLM_NUM_TIDS 8
+
+/**
+ * struct ieee80211_neg_ttlm - negotiated TID to link map info
+ *
+ * @downlink: bitmap of active links per TID for downlink, or 0 if mapping for
+ * this TID is not included.
+ * @uplink: bitmap of active links per TID for uplink, or 0 if mapping for this
+ * TID is not included.
+ * @valid: info is valid or not.
+ */
+struct ieee80211_neg_ttlm {
+ u16 downlink[IEEE80211_TTLM_NUM_TIDS];
+ u16 uplink[IEEE80211_TTLM_NUM_TIDS];
+ bool valid;
+};
+
+/**
+ * enum ieee80211_neg_ttlm_res - return value for negotiated TTLM handling
+ * @NEG_TTLM_RES_ACCEPT: accept the request
+ * @NEG_TTLM_RES_REJECT: reject the request
+ * @NEG_TTLM_RES_SUGGEST_PREFERRED: reject and suggest a new mapping
+ */
+enum ieee80211_neg_ttlm_res {
+ NEG_TTLM_RES_ACCEPT,
+ NEG_TTLM_RES_REJECT,
+ NEG_TTLM_RES_SUGGEST_PREFERRED
+};
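
The driver-side hook for this (can_neg_ttlm, declared further down in
struct ieee80211_ops) inspects the proposed mapping and may rewrite @ttlm
in place when suggesting an alternative. A hedged sketch with a made-up
hardware constraint:

    static enum ieee80211_neg_ttlm_res
    foo_can_neg_ttlm(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
                     struct ieee80211_neg_ttlm *ttlm)
    {
            int tid;

            for (tid = 0; tid < IEEE80211_TTLM_NUM_TIDS; tid++) {
                    /* hypothetical: this device needs DL == UL per TID */
                    if (ttlm->downlink[tid] != ttlm->uplink[tid]) {
                            ttlm->uplink[tid] = ttlm->downlink[tid];
                            return NEG_TTLM_RES_SUGGEST_PREFERRED;
                    }
            }
            return NEG_TTLM_RES_ACCEPT;
    }
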
+
/**
* struct ieee80211_vif - per-interface data
*
@@ -1860,6 +1923,11 @@ struct ieee80211_vif_cfg {
* API calls meant for that purpose.
* @dormant_links: bitmap of valid but disabled links, or 0 for non-MLO.
* Must be a subset of valid_links.
+ * @suspended_links: subset of dormant_links representing links that are
+ * suspended; 0 for non-MLO.
+ * @neg_ttlm: negotiated TID to link mapping info.
+ * see &struct ieee80211_neg_ttlm.
* @addr: address of this interface
* @p2p: indicates whether this AP or STA interface is a p2p
* interface, i.e. a GO or p2p-sta respectively
@@ -1897,7 +1965,8 @@ struct ieee80211_vif {
struct ieee80211_vif_cfg cfg;
struct ieee80211_bss_conf bss_conf;
struct ieee80211_bss_conf __rcu *link_conf[IEEE80211_MLD_MAX_NUM_LINKS];
- u16 valid_links, active_links, dormant_links;
+ u16 valid_links, active_links, dormant_links, suspended_links;
+ struct ieee80211_neg_ttlm neg_ttlm;
u8 addr[ETH_ALEN] __aligned(2);
bool p2p;
@@ -1944,6 +2013,21 @@ static inline bool ieee80211_vif_is_mld(const struct ieee80211_vif *vif)
return vif->valid_links != 0;
}
+/**
+ * ieee80211_vif_link_active - check if a given link is active
+ * @vif: the vif
+ * @link_id: the link ID to check
+ * Return: %true if the vif is an MLD and the link is active, or if
+ * the vif is not an MLD and the link ID is 0; %false otherwise.
+ */
+static inline bool ieee80211_vif_link_active(const struct ieee80211_vif *vif,
+ unsigned int link_id)
+{
+ if (!ieee80211_vif_is_mld(vif))
+ return link_id == 0;
+ return vif->active_links & BIT(link_id);
+}
+
#define for_each_vif_active_link(vif, link, link_id) \
for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \
if ((!(vif)->active_links || \
@@ -2038,6 +2122,8 @@ static inline bool lockdep_vif_wiphy_mutex_held(struct ieee80211_vif *vif)
* @IEEE80211_KEY_FLAG_GENERATE_MMIE: This flag should be set by the driver
* for a AES_CMAC key to indicate that it requires sequence number
* generation only
+ * @IEEE80211_KEY_FLAG_SPP_AMSDU: SPP A-MSDUs can be used with this key
+ * (set by mac80211 from the sta->spp_amsdu flag)
*/
enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0),
@@ -2051,6 +2137,7 @@ enum ieee80211_key_flags {
IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8),
IEEE80211_KEY_FLAG_NO_AUTO_TX = BIT(9),
IEEE80211_KEY_FLAG_GENERATE_MMIE = BIT(10),
+ IEEE80211_KEY_FLAG_SPP_AMSDU = BIT(11),
};
/**
@@ -2349,6 +2436,7 @@ struct ieee80211_link_sta {
* would be assigned to link[link_id] where link_id is the id assigned
* by the AP.
* @valid_links: bitmap of valid links, or 0 for non-MLO
+ * @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not.
*/
struct ieee80211_sta {
u8 addr[ETH_ALEN];
@@ -2362,6 +2450,7 @@ struct ieee80211_sta {
bool tdls_initiator;
bool mfp;
bool mlo;
+ bool spp_amsdu;
u8 max_amsdu_subframes;
struct ieee80211_sta_aggregates *cur;
@@ -2686,6 +2775,14 @@ struct ieee80211_txq {
* @IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX: Hardware/driver handles transmitting
* multicast frames on all links, mac80211 should not do that.
*
+ * @IEEE80211_HW_DISALLOW_PUNCTURING: HW requires disabling puncturing in EHT
+ * and connecting with a lower bandwidth instead
+ *
+ * @IEEE80211_HW_HANDLES_QUIET_CSA: HW/driver handles quieting for CSA, so
+ * no need to stop queues. This really should be set by a driver that
+ * implements MLO, so operation can continue on other links when one
+ * link is switching.
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2743,6 +2840,8 @@ enum ieee80211_hw_flags {
IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP,
IEEE80211_HW_DETECTS_COLOR_COLLISION,
IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX,
+ IEEE80211_HW_DISALLOW_PUNCTURING,
+ IEEE80211_HW_HANDLES_QUIET_CSA,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -2831,8 +2930,6 @@ enum ieee80211_hw_flags {
* the default is _GI | _BANDWIDTH.
* Use the %IEEE80211_RADIOTAP_VHT_KNOWN_\* values.
*
- * @radiotap_he: HE radiotap validity flags
- *
* @radiotap_timestamp: Information for the radiotap timestamp field; if the
* @units_pos member is set to a non-negative value then the timestamp
* field will be added and populated from the &struct ieee80211_rx_status
@@ -4177,7 +4274,7 @@ struct ieee80211_prep_tx_info {
* after a channel switch procedure is completed, allowing the
* driver to go back to a normal configuration.
* @abort_channel_switch: This is an optional callback that is called
- * when channel switch procedure was completed, allowing the
+ * when channel switch procedure was aborted, allowing the
* driver to go back to a normal configuration.
* @channel_switch_rx_beacon: This is an optional callback that is called
* when channel switch procedure is in progress and additional beacon with
@@ -4267,6 +4364,8 @@ struct ieee80211_prep_tx_info {
* disable background CAC/radar detection.
* @net_fill_forward_path: Called from .ndo_fill_forward_path in order to
* resolve a path for hardware flow offloading
+ * @can_activate_links: Checks if a specific active_links bitmap is
+ * supported by the driver.
* @change_vif_links: Change the valid links on an interface, note that while
* removing the old link information is still valid (link_conf pointer),
* but may immediately disappear after the function returns. The old or
@@ -4286,6 +4385,10 @@ struct ieee80211_prep_tx_info {
* flow offloading for flows originating from the vif.
* Note that the driver must not assume that the vif driver_data is valid
* at this point, since the callback can be called during netdev teardown.
+ * @can_neg_ttlm: for a managed interface, requests the driver to determine
+ * if the requested TID-to-link mapping can be accepted or not.
+ * If it is not accepted, the driver may suggest a preferred mapping and
+ * modify the @ttlm parameter with the suggested TID-to-link mapping.
*/
struct ieee80211_ops {
void (*tx)(struct ieee80211_hw *hw,
@@ -4567,7 +4670,8 @@ struct ieee80211_ops {
struct ieee80211_vif *vif,
struct ieee80211_bss_conf *link_conf);
void (*abort_channel_switch)(struct ieee80211_hw *hw,
- struct ieee80211_vif *vif);
+ struct ieee80211_vif *vif,
+ struct ieee80211_bss_conf *link_conf);
void (*channel_switch_rx_beacon)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
struct ieee80211_channel_switch *ch_switch);
@@ -4647,6 +4751,9 @@ struct ieee80211_ops {
struct ieee80211_sta *sta,
struct net_device_path_ctx *ctx,
struct net_device_path *path);
+ bool (*can_activate_links)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ u16 active_links);
int (*change_vif_links)(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
u16 old_links, u16 new_links,
@@ -4663,6 +4770,9 @@ struct ieee80211_ops {
struct net_device *dev,
enum tc_setup_type type,
void *type_data);
+ enum ieee80211_neg_ttlm_res
+ (*can_neg_ttlm)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_neg_ttlm *ttlm);
};
/**
@@ -5445,6 +5555,7 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
/**
* ieee80211_beacon_update_cntdwn - request mac80211 to decrement the beacon countdown
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO
*
* The beacon counter should be updated after each beacon transmission.
* This function is called implicitly when
@@ -5454,7 +5565,8 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
*
* Return: new countdown value
*/
-u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif);
+u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif,
+ unsigned int link_id);
/**
* ieee80211_beacon_set_cntdwn - request mac80211 to set beacon countdown
@@ -5472,20 +5584,23 @@ void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter);
/**
* ieee80211_csa_finish - notify mac80211 about channel switch
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO
*
* After a channel switch announcement was scheduled and the counter in this
* announcement hits 1, this function must be called by the driver to
* notify mac80211 that the channel can be changed.
*/
-void ieee80211_csa_finish(struct ieee80211_vif *vif);
+void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id);
/**
* ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @link_id: valid link_id during MLO or 0 for non-MLO
*
* This function returns whether the countdown reached zero.
*/
-bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif);
+bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif,
+ unsigned int link_id);
/**
* ieee80211_color_change_finish - notify mac80211 about color change
@@ -5809,12 +5924,11 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf,
* ieee80211_remove_key - remove the given key
* @keyconf: the parameter passed with the set key
*
+ * Context: Must be called with the wiphy mutex held.
+ *
* Remove the given key. If the key was uploaded to the hardware at the
* time this function is called, it is not deleted in the hardware but
* instead assumed to have been removed already.
- *
- * Note that due to locking considerations this function can (currently)
- * only be called during key iteration (ieee80211_iter_keys().)
*/
void ieee80211_remove_key(struct ieee80211_key_conf *keyconf);
@@ -5822,6 +5936,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf);
* ieee80211_gtk_rekey_add - add a GTK key from rekeying during WoWLAN
* @vif: the virtual interface to add the key on
* @keyconf: new key data
+ * @link_id: the link id of the key or -1 for non-MLO
*
* When GTK rekeying was done while the system was suspended, (a) new
* key(s) will be available. These will be needed by mac80211 for proper
@@ -5849,7 +5964,8 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf);
*/
struct ieee80211_key_conf *
ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
- struct ieee80211_key_conf *keyconf);
+ struct ieee80211_key_conf *keyconf,
+ int link_id);
/**
* ieee80211_gtk_rekey_notify - notify userspace supplicant of rekeying
@@ -6368,12 +6484,12 @@ ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
* @iter: iterator function that will be called for each key
* @iter_data: custom data to pass to the iterator function
*
+ * Context: Must be called with wiphy mutex held; can sleep.
+ *
* This function can be used to iterate all the keys known to
* mac80211, even those that weren't previously programmed into
* the device. This is intended for use in WoWLAN if the device
- * needs reprogramming of the keys during suspend. Note that due
- * to locking reasons, it is also only safe to call this at few
- * spots since it must hold the RTNL and be able to sleep.
+ * needs reprogramming of the keys during suspend.
*
* The order in which the keys are iterated matches the order
* in which they were originally installed and handed to the
@@ -7407,11 +7523,10 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw,
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is
* aware of.
- * @gfp: allocation flags
*/
void
ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
- u64 color_bitmap, gfp_t gfp);
+ u64 color_bitmap);
/**
* ieee80211_is_tx_data - check if frame is a data frame
@@ -7435,6 +7550,9 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
* @vif: interface to set active links on
* @active_links: the new active links bitmap
*
+ * Context: Must be called with wiphy mutex held; may sleep; calls
+ * back into the driver.
+ *
* This changes the active links on an interface. The interface
* must be in client mode (in AP mode, all links are always active),
* and @active_links must be a subset of the vif's valid_links.
@@ -7442,6 +7560,7 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
* If a link is switched off and another is switched on at the same
* time (e.g. active_links going from 0x1 to 0x10) then you will get
* a sequence of calls like
+ *
* - change_vif_links(0x11)
* - unassign_vif_chanctx(link_id=0)
* - change_sta_links(0x11) for each affected STA (the AP)
@@ -7451,10 +7570,6 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
* - change_sta_links(0x10) for each affected STA (the AP)
* - assign_vif_chanctx(link_id=4)
* - change_vif_links(0x10)
- *
- * Note: This function acquires some mac80211 locks and must not
- * be called with any driver locks held that could cause a
- * lock dependency inversion. Best call it without locks.
*/
int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links);
@@ -7471,4 +7586,17 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links);
void ieee80211_set_active_links_async(struct ieee80211_vif *vif,
u16 active_links);
+/* for older drivers - let's not document these ... */
+int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx);
+void ieee80211_emulate_remove_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx);
+void ieee80211_emulate_change_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_chanctx_conf *ctx,
+ u32 changed);
+int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs,
+ enum ieee80211_chanctx_switch_mode mode);
+
#endif /* MAC80211_H */
diff --git a/include/net/macsec.h b/include/net/macsec.h
index ebf9bc54036a..dbd22180cc5c 100644
--- a/include/net/macsec.h
+++ b/include/net/macsec.h
@@ -247,6 +247,23 @@ struct macsec_secy {
/**
* struct macsec_context - MACsec context for hardware offloading
+ * @netdev: a valid pointer to a struct net_device if @offload ==
+ * MACSEC_OFFLOAD_MAC
+ * @phydev: a valid pointer to a struct phy_device if @offload ==
+ * MACSEC_OFFLOAD_PHY
+ * @offload: MACsec offload status
+ * @secy: pointer to a MACsec SecY
+ * @rx_sc: pointer to a RX SC
+ * @update_pn: when updating the SA, update the next PN
+ * @assoc_num: association number of the target SA
+ * @key: key of the target SA
+ * @rx_sa: pointer to an RX SA if an RX SA is added/updated/removed
+ * @tx_sa: pointer to a TX SA if a TX SA is added/updated/removed
+ * @tx_sc_stats: pointer to TX SC stats structure
+ * @tx_sa_stats: pointer to TX SA stats structure
+ * @rx_sc_stats: pointer to RX SC stats structure
+ * @rx_sa_stats: pointer to RX SA stats structure
+ * @dev_stats: pointer to dev stats structure
*/
struct macsec_context {
union {
@@ -277,6 +294,33 @@ struct macsec_context {
/**
* struct macsec_ops - MACsec offloading operations
+ * @mdo_dev_open: called when the MACsec interface transitions to the up state
+ * @mdo_dev_stop: called when the MACsec interface transitions to the down
+ * state
+ * @mdo_add_secy: called when a new SecY is added
+ * @mdo_upd_secy: called when the SecY flags are changed or the MAC address of
+ * the MACsec interface is changed
+ * @mdo_del_secy: called when the hw offload is disabled or the MACsec
+ * interface is removed
+ * @mdo_add_rxsc: called when a new RX SC is added
+ * @mdo_upd_rxsc: called when a certain RX SC is updated
+ * @mdo_del_rxsc: called when a certain RX SC is removed
+ * @mdo_add_rxsa: called when a new RX SA is added
+ * @mdo_upd_rxsa: called when a certain RX SA is updated
+ * @mdo_del_rxsa: called when a certain RX SA is removed
+ * @mdo_add_txsa: called when a new TX SA is added
+ * @mdo_upd_txsa: called when a certain TX SA is updated
+ * @mdo_del_txsa: called when a certain TX SA is removed
+ * @mdo_get_dev_stats: called when dev stats are read
+ * @mdo_get_tx_sc_stats: called when TX SC stats are read
+ * @mdo_get_tx_sa_stats: called when TX SA stats are read
+ * @mdo_get_rx_sc_stats: called when RX SC stats are read
+ * @mdo_get_rx_sa_stats: called when RX SA stats are read
+ * @mdo_insert_tx_tag: called to insert the TX tag
+ * @needed_headroom: number of bytes reserved at the beginning of the sk_buff
+ * for the TX tag
+ * @needed_tailroom: number of bytes reserved at the end of the sk_buff for the
+ * TX tag
*/
struct macsec_ops {
/* Device wide */
@@ -303,6 +347,11 @@ struct macsec_ops {
int (*mdo_get_tx_sa_stats)(struct macsec_context *ctx);
int (*mdo_get_rx_sc_stats)(struct macsec_context *ctx);
int (*mdo_get_rx_sa_stats)(struct macsec_context *ctx);
+ /* Offload tag */
+ int (*mdo_insert_tx_tag)(struct phy_device *phydev,
+ struct sk_buff *skb);
+ unsigned int needed_headroom;
+ unsigned int needed_tailroom;
};
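
For the new TX-tag hooks, an offloading PHY driver advertises the extra
buffer space it needs and supplies the insertion callback. A hedged sketch;
the tag length and contents are purely illustrative:

    #define FOO_MACSEC_TAG_LEN 8    /* hypothetical vendor tag */

    static int foo_insert_tx_tag(struct phy_device *phydev,
                                 struct sk_buff *skb)
    {
            void *tag;

            if (skb_headroom(skb) < FOO_MACSEC_TAG_LEN)
                    return -ENOMEM;
            tag = skb_push(skb, FOO_MACSEC_TAG_LEN);
            memset(tag, 0, FOO_MACSEC_TAG_LEN);    /* ... vendor fill ... */
            return 0;
    }

    static const struct macsec_ops foo_macsec_ops = {
            /* ... mdo_* callbacks ... */
            .mdo_insert_tx_tag = foo_insert_tx_tag,
            .needed_headroom   = FOO_MACSEC_TAG_LEN,
    };
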
void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa);
@@ -325,4 +374,9 @@ static inline void *macsec_netdev_priv(const struct net_device *dev)
return netdev_priv(dev);
}
+static inline u64 sci_to_cpu(sci_t sci)
+{
+ return be64_to_cpu((__force __be64)sci);
+}
+
#endif /* _NET_MACSEC_H_ */
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 88b6ef7ce1a6..27684135bb4d 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -66,6 +66,7 @@ enum {
GDMA_DEVICE_NONE = 0,
GDMA_DEVICE_HWC = 1,
GDMA_DEVICE_MANA = 2,
+ GDMA_DEVICE_MANA_IB = 3,
};
struct gdma_resource {
@@ -149,6 +150,7 @@ struct gdma_general_req {
#define GDMA_MESSAGE_V1 1
#define GDMA_MESSAGE_V2 2
+#define GDMA_MESSAGE_V3 3
struct gdma_general_resp {
struct gdma_resp_hdr hdr;
@@ -293,6 +295,7 @@ struct gdma_queue {
u32 head;
u32 tail;
+ struct list_head entry;
/* Extra fields specific to EQ/CQ. */
union {
@@ -328,6 +331,7 @@ struct gdma_queue_spec {
void *context;
unsigned long log2_throttle_limit;
+ unsigned int msix_index;
} eq;
struct {
@@ -344,7 +348,9 @@ struct gdma_queue_spec {
struct gdma_irq_context {
void (*handler)(void *arg);
- void *arg;
+ /* Protect the eq_list */
+ spinlock_t lock;
+ struct list_head eq_list;
char name[MANA_IRQ_NAME_SZ];
};
@@ -355,7 +361,6 @@ struct gdma_context {
unsigned int max_num_queues;
unsigned int max_num_msix;
unsigned int num_msix_usable;
- struct gdma_resource msix_resource;
struct gdma_irq_context *irq_contexts;
/* L2 MTU */
@@ -387,6 +392,9 @@ struct gdma_context {
/* Azure network adapter */
struct gdma_dev mana;
+
+ /* Azure RDMA adapter */
+ struct gdma_dev mana_ib;
};
#define MAX_NUM_GDMA_DEVICES 4
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 6e3e9c1363db..76147feb0d10 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -353,6 +353,25 @@ struct mana_tx_qp {
struct mana_ethtool_stats {
u64 stop_queue;
u64 wake_queue;
+ u64 hc_rx_discards_no_wqe;
+ u64 hc_rx_err_vport_disabled;
+ u64 hc_rx_bytes;
+ u64 hc_rx_ucast_pkts;
+ u64 hc_rx_ucast_bytes;
+ u64 hc_rx_bcast_pkts;
+ u64 hc_rx_bcast_bytes;
+ u64 hc_rx_mcast_pkts;
+ u64 hc_rx_mcast_bytes;
+ u64 hc_tx_err_gf_disabled;
+ u64 hc_tx_err_vport_disabled;
+ u64 hc_tx_err_inval_vportoffset_pkt;
+ u64 hc_tx_err_vlan_enforcement;
+ u64 hc_tx_err_eth_type_enforcement;
+ u64 hc_tx_err_sa_enforcement;
+ u64 hc_tx_err_sqpdid_enforcement;
+ u64 hc_tx_err_cqpdid_enforcement;
+ u64 hc_tx_err_mtu_violation;
+ u64 hc_tx_err_inval_oob;
u64 hc_tx_bytes;
u64 hc_tx_ucast_pkts;
u64 hc_tx_ucast_bytes;
@@ -360,6 +379,7 @@ struct mana_ethtool_stats {
u64 hc_tx_bcast_bytes;
u64 hc_tx_mcast_pkts;
u64 hc_tx_mcast_bytes;
+ u64 hc_tx_err_gdma;
u64 tx_cqe_err;
u64 tx_cqe_unknown_type;
u64 rx_coalesced_err;
@@ -602,8 +622,8 @@ struct mana_query_gf_stat_resp {
struct gdma_resp_hdr hdr;
u64 reported_stats;
/* rx errors/discards */
- u64 discard_rx_nowqe;
- u64 err_rx_vport_disabled;
+ u64 rx_discards_nowqe;
+ u64 rx_err_vport_disabled;
/* rx bytes/packets */
u64 hc_rx_bytes;
u64 hc_rx_ucast_pkts;
@@ -613,16 +633,16 @@ struct mana_query_gf_stat_resp {
u64 hc_rx_mcast_pkts;
u64 hc_rx_mcast_bytes;
/* tx errors */
- u64 err_tx_gf_disabled;
- u64 err_tx_vport_disabled;
- u64 err_tx_inval_vport_offset_pkt;
- u64 err_tx_vlan_enforcement;
- u64 err_tx_ethtype_enforcement;
- u64 err_tx_SA_enforecement;
- u64 err_tx_SQPDID_enforcement;
- u64 err_tx_CQPDID_enforcement;
- u64 err_tx_mtu_violation;
- u64 err_tx_inval_oob;
+ u64 tx_err_gf_disabled;
+ u64 tx_err_vport_disabled;
+ u64 tx_err_inval_vport_offset_pkt;
+ u64 tx_err_vlan_enforcement;
+ u64 tx_err_ethtype_enforcement;
+ u64 tx_err_SA_enforcement;
+ u64 tx_err_SQPDID_enforcement;
+ u64 tx_err_CQPDID_enforcement;
+ u64 tx_err_mtu_violation;
+ u64 tx_err_inval_oob;
/* tx bytes/packets */
u64 hc_tx_bytes;
u64 hc_tx_ucast_pkts;
@@ -632,7 +652,7 @@ struct mana_query_gf_stat_resp {
u64 hc_tx_mcast_pkts;
u64 hc_tx_mcast_bytes;
/* tx error */
- u64 err_tx_gdma;
+ u64 tx_err_gdma;
}; /* HW DATA */
/* Configure vPort Rx Steering */
diff --git a/include/net/mctp.h b/include/net/mctp.h
index da86e106c91d..7b17c52e8ce2 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -87,7 +87,7 @@ struct mctp_sock {
};
/* Key for matching incoming packets to sockets or reassembly contexts.
- * Packets are matched on (src,dest,tag).
+ * Packets are matched on (peer EID, local EID, tag).
*
* Lifetime / locking requirements:
*
@@ -133,6 +133,7 @@ struct mctp_sock {
* - through an expiry timeout, on a per-socket timer
*/
struct mctp_sk_key {
+ unsigned int net;
mctp_eid_t peer_addr;
mctp_eid_t local_addr; /* MCTP_ADDR_ANY for local owned tags */
__u8 tag; /* incoming tag match; invert TO for local */
@@ -249,12 +250,14 @@ struct mctp_route {
struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
mctp_eid_t daddr);
+/* always takes ownership of skb */
int mctp_local_output(struct sock *sk, struct mctp_route *rt,
struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
void mctp_key_unref(struct mctp_sk_key *key);
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
- mctp_eid_t daddr, mctp_eid_t saddr,
+ unsigned int netid,
+ mctp_eid_t local, mctp_eid_t peer,
bool manual, u8 *tagp);
/* routing <--> device interface */
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 07022bb0d44d..0d28172193fa 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -162,7 +162,7 @@ struct neighbour {
struct rcu_head rcu;
struct net_device *dev;
netdevice_tracker dev_tracker;
- u8 primary_key[0];
+ u8 primary_key[];
} __randomize_layout;
struct neigh_ops {
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 13b3a4e29fdb..20c34bd7a077 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -67,8 +67,6 @@ struct net {
*/
spinlock_t rules_mod_lock;
- atomic_t dev_unreg_count;
-
unsigned int dev_base_seq; /* protected by rtnl_mutex */
u32 ifindex;
@@ -450,6 +448,9 @@ struct pernet_operations {
void (*pre_exit)(struct net *net);
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
+ /* Following method is called with RTNL held. */
+ void (*exit_batch_rtnl)(struct list_head *net_exit_list,
+ struct list_head *dev_kill_list);
unsigned int *id;
size_t size;
};
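
This pairs with the ip_tunnels.h change above: instead of every exit
handler taking RTNL on its own, the core takes it once per namespace batch
and hands subsystems a shared list so all devices go down in a single
unregister_netdevice_many() call. A hedged sketch of a user (foo_link_ops
is illustrative):

    static void __net_exit foo_exit_batch_rtnl(struct list_head *net_exit_list,
                                               struct list_head *dev_kill_list)
    {
            struct net_device *dev, *aux;
            struct net *net;

            list_for_each_entry(net, net_exit_list, exit_list)
                    for_each_netdev_safe(net, dev, aux)
                            if (dev->rtnl_link_ops == &foo_link_ops)
                                    unregister_netdevice_queue(dev,
                                                               dev_kill_list);
    }

    static struct pernet_operations foo_net_ops = {
            .exit_batch_rtnl = foo_exit_batch_rtnl,
    };
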
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index d68b0a483431..1ec408585373 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -4,6 +4,62 @@
#include <linux/netdevice.h>
+/* See the netdev.yaml spec for definition of each statistic */
+struct netdev_queue_stats_rx {
+ u64 bytes;
+ u64 packets;
+ u64 alloc_fail;
+};
+
+struct netdev_queue_stats_tx {
+ u64 bytes;
+ u64 packets;
+};
+
+/**
+ * struct netdev_stat_ops - netdev ops for fine grained stats
+ * @get_queue_stats_rx: get stats for a given Rx queue
+ * @get_queue_stats_tx: get stats for a given Tx queue
+ * @get_base_stats: get base stats (not belonging to any live instance)
+ *
+ * Query stats for a given object. The values of the statistics are undefined
+ * on entry (specifically they are *not* zero-initialized). Drivers should
+ * assign values only to the statistics they collect. Statistics which are not
+ * collected must be left undefined.
+ *
+ * Queue objects are not necessarily persistent, and only currently active
+ * queues are queried by the per-queue callbacks. This means that per-queue
+ * statistics will not generally add up to the total number of events for
+ * the device. The @get_base_stats callback allows filling in the delta
+ * between events for currently live queues and overall device history.
+ * When the statistics for the entire device are queried, first @get_base_stats
+ * is issued to collect the delta, and then a series of per-queue callbacks.
+ * Only statistics which are set in @get_base_stats will be reported
+ * at the device level, meaning that unlike in queue callbacks, setting
+ * a statistic to zero in @get_base_stats is a legitimate thing to do.
+ * This is because @get_base_stats has a second function of designating which
+ * statistics are in fact correct for the entire device (e.g. when history
+ * for some of the events is not maintained, and reliable "total" cannot
+ * be provided).
+ *
+ * Device drivers can assume that when collecting total device stats,
+ * the @get_base_stats and subsequent per-queue calls are performed
+ * "atomically" (without releasing the rtnl_lock).
+ *
+ * Device drivers are encouraged to reset the per-queue statistics when
+ * the number of queues changes. This is because the primary use case for
+ * per-queue statistics is currently to detect traffic imbalance.
+ */
+struct netdev_stat_ops {
+ void (*get_queue_stats_rx)(struct net_device *dev, int idx,
+ struct netdev_queue_stats_rx *stats);
+ void (*get_queue_stats_tx)(struct net_device *dev, int idx,
+ struct netdev_queue_stats_tx *stats);
+ void (*get_base_stats)(struct net_device *dev,
+ struct netdev_queue_stats_rx *rx,
+ struct netdev_queue_stats_tx *tx);
+};
+
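
For illustration, a minimal sketch of a driver wiring up these callbacks;
struct my_priv and its ring counters are hypothetical, and only counters the
driver actually tracks are assigned, per the contract documented above:

static void my_get_queue_stats_rx(struct net_device *dev, int idx,
				  struct netdev_queue_stats_rx *stats)
{
	struct my_priv *priv = netdev_priv(dev);	/* hypothetical priv */

	stats->packets = priv->rx_ring[idx].packets;
	stats->bytes = priv->rx_ring[idx].bytes;
	/* alloc_fail deliberately left unset: not collected per queue here */
}

static const struct netdev_stat_ops my_stat_ops = {
	.get_queue_stats_rx = my_get_queue_stats_rx,
};

The ops table would then be hooked up at probe time, assuming the companion
net_device->stat_ops pointer added in the same series.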
/**
* DOC: Lockless queue stopping / waking helpers.
*
@@ -128,7 +184,7 @@ netdev_txq_completed_mb(struct netdev_queue *dev_queue,
netdev_txq_completed_mb(txq, pkts, bytes); \
\
_res = -1; \
- if (pkts && likely(get_desc > start_thrs)) { \
+ if (pkts && likely(get_desc >= start_thrs)) { \
_res = 1; \
if (unlikely(netif_tx_queue_stopped(txq)) && \
!(down_cond)) { \
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index cdcafb30d437..aa1716fb0e53 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -21,6 +21,10 @@ struct netdev_rx_queue {
#ifdef CONFIG_XDP_SOCKETS
struct xsk_buff_pool *pool;
#endif
+ /* NAPI instance for the queue
+ * Readers and writers must hold RTNL
+ */
+ struct napi_struct *napi;
} ____cacheline_aligned_in_smp;
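
A hedged example of populating this field from a driver, assuming the
netif_queue_set_napi() helper introduced alongside it (qidx and priv are
placeholders):

	netif_queue_set_napi(dev, qidx, NETDEV_QUEUE_TYPE_RX, &priv->napi);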
/*
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index fe1507c1db82..a763dd327c6e 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -62,6 +62,8 @@ struct nf_flowtable_type {
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule);
void (*free)(struct nf_flowtable *ft);
+ void (*get)(struct nf_flowtable *ft);
+ void (*put)(struct nf_flowtable *ft);
nf_hookfn *hook;
struct module *owner;
};
@@ -72,12 +74,13 @@ enum nf_flowtable_flags {
};
struct nf_flowtable {
- struct list_head list;
- struct rhashtable rhashtable;
- int priority;
+ unsigned int flags; /* readonly in datapath */
+ int priority; /* control path (padding hole) */
+ struct rhashtable rhashtable; /* datapath, read-mostly members come first */
+
+ struct list_head list; /* slowpath parts */
const struct nf_flowtable_type *type;
struct delayed_work gc_work;
- unsigned int flags;
struct flow_block flow_block;
struct rw_semaphore flow_block_lock; /* Guards flow_block */
possible_net_t net;
@@ -240,6 +243,11 @@ nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
}
list_add_tail(&block_cb->list, &block->cb_list);
+ up_write(&flow_table->flow_block_lock);
+
+ if (flow_table->type->get)
+ flow_table->type->get(flow_table);
+ return 0;
unlock:
up_write(&flow_table->flow_block_lock);
@@ -262,10 +270,13 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
WARN_ON(true);
}
up_write(&flow_table->flow_block_lock);
+
+ if (flow_table->type->put)
+ flow_table->type->put(flow_table);
}
void flow_offload_route_init(struct flow_offload *flow,
- const struct nf_flow_route *route);
+ struct nf_flow_route *route);
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
void flow_offload_refresh(struct nf_flowtable *flow_table,
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index c81021ab07aa..4aeffddb7586 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -35,7 +35,6 @@ struct nf_queue_handler {
void nf_register_queue_handler(const struct nf_queue_handler *qh);
void nf_unregister_queue_handler(void);
-void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_free(struct nf_queue_entry *entry);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 3bbd13ab1ecf..e27c28b612e4 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -178,9 +178,9 @@ static inline __be32 nft_reg_load_be32(const u32 *sreg)
return *(__force __be32 *)sreg;
}
-static inline void nft_reg_store64(u32 *dreg, u64 val)
+static inline void nft_reg_store64(u64 *dreg, u64 val)
{
- put_unaligned(val, (u64 *)dreg);
+ put_unaligned(val, dreg);
}
static inline u64 nft_reg_load64(const u32 *sreg)
@@ -205,6 +205,7 @@ static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
* @nla: netlink attributes
* @portid: netlink portID of the original message
* @seq: netlink sequence number
+ * @flags: modifiers to new request
* @family: protocol family
* @level: depth of the chains
* @report: notify via unicast netlink message
@@ -282,6 +283,7 @@ struct nft_elem_priv { };
*
* @key: element key
* @key_end: closing element key
+ * @data: element data
* @priv: element private data and extensions
*/
struct nft_set_elem {
@@ -325,10 +327,10 @@ struct nft_set_iter {
* @dtype: data type
* @dlen: data length
* @objtype: object type
- * @flags: flags
* @size: number of set elements
* @policy: set policy
* @gc_int: garbage collector interval
+ * @timeout: element timeout
* @field_len: length of each field in concatenation, bytes
* @field_count: number of concatenated fields in element
* @expr: set must support for expressions
@@ -351,9 +353,9 @@ struct nft_set_desc {
/**
* enum nft_set_class - performance class
*
- * @NFT_LOOKUP_O_1: constant, O(1)
- * @NFT_LOOKUP_O_LOG_N: logarithmic, O(log N)
- * @NFT_LOOKUP_O_N: linear, O(N)
+ * @NFT_SET_CLASS_O_1: constant, O(1)
+ * @NFT_SET_CLASS_O_LOG_N: logarithmic, O(log N)
+ * @NFT_SET_CLASS_O_N: linear, O(N)
*/
enum nft_set_class {
NFT_SET_CLASS_O_1,
@@ -422,9 +424,13 @@ struct nft_set_ext;
* @remove: remove element from set
* @walk: iterate over all set elements
* @get: get set elements
+ * @commit: commit set elements
+ * @abort: abort set elements
* @privsize: function to return size of set private data
+ * @estimate: estimate the required memory size and the lookup complexity class
* @init: initialize private data of new set instance
* @destroy: destroy private data of set instance
+ * @gc_init: initialize garbage collection
* @elemsize: element private size
*
* Operations lookup, update and delete have simpler interfaces, are faster
@@ -540,13 +546,16 @@ struct nft_set_elem_expr {
* @policy: set parameterization (see enum nft_set_policies)
* @udlen: user data length
* @udata: user data
- * @expr: stateful expression
+ * @pending_update: list of set elements with pending updates
* @ops: set ops
* @flags: set flags
* @dead: set will be freed, never cleared
* @genmask: generation mask
* @klen: key length
* @dlen: data length
+ * @num_exprs: number of stateful expressions
+ * @exprs: stateful expressions
+ * @catchall_list: list of catch-all set elements
* @data: private set data
*/
struct nft_set {
@@ -692,6 +701,7 @@ extern const struct nft_set_ext_type nft_set_ext_types[];
*
* @len: length of extension area
* @offset: offsets of individual extension types
+ * @ext_len: length of the expected extension (used for sanity checks)
*/
struct nft_set_ext_tmpl {
u16 len;
@@ -798,10 +808,16 @@ static inline struct nft_set_elem_expr *nft_set_ext_expr(const struct nft_set_ex
return nft_set_ext(ext, NFT_SET_EXT_EXPRESSIONS);
}
-static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
+static inline bool __nft_set_elem_expired(const struct nft_set_ext *ext,
+ u64 tstamp)
{
return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
- time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext));
+ time_after_eq64(tstamp, *nft_set_ext_expiration(ext));
+}
+
+static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
+{
+ return __nft_set_elem_expired(ext, get_jiffies_64());
}
static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
@@ -840,6 +856,7 @@ struct nft_expr_ops;
* @select_ops: function to select nft_expr_ops
* @release_ops: release nft_expr_ops
* @ops: default ops, used when no select_ops functions is present
+ * @inner_ops: inner ops, used for inner packet operation
* @list: used internally
* @name: Identifier
* @owner: module reference
@@ -881,14 +898,22 @@ struct nft_offload_ctx;
* struct nft_expr_ops - nf_tables expression operations
*
* @eval: Expression evaluation function
+ * @clone: Expression clone function
* @size: full expression size, including private data size
* @init: initialization function
* @activate: activate expression in the next generation
* @deactivate: deactivate expression in next generation
* @destroy: destruction function, called after synchronize_rcu
+ * @destroy_clone: destruction function for cloned expressions
* @dump: function to dump parameters
- * @type: expression type
* @validate: validate expression, called during loop detection
+ * @reduce: reduce expression
+ * @gc: garbage collection expression
+ * @offload: hardware offload expression
+ * @offload_action: function reporting whether the expression needs a slot in
+ *		     the flow offload action array
+ * @offload_stats: function to synchronize hardware stats by updating the
+ *		    counter expression
+ * @type: expression type
* @data: extra data to attach to this expression operation
*/
struct nft_expr_ops {
@@ -1041,14 +1066,21 @@ struct nft_rule_blob {
/**
* struct nft_chain - nf_tables chain
*
+ * @blob_gen_0: rule blob pointer to the current generation
+ * @blob_gen_1: rule blob pointer to the future generation
* @rules: list of rules in the chain
* @list: used internally
* @rhlhead: used internally
* @table: table that this chain belongs to
* @handle: chain handle
* @use: number of jump references to this chain
- * @flags: bitmask of enum nft_chain_flags
+ * @flags: bitmask of enum NFTA_CHAIN_FLAGS
+ * @bound: true if the chain is bound to a rule
+ * @genmask: generation mask
* @name: name of the chain
+ * @udlen: user data length
+ * @udata: user data in the chain
+ * @blob_next: rule blob staged for the next commit
*/
struct nft_chain {
struct nft_rule_blob __rcu *blob_gen_0;
@@ -1146,6 +1178,7 @@ struct nft_hook {
* @hook_list: list of netfilter hooks (for NFPROTO_NETDEV family)
* @type: chain type
* @policy: default policy
+ * @flags: indicates whether the base chain is disabled
* @stats: per-cpu chain stats
* @chain: the chain
* @flow_block: flow block (for hardware offload)
@@ -1244,6 +1277,12 @@ static inline bool nft_table_has_owner(const struct nft_table *table)
return table->flags & NFT_TABLE_F_OWNER;
}
+static inline bool nft_table_is_orphan(const struct nft_table *table)
+{
+ return (table->flags & (NFT_TABLE_F_OWNER | NFT_TABLE_F_PERSIST)) ==
+ NFT_TABLE_F_PERSIST;
+}
+
static inline bool nft_base_chain_netdev(int family, u32 hooknum)
{
return family == NFPROTO_NETDEV ||
@@ -1274,11 +1313,13 @@ struct nft_object_hash_key {
* struct nft_object - nf_tables stateful object
*
* @list: table stateful object list node
- * @key: keys that identify this object
* @rhlhead: nft_objname_ht node
+ * @key: keys that identify this object
* @genmask: generation mask
* @use: number of references to this stateful object
* @handle: unique object handle
+ * @udlen: length of user data
+ * @udata: user data
* @ops: object operations
* @data: object data, layout depends on type
*/
@@ -1322,6 +1363,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
* @type: stateful object numeric type
* @owner: module owner
* @maxattr: maximum netlink attribute
+ * @family: address family for AF-specific object types
* @policy: netlink attribute policy
*/
struct nft_object_type {
@@ -1331,6 +1373,7 @@ struct nft_object_type {
struct list_head list;
u32 type;
unsigned int maxattr;
+ u8 family;
struct module *owner;
const struct nla_policy *policy;
};
@@ -1344,6 +1387,7 @@ struct nft_object_type {
* @destroy: release existing stateful object
* @dump: netlink dump stateful object
* @update: update stateful object
+ * @type: pointer to object type
*/
struct nft_object_ops {
void (*eval)(struct nft_object *obj,
@@ -1379,9 +1423,8 @@ void nft_unregister_obj(struct nft_object_type *obj_type);
* @genmask: generation mask
* @use: number of references to this flow table
* @handle: unique object handle
- * @dev_name: array of device names
+ * @hook_list: list of netfilter hooks, one per net_device
* @data: rhashtable and garbage collector
- * @ops: array of hooks
*/
struct nft_flowtable {
struct list_head list;
@@ -1748,6 +1791,7 @@ struct nftables_pernet {
struct list_head notify_list;
struct mutex commit_mutex;
u64 table_handle;
+ u64 tstamp;
unsigned int base_seq;
unsigned int gc_seq;
u8 validate_state;
@@ -1760,6 +1804,11 @@ static inline struct nftables_pernet *nft_pernet(const struct net *net)
return net_generic(net, nf_tables_net_id);
}
+static inline u64 nft_net_tstamp(const struct net *net)
+{
+ return nft_pernet(net)->tstamp;
+}
+
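
For illustration, the two helpers above compose into an expiry check that is
stable across one transaction, unlike a direct get_jiffies_64() comparison:

static inline bool my_elem_expired(const struct net *net,
				   const struct nft_set_ext *ext)
{
	return __nft_set_elem_expired(ext, nft_net_tstamp(net));
}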
#define __NFT_REDUCE_READONLY 1UL
#define NFT_REDUCE_READONLY (void *)__NFT_REDUCE_READONLY
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index 947973623dc7..60a7d0ce3080 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -30,7 +30,7 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
return -1;
len = iph_totlen(pkt->skb, iph);
- thoff = iph->ihl * 4;
+ thoff = skb_network_offset(pkt->skb) + (iph->ihl * 4);
if (pkt->skb->len < len)
return -1;
else if (len < thoff)
diff --git a/include/net/netkit.h b/include/net/netkit.h
index 0ba2e6b847ca..9ec0163739f4 100644
--- a/include/net/netkit.h
+++ b/include/net/netkit.h
@@ -10,6 +10,7 @@ int netkit_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int netkit_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int netkit_prog_detach(const union bpf_attr *attr, struct bpf_prog *prog);
int netkit_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
+INDIRECT_CALLABLE_DECLARE(struct net_device *netkit_peer_dev(struct net_device *dev));
#else
static inline int netkit_prog_attach(const union bpf_attr *attr,
struct bpf_prog *prog)
@@ -34,5 +35,10 @@ static inline int netkit_prog_query(const union bpf_attr *attr,
{
return -EINVAL;
}
+
+static inline struct net_device *netkit_peer_dev(struct net_device *dev)
+{
+ return NULL;
+}
#endif /* CONFIG_NETKIT */
#endif /* __NET_NETKIT_H */
diff --git a/include/net/netlabel.h b/include/net/netlabel.h
index 43ae50337685..f3ab0b8a4b18 100644
--- a/include/net/netlabel.h
+++ b/include/net/netlabel.h
@@ -145,15 +145,14 @@ struct netlbl_lsm_cache {
* processing.
*
*/
-#define NETLBL_CATMAP_MAPTYPE u64
#define NETLBL_CATMAP_MAPCNT 4
-#define NETLBL_CATMAP_MAPSIZE (sizeof(NETLBL_CATMAP_MAPTYPE) * 8)
+#define NETLBL_CATMAP_MAPSIZE (sizeof(u64) * 8)
#define NETLBL_CATMAP_SIZE (NETLBL_CATMAP_MAPSIZE * \
NETLBL_CATMAP_MAPCNT)
-#define NETLBL_CATMAP_BIT (NETLBL_CATMAP_MAPTYPE)0x01
+#define NETLBL_CATMAP_BIT ((u64)0x01)
struct netlbl_lsm_catmap {
u32 startbit;
- NETLBL_CATMAP_MAPTYPE bitmap[NETLBL_CATMAP_MAPCNT];
+ u64 bitmap[NETLBL_CATMAP_MAPCNT];
struct netlbl_lsm_catmap *next;
};
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 83bdf787aeee..c19ff921b661 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1011,6 +1011,20 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
}
/**
+ * nlmsg_new_large - Allocate a new netlink message with non-contiguous
+ * physical memory
+ * @payload: size of the message payload
+ *
+ * The allocated skb must not carry fragment pages in shinfo->frags[]:
+ * netlink_skb_destructor() sets skb->head to NULL, which bypasses most
+ * of the handling in skb_release_data().
+ */
+static inline struct sk_buff *nlmsg_new_large(size_t payload)
+{
+ return netlink_alloc_large_skb(nlmsg_total_size(payload), 0);
+}
+
+/**
* nlmsg_end - Finalize a netlink message
* @skb: socket buffer the message is stored in
* @nlh: netlink message header
@@ -1073,21 +1087,29 @@ static inline void nlmsg_free(struct sk_buff *skb)
}
/**
- * nlmsg_multicast - multicast a netlink message
+ * nlmsg_multicast_filtered - multicast a netlink message with filter function
* @sk: netlink socket to spread messages to
* @skb: netlink message as socket buffer
* @portid: own netlink portid to avoid sending to yourself
* @group: multicast group id
* @flags: allocation flags
+ * @filter: filter function
+ * @filter_data: filter function private data
+ *
+ * Return: 0 on success, negative error code for failure.
*/
-static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
- u32 portid, unsigned int group, gfp_t flags)
+static inline int nlmsg_multicast_filtered(struct sock *sk, struct sk_buff *skb,
+ u32 portid, unsigned int group,
+ gfp_t flags,
+ netlink_filter_fn filter,
+ void *filter_data)
{
int err;
NETLINK_CB(skb).dst_group = group;
- err = netlink_broadcast(sk, skb, portid, group, flags);
+ err = netlink_broadcast_filtered(sk, skb, portid, group, flags,
+ filter, filter_data);
if (err > 0)
err = 0;
@@ -1095,6 +1117,21 @@ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
}
/**
+ * nlmsg_multicast - multicast a netlink message
+ * @sk: netlink socket to spread messages to
+ * @skb: netlink message as socket buffer
+ * @portid: own netlink portid to avoid sending to yourself
+ * @group: multicast group id
+ * @flags: allocation flags
+ */
+static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
+ u32 portid, unsigned int group, gfp_t flags)
+{
+ return nlmsg_multicast_filtered(sk, skb, portid, group, flags,
+ NULL, NULL);
+}
+
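
A sketch of the filtered variant in use; per netlink_broadcast_filtered(), a
nonzero return from the filter skips delivery to that socket. The
my_wants_event() helper and the event cookie are hypothetical:

static int my_filter(struct sock *dsk, struct sk_buff *skb, void *data)
{
	return !my_wants_event(dsk, data);	/* 0 = deliver, nonzero = skip */
}

	err = nlmsg_multicast_filtered(sk, skb, portid, group, GFP_KERNEL,
				       my_filter, &event);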
+/**
* nlmsg_unicast - unicast a netlink message
* @sk: netlink socket to spread message to
* @skb: netlink message as socket buffer
@@ -1200,7 +1237,7 @@ static inline void *nla_data(const struct nlattr *nla)
* nla_len - length of payload
* @nla: netlink attribute
*/
-static inline int nla_len(const struct nlattr *nla)
+static inline u16 nla_len(const struct nlattr *nla)
{
return nla->nla_len - NLA_HDRLEN;
}
diff --git a/include/net/netmem.h b/include/net/netmem.h
new file mode 100644
index 000000000000..d8b810245c1d
--- /dev/null
+++ b/include/net/netmem.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Network memory
+ *
+ * Author: Mina Almasry <almasrymina@google.com>
+ */
+
+#ifndef _NET_NETMEM_H
+#define _NET_NETMEM_H
+
+/**
+ * typedef netmem_ref - a nonexistent type marking a reference to generic
+ * network memory.
+ *
+ * A netmem_ref currently is always a reference to a struct page. This
+ * abstraction is introduced so support for new memory types can be added.
+ *
+ * Use the supplied helpers to obtain the underlying memory pointer and fields.
+ */
+typedef unsigned long __bitwise netmem_ref;
+
+/* This conversion fails (returns NULL) if the netmem_ref is not struct page
+ * backed.
+ *
+ * Currently struct page is the only possible netmem, and this helper never
+ * fails.
+ */
+static inline struct page *netmem_to_page(netmem_ref netmem)
+{
+ return (__force struct page *)netmem;
+}
+
+/* Converting from page to netmem is always safe, because a page can always be
+ * a netmem.
+ */
+static inline netmem_ref page_to_netmem(struct page *page)
+{
+ return (__force netmem_ref)page;
+}
+
+#endif /* _NET_NETMEM_H */
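
Both conversions are pure type changes today, so the round trip preserves the
pointer; a trivial illustrative check:

static bool my_netmem_roundtrip_ok(struct page *page)
{
	netmem_ref nm = page_to_netmem(page);

	return netmem_to_page(nm) == page;
}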
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index a91ef9f8de60..78214f1b43a2 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -13,6 +13,7 @@ struct netns_core {
struct ctl_table_header *sysctl_hdr;
int sysctl_somaxconn;
+ int sysctl_optmem_max;
u8 sysctl_txrehash;
#ifdef CONFIG_PROC_FS
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 73f43f699199..c356c458b340 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -19,8 +19,7 @@ struct hlist_head;
struct fib_table;
struct sock;
struct local_ports {
- seqlock_t lock;
- int range[2];
+ u32 range; /* high << 16 | low */
bool warned;
};
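
A sketch of unpacking the packed word on the reader side, following the
encoding noted above; the READ_ONCE() pairs with a WRITE_ONCE() on the
update side (the helper name is made up):

static inline void my_get_port_range(const struct local_ports *lp,
				     int *low, int *high)
{
	u32 range = READ_ONCE(lp->range);

	*low = range & 0xffff;
	*high = range >> 16;
}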
@@ -42,6 +41,38 @@ struct inet_timewait_death_row {
struct tcp_fastopen_context;
struct netns_ipv4 {
+ /* Cacheline organization can be found documented in
+ * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
+ * Please update the document when adding new fields.
+ */
+
+ /* TX readonly hotpath cache lines */
+ __cacheline_group_begin(netns_ipv4_read_tx);
+ u8 sysctl_tcp_early_retrans;
+ u8 sysctl_tcp_tso_win_divisor;
+ u8 sysctl_tcp_tso_rtt_log;
+ u8 sysctl_tcp_autocorking;
+ int sysctl_tcp_min_snd_mss;
+ unsigned int sysctl_tcp_notsent_lowat;
+ int sysctl_tcp_limit_output_bytes;
+ int sysctl_tcp_min_rtt_wlen;
+ int sysctl_tcp_wmem[3];
+ u8 sysctl_ip_fwd_use_pmtu;
+ __cacheline_group_end(netns_ipv4_read_tx);
+
+ /* TXRX readonly hotpath cache lines */
+ __cacheline_group_begin(netns_ipv4_read_txrx);
+ u8 sysctl_tcp_moderate_rcvbuf;
+ __cacheline_group_end(netns_ipv4_read_txrx);
+
+ /* RX readonly hotpath cache line */
+ __cacheline_group_begin(netns_ipv4_read_rx);
+ u8 sysctl_ip_early_demux;
+ u8 sysctl_tcp_early_demux;
+ int sysctl_tcp_reordering;
+ int sysctl_tcp_rmem[3];
+ __cacheline_group_end(netns_ipv4_read_rx);
+
struct inet_timewait_death_row tcp_death_row;
struct udp_table *udp_table;
@@ -96,17 +127,14 @@ struct netns_ipv4 {
u8 sysctl_ip_default_ttl;
u8 sysctl_ip_no_pmtu_disc;
- u8 sysctl_ip_fwd_use_pmtu;
u8 sysctl_ip_fwd_update_priority;
u8 sysctl_ip_nonlocal_bind;
u8 sysctl_ip_autobind_reuse;
/* Shall we try to damage output packets if routing dev changes? */
u8 sysctl_ip_dynaddr;
- u8 sysctl_ip_early_demux;
#ifdef CONFIG_NET_L3_MASTER_DEV
u8 sysctl_raw_l3mdev_accept;
#endif
- u8 sysctl_tcp_early_demux;
u8 sysctl_udp_early_demux;
u8 sysctl_nexthop_compat_mode;
@@ -119,7 +147,6 @@ struct netns_ipv4 {
u8 sysctl_tcp_mtu_probing;
int sysctl_tcp_mtu_probe_floor;
int sysctl_tcp_base_mss;
- int sysctl_tcp_min_snd_mss;
int sysctl_tcp_probe_threshold;
u32 sysctl_tcp_probe_interval;
@@ -135,17 +162,14 @@ struct netns_ipv4 {
u8 sysctl_tcp_backlog_ack_defer;
u8 sysctl_tcp_pingpong_thresh;
- int sysctl_tcp_reordering;
u8 sysctl_tcp_retries1;
u8 sysctl_tcp_retries2;
u8 sysctl_tcp_orphan_retries;
u8 sysctl_tcp_tw_reuse;
int sysctl_tcp_fin_timeout;
- unsigned int sysctl_tcp_notsent_lowat;
u8 sysctl_tcp_sack;
u8 sysctl_tcp_window_scaling;
u8 sysctl_tcp_timestamps;
- u8 sysctl_tcp_early_retrans;
u8 sysctl_tcp_recovery;
u8 sysctl_tcp_thin_linear_timeouts;
u8 sysctl_tcp_slow_start_after_idle;
@@ -161,21 +185,13 @@ struct netns_ipv4 {
u8 sysctl_tcp_frto;
u8 sysctl_tcp_nometrics_save;
u8 sysctl_tcp_no_ssthresh_metrics_save;
- u8 sysctl_tcp_moderate_rcvbuf;
- u8 sysctl_tcp_tso_win_divisor;
u8 sysctl_tcp_workaround_signed_windows;
- int sysctl_tcp_limit_output_bytes;
int sysctl_tcp_challenge_ack_limit;
- int sysctl_tcp_min_rtt_wlen;
u8 sysctl_tcp_min_tso_segs;
- u8 sysctl_tcp_tso_rtt_log;
- u8 sysctl_tcp_autocorking;
u8 sysctl_tcp_reflect_tos;
int sysctl_tcp_invalid_ratelimit;
int sysctl_tcp_pacing_ss_ratio;
int sysctl_tcp_pacing_ca_ratio;
- int sysctl_tcp_wmem[3];
- int sysctl_tcp_rmem[3];
unsigned int sysctl_tcp_child_ehash_entries;
unsigned long sysctl_tcp_comp_sack_delay_ns;
unsigned long sysctl_tcp_comp_sack_slack_ns;
diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h
index 582212ada3ba..fc752a50f91b 100644
--- a/include/net/netns/smc.h
+++ b/include/net/netns/smc.h
@@ -22,5 +22,7 @@ struct netns_smc {
int sysctl_smcr_testlink_time;
int sysctl_wmem;
int sysctl_rmem;
+ int sysctl_max_links_per_lgr;
+ int sysctl_max_conns_per_lgr;
};
#endif
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index d92046a4a078..7ec9cc80f11c 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -47,6 +47,8 @@ struct nh_config {
bool nh_grp_res_has_idle_timer;
bool nh_grp_res_has_unbalanced_timer;
+ bool nh_hw_stats;
+
struct nlattr *nh_encap;
u16 nh_encap_type;
@@ -95,8 +97,14 @@ struct nh_res_table {
struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets);
};
+struct nh_grp_entry_stats {
+ u64_stats_t packets;
+ struct u64_stats_sync syncp;
+};
+
struct nh_grp_entry {
struct nexthop *nh;
+ struct nh_grp_entry_stats __percpu *stats;
u8 weight;
union {
@@ -114,6 +122,7 @@ struct nh_grp_entry {
struct list_head nh_list;
struct nexthop *nh_parent; /* nexthop of group with this entry */
+ u64 packets_hw;
};
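
A sketch of the datapath increment for the per-CPU counter embedded in
struct nh_grp_entry above, using the usual u64_stats pattern:

static void my_nh_count_packet(struct nh_grp_entry *nhge)
{
	struct nh_grp_entry_stats *stats = this_cpu_ptr(nhge->stats);

	u64_stats_update_begin(&stats->syncp);
	u64_stats_inc(&stats->packets);
	u64_stats_update_end(&stats->syncp);
}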
struct nh_group {
@@ -124,6 +133,7 @@ struct nh_group {
bool resilient;
bool fdb_nh;
bool has_v4;
+ bool hw_stats;
struct nh_res_table __rcu *res_table;
struct nh_grp_entry nh_entries[] __counted_by(num_nh);
@@ -157,6 +167,7 @@ enum nexthop_event_type {
NEXTHOP_EVENT_REPLACE,
NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
NEXTHOP_EVENT_BUCKET_REPLACE,
+ NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
};
enum nh_notifier_info_type {
@@ -164,6 +175,7 @@ enum nh_notifier_info_type {
NH_NOTIFIER_INFO_TYPE_GRP,
NH_NOTIFIER_INFO_TYPE_RES_TABLE,
NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
+ NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
};
struct nh_notifier_single_info {
@@ -187,6 +199,7 @@ struct nh_notifier_grp_entry_info {
struct nh_notifier_grp_info {
u16 num_nh;
bool is_fdb;
+ bool hw_stats;
struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
};
@@ -200,9 +213,21 @@ struct nh_notifier_res_bucket_info {
struct nh_notifier_res_table_info {
u16 num_nh_buckets;
+ bool hw_stats;
struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
};
+struct nh_notifier_grp_hw_stats_entry_info {
+ u32 id;
+ u64 packets;
+};
+
+struct nh_notifier_grp_hw_stats_info {
+ u16 num_nh;
+ bool hw_stats_used;
+ struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
+};
+
struct nh_notifier_info {
struct net *net;
struct netlink_ext_ack *extack;
@@ -213,17 +238,22 @@ struct nh_notifier_info {
struct nh_notifier_grp_info *nh_grp;
struct nh_notifier_res_table_info *nh_res_table;
struct nh_notifier_res_bucket_info *nh_res_bucket;
+ struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
};
};
int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack);
+int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
bool offload, bool trap);
void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
unsigned long *activity);
+void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
+ unsigned int nh_idx,
+ u64 delta_packets);
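
On the driver side, a hedged sketch of answering a
NEXTHOP_EVENT_HW_STATS_REPORT_DELTA notification by feeding back per-entry
hardware packet deltas; my_read_hw_delta() is hypothetical:

static void my_report_deltas(struct nh_notifier_grp_hw_stats_info *info)
{
	unsigned int i;

	for (i = 0; i < info->num_nh; i++)
		nh_grp_hw_stats_report_delta(info, i,
					     my_read_hw_delta(info->stats[i].id));
}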
/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
@@ -316,7 +346,7 @@ static inline
int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
u8 rt_family)
{
- struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
int i;
for (i = 0; i < nhg->num_nh; i++) {
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 5dee575fbe86..3d07abacf08b 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -196,7 +196,7 @@ struct nfc_dev {
};
#define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev)
-extern struct class nfc_class;
+extern const struct class nfc_class;
struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
u32 supported_protocols,
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 4ebd544ae977..1d397c1a0043 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -11,7 +11,7 @@
* The page_pool allocator is optimized for recycling page or page fragment used
* by skb packet and xdp frame.
*
- * Basic use involves replacing and alloc_pages() calls with page_pool_alloc(),
+ * Basic use involves replacing any alloc_pages() calls with page_pool_alloc(),
 * which allocates memory with or without page splitting depending on the
* requested memory size.
*
@@ -29,7 +29,7 @@
* page allocated from page pool. Page splitting enables memory saving and thus
* avoids TLB/cache miss for data access, but there also is some cost to
* implement page splitting, mainly some cache line dirtying/bouncing for
- * 'struct page' and atomic operation for page->pp_frag_count.
+ * 'struct page' and atomic operation for page->pp_ref_count.
*
* The API keeps track of in-flight pages, in order to let API users know when
* it is safe to free a page_pool object, the API users must call
@@ -37,15 +37,15 @@
* attach the page_pool object to a page_pool-aware object like skbs marked with
* skb_mark_for_recycle().
*
- * page_pool_put_page() may be called multi times on the same page if a page is
- * split into multi fragments. For the last fragment, it will either recycle the
- * page, or in case of page->_refcount > 1, it will release the DMA mapping and
- * in-flight state accounting.
+ * page_pool_put_page() may be called multiple times on the same page if a page
+ * is split into multiple fragments. For the last fragment, it will either
+ * recycle the page, or in case of page->_refcount > 1, it will release the DMA
+ * mapping and in-flight state accounting.
*
* dma_sync_single_range_for_device() is only called for the last fragment when
* page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the
* last freed fragment to do the sync_for_device operation for all fragments in
- * the same page when a page is split, the API user must setup pool->p.max_len
+ * the same page when a page is split. The API user must setup pool->p.max_len
* and pool->p.offset correctly and ensure that page_pool_put_page() is called
* with dma_sync_size being -1 for fragment API.
*/
@@ -55,16 +55,12 @@
#include <net/page_pool/types.h>
#ifdef CONFIG_PAGE_POOL_STATS
+/* Deprecated driver-facing API, use netlink instead */
int page_pool_ethtool_stats_get_count(void);
u8 *page_pool_ethtool_stats_get_strings(u8 *data);
u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
-/*
- * Drivers that wish to harvest page pool stats and report them to users
- * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
- * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool.
- */
-bool page_pool_get_stats(struct page_pool *pool,
+bool page_pool_get_stats(const struct page_pool *pool,
struct page_pool_stats *stats);
#else
static inline int page_pool_ethtool_stats_get_count(void)
@@ -214,69 +210,82 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
return pool->p.dma_dir;
}
-/* pp_frag_count represents the number of writers who can update the page
- * either by updating skb->data or via DMA mappings for the device.
- * We can't rely on the page refcnt for that as we don't know who might be
- * holding page references and we can't reliably destroy or sync DMA mappings
- * of the fragments.
+/**
+ * page_pool_fragment_page() - split a fresh page into fragments
+ * @page: page to split
+ * @nr: references to set
+ *
+ * pp_ref_count represents the number of outstanding references to the page,
+ * which will be freed using page_pool APIs (rather than page allocator APIs
+ * like put_page()). Such references are usually held by page_pool-aware
+ * objects like skbs marked for page pool recycling.
*
- * When pp_frag_count reaches 0 we can either recycle the page if the page
- * refcnt is 1 or return it back to the memory allocator and destroy any
- * mappings we have.
+ * This helper allows the caller to take (set) multiple references to a
+ * freshly allocated page. The page must be freshly allocated (have a
+ * pp_ref_count of 1). This is commonly done by drivers and
+ * "fragment allocators" to save atomic operations - either when they know
+ * upfront how many references they will need; or to take MAX references and
+ * return the unused ones with a single atomic dec(), instead of performing
+ * multiple atomic inc() operations.
*/
static inline void page_pool_fragment_page(struct page *page, long nr)
{
- atomic_long_set(&page->pp_frag_count, nr);
+ atomic_long_set(&page->pp_ref_count, nr);
}
-static inline long page_pool_defrag_page(struct page *page, long nr)
+static inline long page_pool_unref_page(struct page *page, long nr)
{
long ret;
- /* If nr == pp_frag_count then we have cleared all remaining
+ /* If nr == pp_ref_count then we have cleared all remaining
* references to the page:
* 1. 'n == 1': no need to actually overwrite it.
* 2. 'n != 1': overwrite it with one, which is the rare case
- * for pp_frag_count draining.
+ * for pp_ref_count draining.
*
	 * The main advantage to doing this is that not only do we avoid an atomic
* update, as an atomic_read is generally a much cheaper operation than
* an atomic update, especially when dealing with a page that may be
- * partitioned into only 2 or 3 pieces; but also unify the pp_frag_count
+ * referenced by only 2 or 3 users; but also unify the pp_ref_count
	 * handling by ensuring all pages have been partitioned into only 1 piece
* initially, and only overwrite it when the page is partitioned into
* more than one piece.
*/
- if (atomic_long_read(&page->pp_frag_count) == nr) {
+ if (atomic_long_read(&page->pp_ref_count) == nr) {
/* As we have ensured nr is always one for constant case using
* the BUILD_BUG_ON(), only need to handle the non-constant case
- * here for pp_frag_count draining, which is a rare case.
+ * here for pp_ref_count draining, which is a rare case.
*/
BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
if (!__builtin_constant_p(nr))
- atomic_long_set(&page->pp_frag_count, 1);
+ atomic_long_set(&page->pp_ref_count, 1);
return 0;
}
- ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ ret = atomic_long_sub_return(nr, &page->pp_ref_count);
WARN_ON(ret < 0);
- /* We are the last user here too, reset pp_frag_count back to 1 to
+ /* We are the last user here too, reset pp_ref_count back to 1 to
	 * ensure all pages have been partitioned into 1 piece initially;
* this should be the rare case when the last two fragment users call
- * page_pool_defrag_page() currently.
+ * page_pool_unref_page() currently.
*/
if (unlikely(!ret))
- atomic_long_set(&page->pp_frag_count, 1);
+ atomic_long_set(&page->pp_ref_count, 1);
return ret;
}
-static inline bool page_pool_is_last_frag(struct page *page)
+static inline void page_pool_ref_page(struct page *page)
+{
+ atomic_long_inc(&page->pp_ref_count);
+}
+
+static inline bool page_pool_is_last_ref(struct page *page)
{
- /* If page_pool_defrag_page() returns 0, we were the last user */
- return page_pool_defrag_page(page, 1) == 0;
+ /* If page_pool_unref_page() returns 0, we were the last user */
+ return page_pool_unref_page(page, 1) == 0;
}
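
Putting the renamed helpers together, a minimal sketch of handing one page to
two consumers; a dma_sync_size of -1 requests a full sync, as the fragment
API expects:

static void my_split_demo(struct page_pool *pool)
{
	struct page *page = page_pool_alloc_pages(pool, GFP_ATOMIC);

	if (!page)
		return;

	/* a fresh page has pp_ref_count == 1; take both references up front */
	page_pool_fragment_page(page, 2);

	/* ... two consumers each use part of the page, then drop a ref ... */
	page_pool_put_page(pool, page, -1, false);
	page_pool_put_page(pool, page, -1, false);	/* last ref: recycle */
}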
/**
@@ -301,10 +310,10 @@ static inline void page_pool_put_page(struct page_pool *pool,
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
- if (!page_pool_is_last_frag(page))
+ if (!page_pool_is_last_ref(page))
return;
- page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
+ page_pool_put_unrefed_page(pool, page, dma_sync_size, allow_direct);
#endif
}
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 6fc5134095ed..5e43a08d3231 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -5,6 +5,7 @@
#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
+#include <linux/types.h>
#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
* map/unmap
@@ -17,8 +18,9 @@
* Please note DMA-sync-for-CPU is still
* device driver responsibility
*/
-#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
- PP_FLAG_DMA_SYNC_DEV)
+#define PP_FLAG_SYSTEM_POOL BIT(2) /* Global system page_pool */
+#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
+ PP_FLAG_SYSTEM_POOL)
/*
* Fast allocation side cache array/stack
@@ -48,24 +50,30 @@ struct pp_alloc_cache {
* @pool_size: size of the ptr_ring
* @nid: NUMA node id to allocate from pages from
* @dev: device, for DMA pre-mapping purposes
+ * @netdev: netdev this pool will serve (leave as NULL if none or multiple)
* @napi: NAPI which is the sole consumer of pages, otherwise NULL
* @dma_dir: DMA mapping direction
* @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
* @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
*/
struct page_pool_params {
- unsigned int flags;
- unsigned int order;
- unsigned int pool_size;
- int nid;
- struct device *dev;
- struct napi_struct *napi;
- enum dma_data_direction dma_dir;
- unsigned int max_len;
- unsigned int offset;
+ struct_group_tagged(page_pool_params_fast, fast,
+ unsigned int flags;
+ unsigned int order;
+ unsigned int pool_size;
+ int nid;
+ struct device *dev;
+ struct napi_struct *napi;
+ enum dma_data_direction dma_dir;
+ unsigned int max_len;
+ unsigned int offset;
+ );
+ struct_group_tagged(page_pool_params_slow, slow,
+ struct net_device *netdev;
/* private: used by test code only */
- void (*init_callback)(struct page *page, void *arg);
- void *init_arg;
+ void (*init_callback)(struct page *page, void *arg);
+ void *init_arg;
+ );
};
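
The tagged groups let initialization copy only the hot members into the pool,
along the lines of (a sketch of what page_pool_init() can now do):

	memcpy(&pool->p, &params->fast, sizeof(pool->p));
	memcpy(&pool->slow, &params->slow, sizeof(pool->slow));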
#ifdef CONFIG_PAGE_POOL_STATS
@@ -119,7 +127,10 @@ struct page_pool_stats {
#endif
struct page_pool {
- struct page_pool_params p;
+ struct page_pool_params_fast p;
+
+ int cpuid;
+ bool has_init_callback;
long frag_users;
struct page *frag_page;
@@ -178,27 +189,34 @@ struct page_pool {
refcount_t user_cnt;
u64 destroy_cnt;
+
+ /* Slow/Control-path information follows */
+ struct page_pool_params_slow slow;
+ /* User-facing fields, protected by page_pools_lock */
+ struct {
+ struct hlist_node list;
+ u64 detach_time;
+ u32 napi_id;
+ u32 id;
+ } user;
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
unsigned int size, gfp_t gfp);
struct page_pool *page_pool_create(const struct page_pool_params *params);
+struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
+ int cpuid);
struct xdp_mem_info;
#ifdef CONFIG_PAGE_POOL
-void page_pool_unlink_napi(struct page_pool *pool);
void page_pool_destroy(struct page_pool *pool);
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
struct xdp_mem_info *mem);
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count);
#else
-static inline void page_pool_unlink_napi(struct page_pool *pool)
-{
-}
-
static inline void page_pool_destroy(struct page_pool *pool)
{
}
@@ -215,9 +233,9 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
#endif
-void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size,
- bool allow_direct);
+void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size,
+ bool allow_direct);
static inline bool is_page_pool_compiled_in(void)
{
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a76c9171db0e..a4ee43f493bb 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -24,6 +24,8 @@ struct tcf_walker {
int register_tcf_proto_ops(struct tcf_proto_ops *ops);
void unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
+#define NET_CLS_ALIAS_PREFIX "net-cls-"
+#define MODULE_ALIAS_NET_CLS(kind) MODULE_ALIAS(NET_CLS_ALIAS_PREFIX kind)
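
Usage sketch in a classifier module, so that a request for the "flower" kind
can autoload it through the "net-cls-flower" alias:

MODULE_ALIAS_NET_CLS("flower");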
struct tcf_block_ext_info {
enum flow_block_binder_type binder_type;
@@ -154,12 +156,6 @@ __cls_set_class(unsigned long *clp, unsigned long cl)
return xchg(clp, cl);
}
-static inline void tcf_set_drop_reason(struct tcf_result *res,
- enum skb_drop_reason reason)
-{
- res->drop_reason = reason;
-}
-
static inline void
__tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base)
{
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 9fa1d0794dfa..d7b7b6cd4aa1 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -100,6 +100,8 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
int register_qdisc(struct Qdisc_ops *qops);
void unregister_qdisc(struct Qdisc_ops *qops);
+#define NET_SCH_ALIAS_PREFIX "net-sch-"
+#define MODULE_ALIAS_NET_SCH(id) MODULE_ALIAS(NET_SCH_ALIAS_PREFIX id)
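
As with the classifier alias above, a qdisc module declares, e.g.:

MODULE_ALIAS_NET_SCH("fq_codel");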
void qdisc_get_default(char *id, size_t len);
int qdisc_set_default(const char *id);
@@ -275,24 +277,6 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
skb->tstamp = ktime_set(0, 0);
}
-struct tc_skb_cb {
- struct qdisc_skb_cb qdisc_cb;
-
- u16 mru;
- u8 post_ct:1;
- u8 post_ct_snat:1;
- u8 post_ct_dnat:1;
- u16 zone; /* Only valid if post_ct = true */
-};
-
-static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
-{
- struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
-
- BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
- return cb;
-}
-
static inline bool tc_qdisc_stats_dump(struct Qdisc *sch,
unsigned long cl,
struct qdisc_walker *arg)
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 6aef8cb11cc8..b2499f88f8f8 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -46,6 +46,7 @@ struct net_protocol {
* socket lookup?
*/
icmp_strict_tag_validation:1;
+ u32 secret;
};
#if IS_ENABLED(CONFIG_IPV6)
@@ -59,6 +60,7 @@ struct inet6_protocol {
__be32 info);
unsigned int flags; /* INET6_PROTO_xxx */
+ u32 secret;
};
#define INET6_PROTO_NOPOLICY 0x1
@@ -68,6 +70,7 @@ struct inet6_protocol {
struct net_offload {
struct offload_callbacks callbacks;
unsigned int flags; /* Flags used by IPv6 for now */
+ u32 secret;
};
/* This should be set for any extension header which is compatible with GSO. */
#define INET6_PROTO_GSO_EXTHDR 0x1
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 144c39db9898..8839133d6f6b 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -83,6 +83,45 @@ static inline struct sock *req_to_sk(struct request_sock *req)
return (struct sock *)req;
}
+/**
+ * skb_steal_sock - steal a socket from an sk_buff
+ * @skb: sk_buff to steal the socket from
+ * @refcounted: is set to true if the socket is reference-counted
+ * @prefetched: is set to true if the socket was assigned from bpf
+ */
+static inline struct sock *skb_steal_sock(struct sk_buff *skb,
+ bool *refcounted, bool *prefetched)
+{
+ struct sock *sk = skb->sk;
+
+ if (!sk) {
+ *prefetched = false;
+ *refcounted = false;
+ return NULL;
+ }
+
+ *prefetched = skb_sk_is_prefetched(skb);
+ if (*prefetched) {
+#if IS_ENABLED(CONFIG_SYN_COOKIES)
+ if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) {
+ struct request_sock *req = inet_reqsk(sk);
+
+ *refcounted = false;
+ sk = req->rsk_listener;
+ req->rsk_listener = NULL;
+ return sk;
+ }
+#endif
+ *refcounted = sk_is_refcounted(sk);
+ } else {
+ *refcounted = true;
+ }
+
+ skb->destructor = NULL;
+ skb->sk = NULL;
+ return sk;
+}
+
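
Typical receive-path use, sketched (the lookup fallback when no socket was
stolen is elided):

	bool refcounted, prefetched;
	struct sock *sk = skb_steal_sock(skb, &refcounted, &prefetched);

	/* ... demux / verdict ... */
	if (sk && refcounted)
		sock_put(sk);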
static inline struct request_sock *
reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
bool attach_listener)
diff --git a/include/net/route.h b/include/net/route.h
index 980ab474eabd..d4a0147942f1 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -37,9 +37,6 @@
#define RTO_ONLINK 0x01
-#define RT_CONN_FLAGS(sk) (RT_TOS(READ_ONCE(inet_sk(sk)->tos)) | sock_flag(sk, SOCK_LOCALROUTE))
-#define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
-
static inline __u8 ip_sock_rt_scope(const struct sock *sk)
{
if (sock_flag(sk, SOCK_LOCALROUTE))
@@ -163,8 +160,8 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
__u8 proto, __u8 tos, int oif)
{
flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
- RT_SCOPE_UNIVERSE, proto,
- sk ? inet_sk_flowi_flags(sk) : 0,
+ sk ? ip_sock_rt_scope(sk) : RT_SCOPE_UNIVERSE,
+ proto, sk ? inet_sk_flowi_flags(sk) : 0,
daddr, saddr, dport, sport, sock_net_uid(net, sk));
if (sk)
security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
diff --git a/include/net/rps.h b/include/net/rps.h
new file mode 100644
index 000000000000..7660243e905b
--- /dev/null
+++ b/include/net/rps.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_RPS_H
+#define _NET_RPS_H
+
+#include <linux/types.h>
+#include <linux/static_key.h>
+#include <net/sock.h>
+#include <net/hotdata.h>
+
+#ifdef CONFIG_RPS
+
+extern struct static_key_false rps_needed;
+extern struct static_key_false rfs_needed;
+
+/*
+ * This structure holds an RPS map which can be of variable length. The
+ * map is an array of CPUs.
+ */
+struct rps_map {
+ unsigned int len;
+ struct rcu_head rcu;
+ u16 cpus[];
+};
+#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
+
+/*
+ * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
+ * tail pointer for that CPU's input queue at the time of last enqueue, and
+ * a hardware filter index.
+ */
+struct rps_dev_flow {
+ u16 cpu;
+ u16 filter;
+ unsigned int last_qtail;
+};
+#define RPS_NO_FILTER 0xffff
+
+/*
+ * The rps_dev_flow_table structure contains a table of flow mappings.
+ */
+struct rps_dev_flow_table {
+ unsigned int mask;
+ struct rcu_head rcu;
+ struct rps_dev_flow flows[];
+};
+#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
+ ((_num) * sizeof(struct rps_dev_flow)))
+
+/*
+ * The rps_sock_flow_table contains mappings of flows to the last CPU
+ * on which they were processed by the application (set in recvmsg).
+ * Each entry is a 32bit value. Upper part is the high-order bits
+ * of flow hash, lower part is CPU number.
+ * rps_cpu_mask is used to partition the space, depending on the number of
+ * possible CPUs: rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
+ * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
+ * meaning we use 32-6=26 bits for the hash.
+ */
+struct rps_sock_flow_table {
+ u32 mask;
+
+ u32 ents[] ____cacheline_aligned_in_smp;
+};
+#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
+
+#define RPS_NO_CPU 0xffff
+
+static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
+ u32 hash)
+{
+ unsigned int index = hash & table->mask;
+ u32 val = hash & ~net_hotdata.rps_cpu_mask;
+
+ /* We only give a hint, preemption can change CPU under us */
+ val |= raw_smp_processor_id();
+
+ /* The following WRITE_ONCE() is paired with the READ_ONCE()
+ * here, and another one in get_rps_cpu().
+ */
+ if (READ_ONCE(table->ents[index]) != val)
+ WRITE_ONCE(table->ents[index], val);
+}
+
+#endif /* CONFIG_RPS */
+
+static inline void sock_rps_record_flow_hash(__u32 hash)
+{
+#ifdef CONFIG_RPS
+ struct rps_sock_flow_table *sock_flow_table;
+
+ if (!hash)
+ return;
+ rcu_read_lock();
+ sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table);
+ if (sock_flow_table)
+ rps_record_sock_flow(sock_flow_table, hash);
+ rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+ if (static_branch_unlikely(&rfs_needed)) {
+ /* Reading sk->sk_rxhash might incur an expensive cache line
+ * miss.
+ *
+ * TCP_ESTABLISHED does cover almost all states where RFS
+ * might be useful, and is cheaper [1] than testing :
+ * IPv4: inet_sk(sk)->inet_daddr
+ * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
+ * OR an additional socket flag
+ * [1] : sk_state and sk_prot are in the same cache line.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED) {
+ /* This READ_ONCE() is paired with the WRITE_ONCE()
+ * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
+ */
+ sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
+ }
+ }
+#endif
+}
+
+#endif /* _NET_RPS_H */
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 6506221c5fe3..3bfb80bad173 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
enum rtnl_link_flags {
RTNL_FLAG_DOIT_UNLOCKED = BIT(0),
RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1),
+ RTNL_FLAG_DUMP_UNLOCKED = BIT(2),
};
enum rtnl_kinds {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index dcb9160e6467..cefe0c4bdae3 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -19,6 +19,7 @@
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
#include <net/flow_offload.h>
+#include <linux/xarray.h>
struct Qdisc_ops;
struct qdisc_walker;
@@ -237,12 +238,7 @@ static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
{
-#ifdef CONFIG_BQL
- /* Non-BQL migrated drivers will return 0, too. */
- return dql_avail(&txq->dql);
-#else
- return 0;
-#endif
+ return netdev_queue_dql_avail(txq);
}
struct Qdisc_class_ops {
@@ -332,7 +328,6 @@ struct tcf_result {
};
const struct tcf_proto *goto_tp;
};
- enum skb_drop_reason drop_reason;
};
struct tcf_chain;
@@ -375,6 +370,10 @@ struct tcf_proto_ops {
struct nlattr **tca,
struct netlink_ext_ack *extack);
void (*tmplt_destroy)(void *tmplt_priv);
+ void (*tmplt_reoffload)(struct tcf_chain *chain,
+ bool add,
+ flow_setup_cb_t *cb,
+ void *cb_priv);
struct tcf_exts * (*get_exts)(const struct tcf_proto *tp,
u32 handle);
@@ -457,6 +456,7 @@ struct tcf_chain {
};
struct tcf_block {
+ struct xarray ports; /* datapath accessible */
/* Lock protects tcf_block and lifetime-management data of chains
* attached to the block (refcnt, action_refcnt, explicitly_created).
*/
@@ -483,6 +483,8 @@ struct tcf_block {
struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};
+struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index);
+
static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
{
return lockdep_is_held(&chain->filter_chain_lock);
@@ -1037,6 +1039,37 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
return skb;
}
+struct tc_skb_cb {
+ struct qdisc_skb_cb qdisc_cb;
+ u32 drop_reason;
+
+ u16 zone; /* Only valid if post_ct = true */
+ u16 mru;
+ u8 post_ct:1;
+ u8 post_ct_snat:1;
+ u8 post_ct_dnat:1;
+};
+
+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
+{
+ struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
+
+ BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
+ return cb;
+}
+
+static inline enum skb_drop_reason
+tcf_get_drop_reason(const struct sk_buff *skb)
+{
+ return tc_skb_cb(skb)->drop_reason;
+}
+
+static inline void tcf_set_drop_reason(const struct sk_buff *skb,
+ enum skb_drop_reason reason)
+{
+ tc_skb_cb(skb)->drop_reason = reason;
+}
+
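
For illustration, a classifier rejecting a packet while recording why; the
my_match() predicate is hypothetical, and SKB_DROP_REASON_TC_ERROR is assumed
as the generic tc reason:

static int my_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		       struct tcf_result *res)
{
	if (!my_match(skb)) {
		tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_ERROR);
		return TC_ACT_SHOT;
	}
	return TC_ACT_OK;
}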
/* Instead of calling kfree_skb() while root qdisc lock is held,
* queue the skb for future freeing at end of __dev_xmit_skb()
*/
diff --git a/include/net/scm.h b/include/net/scm.h
index e8c76b4be2fe..92276a2c5543 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -5,6 +5,7 @@
#include <linux/limits.h>
#include <linux/net.h>
#include <linux/cred.h>
+#include <linux/file.h>
#include <linux/security.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
@@ -24,6 +25,7 @@ struct scm_creds {
struct scm_fp_list {
short count;
+ short count_unix;
short max;
struct user_struct *user;
struct file *fp[SCM_MAX_FD];
@@ -208,5 +210,13 @@ static inline void scm_recv_unix(struct socket *sock, struct msghdr *msg,
scm_destroy_cred(scm);
}
+static inline int scm_recv_one_fd(struct file *f, int __user *ufd,
+ unsigned int flags)
+{
+ if (!ufd)
+ return -EFAULT;
+ return receive_fd(f, ufd, flags);
+}
+
#endif /* __LINUX_NET_SCM_H */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 5a24d6d8522a..f24a1bbcb3ef 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -242,10 +242,7 @@ struct sctp_sock {
int do_auto_asconf;
};
-static inline struct sctp_sock *sctp_sk(const struct sock *sk)
-{
- return (struct sctp_sock *)sk;
-}
+#define sctp_sk(ptr) container_of_const(ptr, struct sctp_sock, inet.sk)
static inline struct sock *sctp_opt2sk(const struct sctp_sock *sp)
{
diff --git a/include/net/smc.h b/include/net/smc.h
index a002552be29c..c9dcb30e3fd9 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -52,9 +52,14 @@ struct smcd_dmb {
struct smcd_dev;
struct ism_client;
+struct smcd_gid {
+ u64 gid;
+ u64 gid_ext;
+};
+
struct smcd_ops {
- int (*query_remote_gid)(struct smcd_dev *dev, u64 rgid, u32 vid_valid,
- u32 vid);
+ int (*query_remote_gid)(struct smcd_dev *dev, struct smcd_gid *rgid,
+ u32 vid_valid, u32 vid);
int (*register_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb,
struct ism_client *client);
int (*unregister_dmb)(struct smcd_dev *dev, struct smcd_dmb *dmb);
@@ -62,14 +67,13 @@ struct smcd_ops {
int (*del_vlan_id)(struct smcd_dev *dev, u64 vlan_id);
int (*set_vlan_required)(struct smcd_dev *dev);
int (*reset_vlan_required)(struct smcd_dev *dev);
- int (*signal_event)(struct smcd_dev *dev, u64 rgid, u32 trigger_irq,
- u32 event_code, u64 info);
+ int (*signal_event)(struct smcd_dev *dev, struct smcd_gid *rgid,
+ u32 trigger_irq, u32 event_code, u64 info);
int (*move_data)(struct smcd_dev *dev, u64 dmb_tok, unsigned int idx,
bool sf, unsigned int offset, void *data,
unsigned int size);
int (*supports_v2)(void);
- u8* (*get_system_eid)(void);
- u64 (*get_local_gid)(struct smcd_dev *dev);
+ void (*get_local_gid)(struct smcd_dev *dev, struct smcd_gid *gid);
u16 (*get_chid)(struct smcd_dev *dev);
struct device* (*get_dev)(struct smcd_dev *dev);
};
diff --git a/include/net/sock.h b/include/net/sock.h
index 1d6931caf0c3..b5e00702acc1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -76,19 +76,6 @@
* the other protocols.
*/
-/* Define this to get the SOCK_DBG debugging facility. */
-#define SOCK_DEBUGGING
-#ifdef SOCK_DEBUGGING
-#define SOCK_DEBUG(sk, msg...) do { if ((sk) && sock_flag((sk), SOCK_DBG)) \
- printk(KERN_DEBUG msg); } while (0)
-#else
-/* Validate arguments and do nothing */
-static inline __printf(2, 3)
-void SOCK_DEBUG(const struct sock *sk, const char *msg, ...)
-{
-}
-#endif
-
/* This is the per-socket lock. The spinlock provides a synchronization
* between user contexts and software interrupt processing, whereas the
* mini-semaphore synchronizes multiple users amongst themselves.
@@ -277,8 +264,6 @@ struct sk_filter;
* @sk_pacing_status: Pacing status (requested, handled by sch_fq)
* @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
* @sk_sndbuf: size of send buffer in bytes
- * @__sk_flags_offset: empty field used to determine location of bitfield
- * @sk_padding: unused element for alignment
* @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
* @sk_no_check_rx: allow zero checksum in RX packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
@@ -352,7 +337,6 @@ struct sk_filter;
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
- * @sk_bind2_node: bind node in the bhash2 table
*/
struct sock {
/*
@@ -394,14 +378,10 @@ struct sock {
#define sk_flags __sk_common.skc_flags
#define sk_rxhash __sk_common.skc_rxhash
- /* early demux fields */
- struct dst_entry __rcu *sk_rx_dst;
- int sk_rx_dst_ifindex;
- u32 sk_rx_dst_cookie;
+ __cacheline_group_begin(sock_write_rx);
- socket_lock_t sk_lock;
atomic_t sk_drops;
- int sk_rcvlowat;
+ __s32 sk_peek_off;
struct sk_buff_head sk_error_queue;
struct sk_buff_head sk_receive_queue;
/*
@@ -418,18 +398,24 @@ struct sock {
struct sk_buff *head;
struct sk_buff *tail;
} sk_backlog;
-
#define sk_rmem_alloc sk_backlog.rmem_alloc
- int sk_forward_alloc;
- u32 sk_reserved_mem;
+ __cacheline_group_end(sock_write_rx);
+
+ __cacheline_group_begin(sock_read_rx);
+ /* early demux fields */
+ struct dst_entry __rcu *sk_rx_dst;
+ int sk_rx_dst_ifindex;
+ u32 sk_rx_dst_cookie;
+
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sk_ll_usec;
- /* ===== mostly read cache line ===== */
unsigned int sk_napi_id;
+ u16 sk_busy_poll_budget;
+ u8 sk_prefer_busy_poll;
#endif
+ u8 sk_userlocks;
int sk_rcvbuf;
- int sk_disconnects;
struct sk_filter __rcu *sk_filter;
union {
@@ -438,15 +424,33 @@ struct sock {
struct socket_wq *sk_wq_raw;
/* public: */
};
+
+ void (*sk_data_ready)(struct sock *sk);
+ long sk_rcvtimeo;
+ int sk_rcvlowat;
+ __cacheline_group_end(sock_read_rx);
+
+ __cacheline_group_begin(sock_read_rxtx);
+ int sk_err;
+ struct socket *sk_socket;
+ struct mem_cgroup *sk_memcg;
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
#endif
+ __cacheline_group_end(sock_read_rxtx);
- struct dst_entry __rcu *sk_dst_cache;
+ __cacheline_group_begin(sock_write_rxtx);
+ socket_lock_t sk_lock;
+ u32 sk_reserved_mem;
+ int sk_forward_alloc;
+ u32 sk_tsflags;
+ __cacheline_group_end(sock_write_rxtx);
+
+ __cacheline_group_begin(sock_write_tx);
+ int sk_write_pending;
atomic_t sk_omem_alloc;
int sk_sndbuf;
- /* ===== cache line for TX ===== */
int sk_wmem_queued;
refcount_t sk_wmem_alloc;
unsigned long sk_tsq_flags;
@@ -455,22 +459,36 @@ struct sock {
struct rb_root tcp_rtx_queue;
};
struct sk_buff_head sk_write_queue;
- __s32 sk_peek_off;
- int sk_write_pending;
- __u32 sk_dst_pending_confirm;
+ u32 sk_dst_pending_confirm;
u32 sk_pacing_status; /* see enum sk_pacing */
- long sk_sndtimeo;
+ struct page_frag sk_frag;
struct timer_list sk_timer;
- __u32 sk_priority;
- __u32 sk_mark;
+
unsigned long sk_pacing_rate; /* bytes per second */
+ atomic_t sk_zckey;
+ atomic_t sk_tskey;
+ __cacheline_group_end(sock_write_tx);
+
+ __cacheline_group_begin(sock_read_tx);
unsigned long sk_max_pacing_rate;
- struct page_frag sk_frag;
+ long sk_sndtimeo;
+ u32 sk_priority;
+ u32 sk_mark;
+ struct dst_entry __rcu *sk_dst_cache;
netdev_features_t sk_route_caps;
- int sk_gso_type;
+#ifdef CONFIG_SOCK_VALIDATE_XMIT
+ struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk,
+ struct net_device *dev,
+ struct sk_buff *skb);
+#endif
+ u16 sk_gso_type;
+ u16 sk_gso_max_segs;
unsigned int sk_gso_max_size;
gfp_t sk_allocation;
- __u32 sk_txhash;
+ u32 sk_txhash;
+ u8 sk_pacing_shift;
+ bool sk_use_task_frag;
+ __cacheline_group_end(sock_read_tx);
/*
* Because of non atomicity rules, all
@@ -479,64 +497,44 @@ struct sock {
u8 sk_gso_disabled : 1,
sk_kern_sock : 1,
sk_no_check_tx : 1,
- sk_no_check_rx : 1,
- sk_userlocks : 4;
- u8 sk_pacing_shift;
+ sk_no_check_rx : 1;
+ u8 sk_shutdown;
u16 sk_type;
u16 sk_protocol;
- u16 sk_gso_max_segs;
unsigned long sk_lingertime;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
- int sk_err,
- sk_err_soft;
+ int sk_err_soft;
u32 sk_ack_backlog;
u32 sk_max_ack_backlog;
kuid_t sk_uid;
- u8 sk_txrehash;
-#ifdef CONFIG_NET_RX_BUSY_POLL
- u8 sk_prefer_busy_poll;
- u16 sk_busy_poll_budget;
-#endif
spinlock_t sk_peer_lock;
int sk_bind_phc;
struct pid *sk_peer_pid;
const struct cred *sk_peer_cred;
- long sk_rcvtimeo;
ktime_t sk_stamp;
#if BITS_PER_LONG==32
seqlock_t sk_stamp_seq;
#endif
- atomic_t sk_tskey;
- atomic_t sk_zckey;
- u32 sk_tsflags;
- u8 sk_shutdown;
+ int sk_disconnects;
+ u8 sk_txrehash;
u8 sk_clockid;
u8 sk_txtime_deadline_mode : 1,
sk_txtime_report_errors : 1,
sk_txtime_unused : 6;
- bool sk_use_task_frag;
- struct socket *sk_socket;
void *sk_user_data;
#ifdef CONFIG_SECURITY
void *sk_security;
#endif
struct sock_cgroup_data sk_cgrp_data;
- struct mem_cgroup *sk_memcg;
void (*sk_state_change)(struct sock *sk);
- void (*sk_data_ready)(struct sock *sk);
void (*sk_write_space)(struct sock *sk);
void (*sk_error_report)(struct sock *sk);
int (*sk_backlog_rcv)(struct sock *sk,
struct sk_buff *skb);
-#ifdef CONFIG_SOCK_VALIDATE_XMIT
- struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk,
- struct net_device *dev,
- struct sk_buff *skb);
-#endif
void (*sk_destruct)(struct sock *sk);
struct sock_reuseport __rcu *sk_reuseport_cb;
#ifdef CONFIG_BPF_SYSCALL
@@ -544,7 +542,6 @@ struct sock {
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
- struct hlist_node sk_bind2_node;
};
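The new markers carve struct sock into access-pattern groups (fields written on RX, read on RX, shared rx/tx, written on TX, read on TX) so each path touches as few cache lines as possible. The layout can then be frozen with build-time assertions; a minimal sketch using CACHELINE_ASSERT_GROUP_MEMBER() from <linux/cache.h> (the checker function name and choice of fields are illustrative):

	static void sock_struct_check(void)
	{
		/* RX write-mostly fields stay in sock_write_rx */
		CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_drops);
		CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_peek_off);
		/* TX read-mostly fields stay in sock_read_tx */
		CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps);
		CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift);
	}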
enum sk_pacing {
@@ -873,16 +870,6 @@ static inline void sk_add_bind_node(struct sock *sk,
hlist_add_head(&sk->sk_bind_node, list);
}
-static inline void __sk_del_bind2_node(struct sock *sk)
-{
- __hlist_del(&sk->sk_bind2_node);
-}
-
-static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
-{
- hlist_add_head(&sk->sk_bind2_node, list);
-}
-
#define sk_for_each(__sk, list) \
hlist_for_each_entry(__sk, list, sk_node)
#define sk_for_each_rcu(__sk, list) \
@@ -900,8 +887,6 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
-#define sk_for_each_bound_bhash2(__sk, list) \
- hlist_for_each_entry(__sk, list, sk_bind2_node)
/**
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
@@ -1132,41 +1117,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
WRITE_ONCE(sk->sk_incoming_cpu, cpu);
}
-static inline void sock_rps_record_flow_hash(__u32 hash)
-{
-#ifdef CONFIG_RPS
- struct rps_sock_flow_table *sock_flow_table;
-
- rcu_read_lock();
- sock_flow_table = rcu_dereference(rps_sock_flow_table);
- rps_record_sock_flow(sock_flow_table, hash);
- rcu_read_unlock();
-#endif
-}
-
-static inline void sock_rps_record_flow(const struct sock *sk)
-{
-#ifdef CONFIG_RPS
- if (static_branch_unlikely(&rfs_needed)) {
- /* Reading sk->sk_rxhash might incur an expensive cache line
- * miss.
- *
- * TCP_ESTABLISHED does cover almost all states where RFS
- * might be useful, and is cheaper [1] than testing :
- * IPv4: inet_sk(sk)->inet_daddr
- * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
- * OR an additional socket flag
- * [1] : sk_state and sk_prot are in the same cache line.
- */
- if (sk->sk_state == TCP_ESTABLISHED) {
- /* This READ_ONCE() is paired with the WRITE_ONCE()
- * from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
- */
- sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
- }
- }
-#endif
-}
static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
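The RPS/RFS helpers appear to be relocated rather than removed, presumably to the new, leaner <net/rps.h> so that sock.h stops pulling in the RPS machinery. Call sites would keep the same semantics, roughly:

	#include <net/rps.h>

	static void record_flow_sketch(struct sock *sk)
	{
		sock_rps_record_flow(sk);	/* unchanged behaviour, new header */
	}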
@@ -1458,6 +1408,7 @@ sk_memory_allocated(const struct sock *sk)
/* 1 MB per cpu, in page units */
#define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT))
+extern int sysctl_mem_pcpu_rsv;
static inline void
sk_memory_allocated_add(struct sock *sk, int amt)
@@ -1466,7 +1417,7 @@ sk_memory_allocated_add(struct sock *sk, int amt)
preempt_disable();
local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve >= SK_MEMORY_PCPU_RESERVE) {
+ if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) {
__this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
}
@@ -1480,7 +1431,7 @@ sk_memory_allocated_sub(struct sock *sk, int amt)
preempt_disable();
local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt);
- if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) {
+ if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) {
__this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve);
atomic_long_add(local_reserve, sk->sk_prot->memory_allocated);
}
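Both paths batch forward-alloc accounting per CPU and only touch the shared memory_allocated atomic once the local balance crosses the reserve, which is now runtime-tunable instead of fixed at 1 MB. A worked sketch (page counts illustrative, assuming 4 KiB pages so the default reserve is 256 pages):

	static void charge_sketch(struct sock *sk)
	{
		/* with sysctl_mem_pcpu_rsv == 256 pages: */
		sk_memory_allocated_add(sk, 100);	/* stays per-CPU */
		sk_memory_allocated_add(sk, 200);	/* 300 >= 256: folded into the global counter */
	}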
@@ -2794,9 +2745,30 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
&skb_shinfo(skb)->tskey);
}
+static inline bool sk_is_inet(const struct sock *sk)
+{
+ int family = READ_ONCE(sk->sk_family);
+
+ return family == AF_INET || family == AF_INET6;
+}
+
static inline bool sk_is_tcp(const struct sock *sk)
{
- return sk->sk_type == SOCK_STREAM && sk->sk_protocol == IPPROTO_TCP;
+ return sk_is_inet(sk) &&
+ sk->sk_type == SOCK_STREAM &&
+ sk->sk_protocol == IPPROTO_TCP;
+}
+
+static inline bool sk_is_udp(const struct sock *sk)
+{
+ return sk_is_inet(sk) &&
+ sk->sk_type == SOCK_DGRAM &&
+ sk->sk_protocol == IPPROTO_UDP;
+}
+
+static inline bool sk_is_stream_unix(const struct sock *sk)
+{
+ return sk->sk_family == AF_UNIX && sk->sk_type == SOCK_STREAM;
}
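The inet check matters because sk_type/sk_protocol pairs are not unique across address families, so a non-inet SOCK_STREAM socket could previously pass the TCP test. A sketch of the sort of caller this hardens (illustrative):

	static bool wants_inet_offload(const struct sock *sk)
	{
		/* only genuine TCP or UDP sockets qualify now */
		return sk_is_tcp(sk) || sk_is_udp(sk);
	}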
/**
@@ -2838,31 +2810,6 @@ sk_is_refcounted(struct sock *sk)
return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE);
}
-/**
- * skb_steal_sock - steal a socket from an sk_buff
- * @skb: sk_buff to steal the socket from
- * @refcounted: is set to true if the socket is reference-counted
- * @prefetched: is set to true if the socket was assigned from bpf
- */
-static inline struct sock *
-skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched)
-{
- if (skb->sk) {
- struct sock *sk = skb->sk;
-
- *refcounted = true;
- *prefetched = skb_sk_is_prefetched(skb);
- if (*prefetched)
- *refcounted = sk_is_refcounted(sk);
- skb->destructor = NULL;
- skb->sk = NULL;
- return sk;
- }
- *prefetched = false;
- *refcounted = false;
- return NULL;
-}
-
/* Checks if this SKB belongs to an HW offloaded socket
* and whether any SW fallbacks are required based on dev.
* Check decrypted mark in case skb_orphan() cleared socket.
@@ -2920,7 +2867,6 @@ extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
extern int sysctl_tstamp_allow_data;
-extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
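sysctl_optmem_max drops off the global extern list because it becomes a per-netns knob; readers would now go through the owning namespace, along these lines (accessor sketch, assuming the field lands in the netns core area as sysctl_optmem_max):

	static int optmem_limit_sketch(const struct sock *sk)
	{
		return READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
	}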
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index a43062d4c734..8346b0d29542 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -308,6 +308,9 @@ void switchdev_deferred_process(void);
int switchdev_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack);
+bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+ enum switchdev_notifier_type nt,
+ const struct switchdev_obj *obj);
int switchdev_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack);
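switchdev object add/del requests are deferred to process context, so a driver can race against its own still-queued operations; the new predicate lets it check for that. A hedged usage sketch:

	static bool vlan_del_still_queued(struct net_device *dev,
					  const struct switchdev_obj *obj)
	{
		/* true if a deferred SWITCHDEV_PORT_OBJ_DEL for @obj is pending */
		return switchdev_port_obj_act_is_deferred(dev,
							  SWITCHDEV_PORT_OBJ_DEL,
							  obj);
	}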
diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
index 8a6dbfb23336..77f87c622a2e 100644
--- a/include/net/tc_act/tc_ct.h
+++ b/include/net/tc_act/tc_ct.h
@@ -58,6 +58,11 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a)
return to_ct_params(a)->nf_ft;
}
+static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a)
+{
+ return to_ct_params(a)->helper;
+}
+
#else
static inline uint16_t tcf_ct_zone(const struct tc_action *a) { return 0; }
static inline int tcf_ct_action(const struct tc_action *a) { return 0; }
@@ -65,6 +70,10 @@ static inline struct nf_flowtable *tcf_ct_ft(const struct tc_action *a)
{
return NULL;
}
+static inline struct nf_conntrack_helper *tcf_ct_helper(const struct tc_action *a)
+{
+ return NULL;
+}
#endif /* CONFIG_NF_CONNTRACK */
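tcf_ct_helper() exposes the conntrack helper attached to a ct action, and the !CONFIG_NF_CONNTRACK stub returns NULL so callers stay ifdef-free. One plausible consumer is offload code refusing what hardware cannot express (sketch):

	static int ct_act_offloadable(const struct tc_action *act,
				      struct netlink_ext_ack *extack)
	{
		if (tcf_ct_helper(act)) {
			NL_SET_ERR_MSG(extack, "conntrack helpers not offloadable");
			return -EOPNOTSUPP;
		}
		return 0;
	}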
#if IS_ENABLED(CONFIG_NET_ACT_CT)
diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h
deleted file mode 100644
index 4225fcb1c6ba..000000000000
--- a/include/net/tc_act/tc_ipt.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NET_TC_IPT_H
-#define __NET_TC_IPT_H
-
-#include <net/act_api.h>
-
-struct xt_entry_target;
-
-struct tcf_ipt {
- struct tc_action common;
- u32 tcfi_hook;
- char *tcfi_tname;
- struct xt_entry_target *tcfi_t;
-};
-#define to_ipt(a) ((struct tcf_ipt *)a)
-
-#endif /* __NET_TC_IPT_H */
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 32ce8ea36950..75722d967bf2 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -8,6 +8,7 @@
struct tcf_mirred {
struct tc_action common;
int tcfm_eaction;
+ u32 tcfm_blockid;
bool tcfm_mac_header_xmit;
struct net_device __rcu *tcfm_dev;
netdevice_tracker tcfm_dev_tracker;
diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index a6d481b5bcbc..a608546bcefc 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -117,10 +117,6 @@ static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
if (a->ops->act == tcf_ife_act)
return tcf_ife_act(skb, a, res);
#endif
-#if IS_BUILTIN(CONFIG_NET_ACT_IPT)
- if (a->ops->act == tcf_ipt_act)
- return tcf_ipt_act(skb, a, res);
-#endif
#if IS_BUILTIN(CONFIG_NET_ACT_SIMP)
if (a->ops->act == tcf_simp_act)
return tcf_simp_act(skb, a, res);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d2f0736b76b8..6ae35199d3b3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -348,7 +348,7 @@ void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, int *karg);
-int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
+enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_space_adjust(struct sock *sk);
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
@@ -396,8 +396,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen,
bool *lost_race);
-int tcp_child_process(struct sock *parent, struct sock *child,
- struct sk_buff *skb);
+enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child,
+ struct sk_buff *skb);
void tcp_enter_loss(struct sock *sk);
void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag);
void tcp_clear_retrans(struct tcp_sock *tp);
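Returning enum skb_drop_reason instead of an int lets the outermost caller attach a precise reason when it finally frees the skb; a simplified caller sketch:

	static void rcv_state_sketch(struct sock *sk, struct sk_buff *skb)
	{
		enum skb_drop_reason reason;

		reason = tcp_rcv_state_process(sk, skb);
		if (reason != SKB_NOT_DROPPED_YET)
			kfree_skb_reason(skb, reason);	/* reason shows up in drop tracing */
	}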
@@ -490,13 +490,30 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
/* From syncookies.c */
struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst, u32 tsoff);
-int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
- u32 cookie);
+ struct dst_entry *dst);
+int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
- const struct tcp_request_sock_ops *af_ops,
- struct sock *sk, struct sk_buff *skb);
+ struct sock *sk, struct sk_buff *skb,
+ struct tcp_options_received *tcp_opt,
+ int mss, u32 tsoff);
+
+#if IS_ENABLED(CONFIG_BPF)
+struct bpf_tcp_req_attrs {
+ u32 rcv_tsval;
+ u32 rcv_tsecr;
+ u16 mss;
+ u8 rcv_wscale;
+ u8 snd_wscale;
+ u8 ecn_ok;
+ u8 wscale_ok;
+ u8 sack_ok;
+ u8 tstamp_ok;
+ u8 usec_ts_ok;
+ u8 reserved[3];
+};
+#endif
+
#ifdef CONFIG_SYN_COOKIES
/* Syncookies use a monotonic timer which increments every 60 seconds.
@@ -576,18 +593,50 @@ static inline u32 tcp_cookie_time(void)
return val;
}
+/* Convert a 64-bit nsec timestamp to a TS value in msec or usec resolution */
+static inline u64 tcp_ns_to_ts(bool usec_ts, u64 val)
+{
+ if (usec_ts)
+ return div_u64(val, NSEC_PER_USEC);
+
+ return div_u64(val, NSEC_PER_MSEC);
+}
+
u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
u16 *mssp);
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
u64 cookie_init_timestamp(struct request_sock *req, u64 now);
bool cookie_timestamp_decode(const struct net *net,
struct tcp_options_received *opt);
-bool cookie_ecn_ok(const struct tcp_options_received *opt,
- const struct net *net, const struct dst_entry *dst);
+
+static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry *dst)
+{
+ return READ_ONCE(net->ipv4.sysctl_tcp_ecn) ||
+ dst_feature(dst, RTAX_FEATURE_ECN);
+}
+
+#if IS_ENABLED(CONFIG_BPF)
+static inline bool cookie_bpf_ok(struct sk_buff *skb)
+{
+ return skb->sk;
+}
+
+struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb);
+#else
+static inline bool cookie_bpf_ok(struct sk_buff *skb)
+{
+ return false;
+}
+
+static inline struct request_sock *cookie_bpf_check(struct sock *sk,
+						     struct sk_buff *skb)
+{
+ return NULL;
+}
+#endif
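tcp_ns_to_ts() is plain division, but the dual resolution is easy to invert; worked values (illustrative helper):

	static void ts_conversion_example(void)
	{
		u64 ns = 2000000500ULL;		/* ~2.0 seconds */

		WARN_ON(tcp_ns_to_ts(false, ns) != 2000);	/* msec resolution */
		WARN_ON(tcp_ns_to_ts(true, ns) != 2000000);	/* usec resolution */
	}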
/* From net/ipv6/syncookies.c */
-int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
- u32 cookie);
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th);
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
@@ -1514,17 +1563,22 @@ static inline int tcp_full_space(const struct sock *sk)
return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
-static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
+static inline void __tcp_adjust_rcv_ssthresh(struct sock *sk, u32 new_ssthresh)
{
int unused_mem = sk_unused_reserved_mem(sk);
struct tcp_sock *tp = tcp_sk(sk);
- tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh, new_ssthresh);
if (unused_mem)
tp->rcv_ssthresh = max_t(u32, tp->rcv_ssthresh,
tcp_win_from_space(sk, unused_mem));
}
+static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
+{
+ __tcp_adjust_rcv_ssthresh(sk, 4U * tcp_sk(sk)->advmss);
+}
+
void tcp_cleanup_rbuf(struct sock *sk, int copied);
void __tcp_cleanup_rbuf(struct sock *sk, int copied);
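tcp_adjust_rcv_ssthresh() keeps the historical 4 * advmss clamp while the new __tcp_adjust_rcv_ssthresh() lets callers choose a different bound; an illustrative caller under stronger memory pressure:

	static void rcv_ssthresh_sketch(struct sock *sk)
	{
		/* clamp harder than the default 4 * advmss */
		__tcp_adjust_rcv_ssthresh(sk, 2U * tcp_sk(sk)->advmss);
	}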
@@ -1783,8 +1837,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
#ifdef CONFIG_TCP_MD5SIG
-#include <linux/jump_label.h>
-extern struct static_key_false_deferred tcp_md5_needed;
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
int family, bool any_l3index);
@@ -2499,7 +2551,7 @@ struct tcp_ulp_ops {
/* cleanup ulp */
void (*release)(struct sock *sk);
/* diagnostic */
- int (*get_info)(const struct sock *sk, struct sk_buff *skb);
+ int (*get_info)(struct sock *sk, struct sk_buff *skb);
size_t (*get_info_size)(const struct sock *sk);
/* clone ulp */
void (*clone)(const struct request_sock *req, struct sock *newsk,
diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
index b56be10838f0..471e177362b4 100644
--- a/include/net/tcp_ao.h
+++ b/include/net/tcp_ao.h
@@ -62,11 +62,17 @@ static inline int tcp_ao_maclen(const struct tcp_ao_key *key)
return key->maclen;
}
+/* Use tcp_ao_len_aligned() for TCP header calculations */
static inline int tcp_ao_len(const struct tcp_ao_key *key)
{
return tcp_ao_maclen(key) + sizeof(struct tcp_ao_hdr);
}
+static inline int tcp_ao_len_aligned(const struct tcp_ao_key *key)
+{
+ return round_up(tcp_ao_len(key), 4);
+}
+
static inline unsigned int tcp_ao_digest_size(struct tcp_ao_key *key)
{
return key->digest_size;
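TCP options are laid out in 32-bit words, hence the round_up() in tcp_ao_len_aligned(); worked numbers (sizeof(struct tcp_ao_hdr) == 4):

	/*
	 *	maclen 12 -> tcp_ao_len() == 16 -> tcp_ao_len_aligned() == 16
	 *	maclen 13 -> tcp_ao_len() == 17 -> tcp_ao_len_aligned() == 20
	 * the padding bytes are wasted but keep subsequent options aligned
	 */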
@@ -121,12 +127,35 @@ struct tcp_ao_info {
struct rcu_head rcu;
};
+#ifdef CONFIG_TCP_MD5SIG
+#include <linux/jump_label.h>
+extern struct static_key_false_deferred tcp_md5_needed;
+#define static_branch_tcp_md5() static_branch_unlikely(&tcp_md5_needed.key)
+#else
+#define static_branch_tcp_md5() false
+#endif
+#ifdef CONFIG_TCP_AO
+/* TCP-AO structures and functions */
+#include <linux/jump_label.h>
+extern struct static_key_false_deferred tcp_ao_needed;
+#define static_branch_tcp_ao() static_branch_unlikely(&tcp_ao_needed.key)
+#else
+#define static_branch_tcp_ao() false
+#endif
+
+static inline bool tcp_hash_should_produce_warnings(void)
+{
+ return static_branch_tcp_md5() || static_branch_tcp_ao();
+}
+
#define tcp_hash_fail(msg, family, skb, fmt, ...) \
do { \
const struct tcphdr *th = tcp_hdr(skb); \
char hdr_flags[6]; \
char *f = hdr_flags; \
\
+ if (!tcp_hash_should_produce_warnings()) \
+ break; \
if (th->fin) \
*f++ = 'F'; \
if (th->syn) \
@@ -153,9 +182,6 @@ do { \
#ifdef CONFIG_TCP_AO
/* TCP-AO structures and functions */
-#include <linux/jump_label.h>
-extern struct static_key_false_deferred tcp_ao_needed;
-
struct tcp4_ao_context {
__be32 saddr;
__be32 daddr;
@@ -265,8 +291,7 @@ void tcp_ao_established(struct sock *sk);
void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb);
void tcp_ao_connect_init(struct sock *sk);
void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
- struct tcp_request_sock *treq,
- unsigned short int family, int l3index);
+ struct request_sock *req, unsigned short int family);
#else /* CONFIG_TCP_AO */
static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
@@ -277,8 +302,7 @@ static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
}
static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
- struct tcp_request_sock *treq,
- unsigned short int family, int l3index)
+ struct request_sock *req, unsigned short int family)
{
}
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
index cc00118acca1..d60e8148ff4c 100644
--- a/include/net/tcp_states.h
+++ b/include/net/tcp_states.h
@@ -22,6 +22,7 @@ enum {
TCP_LISTEN,
TCP_CLOSING, /* Now a valid state */
TCP_NEW_SYN_RECV,
+ TCP_BOUND_INACTIVE, /* Pseudo-state for inet_diag */
TCP_MAX_STATES /* Leave at the end! */
};
@@ -43,6 +44,7 @@ enum {
TCPF_LISTEN = (1 << TCP_LISTEN),
TCPF_CLOSING = (1 << TCP_CLOSING),
TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
+ TCPF_BOUND_INACTIVE = (1 << TCP_BOUND_INACTIVE),
};
#endif /* _LINUX_TCP_STATES_H */
diff --git a/include/net/tls.h b/include/net/tls.h
index 962f0c501111..340ad43971e4 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -97,9 +97,6 @@ struct tls_sw_context_tx {
struct tls_rec *open_rec;
struct list_head tx_list;
atomic_t encrypt_pending;
- /* protect crypto_wait with encrypt_pending */
- spinlock_t encrypt_compl_lock;
- int async_notify;
u8 async_capable:1;
#define BIT_TX_SCHEDULED 0
@@ -136,8 +133,6 @@ struct tls_sw_context_rx {
struct tls_strparser strp;
atomic_t decrypt_pending;
- /* protect crypto_wait with decrypt_pending*/
- spinlock_t decrypt_compl_lock;
struct sk_buff_head async_hold;
struct wait_queue_head wq;
};
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 6a9f8a5f387c..33ba6fc151cf 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -210,22 +210,23 @@ struct vxlan_rdst {
};
struct vxlan_config {
- union vxlan_addr remote_ip;
- union vxlan_addr saddr;
- __be32 vni;
- int remote_ifindex;
- int mtu;
- __be16 dst_port;
- u16 port_min;
- u16 port_max;
- u8 tos;
- u8 ttl;
- __be32 label;
- u32 flags;
- unsigned long age_interval;
- unsigned int addrmax;
- bool no_share;
- enum ifla_vxlan_df df;
+ union vxlan_addr remote_ip;
+ union vxlan_addr saddr;
+ __be32 vni;
+ int remote_ifindex;
+ int mtu;
+ __be16 dst_port;
+ u16 port_min;
+ u16 port_max;
+ u8 tos;
+ u8 ttl;
+ __be32 label;
+ enum ifla_vxlan_label_policy label_policy;
+ u32 flags;
+ unsigned long age_interval;
+ unsigned int addrmax;
+ bool no_share;
+ enum ifla_vxlan_df df;
};
enum {
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 349c36fb5fd8..e6770dd40c91 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -16,7 +16,7 @@
*
* The XDP RX-queue info (xdp_rxq_info) is associated with the driver
* level RX-ring queues. It is information that is specific to how
- * the driver have configured a given RX-ring queue.
+ * the driver has configured a given RX-ring queue.
*
* Each xdp_buff frame received in the driver carries a (pointer)
* reference to this xdp_rxq_info structure. This provides the XDP
@@ -32,7 +32,7 @@
* The struct is not directly tied to the XDP prog. A new XDP prog
* can be attached as long as it doesn't change the underlying
* RX-ring. If the RX-ring does change significantly, the NIC driver
- * naturally need to stop the RX-ring before purging and reallocating
+ * naturally needs to stop the RX-ring before purging and reallocating
* memory. In that process the driver MUST call unregister (which
* also applies for driver shutdown and unload). The register API is
* also mandatory during RX-ring setup.
@@ -369,7 +369,12 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
static inline bool xdp_metalen_invalid(unsigned long metalen)
{
- return (metalen & (sizeof(__u32) - 1)) || (metalen > 32);
+ unsigned long meta_max;
+
+ meta_max = type_max(typeof_member(struct skb_shared_info, meta_len));
+ BUILD_BUG_ON(!__builtin_constant_p(meta_max));
+
+ return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max;
}
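The new bound comes from the type itself: skb_shared_info::meta_len is a u8, so anything 32-bit aligned up to 255 is acceptable, versus the old hard-coded 32. Worked values:

	/*
	 *	xdp_metalen_invalid(32)  -> false (aligned, fits in u8)
	 *	xdp_metalen_invalid(252) -> false (newly allowed, was rejected as > 32)
	 *	xdp_metalen_invalid(30)  -> true  (not a multiple of 4)
	 *	xdp_metalen_invalid(256) -> true  (would overflow the u8 meta_len)
	 */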
struct xdp_attachment_info {
@@ -399,6 +404,10 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
NETDEV_XDP_RX_METADATA_HASH, \
bpf_xdp_metadata_rx_hash, \
xmo_rx_hash) \
+ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \
+ NETDEV_XDP_RX_METADATA_VLAN_TAG, \
+ bpf_xdp_metadata_rx_vlan_tag, \
+ xmo_rx_vlan_tag) \
enum xdp_rx_metadata {
#define XDP_METADATA_KFUNC(name, _, __, ___) name,
@@ -427,6 +436,7 @@ enum xdp_rss_hash_type {
XDP_RSS_L4_UDP = BIT(5),
XDP_RSS_L4_SCTP = BIT(6),
XDP_RSS_L4_IPSEC = BIT(7), /* L4 based hash include IPSEC SPI */
+ XDP_RSS_L4_ICMP = BIT(8),
/* Second part: RSS hash type combinations used for driver HW mapping */
XDP_RSS_TYPE_NONE = 0,
@@ -442,11 +452,13 @@ enum xdp_rss_hash_type {
XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
+ XDP_RSS_TYPE_L4_IPV4_ICMP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_ICMP,
XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
+ XDP_RSS_TYPE_L4_IPV6_ICMP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_ICMP,
XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR,
XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR,
@@ -457,6 +469,8 @@ struct xdp_metadata_ops {
int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type);
+ int (*xmo_rx_vlan_tag)(const struct xdp_md *ctx, __be16 *vlan_proto,
+ u16 *vlan_tci);
};
#ifdef CONFIG_NET
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index f83128007fb0..3cb4dc9bd70e 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -30,6 +30,7 @@ struct xdp_umem {
struct user_struct *user;
refcount_t users;
u8 flags;
+ u8 tx_metadata_len;
bool zc;
struct page **pgs;
int id;
@@ -92,12 +93,105 @@ struct xdp_sock {
struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
};
+/*
+ * AF_XDP TX metadata hooks for network devices.
+ * The following hooks can be defined; unless noted otherwise, they are
+ * optional and can be filled with a null pointer.
+ *
+ * void (*tmo_request_timestamp)(void *priv)
+ * Called when an AF_XDP frame requests an egress timestamp.
+ *
+ * u64 (*tmo_fill_timestamp)(void *priv)
+ * Called when an AF_XDP frame that requested an egress timestamp
+ * receives a completion. The hook must return the actual HW timestamp.
+ *
+ * void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv)
+ * Called when an AF_XDP frame requests HW checksum offload. csum_start
+ * indicates the position where checksumming should start;
+ * csum_offset indicates the position where the checksum should be stored.
+ *
+ */
+struct xsk_tx_metadata_ops {
+ void (*tmo_request_timestamp)(void *priv);
+ u64 (*tmo_fill_timestamp)(void *priv);
+ void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv);
+};
+
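A driver publishes these hooks through a static ops table; a minimal sketch (the driver name and per-completion priv layout are assumptions):

	struct mydrv_tx_priv {
		u64 hw_timestamp;	/* latched from the completion descriptor */
	};

	static u64 mydrv_tmo_fill_timestamp(void *priv)
	{
		struct mydrv_tx_priv *p = priv;

		return p->hw_timestamp;
	}

	static const struct xsk_tx_metadata_ops mydrv_xsk_tmo = {
		.tmo_fill_timestamp	= mydrv_tmo_fill_timestamp,
		/* tmo_request_timestamp / tmo_request_checksum left NULL */
	};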
#ifdef CONFIG_XDP_SOCKETS
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(void);
+/**
+ * xsk_tx_metadata_to_compl - Save enough relevant metadata information
+ * to perform TX completion in the future.
+ * @meta: pointer to AF_XDP metadata area
+ * @compl: pointer to output struct xsk_tx_metadata_compl
+ *
+ * This function should be called by the networking device when
+ * it prepares an AF_XDP egress packet. The value of @compl should be
+ * stored and passed to xsk_tx_metadata_complete() upon TX completion.
+ */
+static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
+ struct xsk_tx_metadata_compl *compl)
+{
+ if (!meta)
+ return;
+
+ if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
+ compl->tx_timestamp = &meta->completion.tx_timestamp;
+ else
+ compl->tx_timestamp = NULL;
+}
+
+/**
+ * xsk_tx_metadata_request - Evaluate AF_XDP TX metadata at submission
+ * and call the appropriate xsk_tx_metadata_ops operation.
+ * @meta: pointer to AF_XDP metadata area
+ * @ops: pointer to struct xsk_tx_metadata_ops
+ * @priv: pointer to driver-private area
+ *
+ * This function should be called by the networking device when
+ * it prepares an AF_XDP egress packet.
+ */
+static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+ if (!meta)
+ return;
+
+ if (ops->tmo_request_timestamp)
+ if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
+ ops->tmo_request_timestamp(priv);
+
+ if (ops->tmo_request_checksum)
+ if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM)
+ ops->tmo_request_checksum(meta->request.csum_start,
+ meta->request.csum_offset, priv);
+}
+
+/**
+ * xsk_tx_metadata_complete - Evaluate AF_XDP TX metadata at completion
+ * and call the appropriate xsk_tx_metadata_ops operation.
+ * @compl: pointer to completion metadata produced by xsk_tx_metadata_to_compl()
+ * @ops: pointer to struct xsk_tx_metadata_ops
+ * @priv: pointer to driver-private area
+ *
+ * This function should be called by the networking device upon
+ * AF_XDP egress completion.
+ */
+static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+	if (!compl || !compl->tx_timestamp)
+		return;	/* no timestamp was requested for this frame */
+
+ *compl->tx_timestamp = ops->tmo_fill_timestamp(priv);
+}
+
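Taken together, a driver's TX path threads the three helpers like this (sketch reusing the hypothetical mydrv_xsk_tmo table above; in practice @compl lives in per-descriptor ring state rather than on the stack):

	static void mydrv_xmit_sketch(struct xsk_buff_pool *pool, u64 addr,
				      void *priv)
	{
		struct xsk_tx_metadata *meta = xsk_buff_get_metadata(pool, addr);
		struct xsk_tx_metadata_compl compl = {};

		xsk_tx_metadata_request(meta, &mydrv_xsk_tmo, priv);	/* at submit */
		xsk_tx_metadata_to_compl(meta, &compl);
		/* ... later, from the TX completion handler: */
		xsk_tx_metadata_complete(&compl, &mydrv_xsk_tmo, priv);
	}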
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
@@ -114,6 +208,23 @@ static inline void __xsk_map_flush(void)
{
}
+static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
+ struct xsk_tx_metadata_compl *compl)
+{
+}
+
+static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+}
+
+static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+}
+
#endif /* CONFIG_XDP_SOCKETS */
#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET)
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 1f6fc8c7a84c..c9aec9ab6191 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -12,6 +12,12 @@
#define XDP_UMEM_MIN_CHUNK_SHIFT 11
#define XDP_UMEM_MIN_CHUNK_SIZE (1 << XDP_UMEM_MIN_CHUNK_SHIFT)
+struct xsk_cb_desc {
+ void *src;
+ u8 off;
+ u8 bytes;
+};
+
#ifdef CONFIG_XDP_SOCKETS
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries);
@@ -47,6 +53,12 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
xp_set_rxq_info(pool, rxq);
}
+static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
+ struct xsk_cb_desc *desc)
+{
+ xp_fill_cb(pool, desc);
+}
+
static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -147,11 +159,29 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
return ret;
}
+static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+{
+ struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
+
+ list_del(&xskb->xskb_list_node);
+}
+
+static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
+{
+ struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
+ struct xdp_buff_xsk *frag;
+
+ frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
+ xskb_list_node);
+ return &frag->xdp;
+}
+
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
xdp->data_meta = xdp->data;
xdp->data_end = xdp->data + size;
+ xdp->flags = 0;
}
static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool,
@@ -165,6 +195,30 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return xp_raw_get_data(pool, addr);
}
+#define XDP_TXMD_FLAGS_VALID ( \
+ XDP_TXMD_FLAGS_TIMESTAMP | \
+ XDP_TXMD_FLAGS_CHECKSUM | \
+ 0)
+
+static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
+{
+ return !(meta->flags & ~XDP_TXMD_FLAGS_VALID);
+}
+
+static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+ struct xsk_tx_metadata *meta;
+
+ if (!pool->tx_metadata_len)
+ return NULL;
+
+ meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len;
+ if (unlikely(!xsk_buff_valid_tx_metadata(meta)))
+ return NULL; /* no way to signal the error to the user */
+
+ return meta;
+}
+
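The metadata sits tx_metadata_len bytes before the descriptor address inside the umem chunk; userspace computes the mirror image (sketch; aligned chunk mode assumed, with umem_area being the mmap'ed umem base):

	static struct xsk_tx_metadata *meta_at(void *umem_area, __u64 desc_addr,
					       __u32 tx_metadata_len)
	{
		/* same layout xsk_buff_get_metadata() sees from the kernel side */
		return (struct xsk_tx_metadata *)
			((char *)umem_area + desc_addr - tx_metadata_len);
	}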
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
@@ -250,6 +304,11 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
{
}
+static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
+ struct xsk_cb_desc *desc)
+{
+}
+
static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
{
return 0;
@@ -309,6 +368,15 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
return NULL;
}
+static inline void xsk_buff_del_tail(struct xdp_buff *tail)
+{
+}
+
+static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
+{
+ return NULL;
+}
+
static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
{
}
@@ -324,6 +392,16 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return NULL;
}
+static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
+{
+ return false;
+}
+
+static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+ return NULL;
+}
+
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
{
}
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c9bb0f892f55..57c743b7e4fe 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -51,8 +51,10 @@
#ifdef CONFIG_XFRM_STATISTICS
#define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field)
+#define XFRM_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.xfrm_statistics, field, val)
#else
#define XFRM_INC_STATS(net, field) ((void)(net))
+#define XFRM_ADD_STATS(net, field, val) ((void)(net))
#endif
@@ -1577,22 +1579,20 @@ struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
unsigned short family);
int xfrm_state_check_expire(struct xfrm_state *x);
+void xfrm_state_update_stats(struct net *net);
#ifdef CONFIG_XFRM_OFFLOAD
-static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x)
+static inline void xfrm_dev_state_update_stats(struct xfrm_state *x)
{
struct xfrm_dev_offload *xdo = &x->xso;
struct net_device *dev = xdo->dev;
- if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
- return;
-
if (dev && dev->xfrmdev_ops &&
- dev->xfrmdev_ops->xdo_dev_state_update_curlft)
- dev->xfrmdev_ops->xdo_dev_state_update_curlft(x);
+ dev->xfrmdev_ops->xdo_dev_state_update_stats)
+ dev->xfrmdev_ops->xdo_dev_state_update_stats(x);
}
#else
-static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) {}
+static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) {}
#endif
void xfrm_state_insert(struct xfrm_state *x);
int xfrm_state_add(struct xfrm_state *x);
@@ -2190,4 +2190,13 @@ static inline int register_xfrm_interface_bpf(void)
#endif
+#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF)
+int register_xfrm_state_bpf(void);
+#else
+static inline int register_xfrm_state_bpf(void)
+{
+ return 0;
+}
+#endif
+
#endif /* _NET_XFRM_H */
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index b0bdff26fc88..99dd7376df6a 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -12,6 +12,7 @@
struct xsk_buff_pool;
struct xdp_rxq_info;
+struct xsk_cb_desc;
struct xsk_queue;
struct xdp_desc;
struct xdp_umem;
@@ -33,6 +34,7 @@ struct xdp_buff_xsk {
};
#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
+#define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t))
struct xsk_dma_map {
dma_addr_t *dma_pages;
@@ -77,10 +79,12 @@ struct xsk_buff_pool {
u32 chunk_size;
u32 chunk_shift;
u32 frame_len;
+ u8 tx_metadata_len; /* inherited from umem */
u8 cached_need_wakeup;
bool uses_need_wakeup;
bool dma_need_sync;
bool unaligned;
+ bool tx_sw_csum;
void *addrs;
/* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
* NAPI TX thread and sendmsg error paths in the SKB destructor callback and when
@@ -132,6 +136,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p
/* AF_XDP ZC drivers, via xdp_sock_buff.h */
void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
+void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc);
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
unsigned long attrs, struct page **pages, u32 nr_pages);
void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
@@ -233,4 +238,9 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}
+static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool)
+{
+ return pool->tx_metadata_len > 0;
+}
+
#endif /* XSK_BUFF_POOL_H_ */