summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/act_api.h2
-rw-r--r--include/net/af_vsock.h2
-rw-r--r--include/net/cfg80211.h102
-rw-r--r--include/net/dropreason-core.h24
-rw-r--r--include/net/genetlink.h46
-rw-r--r--include/net/inet_sock.h5
-rw-r--r--include/net/ip.h10
-rw-r--r--include/net/ip_tunnels.h11
-rw-r--r--include/net/mac80211.h23
-rw-r--r--include/net/mana/gdma.h7
-rw-r--r--include/net/mana/mana.h46
-rw-r--r--include/net/netdev_rx_queue.h4
-rw-r--r--include/net/netlink.h47
-rw-r--r--include/net/netns/core.h1
-rw-r--r--include/net/netns/ipv4.h50
-rw-r--r--include/net/netns/smc.h2
-rw-r--r--include/net/page_pool/helpers.h85
-rw-r--r--include/net/page_pool/types.h49
-rw-r--r--include/net/pkt_cls.h6
-rw-r--r--include/net/pkt_sched.h18
-rw-r--r--include/net/sch_generic.h32
-rw-r--r--include/net/sock.h1
-rw-r--r--include/net/tcp.h22
-rw-r--r--include/net/tcp_ao.h6
-rw-r--r--include/net/tcp_states.h2
-rw-r--r--include/net/vxlan.h33
-rw-r--r--include/net/xdp.h20
-rw-r--r--include/net/xdp_sock.h111
-rw-r--r--include/net/xdp_sock_drv.h51
-rw-r--r--include/net/xfrm.h9
-rw-r--r--include/net/xsk_buff_pool.h10
31 files changed, 651 insertions, 186 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 4ae0580b63ca..ea13e1e4a7c2 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -191,7 +191,7 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
struct nlattr *est, struct tc_action **a,
const struct tc_action_ops *ops, int bind,
u32 flags);
-void tcf_idr_insert_many(struct tc_action *actions[]);
+void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]);
void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tc_action **a, int bind);
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index e302c0e804d0..535701efc1e5 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -137,7 +137,6 @@ struct vsock_transport {
u64 (*stream_rcvhiwat)(struct vsock_sock *);
bool (*stream_is_active)(struct vsock_sock *);
bool (*stream_allow)(u32 cid, u32 port);
- int (*set_rcvlowat)(struct vsock_sock *vsk, int val);
/* SEQ_PACKET. */
ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
@@ -168,6 +167,7 @@ struct vsock_transport {
struct vsock_transport_send_notify_data *);
/* sk_lock held by the caller */
void (*notify_buffer_size)(struct vsock_sock *, u64 *);
+ int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val);
/* Shutdown. */
int (*shutdown)(struct vsock_sock *, int);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4ecfb06c413d..ac1fb326dcda 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -52,7 +52,7 @@
* such wiphy can have zero, one, or many virtual interfaces associated with
* it, which need to be identified as such by pointing the network interface's
* @ieee80211_ptr pointer to a &struct wireless_dev which further describes
- * the wireless part of the interface, normally this struct is embedded in the
+ * the wireless part of the interface. Normally this struct is embedded in the
* network interface's private data area. Drivers can optionally allow creating
* or destroying virtual interfaces on the fly, but without at least one or the
* ability to create some the wireless device isn't useful.
@@ -977,6 +977,15 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1,
const struct cfg80211_chan_def *chandef2);
/**
+ * nl80211_chan_width_to_mhz - get the channel width in MHz
+ * @chan_width: the channel width from &enum nl80211_chan_width
+ *
+ * Return: channel width in MHz if the chan_width from &enum nl80211_chan_width
+ * is valid. -1 otherwise.
+ */
+int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width);
+
+/**
* cfg80211_chandef_valid - check if a channel definition is valid
* @chandef: the channel definition to check
* Return: %true if the channel definition is valid. %false otherwise.
@@ -1665,6 +1674,21 @@ struct link_station_del_parameters {
};
/**
+ * struct cfg80211_ttlm_params: TID to link mapping parameters
+ *
+ * Used for setting a TID to link mapping.
+ *
+ * @dlink: Downlink TID to link mapping, as defined in section 9.4.2.314
+ * (TID-To-Link Mapping element) in Draft P802.11be_D4.0.
+ * @ulink: Uplink TID to link mapping, as defined in section 9.4.2.314
+ * (TID-To-Link Mapping element) in Draft P802.11be_D4.0.
+ */
+struct cfg80211_ttlm_params {
+ u16 dlink[8];
+ u16 ulink[8];
+};
+
+/**
* struct station_parameters - station parameters
*
* Used to change and create a new station.
@@ -2560,7 +2584,7 @@ struct cfg80211_scan_info {
* @short_ssid: short ssid to scan for
* @bssid: bssid to scan for
* @channel_idx: idx of the channel in the channel array in the scan request
- * which the above info relvant to
+ * which the above info is relevant to
* @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU
* @short_ssid_valid: @short_ssid is valid and can be used
* @psc_no_listen: when set, and the channel is a PSC channel, no need to wait
@@ -2608,6 +2632,8 @@ struct cfg80211_scan_6ghz_params {
* @n_6ghz_params: number of 6 GHz params
* @scan_6ghz_params: 6 GHz params
* @bssid: BSSID to scan for (most commonly, the wildcard BSSID)
+ * @tsf_report_link_id: for MLO, indicates the link ID of the BSS that should be
+ * used for TSF reporting. Can be set to -1 to indicate no preference.
*/
struct cfg80211_scan_request {
struct cfg80211_ssid *ssids;
@@ -2636,6 +2662,7 @@ struct cfg80211_scan_request {
bool scan_6ghz;
u32 n_6ghz_params;
struct cfg80211_scan_6ghz_params *scan_6ghz_params;
+ s8 tsf_report_link_id;
/* keep last */
struct ieee80211_channel *channels[] __counted_by(n_channels);
@@ -2816,6 +2843,13 @@ enum cfg80211_signal_type {
* the BSS that requested the scan in which the beacon/probe was received.
* @chains: bitmask for filled values in @chain_signal.
* @chain_signal: per-chain signal strength of last received BSS in dBm.
+ * @restrict_use: restrict usage, if not set, assume @use_for is
+ * %NL80211_BSS_USE_FOR_NORMAL.
+ * @use_for: bitmap of possible usage for this BSS, see
+ * &enum nl80211_bss_use_for
+ * @cannot_use_reasons: the reasons (bitmap) for not being able to connect,
+ * if @restrict_use is set and @use_for is zero (empty); may be 0 for
+ * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons
* @drv_data: Data to be passed through to @inform_bss
*/
struct cfg80211_inform_bss {
@@ -2827,6 +2861,9 @@ struct cfg80211_inform_bss {
u8 chains;
s8 chain_signal[IEEE80211_MAX_CHAINS];
+ u8 restrict_use:1, use_for:7;
+ u8 cannot_use_reasons;
+
void *drv_data;
};
@@ -2878,6 +2915,11 @@ struct cfg80211_bss_ies {
* @chain_signal: per-chain signal strength of last received BSS in dBm.
* @bssid_index: index in the multiple BSS set
* @max_bssid_indicator: max number of members in the BSS set
+ * @use_for: bitmap of possible usage for this BSS, see
+ * &enum nl80211_bss_use_for
+ * @cannot_use_reasons: the reasons (bitmap) for not being able to connect,
+ * if @restrict_use is set and @use_for is zero (empty); may be 0 for
+ * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons
* @priv: private area for driver use, has at least wiphy->bss_priv_size bytes
*/
struct cfg80211_bss {
@@ -2903,6 +2945,9 @@ struct cfg80211_bss {
u8 bssid_index;
u8 max_bssid_indicator;
+ u8 use_for;
+ u8 cannot_use_reasons;
+
u8 priv[] __aligned(sizeof(void *));
};
@@ -4493,6 +4538,7 @@ struct mgmt_frame_regs {
* @del_link_station: Remove a link of a station.
*
* @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames.
+ * @set_ttlm: set the TID to link mapping.
*/
struct cfg80211_ops {
int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -4852,6 +4898,8 @@ struct cfg80211_ops {
struct link_station_del_parameters *params);
int (*set_hw_timestamp)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_set_hw_timestamp *hwts);
+ int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_ttlm_params *params);
};
/*
@@ -4910,6 +4958,8 @@ struct cfg80211_ops {
* NL80211_REGDOM_SET_BY_DRIVER.
* @WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON: reg_call_notifier() is called if driver
* set this flag to update channels on beacon hints.
+ * @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link
+ * of an NSTR mobile AP MLD.
*/
enum wiphy_flags {
WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0),
@@ -4923,7 +4973,7 @@ enum wiphy_flags {
WIPHY_FLAG_IBSS_RSN = BIT(8),
WIPHY_FLAG_MESH_AUTH = BIT(10),
WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11),
- /* use hole at 12 */
+ WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12),
WIPHY_FLAG_SUPPORTS_FW_ROAM = BIT(13),
WIPHY_FLAG_AP_UAPSD = BIT(14),
WIPHY_FLAG_SUPPORTS_TDLS = BIT(15),
@@ -7162,6 +7212,25 @@ cfg80211_inform_bss(struct wiphy *wiphy,
}
/**
+ * __cfg80211_get_bss - get a BSS reference
+ * @wiphy: the wiphy this BSS struct belongs to
+ * @channel: the channel to search on (or %NULL)
+ * @bssid: the desired BSSID (or %NULL)
+ * @ssid: the desired SSID (or %NULL)
+ * @ssid_len: length of the SSID (or 0)
+ * @bss_type: type of BSS, see &enum ieee80211_bss_type
+ * @privacy: privacy filter, see &enum ieee80211_privacy
+ * @use_for: indicates which use is intended
+ */
+struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy,
+ struct ieee80211_channel *channel,
+ const u8 *bssid,
+ const u8 *ssid, size_t ssid_len,
+ enum ieee80211_bss_type bss_type,
+ enum ieee80211_privacy privacy,
+ u32 use_for);
+
+/**
* cfg80211_get_bss - get a BSS reference
* @wiphy: the wiphy this BSS struct belongs to
* @channel: the channel to search on (or %NULL)
@@ -7170,13 +7239,20 @@ cfg80211_inform_bss(struct wiphy *wiphy,
* @ssid_len: length of the SSID (or 0)
* @bss_type: type of BSS, see &enum ieee80211_bss_type
* @privacy: privacy filter, see &enum ieee80211_privacy
+ *
+ * This version implies regular usage, %NL80211_BSS_USE_FOR_NORMAL.
*/
-struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
- struct ieee80211_channel *channel,
- const u8 *bssid,
- const u8 *ssid, size_t ssid_len,
- enum ieee80211_bss_type bss_type,
- enum ieee80211_privacy privacy);
+static inline struct cfg80211_bss *
+cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel,
+ const u8 *bssid, const u8 *ssid, size_t ssid_len,
+ enum ieee80211_bss_type bss_type,
+ enum ieee80211_privacy privacy)
+{
+ return __cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len,
+ bss_type, privacy,
+ NL80211_BSS_USE_FOR_NORMAL);
+}
+
static inline struct cfg80211_bss *
cfg80211_get_ibss(struct wiphy *wiphy,
struct ieee80211_channel *channel,
@@ -7309,7 +7385,7 @@ struct cfg80211_rx_assoc_resp_data {
* This function may sleep. The caller must hold the corresponding wdev's mutex.
*/
void cfg80211_rx_assoc_resp(struct net_device *dev,
- struct cfg80211_rx_assoc_resp_data *data);
+ const struct cfg80211_rx_assoc_resp_data *data);
/**
* struct cfg80211_assoc_failure - association failure data
@@ -7428,7 +7504,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev,
* RFkill integration in cfg80211 is almost invisible to drivers,
* as cfg80211 automatically registers an rfkill instance for each
* wireless device it knows about. Soft kill is also translated
- * into disconnecting and turning all interfaces off, drivers are
+ * into disconnecting and turning all interfaces off. Drivers are
* expected to turn off the device when all interfaces are down.
*
* However, devices may have a hard RFkill line, in which case they
@@ -7476,7 +7552,7 @@ static inline void wiphy_rfkill_stop_polling(struct wiphy *wiphy)
* the configuration mechanism.
*
* A driver supporting vendor commands must register them as an array
- * in struct wiphy, with handlers for each one, each command has an
+ * in struct wiphy, with handlers for each one. Each command has an
* OUI and sub command ID to identify it.
*
* Note that this feature should not be (ab)used to implement protocol
@@ -7640,7 +7716,7 @@ static inline void cfg80211_vendor_event(struct sk_buff *skb, gfp_t gfp)
* interact with driver-specific tools to aid, for instance,
* factory programming.
*
- * This chapter describes how drivers interact with it, for more
+ * This chapter describes how drivers interact with it. For more
* information see the nl80211 book's chapter on it.
*/
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 3c70ad53a49c..6d3a20163260 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -85,7 +85,10 @@
FN(IPV6_NDISC_BAD_OPTIONS) \
FN(IPV6_NDISC_NS_OTHERHOST) \
FN(QUEUE_PURGE) \
- FN(TC_ERROR) \
+ FN(TC_COOKIE_ERROR) \
+ FN(PACKET_SOCK_ERROR) \
+ FN(TC_CHAIN_NOTFOUND) \
+ FN(TC_RECLASSIFY_LOOP) \
FNe(MAX)
/**
@@ -376,8 +379,23 @@ enum skb_drop_reason {
SKB_DROP_REASON_IPV6_NDISC_NS_OTHERHOST,
/** @SKB_DROP_REASON_QUEUE_PURGE: bulk free. */
SKB_DROP_REASON_QUEUE_PURGE,
- /** @SKB_DROP_REASON_TC_ERROR: generic internal tc error. */
- SKB_DROP_REASON_TC_ERROR,
+ /**
+ * @SKB_DROP_REASON_TC_COOKIE_ERROR: An error occurred whilst
+ * processing a tc ext cookie.
+ */
+ SKB_DROP_REASON_TC_COOKIE_ERROR,
+ /**
+ * @SKB_DROP_REASON_PACKET_SOCK_ERROR: generic packet socket errors
+ * after its filter matches an incoming packet.
+ */
+ SKB_DROP_REASON_PACKET_SOCK_ERROR,
+ /** @SKB_DROP_REASON_TC_CHAIN_NOTFOUND: tc chain lookup failed. */
+ SKB_DROP_REASON_TC_CHAIN_NOTFOUND,
+ /**
+ * @SKB_DROP_REASON_TC_RECLASSIFY_LOOP: tc exceeded max reclassify loop
+ * iterations.
+ */
+ SKB_DROP_REASON_TC_RECLASSIFY_LOOP,
/**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index c53244f20437..85c63d4f16dd 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -51,6 +51,9 @@ struct genl_info;
* @split_ops: the split do/dump form of operation definition
* @n_split_ops: number of entries in @split_ops, not that with split do/dump
* ops the number of entries is not the same as number of commands
+ * @sock_priv_size: the size of per-socket private memory
+ * @sock_priv_init: the per-socket private memory initializer
+ * @sock_priv_destroy: the per-socket private memory destructor
*
* Attribute policies (the combination of @policy and @maxattr fields)
* can be attached at the family level or at the operation level.
@@ -84,11 +87,17 @@ struct genl_family {
const struct genl_multicast_group *mcgrps;
struct module *module;
+ size_t sock_priv_size;
+ void (*sock_priv_init)(void *priv);
+ void (*sock_priv_destroy)(void *priv);
+
/* private: internal use only */
/* protocol family identifier */
int id;
/* starting number of multicast group IDs in this family */
unsigned int mcgrp_offset;
+ /* list of per-socket privs */
+ struct xarray *sock_privs;
};
/**
@@ -298,6 +307,8 @@ static inline bool genl_info_is_ntf(const struct genl_info *info)
return !info->nlhdr;
}
+void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk);
+void *genl_sk_priv_get(struct genl_family *family, struct sock *sk);
int genl_register_family(struct genl_family *family);
int genl_unregister_family(const struct genl_family *family);
void genl_notify(const struct genl_family *family, struct sk_buff *skb,
@@ -438,6 +449,35 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr)
}
/**
+ * genlmsg_multicast_netns_filtered - multicast a netlink message
+ * to a specific netns with filter
+ * function
+ * @family: the generic netlink family
+ * @net: the net namespace
+ * @skb: netlink message as socket buffer
+ * @portid: own netlink portid to avoid sending to yourself
+ * @group: offset of multicast group in groups array
+ * @flags: allocation flags
+ * @filter: filter function
+ * @filter_data: filter function private data
+ *
+ * Return: 0 on success, negative error code for failure.
+ */
+static inline int
+genlmsg_multicast_netns_filtered(const struct genl_family *family,
+ struct net *net, struct sk_buff *skb,
+ u32 portid, unsigned int group, gfp_t flags,
+ netlink_filter_fn filter,
+ void *filter_data)
+{
+ if (WARN_ON_ONCE(group >= family->n_mcgrps))
+ return -EINVAL;
+ group = family->mcgrp_offset + group;
+ return nlmsg_multicast_filtered(net->genl_sock, skb, portid, group,
+ flags, filter, filter_data);
+}
+
+/**
* genlmsg_multicast_netns - multicast a netlink message to a specific netns
* @family: the generic netlink family
* @net: the net namespace
@@ -450,10 +490,8 @@ static inline int genlmsg_multicast_netns(const struct genl_family *family,
struct net *net, struct sk_buff *skb,
u32 portid, unsigned int group, gfp_t flags)
{
- if (WARN_ON_ONCE(group >= family->n_mcgrps))
- return -EINVAL;
- group = family->mcgrp_offset + group;
- return nlmsg_multicast(net->genl_sock, skb, portid, group, flags);
+ return genlmsg_multicast_netns_filtered(family, net, skb, portid,
+ group, flags, NULL, NULL);
}
/**
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 74db6d97cae1..aa86453f6b9b 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -234,10 +234,7 @@ struct inet_sock {
int uc_index;
int mc_index;
__be32 mc_addr;
- struct {
- __u16 lo;
- __u16 hi;
- } local_port_range;
+ u32 local_port_range; /* high << 16 | low */
struct ip_mc_socklist __rcu *mc_list;
struct inet_cork_full cork;
diff --git a/include/net/ip.h b/include/net/ip.h
index 1fc4c8d69e33..de0c69c57e3c 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -349,8 +349,14 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
} \
}
-void inet_get_local_port_range(const struct net *net, int *low, int *high);
-void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
+static inline void inet_get_local_port_range(const struct net *net, int *low, int *high)
+{
+ u32 range = READ_ONCE(net->ipv4.ip_local_ports.range);
+
+ *low = range & 0xffff;
+ *high = range >> 16;
+}
+bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
#ifdef CONFIG_SYSCTL
static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port)
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index f346b4efbc30..2d746f4c9a0a 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -416,6 +416,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
return 0;
}
+static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph,
+ const struct sk_buff *skb)
+{
+ __be16 payload_protocol = skb_protocol(skb, true);
+
+ if (payload_protocol == htons(ETH_P_IPV6))
+ return ip6_flowlabel((const struct ipv6hdr *)iph);
+ else
+ return 0;
+}
+
static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
const struct sk_buff *skb)
{
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 580781ff9dcf..77a71b1396b1 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2686,6 +2686,9 @@ struct ieee80211_txq {
* @IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX: Hardware/driver handles transmitting
* multicast frames on all links, mac80211 should not do that.
*
+ * @IEEE80211_HW_DISALLOW_PUNCTURING: HW requires disabling puncturing in EHT
+ * and connecting with a lower bandwidth instead
+ *
* @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
*/
enum ieee80211_hw_flags {
@@ -2743,6 +2746,7 @@ enum ieee80211_hw_flags {
IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP,
IEEE80211_HW_DETECTS_COLOR_COLLISION,
IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX,
+ IEEE80211_HW_DISALLOW_PUNCTURING,
/* keep last, obviously */
NUM_IEEE80211_HW_FLAGS
@@ -5809,12 +5813,11 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf,
* ieee80211_remove_key - remove the given key
* @keyconf: the parameter passed with the set key
*
+ * Context: Must be called with the wiphy mutex held.
+ *
* Remove the given key. If the key was uploaded to the hardware at the
* time this function is called, it is not deleted in the hardware but
* instead assumed to have been removed already.
- *
- * Note that due to locking considerations this function can (currently)
- * only be called during key iteration (ieee80211_iter_keys().)
*/
void ieee80211_remove_key(struct ieee80211_key_conf *keyconf);
@@ -6368,12 +6371,12 @@ ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
* @iter: iterator function that will be called for each key
* @iter_data: custom data to pass to the iterator function
*
+ * Context: Must be called with wiphy mutex held; can sleep.
+ *
* This function can be used to iterate all the keys known to
* mac80211, even those that weren't previously programmed into
* the device. This is intended for use in WoWLAN if the device
- * needs reprogramming of the keys during suspend. Note that due
- * to locking reasons, it is also only safe to call this at few
- * spots since it must hold the RTNL and be able to sleep.
+ * needs reprogramming of the keys during suspend.
*
* The order in which the keys are iterated matches the order
* in which they were originally installed and handed to the
@@ -7435,6 +7438,9 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
* @vif: interface to set active links on
* @active_links: the new active links bitmap
*
+ * Context: Must be called with wiphy mutex held; may sleep; calls
+ * back into the driver.
+ *
* This changes the active links on an interface. The interface
* must be in client mode (in AP mode, all links are always active),
* and @active_links must be a subset of the vif's valid_links.
@@ -7442,6 +7448,7 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
* If a link is switched off and another is switched on at the same
* time (e.g. active_links going from 0x1 to 0x10) then you will get
* a sequence of calls like
+ *
* - change_vif_links(0x11)
* - unassign_vif_chanctx(link_id=0)
* - change_sta_links(0x11) for each affected STA (the AP)
@@ -7451,10 +7458,6 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb)
* - change_sta_links(0x10) for each affected STA (the AP)
* - assign_vif_chanctx(link_id=4)
* - change_vif_links(0x10)
- *
- * Note: This function acquires some mac80211 locks and must not
- * be called with any driver locks held that could cause a
- * lock dependency inversion. Best call it without locks.
*/
int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links);
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 88b6ef7ce1a6..76f2fd2645b7 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -293,6 +293,7 @@ struct gdma_queue {
u32 head;
u32 tail;
+ struct list_head entry;
/* Extra fields specific to EQ/CQ. */
union {
@@ -328,6 +329,7 @@ struct gdma_queue_spec {
void *context;
unsigned long log2_throttle_limit;
+ unsigned int msix_index;
} eq;
struct {
@@ -344,7 +346,9 @@ struct gdma_queue_spec {
struct gdma_irq_context {
void (*handler)(void *arg);
- void *arg;
+ /* Protect the eq_list */
+ spinlock_t lock;
+ struct list_head eq_list;
char name[MANA_IRQ_NAME_SZ];
};
@@ -355,7 +359,6 @@ struct gdma_context {
unsigned int max_num_queues;
unsigned int max_num_msix;
unsigned int num_msix_usable;
- struct gdma_resource msix_resource;
struct gdma_irq_context *irq_contexts;
/* L2 MTU */
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 6e3e9c1363db..76147feb0d10 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -353,6 +353,25 @@ struct mana_tx_qp {
struct mana_ethtool_stats {
u64 stop_queue;
u64 wake_queue;
+ u64 hc_rx_discards_no_wqe;
+ u64 hc_rx_err_vport_disabled;
+ u64 hc_rx_bytes;
+ u64 hc_rx_ucast_pkts;
+ u64 hc_rx_ucast_bytes;
+ u64 hc_rx_bcast_pkts;
+ u64 hc_rx_bcast_bytes;
+ u64 hc_rx_mcast_pkts;
+ u64 hc_rx_mcast_bytes;
+ u64 hc_tx_err_gf_disabled;
+ u64 hc_tx_err_vport_disabled;
+ u64 hc_tx_err_inval_vportoffset_pkt;
+ u64 hc_tx_err_vlan_enforcement;
+ u64 hc_tx_err_eth_type_enforcement;
+ u64 hc_tx_err_sa_enforcement;
+ u64 hc_tx_err_sqpdid_enforcement;
+ u64 hc_tx_err_cqpdid_enforcement;
+ u64 hc_tx_err_mtu_violation;
+ u64 hc_tx_err_inval_oob;
u64 hc_tx_bytes;
u64 hc_tx_ucast_pkts;
u64 hc_tx_ucast_bytes;
@@ -360,6 +379,7 @@ struct mana_ethtool_stats {
u64 hc_tx_bcast_bytes;
u64 hc_tx_mcast_pkts;
u64 hc_tx_mcast_bytes;
+ u64 hc_tx_err_gdma;
u64 tx_cqe_err;
u64 tx_cqe_unknown_type;
u64 rx_coalesced_err;
@@ -602,8 +622,8 @@ struct mana_query_gf_stat_resp {
struct gdma_resp_hdr hdr;
u64 reported_stats;
/* rx errors/discards */
- u64 discard_rx_nowqe;
- u64 err_rx_vport_disabled;
+ u64 rx_discards_nowqe;
+ u64 rx_err_vport_disabled;
/* rx bytes/packets */
u64 hc_rx_bytes;
u64 hc_rx_ucast_pkts;
@@ -613,16 +633,16 @@ struct mana_query_gf_stat_resp {
u64 hc_rx_mcast_pkts;
u64 hc_rx_mcast_bytes;
/* tx errors */
- u64 err_tx_gf_disabled;
- u64 err_tx_vport_disabled;
- u64 err_tx_inval_vport_offset_pkt;
- u64 err_tx_vlan_enforcement;
- u64 err_tx_ethtype_enforcement;
- u64 err_tx_SA_enforecement;
- u64 err_tx_SQPDID_enforcement;
- u64 err_tx_CQPDID_enforcement;
- u64 err_tx_mtu_violation;
- u64 err_tx_inval_oob;
+ u64 tx_err_gf_disabled;
+ u64 tx_err_vport_disabled;
+ u64 tx_err_inval_vport_offset_pkt;
+ u64 tx_err_vlan_enforcement;
+ u64 tx_err_ethtype_enforcement;
+ u64 tx_err_SA_enforcement;
+ u64 tx_err_SQPDID_enforcement;
+ u64 tx_err_CQPDID_enforcement;
+ u64 tx_err_mtu_violation;
+ u64 tx_err_inval_oob;
/* tx bytes/packets */
u64 hc_tx_bytes;
u64 hc_tx_ucast_pkts;
@@ -632,7 +652,7 @@ struct mana_query_gf_stat_resp {
u64 hc_tx_mcast_pkts;
u64 hc_tx_mcast_bytes;
/* tx error */
- u64 err_tx_gdma;
+ u64 tx_err_gdma;
}; /* HW DATA */
/* Configure vPort Rx Steering */
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index cdcafb30d437..aa1716fb0e53 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -21,6 +21,10 @@ struct netdev_rx_queue {
#ifdef CONFIG_XDP_SOCKETS
struct xsk_buff_pool *pool;
#endif
+ /* NAPI instance for the queue
+ * Readers and writers must hold RTNL
+ */
+ struct napi_struct *napi;
} ____cacheline_aligned_in_smp;
/*
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 83bdf787aeee..c19ff921b661 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1011,6 +1011,20 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
}
/**
+ * nlmsg_new_large - Allocate a new netlink message with non-contiguous
+ * physical memory
+ * @payload: size of the message payload
+ *
+ * The allocated skb is unable to have frag page for shinfo->frags*,
+ * as the NULL setting for skb->head in netlink_skb_destructor() will
+ * bypass most of the handling in skb_release_data()
+ */
+static inline struct sk_buff *nlmsg_new_large(size_t payload)
+{
+ return netlink_alloc_large_skb(nlmsg_total_size(payload), 0);
+}
+
+/**
* nlmsg_end - Finalize a netlink message
* @skb: socket buffer the message is stored in
* @nlh: netlink message header
@@ -1073,21 +1087,29 @@ static inline void nlmsg_free(struct sk_buff *skb)
}
/**
- * nlmsg_multicast - multicast a netlink message
+ * nlmsg_multicast_filtered - multicast a netlink message with filter function
* @sk: netlink socket to spread messages to
* @skb: netlink message as socket buffer
* @portid: own netlink portid to avoid sending to yourself
* @group: multicast group id
* @flags: allocation flags
+ * @filter: filter function
+ * @filter_data: filter function private data
+ *
+ * Return: 0 on success, negative error code for failure.
*/
-static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
- u32 portid, unsigned int group, gfp_t flags)
+static inline int nlmsg_multicast_filtered(struct sock *sk, struct sk_buff *skb,
+ u32 portid, unsigned int group,
+ gfp_t flags,
+ netlink_filter_fn filter,
+ void *filter_data)
{
int err;
NETLINK_CB(skb).dst_group = group;
- err = netlink_broadcast(sk, skb, portid, group, flags);
+ err = netlink_broadcast_filtered(sk, skb, portid, group, flags,
+ filter, filter_data);
if (err > 0)
err = 0;
@@ -1095,6 +1117,21 @@ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
}
/**
+ * nlmsg_multicast - multicast a netlink message
+ * @sk: netlink socket to spread messages to
+ * @skb: netlink message as socket buffer
+ * @portid: own netlink portid to avoid sending to yourself
+ * @group: multicast group id
+ * @flags: allocation flags
+ */
+static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
+ u32 portid, unsigned int group, gfp_t flags)
+{
+ return nlmsg_multicast_filtered(sk, skb, portid, group, flags,
+ NULL, NULL);
+}
+
+/**
* nlmsg_unicast - unicast a netlink message
* @sk: netlink socket to spread message to
* @skb: netlink message as socket buffer
@@ -1200,7 +1237,7 @@ static inline void *nla_data(const struct nlattr *nla)
* nla_len - length of payload
* @nla: netlink attribute
*/
-static inline int nla_len(const struct nlattr *nla)
+static inline u16 nla_len(const struct nlattr *nla)
{
return nla->nla_len - NLA_HDRLEN;
}
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index a91ef9f8de60..78214f1b43a2 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -13,6 +13,7 @@ struct netns_core {
struct ctl_table_header *sysctl_hdr;
int sysctl_somaxconn;
+ int sysctl_optmem_max;
u8 sysctl_txrehash;
#ifdef CONFIG_PROC_FS
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 73f43f699199..c356c458b340 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -19,8 +19,7 @@ struct hlist_head;
struct fib_table;
struct sock;
struct local_ports {
- seqlock_t lock;
- int range[2];
+ u32 range; /* high << 16 | low */
bool warned;
};
@@ -42,6 +41,38 @@ struct inet_timewait_death_row {
struct tcp_fastopen_context;
struct netns_ipv4 {
+ /* Cacheline organization can be found documented in
+ * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
+ * Please update the document when adding new fields.
+ */
+
+ /* TX readonly hotpath cache lines */
+ __cacheline_group_begin(netns_ipv4_read_tx);
+ u8 sysctl_tcp_early_retrans;
+ u8 sysctl_tcp_tso_win_divisor;
+ u8 sysctl_tcp_tso_rtt_log;
+ u8 sysctl_tcp_autocorking;
+ int sysctl_tcp_min_snd_mss;
+ unsigned int sysctl_tcp_notsent_lowat;
+ int sysctl_tcp_limit_output_bytes;
+ int sysctl_tcp_min_rtt_wlen;
+ int sysctl_tcp_wmem[3];
+ u8 sysctl_ip_fwd_use_pmtu;
+ __cacheline_group_end(netns_ipv4_read_tx);
+
+ /* TXRX readonly hotpath cache lines */
+ __cacheline_group_begin(netns_ipv4_read_txrx);
+ u8 sysctl_tcp_moderate_rcvbuf;
+ __cacheline_group_end(netns_ipv4_read_txrx);
+
+ /* RX readonly hotpath cache line */
+ __cacheline_group_begin(netns_ipv4_read_rx);
+ u8 sysctl_ip_early_demux;
+ u8 sysctl_tcp_early_demux;
+ int sysctl_tcp_reordering;
+ int sysctl_tcp_rmem[3];
+ __cacheline_group_end(netns_ipv4_read_rx);
+
struct inet_timewait_death_row tcp_death_row;
struct udp_table *udp_table;
@@ -96,17 +127,14 @@ struct netns_ipv4 {
u8 sysctl_ip_default_ttl;
u8 sysctl_ip_no_pmtu_disc;
- u8 sysctl_ip_fwd_use_pmtu;
u8 sysctl_ip_fwd_update_priority;
u8 sysctl_ip_nonlocal_bind;
u8 sysctl_ip_autobind_reuse;
/* Shall we try to damage output packets if routing dev changes? */
u8 sysctl_ip_dynaddr;
- u8 sysctl_ip_early_demux;
#ifdef CONFIG_NET_L3_MASTER_DEV
u8 sysctl_raw_l3mdev_accept;
#endif
- u8 sysctl_tcp_early_demux;
u8 sysctl_udp_early_demux;
u8 sysctl_nexthop_compat_mode;
@@ -119,7 +147,6 @@ struct netns_ipv4 {
u8 sysctl_tcp_mtu_probing;
int sysctl_tcp_mtu_probe_floor;
int sysctl_tcp_base_mss;
- int sysctl_tcp_min_snd_mss;
int sysctl_tcp_probe_threshold;
u32 sysctl_tcp_probe_interval;
@@ -135,17 +162,14 @@ struct netns_ipv4 {
u8 sysctl_tcp_backlog_ack_defer;
u8 sysctl_tcp_pingpong_thresh;
- int sysctl_tcp_reordering;
u8 sysctl_tcp_retries1;
u8 sysctl_tcp_retries2;
u8 sysctl_tcp_orphan_retries;
u8 sysctl_tcp_tw_reuse;
int sysctl_tcp_fin_timeout;
- unsigned int sysctl_tcp_notsent_lowat;
u8 sysctl_tcp_sack;
u8 sysctl_tcp_window_scaling;
u8 sysctl_tcp_timestamps;
- u8 sysctl_tcp_early_retrans;
u8 sysctl_tcp_recovery;
u8 sysctl_tcp_thin_linear_timeouts;
u8 sysctl_tcp_slow_start_after_idle;
@@ -161,21 +185,13 @@ struct netns_ipv4 {
u8 sysctl_tcp_frto;
u8 sysctl_tcp_nometrics_save;
u8 sysctl_tcp_no_ssthresh_metrics_save;
- u8 sysctl_tcp_moderate_rcvbuf;
- u8 sysctl_tcp_tso_win_divisor;
u8 sysctl_tcp_workaround_signed_windows;
- int sysctl_tcp_limit_output_bytes;
int sysctl_tcp_challenge_ack_limit;
- int sysctl_tcp_min_rtt_wlen;
u8 sysctl_tcp_min_tso_segs;
- u8 sysctl_tcp_tso_rtt_log;
- u8 sysctl_tcp_autocorking;
u8 sysctl_tcp_reflect_tos;
int sysctl_tcp_invalid_ratelimit;
int sysctl_tcp_pacing_ss_ratio;
int sysctl_tcp_pacing_ca_ratio;
- int sysctl_tcp_wmem[3];
- int sysctl_tcp_rmem[3];
unsigned int sysctl_tcp_child_ehash_entries;
unsigned long sysctl_tcp_comp_sack_delay_ns;
unsigned long sysctl_tcp_comp_sack_slack_ns;
diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h
index 582212ada3ba..fc752a50f91b 100644
--- a/include/net/netns/smc.h
+++ b/include/net/netns/smc.h
@@ -22,5 +22,7 @@ struct netns_smc {
int sysctl_smcr_testlink_time;
int sysctl_wmem;
int sysctl_rmem;
+ int sysctl_max_links_per_lgr;
+ int sysctl_max_conns_per_lgr;
};
#endif
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 4ebd544ae977..1d397c1a0043 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -11,7 +11,7 @@
* The page_pool allocator is optimized for recycling page or page fragment used
* by skb packet and xdp frame.
*
- * Basic use involves replacing and alloc_pages() calls with page_pool_alloc(),
+ * Basic use involves replacing any alloc_pages() calls with page_pool_alloc(),
* which allocate memory with or without page splitting depending on the
* requested memory size.
*
@@ -29,7 +29,7 @@
* page allocated from page pool. Page splitting enables memory saving and thus
* avoids TLB/cache miss for data access, but there also is some cost to
* implement page splitting, mainly some cache line dirtying/bouncing for
- * 'struct page' and atomic operation for page->pp_frag_count.
+ * 'struct page' and atomic operation for page->pp_ref_count.
*
* The API keeps track of in-flight pages, in order to let API users know when
* it is safe to free a page_pool object, the API users must call
@@ -37,15 +37,15 @@
* attach the page_pool object to a page_pool-aware object like skbs marked with
* skb_mark_for_recycle().
*
- * page_pool_put_page() may be called multi times on the same page if a page is
- * split into multi fragments. For the last fragment, it will either recycle the
- * page, or in case of page->_refcount > 1, it will release the DMA mapping and
- * in-flight state accounting.
+ * page_pool_put_page() may be called multiple times on the same page if a page
+ * is split into multiple fragments. For the last fragment, it will either
+ * recycle the page, or in case of page->_refcount > 1, it will release the DMA
+ * mapping and in-flight state accounting.
*
* dma_sync_single_range_for_device() is only called for the last fragment when
* page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the
* last freed fragment to do the sync_for_device operation for all fragments in
- * the same page when a page is split, the API user must setup pool->p.max_len
+ * the same page when a page is split. The API user must setup pool->p.max_len
* and pool->p.offset correctly and ensure that page_pool_put_page() is called
* with dma_sync_size being -1 for fragment API.
*/
@@ -55,16 +55,12 @@
#include <net/page_pool/types.h>
#ifdef CONFIG_PAGE_POOL_STATS
+/* Deprecated driver-facing API, use netlink instead */
int page_pool_ethtool_stats_get_count(void);
u8 *page_pool_ethtool_stats_get_strings(u8 *data);
u64 *page_pool_ethtool_stats_get(u64 *data, void *stats);
-/*
- * Drivers that wish to harvest page pool stats and report them to users
- * (perhaps via ethtool, debugfs, or another mechanism) can allocate a
- * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool.
- */
-bool page_pool_get_stats(struct page_pool *pool,
+bool page_pool_get_stats(const struct page_pool *pool,
struct page_pool_stats *stats);
#else
static inline int page_pool_ethtool_stats_get_count(void)
@@ -214,69 +210,82 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
return pool->p.dma_dir;
}
-/* pp_frag_count represents the number of writers who can update the page
- * either by updating skb->data or via DMA mappings for the device.
- * We can't rely on the page refcnt for that as we don't know who might be
- * holding page references and we can't reliably destroy or sync DMA mappings
- * of the fragments.
+/**
+ * page_pool_fragment_page() - split a fresh page into fragments
+ * @page: page to split
+ * @nr: references to set
+ *
+ * pp_ref_count represents the number of outstanding references to the page,
+ * which will be freed using page_pool APIs (rather than page allocator APIs
+ * like put_page()). Such references are usually held by page_pool-aware
+ * objects like skbs marked for page pool recycling.
*
- * When pp_frag_count reaches 0 we can either recycle the page if the page
- * refcnt is 1 or return it back to the memory allocator and destroy any
- * mappings we have.
+ * This helper allows the caller to take (set) multiple references to a
+ * freshly allocated page. The page must be freshly allocated (have a
+ * pp_ref_count of 1). This is commonly done by drivers and
+ * "fragment allocators" to save atomic operations - either when they know
+ * upfront how many references they will need; or to take MAX references and
+ * return the unused ones with a single atomic dec(), instead of performing
+ * multiple atomic inc() operations.
*/
static inline void page_pool_fragment_page(struct page *page, long nr)
{
- atomic_long_set(&page->pp_frag_count, nr);
+ atomic_long_set(&page->pp_ref_count, nr);
}
-static inline long page_pool_defrag_page(struct page *page, long nr)
+static inline long page_pool_unref_page(struct page *page, long nr)
{
long ret;
- /* If nr == pp_frag_count then we have cleared all remaining
+ /* If nr == pp_ref_count then we have cleared all remaining
* references to the page:
* 1. 'n == 1': no need to actually overwrite it.
* 2. 'n != 1': overwrite it with one, which is the rare case
- * for pp_frag_count draining.
+ * for pp_ref_count draining.
*
* The main advantage to doing this is that not only we avoid a atomic
* update, as an atomic_read is generally a much cheaper operation than
* an atomic update, especially when dealing with a page that may be
- * partitioned into only 2 or 3 pieces; but also unify the pp_frag_count
+ * referenced by only 2 or 3 users; but also unify the pp_ref_count
* handling by ensuring all pages have partitioned into only 1 piece
* initially, and only overwrite it when the page is partitioned into
* more than one piece.
*/
- if (atomic_long_read(&page->pp_frag_count) == nr) {
+ if (atomic_long_read(&page->pp_ref_count) == nr) {
/* As we have ensured nr is always one for constant case using
* the BUILD_BUG_ON(), only need to handle the non-constant case
- * here for pp_frag_count draining, which is a rare case.
+ * here for pp_ref_count draining, which is a rare case.
*/
BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
if (!__builtin_constant_p(nr))
- atomic_long_set(&page->pp_frag_count, 1);
+ atomic_long_set(&page->pp_ref_count, 1);
return 0;
}
- ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ ret = atomic_long_sub_return(nr, &page->pp_ref_count);
WARN_ON(ret < 0);
- /* We are the last user here too, reset pp_frag_count back to 1 to
+ /* We are the last user here too, reset pp_ref_count back to 1 to
* ensure all pages have been partitioned into 1 piece initially,
* this should be the rare case when the last two fragment users call
- * page_pool_defrag_page() currently.
+ * page_pool_unref_page() currently.
*/
if (unlikely(!ret))
- atomic_long_set(&page->pp_frag_count, 1);
+ atomic_long_set(&page->pp_ref_count, 1);
return ret;
}
-static inline bool page_pool_is_last_frag(struct page *page)
+static inline void page_pool_ref_page(struct page *page)
+{
+ atomic_long_inc(&page->pp_ref_count);
+}
+
+static inline bool page_pool_is_last_ref(struct page *page)
{
- /* If page_pool_defrag_page() returns 0, we were the last user */
- return page_pool_defrag_page(page, 1) == 0;
+ /* If page_pool_unref_page() returns 0, we were the last user */
+ return page_pool_unref_page(page, 1) == 0;
}
/**
@@ -301,10 +310,10 @@ static inline void page_pool_put_page(struct page_pool *pool,
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
- if (!page_pool_is_last_frag(page))
+ if (!page_pool_is_last_ref(page))
return;
- page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
+ page_pool_put_unrefed_page(pool, page, dma_sync_size, allow_direct);
#endif
}
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 6fc5134095ed..76481c465375 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -5,6 +5,7 @@
#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
+#include <linux/types.h>
#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
* map/unmap
@@ -48,24 +49,30 @@ struct pp_alloc_cache {
* @pool_size: size of the ptr_ring
* @nid: NUMA node id to allocate from pages from
* @dev: device, for DMA pre-mapping purposes
+ * @netdev: netdev this pool will serve (leave as NULL if none or multiple)
* @napi: NAPI which is the sole consumer of pages, otherwise NULL
* @dma_dir: DMA mapping direction
* @max_len: max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
* @offset: DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
*/
struct page_pool_params {
- unsigned int flags;
- unsigned int order;
- unsigned int pool_size;
- int nid;
- struct device *dev;
- struct napi_struct *napi;
- enum dma_data_direction dma_dir;
- unsigned int max_len;
- unsigned int offset;
+ struct_group_tagged(page_pool_params_fast, fast,
+ unsigned int flags;
+ unsigned int order;
+ unsigned int pool_size;
+ int nid;
+ struct device *dev;
+ struct napi_struct *napi;
+ enum dma_data_direction dma_dir;
+ unsigned int max_len;
+ unsigned int offset;
+ );
+ struct_group_tagged(page_pool_params_slow, slow,
+ struct net_device *netdev;
/* private: used by test code only */
- void (*init_callback)(struct page *page, void *arg);
- void *init_arg;
+ void (*init_callback)(struct page *page, void *arg);
+ void *init_arg;
+ );
};
#ifdef CONFIG_PAGE_POOL_STATS
@@ -119,7 +126,9 @@ struct page_pool_stats {
#endif
struct page_pool {
- struct page_pool_params p;
+ struct page_pool_params_fast p;
+
+ bool has_init_callback;
long frag_users;
struct page *frag_page;
@@ -178,6 +187,16 @@ struct page_pool {
refcount_t user_cnt;
u64 destroy_cnt;
+
+ /* Slow/Control-path information follows */
+ struct page_pool_params_slow slow;
+ /* User-facing fields, protected by page_pools_lock */
+ struct {
+ struct hlist_node list;
+ u64 detach_time;
+ u32 napi_id;
+ u32 id;
+ } user;
};
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
@@ -215,9 +234,9 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
#endif
-void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size,
- bool allow_direct);
+void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size,
+ bool allow_direct);
static inline bool is_page_pool_compiled_in(void)
{
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a76c9171db0e..f308e8268651 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -154,12 +154,6 @@ __cls_set_class(unsigned long *clp, unsigned long cl)
return xchg(clp, cl);
}
-static inline void tcf_set_drop_reason(struct tcf_result *res,
- enum skb_drop_reason reason)
-{
- res->drop_reason = reason;
-}
-
static inline void
__tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base)
{
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 9fa1d0794dfa..1e200d9a066d 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -275,24 +275,6 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
skb->tstamp = ktime_set(0, 0);
}
-struct tc_skb_cb {
- struct qdisc_skb_cb qdisc_cb;
-
- u16 mru;
- u8 post_ct:1;
- u8 post_ct_snat:1;
- u8 post_ct_dnat:1;
- u16 zone; /* Only valid if post_ct = true */
-};
-
-static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
-{
- struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
-
- BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
- return cb;
-}
-
static inline bool tc_qdisc_stats_dump(struct Qdisc *sch,
unsigned long cl,
struct qdisc_walker *arg)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index dcb9160e6467..1d70c2c1572f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -332,7 +332,6 @@ struct tcf_result {
};
const struct tcf_proto *goto_tp;
};
- enum skb_drop_reason drop_reason;
};
struct tcf_chain;
@@ -1037,6 +1036,37 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
return skb;
}
+struct tc_skb_cb {
+ struct qdisc_skb_cb qdisc_cb;
+ u32 drop_reason;
+
+ u16 zone; /* Only valid if post_ct = true */
+ u16 mru;
+ u8 post_ct:1;
+ u8 post_ct_snat:1;
+ u8 post_ct_dnat:1;
+};
+
+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
+{
+ struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
+
+ BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
+ return cb;
+}
+
+static inline enum skb_drop_reason
+tcf_get_drop_reason(const struct sk_buff *skb)
+{
+ return tc_skb_cb(skb)->drop_reason;
+}
+
+static inline void tcf_set_drop_reason(const struct sk_buff *skb,
+ enum skb_drop_reason reason)
+{
+ tc_skb_cb(skb)->drop_reason = reason;
+}
+
/* Instead of calling kfree_skb() while root qdisc lock is held,
* queue the skb for future freeing at end of __dev_xmit_skb()
*/
diff --git a/include/net/sock.h b/include/net/sock.h
index 0201136b0b9c..ba6642811db4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2925,7 +2925,6 @@ extern __u32 sysctl_wmem_max;
extern __u32 sysctl_rmem_max;
extern int sysctl_tstamp_allow_data;
-extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 144ba48bb07b..f5ca4abaee8b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -490,13 +490,14 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
/* From syncookies.c */
struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst, u32 tsoff);
-int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
- u32 cookie);
+ struct dst_entry *dst);
+int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
- const struct tcp_request_sock_ops *af_ops,
- struct sock *sk, struct sk_buff *skb);
+ struct sock *sk, struct sk_buff *skb,
+ struct tcp_options_received *tcp_opt,
+ int mss, u32 tsoff);
+
#ifdef CONFIG_SYN_COOKIES
/* Syncookies use a monotonic timer which increments every 60 seconds.
@@ -582,12 +583,15 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
u64 cookie_init_timestamp(struct request_sock *req, u64 now);
bool cookie_timestamp_decode(const struct net *net,
struct tcp_options_received *opt);
-bool cookie_ecn_ok(const struct tcp_options_received *opt,
- const struct net *net, const struct dst_entry *dst);
+
+static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry *dst)
+{
+ return READ_ONCE(net->ipv4.sysctl_tcp_ecn) ||
+ dst_feature(dst, RTAX_FEATURE_ECN);
+}
/* From net/ipv6/syncookies.c */
-int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
- u32 cookie);
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th);
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
index 647781080613..0f2dcc9e8d46 100644
--- a/include/net/tcp_ao.h
+++ b/include/net/tcp_ao.h
@@ -271,8 +271,7 @@ void tcp_ao_established(struct sock *sk);
void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb);
void tcp_ao_connect_init(struct sock *sk);
void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
- struct tcp_request_sock *treq,
- unsigned short int family, int l3index);
+ struct request_sock *req, unsigned short int family);
#else /* CONFIG_TCP_AO */
static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
@@ -283,8 +282,7 @@ static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
}
static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
- struct tcp_request_sock *treq,
- unsigned short int family, int l3index)
+ struct request_sock *req, unsigned short int family)
{
}
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
index cc00118acca1..d60e8148ff4c 100644
--- a/include/net/tcp_states.h
+++ b/include/net/tcp_states.h
@@ -22,6 +22,7 @@ enum {
TCP_LISTEN,
TCP_CLOSING, /* Now a valid state */
TCP_NEW_SYN_RECV,
+ TCP_BOUND_INACTIVE, /* Pseudo-state for inet_diag */
TCP_MAX_STATES /* Leave at the end! */
};
@@ -43,6 +44,7 @@ enum {
TCPF_LISTEN = (1 << TCP_LISTEN),
TCPF_CLOSING = (1 << TCP_CLOSING),
TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV),
+ TCPF_BOUND_INACTIVE = (1 << TCP_BOUND_INACTIVE),
};
#endif /* _LINUX_TCP_STATES_H */
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 6a9f8a5f387c..33ba6fc151cf 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -210,22 +210,23 @@ struct vxlan_rdst {
};
struct vxlan_config {
- union vxlan_addr remote_ip;
- union vxlan_addr saddr;
- __be32 vni;
- int remote_ifindex;
- int mtu;
- __be16 dst_port;
- u16 port_min;
- u16 port_max;
- u8 tos;
- u8 ttl;
- __be32 label;
- u32 flags;
- unsigned long age_interval;
- unsigned int addrmax;
- bool no_share;
- enum ifla_vxlan_df df;
+ union vxlan_addr remote_ip;
+ union vxlan_addr saddr;
+ __be32 vni;
+ int remote_ifindex;
+ int mtu;
+ __be16 dst_port;
+ u16 port_min;
+ u16 port_max;
+ u8 tos;
+ u8 ttl;
+ __be32 label;
+ enum ifla_vxlan_label_policy label_policy;
+ u32 flags;
+ unsigned long age_interval;
+ unsigned int addrmax;
+ bool no_share;
+ enum ifla_vxlan_df df;
};
enum {
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 349c36fb5fd8..e6770dd40c91 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -16,7 +16,7 @@
*
* The XDP RX-queue info (xdp_rxq_info) is associated with the driver
* level RX-ring queues. It is information that is specific to how
- * the driver have configured a given RX-ring queue.
+ * the driver has configured a given RX-ring queue.
*
* Each xdp_buff frame received in the driver carries a (pointer)
* reference to this xdp_rxq_info structure. This provides the XDP
@@ -32,7 +32,7 @@
* The struct is not directly tied to the XDP prog. A new XDP prog
* can be attached as long as it doesn't change the underlying
* RX-ring. If the RX-ring does change significantly, the NIC driver
- * naturally need to stop the RX-ring before purging and reallocating
+ * naturally needs to stop the RX-ring before purging and reallocating
* memory. In that process the driver MUST call unregister (which
* also applies for driver shutdown and unload). The register API is
* also mandatory during RX-ring setup.
@@ -369,7 +369,12 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
static inline bool xdp_metalen_invalid(unsigned long metalen)
{
- return (metalen & (sizeof(__u32) - 1)) || (metalen > 32);
+ unsigned long meta_max;
+
+ meta_max = type_max(typeof_member(struct skb_shared_info, meta_len));
+ BUILD_BUG_ON(!__builtin_constant_p(meta_max));
+
+ return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max;
}
struct xdp_attachment_info {
@@ -399,6 +404,10 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
NETDEV_XDP_RX_METADATA_HASH, \
bpf_xdp_metadata_rx_hash, \
xmo_rx_hash) \
+ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \
+ NETDEV_XDP_RX_METADATA_VLAN_TAG, \
+ bpf_xdp_metadata_rx_vlan_tag, \
+ xmo_rx_vlan_tag) \
enum xdp_rx_metadata {
#define XDP_METADATA_KFUNC(name, _, __, ___) name,
@@ -427,6 +436,7 @@ enum xdp_rss_hash_type {
XDP_RSS_L4_UDP = BIT(5),
XDP_RSS_L4_SCTP = BIT(6),
XDP_RSS_L4_IPSEC = BIT(7), /* L4 based hash include IPSEC SPI */
+ XDP_RSS_L4_ICMP = BIT(8),
/* Second part: RSS hash type combinations used for driver HW mapping */
XDP_RSS_TYPE_NONE = 0,
@@ -442,11 +452,13 @@ enum xdp_rss_hash_type {
XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
+ XDP_RSS_TYPE_L4_IPV4_ICMP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_ICMP,
XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
+ XDP_RSS_TYPE_L4_IPV6_ICMP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_ICMP,
XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR,
XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR,
@@ -457,6 +469,8 @@ struct xdp_metadata_ops {
int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type);
+ int (*xmo_rx_vlan_tag)(const struct xdp_md *ctx, __be16 *vlan_proto,
+ u16 *vlan_tci);
};
#ifdef CONFIG_NET
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index f83128007fb0..3cb4dc9bd70e 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -30,6 +30,7 @@ struct xdp_umem {
struct user_struct *user;
refcount_t users;
u8 flags;
+ u8 tx_metadata_len;
bool zc;
struct page **pgs;
int id;
@@ -92,12 +93,105 @@ struct xdp_sock {
struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
};
+/*
+ * AF_XDP TX metadata hooks for network devices.
+ * The following hooks can be defined; unless noted otherwise, they are
+ * optional and can be filled with a null pointer.
+ *
+ * void (*tmo_request_timestamp)(void *priv)
+ * Called when AF_XDP frame requested egress timestamp.
+ *
+ * u64 (*tmo_fill_timestamp)(void *priv)
+ * Called when AF_XDP frame, that had requested egress timestamp,
+ * received a completion. The hook needs to return the actual HW timestamp.
+ *
+ * void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv)
+ * Called when AF_XDP frame requested HW checksum offload. csum_start
+ * indicates position where checksumming should start.
+ * csum_offset indicates position where checksum should be stored.
+ *
+ */
+struct xsk_tx_metadata_ops {
+ void (*tmo_request_timestamp)(void *priv);
+ u64 (*tmo_fill_timestamp)(void *priv);
+ void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv);
+};
+
#ifdef CONFIG_XDP_SOCKETS
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
void __xsk_map_flush(void);
+/**
+ * xsk_tx_metadata_to_compl - Save enough relevant metadata information
+ * to perform tx completion in the future.
+ * @meta: pointer to AF_XDP metadata area
+ * @compl: pointer to output struct xsk_tx_metadata_to_compl
+ *
+ * This function should be called by the networking device when
+ * it prepares AF_XDP egress packet. The value of @compl should be stored
+ * and passed to xsk_tx_metadata_complete upon TX completion.
+ */
+static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
+ struct xsk_tx_metadata_compl *compl)
+{
+ if (!meta)
+ return;
+
+ if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
+ compl->tx_timestamp = &meta->completion.tx_timestamp;
+ else
+ compl->tx_timestamp = NULL;
+}
+
+/**
+ * xsk_tx_metadata_request - Evaluate AF_XDP TX metadata at submission
+ * and call appropriate xsk_tx_metadata_ops operation.
+ * @meta: pointer to AF_XDP metadata area
+ * @ops: pointer to struct xsk_tx_metadata_ops
+ * @priv: pointer to driver-private aread
+ *
+ * This function should be called by the networking device when
+ * it prepares AF_XDP egress packet.
+ */
+static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+ if (!meta)
+ return;
+
+ if (ops->tmo_request_timestamp)
+ if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP)
+ ops->tmo_request_timestamp(priv);
+
+ if (ops->tmo_request_checksum)
+ if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM)
+ ops->tmo_request_checksum(meta->request.csum_start,
+ meta->request.csum_offset, priv);
+}
+
+/**
+ * xsk_tx_metadata_complete - Evaluate AF_XDP TX metadata at completion
+ * and call appropriate xsk_tx_metadata_ops operation.
+ * @compl: pointer to completion metadata produced from xsk_tx_metadata_to_compl
+ * @ops: pointer to struct xsk_tx_metadata_ops
+ * @priv: pointer to driver-private aread
+ *
+ * This function should be called by the networking device upon
+ * AF_XDP egress completion.
+ */
+static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+ if (!compl)
+ return;
+
+ *compl->tx_timestamp = ops->tmo_fill_timestamp(priv);
+}
+
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
@@ -114,6 +208,23 @@ static inline void __xsk_map_flush(void)
{
}
+static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta,
+ struct xsk_tx_metadata_compl *compl)
+{
+}
+
+static inline void xsk_tx_metadata_request(struct xsk_tx_metadata *meta,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+}
+
+static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl,
+ const struct xsk_tx_metadata_ops *ops,
+ void *priv)
+{
+}
+
#endif /* CONFIG_XDP_SOCKETS */
#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET)
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 1f6fc8c7a84c..b62bb8525a5f 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -14,6 +14,12 @@
#ifdef CONFIG_XDP_SOCKETS
+struct xsk_cb_desc {
+ void *src;
+ u8 off;
+ u8 bytes;
+};
+
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries);
bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max);
@@ -47,6 +53,12 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
xp_set_rxq_info(pool, rxq);
}
+static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
+ struct xsk_cb_desc *desc)
+{
+ xp_fill_cb(pool, desc);
+}
+
static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -165,6 +177,30 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return xp_raw_get_data(pool, addr);
}
+#define XDP_TXMD_FLAGS_VALID ( \
+ XDP_TXMD_FLAGS_TIMESTAMP | \
+ XDP_TXMD_FLAGS_CHECKSUM | \
+ 0)
+
+static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
+{
+ return !(meta->flags & ~XDP_TXMD_FLAGS_VALID);
+}
+
+static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+ struct xsk_tx_metadata *meta;
+
+ if (!pool->tx_metadata_len)
+ return NULL;
+
+ meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len;
+ if (unlikely(!xsk_buff_valid_tx_metadata(meta)))
+ return NULL; /* no way to signal the error to the user */
+
+ return meta;
+}
+
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
@@ -250,6 +286,11 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
{
}
+static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
+ struct xsk_cb_desc *desc)
+{
+}
+
static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
{
return 0;
@@ -324,6 +365,16 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return NULL;
}
+static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
+{
+ return false;
+}
+
+static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+ return NULL;
+}
+
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool)
{
}
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c9bb0f892f55..1d107241b901 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -2190,4 +2190,13 @@ static inline int register_xfrm_interface_bpf(void)
#endif
+#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF)
+int register_xfrm_state_bpf(void);
+#else
+static inline int register_xfrm_state_bpf(void)
+{
+ return 0;
+}
+#endif
+
#endif /* _NET_XFRM_H */
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index b0bdff26fc88..99dd7376df6a 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -12,6 +12,7 @@
struct xsk_buff_pool;
struct xdp_rxq_info;
+struct xsk_cb_desc;
struct xsk_queue;
struct xdp_desc;
struct xdp_umem;
@@ -33,6 +34,7 @@ struct xdp_buff_xsk {
};
#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
+#define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t))
struct xsk_dma_map {
dma_addr_t *dma_pages;
@@ -77,10 +79,12 @@ struct xsk_buff_pool {
u32 chunk_size;
u32 chunk_shift;
u32 frame_len;
+ u8 tx_metadata_len; /* inherited from umem */
u8 cached_need_wakeup;
bool uses_need_wakeup;
bool dma_need_sync;
bool unaligned;
+ bool tx_sw_csum;
void *addrs;
/* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect:
* NAPI TX thread and sendmsg error paths in the SKB destructor callback and when
@@ -132,6 +136,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p
/* AF_XDP ZC drivers, via xdp_sock_buff.h */
void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
+void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc);
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
unsigned long attrs, struct page **pages, u32 nr_pages);
void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
@@ -233,4 +238,9 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}
+static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool)
+{
+ return pool->tx_metadata_len > 0;
+}
+
#endif /* XSK_BUFF_POOL_H_ */