diff options
Diffstat (limited to 'include/net')
-rw-r--r-- | include/net/act_api.h | 2 | ||||
-rw-r--r-- | include/net/addrconf.h | 12 | ||||
-rw-r--r-- | include/net/af_vsock.h | 2 | ||||
-rw-r--r-- | include/net/cfg80211.h | 99 | ||||
-rw-r--r-- | include/net/dropreason-core.h | 6 | ||||
-rw-r--r-- | include/net/genetlink.h | 2 | ||||
-rw-r--r-- | include/net/if_inet6.h | 4 | ||||
-rw-r--r-- | include/net/inet_sock.h | 5 | ||||
-rw-r--r-- | include/net/ip.h | 10 | ||||
-rw-r--r-- | include/net/ip6_fib.h | 4 | ||||
-rw-r--r-- | include/net/mac80211.h | 23 | ||||
-rw-r--r-- | include/net/mana/gdma.h | 7 | ||||
-rw-r--r-- | include/net/netdev_rx_queue.h | 4 | ||||
-rw-r--r-- | include/net/netfilter/nf_flow_table.h | 10 | ||||
-rw-r--r-- | include/net/netlink.h | 2 | ||||
-rw-r--r-- | include/net/netns/core.h | 1 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 50 | ||||
-rw-r--r-- | include/net/page_pool/helpers.h | 77 | ||||
-rw-r--r-- | include/net/page_pool/types.h | 6 | ||||
-rw-r--r-- | include/net/sock.h | 1 | ||||
-rw-r--r-- | include/net/tcp.h | 9 | ||||
-rw-r--r-- | include/net/tcp_ao.h | 6 | ||||
-rw-r--r-- | include/net/tcp_states.h | 2 |
23 files changed, 248 insertions, 96 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index 4ae0580b63ca..ea13e1e4a7c2 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -191,7 +191,7 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, u32 flags); -void tcf_idr_insert_many(struct tc_action *actions[]); +void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]); void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 82da55101b5a..61ebe723ee4d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -31,17 +31,22 @@ struct prefix_info { __u8 length; __u8 prefix_len; + union __packed { + __u8 flags; + struct __packed { #if defined(__BIG_ENDIAN_BITFIELD) - __u8 onlink : 1, + __u8 onlink : 1, autoconf : 1, reserved : 6; #elif defined(__LITTLE_ENDIAN_BITFIELD) - __u8 reserved : 6, + __u8 reserved : 6, autoconf : 1, onlink : 1; #else #error "Please fix <asm/byteorder.h>" #endif + }; + }; __be32 valid; __be32 prefered; __be32 reserved2; @@ -49,6 +54,9 @@ struct prefix_info { struct in6_addr prefix; }; +/* rfc4861 4.6.2: IPv6 PIO is 32 bytes in size */ +static_assert(sizeof(struct prefix_info) == 32); + #include <linux/ipv6.h> #include <linux/netdevice.h> #include <net/if_inet6.h> diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index e302c0e804d0..535701efc1e5 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -137,7 +137,6 @@ struct vsock_transport { u64 (*stream_rcvhiwat)(struct vsock_sock *); bool (*stream_is_active)(struct vsock_sock *); bool (*stream_allow)(u32 cid, u32 port); - int (*set_rcvlowat)(struct vsock_sock *vsk, int val); /* SEQ_PACKET. */ ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, @@ -168,6 +167,7 @@ struct vsock_transport { struct vsock_transport_send_notify_data *); /* sk_lock held by the caller */ void (*notify_buffer_size)(struct vsock_sock *, u64 *); + int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val); /* Shutdown. */ int (*shutdown)(struct vsock_sock *, int); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1b3345129a95..ac1fb326dcda 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -52,7 +52,7 @@ * such wiphy can have zero, one, or many virtual interfaces associated with * it, which need to be identified as such by pointing the network interface's * @ieee80211_ptr pointer to a &struct wireless_dev which further describes - * the wireless part of the interface, normally this struct is embedded in the + * the wireless part of the interface. Normally this struct is embedded in the * network interface's private data area. Drivers can optionally allow creating * or destroying virtual interfaces on the fly, but without at least one or the * ability to create some the wireless device isn't useful. @@ -977,6 +977,15 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1, const struct cfg80211_chan_def *chandef2); /** + * nl80211_chan_width_to_mhz - get the channel width in MHz + * @chan_width: the channel width from &enum nl80211_chan_width + * + * Return: channel width in MHz if the chan_width from &enum nl80211_chan_width + * is valid. -1 otherwise. + */ +int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width); + +/** * cfg80211_chandef_valid - check if a channel definition is valid * @chandef: the channel definition to check * Return: %true if the channel definition is valid. %false otherwise. @@ -1665,6 +1674,21 @@ struct link_station_del_parameters { }; /** + * struct cfg80211_ttlm_params: TID to link mapping parameters + * + * Used for setting a TID to link mapping. + * + * @dlink: Downlink TID to link mapping, as defined in section 9.4.2.314 + * (TID-To-Link Mapping element) in Draft P802.11be_D4.0. + * @ulink: Uplink TID to link mapping, as defined in section 9.4.2.314 + * (TID-To-Link Mapping element) in Draft P802.11be_D4.0. + */ +struct cfg80211_ttlm_params { + u16 dlink[8]; + u16 ulink[8]; +}; + +/** * struct station_parameters - station parameters * * Used to change and create a new station. @@ -2560,7 +2584,7 @@ struct cfg80211_scan_info { * @short_ssid: short ssid to scan for * @bssid: bssid to scan for * @channel_idx: idx of the channel in the channel array in the scan request - * which the above info relvant to + * which the above info is relevant to * @unsolicited_probe: the AP transmits unsolicited probe response every 20 TU * @short_ssid_valid: @short_ssid is valid and can be used * @psc_no_listen: when set, and the channel is a PSC channel, no need to wait @@ -2819,6 +2843,13 @@ enum cfg80211_signal_type { * the BSS that requested the scan in which the beacon/probe was received. * @chains: bitmask for filled values in @chain_signal. * @chain_signal: per-chain signal strength of last received BSS in dBm. + * @restrict_use: restrict usage, if not set, assume @use_for is + * %NL80211_BSS_USE_FOR_NORMAL. + * @use_for: bitmap of possible usage for this BSS, see + * &enum nl80211_bss_use_for + * @cannot_use_reasons: the reasons (bitmap) for not being able to connect, + * if @restrict_use is set and @use_for is zero (empty); may be 0 for + * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons * @drv_data: Data to be passed through to @inform_bss */ struct cfg80211_inform_bss { @@ -2830,6 +2861,9 @@ struct cfg80211_inform_bss { u8 chains; s8 chain_signal[IEEE80211_MAX_CHAINS]; + u8 restrict_use:1, use_for:7; + u8 cannot_use_reasons; + void *drv_data; }; @@ -2881,6 +2915,11 @@ struct cfg80211_bss_ies { * @chain_signal: per-chain signal strength of last received BSS in dBm. * @bssid_index: index in the multiple BSS set * @max_bssid_indicator: max number of members in the BSS set + * @use_for: bitmap of possible usage for this BSS, see + * &enum nl80211_bss_use_for + * @cannot_use_reasons: the reasons (bitmap) for not being able to connect, + * if @restrict_use is set and @use_for is zero (empty); may be 0 for + * unspecified reasons; see &enum nl80211_bss_cannot_use_reasons * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes */ struct cfg80211_bss { @@ -2906,6 +2945,9 @@ struct cfg80211_bss { u8 bssid_index; u8 max_bssid_indicator; + u8 use_for; + u8 cannot_use_reasons; + u8 priv[] __aligned(sizeof(void *)); }; @@ -4496,6 +4538,7 @@ struct mgmt_frame_regs { * @del_link_station: Remove a link of a station. * * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames. + * @set_ttlm: set the TID to link mapping. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4855,6 +4898,8 @@ struct cfg80211_ops { struct link_station_del_parameters *params); int (*set_hw_timestamp)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_set_hw_timestamp *hwts); + int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_ttlm_params *params); }; /* @@ -4913,6 +4958,8 @@ struct cfg80211_ops { * NL80211_REGDOM_SET_BY_DRIVER. * @WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON: reg_call_notifier() is called if driver * set this flag to update channels on beacon hints. + * @WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY: support connection to non-primary link + * of an NSTR mobile AP MLD. */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), @@ -4926,7 +4973,7 @@ enum wiphy_flags { WIPHY_FLAG_IBSS_RSN = BIT(8), WIPHY_FLAG_MESH_AUTH = BIT(10), WIPHY_FLAG_SUPPORTS_EXT_KCK_32 = BIT(11), - /* use hole at 12 */ + WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY = BIT(12), WIPHY_FLAG_SUPPORTS_FW_ROAM = BIT(13), WIPHY_FLAG_AP_UAPSD = BIT(14), WIPHY_FLAG_SUPPORTS_TDLS = BIT(15), @@ -7165,6 +7212,25 @@ cfg80211_inform_bss(struct wiphy *wiphy, } /** + * __cfg80211_get_bss - get a BSS reference + * @wiphy: the wiphy this BSS struct belongs to + * @channel: the channel to search on (or %NULL) + * @bssid: the desired BSSID (or %NULL) + * @ssid: the desired SSID (or %NULL) + * @ssid_len: length of the SSID (or 0) + * @bss_type: type of BSS, see &enum ieee80211_bss_type + * @privacy: privacy filter, see &enum ieee80211_privacy + * @use_for: indicates which use is intended + */ +struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy, + struct ieee80211_channel *channel, + const u8 *bssid, + const u8 *ssid, size_t ssid_len, + enum ieee80211_bss_type bss_type, + enum ieee80211_privacy privacy, + u32 use_for); + +/** * cfg80211_get_bss - get a BSS reference * @wiphy: the wiphy this BSS struct belongs to * @channel: the channel to search on (or %NULL) @@ -7173,13 +7239,20 @@ cfg80211_inform_bss(struct wiphy *wiphy, * @ssid_len: length of the SSID (or 0) * @bss_type: type of BSS, see &enum ieee80211_bss_type * @privacy: privacy filter, see &enum ieee80211_privacy + * + * This version implies regular usage, %NL80211_BSS_USE_FOR_NORMAL. */ -struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, - struct ieee80211_channel *channel, - const u8 *bssid, - const u8 *ssid, size_t ssid_len, - enum ieee80211_bss_type bss_type, - enum ieee80211_privacy privacy); +static inline struct cfg80211_bss * +cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, + const u8 *bssid, const u8 *ssid, size_t ssid_len, + enum ieee80211_bss_type bss_type, + enum ieee80211_privacy privacy) +{ + return __cfg80211_get_bss(wiphy, channel, bssid, ssid, ssid_len, + bss_type, privacy, + NL80211_BSS_USE_FOR_NORMAL); +} + static inline struct cfg80211_bss * cfg80211_get_ibss(struct wiphy *wiphy, struct ieee80211_channel *channel, @@ -7312,7 +7385,7 @@ struct cfg80211_rx_assoc_resp_data { * This function may sleep. The caller must hold the corresponding wdev's mutex. */ void cfg80211_rx_assoc_resp(struct net_device *dev, - struct cfg80211_rx_assoc_resp_data *data); + const struct cfg80211_rx_assoc_resp_data *data); /** * struct cfg80211_assoc_failure - association failure data @@ -7431,7 +7504,7 @@ void cfg80211_notify_new_peer_candidate(struct net_device *dev, * RFkill integration in cfg80211 is almost invisible to drivers, * as cfg80211 automatically registers an rfkill instance for each * wireless device it knows about. Soft kill is also translated - * into disconnecting and turning all interfaces off, drivers are + * into disconnecting and turning all interfaces off. Drivers are * expected to turn off the device when all interfaces are down. * * However, devices may have a hard RFkill line, in which case they @@ -7479,7 +7552,7 @@ static inline void wiphy_rfkill_stop_polling(struct wiphy *wiphy) * the configuration mechanism. * * A driver supporting vendor commands must register them as an array - * in struct wiphy, with handlers for each one, each command has an + * in struct wiphy, with handlers for each one. Each command has an * OUI and sub command ID to identify it. * * Note that this feature should not be (ab)used to implement protocol @@ -7643,7 +7716,7 @@ static inline void cfg80211_vendor_event(struct sk_buff *skb, gfp_t gfp) * interact with driver-specific tools to aid, for instance, * factory programming. * - * This chapter describes how drivers interact with it, for more + * This chapter describes how drivers interact with it. For more * information see the nl80211 book's chapter on it. */ diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 3c70ad53a49c..278e4c7d465c 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -86,6 +86,7 @@ FN(IPV6_NDISC_NS_OTHERHOST) \ FN(QUEUE_PURGE) \ FN(TC_ERROR) \ + FN(PACKET_SOCK_ERROR) \ FNe(MAX) /** @@ -379,6 +380,11 @@ enum skb_drop_reason { /** @SKB_DROP_REASON_TC_ERROR: generic internal tc error. */ SKB_DROP_REASON_TC_ERROR, /** + * @SKB_DROP_REASON_PACKET_SOCK_ERROR: generic packet socket errors + * after its filter matches an incoming packet. + */ + SKB_DROP_REASON_PACKET_SOCK_ERROR, + /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index e18a4c0d69ee..c53244f20437 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -12,10 +12,12 @@ * struct genl_multicast_group - generic netlink multicast group * @name: name of the multicast group, names are per-family * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) + * @cap_sys_admin: whether %CAP_SYS_ADMIN is required for binding */ struct genl_multicast_group { char name[GENL_NAMSIZ]; u8 flags; + u8 cap_sys_admin:1; }; struct genl_split_ops; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 3e454c4d7ba6..f07642264c1e 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -22,10 +22,6 @@ #define IF_RS_SENT 0x10 #define IF_READY 0x80000000 -/* prefix flags */ -#define IF_PREFIX_ONLINK 0x01 -#define IF_PREFIX_AUTOCONF 0x02 - enum { INET6_IFADDR_STATE_PREDAD, INET6_IFADDR_STATE_DAD, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 74db6d97cae1..aa86453f6b9b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -234,10 +234,7 @@ struct inet_sock { int uc_index; int mc_index; __be32 mc_addr; - struct { - __u16 lo; - __u16 hi; - } local_port_range; + u32 local_port_range; /* high << 16 | low */ struct ip_mc_socklist __rcu *mc_list; struct inet_cork_full cork; diff --git a/include/net/ip.h b/include/net/ip.h index 1fc4c8d69e33..de0c69c57e3c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -349,8 +349,14 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } \ } -void inet_get_local_port_range(const struct net *net, int *low, int *high); -void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high); +static inline void inet_get_local_port_range(const struct net *net, int *low, int *high) +{ + u32 range = READ_ONCE(net->ipv4.ip_local_ports.range); + + *low = range & 0xffff; + *high = range >> 16; +} +bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high); #ifdef CONFIG_SYSCTL static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1ba9f4ddf2f6..95ed495c3a40 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -328,8 +328,10 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i) static inline void fib6_info_release(struct fib6_info *f6i) { - if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) + if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) { + DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link)); call_rcu(&f6i->rcu, fib6_info_destroy_rcu); + } } enum fib6_walk_state { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 580781ff9dcf..77a71b1396b1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2686,6 +2686,9 @@ struct ieee80211_txq { * @IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX: Hardware/driver handles transmitting * multicast frames on all links, mac80211 should not do that. * + * @IEEE80211_HW_DISALLOW_PUNCTURING: HW requires disabling puncturing in EHT + * and connecting with a lower bandwidth instead + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2743,6 +2746,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_CONC_MON_RX_DECAP, IEEE80211_HW_DETECTS_COLOR_COLLISION, IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX, + IEEE80211_HW_DISALLOW_PUNCTURING, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -5809,12 +5813,11 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, * ieee80211_remove_key - remove the given key * @keyconf: the parameter passed with the set key * + * Context: Must be called with the wiphy mutex held. + * * Remove the given key. If the key was uploaded to the hardware at the * time this function is called, it is not deleted in the hardware but * instead assumed to have been removed already. - * - * Note that due to locking considerations this function can (currently) - * only be called during key iteration (ieee80211_iter_keys().) */ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf); @@ -6368,12 +6371,12 @@ ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq); * @iter: iterator function that will be called for each key * @iter_data: custom data to pass to the iterator function * + * Context: Must be called with wiphy mutex held; can sleep. + * * This function can be used to iterate all the keys known to * mac80211, even those that weren't previously programmed into * the device. This is intended for use in WoWLAN if the device - * needs reprogramming of the keys during suspend. Note that due - * to locking reasons, it is also only safe to call this at few - * spots since it must hold the RTNL and be able to sleep. + * needs reprogramming of the keys during suspend. * * The order in which the keys are iterated matches the order * in which they were originally installed and handed to the @@ -7435,6 +7438,9 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb) * @vif: interface to set active links on * @active_links: the new active links bitmap * + * Context: Must be called with wiphy mutex held; may sleep; calls + * back into the driver. + * * This changes the active links on an interface. The interface * must be in client mode (in AP mode, all links are always active), * and @active_links must be a subset of the vif's valid_links. @@ -7442,6 +7448,7 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb) * If a link is switched off and another is switched on at the same * time (e.g. active_links going from 0x1 to 0x10) then you will get * a sequence of calls like + * * - change_vif_links(0x11) * - unassign_vif_chanctx(link_id=0) * - change_sta_links(0x11) for each affected STA (the AP) @@ -7451,10 +7458,6 @@ static inline bool ieee80211_is_tx_data(struct sk_buff *skb) * - change_sta_links(0x10) for each affected STA (the AP) * - assign_vif_chanctx(link_id=4) * - change_vif_links(0x10) - * - * Note: This function acquires some mac80211 locks and must not - * be called with any driver locks held that could cause a - * lock dependency inversion. Best call it without locks. */ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links); diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 88b6ef7ce1a6..76f2fd2645b7 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -293,6 +293,7 @@ struct gdma_queue { u32 head; u32 tail; + struct list_head entry; /* Extra fields specific to EQ/CQ. */ union { @@ -328,6 +329,7 @@ struct gdma_queue_spec { void *context; unsigned long log2_throttle_limit; + unsigned int msix_index; } eq; struct { @@ -344,7 +346,9 @@ struct gdma_queue_spec { struct gdma_irq_context { void (*handler)(void *arg); - void *arg; + /* Protect the eq_list */ + spinlock_t lock; + struct list_head eq_list; char name[MANA_IRQ_NAME_SZ]; }; @@ -355,7 +359,6 @@ struct gdma_context { unsigned int max_num_queues; unsigned int max_num_msix; unsigned int num_msix_usable; - struct gdma_resource msix_resource; struct gdma_irq_context *irq_contexts; /* L2 MTU */ diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index cdcafb30d437..aa1716fb0e53 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -21,6 +21,10 @@ struct netdev_rx_queue { #ifdef CONFIG_XDP_SOCKETS struct xsk_buff_pool *pool; #endif + /* NAPI instance for the queue + * Readers and writers must hold RTNL + */ + struct napi_struct *napi; } ____cacheline_aligned_in_smp; /* diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index fe1507c1db82..692d5955911c 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -62,6 +62,8 @@ struct nf_flowtable_type { enum flow_offload_tuple_dir dir, struct nf_flow_rule *flow_rule); void (*free)(struct nf_flowtable *ft); + void (*get)(struct nf_flowtable *ft); + void (*put)(struct nf_flowtable *ft); nf_hookfn *hook; struct module *owner; }; @@ -240,6 +242,11 @@ nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table, } list_add_tail(&block_cb->list, &block->cb_list); + up_write(&flow_table->flow_block_lock); + + if (flow_table->type->get) + flow_table->type->get(flow_table); + return 0; unlock: up_write(&flow_table->flow_block_lock); @@ -262,6 +269,9 @@ nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table, WARN_ON(true); } up_write(&flow_table->flow_block_lock); + + if (flow_table->type->put) + flow_table->type->put(flow_table); } void flow_offload_route_init(struct flow_offload *flow, diff --git a/include/net/netlink.h b/include/net/netlink.h index 167b91348e57..28039e57070a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1214,7 +1214,7 @@ static inline void *nla_data(const struct nlattr *nla) * nla_len - length of payload * @nla: netlink attribute */ -static inline int nla_len(const struct nlattr *nla) +static inline u16 nla_len(const struct nlattr *nla) { return nla->nla_len - NLA_HDRLEN; } diff --git a/include/net/netns/core.h b/include/net/netns/core.h index a91ef9f8de60..78214f1b43a2 100644 --- a/include/net/netns/core.h +++ b/include/net/netns/core.h @@ -13,6 +13,7 @@ struct netns_core { struct ctl_table_header *sysctl_hdr; int sysctl_somaxconn; + int sysctl_optmem_max; u8 sysctl_txrehash; #ifdef CONFIG_PROC_FS diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 73f43f699199..c356c458b340 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -19,8 +19,7 @@ struct hlist_head; struct fib_table; struct sock; struct local_ports { - seqlock_t lock; - int range[2]; + u32 range; /* high << 16 | low */ bool warned; }; @@ -42,6 +41,38 @@ struct inet_timewait_death_row { struct tcp_fastopen_context; struct netns_ipv4 { + /* Cacheline organization can be found documented in + * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. + * Please update the document when adding new fields. + */ + + /* TX readonly hotpath cache lines */ + __cacheline_group_begin(netns_ipv4_read_tx); + u8 sysctl_tcp_early_retrans; + u8 sysctl_tcp_tso_win_divisor; + u8 sysctl_tcp_tso_rtt_log; + u8 sysctl_tcp_autocorking; + int sysctl_tcp_min_snd_mss; + unsigned int sysctl_tcp_notsent_lowat; + int sysctl_tcp_limit_output_bytes; + int sysctl_tcp_min_rtt_wlen; + int sysctl_tcp_wmem[3]; + u8 sysctl_ip_fwd_use_pmtu; + __cacheline_group_end(netns_ipv4_read_tx); + + /* TXRX readonly hotpath cache lines */ + __cacheline_group_begin(netns_ipv4_read_txrx); + u8 sysctl_tcp_moderate_rcvbuf; + __cacheline_group_end(netns_ipv4_read_txrx); + + /* RX readonly hotpath cache line */ + __cacheline_group_begin(netns_ipv4_read_rx); + u8 sysctl_ip_early_demux; + u8 sysctl_tcp_early_demux; + int sysctl_tcp_reordering; + int sysctl_tcp_rmem[3]; + __cacheline_group_end(netns_ipv4_read_rx); + struct inet_timewait_death_row tcp_death_row; struct udp_table *udp_table; @@ -96,17 +127,14 @@ struct netns_ipv4 { u8 sysctl_ip_default_ttl; u8 sysctl_ip_no_pmtu_disc; - u8 sysctl_ip_fwd_use_pmtu; u8 sysctl_ip_fwd_update_priority; u8 sysctl_ip_nonlocal_bind; u8 sysctl_ip_autobind_reuse; /* Shall we try to damage output packets if routing dev changes? */ u8 sysctl_ip_dynaddr; - u8 sysctl_ip_early_demux; #ifdef CONFIG_NET_L3_MASTER_DEV u8 sysctl_raw_l3mdev_accept; #endif - u8 sysctl_tcp_early_demux; u8 sysctl_udp_early_demux; u8 sysctl_nexthop_compat_mode; @@ -119,7 +147,6 @@ struct netns_ipv4 { u8 sysctl_tcp_mtu_probing; int sysctl_tcp_mtu_probe_floor; int sysctl_tcp_base_mss; - int sysctl_tcp_min_snd_mss; int sysctl_tcp_probe_threshold; u32 sysctl_tcp_probe_interval; @@ -135,17 +162,14 @@ struct netns_ipv4 { u8 sysctl_tcp_backlog_ack_defer; u8 sysctl_tcp_pingpong_thresh; - int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; u8 sysctl_tcp_orphan_retries; u8 sysctl_tcp_tw_reuse; int sysctl_tcp_fin_timeout; - unsigned int sysctl_tcp_notsent_lowat; u8 sysctl_tcp_sack; u8 sysctl_tcp_window_scaling; u8 sysctl_tcp_timestamps; - u8 sysctl_tcp_early_retrans; u8 sysctl_tcp_recovery; u8 sysctl_tcp_thin_linear_timeouts; u8 sysctl_tcp_slow_start_after_idle; @@ -161,21 +185,13 @@ struct netns_ipv4 { u8 sysctl_tcp_frto; u8 sysctl_tcp_nometrics_save; u8 sysctl_tcp_no_ssthresh_metrics_save; - u8 sysctl_tcp_moderate_rcvbuf; - u8 sysctl_tcp_tso_win_divisor; u8 sysctl_tcp_workaround_signed_windows; - int sysctl_tcp_limit_output_bytes; int sysctl_tcp_challenge_ack_limit; - int sysctl_tcp_min_rtt_wlen; u8 sysctl_tcp_min_tso_segs; - u8 sysctl_tcp_tso_rtt_log; - u8 sysctl_tcp_autocorking; u8 sysctl_tcp_reflect_tos; int sysctl_tcp_invalid_ratelimit; int sysctl_tcp_pacing_ss_ratio; int sysctl_tcp_pacing_ca_ratio; - int sysctl_tcp_wmem[3]; - int sysctl_tcp_rmem[3]; unsigned int sysctl_tcp_child_ehash_entries; unsigned long sysctl_tcp_comp_sack_delay_ns; unsigned long sysctl_tcp_comp_sack_slack_ns; diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 7dc65774cde5..1d397c1a0043 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -11,7 +11,7 @@ * The page_pool allocator is optimized for recycling page or page fragment used * by skb packet and xdp frame. * - * Basic use involves replacing and alloc_pages() calls with page_pool_alloc(), + * Basic use involves replacing any alloc_pages() calls with page_pool_alloc(), * which allocate memory with or without page splitting depending on the * requested memory size. * @@ -29,7 +29,7 @@ * page allocated from page pool. Page splitting enables memory saving and thus * avoids TLB/cache miss for data access, but there also is some cost to * implement page splitting, mainly some cache line dirtying/bouncing for - * 'struct page' and atomic operation for page->pp_frag_count. + * 'struct page' and atomic operation for page->pp_ref_count. * * The API keeps track of in-flight pages, in order to let API users know when * it is safe to free a page_pool object, the API users must call @@ -37,15 +37,15 @@ * attach the page_pool object to a page_pool-aware object like skbs marked with * skb_mark_for_recycle(). * - * page_pool_put_page() may be called multi times on the same page if a page is - * split into multi fragments. For the last fragment, it will either recycle the - * page, or in case of page->_refcount > 1, it will release the DMA mapping and - * in-flight state accounting. + * page_pool_put_page() may be called multiple times on the same page if a page + * is split into multiple fragments. For the last fragment, it will either + * recycle the page, or in case of page->_refcount > 1, it will release the DMA + * mapping and in-flight state accounting. * * dma_sync_single_range_for_device() is only called for the last fragment when * page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the * last freed fragment to do the sync_for_device operation for all fragments in - * the same page when a page is split, the API user must setup pool->p.max_len + * the same page when a page is split. The API user must setup pool->p.max_len * and pool->p.offset correctly and ensure that page_pool_put_page() is called * with dma_sync_size being -1 for fragment API. */ @@ -210,69 +210,82 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool) return pool->p.dma_dir; } -/* pp_frag_count represents the number of writers who can update the page - * either by updating skb->data or via DMA mappings for the device. - * We can't rely on the page refcnt for that as we don't know who might be - * holding page references and we can't reliably destroy or sync DMA mappings - * of the fragments. +/** + * page_pool_fragment_page() - split a fresh page into fragments + * @page: page to split + * @nr: references to set + * + * pp_ref_count represents the number of outstanding references to the page, + * which will be freed using page_pool APIs (rather than page allocator APIs + * like put_page()). Such references are usually held by page_pool-aware + * objects like skbs marked for page pool recycling. * - * When pp_frag_count reaches 0 we can either recycle the page if the page - * refcnt is 1 or return it back to the memory allocator and destroy any - * mappings we have. + * This helper allows the caller to take (set) multiple references to a + * freshly allocated page. The page must be freshly allocated (have a + * pp_ref_count of 1). This is commonly done by drivers and + * "fragment allocators" to save atomic operations - either when they know + * upfront how many references they will need; or to take MAX references and + * return the unused ones with a single atomic dec(), instead of performing + * multiple atomic inc() operations. */ static inline void page_pool_fragment_page(struct page *page, long nr) { - atomic_long_set(&page->pp_frag_count, nr); + atomic_long_set(&page->pp_ref_count, nr); } -static inline long page_pool_defrag_page(struct page *page, long nr) +static inline long page_pool_unref_page(struct page *page, long nr) { long ret; - /* If nr == pp_frag_count then we have cleared all remaining + /* If nr == pp_ref_count then we have cleared all remaining * references to the page: * 1. 'n == 1': no need to actually overwrite it. * 2. 'n != 1': overwrite it with one, which is the rare case - * for pp_frag_count draining. + * for pp_ref_count draining. * * The main advantage to doing this is that not only we avoid a atomic * update, as an atomic_read is generally a much cheaper operation than * an atomic update, especially when dealing with a page that may be - * partitioned into only 2 or 3 pieces; but also unify the pp_frag_count + * referenced by only 2 or 3 users; but also unify the pp_ref_count * handling by ensuring all pages have partitioned into only 1 piece * initially, and only overwrite it when the page is partitioned into * more than one piece. */ - if (atomic_long_read(&page->pp_frag_count) == nr) { + if (atomic_long_read(&page->pp_ref_count) == nr) { /* As we have ensured nr is always one for constant case using * the BUILD_BUG_ON(), only need to handle the non-constant case - * here for pp_frag_count draining, which is a rare case. + * here for pp_ref_count draining, which is a rare case. */ BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1); if (!__builtin_constant_p(nr)) - atomic_long_set(&page->pp_frag_count, 1); + atomic_long_set(&page->pp_ref_count, 1); return 0; } - ret = atomic_long_sub_return(nr, &page->pp_frag_count); + ret = atomic_long_sub_return(nr, &page->pp_ref_count); WARN_ON(ret < 0); - /* We are the last user here too, reset pp_frag_count back to 1 to + /* We are the last user here too, reset pp_ref_count back to 1 to * ensure all pages have been partitioned into 1 piece initially, * this should be the rare case when the last two fragment users call - * page_pool_defrag_page() currently. + * page_pool_unref_page() currently. */ if (unlikely(!ret)) - atomic_long_set(&page->pp_frag_count, 1); + atomic_long_set(&page->pp_ref_count, 1); return ret; } -static inline bool page_pool_is_last_frag(struct page *page) +static inline void page_pool_ref_page(struct page *page) +{ + atomic_long_inc(&page->pp_ref_count); +} + +static inline bool page_pool_is_last_ref(struct page *page) { - /* If page_pool_defrag_page() returns 0, we were the last user */ - return page_pool_defrag_page(page, 1) == 0; + /* If page_pool_unref_page() returns 0, we were the last user */ + return page_pool_unref_page(page, 1) == 0; } /** @@ -297,10 +310,10 @@ static inline void page_pool_put_page(struct page_pool *pool, * allow registering MEM_TYPE_PAGE_POOL, but shield linker. */ #ifdef CONFIG_PAGE_POOL - if (!page_pool_is_last_frag(page)) + if (!page_pool_is_last_ref(page)) return; - page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct); + page_pool_put_unrefed_page(pool, page, dma_sync_size, allow_direct); #endif } diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index ac286ea8ce2d..76481c465375 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -234,9 +234,9 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, } #endif -void page_pool_put_defragged_page(struct page_pool *pool, struct page *page, - unsigned int dma_sync_size, - bool allow_direct); +void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, + unsigned int dma_sync_size, + bool allow_direct); static inline bool is_page_pool_compiled_in(void) { diff --git a/include/net/sock.h b/include/net/sock.h index 1d6931caf0c3..8b6fe164b218 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2920,7 +2920,6 @@ extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; extern int sysctl_tstamp_allow_data; -extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; diff --git a/include/net/tcp.h b/include/net/tcp.h index 973555cb1d3f..f5ca4abaee8b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1518,17 +1518,22 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) +static inline void __tcp_adjust_rcv_ssthresh(struct sock *sk, u32 new_ssthresh) { int unused_mem = sk_unused_reserved_mem(sk); struct tcp_sock *tp = tcp_sk(sk); - tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); + tp->rcv_ssthresh = min(tp->rcv_ssthresh, new_ssthresh); if (unused_mem) tp->rcv_ssthresh = max_t(u32, tp->rcv_ssthresh, tcp_win_from_space(sk, unused_mem)); } +static inline void tcp_adjust_rcv_ssthresh(struct sock *sk) +{ + __tcp_adjust_rcv_ssthresh(sk, 4U * tcp_sk(sk)->advmss); +} + void tcp_cleanup_rbuf(struct sock *sk, int copied); void __tcp_cleanup_rbuf(struct sock *sk, int copied); diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 4d900ef8dfc3..0f2dcc9e8d46 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -62,11 +62,17 @@ static inline int tcp_ao_maclen(const struct tcp_ao_key *key) return key->maclen; } +/* Use tcp_ao_len_aligned() for TCP header calculations */ static inline int tcp_ao_len(const struct tcp_ao_key *key) { return tcp_ao_maclen(key) + sizeof(struct tcp_ao_hdr); } +static inline int tcp_ao_len_aligned(const struct tcp_ao_key *key) +{ + return round_up(tcp_ao_len(key), 4); +} + static inline unsigned int tcp_ao_digest_size(struct tcp_ao_key *key) { return key->digest_size; diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h index cc00118acca1..d60e8148ff4c 100644 --- a/include/net/tcp_states.h +++ b/include/net/tcp_states.h @@ -22,6 +22,7 @@ enum { TCP_LISTEN, TCP_CLOSING, /* Now a valid state */ TCP_NEW_SYN_RECV, + TCP_BOUND_INACTIVE, /* Pseudo-state for inet_diag */ TCP_MAX_STATES /* Leave at the end! */ }; @@ -43,6 +44,7 @@ enum { TCPF_LISTEN = (1 << TCP_LISTEN), TCPF_CLOSING = (1 << TCP_CLOSING), TCPF_NEW_SYN_RECV = (1 << TCP_NEW_SYN_RECV), + TCPF_BOUND_INACTIVE = (1 << TCP_BOUND_INACTIVE), }; #endif /* _LINUX_TCP_STATES_H */ |