From b18afb6f42292a9154102fed7265ae747bbfbc57 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:09 -0800 Subject: tcp: Move tcp_ns_to_ts() to tcp.h We will support arbitrary SYN Cookie with BPF. When BPF prog validates ACK and kfunc allocates a reqsk, we need to call tcp_ns_to_ts() to calculate an offset of TSval for later use: time t0 : Send SYN+ACK -> tsval = Initial TSval (Random Number) t1 : Recv ACK of 3WHS -> tsoff = TSecr - tcp_ns_to_ts(usec_ts_ok, tcp_clock_ns()) = Initial TSval - t1 t2 : Send ACK -> tsval = t2 + tsoff = Initial TSval + (t2 - t1) = Initial TSval + Time Delta (x) (x) Note that the time delta does not include the initial RTT from t0 to t1. Let's move tcp_ns_to_ts() to tcp.h. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240115205514.68364-2-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index dd78a1181031..114000e71a46 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -577,6 +577,15 @@ static inline u32 tcp_cookie_time(void) return val; } +/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */ +static inline u64 tcp_ns_to_ts(bool usec_ts, u64 val) +{ + if (usec_ts) + return div_u64(val, NSEC_PER_USEC); + + return div_u64(val, NSEC_PER_MSEC); +} + u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); -- cgit From 95e752b5299fa8c90099f7bc2aa1ee3e2e2c95ab Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:10 -0800 Subject: tcp: Move skb_steal_sock() to request_sock.h We will support arbitrary SYN Cookie with BPF. If BPF prog validates ACK and kfunc allocates a reqsk, it will be carried to TCP stack as skb->sk with req->syncookie 1. In skb_steal_sock(), we need to check inet_reqsk(sk)->syncookie to see if the reqsk is created by kfunc. However, inet_reqsk() is not available in sock.h. Let's move skb_steal_sock() to request_sock.h. While at it, we refactor skb_steal_sock() so it returns early if skb->sk is NULL to minimise the following patch. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240115205514.68364-3-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/request_sock.h | 28 ++++++++++++++++++++++++++++ include/net/sock.h | 25 ------------------------- 2 files changed, 28 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 144c39db9898..26c630c40abb 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -83,6 +83,34 @@ static inline struct sock *req_to_sk(struct request_sock *req) return (struct sock *)req; } +/** + * skb_steal_sock - steal a socket from an sk_buff + * @skb: sk_buff to steal the socket from + * @refcounted: is set to true if the socket is reference-counted + * @prefetched: is set to true if the socket was assigned from bpf + */ +static inline struct sock *skb_steal_sock(struct sk_buff *skb, + bool *refcounted, bool *prefetched) +{ + struct sock *sk = skb->sk; + + if (!sk) { + *prefetched = false; + *refcounted = false; + return NULL; + } + + *prefetched = skb_sk_is_prefetched(skb); + if (*prefetched) + *refcounted = sk_is_refcounted(sk); + else + *refcounted = true; + + skb->destructor = NULL; + skb->sk = NULL; + return sk; +} + static inline struct request_sock * reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, bool attach_listener) diff --git a/include/net/sock.h b/include/net/sock.h index a7f815c7cfdf..32a399fdcbb5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2814,31 +2814,6 @@ sk_is_refcounted(struct sock *sk) return !sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE); } -/** - * skb_steal_sock - steal a socket from an sk_buff - * @skb: sk_buff to steal the socket from - * @refcounted: is set to true if the socket is reference-counted - * @prefetched: is set to true if the socket was assigned from bpf - */ -static inline struct sock * -skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched) -{ - if (skb->sk) { - struct sock *sk = skb->sk; - - *refcounted = true; - *prefetched = skb_sk_is_prefetched(skb); - if (*prefetched) - *refcounted = sk_is_refcounted(sk); - skb->destructor = NULL; - skb->sk = NULL; - return sk; - } - *prefetched = false; - *refcounted = false; - return NULL; -} - /* Checks if this SKB belongs to an HW offloaded socket * and whether any SW fallbacks are required based on dev. * Check decrypted mark in case skb_orphan() cleared socket. -- cgit From 8b5ac68fb5ee416537c1214cbacf0ddc4293cce9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:11 -0800 Subject: bpf: tcp: Handle BPF SYN Cookie in skb_steal_sock(). We will support arbitrary SYN Cookie with BPF. If BPF prog validates ACK and kfunc allocates a reqsk, it will be carried to TCP stack as skb->sk with req->syncookie 1. Also, the reqsk has its listener as req->rsk_listener with no refcnt taken. When the TCP stack looks up a socket from the skb, we steal inet_reqsk(skb->sk)->rsk_listener in skb_steal_sock() so that the skb will be processed in cookie_v[46]_check() with the listener. Note that we do not clear skb->sk and skb->destructor so that we can carry the reqsk to cookie_v[46]_check(). Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240115205514.68364-4-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/request_sock.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 26c630c40abb..8839133d6f6b 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -101,10 +101,21 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb, } *prefetched = skb_sk_is_prefetched(skb); - if (*prefetched) + if (*prefetched) { +#if IS_ENABLED(CONFIG_SYN_COOKIES) + if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) { + struct request_sock *req = inet_reqsk(sk); + + *refcounted = false; + sk = req->rsk_listener; + req->rsk_listener = NULL; + return sk; + } +#endif *refcounted = sk_is_refcounted(sk); - else + } else { *refcounted = true; + } skb->destructor = NULL; skb->sk = NULL; -- cgit From 695751e31a63efd2bbe6779873adf1e4deb00cd5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:12 -0800 Subject: bpf: tcp: Handle BPF SYN Cookie in cookie_v[46]_check(). We will support arbitrary SYN Cookie with BPF in the following patch. If BPF prog validates ACK and kfunc allocates a reqsk, it will be carried to cookie_[46]_check() as skb->sk. If skb->sk is not NULL, we call cookie_bpf_check(). Then, we clear skb->sk and skb->destructor, which are needed not to hold refcnt for reqsk and the listener. See the following patch for details. After that, we finish initialisation for the remaining fields with cookie_tcp_reqsk_init(). Note that the server side WScale is set only for non-BPF SYN Cookie. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240115205514.68364-5-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 114000e71a46..dfe99a084a71 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -599,6 +599,26 @@ static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry * dst_feature(dst, RTAX_FEATURE_ECN); } +#if IS_ENABLED(CONFIG_BPF) +static inline bool cookie_bpf_ok(struct sk_buff *skb) +{ + return skb->sk; +} + +struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb); +#else +static inline bool cookie_bpf_ok(struct sk_buff *skb) +{ + return false; +} + +static inline struct request_sock *cookie_bpf_check(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return NULL; +} +#endif + /* From net/ipv6/syncookies.c */ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); -- cgit From e472f88891abbc535a5e16a68a104073985f6061 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 15 Jan 2024 12:55:13 -0800 Subject: bpf: tcp: Support arbitrary SYN Cookie. This patch adds a new kfunc available at TC hook to support arbitrary SYN Cookie. The basic usage is as follows: struct bpf_tcp_req_attrs attrs = { .mss = mss, .wscale_ok = wscale_ok, .rcv_wscale = rcv_wscale, /* Server's WScale < 15 */ .snd_wscale = snd_wscale, /* Client's WScale < 15 */ .tstamp_ok = tstamp_ok, .rcv_tsval = tsval, .rcv_tsecr = tsecr, /* Server's Initial TSval */ .usec_ts_ok = usec_ts_ok, .sack_ok = sack_ok, .ecn_ok = ecn_ok, } skc = bpf_skc_lookup_tcp(...); sk = (struct sock *)bpf_skc_to_tcp_sock(skc); bpf_sk_assign_tcp_reqsk(skb, sk, attrs, sizeof(attrs)); bpf_sk_release(skc); bpf_sk_assign_tcp_reqsk() takes skb, a listener sk, and struct bpf_tcp_req_attrs and allocates reqsk and configures it. Then, bpf_sk_assign_tcp_reqsk() links reqsk with skb and the listener. The notable thing here is that we do not hold refcnt for both reqsk and listener. To differentiate that, we mark reqsk->syncookie, which is only used in TX for now. So, if reqsk->syncookie is 1 in RX, it means that the reqsk is allocated by kfunc. When skb is freed, sock_pfree() checks if reqsk->syncookie is 1, and in that case, we set NULL to reqsk->rsk_listener before calling reqsk_free() as reqsk does not hold a refcnt of the listener. When the TCP stack looks up a socket from the skb, we steal the listener from the reqsk in skb_steal_sock() and create a full sk in cookie_v[46]_check(). The refcnt of reqsk will finally be set to 1 in tcp_get_cookie_sock() after creating a full sk. Note that we can extend struct bpf_tcp_req_attrs in the future when we add a new attribute that is determined in 3WHS. Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240115205514.68364-6-kuniyu@amazon.com Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index dfe99a084a71..451dc1373970 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -600,6 +600,20 @@ static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry * } #if IS_ENABLED(CONFIG_BPF) +struct bpf_tcp_req_attrs { + u32 rcv_tsval; + u32 rcv_tsecr; + u16 mss; + u8 rcv_wscale; + u8 snd_wscale; + u8 ecn_ok; + u8 wscale_ok; + u8 sack_ok; + u8 tstamp_ok; + u8 usec_ts_ok; + u8 reserved[3]; +}; + static inline bool cookie_bpf_ok(struct sk_buff *skb) { return skb->sk; -- cgit From b3f086a7a136d721d112f35fe4cd7272e93cf06b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 18 Jan 2024 13:17:51 -0800 Subject: bpf: Define struct bpf_tcp_req_attrs when CONFIG_SYN_COOKIES=n. kernel test robot reported the warning below: >> net/core/filter.c:11842:13: warning: declaration of 'struct bpf_tcp_req_attrs' will not be visible outside of this function [-Wvisibility] 11842 | struct bpf_tcp_req_attrs *attrs, int attrs__sz) | ^ 1 warning generated. struct bpf_tcp_req_attrs is defined under CONFIG_SYN_COOKIES but used in kfunc without the config. Let's move struct bpf_tcp_req_attrs definition outside of CONFIG_SYN_COOKIES guard. Fixes: e472f88891ab ("bpf: tcp: Support arbitrary SYN Cookie.") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401180418.CUVc0hxF-lkp@intel.com/ Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240118211751.25790-1-kuniyu@amazon.com Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 451dc1373970..58e65af74ad1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -498,6 +498,22 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, struct tcp_options_received *tcp_opt, int mss, u32 tsoff); +#if IS_ENABLED(CONFIG_BPF) +struct bpf_tcp_req_attrs { + u32 rcv_tsval; + u32 rcv_tsecr; + u16 mss; + u8 rcv_wscale; + u8 snd_wscale; + u8 ecn_ok; + u8 wscale_ok; + u8 sack_ok; + u8 tstamp_ok; + u8 usec_ts_ok; + u8 reserved[3]; +}; +#endif + #ifdef CONFIG_SYN_COOKIES /* Syncookies use a monotonic timer which increments every 60 seconds. @@ -600,20 +616,6 @@ static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry * } #if IS_ENABLED(CONFIG_BPF) -struct bpf_tcp_req_attrs { - u32 rcv_tsval; - u32 rcv_tsecr; - u16 mss; - u8 rcv_wscale; - u8 snd_wscale; - u8 ecn_ok; - u8 wscale_ok; - u8 sack_ok; - u8 tstamp_ok; - u8 usec_ts_ok; - u8 reserved[3]; -}; - static inline bool cookie_bpf_ok(struct sk_buff *skb) { return skb->sk; -- cgit From 20df28fb5bd8081a05ec34542bd45e4f3feeced5 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 22 Jan 2024 10:19:55 -0800 Subject: net/ipv6: resolve warning in ip6_fib.c In some configurations, the 'iter' variable in function fib6_repair_tree() is unused, resulting the following warning when compiled with W=1. net/ipv6/ip6_fib.c:1781:6: warning: variable 'iter' set but not used [-Wunused-but-set-variable] 1781 | int iter = 0; | ^ It is unclear what is the advantage of this RT6_TRACE() macro[1], since users can control pr_debug() in runtime, which is better than at compilation time. pr_debug() has no overhead when disabled. Remove the RT6_TRACE() in favor of simple pr_debug() helpers. [1] Link: https://lore.kernel.org/all/ZZwSEJv2HgI0cD4J@gmail.com/ Signed-off-by: Breno Leitao Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240122181955.2391676-2-leitao@debian.org Signed-off-by: Jakub Kicinski --- include/net/ip6_fib.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 9ba6413fd2e3..360b12e61850 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -30,12 +30,6 @@ #define RT6_DEBUG 2 -#if RT6_DEBUG >= 3 -#define RT6_TRACE(x...) pr_debug(x) -#else -#define RT6_TRACE(x...) do { ; } while (0) -#endif - struct rt6_info; struct fib6_info; -- cgit From 8f500fbc6c655976c8062b1f1e55bd0b3095d6c2 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 2 Jan 2024 21:35:34 +0200 Subject: wifi: mac80211: process and save negotiated TID to Link mapping request An MLD may send TID-to-Link mapping request frame to negotiate TID to link mapping with a peer MLD. Support handling negotiated TID-to-Link mapping request frame by parsing the frame, asking the driver whether it supports the received mapping or not, and sending a TID-to-Link mapping response to the AP MLD. Theoretically, links that became inactive due to the received TID-to-Link mapping request, can be selected to be activated but this would require tearing down the negotiated TID-to-Link mapping, which is still not supported. Signed-off-by: Ayala Beker Reviewed-by: Johannes Berg Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.0bc1a24fcc9d.Ie72e47dc6f8c77d4a2f0947b775ef6367fe0edac@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d400fe2e8668..6490b92d5cc1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -342,6 +342,7 @@ struct ieee80211_vif_chanctx_switch { * status changed. * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed. * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. + * @BSS_CHANGED_MLD_TTLM: TID to link mapping was changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -378,6 +379,7 @@ enum ieee80211_bss_change { BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32), BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), + BSS_CHANGED_MLD_TTLM = BIT_ULL(34), /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -1845,6 +1847,35 @@ struct ieee80211_vif_cfg { u8 ap_addr[ETH_ALEN] __aligned(2); }; +#define IEEE80211_TTLM_NUM_TIDS 8 + +/** + * struct ieee80211_neg_ttlm - negotiated TID to link map info + * + * @downlink: bitmap of active links per TID for downlink, or 0 if mapping for + * this TID is not included. + * @uplink: bitmap of active links per TID for uplink, or 0 if mapping for this + * TID is not included. + * @valid: info is valid or not. + */ +struct ieee80211_neg_ttlm { + u16 downlink[IEEE80211_TTLM_NUM_TIDS]; + u16 uplink[IEEE80211_TTLM_NUM_TIDS]; + bool valid; +}; + +/** + * enum ieee80211_neg_ttlm_res - return value for negotiated TTLM handling + * @NEG_TTLM_RES_ACCEPT: accept the request + * @NEG_TTLM_RES_REJECT: reject the request + * @NEG_TTLM_RES_SUGGEST_PREFERRED: reject and suggest a new mapping + */ +enum ieee80211_neg_ttlm_res { + NEG_TTLM_RES_ACCEPT, + NEG_TTLM_RES_REJECT, + NEG_TTLM_RES_SUGGEST_PREFERRED +}; + /** * struct ieee80211_vif - per-interface data * @@ -1863,6 +1894,11 @@ struct ieee80211_vif_cfg { * API calls meant for that purpose. * @dormant_links: bitmap of valid but disabled links, or 0 for non-MLO. * Must be a subset of valid_links. + * @suspended_links: subset of dormant_links representing links that are + * suspended. + * 0 for non-MLO. + * @neg_ttlm: negotiated TID to link mapping info. + * see &struct ieee80211_neg_ttlm. * @addr: address of this interface * @p2p: indicates whether this AP or STA interface is a p2p * interface, i.e. a GO or p2p-sta respectively @@ -1900,7 +1936,8 @@ struct ieee80211_vif { struct ieee80211_vif_cfg cfg; struct ieee80211_bss_conf bss_conf; struct ieee80211_bss_conf __rcu *link_conf[IEEE80211_MLD_MAX_NUM_LINKS]; - u16 valid_links, active_links, dormant_links; + u16 valid_links, active_links, dormant_links, suspended_links; + struct ieee80211_neg_ttlm neg_ttlm; u8 addr[ETH_ALEN] __aligned(2); bool p2p; @@ -4293,6 +4330,10 @@ struct ieee80211_prep_tx_info { * flow offloading for flows originating from the vif. * Note that the driver must not assume that the vif driver_data is valid * at this point, since the callback can be called during netdev teardown. + * @can_neg_ttlm: for managed interface, requests the driver to determine + * if the requested TID-To-Link mapping can be accepted or not. + * If it's not accepted the driver may suggest a preferred mapping and + * modify @ttlm parameter with the suggested TID-to-Link mapping. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4673,6 +4714,9 @@ struct ieee80211_ops { struct net_device *dev, enum tc_setup_type type, void *type_data); + enum ieee80211_neg_ttlm_res + (*can_neg_ttlm)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_neg_ttlm *ttlm); }; /** -- cgit From f7660b3f584aadd25dde18aa1902488577a15863 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Tue, 2 Jan 2024 21:35:37 +0200 Subject: wifi: mac80211: add support for negotiated TTLM request Update neg_ttlm and active_links according to the new mapping, and send a negotiated TID-to-link map request with the new mapping. Signed-off-by: Ayala Beker Reviewed-by: Gregory Greenman Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.eeb385d771df.I2a5441c14421de884dbd93d1624ce7bb2c944833@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6490b92d5cc1..84cc66dd93c1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1810,9 +1810,11 @@ enum ieee80211_offload_flags { * @ps: power-save mode (STA only). This flag is NOT affected by * offchannel/dynamic_ps operations. * @aid: association ID number, valid only when @assoc is true - * @eml_cap: EML capabilities as described in P802.11be_D2.2 Figure 9-1002k. + * @eml_cap: EML capabilities as described in P802.11be_D4.1 Figure 9-1001j. * @eml_med_sync_delay: Medium Synchronization delay as described in - * P802.11be_D2.2 Figure 9-1002j. + * P802.11be_D4.1 Figure 9-1001i. + * @mld_capa_op: MLD Capabilities and Operations per P802.11be_D4.1 + * Figure 9-1001k * @arp_addr_list: List of IPv4 addresses for hardware ARP filtering. The * may filter ARP queries targeted for other addresses than listed here. * The driver must allow ARP queries targeted for all address listed here @@ -1837,6 +1839,7 @@ struct ieee80211_vif_cfg { u16 aid; u16 eml_cap; u16 eml_med_sync_delay; + u16 mld_capa_op; __be32 arp_addr_list[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; int arp_addr_cnt; -- cgit From 2518e89d5b1913c360f8e4cd9fc6eda6146b8800 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jan 2024 21:35:39 +0200 Subject: wifi: cfg80211: add support for SPP A-MSDUs Add SPP (signaling and payload protected) AMSDU support. Since userspace has to build the RSNX element, add an extended feature flag to indicate that this is supported. In order to avoid downgrade/mismatch attacks, add a flag to the assoc command on the station side, so that we can be sure that the value of the flag comes from the same RSNX element that will be validated by the supplicant against the 4-way-handshake. If we just pulled the data out of a beacon/probe response, we could theoretically look an RSNX element from a different frame, with a different value for this flag, than the supplicant is using to validate in the 4-way-handshake. Note that this patch is only geared towards software crypto implementations or hardware ones that can perfectly implement SPP A-MSDUs, i.e. are able to switch the AAD construction on the fly for each TX/RX frame. For more limited hardware implementations, more capability advertisement would be required, e.g. if the hardware has no way to switch this on the fly but has only a global configuration that must apply to all stations. The driver could of course *reject* mismatches, but the supplicant must know so it can do things like not negotiating SPP A-MSDUs on a T-DLS link when connected to an AP that doesn't support it, or similar. Signed-off-by: Johannes Berg Signed-off-by: Daniel Gabay Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.fadac8df7030.I9240aebcba1be49636a73c647ed0af862713fc6f@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cf79656ce09c..56bce924bec6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3059,6 +3059,7 @@ struct cfg80211_assoc_link { * @CONNECT_REQ_MLO_SUPPORT: Userspace indicates support for handling MLD links. * Drivers shall disable MLO features for the current association if this * flag is not set. + * @ASSOC_REQ_SPP_AMSDU: SPP A-MSDUs will be used on this connection (if any) */ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HT = BIT(0), @@ -3068,6 +3069,7 @@ enum cfg80211_assoc_req_flags { ASSOC_REQ_DISABLE_HE = BIT(4), ASSOC_REQ_DISABLE_EHT = BIT(5), CONNECT_REQ_MLO_SUPPORT = BIT(6), + ASSOC_REQ_SPP_AMSDU = BIT(7), }; /** -- cgit From 3b220ed8b2172fd8edb22a62a5c3a9a9c9f2b6bd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jan 2024 21:35:40 +0200 Subject: wifi: mac80211: add support for SPP A-MSDUs If software crypto is used, simply add support for SPP A-MSDUs (and use it whenever enabled as required by the cfg80211 API). If hardware crypto is used, leave it up to the driver to set the NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT flag and then check sta->spp_amsdu or the IEEE80211_KEY_FLAG_SPP_AMSDU key flag. Signed-off-by: Johannes Berg Signed-off-by: Daniel Gabay Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.b8ada4514e2b.I1ac25d5f158165b5a88062a5a5e4c4fbeecf9a5d@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 84cc66dd93c1..8d6ae22c09bf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2081,6 +2081,8 @@ static inline bool lockdep_vif_wiphy_mutex_held(struct ieee80211_vif *vif) * @IEEE80211_KEY_FLAG_GENERATE_MMIE: This flag should be set by the driver * for a AES_CMAC key to indicate that it requires sequence number * generation only + * @IEEE80211_KEY_FLAG_SPP_AMSDU: SPP A-MSDUs can be used with this key + * (set by mac80211 from the sta->spp_amsdu flag) */ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0), @@ -2094,6 +2096,7 @@ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_PUT_MIC_SPACE = BIT(8), IEEE80211_KEY_FLAG_NO_AUTO_TX = BIT(9), IEEE80211_KEY_FLAG_GENERATE_MMIE = BIT(10), + IEEE80211_KEY_FLAG_SPP_AMSDU = BIT(11), }; /** @@ -2392,6 +2395,7 @@ struct ieee80211_link_sta { * would be assigned to link[link_id] where link_id is the id assigned * by the AP. * @valid_links: bitmap of valid links, or 0 for non-MLO + * @spp_amsdu: indicates whether the STA uses SPP A-MSDU or not. */ struct ieee80211_sta { u8 addr[ETH_ALEN]; @@ -2405,6 +2409,7 @@ struct ieee80211_sta { bool tdls_initiator; bool mfp; bool mlo; + bool spp_amsdu; u8 max_amsdu_subframes; struct ieee80211_sta_aggregates *cur; -- cgit From a923ff876f4b6133a093482a6d465cde3bc2e65c Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 25 Jan 2024 14:55:47 -0800 Subject: Revert "nl80211/cfg80211: Specify band specific min RSSI thresholds with sched scan" This *mostly* reverts commit 1e1b11b6a111 ("nl80211/cfg80211: Specify band specific min RSSI thresholds with sched scan"). During the review of a new patch [1] it was observed that the functionality being modified was not actually being used by any in-tree driver. Further research determined that the functionality was originally introduced to support a new Android interface, but that interface was subsequently abandoned. Since the functionality has apparently never been used, remove it. However, to mantain the sanctity of the UABI, keep the nl80211.h assignments, but clearly mark them as obsolete. Cc: Lin Ma Cc: Vamsi Krishna Link: https://lore.kernel.org/linux-wireless/20240119151201.8670-1-linma@zju.edu.cn/ [1] Signed-off-by: Jeff Johnson Link: https://msgid.link/20240125-for-next-v1-1-fd79e01c6c09@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 56bce924bec6..51b9e6fa12f8 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2695,19 +2695,11 @@ static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask) * @bssid: BSSID to be matched; may be all-zero BSSID in case of SSID match * or no match (RSSI only) * @rssi_thold: don't report scan results below this threshold (in s32 dBm) - * @per_band_rssi_thold: Minimum rssi threshold for each band to be applied - * for filtering out scan results received. Drivers advertise this support - * of band specific rssi based filtering through the feature capability - * %NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD. These band - * specific rssi thresholds take precedence over rssi_thold, if specified. - * If not specified for any band, it will be assigned with rssi_thold of - * corresponding matchset. */ struct cfg80211_match_set { struct cfg80211_ssid ssid; u8 bssid[ETH_ALEN]; s32 rssi_thold; - s32 per_band_rssi_thold[NUM_NL80211_BANDS]; }; /** -- cgit From 97af84a6bba2ab2b9c704c08e67de3b5ea551bb2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:53 -0800 Subject: af_unix: Do not use atomic ops for unix_sk(sk)->inflight. When touching unix_sk(sk)->inflight, we are always under spin_lock(&unix_gc_lock). Let's convert unix_sk(sk)->inflight to the normal unsigned long. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123170856.41348-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 49c4640027d8..ac38b63db554 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -61,7 +61,7 @@ struct unix_sock { struct mutex iolock, bindlock; struct sock *peer; struct list_head link; - atomic_long_t inflight; + unsigned long inflight; spinlock_t lock; unsigned long gc_flags; #define UNIX_GC_CANDIDATE 0 -- cgit From 5b17307bd0789edea0675d524a2b277b93bbde62 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:54 -0800 Subject: af_unix: Return struct unix_sock from unix_get_socket(). Currently, unix_get_socket() returns struct sock, but after calling it, we always cast it to unix_sk(). Let's return struct unix_sock from unix_get_socket(). Signed-off-by: Kuniyuki Iwashima Acked-by: Pavel Begunkov Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240123170856.41348-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index ac38b63db554..2c98ef95017b 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -14,7 +14,7 @@ void unix_destruct_scm(struct sk_buff *skb); void io_uring_destruct_scm(struct sk_buff *skb); void unix_gc(void); void wait_for_unix_gc(void); -struct sock *unix_get_socket(struct file *filp); +struct unix_sock *unix_get_socket(struct file *filp); struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_MOD (256 - 1) -- cgit From d9f21b3613337b55cc9d4a6ead484dca68475143 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Jan 2024 09:08:56 -0800 Subject: af_unix: Try to run GC async. If more than 16000 inflight AF_UNIX sockets exist and the garbage collector is not running, unix_(dgram|stream)_sendmsg() call unix_gc(). Also, they wait for unix_gc() to complete. In unix_gc(), all inflight AF_UNIX sockets are traversed at least once, and more if they are the GC candidate. Thus, sendmsg() significantly slows down with too many inflight AF_UNIX sockets. However, if a process sends data with no AF_UNIX FD, the sendmsg() call does not need to wait for GC. After this change, only the process that meets the condition below will be blocked under such a situation. 1) cmsg contains AF_UNIX socket 2) more than 32 AF_UNIX sent by the same user are still inflight Note that even a sendmsg() call that does not meet the condition but has AF_UNIX FD will be blocked later in unix_scm_to_skb() by the spinlock, but we allow that as a bonus for sane users. The results below are the time spent in unix_dgram_sendmsg() sending 1 byte of data with no FD 4096 times on a host where 32K inflight AF_UNIX sockets exist. Without series: the sane sendmsg() needs to wait gc unreasonably. $ sudo /usr/share/bcc/tools/funclatency -p 11165 unix_dgram_sendmsg Tracing 1 functions for "unix_dgram_sendmsg"... Hit Ctrl-C to end. ^C nsecs : count distribution [...] 524288 -> 1048575 : 0 | | 1048576 -> 2097151 : 3881 |****************************************| 2097152 -> 4194303 : 214 |** | 4194304 -> 8388607 : 1 | | avg = 1825567 nsecs, total: 7477526027 nsecs, count: 4096 With series: the sane sendmsg() can finish much faster. $ sudo /usr/share/bcc/tools/funclatency -p 8702 unix_dgram_sendmsg Tracing 1 functions for "unix_dgram_sendmsg"... Hit Ctrl-C to end. ^C nsecs : count distribution [...] 128 -> 255 : 0 | | 256 -> 511 : 4092 |****************************************| 512 -> 1023 : 2 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 0 | | 4096 -> 8191 : 1 | | 8192 -> 16383 : 1 | | avg = 410 nsecs, total: 1680510 nsecs, count: 4096 Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240123170856.41348-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 12 ++++++++++-- include/net/scm.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2c98ef95017b..f045bbd9017d 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -8,13 +8,21 @@ #include #include +#if IS_ENABLED(CONFIG_UNIX) +struct unix_sock *unix_get_socket(struct file *filp); +#else +static inline struct unix_sock *unix_get_socket(struct file *filp) +{ + return NULL; +} +#endif + void unix_inflight(struct user_struct *user, struct file *fp); void unix_notinflight(struct user_struct *user, struct file *fp); void unix_destruct_scm(struct sk_buff *skb); void io_uring_destruct_scm(struct sk_buff *skb); void unix_gc(void); -void wait_for_unix_gc(void); -struct unix_sock *unix_get_socket(struct file *filp); +void wait_for_unix_gc(struct scm_fp_list *fpl); struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_MOD (256 - 1) diff --git a/include/net/scm.h b/include/net/scm.h index cf68acec4d70..92276a2c5543 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -25,6 +25,7 @@ struct scm_creds { struct scm_fp_list { short count; + short count_unix; short max; struct user_struct *user; struct file *fp[SCM_MAX_FD]; -- cgit From 31bf508be656a429a17e3adb31e9acae5c1a6299 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 21 Dec 2023 14:31:59 +0100 Subject: netfilter: nf_tables: Implement table adoption support Allow a new process to take ownership of a previously owned table, useful mostly for firewall management services restarting or suspending when idle. By extending __NFT_TABLE_F_UPDATE, the on/off/on check in nf_tables_updtable() also covers table adoption, although it is actually not needed: Table adoption is irreversible because nf_tables_updtable() rejects attempts to drop NFT_TABLE_F_OWNER so table->nlpid setting can happen just once within the transaction. If the transaction commences, table's nlpid and flags fields are already set and no further action is required. If it aborts, the table returns to orphaned state. Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/net/netfilter/nf_tables.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4e1ea18eb5f0..ac7c94d3648e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1271,6 +1271,12 @@ static inline bool nft_table_has_owner(const struct nft_table *table) return table->flags & NFT_TABLE_F_OWNER; } +static inline bool nft_table_is_orphan(const struct nft_table *table) +{ + return (table->flags & (NFT_TABLE_F_OWNER | NFT_TABLE_F_PERSIST)) == + NFT_TABLE_F_PERSIST; +} + static inline bool nft_base_chain_netdev(int family, u32 hooknum) { return family == NFPROTO_NETDEV || -- cgit From d80a52335374e484a4ff2afdc9af843e73273945 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 27 Jan 2024 14:25:09 +0100 Subject: ethtool: replace struct ethtool_eee with a new struct ethtool_keee on kernel side In order to pass EEE link modes beyond bit 32 to userspace we have to complement the 32 bit bitmaps in struct ethtool_eee with linkmode bitmaps. Therefore, similar to ethtool_link_settings and ethtool_link_ksettings, add a struct ethtool_keee. In a first step it's an identical copy of ethtool_eee. This patch simply does a s/ethtool_eee/ethtool_keee/g for all users. No functional change intended. Suggested-by: Andrew Lunn Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/net/dsa.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/dsa.h b/include/net/dsa.h index 82135fbdb1e6..7c0da9effe4e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -991,9 +991,9 @@ struct dsa_switch_ops { * Port's MAC EEE settings */ int (*set_mac_eee)(struct dsa_switch *ds, int port, - struct ethtool_eee *e); + struct ethtool_keee *e); int (*get_mac_eee)(struct dsa_switch *ds, int port, - struct ethtool_eee *e); + struct ethtool_keee *e); /* EEPROM access */ int (*get_eeprom_len)(struct dsa_switch *ds); -- cgit From 11498715f266a3fb4caabba9dd575636cbcaa8f1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 29 Jan 2024 11:04:34 -0800 Subject: af_unix: Remove io_uring code for GC. Since commit 705318a99a13 ("io_uring/af_unix: disable sending io_uring over sockets"), io_uring's unix socket cannot be passed via SCM_RIGHTS, so it does not contribute to cyclic reference and no longer be candidate for garbage collection. Also, commit 6e5e6d274956 ("io_uring: drop any code related to SCM_RIGHTS") cleaned up SCM_RIGHTS code in io_uring. Let's do it in AF_UNIX as well by reverting commit 0091bfc81741 ("io_uring/af_unix: defer registered files gc to io_uring release") and commit 10369080454d ("net: reclaim skb->scm_io_uring bit"). Signed-off-by: Kuniyuki Iwashima Acked-by: Jens Axboe Link: https://lore.kernel.org/r/20240129190435.57228-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index f045bbd9017d..9e39b2ec4524 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -20,7 +20,6 @@ static inline struct unix_sock *unix_get_socket(struct file *filp) void unix_inflight(struct user_struct *user, struct file *fp); void unix_notinflight(struct user_struct *user, struct file *fp); void unix_destruct_scm(struct sk_buff *skb); -void io_uring_destruct_scm(struct sk_buff *skb); void unix_gc(void); void wait_for_unix_gc(struct scm_fp_list *fpl); struct sock *unix_peer_get(struct sock *sk); -- cgit From 99a7a5b9943ea2d05fb0dee38e4ae2290477ed83 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 29 Jan 2024 11:04:35 -0800 Subject: af_unix: Remove CONFIG_UNIX_SCM. Originally, the code related to garbage collection was all in garbage.c. Commit f4e65870e5ce ("net: split out functions related to registering inflight socket files") moved some functions to scm.c for io_uring and added CONFIG_UNIX_SCM just in case AF_UNIX was built as module. However, since commit 97154bcf4d1b ("af_unix: Kconfig: make CONFIG_UNIX bool"), AF_UNIX is no longer built separately. Also, io_uring does not support SCM_RIGHTS now. Let's move the functions back to garbage.c Signed-off-by: Kuniyuki Iwashima Acked-by: Jens Axboe Link: https://lore.kernel.org/r/20240129190435.57228-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/af_unix.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 9e39b2ec4524..54e346152eb1 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -17,19 +17,20 @@ static inline struct unix_sock *unix_get_socket(struct file *filp) } #endif +extern spinlock_t unix_gc_lock; +extern unsigned int unix_tot_inflight; + void unix_inflight(struct user_struct *user, struct file *fp); void unix_notinflight(struct user_struct *user, struct file *fp); -void unix_destruct_scm(struct sk_buff *skb); void unix_gc(void); void wait_for_unix_gc(struct scm_fp_list *fpl); + struct sock *unix_peer_get(struct sock *sk); #define UNIX_HASH_MOD (256 - 1) #define UNIX_HASH_SIZE (256 * 2) #define UNIX_HASH_BITS 8 -extern unsigned int unix_tot_inflight; - struct unix_address { refcount_t refcnt; int len; -- cgit From b26577001af49a20f09770fd6e6cfd10d5daac93 Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Thu, 1 Feb 2024 14:09:40 +0100 Subject: net/sched: Add helper macros with module names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macros are preparation for adding module aliases en mass in a separate commit. Although it would be tempting to create aliases like cls-foo for name cls_foo, this could not be used because modprobe utilities treat '-' and '_' interchangeably. In the end, the naming follows pattern of proto modules in linux/net.h. Signed-off-by: Michal Koutný Acked-by: Jamal Hadi Salim Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201130943.19536-2-mkoutny@suse.com Signed-off-by: Jakub Kicinski --- include/net/act_api.h | 2 ++ include/net/pkt_cls.h | 2 ++ include/net/pkt_sched.h | 2 ++ 3 files changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index e1e5e72b901e..77ee0c657e2c 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -201,6 +201,8 @@ int tcf_idr_release(struct tc_action *a, bool bind); int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); int tcf_unregister_action(struct tc_action_ops *a, struct pernet_operations *ops); +#define NET_ACT_ALIAS_PREFIX "net-act-" +#define MODULE_ALIAS_NET_ACT(kind) MODULE_ALIAS(NET_ACT_ALIAS_PREFIX kind) int tcf_action_destroy(struct tc_action *actions[], int bind); int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index f308e8268651..a4ee43f493bb 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -24,6 +24,8 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); void unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +#define NET_CLS_ALIAS_PREFIX "net-cls-" +#define MODULE_ALIAS_NET_CLS(kind) MODULE_ALIAS(NET_CLS_ALIAS_PREFIX kind) struct tcf_block_ext_info { enum flow_block_binder_type binder_type; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 1e200d9a066d..d7b7b6cd4aa1 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -100,6 +100,8 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, int register_qdisc(struct Qdisc_ops *qops); void unregister_qdisc(struct Qdisc_ops *qops); +#define NET_SCH_ALIAS_PREFIX "net-sch-" +#define MODULE_ALIAS_NET_SCH(id) MODULE_ALIAS(NET_SCH_ALIAS_PREFIX id) void qdisc_get_default(char *id, size_t len); int qdisc_set_default(const char *id); -- cgit From ffabe98cb576097b77d404d39e8b3df03caa986a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Feb 2024 10:11:06 +0000 Subject: net: make dev_unreg_count global We can use a global dev_unreg_count counter instead of a per netns one. As a bonus we can factorize the changes done on it for bulk device removals. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/net_namespace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 13b3a4e29fdb..cd0c2eedbb5e 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -67,8 +67,6 @@ struct net { */ spinlock_t rules_mod_lock; - atomic_t dev_unreg_count; - unsigned int dev_base_seq; /* protected by rtnl_mutex */ u32 ifindex; -- cgit From 89304f91bf8efe832557b00f034493420f16dbdb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Feb 2024 10:14:03 +0000 Subject: sctp: preserve const qualifier in sctp_sk() We can change sctp_sk() to propagate its argument const qualifier, thanks to container_of_const(). Signed-off-by: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Xin Long Acked-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5a24d6d8522a..f24a1bbcb3ef 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -242,10 +242,7 @@ struct sctp_sock { int do_auto_asconf; }; -static inline struct sctp_sock *sctp_sk(const struct sock *sk) -{ - return (struct sctp_sock *)sk; -} +#define sctp_sk(ptr) container_of_const(ptr, struct sctp_sock, inet.sk) static inline struct sock *sctp_opt2sk(const struct sctp_sock *sp) { -- cgit From fd2bc4195d5107f88c1b90e1ec935888ccbfc5c0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 3 Oct 2023 20:57:20 +0300 Subject: xfrm: generalize xdo_dev_state_update_curlft to allow statistics update In order to allow drivers to fill all statistics, change the name of xdo_dev_state_update_curlft to be xdo_dev_state_update_stats. Acked-by: Steffen Klassert Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/net/xfrm.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1d107241b901..4ca2f3205190 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1578,21 +1578,18 @@ struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, unsigned short family); int xfrm_state_check_expire(struct xfrm_state *x); #ifdef CONFIG_XFRM_OFFLOAD -static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) +static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) { struct xfrm_dev_offload *xdo = &x->xso; struct net_device *dev = xdo->dev; - if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) - return; - if (dev && dev->xfrmdev_ops && - dev->xfrmdev_ops->xdo_dev_state_update_curlft) - dev->xfrmdev_ops->xdo_dev_state_update_curlft(x); + dev->xfrmdev_ops->xdo_dev_state_update_stats) + dev->xfrmdev_ops->xdo_dev_state_update_stats(x); } #else -static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) {} +static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) {} #endif void xfrm_state_insert(struct xfrm_state *x); int xfrm_state_add(struct xfrm_state *x); -- cgit From f9f221c98fd83df518fbb2f5ad33980cfedfe1bf Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Oct 2023 14:11:48 +0300 Subject: xfrm: get global statistics from the offloaded device Iterate over all SAs in order to fill global IPsec statistics. Acked-by: Steffen Klassert Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- include/net/xfrm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4ca2f3205190..57c743b7e4fe 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -51,8 +51,10 @@ #ifdef CONFIG_XFRM_STATISTICS #define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) +#define XFRM_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.xfrm_statistics, field, val) #else #define XFRM_INC_STATS(net, field) ((void)(net)) +#define XFRM_ADD_STATS(net, field, val) ((void)(net)) #endif @@ -1577,6 +1579,7 @@ struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, unsigned short family); int xfrm_state_check_expire(struct xfrm_state *x); +void xfrm_state_update_stats(struct net *net); #ifdef CONFIG_XFRM_OFFLOAD static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) { -- cgit From 03ba6dc035c60991033529e630bd1552b2bca4d7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 2 Feb 2024 17:37:46 +0100 Subject: net: dst: Make dst_destroy() static and return void. Since commit 52df157f17e56 ("xfrm: take refcnt of dst when creating struct xfrm_dst bundle") dst_destroy() returns only NULL and no caller cares about the return value. There are no in in-tree users of dst_destroy() outside of the file. Make dst_destroy() static and return void. Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240202163746.2489150-1-bigeasy@linutronix.de Signed-off-by: Paolo Abeni --- include/net/dst.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index f5dfc8fb7b37..0aa331bd2fdb 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -390,7 +390,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, void dst_init(struct dst_entry *dst, struct dst_ops *ops, struct net_device *dev, int initial_obsolete, unsigned short flags); -struct dst_entry *dst_destroy(struct dst_entry *dst); void dst_dev_put(struct dst_entry *dst); static inline void dst_confirm(struct dst_entry *dst) -- cgit From 240fd405528bbf7fafa0559202ca7aa524c9cd96 Mon Sep 17 00:00:00 2001 From: Aahil Awatramani Date: Fri, 2 Feb 2024 17:58:58 +0000 Subject: bonding: Add independent control state machine Add support for the independent control state machine per IEEE 802.1AX-2008 5.4.15 in addition to the existing implementation of the coupled control state machine. Introduces two new states, AD_MUX_COLLECTING and AD_MUX_DISTRIBUTING in the LACP MUX state machine for separated handling of an initial Collecting state before the Collecting and Distributing state. This enables a port to be in a state where it can receive incoming packets while not still distributing. This is useful for reducing packet loss when a port begins distributing before its partner is able to collect. Added new functions such as bond_set_slave_tx_disabled_flags and bond_set_slave_rx_enabled_flags to precisely manage the port's collecting and distributing states. Previously, there was no dedicated method to disable TX while keeping RX enabled, which this patch addresses. Note that the regular flow process in the kernel's bonding driver remains unaffected by this patch. The extension requires explicit opt-in by the user (in order to ensure no disruptions for existing setups) via netlink support using the new bonding parameter coupled_control. The default value for coupled_control is set to 1 so as to preserve existing behaviour. Signed-off-by: Aahil Awatramani Reviewed-by: Hangbin Liu Link: https://lore.kernel.org/r/20240202175858.1573852-1-aahila@google.com Signed-off-by: Paolo Abeni --- include/net/bond_3ad.h | 2 ++ include/net/bond_options.h | 1 + include/net/bonding.h | 23 +++++++++++++++++++++++ 3 files changed, 26 insertions(+) (limited to 'include/net') diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index c5e57c6bd873..9ce5ac2bfbad 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -54,6 +54,8 @@ typedef enum { AD_MUX_DETACHED, /* mux machine */ AD_MUX_WAITING, /* mux machine */ AD_MUX_ATTACHED, /* mux machine */ + AD_MUX_COLLECTING, /* mux machine */ + AD_MUX_DISTRIBUTING, /* mux machine */ AD_MUX_COLLECTING_DISTRIBUTING /* mux machine */ } mux_states_t; diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 69292ecc0325..473a0147769e 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -76,6 +76,7 @@ enum { BOND_OPT_MISSED_MAX, BOND_OPT_NS_TARGETS, BOND_OPT_PRIO, + BOND_OPT_COUPLED_CONTROL, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 5b8b1b644a2d..b61fb1aa3a56 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -148,6 +148,7 @@ struct bond_params { #if IS_ENABLED(CONFIG_IPV6) struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; #endif + int coupled_control; /* 2 bytes of padding : see ether_addr_equal_64bits() */ u8 ad_actor_system[ETH_ALEN + 2]; @@ -167,6 +168,7 @@ struct slave { u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ + rx_disabled:1, /* indicates whether slave's Rx is disabled */ should_notify:1, /* indicates whether the state changed */ should_notify_link:1; /* indicates whether the link changed */ u8 duplex; @@ -568,6 +570,14 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave, bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); if (!slave->bond->params.all_slaves_active) slave->inactive = 1; + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) + slave->rx_disabled = 1; +} + +static inline void bond_set_slave_tx_disabled_flags(struct slave *slave, + bool notify) +{ + bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); } static inline void bond_set_slave_active_flags(struct slave *slave, @@ -575,6 +585,14 @@ static inline void bond_set_slave_active_flags(struct slave *slave, { bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify); slave->inactive = 0; + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) + slave->rx_disabled = 0; +} + +static inline void bond_set_slave_rx_enabled_flags(struct slave *slave, + bool notify) +{ + slave->rx_disabled = 0; } static inline bool bond_is_slave_inactive(struct slave *slave) @@ -582,6 +600,11 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } +static inline bool bond_is_slave_rx_disabled(struct slave *slave) +{ + return slave->rx_disabled; +} + static inline void bond_propose_link_state(struct slave *slave, int state) { slave->link_new_state = state; -- cgit From 23c5ae6d467520987dbc8682c3ae6ea0e80a5f27 Mon Sep 17 00:00:00 2001 From: George Guo Date: Sun, 4 Feb 2024 10:35:31 +0800 Subject: netlabel: cleanup struct netlbl_lsm_catmap Simplify the code from macro NETLBL_CATMAP_MAPTYPE to u64, and fix warning "Macros with complex values should be enclosed in parentheses" on "#define NETLBL_CATMAP_BIT (NETLBL_CATMAP_MAPTYPE)0x01", which is modified to "#define NETLBL_CATMAP_BIT ((u64)0x01)". Signed-off-by: George Guo Acked-by: Paul Moore Signed-off-by: David S. Miller --- include/net/netlabel.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 43ae50337685..f3ab0b8a4b18 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -145,15 +145,14 @@ struct netlbl_lsm_cache { * processing. * */ -#define NETLBL_CATMAP_MAPTYPE u64 #define NETLBL_CATMAP_MAPCNT 4 -#define NETLBL_CATMAP_MAPSIZE (sizeof(NETLBL_CATMAP_MAPTYPE) * 8) +#define NETLBL_CATMAP_MAPSIZE (sizeof(u64) * 8) #define NETLBL_CATMAP_SIZE (NETLBL_CATMAP_MAPSIZE * \ NETLBL_CATMAP_MAPCNT) -#define NETLBL_CATMAP_BIT (NETLBL_CATMAP_MAPTYPE)0x01 +#define NETLBL_CATMAP_BIT ((u64)0x01) struct netlbl_lsm_catmap { u32 startbit; - NETLBL_CATMAP_MAPTYPE bitmap[NETLBL_CATMAP_MAPCNT]; + u64 bitmap[NETLBL_CATMAP_MAPCNT]; struct netlbl_lsm_catmap *next; }; -- cgit From fd4f101edbd9f99567ab2adb1f2169579ede7c13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:42:57 +0000 Subject: net: add exit_batch_rtnl() method Many (struct pernet_operations)->exit_batch() methods have to acquire rtnl. In presence of rtnl mutex pressure, this makes cleanup_net() very slow. This patch adds a new exit_batch_rtnl() method to reduce number of rtnl acquisitions from cleanup_net(). exit_batch_rtnl() handlers are called while rtnl is locked, and devices to be killed can be queued in a list provided as their second argument. A single unregister_netdevice_many() is called right before rtnl is released. exit_batch_rtnl() handlers are called before ->exit() and ->exit_batch() handlers. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/net_namespace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index cd0c2eedbb5e..20c34bd7a077 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -448,6 +448,9 @@ struct pernet_operations { void (*pre_exit)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); + /* Following method is called with RTNL held. */ + void (*exit_batch_rtnl)(struct list_head *net_exit_list, + struct list_head *dev_kill_list); unsigned int *id; size_t size; }; -- cgit From 70f16ea2e4f673fc769fd13c00c20a32b4fe238a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:04 +0000 Subject: ipv4: add __unregister_nexthop_notifier() unregister_nexthop_notifier() assumes the caller does not hold rtnl. We need in the following patch to use it from a context already holding rtnl. Add __unregister_nexthop_notifier(). unregister_nexthop_notifier() becomes a wrapper. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index d92046a4a078..6647ad509faa 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -218,6 +218,7 @@ struct nh_notifier_info { int register_nexthop_notifier(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); +int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, -- cgit From 9b5b36374ed6953f3efcc82e7cb4c353b9869faf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:10 +0000 Subject: ip_tunnel: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair and one unregister_netdevice_many() call. This patch takes care of ipip, ip_vti, and ip_gre tunnels. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-15-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 2d746f4c9a0a..5cd64bb2104d 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -284,7 +284,8 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, - struct rtnl_link_ops *ops); + struct rtnl_link_ops *ops, + struct list_head *dev_to_kill); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); -- cgit From 0a44dfc070749514b804ccac0b1fd38718f7daa1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:38 +0100 Subject: wifi: mac80211: simplify non-chanctx drivers There are still surprisingly many non-chanctx drivers, but in mac80211 that code is a bit awkward. Simplify this by having those drivers assign 'emulated' ops, so that the mac80211 code can be more unified between non-chanctx/chanctx drivers. This cuts the number of places caring about it by about 15, which are scattered across - now they're fewer and no longer in the channel context handling. Link: https://msgid.link/20240129194108.6d0ead50f5cf.I60d093b2fc81ca1853925a4d0ac3a2337d5baa5b@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8d6ae22c09bf..62c4b4d10bb4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7532,4 +7532,17 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links); void ieee80211_set_active_links_async(struct ieee80211_vif *vif, u16 active_links); +/* for older drivers - let's not document these ... */ +int ieee80211_emulate_add_chanctx(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *ctx); +void ieee80211_emulate_remove_chanctx(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *ctx); +void ieee80211_emulate_change_chanctx(struct ieee80211_hw *hw, + struct ieee80211_chanctx_conf *ctx, + u32 changed); +int ieee80211_emulate_switch_vif_chanctx(struct ieee80211_hw *hw, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, + enum ieee80211_chanctx_switch_mode mode); + #endif /* MAC80211_H */ -- cgit From 6092077ad09ce880c61735c314060f0bd79ae4aa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:40 +0100 Subject: wifi: mac80211: introduce 'channel request' For channel contexts, mac80211 currently uses the cfg80211 chandef struct (control channel, center freq(s), width) to define towards drivers and internally how these behave. In fact, there are _two_ such structs used, where the min_def can reduce bandwidth according to the stations connected. Unfortunately, with EHT this is longer be sufficient, at least not for all hardware. EHT requires that non-AP STAs that are connected to an AP with a lower bandwidth than it (the AP) advertises (e.g. 160 MHz STA connected to 320 MHz AP) still be able to receive downlink OFDMA and respond to trigger frames for uplink OFDMA that specify the position and bandwidth for the non-AP STA relative to the channel the AP is using. Therefore, they need to be aware of this, and at least for some hardware (e.g. Intel) this awareness is in the hardware. As a result, use of the "same" channel may need to be split over two channel contexts where they differ by the AP being used. As a first step, introduce a concept of a channel request ('chanreq') for each interface, to control the context it requests. This step does nothing but reorganise the code, so that later the AP's chandef can be added to the request in order to handle the EHT case described above. Link: https://msgid.link/20240129194108.2e88e48bd2e9.I4256183debe975c5ed71621611206fdbb69ba330@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 62c4b4d10bb4..dd8a66e9afd9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -223,6 +223,14 @@ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4), }; +/** + * struct ieee80211_chan_req - A channel "request" + * @oper: channel definition to use for operation + */ +struct ieee80211_chan_req { + struct cfg80211_chan_def oper; +}; + /** * struct ieee80211_chanctx_conf - channel context that vifs may be tuned to * @@ -583,7 +591,7 @@ struct ieee80211_fils_discovery { * @mcast_rate: per-band multicast rate index + 1 (0: disabled) * @bssid: The BSSID for this BSS * @enable_beacon: whether beaconing should be enabled or not - * @chandef: Channel definition for this BSS -- the hardware might be + * @chanreq: Channel request for this BSS -- the hardware might be * configured a higher bandwidth than this BSS uses, for example. * @mu_group: VHT MU-MIMO group membership data * @ht_operation_mode: HT operation mode like in &struct ieee80211_ht_operation. @@ -716,7 +724,7 @@ struct ieee80211_bss_conf { u32 cqm_rssi_hyst; s32 cqm_rssi_low; s32 cqm_rssi_high; - struct cfg80211_chan_def chandef; + struct ieee80211_chan_req chanreq; struct ieee80211_mu_group_data mu_group; bool qos; bool hidden_ssid; -- cgit From 761748f001800d925c2ee8b04407e7aee12c3ffb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:42 +0100 Subject: wifi: mac80211: support wider bandwidth OFDMA config EHT requires that stations are able to participate in wider bandwidth OFDMA, i.e. parse downlink OFDMA and uplink OFDMA triggers when they're not capable of (or not connected at) the (wider) bandwidth that the AP is using. This requires hardware configuration, since the entity responsible for parsing (possibly hardware) needs to know the AP bandwidth. To support this, change the channel request to have the AP's bandwidth for clients, and track that in the channel context in mac80211. This means that the same chandef might need to be split up into two different contexts, if the APs are different. Interfaces other than client are not participating in OFDMA the same way, so they don't request any AP setting. Note that this doesn't introduce any API to split a channel context, so that there are cases where this might lead to a disconnect, e.g. if there are two client interfaces using the same channel context, e.g. both 160 MHz connected to different 320 MHz APs, and one of the APs switches to 160 MHz. Note also there are possible cases where this can be optimised, e.g. when using the upper or lower 160 Mhz, but I haven't been able to really fully understand the spec and/or hardware limitations. If, for some reason, there are no hardware limits on this because the OFDMA (downlink/trigger) parsing is done in firmware and can take the transmitter into account, then drivers can set the new flag IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW on interfaces to not have them request any AP bandwidth in the channel context and ignore this issue entirely. The bss_conf still contains the AP configuration (if any, i.e. EHT) in the chanreq. Link: https://msgid.link/20240129194108.d3d5b35dd783.I939d04674f4ff06f39934b1591c8d36a30ce74c2@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dd8a66e9afd9..ab6bc89d3394 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -214,6 +214,8 @@ struct ieee80211_low_level_stats { * @IEEE80211_CHANCTX_CHANGE_CHANNEL: switched to another operating channel, * this is used only with channel switching with CSA * @IEEE80211_CHANCTX_CHANGE_MIN_WIDTH: The min required channel width changed + * @IEEE80211_CHANCTX_CHANGE_AP: The AP channel definition changed, so (wider + * bandwidth) OFDMA settings need to be changed */ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_WIDTH = BIT(0), @@ -221,14 +223,18 @@ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_RADAR = BIT(2), IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(3), IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4), + IEEE80211_CHANCTX_CHANGE_AP = BIT(5), }; /** * struct ieee80211_chan_req - A channel "request" * @oper: channel definition to use for operation + * @ap: the channel definition of the AP, if any + * (otherwise the chan member is %NULL) */ struct ieee80211_chan_req { struct cfg80211_chan_def oper; + struct cfg80211_chan_def ap; }; /** @@ -239,6 +245,8 @@ struct ieee80211_chan_req { * * @def: the channel definition * @min_def: the minimum channel definition currently required. + * @ap: the channel definition the AP actually is operating as, + * for use with (wider bandwidth) OFDMA * @rx_chains_static: The number of RX chains that must always be * active on the channel to receive MIMO transmissions * @rx_chains_dynamic: The number of RX chains that must be enabled @@ -251,6 +259,7 @@ struct ieee80211_chan_req { struct ieee80211_chanctx_conf { struct cfg80211_chan_def def; struct cfg80211_chan_def min_def; + struct cfg80211_chan_def ap; u8 rx_chains_static, rx_chains_dynamic; @@ -1782,6 +1791,10 @@ struct ieee80211_channel_switch { * this is not pure P2P vif. * @IEEE80211_VIF_EML_ACTIVE: The driver indicates that EML operation is * enabled for the interface. + * @IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW: Ignore wider bandwidth OFDMA + * operation on this interface and request a channel context without + * the AP definition. Use this e.g. because the device is able to + * handle OFDMA (downlink and trigger for uplink) on a per-AP basis. */ enum ieee80211_vif_flags { IEEE80211_VIF_BEACON_FILTER = BIT(0), @@ -1789,6 +1802,7 @@ enum ieee80211_vif_flags { IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2), IEEE80211_VIF_GET_NOA_UPDATE = BIT(3), IEEE80211_VIF_EML_ACTIVE = BIT(4), + IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW = BIT(5), }; -- cgit From 8f251a0a1566e3e1da0f1d9322c8ffae808a7509 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:47 +0100 Subject: wifi: cfg80211: simplify cfg80211_chandef_compatible() Simplify cfg80211_chandef_compatible() a bit by switching c1 and c2 around so that c1 is always the narrower one (once they're not identical or narrow/S1G). Then we can just check the various primary channels and exit with the wider one (c2), or NULL. Also refactor the primary 40/80/160 function to not have all the calculations hard-coded, and use a wrapper around it to check primary 40/80/160 compatibility. While at it, add some kunit tests for this functionality. Also expose the new cfg80211_chandef_primary_freq() to drivers, mac80211 will use it. Link: https://msgid.link/20240129194108.be3e6eccaba3.I8399c2ff1435d7378e5837794cb5aa6dd2ee1416@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5b42bfc1b660..fc2ad80118e8 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1047,6 +1047,17 @@ unsigned int cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef); +/** + * cfg80211_chandef_primary_freq - calculate primary 40/80/160 MHz freq + * @chandef: chandef to calculate for + * @primary_chan_width: primary channel width to calculate center for + * + * Returns: the primary 40/80/160 MHz channel center frequency, or -1 + * for errors + */ +int cfg80211_chandef_primary_freq(const struct cfg80211_chan_def *chandef, + enum nl80211_chan_width primary_chan_width); + /** * nl80211_send_chandef - sends the channel definition. * @msg: the msg to send channel definition -- cgit From b82730bf57b54803ab94abbfd8c4422a7081886d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Jan 2024 19:34:49 +0100 Subject: wifi: cfg80211/mac80211: move puncturing into chandef Aloka originally suggested that puncturing should be part of the chandef, so that it's treated correctly. At the time, I disagreed and it ended up not part of the chandef, but I've now realized that this was wrong. Even for clients, the RX, and perhaps more importantly, CCA configuration needs to take puncturing into account. Move puncturing into the chandef, and adjust all the code accordingly. Also add a few tests for puncturing in chandef compatibility checking. Link: https://lore.kernel.org/linux-wireless/20220214223051.3610-1-quic_alokad@quicinc.com/ Suggested-by: Aloka Dixit Link: https://msgid.link/20240129194108.307183a5d2e5.I4d7fe2f126b2366c1312010e2900dfb2abffa0f6@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 44 +++++++++++++++----------------------------- include/net/mac80211.h | 11 ++++------- 2 files changed, 19 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fc2ad80118e8..cb5e34d640cd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7,7 +7,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021, 2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include @@ -808,6 +808,9 @@ struct key_params { * chan will define the primary channel and all other * parameters are ignored. * @freq1_offset: offset from @center_freq1, in KHz + * @punctured: mask of the punctured 20 MHz subchannels, with + * bits turned on being disabled (punctured); numbered + * from lower to higher frequency (like in the spec) */ struct cfg80211_chan_def { struct ieee80211_channel *chan; @@ -816,6 +819,7 @@ struct cfg80211_chan_def { u32 center_freq2; struct ieee80211_edmg edmg; u16 freq1_offset; + u16 punctured; }; /* @@ -956,7 +960,8 @@ cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1, chandef1->width == chandef2->width && chandef1->center_freq1 == chandef2->center_freq1 && chandef1->freq1_offset == chandef2->freq1_offset && - chandef1->center_freq2 == chandef2->center_freq2); + chandef1->center_freq2 == chandef2->center_freq2 && + chandef1->punctured == chandef2->punctured); } /** @@ -1051,12 +1056,15 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, * cfg80211_chandef_primary_freq - calculate primary 40/80/160 MHz freq * @chandef: chandef to calculate for * @primary_chan_width: primary channel width to calculate center for + * @punctured: punctured sub-channel bitmap, will be recalculated + * according to the new bandwidth, can be %NULL * * Returns: the primary 40/80/160 MHz channel center frequency, or -1 - * for errors + * for errors, updating the punctured bitmap */ -int cfg80211_chandef_primary_freq(const struct cfg80211_chan_def *chandef, - enum nl80211_chan_width primary_chan_width); +int cfg80211_chandef_primary(const struct cfg80211_chan_def *chandef, + enum nl80211_chan_width primary_chan_width, + u16 *punctured); /** * nl80211_send_chandef - sends the channel definition. @@ -1468,9 +1476,6 @@ struct cfg80211_unsol_bcast_probe_resp { * @fils_discovery: FILS discovery transmission parameters * @unsol_bcast_probe_resp: Unsolicited broadcast probe response parameters * @mbssid_config: AP settings for multiple bssid - * @punct_bitmap: Preamble puncturing bitmap. Each bit represents - * a 20 MHz channel, lowest bit corresponding to the lowest channel. - * Bit set to 1 indicates that the channel is punctured. */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1505,7 +1510,6 @@ struct cfg80211_ap_settings { struct cfg80211_fils_discovery fils_discovery; struct cfg80211_unsol_bcast_probe_resp unsol_bcast_probe_resp; struct cfg80211_mbssid_config mbssid_config; - u16 punct_bitmap; }; @@ -1539,9 +1543,6 @@ struct cfg80211_ap_update { * @radar_required: whether radar detection is required on the new channel * @block_tx: whether transmissions should be blocked while changing * @count: number of beacons until switch - * @punct_bitmap: Preamble puncturing bitmap. Each bit represents - * a 20 MHz channel, lowest bit corresponding to the lowest channel. - * Bit set to 1 indicates that the channel is punctured. */ struct cfg80211_csa_settings { struct cfg80211_chan_def chandef; @@ -1554,7 +1555,6 @@ struct cfg80211_csa_settings { bool radar_required; bool block_tx; u8 count; - u16 punct_bitmap; }; /** @@ -8738,14 +8738,13 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * @dev: the device which switched channels * @chandef: the new channel definition * @link_id: the link ID for MLO, must be 0 for non-MLO - * @punct_bitmap: the new puncturing bitmap * * Caller must hold wiphy mutex, therefore must only be called from sleepable * driver context! */ void cfg80211_ch_switch_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, - unsigned int link_id, u16 punct_bitmap); + unsigned int link_id); /* * cfg80211_ch_switch_started_notify - notify channel switch start @@ -8754,7 +8753,6 @@ void cfg80211_ch_switch_notify(struct net_device *dev, * @link_id: the link ID for MLO, must be 0 for non-MLO * @count: the number of TBTTs until the channel switch happens * @quiet: whether or not immediate quiet was requested by the AP - * @punct_bitmap: the future puncturing bitmap * * Inform the userspace about the channel switch that has just * started, so that it can take appropriate actions (eg. starting @@ -8763,7 +8761,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, unsigned int link_id, u8 count, - bool quiet, u16 punct_bitmap); + bool quiet); /** * ieee80211_operating_class_to_band - convert operating class to band @@ -9381,18 +9379,6 @@ static inline int cfg80211_color_change_notify(struct net_device *dev) 0, 0); } -/** - * cfg80211_valid_disable_subchannel_bitmap - validate puncturing bitmap - * @bitmap: bitmap to be validated - * @chandef: channel definition - * - * Validate the puncturing bitmap. - * - * Return: %true if the bitmap is valid. %false otherwise. - */ -bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap, - const struct cfg80211_chan_def *chandef); - /** * cfg80211_links_removed - Notify about removed STA MLD setup links. * @dev: network device. diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ab6bc89d3394..54aa4a06c878 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7,7 +7,7 @@ * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2023 Intel Corporation + * Copyright (C) 2018 - 2024 Intel Corporation */ #ifndef MAC80211_H @@ -216,6 +216,8 @@ struct ieee80211_low_level_stats { * @IEEE80211_CHANCTX_CHANGE_MIN_WIDTH: The min required channel width changed * @IEEE80211_CHANCTX_CHANGE_AP: The AP channel definition changed, so (wider * bandwidth) OFDMA settings need to be changed + * @IEEE80211_CHANCTX_CHANGE_PUNCTURING: The punctured channel(s) bitmap + * was changed. */ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_WIDTH = BIT(0), @@ -224,6 +226,7 @@ enum ieee80211_chanctx_change { IEEE80211_CHANCTX_CHANGE_CHANNEL = BIT(3), IEEE80211_CHANCTX_CHANGE_MIN_WIDTH = BIT(4), IEEE80211_CHANCTX_CHANGE_AP = BIT(5), + IEEE80211_CHANCTX_CHANGE_PUNCTURING = BIT(6), }; /** @@ -357,7 +360,6 @@ struct ieee80211_vif_chanctx_switch { * @BSS_CHANGED_FILS_DISCOVERY: FILS discovery status changed. * @BSS_CHANGED_UNSOL_BCAST_PROBE_RESP: Unsolicited broadcast probe response * status changed. - * @BSS_CHANGED_EHT_PUNCTURING: The channel puncturing bitmap changed. * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. * @BSS_CHANGED_MLD_TTLM: TID to link mapping was changed */ @@ -394,7 +396,6 @@ enum ieee80211_bss_change { BSS_CHANGED_HE_BSS_COLOR = 1<<29, BSS_CHANGED_FILS_DISCOVERY = 1<<30, BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, - BSS_CHANGED_EHT_PUNCTURING = BIT_ULL(32), BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), BSS_CHANGED_MLD_TTLM = BIT_ULL(34), @@ -661,9 +662,7 @@ struct ieee80211_fils_discovery { * @tx_pwr_env_num: number of @tx_pwr_env. * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT - * @eht_puncturing: bitmap to indicate which channels are punctured in this BSS * @csa_active: marks whether a channel switch is going on. - * @csa_punct_bitmap: new puncturing bitmap for channel switch * @mu_mimo_owner: indicates interface owns MU-MIMO capability * @chanctx_conf: The channel context this interface is assigned to, or %NULL * when it is not assigned. This pointer is RCU-protected due to the TX @@ -766,10 +765,8 @@ struct ieee80211_bss_conf { u8 tx_pwr_env_num; u8 pwr_reduction; bool eht_support; - u16 eht_puncturing; bool csa_active; - u16 csa_punct_bitmap; bool mu_mimo_owner; struct ieee80211_chanctx_conf __rcu *chanctx_conf; -- cgit From 68de13028b94572fc570b7eb1e0e2de1d751fe7e Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Fri, 22 Dec 2023 09:09:13 +0800 Subject: wifi: cfg80211: Add utility for converting op_class into chandef This utility is used in STA CSA handling. The op_class in the ECSA Element can be converted into chandef. Co-developed-by: Money Wang Signed-off-by: Michael-CY Lee Link: https://msgid.link/20231222010914.6521-2-michael-cy.lee@mediatek.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cb5e34d640cd..e27ed2307cdb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8774,6 +8774,19 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev, bool ieee80211_operating_class_to_band(u8 operating_class, enum nl80211_band *band); +/** + * ieee80211_operating_class_to_chandef - convert operating class to chandef + * + * @operating_class: the operating class to convert + * @chan: the ieee80211_channel to convert + * @chandef: a pointer to the resulting chandef + * + * Returns %true if the conversion was successful, %false otherwise. + */ +bool ieee80211_operating_class_to_chandef(u8 operating_class, + struct ieee80211_channel *chan, + struct cfg80211_chan_def *chandef); + /** * ieee80211_chandef_to_operating_class - convert chandef to operation class * -- cgit From 4ace04c0bdbde3b028ec0a5a3be2471cdb1efb67 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 30 Jan 2024 19:39:14 +0530 Subject: wifi: cfg80211: send link id in channel_switch ops Currently, during channel switch, no link id information is passed down. In order to support channel switch during Multi Link Operation, it is required to pass link id as well. Add changes to pass link id in the channel_switch cfg80211_ops. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240130140918.1172387-2-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e27ed2307cdb..d4c83ea3213d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1543,6 +1543,8 @@ struct cfg80211_ap_update { * @radar_required: whether radar detection is required on the new channel * @block_tx: whether transmissions should be blocked while changing * @count: number of beacons until switch + * @link_id: defines the link on which channel switch is expected during + * MLO. 0 in case of non-MLO. */ struct cfg80211_csa_settings { struct cfg80211_chan_def chandef; @@ -1555,6 +1557,7 @@ struct cfg80211_csa_settings { bool radar_required; bool block_tx; u8 count; + u8 link_id; }; /** -- cgit From 480e7048aa0bbf0a79a976cdfa0195fd157da902 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 30 Jan 2024 19:39:15 +0530 Subject: wifi: mac80211: update beacon counters per link basis Currently, function to update beacon counter uses deflink to fetch the beacon and then update the counter. However, with MLO, there is a need to update the counter for the beacon in a particular link. Add support to use link_id in order to fetch the beacon from a particular link data during beacon update counter. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240130140918.1172387-3-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 54aa4a06c878..8acee7ce3aa9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5526,6 +5526,7 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, /** * ieee80211_beacon_update_cntdwn - request mac80211 to decrement the beacon countdown * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: valid link_id during MLO or 0 for non-MLO * * The beacon counter should be updated after each beacon transmission. * This function is called implicitly when @@ -5535,7 +5536,8 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, * * Return: new countdown value */ -u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif); +u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif, + unsigned int link_id); /** * ieee80211_beacon_set_cntdwn - request mac80211 to set beacon countdown -- cgit From 04ada8599c35ecb2cf16c94eb118d227630d06ee Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 30 Jan 2024 19:39:18 +0530 Subject: wifi: mac80211: add support to call csa_finish on a link Currently ieee80211_csa_finish() function finalizes CSA by scheduling a finalizing worker using the deflink. With MLO, there is a need to do it on a given link basis. Pass link ID of the link on which CSA needs to be finalized. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240130140918.1172387-6-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8acee7ce3aa9..45d905b17a65 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5555,12 +5555,13 @@ void ieee80211_beacon_set_cntdwn(struct ieee80211_vif *vif, u8 counter); /** * ieee80211_csa_finish - notify mac80211 about channel switch * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: valid link_id during MLO or 0 for non-MLO * * After a channel switch announcement was scheduled and the counter in this * announcement hits 1, this function must be called by the driver to * notify mac80211 that the channel can be changed. */ -void ieee80211_csa_finish(struct ieee80211_vif *vif); +void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id); /** * ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1 -- cgit From d4655db0a1e11eeacc55c44c81121c83b087982e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Feb 2024 08:04:23 +0100 Subject: wifi: cfg80211: fix kernel-doc for cfg80211_chandef_primary This was still referring to cfg80211_chandef_primary_freq(), fix it. Reported-by: Stephen Rothwell Fixes: b82730bf57b5 ("wifi: cfg80211/mac80211: move puncturing into chandef") Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d4c83ea3213d..f52f989a54ad 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1053,7 +1053,7 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef); /** - * cfg80211_chandef_primary_freq - calculate primary 40/80/160 MHz freq + * cfg80211_chandef_primary - calculate primary 40/80/160 MHz freq * @chandef: chandef to calculate for * @primary_chan_width: primary channel width to calculate center for * @punctured: punctured sub-channel bitmap, will be recalculated -- cgit From 129e406e1811538c1afc9c8e97d61bb18eed3363 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 8 Feb 2024 14:06:49 -0800 Subject: net/ipv6: set expires in rt6_add_dflt_router(). Pass the duration of a lifetime (in seconds) to the function rt6_add_dflt_router() so that it can properly set the expiration time. The function ndisc_router_discovery() is the only one that calls rt6_add_dflt_router(), and it will later set the expiration time for the route created by rt6_add_dflt_router(). However, there is a gap of time between calling rt6_add_dflt_router() and setting the expiration time in ndisc_router_discovery(). During this period, there is a possibility that a new route may be removed from the routing table. By setting the correct expiration time in rt6_add_dflt_router(), we can prevent this from happening. The reason for setting RTF_EXPIRES in rt6_add_dflt_router() is to start the Garbage Collection (GC) timer, as it only activates when a route with RTF_EXPIRES is added to a table. Suggested-by: David Ahern Reviewed-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: Kui-Feng Lee Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 28b065790261..52a51c69aa9d 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -170,7 +170,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref, - u32 defrtr_usr_metric); + u32 defrtr_usr_metric, + int lifetime); void rt6_purge_dflt_routers(struct net *net); -- cgit From 5eb902b8e7193cdcb33242af0a56502e6b5206e9 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Thu, 8 Feb 2024 14:06:51 -0800 Subject: net/ipv6: Remove expired routes with a separated list of routes. FIB6 GC walks trees of fib6_tables to remove expired routes. Walking a tree can be expensive if the number of routes in a table is big, even if most of them are permanent. Checking routes in a separated list of routes having expiration will avoid this potential issue. Reviewed-by: David Ahern Signed-off-by: Kui-Feng Lee Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 360b12e61850..323c94f1845b 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -173,6 +173,9 @@ struct fib6_info { refcount_t fib6_ref; unsigned long expires; + + struct hlist_node gc_link; + struct dst_metrics *fib6_metrics; #define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] @@ -241,12 +244,18 @@ static inline bool fib6_requires_src(const struct fib6_info *rt) return rt->fib6_src.plen > 0; } +/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever + * been added to a table before. + */ static inline void fib6_clean_expires(struct fib6_info *f6i) { f6i->fib6_flags &= ~RTF_EXPIRES; f6i->expires = 0; } +/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever + * been added to a table before. + */ static inline void fib6_set_expires(struct fib6_info *f6i, unsigned long expires) { @@ -327,8 +336,10 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i) static inline void fib6_info_release(struct fib6_info *f6i) { - if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) + if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) { + DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link)); call_rcu(&f6i->rcu, fib6_info_destroy_rcu); + } } enum fib6_walk_state { @@ -382,6 +393,7 @@ struct fib6_table { struct inet_peer_base tb6_peers; unsigned int flags; unsigned int fib_seq; + struct hlist_head tb6_gc_hlist; /* GC candidates */ #define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; @@ -498,6 +510,38 @@ void fib6_gc_cleanup(void); int fib6_init(void); +/* Add the route to the gc list if it is not already there + * + * The callers should hold f6i->fib6_table->tb6_lock. + */ +static inline void fib6_add_gc_list(struct fib6_info *f6i) +{ + /* If fib6_node is null, the f6i is not in (or removed from) the + * table. + * + * There is a gap between finding the f6i from the table and + * calling this function without the protection of the tb6_lock. + * This check makes sure the f6i is not added to the gc list when + * it is not on the table. + */ + if (!rcu_dereference_protected(f6i->fib6_node, + lockdep_is_held(&f6i->fib6_table->tb6_lock))) + return; + + if (hlist_unhashed(&f6i->gc_link)) + hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist); +} + +/* Remove the route from the gc list if it is on the list. + * + * The callers should hold f6i->fib6_table->tb6_lock. + */ +static inline void fib6_remove_gc_list(struct fib6_info *f6i) +{ + if (!hlist_unhashed(&f6i->gc_link)) + hlist_del_init(&f6i->gc_link); +} + struct ipv6_route_iter { struct seq_net_private p; struct fib6_walker w; -- cgit From 6f656131f6988709e3bf828c3ad992032b717c2e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 8 Feb 2024 12:01:43 +0100 Subject: wifi: mac80211: remove gfp parameter from ieee80211_obss_color_collision_notify Get rid of gfp parameter from ieee80211_obss_color_collision_notify since it is no longer used. Signed-off-by: Lorenzo Bianconi Reviewed-by: Jeff Johnson Acked-by: Jeff Johnson Link: https://msgid.link/f91e1c78896408ac556586ba8c99e4e389aeba02.1707389901.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- include/net/mac80211.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 45d905b17a65..fc223761e3af 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -7490,11 +7490,10 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, * @vif: &struct ieee80211_vif pointer from the add_interface callback. * @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is * aware of. - * @gfp: allocation flags */ void ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif, - u64 color_bitmap, gfp_t gfp); + u64 color_bitmap); /** * ieee80211_is_tx_data - check if frame is a data frame -- cgit From f6ca96aa51a4ae1b3a416fbe85acdf1197c405a6 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Mon, 5 Feb 2024 21:59:50 +0530 Subject: wifi: cfg80211: add support for link id attribute in NL80211_CMD_DEL_STATION Currently whenever NL80211_CMD_DEL_STATION command is called without any MAC address, all stations present on that interface are flushed. However with MLO there is a need to flush such stations only which are using at least a particular link from the AP MLD interface. For example - 2 GHz and 5 GHz are part of an AP MLD. To this interface, following stations are connected - 1. One non-EHT STA on 2 GHz link. 2. One non-EHT STA on 5 GHz link. 3. One Multi-Link STA having 2 GHz and 5 GHz as active links. Now if currently, NL80211_CMD_DEL_STATION is issued by the 2 GHz link without any MAC address, it would flush all station entries. However, flushing of station entry #2 at least is not desireable since it is connected to 5 GHz link alone. Hence, add an option to pass link ID as well in the command so that if link ID is passed, stations using that passed link ID alone would be flushed and others will not. So after this, station entries #1 and #3 alone would be flushed and #2 will remain as it is. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240205162952.1697646-2-quic_adisi@quicinc.com [clarify documentation] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f52f989a54ad..62894b024e88 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1780,11 +1780,15 @@ struct station_parameters { * @subtype: Management frame subtype to use for indicating removal * (10 = Disassociation, 12 = Deauthentication) * @reason_code: Reason code for the Disassociation/Deauthentication frame + * @link_id: Link ID indicating a link that stations to be flushed must be + * using; valid only for MLO, but can also be -1 for MLO to really + * remove all stations. */ struct station_del_parameters { const u8 *mac; u8 subtype; u16 reason_code; + int link_id; }; /** -- cgit From 7b5e25b8baebc02db728bfbdc3080be863144c7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Feb 2024 16:54:07 +0200 Subject: wifi: cfg80211: rename UHB to 6 GHz UHB stands for "Ultra High Band", but this term doesn't really exist in the spec. Rename all occurrences to "6 GHz", but keep a few defines for userspace API compatibility. Link: https://msgid.link/20240206164849.c9cfb9400839.I153db3b951934a1d84409c17fbe1f1d1782543fa@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 62894b024e88..7bb8484e859e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -118,9 +118,9 @@ struct wiphy; * restrictions. * @IEEE80211_CHAN_NO_EHT: EHT operation is not permitted on this channel. * @IEEE80211_CHAN_DFS_CONCURRENT: See %NL80211_RRF_DFS_CONCURRENT - * @IEEE80211_CHAN_NO_UHB_VLP_CLIENT: Client connection with VLP AP + * @IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT: Client connection with VLP AP * not permitted using this channel - * @IEEE80211_CHAN_NO_UHB_AFC_CLIENT: Client connection with AFC AP + * @IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT: Client connection with AFC AP * not permitted using this channel */ enum ieee80211_channel_flags { @@ -146,8 +146,8 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_320MHZ = 1<<19, IEEE80211_CHAN_NO_EHT = 1<<20, IEEE80211_CHAN_DFS_CONCURRENT = 1<<21, - IEEE80211_CHAN_NO_UHB_VLP_CLIENT= 1<<22, - IEEE80211_CHAN_NO_UHB_AFC_CLIENT= 1<<23, + IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = 1<<22, + IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = 1<<23, }; #define IEEE80211_CHAN_NO_HT40 \ @@ -4935,7 +4935,7 @@ struct cfg80211_ops { * enum wiphy_flags - wiphy capability flags * * @WIPHY_FLAG_SPLIT_SCAN_6GHZ: if set to true, the scan request will be split - * into two, first for legacy bands and second for UHB. + * into two, first for legacy bands and second for 6 GHz. * @WIPHY_FLAG_NETNS_OK: if not set, do not allow changing the netns of this * wiphy at all * @WIPHY_FLAG_PS_ON_BY_DEFAULT: if set to true, powersave will be enabled -- cgit From a110a3b79177ddd7e7295671df97fb5386406835 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Feb 2024 16:54:08 +0200 Subject: wifi: cfg80211: optionally support monitor on disabled channels If the hardware supports a disabled channel, it may in some cases be possible to use monitor mode (without any transmit) on it when it's otherwise disabled. Add a new channel flag IEEE80211_CHAN_CAN_MONITOR that makes it possible for a driver to indicate such a thing. Make it per channel so drivers could have a choice with it, perhaps it's only possible on some channels, perhaps some channels are not supported at all, but still there and marked disabled. In _nl80211_parse_chandef() simplify the code and check only for an unknown channel, _cfg80211_chandef_usable() will later check for IEEE80211_CHAN_DISABLED anyway. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206164849.87fad3a21a09.I9116b2fdc2e2c9fd59a9273a64db7fcb41fc0328@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7bb8484e859e..0a3151587556 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -122,6 +122,9 @@ struct wiphy; * not permitted using this channel * @IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT: Client connection with AFC AP * not permitted using this channel + * @IEEE80211_CHAN_CAN_MONITOR: This channel can be used for monitor + * mode even in the presence of other (regulatory) restrictions, + * even if it is otherwise disabled. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = 1<<0, @@ -148,6 +151,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_DFS_CONCURRENT = 1<<21, IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = 1<<22, IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = 1<<23, + IEEE80211_CHAN_CAN_MONITOR = 1<<24, }; #define IEEE80211_CHAN_NO_HT40 \ -- cgit From a64be8296e31f432d4a9df4db684cc8a250eb81c Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Tue, 6 Feb 2024 16:54:11 +0200 Subject: wifi: cfg80211: report unprotected deauth/disassoc in wowlan Add to cfg80211_wowlan_wakeup another wakeup reason - unprot_deauth_disassoc. To be set to true if the woke up was due to an unprotected deauth or disassoc frame in MFP. In that case report WOWLAN_TRIG_UNPROTECTED_DEAUTH_DISASSOC. Signed-off-by: Shaul Triebitz Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206164849.a3d739850d03.I8f52a21c4f36d1af1f8068bed79e2f9cbf8289ef@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0a3151587556..93e9abb7fc3d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3612,12 +3612,15 @@ struct cfg80211_wowlan_nd_info { * @tcp_connlost: TCP connection lost or failed to establish * @tcp_nomoretokens: TCP data ran out of tokens * @net_detect: if not %NULL, woke up because of net detect + * @unprot_deauth_disassoc: woke up due to unprotected deauth or + * disassoc frame (in MFP). */ struct cfg80211_wowlan_wakeup { bool disconnect, magic_pkt, gtk_rekey_failure, eap_identity_req, four_way_handshake, rfkill_release, packet_80211, - tcp_match, tcp_connlost, tcp_nomoretokens; + tcp_match, tcp_connlost, tcp_nomoretokens, + unprot_deauth_disassoc; s32 pattern_idx; u32 packet_present_len, packet_len; const void *packet; -- cgit From a3522a2edb3faf8cb98d38c2a99f5967beef24e2 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 9 Feb 2024 17:43:37 +0100 Subject: ipv4: Set the routing scope properly in ip_route_output_ports(). Set scope automatically in ip_route_output_ports() (using the socket SOCK_LOCALROUTE flag). This way, callers don't have to overload the tos with the RTO_ONLINK flag, like RT_CONN_FLAGS() does. For callers that don't pass a struct sock, this doesn't change anything as the scope is still set to RT_SCOPE_UNIVERSE when sk is NULL. Callers that passed a struct sock and used RT_CONN_FLAGS(sk) or RT_CONN_FLAGS_TOS(sk, tos) for the tos are modified to use ip_sock_tos(sk) and RT_TOS(tos) respectively, as overloading tos with the RTO_ONLINK flag now becomes unnecessary. In drivers/net/amt.c, all ip_route_output_ports() calls use a 0 tos parameter, ignoring the SOCK_LOCALROUTE flag of the socket. But the sk parameter is a kernel socket, which doesn't have any configuration path for setting SOCK_LOCALROUTE anyway. Therefore, ip_route_output_ports() will continue to initialise scope with RT_SCOPE_UNIVERSE and amt.c doesn't need to be modified. Also, remove RT_CONN_FLAGS() and RT_CONN_FLAGS_TOS() from route.h as these macros are now unused. The objective is to eventually remove RTO_ONLINK entirely to allow converting ->flowi4_tos to dscp_t. This will ensure proper isolation between the DSCP and ECN bits, thus minimising the risk of introducing bugs where TOS values interfere with ECN. Signed-off-by: Guillaume Nault Reviewed-by: David Ahern Link: https://lore.kernel.org/r/dacfd2ab40685e20959ab7b53c427595ba229e7d.1707496938.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 980ab474eabd..d4a0147942f1 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -37,9 +37,6 @@ #define RTO_ONLINK 0x01 -#define RT_CONN_FLAGS(sk) (RT_TOS(READ_ONCE(inet_sk(sk)->tos)) | sock_flag(sk, SOCK_LOCALROUTE)) -#define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) - static inline __u8 ip_sock_rt_scope(const struct sock *sk) { if (sock_flag(sk, SOCK_LOCALROUTE)) @@ -163,8 +160,8 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi __u8 proto, __u8 tos, int oif) { flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos, - RT_SCOPE_UNIVERSE, proto, - sk ? inet_sk_flowi_flags(sk) : 0, + sk ? ip_sock_rt_scope(sk) : RT_SCOPE_UNIVERSE, + proto, sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); -- cgit From 2b0cfa6e49566c8fa6759734cf821aa6e8271a9e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 12 Feb 2024 10:50:54 +0100 Subject: net: add generic percpu page_pool allocator Introduce generic percpu page_pools allocator. Moreover add page_pool_create_percpu() and cpuid filed in page_pool struct in order to recycle the page in the page_pool "hot" cache if napi_pp_put_page() is running on the same cpu. This is a preliminary patch to add xdp multi-buff support for xdp running in generic mode. Acked-by: Jesper Dangaard Brouer Reviewed-by: Toke Hoiland-Jorgensen Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/80bc4285228b6f4220cd03de1999d86e46e3fcbd.1707729884.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 76481c465375..3828396ae60c 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -128,6 +128,7 @@ struct page_pool_stats { struct page_pool { struct page_pool_params_fast p; + int cpuid; bool has_init_callback; long frag_users; @@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size, gfp_t gfp); struct page_pool *page_pool_create(const struct page_pool_params *params); +struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, + int cpuid); struct xdp_mem_info; -- cgit From 414532d8aa8915d9aebd01c6b5aa54bdfd98da71 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Feb 2024 20:08:43 +0100 Subject: wifi: cfg80211: use IEEE80211_MAX_MESH_ID_LEN appropriately Even if that's the same as IEEE80211_MAX_SSID_LEN, we really should just use IEEE80211_MAX_MESH_ID_LEN for mesh, rather than having the BUILD_BUG_ON()s. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 93e9abb7fc3d..57c2298af35b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6223,7 +6223,7 @@ struct wireless_dev { int beacon_interval; struct cfg80211_chan_def preset_chandef; struct cfg80211_chan_def chandef; - u8 id[IEEE80211_MAX_SSID_LEN]; + u8 id[IEEE80211_MAX_MESH_ID_LEN]; u8 id_len, id_up_len; } mesh; struct { -- cgit From a5fcea2d2f790aa90b6e996d411ae2cf8db55186 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Tue, 13 Feb 2024 23:26:31 -0700 Subject: net: ipv6/addrconf: introduce a regen_min_advance sysctl In RFC 8981, REGEN_ADVANCE cannot be less than 2 seconds, and the RFC does not permit the creation of temporary addresses with lifetimes shorter than that: > When processing a Router Advertisement with a > Prefix Information option carrying a prefix for the purposes of > address autoconfiguration (i.e., the A bit is set), the host MUST > perform the following steps: > 5. A temporary address is created only if this calculated preferred > lifetime is greater than REGEN_ADVANCE time units. However, some users want to change their IPv6 address as frequently as possible regardless of the RFC's arbitrary minimum lifetime. For the benefit of those users, add a regen_min_advance sysctl parameter that can be set to below or above 2 seconds. Link: https://datatracker.ietf.org/doc/html/rfc8981 Signed-off-by: Alex Henrie Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- include/net/addrconf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 61ebe723ee4d..30d6f1e84e46 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -8,8 +8,9 @@ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ -#define TEMP_VALID_LIFETIME (7*86400) -#define TEMP_PREFERRED_LIFETIME (86400) +#define TEMP_VALID_LIFETIME (7*86400) /* 1 week */ +#define TEMP_PREFERRED_LIFETIME (86400) /* 24 hours */ +#define REGEN_MIN_ADVANCE (2) /* 2 seconds */ #define REGEN_MAX_RETRY (3) #define MAX_DESYNC_FACTOR (600) -- cgit From 3de21a8990d3c2cc507e9cc4ed00f36358d5b93e Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 12 Feb 2024 17:16:13 +0100 Subject: genetlink: Add per family bind/unbind callbacks Add genetlink family bind()/unbind() callbacks when adding/removing multicast group to/from netlink client socket via setsockopt() or bind() syscall. They can be used to track if consumers of netlink multicast messages emerge or disappear. Thus, a client implementing callbacks, can now send events only when there are active consumers, preventing unnecessary work when none exist. Suggested-by: Jakub Kicinski Signed-off-by: Stanislaw Gruszka Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240212161615.161935-2-stanislaw.gruszka@linux.intel.com Signed-off-by: Jakub Kicinski --- include/net/genetlink.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index e61469129402..ecadba836ae5 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -41,6 +41,8 @@ struct genl_info; * do additional, common, filtering and return an error * @post_doit: called after an operation's doit callback, it may * undo operations done by pre_doit, for example release locks + * @bind: called when family multicast group is added to a netlink socket + * @unbind: called when family multicast group is removed from a netlink socket * @module: pointer to the owning module (set to THIS_MODULE) * @mcgrps: multicast groups used by this family * @n_mcgrps: number of multicast groups @@ -84,6 +86,8 @@ struct genl_family { void (*post_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); + int (*bind)(int mcgrp); + void (*unbind)(int mcgrp); const struct genl_ops * ops; const struct genl_small_ops *small_ops; const struct genl_split_ops *split_ops; -- cgit From 56ef27e3abe6d6453b1f4f6127041f3a65d7cbc9 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 15 Feb 2024 12:39:05 +0100 Subject: page_pool: disable direct recycling based on pool->cpuid on destroy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that direct recycling is performed basing on pool->cpuid when set, memory leaks are possible: 1. A pool is destroyed. 2. Alloc cache is emptied (it's done only once). 3. pool->cpuid is still set. 4. napi_pp_put_page() does direct recycling basing on pool->cpuid. 5. Now alloc cache is not empty, but it won't ever be freed. In order to avoid that, rewrite pool->cpuid to -1 when unlinking NAPI to make sure no direct recycling will be possible after emptying the cache. This involves a bit of overhead as pool->cpuid now must be accessed via READ_ONCE() to avoid partial reads. Rename page_pool_unlink_napi() -> page_pool_disable_direct_recycling() to reflect what it actually does and unexport it. Signed-off-by: Alexander Lobakin Reviewed-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20240215113905.96817-1-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 3828396ae60c..3590fbe6e3f1 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -210,17 +210,12 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, struct xdp_mem_info; #ifdef CONFIG_PAGE_POOL -void page_pool_unlink_napi(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), struct xdp_mem_info *mem); void page_pool_put_page_bulk(struct page_pool *pool, void **data, int count); #else -static inline void page_pool_unlink_napi(struct page_pool *pool) -{ -} - static inline void page_pool_destroy(struct page_pool *pool) { } -- cgit From f853fa5c54e7a0364a52125074dedeaf2c7ddace Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Fri, 16 Feb 2024 10:25:43 +0100 Subject: net: page_pool: fix recycle stats for system page_pool allocator Use global percpu page_pool_recycle_stats counter for system page_pool allocator instead of allocating a separate percpu variable for each (also percpu) page pool instance. Reviewed-by: Toke Hoiland-Jorgensen Signed-off-by: Lorenzo Bianconi Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/r/87f572425e98faea3da45f76c3c68815c01a20ee.1708075412.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 3590fbe6e3f1..5e43a08d3231 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -18,8 +18,9 @@ * Please note DMA-sync-for-CPU is still * device driver responsibility */ -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ - PP_FLAG_DMA_SYNC_DEV) +#define PP_FLAG_SYSTEM_POOL BIT(2) /* Global system page_pool */ +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \ + PP_FLAG_SYSTEM_POOL) /* * Fast allocation side cache array/stack -- cgit From 18ddbf5cf0e7553fd05c3e1a02d740514ee3f0a6 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 14 Feb 2024 14:34:02 -0800 Subject: net: introduce abstraction for network memory Add the netmem_ref type, an abstraction for network memory. To add support for new memory types to the net stack, we must first abstract the current memory type. Currently parts of the net stack use struct page directly: - page_pool - drivers - skb_frag_t Originally the plan was to reuse struct page* for the new memory types, and to set the LSB on the page* to indicate it's not really a page. However, for compiler type checking we need to introduce a new type. netmem_ref is introduced to abstract the underlying memory type. Currently it's a no-op abstraction that is always a struct page underneath. In parallel there is an undergoing effort to add support for devmem to the net stack: https://lore.kernel.org/netdev/20231208005250.2910004-1-almasrymina@google.com/ netmem_ref can be pointers to different underlying memory types, and the low bits are set to indicate the memory type. Helpers are provided to convert netmem pointers to the underlying memory type (currently only struct page). In the devmem series helpers are provided so that calling code can use netmem without worrying about the underlying memory type unless absolutely necessary. Reviewed-by: Shakeel Butt Signed-off-by: Mina Almasry Signed-off-by: Paolo Abeni --- include/net/netmem.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/net/netmem.h (limited to 'include/net') diff --git a/include/net/netmem.h b/include/net/netmem.h new file mode 100644 index 000000000000..d8b810245c1d --- /dev/null +++ b/include/net/netmem.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Network memory + * + * Author: Mina Almasry + */ + +#ifndef _NET_NETMEM_H +#define _NET_NETMEM_H + +/** + * typedef netmem_ref - a nonexistent type marking a reference to generic + * network memory. + * + * A netmem_ref currently is always a reference to a struct page. This + * abstraction is introduced so support for new memory types can be added. + * + * Use the supplied helpers to obtain the underlying memory pointer and fields. + */ +typedef unsigned long __bitwise netmem_ref; + +/* This conversion fails (returns NULL) if the netmem_ref is not struct page + * backed. + * + * Currently struct page is the only possible netmem, and this helper never + * fails. + */ +static inline struct page *netmem_to_page(netmem_ref netmem) +{ + return (__force struct page *)netmem; +} + +/* Converting from page to netmem is always safe, because a page can always be + * a netmem. + */ +static inline netmem_ref page_to_netmem(struct page *page) +{ + return (__force netmem_ref)page; +} + +#endif /* _NET_NETMEM_H */ -- cgit From 5d4cc87414c5d11345c4b11d61377d351b5c28a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Feb 2024 16:20:06 +0000 Subject: net: reorganize "struct sock" fields Last major reorg happened in commit 9115e8cd2a0c ("net: reorganize struct sock for better data locality") Since then, many changes have been done. Before SO_PEEK_OFF support is added to TCP, we need to move sk_peek_off to a better location. It is time to make another pass, and add six groups, without explicit alignment. - sock_write_rx (following sk_refcnt) read-write fields in rx path. - sock_read_rx read-mostly fields in rx path. - sock_read_rxtx read-mostly fields in both rx and tx paths. - sock_write_rxtx read-write fields in both rx and tx paths. - sock_write_tx read-write fields in tx paths. - sock_read_tx read-mostly fields in tx paths. Results on TCP_RR benchmarks seem to show a gain (4 to 5 %). It is possible UDP needs a change, because sk_peek_off shares a cache line with sk_receive_queue. If this the case, we can exchange roles of sk->sk_receive and up->reader_queue queues. After this change, we have the following layout: struct sock { struct sock_common __sk_common; /* 0 0x88 */ /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */ __u8 __cacheline_group_begin__sock_write_rx[0]; /* 0x88 0 */ atomic_t sk_drops; /* 0x88 0x4 */ __s32 sk_peek_off; /* 0x8c 0x4 */ struct sk_buff_head sk_error_queue; /* 0x90 0x18 */ struct sk_buff_head sk_receive_queue; /* 0xa8 0x18 */ /* --- cacheline 3 boundary (192 bytes) --- */ struct { atomic_t rmem_alloc; /* 0xc0 0x4 */ int len; /* 0xc4 0x4 */ struct sk_buff * head; /* 0xc8 0x8 */ struct sk_buff * tail; /* 0xd0 0x8 */ } sk_backlog; /* 0xc0 0x18 */ struct { atomic_t rmem_alloc; /* 0 0x4 */ int len; /* 0x4 0x4 */ struct sk_buff * head; /* 0x8 0x8 */ struct sk_buff * tail; /* 0x10 0x8 */ /* size: 24, cachelines: 1, members: 4 */ /* last cacheline: 24 bytes */ }; __u8 __cacheline_group_end__sock_write_rx[0]; /* 0xd8 0 */ __u8 __cacheline_group_begin__sock_read_rx[0]; /* 0xd8 0 */ rcu * sk_rx_dst; /* 0xd8 0x8 */ int sk_rx_dst_ifindex; /* 0xe0 0x4 */ u32 sk_rx_dst_cookie; /* 0xe4 0x4 */ unsigned int sk_ll_usec; /* 0xe8 0x4 */ unsigned int sk_napi_id; /* 0xec 0x4 */ u16 sk_busy_poll_budget; /* 0xf0 0x2 */ u8 sk_prefer_busy_poll; /* 0xf2 0x1 */ u8 sk_userlocks; /* 0xf3 0x1 */ int sk_rcvbuf; /* 0xf4 0x4 */ rcu * sk_filter; /* 0xf8 0x8 */ /* --- cacheline 4 boundary (256 bytes) --- */ union { rcu * sk_wq; /* 0x100 0x8 */ struct socket_wq * sk_wq_raw; /* 0x100 0x8 */ }; /* 0x100 0x8 */ union { rcu * sk_wq; /* 0 0x8 */ struct socket_wq * sk_wq_raw; /* 0 0x8 */ }; void (*sk_data_ready)(struct sock *); /* 0x108 0x8 */ long sk_rcvtimeo; /* 0x110 0x8 */ int sk_rcvlowat; /* 0x118 0x4 */ __u8 __cacheline_group_end__sock_read_rx[0]; /* 0x11c 0 */ __u8 __cacheline_group_begin__sock_read_rxtx[0]; /* 0x11c 0 */ int sk_err; /* 0x11c 0x4 */ struct socket * sk_socket; /* 0x120 0x8 */ struct mem_cgroup * sk_memcg; /* 0x128 0x8 */ rcu * sk_policy[2]; /* 0x130 0x10 */ /* --- cacheline 5 boundary (320 bytes) --- */ __u8 __cacheline_group_end__sock_read_rxtx[0]; /* 0x140 0 */ __u8 __cacheline_group_begin__sock_write_rxtx[0]; /* 0x140 0 */ socket_lock_t sk_lock; /* 0x140 0x20 */ u32 sk_reserved_mem; /* 0x160 0x4 */ int sk_forward_alloc; /* 0x164 0x4 */ u32 sk_tsflags; /* 0x168 0x4 */ __u8 __cacheline_group_end__sock_write_rxtx[0]; /* 0x16c 0 */ __u8 __cacheline_group_begin__sock_write_tx[0]; /* 0x16c 0 */ int sk_write_pending; /* 0x16c 0x4 */ atomic_t sk_omem_alloc; /* 0x170 0x4 */ int sk_sndbuf; /* 0x174 0x4 */ int sk_wmem_queued; /* 0x178 0x4 */ refcount_t sk_wmem_alloc; /* 0x17c 0x4 */ /* --- cacheline 6 boundary (384 bytes) --- */ unsigned long sk_tsq_flags; /* 0x180 0x8 */ union { struct sk_buff * sk_send_head; /* 0x188 0x8 */ struct rb_root tcp_rtx_queue; /* 0x188 0x8 */ }; /* 0x188 0x8 */ union { struct sk_buff * sk_send_head; /* 0 0x8 */ struct rb_root tcp_rtx_queue; /* 0 0x8 */ }; struct sk_buff_head sk_write_queue; /* 0x190 0x18 */ u32 sk_dst_pending_confirm; /* 0x1a8 0x4 */ u32 sk_pacing_status; /* 0x1ac 0x4 */ struct page_frag sk_frag; /* 0x1b0 0x10 */ /* --- cacheline 7 boundary (448 bytes) --- */ struct timer_list sk_timer; /* 0x1c0 0x28 */ /* XXX last struct has 4 bytes of padding */ unsigned long sk_pacing_rate; /* 0x1e8 0x8 */ atomic_t sk_zckey; /* 0x1f0 0x4 */ atomic_t sk_tskey; /* 0x1f4 0x4 */ __u8 __cacheline_group_end__sock_write_tx[0]; /* 0x1f8 0 */ __u8 __cacheline_group_begin__sock_read_tx[0]; /* 0x1f8 0 */ unsigned long sk_max_pacing_rate; /* 0x1f8 0x8 */ /* --- cacheline 8 boundary (512 bytes) --- */ long sk_sndtimeo; /* 0x200 0x8 */ u32 sk_priority; /* 0x208 0x4 */ u32 sk_mark; /* 0x20c 0x4 */ rcu * sk_dst_cache; /* 0x210 0x8 */ netdev_features_t sk_route_caps; /* 0x218 0x8 */ u16 sk_gso_type; /* 0x220 0x2 */ u16 sk_gso_max_segs; /* 0x222 0x2 */ unsigned int sk_gso_max_size; /* 0x224 0x4 */ gfp_t sk_allocation; /* 0x228 0x4 */ u32 sk_txhash; /* 0x22c 0x4 */ u8 sk_pacing_shift; /* 0x230 0x1 */ bool sk_use_task_frag; /* 0x231 0x1 */ __u8 __cacheline_group_end__sock_read_tx[0]; /* 0x232 0 */ u8 sk_gso_disabled:1; /* 0x232: 0 0x1 */ u8 sk_kern_sock:1; /* 0x232:0x1 0x1 */ u8 sk_no_check_tx:1; /* 0x232:0x2 0x1 */ u8 sk_no_check_rx:1; /* 0x232:0x3 0x1 */ /* XXX 4 bits hole, try to pack */ u8 sk_shutdown; /* 0x233 0x1 */ u16 sk_type; /* 0x234 0x2 */ u16 sk_protocol; /* 0x236 0x2 */ unsigned long sk_lingertime; /* 0x238 0x8 */ /* --- cacheline 9 boundary (576 bytes) --- */ struct proto * sk_prot_creator; /* 0x240 0x8 */ rwlock_t sk_callback_lock; /* 0x248 0x8 */ int sk_err_soft; /* 0x250 0x4 */ u32 sk_ack_backlog; /* 0x254 0x4 */ u32 sk_max_ack_backlog; /* 0x258 0x4 */ kuid_t sk_uid; /* 0x25c 0x4 */ spinlock_t sk_peer_lock; /* 0x260 0x4 */ int sk_bind_phc; /* 0x264 0x4 */ struct pid * sk_peer_pid; /* 0x268 0x8 */ const struct cred * sk_peer_cred; /* 0x270 0x8 */ ktime_t sk_stamp; /* 0x278 0x8 */ /* --- cacheline 10 boundary (640 bytes) --- */ int sk_disconnects; /* 0x280 0x4 */ u8 sk_txrehash; /* 0x284 0x1 */ u8 sk_clockid; /* 0x285 0x1 */ u8 sk_txtime_deadline_mode:1; /* 0x286: 0 0x1 */ u8 sk_txtime_report_errors:1; /* 0x286:0x1 0x1 */ u8 sk_txtime_unused:6; /* 0x286:0x2 0x1 */ /* XXX 1 byte hole, try to pack */ void * sk_user_data; /* 0x288 0x8 */ void * sk_security; /* 0x290 0x8 */ struct sock_cgroup_data sk_cgrp_data; /* 0x298 0x8 */ void (*sk_state_change)(struct sock *); /* 0x2a0 0x8 */ void (*sk_write_space)(struct sock *); /* 0x2a8 0x8 */ void (*sk_error_report)(struct sock *); /* 0x2b0 0x8 */ int (*sk_backlog_rcv)(struct sock *, struct sk_buff *); /* 0x2b8 0x8 */ /* --- cacheline 11 boundary (704 bytes) --- */ void (*sk_destruct)(struct sock *); /* 0x2c0 0x8 */ rcu * sk_reuseport_cb; /* 0x2c8 0x8 */ rcu * sk_bpf_storage; /* 0x2d0 0x8 */ struct callback_head sk_rcu __attribute__((__aligned__(8))); /* 0x2d8 0x10 */ netns_tracker ns_tracker; /* 0x2e8 0x8 */ /* size: 752, cachelines: 12, members: 105 */ /* sum members: 749, holes: 1, sum holes: 1 */ /* sum bitfield members: 12 bits, bit holes: 1, sum bit holes: 4 bits */ /* paddings: 1, sum paddings: 4 */ /* forced alignments: 1 */ /* last cacheline: 48 bytes */ }; Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Link: https://lore.kernel.org/r/20240216162006.2342759-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/sock.h | 108 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 61 insertions(+), 47 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index a9d99a9c583f..796a902cf4c1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -378,14 +378,10 @@ struct sock { #define sk_flags __sk_common.skc_flags #define sk_rxhash __sk_common.skc_rxhash - /* early demux fields */ - struct dst_entry __rcu *sk_rx_dst; - int sk_rx_dst_ifindex; - u32 sk_rx_dst_cookie; + __cacheline_group_begin(sock_write_rx); - socket_lock_t sk_lock; atomic_t sk_drops; - int sk_rcvlowat; + __s32 sk_peek_off; struct sk_buff_head sk_error_queue; struct sk_buff_head sk_receive_queue; /* @@ -402,18 +398,24 @@ struct sock { struct sk_buff *head; struct sk_buff *tail; } sk_backlog; - #define sk_rmem_alloc sk_backlog.rmem_alloc - int sk_forward_alloc; - u32 sk_reserved_mem; + __cacheline_group_end(sock_write_rx); + + __cacheline_group_begin(sock_read_rx); + /* early demux fields */ + struct dst_entry __rcu *sk_rx_dst; + int sk_rx_dst_ifindex; + u32 sk_rx_dst_cookie; + #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int sk_ll_usec; - /* ===== mostly read cache line ===== */ unsigned int sk_napi_id; + u16 sk_busy_poll_budget; + u8 sk_prefer_busy_poll; #endif + u8 sk_userlocks; int sk_rcvbuf; - int sk_disconnects; struct sk_filter __rcu *sk_filter; union { @@ -422,15 +424,33 @@ struct sock { struct socket_wq *sk_wq_raw; /* public: */ }; + + void (*sk_data_ready)(struct sock *sk); + long sk_rcvtimeo; + int sk_rcvlowat; + __cacheline_group_end(sock_read_rx); + + __cacheline_group_begin(sock_read_rxtx); + int sk_err; + struct socket *sk_socket; + struct mem_cgroup *sk_memcg; #ifdef CONFIG_XFRM struct xfrm_policy __rcu *sk_policy[2]; #endif + __cacheline_group_end(sock_read_rxtx); - struct dst_entry __rcu *sk_dst_cache; + __cacheline_group_begin(sock_write_rxtx); + socket_lock_t sk_lock; + u32 sk_reserved_mem; + int sk_forward_alloc; + u32 sk_tsflags; + __cacheline_group_end(sock_write_rxtx); + + __cacheline_group_begin(sock_write_tx); + int sk_write_pending; atomic_t sk_omem_alloc; int sk_sndbuf; - /* ===== cache line for TX ===== */ int sk_wmem_queued; refcount_t sk_wmem_alloc; unsigned long sk_tsq_flags; @@ -439,22 +459,36 @@ struct sock { struct rb_root tcp_rtx_queue; }; struct sk_buff_head sk_write_queue; - __s32 sk_peek_off; - int sk_write_pending; - __u32 sk_dst_pending_confirm; + u32 sk_dst_pending_confirm; u32 sk_pacing_status; /* see enum sk_pacing */ - long sk_sndtimeo; + struct page_frag sk_frag; struct timer_list sk_timer; - __u32 sk_priority; - __u32 sk_mark; + unsigned long sk_pacing_rate; /* bytes per second */ + atomic_t sk_zckey; + atomic_t sk_tskey; + __cacheline_group_end(sock_write_tx); + + __cacheline_group_begin(sock_read_tx); unsigned long sk_max_pacing_rate; - struct page_frag sk_frag; + long sk_sndtimeo; + u32 sk_priority; + u32 sk_mark; + struct dst_entry __rcu *sk_dst_cache; netdev_features_t sk_route_caps; - int sk_gso_type; +#ifdef CONFIG_SOCK_VALIDATE_XMIT + struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb); +#endif + u16 sk_gso_type; + u16 sk_gso_max_segs; unsigned int sk_gso_max_size; gfp_t sk_allocation; - __u32 sk_txhash; + u32 sk_txhash; + u8 sk_pacing_shift; + bool sk_use_task_frag; + __cacheline_group_end(sock_read_tx); /* * Because of non atomicity rules, all @@ -463,64 +497,44 @@ struct sock { u8 sk_gso_disabled : 1, sk_kern_sock : 1, sk_no_check_tx : 1, - sk_no_check_rx : 1, - sk_userlocks : 4; - u8 sk_pacing_shift; + sk_no_check_rx : 1; + u8 sk_shutdown; u16 sk_type; u16 sk_protocol; - u16 sk_gso_max_segs; unsigned long sk_lingertime; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; - int sk_err, - sk_err_soft; + int sk_err_soft; u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; - u8 sk_txrehash; -#ifdef CONFIG_NET_RX_BUSY_POLL - u8 sk_prefer_busy_poll; - u16 sk_busy_poll_budget; -#endif spinlock_t sk_peer_lock; int sk_bind_phc; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; - long sk_rcvtimeo; ktime_t sk_stamp; #if BITS_PER_LONG==32 seqlock_t sk_stamp_seq; #endif - atomic_t sk_tskey; - atomic_t sk_zckey; - u32 sk_tsflags; - u8 sk_shutdown; + int sk_disconnects; + u8 sk_txrehash; u8 sk_clockid; u8 sk_txtime_deadline_mode : 1, sk_txtime_report_errors : 1, sk_txtime_unused : 6; - bool sk_use_task_frag; - struct socket *sk_socket; void *sk_user_data; #ifdef CONFIG_SECURITY void *sk_security; #endif struct sock_cgroup_data sk_cgrp_data; - struct mem_cgroup *sk_memcg; void (*sk_state_change)(struct sock *sk); - void (*sk_data_ready)(struct sock *sk); void (*sk_write_space)(struct sock *sk); void (*sk_error_report)(struct sock *sk); int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); -#ifdef CONFIG_SOCK_VALIDATE_XMIT - struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk, - struct net_device *dev, - struct sk_buff *skb); -#endif void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; #ifdef CONFIG_BPF_SYSCALL -- cgit From 3f801968889459ecae1eab524b039676e6eaa319 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 14 Feb 2024 14:41:02 +0100 Subject: netfilter: move nf_reinject into nfnetlink_queue modules No need to keep this in the core, move it to the nfnetlink_queue module. nf_reroute is moved too, there were no other callers. Signed-off-by: Florian Westphal --- include/net/netfilter/nf_queue.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index c81021ab07aa..4aeffddb7586 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -35,7 +35,6 @@ struct nf_queue_handler { void nf_register_queue_handler(const struct nf_queue_handler *qh); void nf_unregister_queue_handler(void); -void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_free(struct nf_queue_entry *entry); -- cgit From 6030b3a469f8936d7f0f928e788f12a4fe14a4ca Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Fri, 16 Feb 2024 20:16:20 +0530 Subject: wifi: mac80211: check beacon countdown is complete on per link basis Currently, function to check if beacon countdown is complete uses deflink to fetch the beacon and check the counter. However, with MLO, there is a need to check the counter for the beacon in a particular link. Add support to use link_id in order to fetch the beacon from a particular link data. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240216144621.514385-2-quic_adisi@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fc223761e3af..25c892ea9eb3 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5566,10 +5566,12 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif, unsigned int link_id); /** * ieee80211_beacon_cntdwn_is_complete - find out if countdown reached 1 * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @link_id: valid link_id during MLO or 0 for non-MLO * * This function returns whether the countdown reached zero. */ -bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif); +bool ieee80211_beacon_cntdwn_is_complete(struct ieee80211_vif *vif, + unsigned int link_id); /** * ieee80211_color_change_finish - notify mac80211 about color change -- cgit From d73fbaf24c5a1e0698a7a5e17d66a5100efef72a Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 6 Feb 2024 16:54:06 +0200 Subject: wifi: mac80211: make associated BSS pointer visible to the driver Some drivers need the data in it, so move it to the link conf, which is exposed to the driver. Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206164849.6fe9782b87b4.Ifbffef638f07ca7f5c2b27f40d2cf2942d21de0b@changeid [remove bss pointer from internal struct, update docs] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 25c892ea9eb3..56c6ecb2c10a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -557,6 +557,10 @@ struct ieee80211_fils_discovery { * to that BSS) that can change during the lifetime of the BSS. * * @vif: reference to owning VIF + * @bss: the cfg80211 bss descriptor. Valid only for a station, and only + * when associated. Note: This contains information which is not + * necessarily authenticated. For example, information coming from probe + * responses. * @addr: (link) address used locally * @link_id: link ID, or 0 for non-MLO * @htc_trig_based_pkt_ext: default PE in 4us units, if BSS supports HE @@ -700,6 +704,7 @@ struct ieee80211_fils_discovery { */ struct ieee80211_bss_conf { struct ieee80211_vif *vif; + struct cfg80211_bss *bss; const u8 *bssid; unsigned int link_id; -- cgit From 7e899c1d6f0da2a98ebf6629274ef912d4c83359 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Feb 2024 13:54:29 +0200 Subject: wifi: cfg80211: clean up cfg80211_inform_bss_frame_data() Make cfg80211_inform_bss_frame_data() call the existing cfg80211_inform_bss_data() after parsing the frame in the appropriate way, so we have less code duplication. This required introducing a new CFG80211_BSS_FTYPE_S1G_BEACON, but that can be used by other drivers as well. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240216135047.874aed1eff5f.Ib7d88d126eec50c64763251a78cb432bb5df14df@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 57c2298af35b..f9eada2a26ec 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -7175,11 +7175,13 @@ size_t cfg80211_merge_profile(const u8 *ie, size_t ielen, * from a beacon or probe response * @CFG80211_BSS_FTYPE_BEACON: data comes from a beacon * @CFG80211_BSS_FTYPE_PRESP: data comes from a probe response + * @CFG80211_BSS_FTYPE_S1G_BEACON: data comes from an S1G beacon */ enum cfg80211_bss_frame_type { CFG80211_BSS_FTYPE_UNKNOWN, CFG80211_BSS_FTYPE_BEACON, CFG80211_BSS_FTYPE_PRESP, + CFG80211_BSS_FTYPE_S1G_BEACON, }; /** -- cgit From ee076b73e576b0a052d5686d873346b285ae50ea Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:46 +0800 Subject: net: mctp: avoid confusion over local/peer dest/source addresses We have a double-swap of local and peer addresses in mctp_alloc_local_tag; the arguments in both call sites are swapped, but there is also a swap in the implementation of alloc_local_tag. This is opaque because we're using source/dest address references, which don't match the local/peer semantics. Avoid this confusion by naming the arguments as 'local' and 'peer', and remove the double swap. The calling order now matches mctp_key_alloc. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- include/net/mctp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mctp.h b/include/net/mctp.h index da86e106c91d..f937a325ea6f 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -87,7 +87,7 @@ struct mctp_sock { }; /* Key for matching incoming packets to sockets or reassembly contexts. - * Packets are matched on (src,dest,tag). + * Packets are matched on (peer EID, local EID, tag). * * Lifetime / locking requirements: * @@ -254,7 +254,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, void mctp_key_unref(struct mctp_sk_key *key); struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, - mctp_eid_t daddr, mctp_eid_t saddr, + mctp_eid_t local, mctp_eid_t peer, bool manual, u8 *tagp); /* routing <--> device interface */ -- cgit From 43e6795574f5d75284a3cb21f5b76a5ffb98e8e6 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:50 +0800 Subject: net: mctp: separate key correlation across nets Currently, we lookup sk_keys from the entire struct net_namespace, which may contain multiple MCTP net IDs. In those cases we want to distinguish between endpoints with the same EID but different net ID. Add the net ID data to the struct mctp_sk_key, populate on add and filter on this during route lookup. For the ioctl interface, we use a default net of MCTP_INITIAL_DEFAULT_NET (ie., what will be in use for single-net configurations), but we'll extend the ioctl interface to provide net-specific tag allocation in an upcoming change. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- include/net/mctp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/mctp.h b/include/net/mctp.h index f937a325ea6f..0dfae6f51a32 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -133,6 +133,7 @@ struct mctp_sock { * - through an expiry timeout, on a per-socket timer */ struct mctp_sk_key { + unsigned int net; mctp_eid_t peer_addr; mctp_eid_t local_addr; /* MCTP_ADDR_ANY for local owned tags */ __u8 tag; /* incoming tag match; invert TO for local */ @@ -254,6 +255,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt, void mctp_key_unref(struct mctp_sk_key *key); struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, + unsigned int netid, mctp_eid_t local, mctp_eid_t peer, bool manual, u8 *tagp); -- cgit From 5fd5403964ecf047b03a9608a339bdc26a109f33 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 22 Feb 2024 14:28:19 -0800 Subject: genetlink: make info in GENL_REQ_ATTR_CHECK() const Make the local variable in GENL_REQ_ATTR_CHECK() const. genl_info_dump() returns a const pointer, so the macro is currently hard to use in genl dumps. Link: https://lore.kernel.org/r/20240222222819.156320-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/genetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index ecadba836ae5..9ece6e5a3ea8 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -153,7 +153,7 @@ static inline void *genl_info_userhdr(const struct genl_info *info) /* Report that a root attribute is missing */ #define GENL_REQ_ATTR_CHECK(info, attr) ({ \ - struct genl_info *__info = (info); \ + const struct genl_info *__info = (info); \ \ NL_REQ_ATTR_CHECK(__info->extack, NULL, __info->attrs, (attr)); \ }) -- cgit From 386520e0ecc01004d3a29c70c5a77d4bbf8a8420 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:15 +0000 Subject: rtnetlink: add RTNL_FLAG_DUMP_UNLOCKED flag Similarly to RTNL_FLAG_DOIT_UNLOCKED, this new flag allows dump operations registered via rtnl_register() or rtnl_register_module() to opt-out from RTNL protection. Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 6506221c5fe3..3bfb80bad173 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -12,6 +12,7 @@ typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *); enum rtnl_link_flags { RTNL_FLAG_DOIT_UNLOCKED = BIT(0), RTNL_FLAG_BULK_DEL_SUPPORTED = BIT(1), + RTNL_FLAG_DUMP_UNLOCKED = BIT(2), }; enum rtnl_kinds { -- cgit From 22e36ea9f5d7707ae3d64c497d172f4ef735c353 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:17 +0000 Subject: inet: allow ip_valid_fib_dump_req() to be called with RTNL or RCU Add a new field into struct fib_dump_filter, to let callers tell if they use RTNL locking or RCU. This is used in the following patch, when inet_dump_fib() no longer holds RTNL. Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index d4667b7797e3..9b2f69ba5e49 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -264,6 +264,7 @@ struct fib_dump_filter { bool filter_set; bool dump_routes; bool dump_exceptions; + bool rtnl_held; unsigned char protocol; unsigned char rt_type; unsigned int flags; -- cgit From 0ac3fa0c3b365f97c3969f391edf7b44d3bb210d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2024 10:50:18 +0000 Subject: nexthop: allow nexthop_mpath_fill_node() to be called without RTNL nexthop_mpath_fill_node() will be potentially called from contexts holding rcu_lock instead of RTNL. Suggested-by: Ido Schimmel Link: https://lore.kernel.org/all/ZdZDWVdjMaQkXBgW@shredder/ Signed-off-by: Eric Dumazet Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- include/net/nexthop.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 6647ad509faa..77e99cba60ad 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -317,7 +317,7 @@ static inline int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, u8 rt_family) { - struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); int i; for (i = 0; i < nhg->num_nh; i++) { -- cgit From c3718936ec47ae811a7ce9a618b6cb1cda835bab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 23 Feb 2024 20:10:54 +0000 Subject: ipv6: anycast: complete RCU handling of struct ifacaddr6 struct ifacaddr6 are already freed after RCU grace period. Add __rcu qualifier to aca_next pointer, and idev->ac_list Add relevant rcu_assign_pointer() and dereference accessors. ipv6_chk_acast_dev() no longer needs to acquire idev->lock. /proc/net/anycast6 is now purely RCU protected, it no longer acquires idev->lock. Similarly in6_dump_addrs() can use RCU protection to iterate through anycast addresses. It was relying on a mixture of RCU and RTNL but next patches will get rid of RTNL there. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240223201054.220534-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/if_inet6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f07642264c1e..238ad3349456 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -144,7 +144,7 @@ struct ipv6_ac_socklist { struct ifacaddr6 { struct in6_addr aca_addr; struct fib6_info *aca_rt; - struct ifacaddr6 *aca_next; + struct ifacaddr6 __rcu *aca_next; struct hlist_node aca_addr_lst; int aca_users; refcount_t aca_refcnt; @@ -196,7 +196,7 @@ struct inet6_dev { spinlock_t mc_report_lock; /* mld query report lock */ struct mutex mc_lock; /* mld global lock */ - struct ifacaddr6 *ac_list; + struct ifacaddr6 __rcu *ac_list; rwlock_t lock; refcount_t refcnt; __u32 if_flags; -- cgit From 12a686c2e761f1f1f6e6e2117a9ab9c6de2ac8a7 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 26 Feb 2024 02:24:52 +0000 Subject: net: make SK_MEMORY_PCPU_RESERV tunable This patch adds /proc/sys/net/core/mem_pcpu_rsv sysctl file, to make SK_MEMORY_PCPU_RESERV tunable. Commit 3cd3399dd7a8 ("net: implement per-cpu reserves for memory_allocated") introduced per-cpu forward alloc cache: "Implement a per-cpu cache of +1/-1 MB, to reduce number of changes to sk->sk_prot->memory_allocated, which would otherwise be cause of false sharing." sk_prot->memory_allocated points to global atomic variable: atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; If increasing the per-cpu cache size from 1MB to e.g. 16MB, changes to sk->sk_prot->memory_allocated can be further reduced. Performance may be improved on system with many cores. Signed-off-by: Adam Li Reviewed-by: Christoph Lameter (Ampere) Signed-off-by: David S. Miller --- include/net/sock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 796a902cf4c1..09a0cde8bf52 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1443,6 +1443,7 @@ sk_memory_allocated(const struct sock *sk) /* 1 MB per cpu, in page units */ #define SK_MEMORY_PCPU_RESERVE (1 << (20 - PAGE_SHIFT)) +extern int sysctl_mem_pcpu_rsv; static inline void sk_memory_allocated_add(struct sock *sk, int amt) @@ -1451,7 +1452,7 @@ sk_memory_allocated_add(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_add_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve >= SK_MEMORY_PCPU_RESERVE) { + if (local_reserve >= READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } @@ -1465,7 +1466,7 @@ sk_memory_allocated_sub(struct sock *sk, int amt) preempt_disable(); local_reserve = __this_cpu_sub_return(*sk->sk_prot->per_cpu_fw_alloc, amt); - if (local_reserve <= -SK_MEMORY_PCPU_RESERVE) { + if (local_reserve <= -READ_ONCE(sysctl_mem_pcpu_rsv)) { __this_cpu_sub(*sk->sk_prot->per_cpu_fw_alloc, local_reserve); atomic_long_add(local_reserve, sk->sk_prot->memory_allocated); } -- cgit From 48e4704aedb9f17213206dc649fdac90f94cb749 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:18 +0800 Subject: tcp: add a dropreason definitions and prepare for cookie check Adding one drop reason to detect the condition of skb dropped because of hook points in cookie check and extending NO_SOCKET to consider another two cases can be used later. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/dropreason-core.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 6d3a20163260..a871f061558d 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -54,6 +54,7 @@ FN(NEIGH_QUEUEFULL) \ FN(NEIGH_DEAD) \ FN(TC_EGRESS) \ + FN(SECURITY_HOOK) \ FN(QDISC_DROP) \ FN(CPU_BACKLOG) \ FN(XDP) \ @@ -105,7 +106,13 @@ enum skb_drop_reason { SKB_CONSUMED, /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */ SKB_DROP_REASON_NOT_SPECIFIED, - /** @SKB_DROP_REASON_NO_SOCKET: socket not found */ + /** + * @SKB_DROP_REASON_NO_SOCKET: no valid socket that can be used. + * Reason could be one of three cases: + * 1) no established/listening socket found during lookup process + * 2) no valid request socket during 3WHS process + * 3) no valid child socket during 3WHS process + */ SKB_DROP_REASON_NO_SOCKET, /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */ SKB_DROP_REASON_PKT_TOO_SMALL, @@ -271,6 +278,8 @@ enum skb_drop_reason { SKB_DROP_REASON_NEIGH_DEAD, /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */ SKB_DROP_REASON_TC_EGRESS, + /** @SKB_DROP_REASON_SECURITY_HOOK: dropped due to security HOOK */ + SKB_DROP_REASON_SECURITY_HOOK, /** * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting ( * failed to enqueue to current qdisc) -- cgit From 3d359faba191c95e97ac6699b0163e797812bfca Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:23 +0800 Subject: tcp: introduce dropreasons in receive path Soon later patches can use these relatively more accurate reasons to recognise and find out the cause. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/dropreason-core.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index a871f061558d..9707ab54fdd5 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -30,6 +30,7 @@ FN(TCP_AOFAILURE) \ FN(SOCKET_BACKLOG) \ FN(TCP_FLAGS) \ + FN(TCP_ABORT_ON_DATA) \ FN(TCP_ZEROWINDOW) \ FN(TCP_OLD_DATA) \ FN(TCP_OVERWINDOW) \ @@ -37,6 +38,7 @@ FN(TCP_RFC7323_PAWS) \ FN(TCP_OLD_SEQUENCE) \ FN(TCP_INVALID_SEQUENCE) \ + FN(TCP_INVALID_ACK_SEQUENCE) \ FN(TCP_RESET) \ FN(TCP_INVALID_SYN) \ FN(TCP_CLOSE) \ @@ -204,6 +206,11 @@ enum skb_drop_reason { SKB_DROP_REASON_SOCKET_BACKLOG, /** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */ SKB_DROP_REASON_TCP_FLAGS, + /** + * @SKB_DROP_REASON_TCP_ABORT_ON_DATA: abort on data, corresponding to + * LINUX_MIB_TCPABORTONDATA + */ + SKB_DROP_REASON_TCP_ABORT_ON_DATA, /** * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero, * see LINUX_MIB_TCPZEROWINDOWDROP @@ -228,13 +235,19 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_OFOMERGE, /** * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to - * LINUX_MIB_PAWSESTABREJECTED + * LINUX_MIB_PAWSESTABREJECTED, LINUX_MIB_PAWSACTIVEREJECTED */ SKB_DROP_REASON_TCP_RFC7323_PAWS, /** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */ SKB_DROP_REASON_TCP_OLD_SEQUENCE, /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */ SKB_DROP_REASON_TCP_INVALID_SEQUENCE, + /** + * @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ + * field because ack sequence is not in the window between snd_una + * and snd_nxt + */ + SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE, /** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */ SKB_DROP_REASON_TCP_RESET, /** -- cgit From 7d6ed9afde8547723f6f96f81f984cc6c48eef52 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:25 +0800 Subject: tcp: add dropreasons in tcp_rcv_state_process() In this patch, I equipped this function with more dropreasons, but it still doesn't work yet, which I will do later. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 33bf92dff0af..af2a4dcd4518 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -348,7 +348,7 @@ void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, int *karg); -int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); +enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); -- cgit From b9825695930546af725b1e686b8eaf4c71201728 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 26 Feb 2024 11:22:26 +0800 Subject: tcp: make the dropreason really work when calling tcp_rcv_state_process() Update three callers including both ipv4 and ipv6 and let the dropreason mechanism work in reality. Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index af2a4dcd4518..6ae35199d3b3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -396,8 +396,8 @@ enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen, bool *lost_race); -int tcp_child_process(struct sock *parent, struct sock *child, - struct sk_buff *skb); +enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int newly_lost, int flag); void tcp_clear_retrans(struct tcp_sock *tp); -- cgit From 67c8e4bb4f54ad22ca0ce4700c8728ffd73acb66 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 26 Feb 2024 14:14:11 +0100 Subject: net: ioam6: multicast event Add a multicast group to the ioam6 generic netlink family and provide ioam6_event() to send an ioam6 event to the multicast group. Reviewed-by: David Ahern Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/net/ioam6.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/ioam6.h b/include/net/ioam6.h index 781d2d8b2f29..2cbbee6e806a 100644 --- a/include/net/ioam6.h +++ b/include/net/ioam6.h @@ -12,6 +12,7 @@ #include #include #include +#include #include struct ioam6_namespace { @@ -65,4 +66,7 @@ void ioam6_exit(void); int ioam6_iptunnel_init(void); void ioam6_iptunnel_exit(void); +void ioam6_event(enum ioam6_event_type type, struct net *net, gfp_t gfp, + void *opt, unsigned int opt_len); + #endif /* _NET_IOAM6_H */ -- cgit From e7135f484994494a38071e3653a83d21d305f50c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:28 +0000 Subject: ipv6: annotate data-races around cnf.mtu6 idev->cnf.mtu6 might be read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ip6_route.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 52a51c69aa9d..a30c6aa9e5cf 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -332,7 +332,7 @@ static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst rcu_read_lock(); idev = __in6_dev_get(dst->dev); if (idev) - mtu = idev->cnf.mtu6; + mtu = READ_ONCE(idev->cnf.mtu6); rcu_read_unlock(); out: -- cgit From 32f754176e889cdfe989ef08ece19859427755df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:30 +0000 Subject: ipv6: annotate data-races around cnf.forwarding idev->cnf.forwarding and net->ipv6.devconf_all->forwarding might be read locklessly, add appropriate READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/ipv6.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cf25ea21d770..88a8e554f7a1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -534,13 +534,15 @@ static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) return 0; } -static inline bool ipv6_accept_ra(struct inet6_dev *idev) +static inline bool ipv6_accept_ra(const struct inet6_dev *idev) { + s32 accept_ra = READ_ONCE(idev->cnf.accept_ra); + /* If forwarding is enabled, RA are not accepted unless the special * hybrid mode (accept_ra=2) is enabled. */ - return idev->cnf.forwarding ? idev->cnf.accept_ra == 2 : - idev->cnf.accept_ra; + return READ_ONCE(idev->cnf.forwarding) ? accept_ra == 2 : + accept_ra; } #define IPV6_FRAG_HIGH_THRESH (4 * 1024*1024) /* 4194304 */ -- cgit From fca34cc075996767fbbdb6252be9ddd21c34c920 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Feb 2024 13:54:32 +0000 Subject: ipv6: annotate data-races around idev->cnf.ignore_routes_with_linkdown idev->cnf.ignore_routes_with_linkdown can be used without any locks, add appropriate annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 30d6f1e84e46..9d06eb945509 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -417,7 +417,7 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) if (unlikely(!idev)) return true; - return !!idev->cnf.ignore_routes_with_linkdown; + return !!READ_ONCE(idev->cnf.ignore_routes_with_linkdown); } void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); -- cgit From 04577bfa99ac8cfb7a43dbbba09584e0b1086cee Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Wed, 28 Feb 2024 09:44:56 +0100 Subject: wifi: mac80211: add link id to ieee80211_gtk_rekey_add() In MLO, we need the link id in the GTK key to be given by the driver after rekeying in wowlan, so add that. Signed-off-by: Shaul Triebitz Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094500.ce1bfc83a680.I43a6f8ab2804ee07116a37d5b9ec601b843464b1@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 56c6ecb2c10a..34d66d0a24b1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5912,6 +5912,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf); * ieee80211_gtk_rekey_add - add a GTK key from rekeying during WoWLAN * @vif: the virtual interface to add the key on * @keyconf: new key data + * @link_id: the link id of the key or -1 for non-MLO * * When GTK rekeying was done while the system was suspended, (a) new * key(s) will be available. These will be needed by mac80211 for proper @@ -5939,7 +5940,8 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf); */ struct ieee80211_key_conf * ieee80211_gtk_rekey_add(struct ieee80211_vif *vif, - struct ieee80211_key_conf *keyconf); + struct ieee80211_key_conf *keyconf, + int link_id); /** * ieee80211_gtk_rekey_notify - notify userspace supplicant of rekeying -- cgit From 68f6c6afbcebdc3acdc6084abfe453f4cba6b9dc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:48:11 +0100 Subject: wifi: mac80211: add ieee80211_vif_link_active() helper We sometimes need to check if a link is active, and this is complicated by the fact that active_links has no bits set when the vif isn't (acting as) an MLD. Add a small new helper ieee80211_vif_link_active() to make that a bit easier, and use it in a few places. Reviewed-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228094901.688760aff5f7.I06892a503f5ecb9563fbd678d35d08daf7a044b0@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 34d66d0a24b1..6c6d8210d637 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2011,6 +2011,21 @@ static inline bool ieee80211_vif_is_mld(const struct ieee80211_vif *vif) return vif->valid_links != 0; } +/** + * ieee80211_vif_link_active - check if a given link is active + * @vif: the vif + * @link_id: the link ID to check + * Return: %true if the vif is an MLD and the link is active, or if + * the vif is not an MLD and the link ID is 0; %false otherwise. + */ +static inline bool ieee80211_vif_link_active(const struct ieee80211_vif *vif, + unsigned int link_id) +{ + if (!ieee80211_vif_is_mld(vif)) + return link_id == 0; + return vif->active_links & BIT(link_id); +} + #define for_each_vif_active_link(vif, link, link_id) \ for (link_id = 0; link_id < ARRAY_SIZE((vif)->link_conf); link_id++) \ if ((!(vif)->active_links || \ -- cgit From 22667035e5ddb7b68c7d473693b321fb9e20a397 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:41 +0100 Subject: wifi: cfg80211: expose cfg80211_iter_rnr() to drivers In mac80211 we'll need to look at reduced neighbor report entries for channel switch purposes, so export the iteration function to make that simpler. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228095718.0954809964ef.I53e95c017aa71f14e8d1057afbbc75982ddb43df@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f9eada2a26ec..53653d234d39 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6870,6 +6870,38 @@ cfg80211_find_vendor_ie(unsigned int oui, int oui_type, return (const void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len); } +/** + * enum cfg80211_rnr_iter_ret - reduced neighbor report iteration state + * @RNR_ITER_CONTINUE: continue iterating with the next entry + * @RNR_ITER_BREAK: break iteration and return success + * @RNR_ITER_ERROR: break iteration and return error + */ +enum cfg80211_rnr_iter_ret { + RNR_ITER_CONTINUE, + RNR_ITER_BREAK, + RNR_ITER_ERROR, +}; + +/** + * cfg80211_iter_rnr - iterate reduced neighbor report entries + * @elems: the frame elements to iterate RNR elements and then + * their entries in + * @elems_len: length of the elements + * @iter: iteration function, see also &enum cfg80211_rnr_iter_ret + * for the return value + * @iter_data: additional data passed to the iteration function + * Return: %true on success (after successfully iterating all entries + * or if the iteration function returned %RNR_ITER_BREAK), + * %false on error (iteration function returned %RNR_ITER_ERROR + * or elements were malformed.) + */ +bool cfg80211_iter_rnr(const u8 *elems, size_t elems_len, + enum cfg80211_rnr_iter_ret + (*iter)(void *data, u8 type, + const struct ieee80211_neighbor_ap_info *info, + const u8 *tbtt_info, u8 tbtt_info_len), + void *iter_data); + /** * cfg80211_defragment_element - Defrag the given element data into a buffer * -- cgit From 8ade3356b25ab2522892a21832a709e7ad5f8168 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:42 +0100 Subject: wifi: cfg80211: allow cfg80211_defragment_element() without output If we just want to determine the length of the fragmented data, we basically need the same logic, and really we want it to be _literally_ the same logic, so it cannot be out of sync in any way. Allow calling cfg80211_defragment_element() without an output buffer, where it then just returns the required output size. Also add this to the tests, just to exercise it, using the pre-calculated length to really do the defragmentation, which checks that this is sufficient. Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Benjamin Berg Link: https://msgid.link/20240228095718.6d6565b9e3f2.Ib441903f4b8644ba04b1c766f90580ee6f54fc66@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 53653d234d39..2e2be4fd2bb6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6908,8 +6908,8 @@ bool cfg80211_iter_rnr(const u8 *elems, size_t elems_len, * @elem: the element to defragment * @ies: elements where @elem is contained * @ieslen: length of @ies - * @data: buffer to store element data - * @data_len: length of @data + * @data: buffer to store element data, or %NULL to just determine size + * @data_len: length of @data, or 0 * @frag_id: the element ID of fragments * * Return: length of @data, or -EINVAL on error -- cgit From e6ee3a3713fe38e4ae94318e9cb18b39ec30da4a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:43 +0100 Subject: wifi: mac80211: pass link_id to channel switch ops For CSA to work correctly in multi-link scenarios, pass the link_id to the relevant callbacks. While at it, unify/deduplicate the tracing for them. Reviewed-by: Miriam Rachel Korenblit Reviewed-by: Emmanuel Grumbach Link: https://msgid.link/20240228095718.b7726635c054.I0be5d00af4acb48cfbd23a9dbf067f9aeb66469d@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6c6d8210d637..8ea9fa81e68c 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1763,8 +1763,9 @@ struct ieee80211_conf { * @chandef: the new channel to switch to * @count: the number of TBTT's until the channel switch event * @delay: maximum delay between the time the AP transmitted the last beacon in - * current channel and the expected time of the first beacon in the new - * channel, expressed in TU. + * current channel and the expected time of the first beacon in the new + * channel, expressed in TU. + * @link_id: the link ID of the link doing the channel switch, 0 for non-MLO */ struct ieee80211_channel_switch { u64 timestamp; @@ -1772,6 +1773,7 @@ struct ieee80211_channel_switch { bool block_tx; struct cfg80211_chan_def chandef; u8 count; + u8 link_id; u32 delay; }; -- cgit From 5ecd5d82b17ee2818548aeb5eb52f3a5b5cae18c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:44 +0100 Subject: wifi: mac80211: pass link conf to abort_channel_switch Pass the link conf to the abort_channel_switch driver method so the driver can handle things correctly. Reviewed-by: Emmanuel Grumbach Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228095718.27f621106ddd.Iadd3d69b722ffe5934779a32a0e4e596a4e33ed4@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8ea9fa81e68c..c622450ac012 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4268,7 +4268,7 @@ struct ieee80211_prep_tx_info { * after a channel switch procedure is completed, allowing the * driver to go back to a normal configuration. * @abort_channel_switch: This is an optional callback that is called - * when channel switch procedure was completed, allowing the + * when channel switch procedure was aborted, allowing the * driver to go back to a normal configuration. * @channel_switch_rx_beacon: This is an optional callback that is called * when channel switch procedure is in progress and additional beacon with @@ -4664,7 +4664,8 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_bss_conf *link_conf); void (*abort_channel_switch)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + struct ieee80211_bss_conf *link_conf); void (*channel_switch_rx_beacon)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel_switch *ch_switch); -- cgit From 6f0107d195a812074c6f977d492ffc99ba3ff2bb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Feb 2024 09:55:45 +0100 Subject: wifi: mac80211: introduce a feature flag for quiet in CSA When doing CSA in multi-link, there really isn't a need to stop transmissions entirely. Add a feature flag for drivers to indicate they can handle quiet in CSA (be it by parsing themselves, or by implementing drv_pre_channel_switch()), to make that possible. Also clean up the csa_block_tx handling: it clearly cannot handle multi-link due to the way queues are stopped, move it to the sdata. Drivers should be doing it themselves for working properly during CSA in MLO anyway. Also rename it to indicate that it reflects TX was blocked at mac80211. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240228095719.258439191541.I2469d206e2bf5cb244cfde2b4bbc2ae6d1cd3dd9@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c622450ac012..353488ab94a2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2778,6 +2778,11 @@ struct ieee80211_txq { * @IEEE80211_HW_DISALLOW_PUNCTURING: HW requires disabling puncturing in EHT * and connecting with a lower bandwidth instead * + * @IEEE80211_HW_HANDLES_QUIET_CSA: HW/driver handles quieting for CSA, so + * no need to stop queues. This really should be set by a driver that + * implements MLO, so operation can continue on other links when one + * link is switching. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2836,6 +2841,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_DETECTS_COLOR_COLLISION, IEEE80211_HW_MLO_MCAST_MULTI_LINK_TX, IEEE80211_HW_DISALLOW_PUNCTURING, + IEEE80211_HW_HANDLES_QUIET_CSA, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS -- cgit From 93e16ea025d234d0ed01d9dc9c819257a2159bb6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Mar 2024 19:37:37 +0000 Subject: net: gro: rename skb_gro_header_hard() skb_gro_header_hard() is renamed to skb_gro_may_pull() to match the convention used by common helpers like pskb_may_pull(). This means the condition is inverted: if (skb_gro_header_hard(skb, hlen)) slow_path(); becomes: if (!skb_gro_may_pull(skb, hlen)) slow_path(); Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Signed-off-by: Paolo Abeni --- include/net/gro.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/gro.h b/include/net/gro.h index b435f0ddbf64..ffc2c96d263b 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -145,9 +145,10 @@ static inline void *skb_gro_header_fast(struct sk_buff *skb, return NAPI_GRO_CB(skb)->frag0 + offset; } -static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) +static inline bool skb_gro_may_pull(const struct sk_buff *skb, + unsigned int hlen) { - return NAPI_GRO_CB(skb)->frag0_len < hlen; + return hlen <= NAPI_GRO_CB(skb)->frag0_len; } static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) @@ -172,7 +173,7 @@ static inline void *skb_gro_header(struct sk_buff *skb, void *ptr; ptr = skb_gro_header_fast(skb, offset); - if (skb_gro_header_hard(skb, hlen)) + if (!skb_gro_may_pull(skb, hlen)) ptr = skb_gro_header_slow(skb, hlen, offset); return ptr; } -- cgit From bd56a29c7a4ebcd3ca69505a2e676449e60965f3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Mar 2024 19:37:38 +0000 Subject: net: gro: change skb_gro_network_header() Change skb_gro_network_header() to accept a const sk_buff and to no longer check if frag0 is NULL or not. This allows to remove skb_gro_frag0_invalidate() which is seen in profiles when header-split is enabled. sk_buff parameter is constified for skb_gro_header_fast(), inet_gro_compute_pseudo() and ip6_gro_compute_pseudo(). Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Signed-off-by: Paolo Abeni --- include/net/gro.h | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/gro.h b/include/net/gro.h index ffc2c96d263b..3c3666e46b30 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -139,7 +139,7 @@ static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) NAPI_GRO_CB(skb)->data_offset += len; } -static inline void *skb_gro_header_fast(struct sk_buff *skb, +static inline void *skb_gro_header_fast(const struct sk_buff *skb, unsigned int offset) { return NAPI_GRO_CB(skb)->frag0 + offset; @@ -151,24 +151,17 @@ static inline bool skb_gro_may_pull(const struct sk_buff *skb, return hlen <= NAPI_GRO_CB(skb)->frag0_len; } -static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) -{ - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; -} - static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, unsigned int offset) { if (!pskb_may_pull(skb, hlen)) return NULL; - skb_gro_frag0_invalidate(skb); return skb->data + offset; } -static inline void *skb_gro_header(struct sk_buff *skb, - unsigned int hlen, unsigned int offset) +static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen, + unsigned int offset) { void *ptr; @@ -178,13 +171,16 @@ static inline void *skb_gro_header(struct sk_buff *skb, return ptr; } -static inline void *skb_gro_network_header(struct sk_buff *skb) +static inline void *skb_gro_network_header(const struct sk_buff *skb) { - return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) + - skb_network_offset(skb); + if (skb_gro_may_pull(skb, skb_gro_offset(skb))) + return skb_gro_header_fast(skb, skb_network_offset(skb)); + + return skb_network_header(skb); } -static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto) +static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb, + int proto) { const struct iphdr *iph = skb_gro_network_header(skb); @@ -422,7 +418,8 @@ static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) return uh; } -static inline __wsum ip6_gro_compute_pseudo(struct sk_buff *skb, int proto) +static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, + int proto) { const struct ipv6hdr *iph = skb_gro_network_header(skb); -- cgit From c7583e9f768eeb82f2531c8372584ba89cfade8b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Mar 2024 19:37:39 +0000 Subject: net: gro: enable fast path for more cases Currently the so-called GRO fast path is only enabled for napi_frags_skb() callers. After the prior patch, we no longer have to clear frag0 whenever we pulled bytes to skb->head. We therefore can initialize frag0 to skb->data so that GRO fast path can be used in the following additional cases: - Drivers using header split (populating skb->data with headers, and having payload in one or more page fragments). - Drivers not using any page frag (entire packet is in skb->data) Add a likely() in skb_gro_may_pull() to help the compiler to generate better code if possible. Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Signed-off-by: Paolo Abeni --- include/net/gro.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/gro.h b/include/net/gro.h index 3c3666e46b30..2b58671a6549 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -148,7 +148,7 @@ static inline void *skb_gro_header_fast(const struct sk_buff *skb, static inline bool skb_gro_may_pull(const struct sk_buff *skb, unsigned int hlen) { - return hlen <= NAPI_GRO_CB(skb)->frag0_len; + return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len); } static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, -- cgit From e5560011692981bc8bfeae7fc0673c65a02badba Mon Sep 17 00:00:00 2001 From: "Ricardo B. Marliere" Date: Sat, 2 Mar 2024 14:06:02 -0300 Subject: nfc: core: make nfc_class constant Since commit 43a7206b0963 ("driver core: class: make class_register() take a const *"), the driver core allows for struct class to be in read-only memory, so move the nfc_class structure to be declared at build time placing it into read-only memory, instead of having to be dynamically allocated at boot time. Suggested-by: Greg Kroah-Hartman Signed-off-by: Ricardo B. Marliere Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240302-class_cleanup-net-next-v1-6-8fa378595b93@marliere.net Signed-off-by: Jakub Kicinski --- include/net/nfc/nfc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 5dee575fbe86..3d07abacf08b 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -196,7 +196,7 @@ struct nfc_dev { }; #define to_nfc_dev(_dev) container_of(_dev, struct nfc_dev, dev) -extern struct class nfc_class; +extern const struct class nfc_class; struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops, u32 supported_protocols, -- cgit From 6f2fc8584a46bb35787bfc1dad1fb7dd5898e21f Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 2 Mar 2024 20:53:00 +0100 Subject: net: add helpers for EEE configuration Add helpers that phylib and phylink can use to manage EEE configuration and determine whether the MAC should be permitted to use LPI based on that configuration. Signed-off-by: Russell King (Oracle) Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Oleksij Rempel Link: https://lore.kernel.org/r/20240302195306.3207716-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- include/net/eee.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 include/net/eee.h (limited to 'include/net') diff --git a/include/net/eee.h b/include/net/eee.h new file mode 100644 index 000000000000..84837aba3cd9 --- /dev/null +++ b/include/net/eee.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _EEE_H +#define _EEE_H + +#include + +struct eee_config { + u32 tx_lpi_timer; + bool tx_lpi_enabled; + bool eee_enabled; +}; + +static inline bool eeecfg_mac_can_tx_lpi(const struct eee_config *eeecfg) +{ + /* eee_enabled is the master on/off */ + if (!eeecfg->eee_enabled || !eeecfg->tx_lpi_enabled) + return false; + + return true; +} + +static inline void eeecfg_to_eee(struct ethtool_keee *eee, + const struct eee_config *eeecfg) +{ + eee->tx_lpi_timer = eeecfg->tx_lpi_timer; + eee->tx_lpi_enabled = eeecfg->tx_lpi_enabled; + eee->eee_enabled = eeecfg->eee_enabled; +} + +static inline void eee_to_eeecfg(struct eee_config *eeecfg, + const struct ethtool_keee *eee) +{ + eeecfg->tx_lpi_timer = eee->tx_lpi_timer; + eeecfg->tx_lpi_enabled = eee->tx_lpi_enabled; + eeecfg->eee_enabled = eee->eee_enabled; +} + +#endif -- cgit From eeb78df4063c0b162324a9408ef573b24791871f Mon Sep 17 00:00:00 2001 From: Juntong Deng Date: Mon, 4 Mar 2024 11:32:08 +0000 Subject: inet: Add getsockopt support for IP_ROUTER_ALERT and IPV6_ROUTER_ALERT Currently getsockopt does not support IP_ROUTER_ALERT and IPV6_ROUTER_ALERT, and we are unable to get the values of these two socket options through getsockopt. This patch adds getsockopt support for IP_ROUTER_ALERT and IPV6_ROUTER_ALERT. Signed-off-by: Juntong Deng Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index d94c242eb3ed..f9ddd47dc4f8 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -274,6 +274,7 @@ enum { INET_FLAGS_REPFLOW = 27, INET_FLAGS_RTALERT_ISOLATE = 28, INET_FLAGS_SNDFLOW = 29, + INET_FLAGS_RTALERT = 30, }; /* cmsg flags for inet */ -- cgit From e8a1e58345cf40b7b272e08ac7b32328b2543e40 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 28 Feb 2024 19:38:39 +0300 Subject: mac802154: fix llsec key resources release in mac802154_llsec_key_del mac802154_llsec_key_del() can free resources of a key directly without following the RCU rules for waiting before the end of a grace period. This may lead to use-after-free in case llsec_lookup_key() is traversing the list of keys in parallel with a key deletion: refcount_t: addition on 0; use-after-free. WARNING: CPU: 4 PID: 16000 at lib/refcount.c:25 refcount_warn_saturate+0x162/0x2a0 Modules linked in: CPU: 4 PID: 16000 Comm: wpan-ping Not tainted 6.7.0 #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 RIP: 0010:refcount_warn_saturate+0x162/0x2a0 Call Trace: llsec_lookup_key.isra.0+0x890/0x9e0 mac802154_llsec_encrypt+0x30c/0x9c0 ieee802154_subif_start_xmit+0x24/0x1e0 dev_hard_start_xmit+0x13e/0x690 sch_direct_xmit+0x2ae/0xbc0 __dev_queue_xmit+0x11dd/0x3c20 dgram_sendmsg+0x90b/0xd60 __sys_sendto+0x466/0x4c0 __x64_sys_sendto+0xe0/0x1c0 do_syscall_64+0x45/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 Also, ieee802154_llsec_key_entry structures are not freed by mac802154_llsec_key_del(): unreferenced object 0xffff8880613b6980 (size 64): comm "iwpan", pid 2176, jiffies 4294761134 (age 60.475s) hex dump (first 32 bytes): 78 0d 8f 18 80 88 ff ff 22 01 00 00 00 00 ad de x......."....... 00 00 00 00 00 00 00 00 03 00 cd ab 00 00 00 00 ................ backtrace: [] __kmem_cache_alloc_node+0x1e2/0x2d0 [] kmalloc_trace+0x25/0xc0 [] mac802154_llsec_key_add+0xac9/0xcf0 [] ieee802154_add_llsec_key+0x5a/0x80 [] nl802154_add_llsec_key+0x426/0x5b0 [] genl_family_rcv_msg_doit+0x1fe/0x2f0 [] genl_rcv_msg+0x531/0x7d0 [] netlink_rcv_skb+0x169/0x440 [] genl_rcv+0x28/0x40 [] netlink_unicast+0x53c/0x820 [] netlink_sendmsg+0x93b/0xe60 [] ____sys_sendmsg+0xac5/0xca0 [] ___sys_sendmsg+0x11d/0x1c0 [] __sys_sendmsg+0xfa/0x1d0 [] do_syscall_64+0x45/0xf0 [] entry_SYSCALL_64_after_hwframe+0x6e/0x76 Handle the proper resource release in the RCU callback function mac802154_llsec_key_del_rcu(). Note that if llsec_lookup_key() finds a key, it gets a refcount via llsec_key_get() and locally copies key id from key_entry (which is a list element). So it's safe to call llsec_key_put() and free the list entry after the RCU grace period elapses. Found by Linux Verification Center (linuxtesting.org). Fixes: 5d637d5aabd8 ("mac802154: add llsec structures and mutators") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Acked-by: Alexander Aring Message-ID: <20240228163840.6667-1-pchelkin@ispras.ru> Signed-off-by: Stefan Schmidt --- include/net/cfg802154.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index cd95711b12b8..76d2cd2e2b30 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -401,6 +401,7 @@ struct ieee802154_llsec_key { struct ieee802154_llsec_key_entry { struct list_head list; + struct rcu_head rcu; struct ieee802154_llsec_key_id id; struct ieee802154_llsec_key *key; -- cgit From 968667f2e0345a67a6eea5a502f4659085666564 Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Sun, 7 Jan 2024 19:02:47 +0100 Subject: Bluetooth: Remove HCI_POWER_OFF_TIMEOUT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With commit cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED"), the power off sequence got refactored so that this timeout was no longer necessary, let's remove the leftover define from the header too. Fixes: cf75ad8b41d2 ("Bluetooth: hci_sync: Convert MGMT_SET_POWERED") Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index bdee5d649cc6..f7918c755183 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -437,7 +437,6 @@ enum { #define HCI_NCMD_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ #define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ -#define HCI_POWER_OFF_TIMEOUT msecs_to_jiffies(5000) /* 5 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ -- cgit From b14202aff5acba3b672704d792e821f02f8f562a Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Sun, 7 Jan 2024 19:02:49 +0100 Subject: Bluetooth: Add new state HCI_POWERING_DOWN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new state HCI_POWERING_DOWN that indicates that the device is currently powering down, this will be useful for the next commit. Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index f7918c755183..a94a8491ec7a 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -372,6 +372,7 @@ enum { HCI_SETUP, HCI_CONFIG, HCI_DEBUGFS_CREATED, + HCI_POWERING_DOWN, HCI_AUTO_OFF, HCI_RFKILLED, HCI_MGMT, -- cgit From 79c0868ad65a8fc7cdfaa5f2b77a4b70d0b0ea16 Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Mon, 8 Jan 2024 23:46:07 +0100 Subject: Bluetooth: hci_event: Use HCI error defines instead of magic values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have error defines already, so let's use them. Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index a94a8491ec7a..1cd212bb3789 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -653,6 +653,7 @@ enum { #define HCI_ERROR_PIN_OR_KEY_MISSING 0x06 #define HCI_ERROR_MEMORY_EXCEEDED 0x07 #define HCI_ERROR_CONNECTION_TIMEOUT 0x08 +#define HCI_ERROR_COMMAND_DISALLOWED 0x0c #define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d #define HCI_ERROR_REJ_BAD_ADDR 0x0f #define HCI_ERROR_INVALID_PARAMETERS 0x12 @@ -661,6 +662,7 @@ enum { #define HCI_ERROR_REMOTE_POWER_OFF 0x15 #define HCI_ERROR_LOCAL_HOST_TERM 0x16 #define HCI_ERROR_PAIRING_NOT_ALLOWED 0x18 +#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1e #define HCI_ERROR_INVALID_LL_PARAMS 0x1e #define HCI_ERROR_UNSPECIFIED 0x1f #define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c -- cgit From 63298d6e752fc0ec7f5093860af8bc9f047b30c8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 9 Jan 2024 13:45:40 -0500 Subject: Bluetooth: hci_core: Cancel request on command timeout If command has timed out call __hci_cmd_sync_cancel to notify the hci_req since it will inevitably cause a timeout. This also rework the code around __hci_cmd_sync_cancel since it was wrongly assuming it needs to cancel timer as well, but sometimes the timers have not been started or in fact they already had timed out in which case they don't need to be cancel yet again. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 6efbc2152146..e2582c242544 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -42,7 +42,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, void hci_cmd_sync_init(struct hci_dev *hdev); void hci_cmd_sync_clear(struct hci_dev *hdev); void hci_cmd_sync_cancel(struct hci_dev *hdev, int err); -void __hci_cmd_sync_cancel(struct hci_dev *hdev, int err); +void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err); int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); -- cgit From e7b02296fb400ee64822fbdd81a0718449066333 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 1 Feb 2024 11:18:58 -0500 Subject: Bluetooth: Remove BT_HS High Speed, Alternate MAC and PHY (AMP) extension, has been removed from Bluetooth Core specification on 5.3: https://www.bluetooth.com/blog/new-core-specification-v5-3-feature-enhancements/ Fixes: 244bc377591c ("Bluetooth: Add BT_HS config option") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 - include/net/bluetooth/l2cap.h | 42 ------------------------------------------ 2 files changed, 43 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1cd212bb3789..aa6c69053d7c 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -394,7 +394,6 @@ enum { HCI_LIMITED_PRIVACY, HCI_RPA_EXPIRED, HCI_RPA_RESOLVING, - HCI_HS_ENABLED, HCI_LE_ENABLED, HCI_ADVERTISING, HCI_ADVERTISING_CONNECTABLE, diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index cf393e72d6ed..92d7197f9a56 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -59,8 +59,6 @@ #define L2CAP_WAIT_ACK_POLL_PERIOD msecs_to_jiffies(200) #define L2CAP_WAIT_ACK_TIMEOUT msecs_to_jiffies(10000) -#define L2CAP_A2MP_DEFAULT_MTU 670 - /* L2CAP socket address */ struct sockaddr_l2 { sa_family_t l2_family; @@ -109,12 +107,6 @@ struct l2cap_conninfo { #define L2CAP_ECHO_RSP 0x09 #define L2CAP_INFO_REQ 0x0a #define L2CAP_INFO_RSP 0x0b -#define L2CAP_CREATE_CHAN_REQ 0x0c -#define L2CAP_CREATE_CHAN_RSP 0x0d -#define L2CAP_MOVE_CHAN_REQ 0x0e -#define L2CAP_MOVE_CHAN_RSP 0x0f -#define L2CAP_MOVE_CHAN_CFM 0x10 -#define L2CAP_MOVE_CHAN_CFM_RSP 0x11 #define L2CAP_CONN_PARAM_UPDATE_REQ 0x12 #define L2CAP_CONN_PARAM_UPDATE_RSP 0x13 #define L2CAP_LE_CONN_REQ 0x14 @@ -144,7 +136,6 @@ struct l2cap_conninfo { /* L2CAP fixed channels */ #define L2CAP_FC_SIG_BREDR 0x02 #define L2CAP_FC_CONNLESS 0x04 -#define L2CAP_FC_A2MP 0x08 #define L2CAP_FC_ATT 0x10 #define L2CAP_FC_SIG_LE 0x20 #define L2CAP_FC_SMP_LE 0x40 @@ -267,7 +258,6 @@ struct l2cap_conn_rsp { /* channel identifier */ #define L2CAP_CID_SIGNALING 0x0001 #define L2CAP_CID_CONN_LESS 0x0002 -#define L2CAP_CID_A2MP 0x0003 #define L2CAP_CID_ATT 0x0004 #define L2CAP_CID_LE_SIGNALING 0x0005 #define L2CAP_CID_SMP 0x0006 @@ -282,7 +272,6 @@ struct l2cap_conn_rsp { #define L2CAP_CR_BAD_PSM 0x0002 #define L2CAP_CR_SEC_BLOCK 0x0003 #define L2CAP_CR_NO_MEM 0x0004 -#define L2CAP_CR_BAD_AMP 0x0005 #define L2CAP_CR_INVALID_SCID 0x0006 #define L2CAP_CR_SCID_IN_USE 0x0007 @@ -404,29 +393,6 @@ struct l2cap_info_rsp { __u8 data[]; } __packed; -struct l2cap_create_chan_req { - __le16 psm; - __le16 scid; - __u8 amp_id; -} __packed; - -struct l2cap_create_chan_rsp { - __le16 dcid; - __le16 scid; - __le16 result; - __le16 status; -} __packed; - -struct l2cap_move_chan_req { - __le16 icid; - __u8 dest_amp_id; -} __packed; - -struct l2cap_move_chan_rsp { - __le16 icid; - __le16 result; -} __packed; - #define L2CAP_MR_SUCCESS 0x0000 #define L2CAP_MR_PEND 0x0001 #define L2CAP_MR_BAD_ID 0x0002 @@ -539,8 +505,6 @@ struct l2cap_seq_list { struct l2cap_chan { struct l2cap_conn *conn; - struct hci_conn *hs_hcon; - struct hci_chan *hs_hchan; struct kref kref; atomic_t nesting; @@ -591,12 +555,6 @@ struct l2cap_chan { unsigned long conn_state; unsigned long flags; - __u8 remote_amp_id; - __u8 local_amp_id; - __u8 move_id; - __u8 move_state; - __u8 move_role; - __u16 next_tx_seq; __u16 expected_ack_seq; __u16 expected_tx_seq; -- cgit From 45340097ce6ea7e875674a5a7d24c95ecbc93ef9 Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Tue, 6 Feb 2024 12:08:13 +0100 Subject: Bluetooth: hci_conn: Only do ACL connections sequentially MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pretty much all bluetooth chipsets only support paging a single device at a time, and if they don't reject a secondary "Create Connection" request while another is still ongoing, they'll most likely serialize those requests in the firware. With commit 4c67bc74f016 ("[Bluetooth] Support concurrent connect requests") we started adding some serialization of our own in case the adapter returns "Command Disallowed" HCI error. This commit was using the BT_CONNECT2 state for the serialization, this state is also used for a few more things (most notably to indicate we're waiting for an inquiry to cancel) and therefore a bit unreliable. Also not all BT firwares would respond with "Command Disallowed" on too many connection requests, some will also respond with "Hardware Failure" (BCM4378), and others will error out later and send a "Connect Complete" event with error "Rejected Limited Resources" (Marvell 88W8897). We can clean things up a bit and also make the serialization more reliable by using our hci_sync machinery to always do "Create Connection" requests in a sequential manner. This is very similar to what we're already doing for establishing LE connections, and it works well there. Note that this causes a test failure in mgmt-tester (test "Pair Device - Power off 1") because the hci_abort_conn_sync() changes the error we return on timeout of the "Create Connection". We'll fix this on the mgmt-tester side by adjusting the expected error for the test. Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_sync.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index aa6c69053d7c..08cb5cb249a4 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -437,6 +437,7 @@ enum { #define HCI_NCMD_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ #define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ +#define HCI_ACL_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index e2582c242544..824660f8f30d 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -138,3 +138,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason); int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle); int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle); + +int hci_acl_create_connection_sync(struct hci_dev *hdev, + struct hci_conn *conn); -- cgit From 4aa42119d971603dc9e4d8cf4f53d5fcf082ea7d Mon Sep 17 00:00:00 2001 From: Jonas Dreßler Date: Tue, 6 Feb 2024 12:08:14 +0100 Subject: Bluetooth: Remove pending ACL connection attempts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the last commit we moved to using the hci_sync queue for "Create Connection" requests, removing the need for retrying the paging after finished/failed "Create Connection" requests and after the end of inquiries. hci_conn_check_pending() was used to trigger this retry, we can remove it now. Note that we can also remove the special handling for COMMAND_DISALLOWED errors in the completion handler of "Create Connection", because "Create Connection" requests are now always serialized. This is somewhat reverting commit 4c67bc74f016 ("[Bluetooth] Support concurrent connect requests"). With this, the BT_CONNECT2 state of ACL hci_conn objects should now be back to meaning only one thing: That we received a "Connection Request" from another device (see hci_conn_request_evt), but the response to that is going to be deferred. Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 8f8dd9173714..34aa9d0290fe 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1480,7 +1480,6 @@ struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, bdaddr_t *dst, u8 role); void hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); -void hci_conn_check_pending(struct hci_dev *hdev); struct hci_chan *hci_chan_create(struct hci_conn *conn); void hci_chan_del(struct hci_chan *chan); -- cgit From bf98feea5b65ced367a871cf35fc044dedbcfb85 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 7 Feb 2024 15:26:20 -0500 Subject: Bluetooth: hci_conn: Always use sk_timeo as conn_timeout This aligns the use socket sk_timeo as conn_timeout when initiating a connection and then use it when scheduling the resulting HCI command, that way the command is actually aborted synchronously thus not blocking commands generated by hci_abort_conn_sync to inform the controller the connection is to be aborted. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 5 +++-- include/net/bluetooth/l2cap.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 34aa9d0290fe..2bdea85b7c44 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1495,9 +1495,10 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u16 conn_timeout, u8 role); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, - enum conn_reasons conn_reason); + enum conn_reasons conn_reason, u16 timeout); struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, - __u16 setting, struct bt_codec *codec); + __u16 setting, struct bt_codec *codec, + u16 timeout); struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 92d7197f9a56..a4278aa618ab 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -939,7 +939,7 @@ int l2cap_add_scid(struct l2cap_chan *chan, __u16 scid); struct l2cap_chan *l2cap_chan_create(void); void l2cap_chan_close(struct l2cap_chan *chan, int reason); int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, - bdaddr_t *dst, u8 dst_type); + bdaddr_t *dst, u8 dst_type, u16 timeout); int l2cap_chan_reconfigure(struct l2cap_chan *chan, __u16 mtu); int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len); void l2cap_chan_busy(struct l2cap_chan *chan, int busy); -- cgit From 5f641f03abccddd1a37233ff1b8e774b9ff1f4e8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 9 Feb 2024 09:08:06 -0500 Subject: Bluetooth: hci_conn: Fix UAF Write in __hci_acl_create_connection_sync This fixes the UAF on __hci_acl_create_connection_sync caused by connection abortion, it uses the same logic as to LE_LINK which uses hci_cmd_sync_cancel to prevent the callback to run if the connection is abort prematurely. Reported-by: syzbot+3f0a39be7a2035700868@syzkaller.appspotmail.com Fixes: 45340097ce6e ("Bluetooth: hci_conn: Only do ACL connections sequentially") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 824660f8f30d..ed334c253ebc 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -139,5 +139,4 @@ int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle); int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle); -int hci_acl_create_connection_sync(struct hci_dev *hdev, - struct hci_conn *conn); +int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn); -- cgit From 505ea2b295929e7be2b4e1bc86ee31cb7862fb01 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 9 Aug 2023 13:43:53 -0700 Subject: Bluetooth: hci_sync: Add helper functions to manipulate cmd_sync queue This adds functions to queue, dequeue and lookup into the cmd_sync list. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index ed334c253ebc..4ff4aa68ee19 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -48,6 +48,18 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); +struct hci_cmd_sync_work_entry * +hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +void hci_cmd_sync_cancel_entry(struct hci_dev *hdev, + struct hci_cmd_sync_work_entry *entry); +bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev, + hci_cmd_sync_work_func_t func, void *data, + hci_cmd_sync_work_destroy_t destroy); int hci_update_eir_sync(struct hci_dev *hdev); int hci_update_class_sync(struct hci_dev *hdev); -- cgit From 881559af5f5c545f6828e7c74d79813eb886d523 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 13 Feb 2024 09:59:32 -0500 Subject: Bluetooth: hci_sync: Attempt to dequeue connection attempt If connection is still queued/pending in the cmd_sync queue it means no command has been generated and it should be safe to just dequeue the callback when it is being aborted. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 19 +++++++++++++++++++ include/net/bluetooth/hci_sync.h | 10 ++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2bdea85b7c44..317d495cfcf5 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1083,6 +1083,24 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev) return c->acl_num + c->amp_num + c->sco_num + c->le_num + c->iso_num; } +static inline bool hci_conn_valid(struct hci_dev *hdev, struct hci_conn *conn) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c == conn) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; +} + static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle) { struct hci_conn_hash *h = &hdev->conn_hash; @@ -1493,6 +1511,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, u16 conn_timeout, u8 role); +void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, enum conn_reasons conn_reason, u16 timeout); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 4ff4aa68ee19..6a9d063e9f47 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -48,11 +48,11 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); struct hci_cmd_sync_work_entry * hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); -int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, - void *data, hci_cmd_sync_work_destroy_t destroy); void hci_cmd_sync_cancel_entry(struct hci_dev *hdev, struct hci_cmd_sync_work_entry *entry); bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, @@ -139,8 +139,6 @@ struct hci_conn; int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason); -int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn); - int hci_le_create_cis_sync(struct hci_dev *hdev); int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle); @@ -152,3 +150,7 @@ int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle); int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle); int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn); + +int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn); + +int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn); -- cgit From 02171da6e86a73e1b343b36722f5d9d5c04b3539 Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Fri, 23 Feb 2024 15:14:41 +0200 Subject: Bluetooth: ISO: Add hcon for listening bis sk This creates a hcon instance at bis listen, before the PA sync procedure is started. Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 317d495cfcf5..199a9f81cf50 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1,7 +1,7 @@ /* BlueZ - Bluetooth protocol stack for Linux Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved. - Copyright 2023 NXP + Copyright 2023-2024 NXP Written 2000,2001 by Maxim Krasnyansky @@ -1528,8 +1528,8 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos, __u8 data_len, __u8 *data); -int hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, - __u8 sid, struct bt_iso_qos *qos); +struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, + __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon, struct bt_iso_qos *qos, __u16 sync_handle, __u8 num_bis, __u8 bis[]); -- cgit From 168d9bf9c7f01df71e6404cfff66d9c2a8e968fb Mon Sep 17 00:00:00 2001 From: Iulia Tanasescu Date: Fri, 23 Feb 2024 15:14:42 +0200 Subject: Bluetooth: ISO: Reassemble PA data for bcast sink This adds support to reassemble PA data for a Broadcast Sink listening socket. This is needed in case the BASE is received fragmented in multiple PA reports. PA data is first reassembled inside the hcon, before the BASE is extracted and stored inside the socket. The length of the le_per_adv_data hcon array has been raised to 1650, to accommodate the maximum PA data length that can come fragmented, according to spec. Signed-off-by: Iulia Tanasescu Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 5 +++++ include/net/bluetooth/hci_core.h | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 08cb5cb249a4..21099bd3c8bc 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2037,6 +2037,7 @@ struct hci_cp_le_set_per_adv_params { } __packed; #define HCI_MAX_PER_AD_LENGTH 252 +#define HCI_MAX_PER_AD_TOT_LEN 1650 #define HCI_OP_LE_SET_PER_ADV_DATA 0x203f struct hci_cp_le_set_per_adv_data { @@ -2797,6 +2798,10 @@ struct hci_ev_le_per_adv_report { __u8 data[]; } __packed; +#define LE_PA_DATA_COMPLETE 0x00 +#define LE_PA_DATA_MORE_TO_COME 0x01 +#define LE_PA_DATA_TRUNCATED 0x02 + #define HCI_EV_LE_EXT_ADV_SET_TERM 0x12 struct hci_evt_le_ext_adv_set_term { __u8 status; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 199a9f81cf50..da6aa6549b81 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -734,8 +734,9 @@ struct hci_conn { __u16 le_supv_timeout; __u8 le_adv_data[HCI_MAX_EXT_AD_LENGTH]; __u8 le_adv_data_len; - __u8 le_per_adv_data[HCI_MAX_PER_AD_LENGTH]; - __u8 le_per_adv_data_len; + __u8 le_per_adv_data[HCI_MAX_PER_AD_TOT_LEN]; + __u16 le_per_adv_data_len; + __u16 le_per_adv_data_offset; __u8 le_tx_phy; __u8 le_rx_phy; __s8 rssi; -- cgit From 2615fd9a7c2507eb3be3fbe49dcec88a2f56454a Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 16 Feb 2024 16:20:11 -0500 Subject: Bluetooth: hci_sync: Fix overwriting request callback In a few cases the stack may generate commands as responses to events which would happen to overwrite the sent_cmd, so this attempts to store the request in req_skb so even if sent_cmd is replaced with a new command the pending request will remain in stored in req_skb. Fixes: 6a98e3836fa2 ("Bluetooth: Add helper for serialized HCI command execution") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index da6aa6549b81..56fb42df44a3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -552,6 +552,7 @@ struct hci_dev { __u32 req_status; __u32 req_result; struct sk_buff *req_skb; + struct sk_buff *req_rsp; void *smp_data; void *smp_bredr_data; -- cgit From 48201a3b3f398be6a01f78a14b18bd5d31c47458 Mon Sep 17 00:00:00 2001 From: Vinicius Peixoto Date: Mon, 26 Feb 2024 22:43:26 -0300 Subject: Bluetooth: Add new quirk for broken read key length on ATS2851 The ATS2851 controller erroneously reports support for the "Read Encryption Key Length" HCI command. This makes it unable to connect to any devices, since this command is issued by the kernel during the connection process in response to an "Encryption Change" HCI event. Add a new quirk (HCI_QUIRK_BROKEN_ENC_KEY_SIZE) to hint that the command is unsupported, preventing it from interrupting the connection process. This is the error log from btmon before this patch: > HCI Event: Encryption Change (0x08) plen 4 Status: Success (0x00) Handle: 2048 Address: ... Encryption: Enabled with E0 (0x01) < HCI Command: Read Encryption Key Size (0x05|0x0008) plen 2 Handle: 2048 Address: ... > HCI Event: Command Status (0x0f) plen 4 Read Encryption Key Size (0x05|0x0008) ncmd 1 Status: Unknown HCI Command (0x01) Signed-off-by: Vinicius Peixoto Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 21099bd3c8bc..8701ca5f31ee 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -330,6 +330,14 @@ enum { * during the hdev->setup vendor callback. */ HCI_QUIRK_BROKEN_LE_CODED, + + /* + * When this quirk is set, the HCI_OP_READ_ENC_KEY_SIZE command is + * skipped during an HCI_EV_ENCRYPT_CHANGE event. This is required + * for Actions Semiconductor ATS2851 based controllers, which erroneously + * claim to support it. + */ + HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, }; /* HCI device flags */ -- cgit From 42ed95de82c01184a88945d3ca274be6a7ea607d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 7 Mar 2024 11:58:17 -0500 Subject: Bluetooth: ISO: Align broadcast sync_timeout with connection timeout This aligns broadcast sync_timeout with existing connection timeouts which are 20 seconds long. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 7ffa8c192c3f..9fe95a22abeb 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -164,6 +164,8 @@ struct bt_voice { #define BT_ISO_QOS_BIG_UNSET 0xff #define BT_ISO_QOS_BIS_UNSET 0xff +#define BT_ISO_SYNC_TIMEOUT 0x07d0 /* 20 secs */ + struct bt_iso_io_qos { __u32 interval; __u16 latency; -- cgit From 2658b5a8a4eee5fad378d0bde2f221deacbc58f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:14 +0000 Subject: net: introduce struct net_hotdata Instead of spreading networking critical fields all over the places, add a custom net_hotdata structure so that we can precisely control its layout. In this first patch, move : - gro_normal_batch used in rx (GRO stack) - offload_base used in rx and tx (GRO and TSO stacks) Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/gro.h | 5 ++--- include/net/hotdata.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 include/net/hotdata.h (limited to 'include/net') diff --git a/include/net/gro.h b/include/net/gro.h index 2b58671a6549..d6fc8fbd3730 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -9,6 +9,7 @@ #include #include #include +#include struct napi_gro_cb { union { @@ -446,7 +447,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, { list_add_tail(&skb->list, &napi->rx_list); napi->rx_count += segs; - if (napi->rx_count >= READ_ONCE(gro_normal_batch)) + if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) gro_normal_list(napi); } @@ -493,6 +494,4 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int * #endif } -extern struct list_head offload_base; - #endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/hotdata.h b/include/net/hotdata.h new file mode 100644 index 000000000000..6ed32e4e34aa --- /dev/null +++ b/include/net/hotdata.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_HOTDATA_H +#define _NET_HOTDATA_H + +#include + +/* Read mostly data used in network fast paths. */ +struct net_hotdata { + struct list_head offload_base; + int gro_normal_batch; +}; + +extern struct net_hotdata net_hotdata; + +#endif /* _NET_HOTDATA_H */ -- cgit From ae6e22f7b7f0702015d86cfa036492b94be92f04 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:15 +0000 Subject: net: move netdev_budget and netdev_budget to net_hotdata netdev_budget and netdev_budget are used in rx path (net_rx_action()) Move them into net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 6ed32e4e34aa..72170223385e 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -8,6 +8,8 @@ struct net_hotdata { struct list_head offload_base; int gro_normal_batch; + int netdev_budget; + int netdev_budget_usecs; }; extern struct net_hotdata net_hotdata; -- cgit From f59b5416c396ac4910dd7a0cdf26cbb0e1faf529 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:16 +0000 Subject: net: move netdev_tstamp_prequeue into net_hotdata netdev_tstamp_prequeue is used in rx path. Move it to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 72170223385e..149e56528537 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -10,6 +10,7 @@ struct net_hotdata { int gro_normal_batch; int netdev_budget; int netdev_budget_usecs; + int tstamp_prequeue; }; extern struct net_hotdata net_hotdata; -- cgit From 0b91fa4bfb1caedd01cb6eb3b733cbc77c9edb0e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:17 +0000 Subject: net: move ptype_all into net_hotdata ptype_all is used in rx/tx fast paths. Move it to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 149e56528537..d462cb8f16ba 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -7,6 +7,7 @@ /* Read mostly data used in network fast paths. */ struct net_hotdata { struct list_head offload_base; + struct list_head ptype_all; int gro_normal_batch; int netdev_budget; int netdev_budget_usecs; -- cgit From edbc666cdcbf4a80ada4311c272a2078af87b880 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:18 +0000 Subject: net: move netdev_max_backlog to net_hotdata netdev_max_backlog is used in rx fat path. Move it to net_hodata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index d462cb8f16ba..dc50b200a94b 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -12,6 +12,7 @@ struct net_hotdata { int netdev_budget; int netdev_budget_usecs; int tstamp_prequeue; + int max_backlog; }; extern struct net_hotdata net_hotdata; -- cgit From 61a0be1a5342045059ce53eabfe6500d499d2f89 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:19 +0000 Subject: net: move ip_packet_offload and ipv6_packet_offload to net_hotdata These structures are used in GRO and GSO paths. v2: ipv6_packet_offload definition depends on CONFIG_INET Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index dc50b200a94b..d8ce20d3215d 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -3,9 +3,14 @@ #define _NET_HOTDATA_H #include +#include /* Read mostly data used in network fast paths. */ struct net_hotdata { +#if IS_ENABLED(CONFIG_INET) + struct packet_offload ip_packet_offload; + struct packet_offload ipv6_packet_offload; +#endif struct list_head offload_base; struct list_head ptype_all; int gro_normal_batch; -- cgit From 0139806eebd6bcf6a3df98950cd0691aff216304 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:20 +0000 Subject: net: move tcpv4_offload and tcpv6_offload to net_hotdata These are used in TCP fast paths. Move them into net_hotdata for better cache locality. v2: tcpv6_offload definition depends on CONFIG_INET Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index d8ce20d3215d..d86d02f156fc 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -4,12 +4,15 @@ #include #include +#include /* Read mostly data used in network fast paths. */ struct net_hotdata { #if IS_ENABLED(CONFIG_INET) struct packet_offload ip_packet_offload; + struct net_offload tcpv4_offload; struct packet_offload ipv6_packet_offload; + struct net_offload tcpv6_offload; #endif struct list_head offload_base; struct list_head ptype_all; -- cgit From 26722dc74bf08fd79564cbcad1e5f3e2aa3bf9cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:21 +0000 Subject: net: move dev_tx_weight to net_hotdata dev_tx_weight is used in tx fast path. Move it to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index d86d02f156fc..ffea9cc263e5 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -21,6 +21,7 @@ struct net_hotdata { int netdev_budget_usecs; int tstamp_prequeue; int max_backlog; + int dev_tx_weight; }; extern struct net_hotdata net_hotdata; -- cgit From 71c0de9bac9c1dda503322c86be4924f055dc6c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:22 +0000 Subject: net: move dev_rx_weight to net_hotdata dev_rx_weight is read from process_backlog(). Move it to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-10-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index ffea9cc263e5..e6595ed2c3be 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -22,6 +22,7 @@ struct net_hotdata { int tstamp_prequeue; int max_backlog; int dev_tx_weight; + int dev_rx_weight; }; extern struct net_hotdata net_hotdata; -- cgit From aa70d2d16f280efe8aa52afc25a33b2ec8d346b6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:23 +0000 Subject: net: move skbuff_cache(s) to net_hotdata skbuff_cache, skbuff_fclone_cache and skb_small_head_cache are used in rx/tx fast paths. Move them to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-11-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index e6595ed2c3be..a8f7e5e826fb 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -16,6 +16,9 @@ struct net_hotdata { #endif struct list_head offload_base; struct list_head ptype_all; + struct kmem_cache *skbuff_cache; + struct kmem_cache *skbuff_fclone_cache; + struct kmem_cache *skb_small_head_cache; int gro_normal_batch; int netdev_budget; int netdev_budget_usecs; -- cgit From 6a55ca6b0122d4678e3ab54a8553361aae5082f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:24 +0000 Subject: udp: move udpv4_offload and udpv6_offload to net_hotdata These structures are used in GRO and GSO paths. Move them to net_hodata for better cache locality. v2: udpv6_offload definition depends on CONFIG_INET=y Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-12-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index a8f7e5e826fb..daeee8ce8084 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -11,8 +11,10 @@ struct net_hotdata { #if IS_ENABLED(CONFIG_INET) struct packet_offload ip_packet_offload; struct net_offload tcpv4_offload; + struct net_offload udpv4_offload; struct packet_offload ipv6_packet_offload; struct net_offload tcpv6_offload; + struct net_offload udpv6_offload; #endif struct list_head offload_base; struct list_head ptype_all; -- cgit From 4ea0875b9d897e3c64cdb486788509f1f062285b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:25 +0000 Subject: ipv6: move tcpv6_protocol and udpv6_protocol to net_hotdata These structures are read in rx path, move them to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-13-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index daeee8ce8084..03d758d25c02 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -14,6 +14,10 @@ struct net_hotdata { struct net_offload udpv4_offload; struct packet_offload ipv6_packet_offload; struct net_offload tcpv6_offload; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_protocol tcpv6_protocol; + struct inet6_protocol udpv6_protocol; +#endif struct net_offload udpv6_offload; #endif struct list_head offload_base; -- cgit From 571bf020be9c3b135e8b6dd87421919953268c1f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:26 +0000 Subject: inet: move tcp_protocol and udp_protocol to net_hotdata These structures are read in rx path, move them to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-14-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 03d758d25c02..87215f7ac200 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -11,7 +11,9 @@ struct net_hotdata { #if IS_ENABLED(CONFIG_INET) struct packet_offload ip_packet_offload; struct net_offload tcpv4_offload; + struct net_protocol tcp_protocol; struct net_offload udpv4_offload; + struct net_protocol udp_protocol; struct packet_offload ipv6_packet_offload; struct net_offload tcpv6_offload; #if IS_ENABLED(CONFIG_IPV6) -- cgit From 6e0735723ab437793cfab02d50b3ae3539aeb520 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:27 +0000 Subject: inet: move inet_ehash_secret and udp_ehash_secret into net_hotdata "struct net_protocol" has a 32bit hole in 32bit arches. Use it to store the 32bit secret used by UDP and TCP, to increase cache locality in rx path. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-15-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 3 +++ include/net/protocol.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 87215f7ac200..7a210ea6899c 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -36,6 +36,9 @@ struct net_hotdata { int dev_rx_weight; }; +#define inet_ehash_secret net_hotdata.tcp_protocol.secret +#define udp_ehash_secret net_hotdata.udp_protocol.secret + extern struct net_hotdata net_hotdata; #endif /* _NET_HOTDATA_H */ diff --git a/include/net/protocol.h b/include/net/protocol.h index 6aef8cb11cc8..3ff26e66735c 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -46,6 +46,7 @@ struct net_protocol { * socket lookup? */ icmp_strict_tag_validation:1; + u32 secret; }; #if IS_ENABLED(CONFIG_IPV6) -- cgit From 5af674bb90a030317a02419e04b66ec0dc892dcd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:28 +0000 Subject: ipv6: move inet6_ehash_secret and udp6_ehash_secret into net_hotdata "struct inet6_protocol" has a 32bit hole in 32bit arches. Use it to store the 32bit secret used by UDP and TCP, to increase cache locality in rx path. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-16-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 2 ++ include/net/protocol.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 7a210ea6899c..6d5cd967183a 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -38,6 +38,8 @@ struct net_hotdata { #define inet_ehash_secret net_hotdata.tcp_protocol.secret #define udp_ehash_secret net_hotdata.udp_protocol.secret +#define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret +#define udp6_ehash_secret net_hotdata.udpv6_protocol.secret extern struct net_hotdata net_hotdata; diff --git a/include/net/protocol.h b/include/net/protocol.h index 3ff26e66735c..213649d2ab09 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -60,6 +60,7 @@ struct inet6_protocol { __be32 info); unsigned int flags; /* INET6_PROTO_xxx */ + u32 secret; }; #define INET6_PROTO_NOPOLICY 0x1 -- cgit From df51b84564159cdd91a67ee0f9e30b42b3a73cef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:29 +0000 Subject: ipv6: move tcp_ipv6_hash_secret and udp_ipv6_hash_secret to net_hotdata Use a 32bit hole in "struct net_offload" to store the remaining 32bit secrets used by TCPv6 and UDPv6. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-17-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 2 ++ include/net/protocol.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 6d5cd967183a..b0b847585f7e 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -39,7 +39,9 @@ struct net_hotdata { #define inet_ehash_secret net_hotdata.tcp_protocol.secret #define udp_ehash_secret net_hotdata.udp_protocol.secret #define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret +#define tcp_ipv6_hash_secret net_hotdata.tcpv6_offload.secret #define udp6_ehash_secret net_hotdata.udpv6_protocol.secret +#define udp_ipv6_hash_secret net_hotdata.udpv6_offload.secret extern struct net_hotdata net_hotdata; diff --git a/include/net/protocol.h b/include/net/protocol.h index 213649d2ab09..b2499f88f8f8 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -70,6 +70,7 @@ struct inet6_protocol { struct net_offload { struct offload_callbacks callbacks; unsigned int flags; /* Flags used by IPv6 for now */ + u32 secret; }; /* This should be set for any extension header which is compatible with GSO. */ #define INET6_PROTO_GSO_EXTHDR 0x1 -- cgit From 490a79faf95e705ba0ffd9ebf04a624b379e53c9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:30 +0000 Subject: net: introduce include/net/rps.h Move RPS related structures and helpers from include/linux/netdevice.h and include/net/sock.h to a new include file. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-18-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/rps.h | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++ include/net/sock.h | 35 --------------- 2 files changed, 127 insertions(+), 35 deletions(-) create mode 100644 include/net/rps.h (limited to 'include/net') diff --git a/include/net/rps.h b/include/net/rps.h new file mode 100644 index 000000000000..6081d817d245 --- /dev/null +++ b/include/net/rps.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_RPS_H +#define _NET_RPS_H + +#include +#include +#include + +#ifdef CONFIG_RPS + +extern struct static_key_false rps_needed; +extern struct static_key_false rfs_needed; + +/* + * This structure holds an RPS map which can be of variable length. The + * map is an array of CPUs. + */ +struct rps_map { + unsigned int len; + struct rcu_head rcu; + u16 cpus[]; +}; +#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) + +/* + * The rps_dev_flow structure contains the mapping of a flow to a CPU, the + * tail pointer for that CPU's input queue at the time of last enqueue, and + * a hardware filter index. + */ +struct rps_dev_flow { + u16 cpu; + u16 filter; + unsigned int last_qtail; +}; +#define RPS_NO_FILTER 0xffff + +/* + * The rps_dev_flow_table structure contains a table of flow mappings. + */ +struct rps_dev_flow_table { + unsigned int mask; + struct rcu_head rcu; + struct rps_dev_flow flows[]; +}; +#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ + ((_num) * sizeof(struct rps_dev_flow))) + +/* + * The rps_sock_flow_table contains mappings of flows to the last CPU + * on which they were processed by the application (set in recvmsg). + * Each entry is a 32bit value. Upper part is the high-order bits + * of flow hash, lower part is CPU number. + * rps_cpu_mask is used to partition the space, depending on number of + * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, + * meaning we use 32-6=26 bits for the hash. + */ +struct rps_sock_flow_table { + u32 mask; + + u32 ents[] ____cacheline_aligned_in_smp; +}; +#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) + +#define RPS_NO_CPU 0xffff + +extern u32 rps_cpu_mask; +extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; + +static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, + u32 hash) +{ + unsigned int index = hash & table->mask; + u32 val = hash & ~rps_cpu_mask; + + /* We only give a hint, preemption can change CPU under us */ + val |= raw_smp_processor_id(); + + /* The following WRITE_ONCE() is paired with the READ_ONCE() + * here, and another one in get_rps_cpu(). + */ + if (READ_ONCE(table->ents[index]) != val) + WRITE_ONCE(table->ents[index], val); +} + +#endif /* CONFIG_RPS */ + +static inline void sock_rps_record_flow_hash(__u32 hash) +{ +#ifdef CONFIG_RPS + struct rps_sock_flow_table *sock_flow_table; + + if (!hash) + return; + rcu_read_lock(); + sock_flow_table = rcu_dereference(rps_sock_flow_table); + if (sock_flow_table) + rps_record_sock_flow(sock_flow_table, hash); + rcu_read_unlock(); +#endif +} + +static inline void sock_rps_record_flow(const struct sock *sk) +{ +#ifdef CONFIG_RPS + if (static_branch_unlikely(&rfs_needed)) { + /* Reading sk->sk_rxhash might incur an expensive cache line + * miss. + * + * TCP_ESTABLISHED does cover almost all states where RFS + * might be useful, and is cheaper [1] than testing : + * IPv4: inet_sk(sk)->inet_daddr + * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) + * OR an additional socket flag + * [1] : sk_state and sk_prot are in the same cache line. + */ + if (sk->sk_state == TCP_ESTABLISHED) { + /* This READ_ONCE() is paired with the WRITE_ONCE() + * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). + */ + sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); + } + } +#endif +} + +#endif /* _NET_RPS_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 09a0cde8bf52..b5e00702acc1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1117,41 +1117,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk) WRITE_ONCE(sk->sk_incoming_cpu, cpu); } -static inline void sock_rps_record_flow_hash(__u32 hash) -{ -#ifdef CONFIG_RPS - struct rps_sock_flow_table *sock_flow_table; - - rcu_read_lock(); - sock_flow_table = rcu_dereference(rps_sock_flow_table); - rps_record_sock_flow(sock_flow_table, hash); - rcu_read_unlock(); -#endif -} - -static inline void sock_rps_record_flow(const struct sock *sk) -{ -#ifdef CONFIG_RPS - if (static_branch_unlikely(&rfs_needed)) { - /* Reading sk->sk_rxhash might incur an expensive cache line - * miss. - * - * TCP_ESTABLISHED does cover almost all states where RFS - * might be useful, and is cheaper [1] than testing : - * IPv4: inet_sk(sk)->inet_daddr - * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) - * OR an additional socket flag - * [1] : sk_state and sk_prot are in the same cache line. - */ - if (sk->sk_state == TCP_ESTABLISHED) { - /* This READ_ONCE() is paired with the WRITE_ONCE() - * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). - */ - sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); - } - } -#endif -} static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) -- cgit From ce7f49ab741591d83e33e56948bac2f12de6e14e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Mar 2024 16:00:31 +0000 Subject: net: move rps_sock_flow_table to net_hotdata rps_sock_flow_table and rps_cpu_mask are used in fast path. Move them to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240306160031.874438-19-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/hotdata.h | 4 ++++ include/net/rps.h | 8 +++----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/hotdata.h b/include/net/hotdata.h index b0b847585f7e..003667a1efd6 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -27,6 +27,10 @@ struct net_hotdata { struct kmem_cache *skbuff_cache; struct kmem_cache *skbuff_fclone_cache; struct kmem_cache *skb_small_head_cache; +#ifdef CONFIG_RPS + struct rps_sock_flow_table __rcu *rps_sock_flow_table; + u32 rps_cpu_mask; +#endif int gro_normal_batch; int netdev_budget; int netdev_budget_usecs; diff --git a/include/net/rps.h b/include/net/rps.h index 6081d817d245..7660243e905b 100644 --- a/include/net/rps.h +++ b/include/net/rps.h @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef CONFIG_RPS @@ -64,14 +65,11 @@ struct rps_sock_flow_table { #define RPS_NO_CPU 0xffff -extern u32 rps_cpu_mask; -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, u32 hash) { unsigned int index = hash & table->mask; - u32 val = hash & ~rps_cpu_mask; + u32 val = hash & ~net_hotdata.rps_cpu_mask; /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); @@ -93,7 +91,7 @@ static inline void sock_rps_record_flow_hash(__u32 hash) if (!hash) return; rcu_read_lock(); - sock_flow_table = rcu_dereference(rps_sock_flow_table); + sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); if (sock_flow_table) rps_record_sock_flow(sock_flow_table, hash); rcu_read_unlock(); -- cgit From ab63a2387cb906d43b72a8effb611bbaecb2d0cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Mar 2024 11:55:07 -0800 Subject: netdev: add per-queue statistics The ethtool-nl family does a good job exposing various protocol related and IEEE/IETF statistics which used to get dumped under ethtool -S, with creative names. Queue stats don't have a netlink API, yet, and remain a lion's share of ethtool -S output for new drivers. Not only is that bad because the names differ driver to driver but it's also bug-prone. Intuitively drivers try to report only the stats for active queues, but querying ethtool stats involves multiple system calls, and the number of stats is read separately from the stats themselves. Worse still when user space asks for values of the stats, it doesn't inform the kernel how big the buffer is. If number of stats increases in the meantime kernel will overflow user buffer. Add a netlink API for dumping queue stats. Queue information is exposed via the netdev-genl family, so add the stats there. Support per-queue and sum-for-device dumps. Latter will be useful when subsequent patches add more interesting common stats than just bytes and packets. The API does not currently distinguish between HW and SW stats. The expectation is that the source of the stats will either not matter much (good packets) or be obvious (skb alloc errors). Acked-by: Stanislav Fomichev Reviewed-by: Amritha Nambiar Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20240306195509.1502746-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netdev_queues.h | 54 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include/net') diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 8b8ed4e13d74..d633347eeda5 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -4,6 +4,60 @@ #include +struct netdev_queue_stats_rx { + u64 bytes; + u64 packets; +}; + +struct netdev_queue_stats_tx { + u64 bytes; + u64 packets; +}; + +/** + * struct netdev_stat_ops - netdev ops for fine grained stats + * @get_queue_stats_rx: get stats for a given Rx queue + * @get_queue_stats_tx: get stats for a given Tx queue + * @get_base_stats: get base stats (not belonging to any live instance) + * + * Query stats for a given object. The values of the statistics are undefined + * on entry (specifically they are *not* zero-initialized). Drivers should + * assign values only to the statistics they collect. Statistics which are not + * collected must be left undefined. + * + * Queue objects are not necessarily persistent, and only currently active + * queues are queried by the per-queue callbacks. This means that per-queue + * statistics will not generally add up to the total number of events for + * the device. The @get_base_stats callback allows filling in the delta + * between events for currently live queues and overall device history. + * When the statistics for the entire device are queried, first @get_base_stats + * is issued to collect the delta, and then a series of per-queue callbacks. + * Only statistics which are set in @get_base_stats will be reported + * at the device level, meaning that unlike in queue callbacks, setting + * a statistic to zero in @get_base_stats is a legitimate thing to do. + * This is because @get_base_stats has a second function of designating which + * statistics are in fact correct for the entire device (e.g. when history + * for some of the events is not maintained, and reliable "total" cannot + * be provided). + * + * Device drivers can assume that when collecting total device stats, + * the @get_base_stats and subsequent per-queue calls are performed + * "atomically" (without releasing the rtnl_lock). + * + * Device drivers are encouraged to reset the per-queue statistics when + * number of queues change. This is because the primary use case for + * per-queue statistics is currently to detect traffic imbalance. + */ +struct netdev_stat_ops { + void (*get_queue_stats_rx)(struct net_device *dev, int idx, + struct netdev_queue_stats_rx *stats); + void (*get_queue_stats_tx)(struct net_device *dev, int idx, + struct netdev_queue_stats_tx *stats); + void (*get_base_stats)(struct net_device *dev, + struct netdev_queue_stats_rx *rx, + struct netdev_queue_stats_tx *tx); +}; + /** * DOC: Lockless queue stopping / waking helpers. * -- cgit From 92f8b1f5ca0f157f564e75cef4c63641c172e0f1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Mar 2024 11:55:08 -0800 Subject: netdev: add queue stat for alloc failures Rx alloc failures are commonly counted by drivers. Support reporting those via netdev-genl queue stats. Acked-by: Stanislav Fomichev Reviewed-by: Amritha Nambiar Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20240306195509.1502746-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netdev_queues.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index d633347eeda5..1ec408585373 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -4,9 +4,11 @@ #include +/* See the netdev.yaml spec for definition of each statistic */ struct netdev_queue_stats_rx { u64 bytes; u64 packets; + u64 alloc_fail; }; struct netdev_queue_stats_tx { -- cgit From f4676ea74b8549cd88dbfe2a592ce4530039e61f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:17 +0100 Subject: net: nexthop: Add nexthop group entry stats Add nexthop group entry stats to count the number of packets forwarded via each nexthop in the group. The stats will be exposed to user space for better data path observability in the next patch. The per-CPU stats pointer is placed at the beginning of 'struct nh_grp_entry', so that all the fields accessed for the data path reside on the same cache line: struct nh_grp_entry { struct nexthop * nh; /* 0 8 */ struct nh_grp_entry_stats * stats; /* 8 8 */ u8 weight; /* 16 1 */ /* XXX 7 bytes hole, try to pack */ union { struct { atomic_t upper_bound; /* 24 4 */ } hthr; /* 24 4 */ struct { struct list_head uw_nh_entry; /* 24 16 */ u16 count_buckets; /* 40 2 */ u16 wants_buckets; /* 42 2 */ } res; /* 24 24 */ }; /* 24 24 */ struct list_head nh_list; /* 48 16 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct nexthop * nh_parent; /* 64 8 */ /* size: 72, cachelines: 2, members: 6 */ /* sum members: 65, holes: 1, sum holes: 7 */ /* last cacheline: 8 bytes */ }; Co-developed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/nexthop.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 77e99cba60ad..6e6a36fee51e 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -95,8 +95,14 @@ struct nh_res_table { struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets); }; +struct nh_grp_entry_stats { + u64_stats_t packets; + struct u64_stats_sync syncp; +}; + struct nh_grp_entry { struct nexthop *nh; + struct nh_grp_entry_stats __percpu *stats; u8 weight; union { -- cgit From 5877786fcf52d1b255afcd61832753d1619f0738 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:19 +0100 Subject: net: nexthop: Add hardware statistics notifications Add hw_stats field to several notifier structures to communicate to the drivers that HW statistics should be configured for nexthops within a given group. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- include/net/nexthop.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 6e6a36fee51e..584c37120c20 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -130,6 +130,7 @@ struct nh_group { bool resilient; bool fdb_nh; bool has_v4; + bool hw_stats; struct nh_res_table __rcu *res_table; struct nh_grp_entry nh_entries[] __counted_by(num_nh); @@ -193,6 +194,7 @@ struct nh_notifier_grp_entry_info { struct nh_notifier_grp_info { u16 num_nh; bool is_fdb; + bool hw_stats; struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh); }; @@ -206,6 +208,7 @@ struct nh_notifier_res_bucket_info { struct nh_notifier_res_table_info { u16 num_nh_buckets; + bool hw_stats; struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); }; -- cgit From 746c19a52ec50b81422fd4772254d55e588d7df6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:20 +0100 Subject: net: nexthop: Add ability to enable / disable hardware statistics Add netlink support for enabling collection of HW statistics on nexthop groups. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- include/net/nexthop.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 584c37120c20..1ba6d3668518 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -47,6 +47,8 @@ struct nh_config { bool nh_grp_res_has_idle_timer; bool nh_grp_res_has_unbalanced_timer; + bool nh_hw_stats; + struct nlattr *nh_encap; u16 nh_encap_type; -- cgit From 5072ae00aea434d922cabd1c3e6236350a77c4d7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:21 +0100 Subject: net: nexthop: Expose nexthop group HW stats to user space Add netlink support for reading NH group hardware stats. Stats collection is done through a new notifier, NEXTHOP_EVENT_HW_STATS_REPORT_DELTA. Drivers that implement HW counters for a given NH group are thereby asked to collect the stats and report back to core by calling nh_grp_hw_stats_report_delta(). This is similar to what netdevice L3 stats do. Besides exposing number of packets that passed in the HW datapath, also include information on whether any driver actually realizes the counters. The core can tell based on whether it got any _report_delta() reports from the drivers. This allows enabling the statistics at the group at any time, with drivers opting into supporting them. This is also in line with what netdevice L3 stats are doing. So as not to waste time and space, tie the collection and reporting of HW stats with a new op flag, NHA_OP_FLAG_DUMP_HW_STATS. Co-developed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Kees Cook # For the __counted_by bits Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/nexthop.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 1ba6d3668518..7ec9cc80f11c 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -122,6 +122,7 @@ struct nh_grp_entry { struct list_head nh_list; struct nexthop *nh_parent; /* nexthop of group with this entry */ + u64 packets_hw; }; struct nh_group { @@ -166,6 +167,7 @@ enum nexthop_event_type { NEXTHOP_EVENT_REPLACE, NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, NEXTHOP_EVENT_BUCKET_REPLACE, + NEXTHOP_EVENT_HW_STATS_REPORT_DELTA, }; enum nh_notifier_info_type { @@ -173,6 +175,7 @@ enum nh_notifier_info_type { NH_NOTIFIER_INFO_TYPE_GRP, NH_NOTIFIER_INFO_TYPE_RES_TABLE, NH_NOTIFIER_INFO_TYPE_RES_BUCKET, + NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS, }; struct nh_notifier_single_info { @@ -214,6 +217,17 @@ struct nh_notifier_res_table_info { struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets); }; +struct nh_notifier_grp_hw_stats_entry_info { + u32 id; + u64 packets; +}; + +struct nh_notifier_grp_hw_stats_info { + u16 num_nh; + bool hw_stats_used; + struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh); +}; + struct nh_notifier_info { struct net *net; struct netlink_ext_ack *extack; @@ -224,6 +238,7 @@ struct nh_notifier_info { struct nh_notifier_grp_info *nh_grp; struct nh_notifier_res_table_info *nh_res_table; struct nh_notifier_res_bucket_info *nh_res_bucket; + struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats; }; }; @@ -236,6 +251,9 @@ void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, bool offload, bool trap); void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, unsigned long *activity); +void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info, + unsigned int nh_idx, + u64 delta_packets); /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); -- cgit From e5b7aefe38f7f6258935d8a10c36552dd957048a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Mar 2024 10:22:30 +0000 Subject: net: gro: move two declarations to include/net/gro.h Move gro_find_receive_by_type() and gro_find_complete_by_type() to include/net/gro.h where they belong. Also use _NET_GRO_H instead of _NET_IPV6_GRO_H to protect include/net/gro.h from multiple inclusions. Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20240308102230.296224-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/gro.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/gro.h b/include/net/gro.h index d6fc8fbd3730..50f1e403dbbb 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _NET_IPV6_GRO_H -#define _NET_IPV6_GRO_H +#ifndef _NET_GRO_H +#define _NET_GRO_H #include #include @@ -494,4 +494,7 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int * #endif } -#endif /* _NET_IPV6_GRO_H */ +struct packet_offload *gro_find_receive_by_type(__be16 type); +struct packet_offload *gro_find_complete_by_type(__be16 type); + +#endif /* _NET_GRO_H */ -- cgit From e99eb57e9b14d830a571c5255248d4d7eb08b27e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 8 Mar 2024 13:59:46 +0100 Subject: net: nexthop: Have all NH notifiers carry NH ID When sending the notifications to collect NH statistics for resilient groups, the driver will need to know the nexthop IDs in individual buckets to look up the right counter. To that end, move the nexthop ID from struct nh_notifier_grp_entry_info to nh_notifier_single_info. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/8f964cd50b1a56d3606ce7ab4c50354ae019c43b.1709901020.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 7ec9cc80f11c..7ca315ad500e 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -185,6 +185,7 @@ struct nh_notifier_single_info { __be32 ipv4; struct in6_addr ipv6; }; + u32 id; u8 is_reject:1, is_fdb:1, has_encap:1; @@ -192,7 +193,6 @@ struct nh_notifier_single_info { struct nh_notifier_grp_entry_info { u8 weight; - u32 id; struct nh_notifier_single_info nh; }; -- cgit