From f98df79bf7f772597313adca2720cb38770490dd Mon Sep 17 00:00:00 2001 From: Victor Stewart Date: Tue, 16 Jan 2024 20:29:52 +0000 Subject: bpf, docs: Fix bpf_redirect_peer header doc Amend the bpf_redirect_peer() header documentation to also mention support for the netkit device type. Signed-off-by: Victor Stewart Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240116202952.241009-1-v@nametag.social Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 754e68ca8744..a00f8a5623e1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4839,9 +4839,9 @@ union bpf_attr { * going through the CPU's backlog queue. * * The *flags* argument is reserved and must be 0. The helper is - * currently only supported for tc BPF program types at the ingress - * hook and for veth device types. The peer device must reside in a - * different network namespace. + * currently only supported for tc BPF program types at the + * ingress hook and for veth and netkit target device types. The + * peer device must reside in a different network namespace. * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. -- cgit From d5c16492c66fbfca85f36e42363d32212df5927b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:04:58 +0100 Subject: bpf: Add cookie to perf_event bpf_link_info records At the moment we don't store cookie for perf_event probes, while we do that for the rest of the probes. Adding cookie fields to struct bpf_link_info perf event probe records: perf_event.uprobe perf_event.kprobe perf_event.tracepoint perf_event.perf_event And the code to store that in bpf_link_info struct. Signed-off-by: Jiri Olsa Acked-by: Song Liu Acked-by: Yafang Shao Link: https://lore.kernel.org/r/20240119110505.400573-2-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a00f8a5623e1..181e74433272 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6582,6 +6582,7 @@ struct bpf_link_info { __aligned_u64 file_name; /* in/out */ __u32 name_len; __u32 offset; /* offset from file_name */ + __u64 cookie; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ @@ -6589,14 +6590,19 @@ struct bpf_link_info { __u32 offset; /* offset from func_name */ __u64 addr; __u64 missed; + __u64 cookie; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ __u32 name_len; + __u32 :32; + __u64 cookie; } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */ struct { __u64 config; __u32 type; + __u32 :32; + __u64 cookie; } event; /* BPF_PERF_EVENT_EVENT */ }; } perf_event; -- cgit From 9fd112b1f82b587ffb12fb67dd032f551fdb571a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 19 Jan 2024 12:04:59 +0100 Subject: bpf: Store cookies in kprobe_multi bpf_link_info data Storing cookies in kprobe_multi bpf_link_info data. The cookies field is optional and if provided it needs to be an array of __u64 with kprobe_multi.count length. Acked-by: Yafang Shao Signed-off-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20240119110505.400573-3-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 181e74433272..287d05732668 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6563,6 +6563,7 @@ struct bpf_link_info { __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; __u64 missed; + __aligned_u64 cookies; } kprobe_multi; struct { __aligned_u64 path; -- cgit From 1338b93346587a2a6ac79bbcf55ef5b357745573 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 19 Jan 2024 14:49:57 -0800 Subject: bpf: pass btf object id in bpf_map_info. Include btf object id (btf_obj_id) in bpf_map_info so that tools (ex: bpftools struct_ops dump) know the correct btf from the kernel to look up type information of struct_ops types. Since struct_ops types can be defined and registered in a module. The type information of a struct_ops type are defined in the btf of the module defining it. The userspace tools need to know which btf is for the module defining a struct_ops type. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240119225005.668602-7-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 287d05732668..a380047c86af 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6487,7 +6487,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; - __u32 :32; /* alignment pad */ + __u32 btf_vmlinux_id; __u64 map_extra; } __attribute__((aligned(8))); -- cgit From fcc2c1fb0651477c8ed78a3a293c175ccd70697a Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 19 Jan 2024 14:49:59 -0800 Subject: bpf: pass attached BTF to the bpf_struct_ops subsystem Pass the fd of a btf from the userspace to the bpf() syscall, and then convert the fd into a btf. The btf is generated from the module that defines the target BPF struct_ops type. In order to inform the kernel about the module that defines the target struct_ops type, the userspace program needs to provide a btf fd for the respective module's btf. This btf contains essential information on the types defined within the module, including the target struct_ops type. A btf fd must be provided to the kernel for struct_ops maps and for the bpf programs attached to those maps. In the case of the bpf programs, the attach_btf_obj_fd parameter is passed as part of the bpf_attr and is converted into a btf. This btf is then stored in the prog->aux->attach_btf field. Here, it just let the verifier access attach_btf directly. In the case of struct_ops maps, a btf fd is passed as value_type_btf_obj_fd of bpf_attr. The bpf_struct_ops_map_alloc() function converts the fd to a btf and stores it as st_map->btf. A flag BPF_F_VTYPE_BTF_OBJ_FD is added for map_flags to indicate that the value of value_type_btf_obj_fd is set. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240119225005.668602-9-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a380047c86af..1fef6d5a1330 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1330,6 +1330,9 @@ enum { /* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ BPF_F_PATH_FD = (1U << 14), + +/* Flag for value_type_btf_obj_fd, the fd is available */ + BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15), }; /* Flags for BPF_PROG_QUERY. */ @@ -1403,6 +1406,11 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; + + __s32 value_type_btf_obj_fd; /* fd pointing to a BTF + * type data for + * btf_vmlinux_value_type_id. + */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ -- cgit From 35f96de04127d332a5c5e8a155d31f452f88c76d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:00 -0800 Subject: bpf: Introduce BPF token object Add new kind of BPF kernel object, BPF token. BPF token is meant to allow delegating privileged BPF functionality, like loading a BPF program or creating a BPF map, from privileged process to a *trusted* unprivileged process, all while having a good amount of control over which privileged operations could be performed using provided BPF token. This is achieved through mounting BPF FS instance with extra delegation mount options, which determine what operations are delegatable, and also constraining it to the owning user namespace (as mentioned in the previous patch). BPF token itself is just a derivative from BPF FS and can be created through a new bpf() syscall command, BPF_TOKEN_CREATE, which accepts BPF FS FD, which can be attained through open() API by opening BPF FS mount point. Currently, BPF token "inherits" delegated command, map types, prog type, and attach type bit sets from BPF FS as is. In the future, having an BPF token as a separate object with its own FD, we can allow to further restrict BPF token's allowable set of things either at the creation time or after the fact, allowing the process to guard itself further from unintentionally trying to load undesired kind of BPF programs. But for now we keep things simple and just copy bit sets as is. When BPF token is created from BPF FS mount, we take reference to the BPF super block's owning user namespace, and then use that namespace for checking all the {CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN} capabilities that are normally only checked against init userns (using capable()), but now we check them using ns_capable() instead (if BPF token is provided). See bpf_token_capable() for details. Such setup means that BPF token in itself is not sufficient to grant BPF functionality. User namespaced process has to *also* have necessary combination of capabilities inside that user namespace. So while previously CAP_BPF was useless when granted within user namespace, now it gains a meaning and allows container managers and sys admins to have a flexible control over which processes can and need to use BPF functionality within the user namespace (i.e., container in practice). And BPF FS delegation mount options and derived BPF tokens serve as a per-container "flag" to grant overall ability to use bpf() (plus further restrict on which parts of bpf() syscalls are treated as namespaced). Note also, BPF_TOKEN_CREATE command itself requires ns_capable(CAP_BPF) within the BPF FS owning user namespace, rounding up the ns_capable() story of BPF token. Also creating BPF token in init user namespace is currently not supported, given BPF token doesn't have any effect in init user namespace anyways. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Christian Brauner Link: https://lore.kernel.org/bpf/20240124022127.2379740-4-andrii@kernel.org --- include/uapi/linux/bpf.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1fef6d5a1330..b9dc0cca172c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -847,6 +847,36 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -901,6 +931,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -1722,6 +1754,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit From a177fc2bf6fd83704854feaf7aae926b1df4f0b9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:01 -0800 Subject: bpf: Add BPF token support to BPF_MAP_CREATE command Allow providing token_fd for BPF_MAP_CREATE command to allow controlled BPF map creation from unprivileged process through delegated BPF token. New BPF_F_TOKEN_FD flag is added to specify together with BPF token FD for BPF_MAP_CREATE command. Wire through a set of allowed BPF map types to BPF token, derived from BPF FS at BPF token creation time. This, in combination with allowed_cmds allows to create a narrowly-focused BPF token (controlled by privileged agent) with a restrictive set of BPF maps that application can attempt to create. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-5-andrii@kernel.org --- include/uapi/linux/bpf.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b9dc0cca172c..c78cab8b462d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -983,6 +983,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -1365,6 +1366,9 @@ enum { /* Flag for value_type_btf_obj_fd, the fd is available */ BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15), + +/* BPF token FD is passed in a corresponding command's token_fd field */ + BPF_F_TOKEN_FD = (1U << 16), }; /* Flags for BPF_PROG_QUERY. */ @@ -1443,6 +1447,10 @@ union bpf_attr { * type data for * btf_vmlinux_value_type_id. */ + /* BPF token FD to use with BPF_MAP_CREATE operation. + * If provided, map_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ -- cgit From 9ea7c4bf17e39d463eb4782f948f401d9764b1b3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:02 -0800 Subject: bpf: Add BPF token support to BPF_BTF_LOAD command Accept BPF token FD in BPF_BTF_LOAD command to allow BTF data loading through delegated BPF token. BPF_F_TOKEN_FD flag has to be specified when passing BPF token FD. Given BPF_BTF_LOAD command didn't have flags field before, we also add btf_flags field. BTF loading is a pretty straightforward operation, so as long as BPF token is created with allow_cmds granting BPF_BTF_LOAD command, kernel proceeds to parsing BTF data and creating BTF object. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-6-andrii@kernel.org --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c78cab8b462d..cb2c888e3bb4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1632,6 +1632,11 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; + __u32 btf_flags; + /* BPF token FD to use with BPF_BTF_LOAD operation. + * If provided, btf_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 btf_token_fd; }; struct { -- cgit From caf8f28e036c4ba1e823355da6c0c01c39e70ab9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 23 Jan 2024 18:21:03 -0800 Subject: bpf: Add BPF token support to BPF_PROG_LOAD command Add basic support of BPF token to BPF_PROG_LOAD. BPF_F_TOKEN_FD flag should be set in prog_flags field when providing prog_token_fd. Wire through a set of allowed BPF program types and attach types, derived from BPF FS at BPF token creation time. Then make sure we perform bpf_token_capable() checks everywhere where it's relevant. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20240124022127.2379740-7-andrii@kernel.org --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cb2c888e3bb4..d96708380e52 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1028,6 +1028,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1520,6 +1521,10 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; + /* BPF token FD to use with BPF_PROG_LOAD operation. + * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit From 2518e89d5b1913c360f8e4cd9fc6eda6146b8800 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jan 2024 21:35:39 +0200 Subject: wifi: cfg80211: add support for SPP A-MSDUs Add SPP (signaling and payload protected) AMSDU support. Since userspace has to build the RSNX element, add an extended feature flag to indicate that this is supported. In order to avoid downgrade/mismatch attacks, add a flag to the assoc command on the station side, so that we can be sure that the value of the flag comes from the same RSNX element that will be validated by the supplicant against the 4-way-handshake. If we just pulled the data out of a beacon/probe response, we could theoretically look an RSNX element from a different frame, with a different value for this flag, than the supplicant is using to validate in the 4-way-handshake. Note that this patch is only geared towards software crypto implementations or hardware ones that can perfectly implement SPP A-MSDUs, i.e. are able to switch the AAD construction on the fly for each TX/RX frame. For more limited hardware implementations, more capability advertisement would be required, e.g. if the hardware has no way to switch this on the fly but has only a global configuration that must apply to all stations. The driver could of course *reject* mismatches, but the supplicant must know so it can do things like not negotiating SPP A-MSDUs on a T-DLS link when connected to an AP that doesn't support it, or similar. Signed-off-by: Johannes Berg Signed-off-by: Daniel Gabay Reviewed-by: Gregory Greenman Signed-off-by: Miri Korenblit Link: https://msgid.link/20240102213313.fadac8df7030.I9240aebcba1be49636a73c647ed0af862713fc6f@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1ccdcae24372..3e239df3528f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2851,6 +2851,10 @@ enum nl80211_commands { * mapping is as defined in section 9.4.2.314 (TID-To-Link Mapping element) * in Draft P802.11be_D4.0. * + * @NL80211_ATTR_ASSOC_SPP_AMSDU: flag attribute used with + * %NL80211_CMD_ASSOCIATE indicating the SPP A-MSDUs + * are used on this connection + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3394,6 +3398,8 @@ enum nl80211_attrs { NL80211_ATTR_MLO_TTLM_DLINK, NL80211_ATTR_MLO_TTLM_ULINK, + NL80211_ATTR_ASSOC_SPP_AMSDU, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3534,6 +3540,7 @@ enum nl80211_iftype { * @NL80211_STA_FLAG_ASSOCIATED: station is associated; used with drivers * that support %NL80211_FEATURE_FULL_AP_CLIENT_STATE to transition a * previously added station into associated state + * @NL80211_STA_FLAG_SPP_AMSDU: station supports SPP A-MSDUs * @NL80211_STA_FLAG_MAX: highest station flag number currently defined * @__NL80211_STA_FLAG_AFTER_LAST: internal use */ @@ -3546,6 +3553,7 @@ enum nl80211_sta_flags { NL80211_STA_FLAG_AUTHENTICATED, NL80211_STA_FLAG_TDLS_PEER, NL80211_STA_FLAG_ASSOCIATED, + NL80211_STA_FLAG_SPP_AMSDU, /* keep last */ __NL80211_STA_FLAG_AFTER_LAST, @@ -6520,6 +6528,11 @@ enum nl80211_feature_flags { * DFS master on the same channel as described in FCC-594280 D01 * (Section B.3). This, for example, allows P2P GO and P2P clients to * operate on DFS channels as long as there's a concurrent BSS connection. + * + * @NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT: The driver has support for SPP + * (signaling and payload protected) A-MSDUs and this shall be advertised + * in the RSNXE. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -6594,6 +6607,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_OWE_OFFLOAD, NL80211_EXT_FEATURE_OWE_OFFLOAD_AP, NL80211_EXT_FEATURE_DFS_CONCURRENT, + NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, -- cgit From a923ff876f4b6133a093482a6d465cde3bc2e65c Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 25 Jan 2024 14:55:47 -0800 Subject: Revert "nl80211/cfg80211: Specify band specific min RSSI thresholds with sched scan" This *mostly* reverts commit 1e1b11b6a111 ("nl80211/cfg80211: Specify band specific min RSSI thresholds with sched scan"). During the review of a new patch [1] it was observed that the functionality being modified was not actually being used by any in-tree driver. Further research determined that the functionality was originally introduced to support a new Android interface, but that interface was subsequently abandoned. Since the functionality has apparently never been used, remove it. However, to mantain the sanctity of the UABI, keep the nl80211.h assignments, but clearly mark them as obsolete. Cc: Lin Ma Cc: Vamsi Krishna Link: https://lore.kernel.org/linux-wireless/20240119151201.8670-1-linma@zju.edu.cn/ [1] Signed-off-by: Jeff Johnson Link: https://msgid.link/20240125-for-next-v1-1-fd79e01c6c09@quicinc.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 3e239df3528f..853ac538a686 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4463,14 +4463,7 @@ enum nl80211_reg_rule_attr { * value as specified by &struct nl80211_bss_select_rssi_adjust. * @NL80211_SCHED_SCAN_MATCH_ATTR_BSSID: BSSID to be used for matching * (this cannot be used together with SSID). - * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Nested attribute that carries the - * band specific minimum rssi thresholds for the bands defined in - * enum nl80211_band. The minimum rssi threshold value(s32) specific to a - * band shall be encapsulated in attribute with type value equals to one - * of the NL80211_BAND_* defined in enum nl80211_band. For example, the - * minimum rssi threshold value for 2.4GHZ band shall be encapsulated - * within an attribute of type NL80211_BAND_2GHZ. And one or more of such - * attributes will be nested within this attribute. + * @NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI: Obsolete * @NL80211_SCHED_SCAN_MATCH_ATTR_MAX: highest scheduled scan filter * attribute number currently defined * @__NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST: internal use @@ -4483,7 +4476,7 @@ enum nl80211_sched_scan_match_attr { NL80211_SCHED_SCAN_MATCH_ATTR_RELATIVE_RSSI, NL80211_SCHED_SCAN_MATCH_ATTR_RSSI_ADJUST, NL80211_SCHED_SCAN_MATCH_ATTR_BSSID, - NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI, + NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI, /* obsolete */ /* keep last */ __NL80211_SCHED_SCAN_MATCH_ATTR_AFTER_LAST, @@ -6418,8 +6411,7 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_AP_PMKSA_CACHING: Driver/device supports PMKSA caching * (set/del PMKSA operations) in AP mode. * - * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Driver supports - * filtering of sched scan results using band specific RSSI thresholds. + * @NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD: Obsolete * * @NL80211_EXT_FEATURE_STA_TX_PWR: This driver supports controlling tx power * to a station. @@ -6574,7 +6566,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS, NL80211_EXT_FEATURE_AP_PMKSA_CACHING, - NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD, + NL80211_EXT_FEATURE_SCHED_SCAN_BAND_SPECIFIC_RSSI_THOLD, /* obsolete */ NL80211_EXT_FEATURE_EXT_KEY_ID, NL80211_EXT_FEATURE_STA_TX_PWR, NL80211_EXT_FEATURE_SAE_OFFLOAD, -- cgit From ea1cc3ee34a5f3144f6c2cdc07c19c914ccb9526 Mon Sep 17 00:00:00 2001 From: Min Li Date: Wed, 24 Jan 2024 13:49:46 -0500 Subject: ptp: introduce PTP_CLOCK_EXTOFF event for the measured external offset This change is for the PHC devices that can measure the phase offset between PHC signal and the external signal, such as the 1PPS signal of GNSS. Reporting PTP_CLOCK_EXTOFF to user space will be piggy-backed to the existing ptp_extts_event so that application such as ts2phc can poll the external offset the same way as extts. Hence, ts2phc can use the offset to achieve the alignment between PHC and the external signal by the help of either SW or HW filters. Signed-off-by: Min Li Acked-by: Richard Cochran Signed-off-by: David S. Miller --- include/uapi/linux/ptp_clock.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index da700999cad4..053b40d642de 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -32,6 +32,7 @@ #define PTP_RISING_EDGE (1<<1) #define PTP_FALLING_EDGE (1<<2) #define PTP_STRICT_FLAGS (1<<3) +#define PTP_EXT_OFFSET (1<<4) #define PTP_EXTTS_EDGES (PTP_RISING_EDGE | PTP_FALLING_EDGE) /* @@ -40,7 +41,8 @@ #define PTP_EXTTS_VALID_FLAGS (PTP_ENABLE_FEATURE | \ PTP_RISING_EDGE | \ PTP_FALLING_EDGE | \ - PTP_STRICT_FLAGS) + PTP_STRICT_FLAGS | \ + PTP_EXT_OFFSET) /* * flag fields valid for the original PTP_EXTTS_REQUEST ioctl. @@ -50,6 +52,11 @@ PTP_RISING_EDGE | \ PTP_FALLING_EDGE) +/* + * flag fields valid for the ptp_extts_event report. + */ +#define PTP_EXTTS_EVENT_VALID (PTP_ENABLE_FEATURE) + /* * Bits of the ptp_perout_request.flags field: */ @@ -228,9 +235,9 @@ struct ptp_pin_desc { #define PTP_MASK_EN_SINGLE _IOW(PTP_CLK_MAGIC, 20, unsigned int) struct ptp_extts_event { - struct ptp_clock_time t; /* Time event occured. */ + struct ptp_clock_time t; /* Time event occurred. */ unsigned int index; /* Which channel produced the event. */ - unsigned int flags; /* Reserved for future use. */ + unsigned int flags; /* Event type. */ unsigned int rsv[2]; /* Reserved for future use. */ }; -- cgit From 941988af572434e4aa93fb0f2f509f92adfd691a Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 21 Dec 2023 14:31:57 +0100 Subject: netfilter: uapi: Document NFT_TABLE_F_OWNER flag Add at least this one-liner describing the obvious. Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership") Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index ca30232b7bc8..fbce238abdc1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -179,6 +179,7 @@ enum nft_hook_attributes { * enum nft_table_flags - nf_tables table flags * * @NFT_TABLE_F_DORMANT: this table is not active + * @NFT_TABLE_F_OWNER: this table is owned by a process */ enum nft_table_flags { NFT_TABLE_F_DORMANT = 0x1, -- cgit From da5141bbe0c2693d85f14a89ee991921904f4d0c Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 21 Dec 2023 14:31:58 +0100 Subject: netfilter: nf_tables: Introduce NFT_TABLE_F_PERSIST This companion flag to NFT_TABLE_F_OWNER requests the kernel to keep the table around after the process has exited. It marks such table as orphaned (by dropping OWNER flag but keeping PERSIST flag in place), which opens it for other processes to manipulate. For the sake of simplicity, PERSIST flag may not be altered though. Signed-off-by: Phil Sutter Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index fbce238abdc1..3fee994721cd 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -180,13 +180,16 @@ enum nft_hook_attributes { * * @NFT_TABLE_F_DORMANT: this table is not active * @NFT_TABLE_F_OWNER: this table is owned by a process + * @NFT_TABLE_F_PERSIST: this table shall outlive its owner */ enum nft_table_flags { NFT_TABLE_F_DORMANT = 0x1, NFT_TABLE_F_OWNER = 0x2, + NFT_TABLE_F_PERSIST = 0x4, }; #define NFT_TABLE_F_MASK (NFT_TABLE_F_DORMANT | \ - NFT_TABLE_F_OWNER) + NFT_TABLE_F_OWNER | \ + NFT_TABLE_F_PERSIST) /** * enum nft_table_attributes - nf_tables table netlink attributes -- cgit From cf4f0f1e1c465da7c1f6bc89c3ff50bf42f0ab02 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 30 Jan 2024 13:08:29 +0100 Subject: dpll: extend uapi by lock status error attribute If the dpll devices goes to state "unlocked" or "holdover", it may be caused by an error. In that case, allow user to see what the error was. Introduce a new attribute and values it can carry. Signed-off-by: Jiri Pirko Acked-by: Vadim Fedorenko Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/uapi/linux/dpll.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index b4e947f9bfbc..0c13d7f1a1bc 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -50,6 +50,35 @@ enum dpll_lock_status { DPLL_LOCK_STATUS_MAX = (__DPLL_LOCK_STATUS_MAX - 1) }; +/** + * enum dpll_lock_status_error - if previous status change was done due to a + * failure, this provides information of dpll device lock status error. Valid + * values for DPLL_A_LOCK_STATUS_ERROR attribute + * @DPLL_LOCK_STATUS_ERROR_NONE: dpll device lock status was changed without + * any error + * @DPLL_LOCK_STATUS_ERROR_UNDEFINED: dpll device lock status was changed due + * to undefined error. Driver fills this value up in case it is not able to + * obtain suitable exact error type. + * @DPLL_LOCK_STATUS_ERROR_MEDIA_DOWN: dpll device lock status was changed + * because of associated media got down. This may happen for example if dpll + * device was previously locked on an input pin of type + * PIN_TYPE_SYNCE_ETH_PORT. + * @DPLL_LOCK_STATUS_ERROR_FRACTIONAL_FREQUENCY_OFFSET_TOO_HIGH: the FFO + * (Fractional Frequency Offset) between the RX and TX symbol rate on the + * media got too high. This may happen for example if dpll device was + * previously locked on an input pin of type PIN_TYPE_SYNCE_ETH_PORT. + */ +enum dpll_lock_status_error { + DPLL_LOCK_STATUS_ERROR_NONE = 1, + DPLL_LOCK_STATUS_ERROR_UNDEFINED, + DPLL_LOCK_STATUS_ERROR_MEDIA_DOWN, + DPLL_LOCK_STATUS_ERROR_FRACTIONAL_FREQUENCY_OFFSET_TOO_HIGH, + + /* private: */ + __DPLL_LOCK_STATUS_ERROR_MAX, + DPLL_LOCK_STATUS_ERROR_MAX = (__DPLL_LOCK_STATUS_ERROR_MAX - 1) +}; + #define DPLL_TEMP_DIVIDER 1000 /** @@ -150,6 +179,7 @@ enum dpll_a { DPLL_A_LOCK_STATUS, DPLL_A_TEMP, DPLL_A_TYPE, + DPLL_A_LOCK_STATUS_ERROR, __DPLL_A_MAX, DPLL_A_MAX = (__DPLL_A_MAX - 1) -- cgit From 240fd405528bbf7fafa0559202ca7aa524c9cd96 Mon Sep 17 00:00:00 2001 From: Aahil Awatramani Date: Fri, 2 Feb 2024 17:58:58 +0000 Subject: bonding: Add independent control state machine Add support for the independent control state machine per IEEE 802.1AX-2008 5.4.15 in addition to the existing implementation of the coupled control state machine. Introduces two new states, AD_MUX_COLLECTING and AD_MUX_DISTRIBUTING in the LACP MUX state machine for separated handling of an initial Collecting state before the Collecting and Distributing state. This enables a port to be in a state where it can receive incoming packets while not still distributing. This is useful for reducing packet loss when a port begins distributing before its partner is able to collect. Added new functions such as bond_set_slave_tx_disabled_flags and bond_set_slave_rx_enabled_flags to precisely manage the port's collecting and distributing states. Previously, there was no dedicated method to disable TX while keeping RX enabled, which this patch addresses. Note that the regular flow process in the kernel's bonding driver remains unaffected by this patch. The extension requires explicit opt-in by the user (in order to ensure no disruptions for existing setups) via netlink support using the new bonding parameter coupled_control. The default value for coupled_control is set to 1 so as to preserve existing behaviour. Signed-off-by: Aahil Awatramani Reviewed-by: Hangbin Liu Link: https://lore.kernel.org/r/20240202175858.1573852-1-aahila@google.com Signed-off-by: Paolo Abeni --- include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ab9bcff96e4d..ffa637b38c93 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1505,6 +1505,7 @@ enum { IFLA_BOND_AD_LACP_ACTIVE, IFLA_BOND_MISSED_MAX, IFLA_BOND_NS_IP6_TARGET, + IFLA_BOND_COUPLED_CONTROL, __IFLA_BOND_MAX, }; -- cgit From 6c06c88fa838fcc1b7e5380facd086f57fd9d1c4 Mon Sep 17 00:00:00 2001 From: Marek BehĂșn Date: Sun, 4 Feb 2024 15:16:46 +0100 Subject: net: mdio: add 2.5g and 5g related PMA speed constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add constants indicating 2.5g and 5g ability in the MMD PMA speed register. Signed-off-by: Marek BehĂșn Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/98e15038-d96c-442f-93e4-410100d27866@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/mdio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index d03863da180e..3c9097502403 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -138,6 +138,8 @@ #define MDIO_PMA_SPEED_1000 0x0010 /* 1000M capable */ #define MDIO_PMA_SPEED_100 0x0020 /* 100M capable */ #define MDIO_PMA_SPEED_10 0x0040 /* 10M capable */ +#define MDIO_PMA_SPEED_2_5G 0x2000 /* 2.5G capable */ +#define MDIO_PMA_SPEED_5G 0x4000 /* 5G capable */ #define MDIO_PCS_SPEED_10P2B 0x0002 /* 10PASS-TS/2BASE-TL capable */ #define MDIO_PCS_SPEED_2_5G 0x0040 /* 2.5G capable */ #define MDIO_PCS_SPEED_5G 0x0080 /* 5G capable */ -- cgit From e1aa35e16399d600215470411dfb56e1d6f8e017 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 8 Dec 2023 17:57:29 +0100 Subject: can: isotp: support dynamic flow control parameters The ISO15765-2 standard supports to take the PDUs communication parameters blocksize (BS) and Separation Time minimum (STmin) either from the first received flow control (FC) "static" or from every received FC "dynamic". Add a new CAN_ISOTP_DYN_FC_PARMS flag to support dynamic FC parameters. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20231208165729.3011-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index 439c982f7e81..6cde62371b6f 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -137,6 +137,7 @@ struct can_isotp_ll_options { #define CAN_ISOTP_WAIT_TX_DONE 0x0400 /* wait for tx completion */ #define CAN_ISOTP_SF_BROADCAST 0x0800 /* 1-to-N functional addressing */ #define CAN_ISOTP_CF_BROADCAST 0x1000 /* 1-to-N transmission w/o FC */ +#define CAN_ISOTP_DYN_FC_PARMS 0x2000 /* dynamic FC parameters BS/STmin */ /* protocol machine default values */ -- cgit From f6ca96aa51a4ae1b3a416fbe85acdf1197c405a6 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Mon, 5 Feb 2024 21:59:50 +0530 Subject: wifi: cfg80211: add support for link id attribute in NL80211_CMD_DEL_STATION Currently whenever NL80211_CMD_DEL_STATION command is called without any MAC address, all stations present on that interface are flushed. However with MLO there is a need to flush such stations only which are using at least a particular link from the AP MLD interface. For example - 2 GHz and 5 GHz are part of an AP MLD. To this interface, following stations are connected - 1. One non-EHT STA on 2 GHz link. 2. One non-EHT STA on 5 GHz link. 3. One Multi-Link STA having 2 GHz and 5 GHz as active links. Now if currently, NL80211_CMD_DEL_STATION is issued by the 2 GHz link without any MAC address, it would flush all station entries. However, flushing of station entry #2 at least is not desireable since it is connected to 5 GHz link alone. Hence, add an option to pass link ID as well in the command so that if link ID is passed, stations using that passed link ID alone would be flushed and others will not. So after this, station entries #1 and #3 alone would be flushed and #2 will remain as it is. Signed-off-by: Aditya Kumar Singh Link: https://msgid.link/20240205162952.1697646-2-quic_adisi@quicinc.com [clarify documentation] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 853ac538a686..805bfe712971 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -438,7 +438,8 @@ * %NL80211_ATTR_REASON_CODE can optionally be used to specify which type * of disconnection indication should be sent to the station * (Deauthentication or Disassociation frame and reason code for that - * frame). + * frame). %NL80211_ATTR_MLO_LINK_ID can be used optionally to remove + * stations connected and using at least that link as one of its links. * * @NL80211_CMD_GET_MPATH: Get mesh path attributes for mesh path to * destination %NL80211_ATTR_MAC on the interface identified by -- cgit From 7b5e25b8baebc02db728bfbdc3080be863144c7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Feb 2024 16:54:07 +0200 Subject: wifi: cfg80211: rename UHB to 6 GHz UHB stands for "Ultra High Band", but this term doesn't really exist in the spec. Rename all occurrences to "6 GHz", but keep a few defines for userspace API compatibility. Link: https://msgid.link/20240206164849.c9cfb9400839.I153db3b951934a1d84409c17fbe1f1d1782543fa@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 805bfe712971..13fa10804909 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -11,7 +11,7 @@ * Copyright 2008 Jouni Malinen * Copyright 2008 Colin McCabe * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -4269,9 +4269,9 @@ enum nl80211_wmm_rule { * allowed for peer-to-peer or adhoc communication under the control * of a DFS master which operates on the same channel (FCC-594280 D01 * Section B.3). Should be used together with %NL80211_RRF_DFS only. - * @NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT: Client connection to VLP AP + * @NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP * not allowed using this channel - * @NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT: Client connection to AFC AP + * @NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP * not allowed using this channel * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined @@ -4313,8 +4313,8 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_EHT, NL80211_FREQUENCY_ATTR_PSD, NL80211_FREQUENCY_ATTR_DFS_CONCURRENT, - NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT, - NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT, + NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT, + NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4327,6 +4327,10 @@ enum nl80211_frequency_attr { #define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR #define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \ NL80211_FREQUENCY_ATTR_IR_CONCURRENT +#define NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT \ + NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT +#define NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT \ + NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT /** * enum nl80211_bitrate_attr - bitrate attributes @@ -4517,8 +4521,8 @@ enum nl80211_sched_scan_match_attr { peer-to-peer or adhoc communication under the control of a DFS master which operates on the same channel (FCC-594280 D01 Section B.3). Should be used together with %NL80211_RRF_DFS only. - * @NL80211_RRF_NO_UHB_VLP_CLIENT: Client connection to VLP AP not allowed - * @NL80211_RRF_NO_UHB_AFC_CLIENT: Client connection to AFC AP not allowed + * @NL80211_RRF_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP not allowed + * @NL80211_RRF_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP not allowed */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4541,8 +4545,8 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_EHT = 1<<19, NL80211_RRF_PSD = 1<<20, NL80211_RRF_DFS_CONCURRENT = 1<<21, - NL80211_RRF_NO_UHB_VLP_CLIENT = 1<<22, - NL80211_RRF_NO_UHB_AFC_CLIENT = 1<<23, + NL80211_RRF_NO_6GHZ_VLP_CLIENT = 1<<22, + NL80211_RRF_NO_6GHZ_AFC_CLIENT = 1<<23, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR @@ -4551,6 +4555,8 @@ enum nl80211_reg_rule_flags { #define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\ NL80211_RRF_NO_HT40PLUS) #define NL80211_RRF_GO_CONCURRENT NL80211_RRF_IR_CONCURRENT +#define NL80211_RRF_NO_UHB_VLP_CLIENT NL80211_RRF_NO_6GHZ_VLP_CLIENT +#define NL80211_RRF_NO_UHB_AFC_CLIENT NL80211_RRF_NO_6GHZ_AFC_CLIENT /* For backport compatibility with older userspace */ #define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS) @@ -5098,14 +5104,17 @@ enum nl80211_bss_use_for { * BSS isn't possible * @NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY: NSTR nonprimary links aren't * supported by the device, and this BSS entry represents one. - * @NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH: STA is not supporting + * @NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH: STA is not supporting * the AP power type (SP, VLP, AP) that the AP uses. */ enum nl80211_bss_cannot_use_reasons { NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY = 1 << 0, - NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH = 1 << 1, + NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH = 1 << 1, }; +#define NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH \ + NL80211_BSS_CANNOT_USE_6GHZ_PWR_MISMATCH + /** * enum nl80211_bss - netlink attributes for a BSS * -- cgit From a110a3b79177ddd7e7295671df97fb5386406835 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Feb 2024 16:54:08 +0200 Subject: wifi: cfg80211: optionally support monitor on disabled channels If the hardware supports a disabled channel, it may in some cases be possible to use monitor mode (without any transmit) on it when it's otherwise disabled. Add a new channel flag IEEE80211_CHAN_CAN_MONITOR that makes it possible for a driver to indicate such a thing. Make it per channel so drivers could have a choice with it, perhaps it's only possible on some channels, perhaps some channels are not supported at all, but still there and marked disabled. In _nl80211_parse_chandef() simplify the code and check only for an unknown channel, _cfg80211_chandef_usable() will later check for IEEE80211_CHAN_DISABLED anyway. Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206164849.87fad3a21a09.I9116b2fdc2e2c9fd59a9273a64db7fcb41fc0328@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 13fa10804909..546cc176c2a9 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4273,6 +4273,9 @@ enum nl80211_wmm_rule { * not allowed using this channel * @NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP * not allowed using this channel + * @NL80211_FREQUENCY_ATTR_CAN_MONITOR: This channel can be used in monitor + * mode despite other (regulatory) restrictions, even if the channel is + * otherwise completely disabled. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4315,6 +4318,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_DFS_CONCURRENT, NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT, + NL80211_FREQUENCY_ATTR_CAN_MONITOR, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, -- cgit From 93d9f26db5b34ec4d2d5056aeb8819cbef35519c Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Tue, 6 Feb 2024 16:54:10 +0200 Subject: wifi: nl80211: allow reporting wakeup for unprot deauth/disassoc Add a report reason for waking up due to an unprotected deauth/disassoc when MFP is used. If setting wowlan to wake on disconnection, and an unprotected deatuh/disassoc arrived (in MFP), some drivers might want to report wakeup due to unprotected deauth/disassoc, rather than dissassociation. Add support for that. Signed-off-by: Shaul Triebitz Signed-off-by: Miri Korenblit Link: https://msgid.link/20240206164849.8dc9ad531a17.I7f8e926adf927f762e11aaa3458f6354665c7fc5@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 546cc176c2a9..f23ecbdd84a2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -5757,6 +5757,8 @@ struct nl80211_pattern_support { * %NL80211_ATTR_SCAN_FREQUENCIES contains more than one * frequency, it means that the match occurred in more than one * channel. + * @NL80211_WOWLAN_TRIG_UNPROTECTED_DEAUTH_DISASSOC: For wakeup reporting only. + * Wake up happened due to unprotected deauth or disassoc frame in MFP. * @NUM_NL80211_WOWLAN_TRIG: number of wake on wireless triggers * @MAX_NL80211_WOWLAN_TRIG: highest wowlan trigger attribute number * @@ -5784,6 +5786,7 @@ enum nl80211_wowlan_triggers { NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS, NL80211_WOWLAN_TRIG_NET_DETECT, NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS, + NL80211_WOWLAN_TRIG_UNPROTECTED_DEAUTH_DISASSOC, /* keep last */ NUM_NL80211_WOWLAN_TRIG, -- cgit From c83c22ec1493c0b7cc77327bedbd387e295872b6 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Mon, 12 Feb 2024 22:35:50 +0100 Subject: can: canxl: add virtual CAN network identifier support CAN XL data frames contain an 8-bit virtual CAN network identifier (VCID). A VCID value of zero represents an 'untagged' CAN XL frame. To receive and send these optional VCIDs via CAN_RAW sockets a new socket option CAN_RAW_XL_VCID_OPTS is introduced to define/access VCID content: - tx: set the outgoing VCID value by the kernel (one fixed 8-bit value) - tx: pass through VCID values from the user space (e.g. for traffic replay) - rx: apply VCID receive filter (value/mask) to be passed to the user space With the 'tx pass through' option CAN_RAW_XL_VCID_TX_PASS all valid VCID values can be sent, e.g. to replay full qualified CAN XL traffic. The VCID value provided for the CAN_RAW_XL_VCID_TX_SET option will override the VCID value in the struct canxl_frame.prio defined for CAN_RAW_XL_VCID_TX_PASS when both flags are set. With a rx_vcid_mask of zero all possible VCID values (0x00 - 0xFF) are passed to the user space when the CAN_RAW_XL_VCID_RX_FILTER flag is set. Without this flag only untagged CAN XL frames (VCID = 0x00) are delivered to the user space (default). The 8-bit VCID is stored inside the CAN XL prio element (only in CAN XL frames!) to not interfere with other CAN content or the CAN filters provided by the CAN_RAW sockets and kernel infrastruture. Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20240212213550.18516-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can.h | 9 +++++++-- include/uapi/linux/can/raw.h | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 939db2388208..e78cbd85ce7c 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -193,9 +193,14 @@ struct canfd_frame { #define CANXL_XLF 0x80 /* mandatory CAN XL frame flag (must always be set!) */ #define CANXL_SEC 0x01 /* Simple Extended Content (security/segmentation) */ +/* the 8-bit VCID is optionally placed in the canxl_frame.prio element */ +#define CANXL_VCID_OFFSET 16 /* bit offset of VCID in prio element */ +#define CANXL_VCID_VAL_MASK 0xFFUL /* VCID is an 8-bit value */ +#define CANXL_VCID_MASK (CANXL_VCID_VAL_MASK << CANXL_VCID_OFFSET) + /** * struct canxl_frame - CAN with e'X'tended frame 'L'ength frame structure - * @prio: 11 bit arbitration priority with zero'ed CAN_*_FLAG flags + * @prio: 11 bit arbitration priority with zero'ed CAN_*_FLAG flags / VCID * @flags: additional flags for CAN XL * @sdt: SDU (service data unit) type * @len: frame payload length in byte (CANXL_MIN_DLEN .. CANXL_MAX_DLEN) @@ -205,7 +210,7 @@ struct canfd_frame { * @prio shares the same position as @can_id from struct can[fd]_frame. */ struct canxl_frame { - canid_t prio; /* 11 bit priority for arbitration (canid_t) */ + canid_t prio; /* 11 bit priority for arbitration / 8 bit VCID */ __u8 flags; /* additional flags for CAN XL */ __u8 sdt; /* SDU (service data unit) type */ __u16 len; /* frame payload length in byte */ diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h index 31622c9b7988..e024d896e278 100644 --- a/include/uapi/linux/can/raw.h +++ b/include/uapi/linux/can/raw.h @@ -65,6 +65,22 @@ enum { CAN_RAW_FD_FRAMES, /* allow CAN FD frames (default:off) */ CAN_RAW_JOIN_FILTERS, /* all filters must match to trigger */ CAN_RAW_XL_FRAMES, /* allow CAN XL frames (default:off) */ + CAN_RAW_XL_VCID_OPTS, /* CAN XL VCID configuration options */ }; +/* configuration for CAN XL virtual CAN identifier (VCID) handling */ +struct can_raw_vcid_options { + + __u8 flags; /* flags for vcid (filter) behaviour */ + __u8 tx_vcid; /* VCID value set into canxl_frame.prio */ + __u8 rx_vcid; /* VCID value for VCID filter */ + __u8 rx_vcid_mask; /* VCID mask for VCID filter */ + +}; + +/* can_raw_vcid_options.flags for CAN XL virtual CAN identifier handling */ +#define CAN_RAW_XL_VCID_TX_SET 0x01 +#define CAN_RAW_XL_VCID_TX_PASS 0x02 +#define CAN_RAW_XL_VCID_RX_FILTER 0x04 + #endif /* !_UAPI_CAN_RAW_H */ -- cgit From 18e2bf0edf4dd88d9656ec92395aa47392e85b61 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Tue, 13 Feb 2024 06:16:45 +0000 Subject: eventpoll: Add epoll ioctl for epoll_params Add an ioctl for getting and setting epoll_params. User programs can use this ioctl to get and set the busy poll usec time, packet budget, and prefer busy poll params for a specific epoll context. Parameters are limited: - busy_poll_usecs is limited to <= s32_max - busy_poll_budget is limited to <= NAPI_POLL_WEIGHT by unprivileged users (!capable(CAP_NET_ADMIN)) - prefer_busy_poll must be 0 or 1 - __pad must be 0 Signed-off-by: Joe Damato Acked-by: Stanislav Fomichev Reviewed-by: Jiri Slaby Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/eventpoll.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index cfbcc4cc49ac..4f4b948ef381 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -85,4 +85,17 @@ struct epoll_event { __u64 data; } EPOLL_PACKED; +struct epoll_params { + __u32 busy_poll_usecs; + __u16 busy_poll_budget; + __u8 prefer_busy_poll; + + /* pad the struct to a multiple of 64bits */ + __u8 __pad; +}; + +#define EPOLL_IOC_TYPE 0x8A +#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) +#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) + #endif /* _UAPI_LINUX_EVENTPOLL_H */ -- cgit From 1e63e5a813fa6203d7430af51d6bffb728525015 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Feb 2024 15:27:44 -0800 Subject: net: sched: Annotate struct tc_pedit with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time checking via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). As found with Coccinelle[1], add __counted_by for struct tc_pedit. Additionally, since the element count member must be set before accessing the annotated flexible array member, move its initialization earlier. Link: https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci [1] Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/tc_act/tc_pedit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h index f3e61b04fa01..f5cab7fc96ab 100644 --- a/include/uapi/linux/tc_act/tc_pedit.h +++ b/include/uapi/linux/tc_act/tc_pedit.h @@ -62,7 +62,7 @@ struct tc_pedit_sel { tc_gen; unsigned char nkeys; unsigned char flags; - struct tc_pedit_key keys[0]; + struct tc_pedit_key keys[] __counted_by(nkeys); }; #define tc_pedit tc_pedit_sel -- cgit From 953cc643329b38434bb7d6206951d1a48016e38b Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau Date: Sun, 18 Feb 2024 08:57:38 +0100 Subject: net: phy: Add BaseT1 auto-negotiation constants Added constants for advertising 100BT1 and 1000BT1 in register BASE-T1 auto-negotiation advertisement register [31:16] (Register 7.515) Reviewed-by: Andrew Lunn Signed-off-by: Dimitri Fedrau Link: https://lore.kernel.org/r/20240218075753.18067-2-dima.fedrau@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/mdio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h index 3c9097502403..c0c8ec995b06 100644 --- a/include/uapi/linux/mdio.h +++ b/include/uapi/linux/mdio.h @@ -350,6 +350,8 @@ /* BASE-T1 auto-negotiation advertisement register [31:16] */ #define MDIO_AN_T1_ADV_M_B10L 0x4000 /* device is compatible with 10BASE-T1L */ +#define MDIO_AN_T1_ADV_M_1000BT1 0x0080 /* advertise 1000BASE-T1 */ +#define MDIO_AN_T1_ADV_M_100BT1 0x0020 /* advertise 100BASE-T1 */ #define MDIO_AN_T1_ADV_M_MST 0x0010 /* advertise master preference */ /* BASE-T1 auto-negotiation advertisement register [47:32] */ -- cgit From c16d2380e8fdd9fc1fd0e9c60e068f264b2d0ced Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 19 Feb 2024 17:51:51 +0800 Subject: net: mctp: provide a more specific tag allocation ioctl Now that we have net-specific tags, extend the tag allocation ioctls (SIOCMCTPALLOCTAG / SIOCMCTPDROPTAG) to allow a network parameter to be passed to the tag allocation. We also add a local_addr member to the ioc struct, to allow for a future finer-grained tag allocation using local EIDs too. We don't add any specific support for that now though, so require MCTP_ADDR_ANY or MCTP_ADDR_NULL for those at present. The old ioctls will still work, but allocate for the default MCTP net. These are now marked as deprecated in the header. Signed-off-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- include/uapi/linux/mctp.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h index 154ab56651f1..e1db65df9359 100644 --- a/include/uapi/linux/mctp.h +++ b/include/uapi/linux/mctp.h @@ -50,7 +50,14 @@ struct sockaddr_mctp_ext { #define SIOCMCTPALLOCTAG (SIOCPROTOPRIVATE + 0) #define SIOCMCTPDROPTAG (SIOCPROTOPRIVATE + 1) +#define SIOCMCTPALLOCTAG2 (SIOCPROTOPRIVATE + 2) +#define SIOCMCTPDROPTAG2 (SIOCPROTOPRIVATE + 3) +/* Deprecated: use mctp_ioc_tag_ctl2 / TAG2 ioctls instead, which defines the + * MCTP network ID as part of the allocated tag. Using this assumes the default + * net ID for allocated tags, which may not give correct behaviour on system + * with multiple networks configured. + */ struct mctp_ioc_tag_ctl { mctp_eid_t peer_addr; @@ -65,4 +72,29 @@ struct mctp_ioc_tag_ctl { __u16 flags; }; +struct mctp_ioc_tag_ctl2 { + /* Peer details: network ID, peer EID, local EID. All set by the + * caller. + * + * Local EID must be MCTP_ADDR_NULL or MCTP_ADDR_ANY in current + * kernels. + */ + unsigned int net; + mctp_eid_t peer_addr; + mctp_eid_t local_addr; + + /* Set by caller, but no flags defined currently. Must be 0 */ + __u16 flags; + + /* For SIOCMCTPALLOCTAG2: must be passed as zero, kernel will + * populate with the allocated tag value. Returned tag value will + * always have TO and PREALLOC set. + * + * For SIOCMCTPDROPTAG2: userspace provides tag value to drop, from + * a prior SIOCMCTPALLOCTAG2 call (and so must have TO and PREALLOC set). + */ + __u8 tag; + +}; + #endif /* __UAPI_MCTP_H */ -- cgit From 58fd62e0aa50fdd20bc41a01e787001f3af8a925 Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Wed, 21 Feb 2024 13:18:38 -0800 Subject: bpf: Clarify batch lookup/lookup_and_delete semantics The batch lookup and lookup_and_delete APIs have two parameters, in_batch and out_batch, to facilitate iterative lookup/lookup_and_deletion operations for supported maps. Except NULL for in_batch at the start of these two batch operations, both parameters need to point to memory equal or larger than the respective map key size, except for various hashmaps (hash, percpu_hash, lru_hash, lru_percpu_hash) where the in_batch/out_batch memory size should be at least 4 bytes. Document these semantics to clarify the API. Signed-off-by: Martin Kelly Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20240221211838.1241578-1-martin.kelly@crowdstrike.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d96708380e52..d2e6c5fcec01 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -617,7 +617,11 @@ union bpf_iter_link_info { * to NULL to begin the batched operation. After each subsequent * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant * *out_batch* as the *in_batch* for the next operation to - * continue iteration from the current point. + * continue iteration from the current point. Both *in_batch* and + * *out_batch* must point to memory large enough to hold a key, + * except for maps of type **BPF_MAP_TYPE_{HASH, PERCPU_HASH, + * LRU_HASH, LRU_PERCPU_HASH}**, for which batch parameters + * must be at least 4 bytes wide regardless of key size. * * The *keys* and *values* are output parameters which must point * to memory large enough to hold *count* items based on the key -- cgit From 5fa918a33563e050d45fb8f260f9240b2039f432 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Mon, 26 Feb 2024 14:14:10 +0100 Subject: uapi: ioam6: API for netlink multicast events Add new api to support ioam6 events for generic netlink multicast. A first "trace" event is added to the list of ioam6 events, which will represent an IOAM Pre-allocated Trace Option-Type. It provides another solution to share IOAM data with user space. Reviewed-by: David Ahern Signed-off-by: Justin Iurman Signed-off-by: David S. Miller --- include/uapi/linux/ioam6_genl.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ioam6_genl.h b/include/uapi/linux/ioam6_genl.h index ca4b22833754..1733fbc51fb5 100644 --- a/include/uapi/linux/ioam6_genl.h +++ b/include/uapi/linux/ioam6_genl.h @@ -49,4 +49,24 @@ enum { #define IOAM6_CMD_MAX (__IOAM6_CMD_MAX - 1) +#define IOAM6_GENL_EV_GRP_NAME "ioam6_events" + +enum ioam6_event_type { + IOAM6_EVENT_UNSPEC, + IOAM6_EVENT_TRACE, +}; + +enum ioam6_event_attr { + IOAM6_EVENT_ATTR_UNSPEC, + + IOAM6_EVENT_ATTR_TRACE_NAMESPACE, /* u16 */ + IOAM6_EVENT_ATTR_TRACE_NODELEN, /* u8 */ + IOAM6_EVENT_ATTR_TRACE_TYPE, /* u32 */ + IOAM6_EVENT_ATTR_TRACE_DATA, /* Binary */ + + __IOAM6_EVENT_ATTR_MAX +}; + +#define IOAM6_EVENT_ATTR_MAX (__IOAM6_EVENT_ATTR_MAX - 1) + #endif /* _UAPI_LINUX_IOAM6_GENL_H */ -- cgit From 896880ff30866f386ebed14ab81ce1ad3710cfc4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 22 Feb 2024 07:56:15 -0800 Subject: bpf: Replace bpf_lpm_trie_key 0-length array with flexible array Replace deprecated 0-length array in struct bpf_lpm_trie_key with flexible array. Found with GCC 13: ../kernel/bpf/lpm_trie.c:207:51: warning: array subscript i is outside array bounds of 'const __u8[0]' {aka 'const unsigned char[]'} [-Warray-bounds=] 207 | *(__be16 *)&key->data[i]); | ^~~~~~~~~~~~~ ../include/uapi/linux/swab.h:102:54: note: in definition of macro '__swab16' 102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x)) | ^ ../include/linux/byteorder/generic.h:97:21: note: in expansion of macro '__be16_to_cpu' 97 | #define be16_to_cpu __be16_to_cpu | ^~~~~~~~~~~~~ ../kernel/bpf/lpm_trie.c:206:28: note: in expansion of macro 'be16_to_cpu' 206 | u16 diff = be16_to_cpu(*(__be16 *)&node->data[i] ^ | ^~~~~~~~~~~ In file included from ../include/linux/bpf.h:7: ../include/uapi/linux/bpf.h:82:17: note: while referencing 'data' 82 | __u8 data[0]; /* Arbitrary size */ | ^~~~ And found at run-time under CONFIG_FORTIFY_SOURCE: UBSAN: array-index-out-of-bounds in kernel/bpf/lpm_trie.c:218:49 index 0 is out of range for type '__u8 [*]' Changing struct bpf_lpm_trie_key is difficult since has been used by userspace. For example, in Cilium: struct egress_gw_policy_key { struct bpf_lpm_trie_key lpm_key; __u32 saddr; __u32 daddr; }; While direct references to the "data" member haven't been found, there are static initializers what include the final member. For example, the "{}" here: struct egress_gw_policy_key in_key = { .lpm_key = { 32 + 24, {} }, .saddr = CLIENT_IP, .daddr = EXTERNAL_SVC_IP & 0Xffffff, }; To avoid the build time and run time warnings seen with a 0-sized trailing array for struct bpf_lpm_trie_key, introduce a new struct that correctly uses a flexible array for the trailing bytes, struct bpf_lpm_trie_key_u8. As part of this, include the "header" portion (which is just the "prefixlen" member), so it can be used by anything building a bpf_lpr_trie_key that has trailing members that aren't a u8 flexible array (like the self-test[1]), which is named struct bpf_lpm_trie_key_hdr. Unfortunately, C++ refuses to parse the __struct_group() helper, so it is not possible to define struct bpf_lpm_trie_key_hdr directly in struct bpf_lpm_trie_key_u8, so we must open-code the union directly. Adjust the kernel code to use struct bpf_lpm_trie_key_u8 through-out, and for the selftest to use struct bpf_lpm_trie_key_hdr. Add a comment to the UAPI header directing folks to the two new options. Reported-by: Mark Rutland Signed-off-by: Kees Cook Signed-off-by: Daniel Borkmann Acked-by: Gustavo A. R. Silva Closes: https://paste.debian.net/hidden/ca500597/ Link: https://lore.kernel.org/all/202206281009.4332AA33@keescook/ [1] Link: https://lore.kernel.org/bpf/20240222155612.it.533-kees@kernel.org --- include/uapi/linux/bpf.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d2e6c5fcec01..a241f407c234 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -77,12 +77,29 @@ struct bpf_insn { __s32 imm; /* signed immediate constant */ }; -/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ +/* Deprecated: use struct bpf_lpm_trie_key_u8 (when the "data" member is needed for + * byte access) or struct bpf_lpm_trie_key_hdr (when using an alternative type for + * the trailing flexible array member) instead. + */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ __u8 data[0]; /* Arbitrary size */ }; +/* Header for bpf_lpm_trie_key structs */ +struct bpf_lpm_trie_key_hdr { + __u32 prefixlen; +}; + +/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry, with trailing byte array. */ +struct bpf_lpm_trie_key_u8 { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + __u8 data[]; /* Arbitrary size */ +}; + struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; /* cgroup inode id */ __u32 attach_type; /* program attach type (enum bpf_attach_type) */ -- cgit From 0ed3bba16d37618c7776a44ef02ad3039966d687 Mon Sep 17 00:00:00 2001 From: Takeru Hayasaka Date: Mon, 12 Feb 2024 02:04:03 +0000 Subject: ethtool: Add GTP RSS hash options to ethtool.h This is a patch that enables RSS functionality for GTP packets using ethtool. A user can include TEID and make RSS work for GTP-U over IPv4 by doing the following:`ethtool -N ens3 rx-flow-hash gtpu4 sde` In addition to gtpu(4|6), we now support gtpc(4|6),gtpc(4|6)t,gtpu(4|6)e, gtpu(4|6)u, and gtpu(4|6)d. gtpc(4|6): Used for GTP-C in IPv4 and IPv6, where the GTP header format does not include a TEID. gtpc(4|6)t: Used for GTP-C in IPv4 and IPv6, with a GTP header format that includes a TEID. gtpu(4|6): Used for GTP-U in both IPv4 and IPv6 scenarios. gtpu(4|6)e: Used for GTP-U with extended headers in both IPv4 and IPv6. gtpu(4|6)u: Used when the PSC (PDU session container) in the GTP-U extended header includes Uplink, applicable to both IPv4 and IPv6. gtpu(4|6)d: Used when the PSC in the GTP-U extended header includes Downlink, for both IPv4 and IPv6. GTP generates a flow that includes an ID called TEID to identify the tunnel. This tunnel is created for each UE (User Equipment).By performing RSS based on this flow, it is possible to apply RSS for each communication unit from the UE. Without this, RSS would only be effective within the range of IP addresses. For instance, the PGW can only perform RSS within the IP range of the SGW. Problematic from a load distribution perspective, especially if there's a bias in the terminals connected to a particular base station.This case can be solved by using this patch. Signed-off-by: Takeru Hayasaka Reviewed-by: Marcin Szycik Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- include/uapi/linux/ethtool.h | 48 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 06ef6b78b7de..11fc18988bc2 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2023,6 +2023,53 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define IPV4_FLOW 0x10 /* hash only */ #define IPV6_FLOW 0x11 /* hash only */ #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ + +/* Used for GTP-U IPv4 and IPv6. + * The format of GTP packets only includes + * elements such as TEID and GTP version. + * It is primarily intended for data communication of the UE. + */ +#define GTPU_V4_FLOW 0x13 /* hash only */ +#define GTPU_V6_FLOW 0x14 /* hash only */ + +/* Use for GTP-C IPv4 and v6. + * The format of these GTP packets does not include TEID. + * Primarily expected to be used for communication + * to create sessions for UE data communication, + * commonly referred to as CSR (Create Session Request). + */ +#define GTPC_V4_FLOW 0x15 /* hash only */ +#define GTPC_V6_FLOW 0x16 /* hash only */ + +/* Use for GTP-C IPv4 and v6. + * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. + * After session creation, it becomes this packet. + * This is mainly used for requests to realize UE handover. + */ +#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ +#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ + +/* Use for GTP-U and extended headers for the PSC (PDU Session Container). + * The format of these GTP packets includes TEID and QFI. + * In 5G communication using UPF (User Plane Function), + * data communication with this extended header is performed. + */ +#define GTPU_EH_V4_FLOW 0x19 /* hash only */ +#define GTPU_EH_V6_FLOW 0x1a /* hash only */ + +/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. + * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by + * UL/DL included in the PSC. + * There are differences in the data included based on Downlink/Uplink, + * and can be used to distinguish packets. + * The functions described so far are useful when you want to + * handle communication from the mobile network in UPF, PGW, etc. + */ +#define GTPU_UL_V4_FLOW 0x1b /* hash only */ +#define GTPU_UL_V6_FLOW 0x1c /* hash only */ +#define GTPU_DL_V4_FLOW 0x1d /* hash only */ +#define GTPU_DL_V6_FLOW 0x1e /* hash only */ + /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 #define FLOW_MAC_EXT 0x40000000 @@ -2037,6 +2084,7 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define RXH_IP_DST (1 << 5) #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ +#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ #define RXH_DISCARD (1 << 31) #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL -- cgit From 011832b97b311bb9e3c27945bc0d1089a14209c9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 5 Mar 2024 19:19:26 -0800 Subject: bpf: Introduce may_goto instruction Introduce may_goto instruction that from the verifier pov is similar to open coded iterators bpf_for()/bpf_repeat() and bpf_loop() helper, but it doesn't iterate any objects. In assembly 'may_goto' is a nop most of the time until bpf runtime has to terminate the program for whatever reason. In the current implementation may_goto has a hidden counter, but other mechanisms can be used. For programs written in C the later patch introduces 'cond_break' macro that combines 'may_goto' with 'break' statement and has similar semantics: cond_break is a nop until bpf runtime has to break out of this loop. It can be used in any normal "for" or "while" loop, like for (i = zero; i < cnt; cond_break, i++) { The verifier recognizes that may_goto is used in the program, reserves additional 8 bytes of stack, initializes them in subprog prologue, and replaces may_goto instruction with: aux_reg = *(u64 *)(fp - 40) if aux_reg == 0 goto pc+off aux_reg -= 1 *(u64 *)(fp - 40) = aux_reg may_goto instruction can be used by LLVM to implement __builtin_memcpy, __builtin_strcmp. may_goto is not a full substitute for bpf_for() macro. bpf_for() doesn't have induction variable that verifiers sees, so 'i' in bpf_for(i, 0, 100) is seen as imprecise and bounded. But when the code is written as: for (i = 0; i < 100; cond_break, i++) the verifier see 'i' as precise constant zero, hence cond_break (aka may_goto) doesn't help to converge the loop. A static or global variable can be used as a workaround: static int zero = 0; for (i = zero; i < 100; cond_break, i++) // works! may_goto works well with arena pointers that don't need to be bounds checked on access. Load/store from arena returns imprecise unbounded scalar and loops with may_goto pass the verifier. Reserve new opcode BPF_JMP | BPF_JCOND for may_goto insn. JCOND stands for conditional pseudo jump. Since goto_or_nop insn was proposed, it may use the same opcode. may_goto vs goto_or_nop can be distinguished by src_reg: code = BPF_JMP | BPF_JCOND src_reg = 0 - may_goto src_reg = 1 - goto_or_nop Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Eduard Zingerman Acked-by: John Fastabend Tested-by: John Fastabend Link: https://lore.kernel.org/bpf/20240306031929.42666-2-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a241f407c234..85ec7fc799d7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -42,6 +42,7 @@ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ #define BPF_JSLT 0xc0 /* SLT is signed, '<' */ #define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ +#define BPF_JCOND 0xe0 /* conditional pseudo jumps: may_goto, goto_or_nop */ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ @@ -50,6 +51,10 @@ #define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ #define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ +enum bpf_cond_pseudo_jmp { + BPF_MAY_GOTO = 0, +}; + /* Register numbers */ enum { BPF_REG_0 = 0, -- cgit From ab63a2387cb906d43b72a8effb611bbaecb2d0cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Mar 2024 11:55:07 -0800 Subject: netdev: add per-queue statistics The ethtool-nl family does a good job exposing various protocol related and IEEE/IETF statistics which used to get dumped under ethtool -S, with creative names. Queue stats don't have a netlink API, yet, and remain a lion's share of ethtool -S output for new drivers. Not only is that bad because the names differ driver to driver but it's also bug-prone. Intuitively drivers try to report only the stats for active queues, but querying ethtool stats involves multiple system calls, and the number of stats is read separately from the stats themselves. Worse still when user space asks for values of the stats, it doesn't inform the kernel how big the buffer is. If number of stats increases in the meantime kernel will overflow user buffer. Add a netlink API for dumping queue stats. Queue information is exposed via the netdev-genl family, so add the stats there. Support per-queue and sum-for-device dumps. Latter will be useful when subsequent patches add more interesting common stats than just bytes and packets. The API does not currently distinguish between HW and SW stats. The expectation is that the source of the stats will either not matter much (good packets) or be obvious (skb alloc errors). Acked-by: Stanislav Fomichev Reviewed-by: Amritha Nambiar Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20240306195509.1502746-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 93cb411adf72..639ffa04c172 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -70,6 +70,10 @@ enum netdev_queue_type { NETDEV_QUEUE_TYPE_TX, }; +enum netdev_qstats_scope { + NETDEV_QSTATS_SCOPE_QUEUE = 1, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -132,6 +136,20 @@ enum { NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) }; +enum { + NETDEV_A_QSTATS_IFINDEX = 1, + NETDEV_A_QSTATS_QUEUE_TYPE, + NETDEV_A_QSTATS_QUEUE_ID, + NETDEV_A_QSTATS_SCOPE, + NETDEV_A_QSTATS_RX_PACKETS = 8, + NETDEV_A_QSTATS_RX_BYTES, + NETDEV_A_QSTATS_TX_PACKETS, + NETDEV_A_QSTATS_TX_BYTES, + + __NETDEV_A_QSTATS_MAX, + NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -144,6 +162,7 @@ enum { NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, NETDEV_CMD_NAPI_GET, + NETDEV_CMD_QSTATS_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit From 92f8b1f5ca0f157f564e75cef4c63641c172e0f1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Mar 2024 11:55:08 -0800 Subject: netdev: add queue stat for alloc failures Rx alloc failures are commonly counted by drivers. Support reporting those via netdev-genl queue stats. Acked-by: Stanislav Fomichev Reviewed-by: Amritha Nambiar Reviewed-by: Xuan Zhuo Link: https://lore.kernel.org/r/20240306195509.1502746-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/netdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 639ffa04c172..bb65ee840cda 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -145,6 +145,7 @@ enum { NETDEV_A_QSTATS_RX_BYTES, NETDEV_A_QSTATS_TX_PACKETS, NETDEV_A_QSTATS_TX_BYTES, + NETDEV_A_QSTATS_RX_ALLOC_FAIL, __NETDEV_A_QSTATS_MAX, NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) -- cgit From a207eab1039b501daddc8e729c9cc5d99fe93d18 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 6 Mar 2024 13:49:16 +0100 Subject: net: nexthop: Add NHA_OP_FLAGS In order to add per-nexthop statistics, but still not increase netlink message size for consumers that do not care about them, there needs to be a toggle through which the user indicates their desire to get the statistics. To that end, add a new attribute, NHA_OP_FLAGS. The idea is to be able to use the attribute for carrying of arbitrary operation-specific flags, i.e. not make it specific for get / dump. Add the new attribute to get and dump policies, but do not actually allow any flags yet -- those will come later as the flags themselves are defined. Add the necessary parsing code. Signed-off-by: Petr Machata Reviewed-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/nexthop.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index d8ffa8c9ca78..086444e2946c 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -60,6 +60,9 @@ enum { /* nested; nexthop bucket attributes */ NHA_RES_BUCKET, + /* u32; operation-specific flags */ + NHA_OP_FLAGS, + __NHA_MAX, }; -- cgit From 95fedd7685912f96304615efe50485588b1c3567 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:18 +0100 Subject: net: nexthop: Expose nexthop group stats to user space Add netlink support for reading NH group stats. This data is only for statistics of the traffic in the SW datapath. HW nexthop group statistics will be added in the following patches. Emission of the stats is keyed to a new op_stats flag to avoid cluttering the netlink message with stats if the user doesn't need them: NHA_OP_FLAG_DUMP_STATS. Co-developed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- include/uapi/linux/nexthop.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index 086444e2946c..f4db63c17085 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -30,6 +30,8 @@ enum { #define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1) +#define NHA_OP_FLAG_DUMP_STATS BIT(0) + enum { NHA_UNSPEC, NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */ @@ -63,6 +65,9 @@ enum { /* u32; operation-specific flags */ NHA_OP_FLAGS, + /* nested; nexthop group stats */ + NHA_GROUP_STATS, + __NHA_MAX, }; @@ -104,4 +109,29 @@ enum { #define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1) +enum { + NHA_GROUP_STATS_UNSPEC, + + /* nested; nexthop group entry stats */ + NHA_GROUP_STATS_ENTRY, + + __NHA_GROUP_STATS_MAX, +}; + +#define NHA_GROUP_STATS_MAX (__NHA_GROUP_STATS_MAX - 1) + +enum { + NHA_GROUP_STATS_ENTRY_UNSPEC, + + /* u32; nexthop id of the nexthop group entry */ + NHA_GROUP_STATS_ENTRY_ID, + + /* uint; number of packets forwarded via the nexthop group entry */ + NHA_GROUP_STATS_ENTRY_PACKETS, + + __NHA_GROUP_STATS_ENTRY_MAX, +}; + +#define NHA_GROUP_STATS_ENTRY_MAX (__NHA_GROUP_STATS_ENTRY_MAX - 1) + #endif -- cgit From 746c19a52ec50b81422fd4772254d55e588d7df6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:20 +0100 Subject: net: nexthop: Add ability to enable / disable hardware statistics Add netlink support for enabling collection of HW statistics on nexthop groups. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- include/uapi/linux/nexthop.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index f4db63c17085..b86af80d4e09 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -68,6 +68,9 @@ enum { /* nested; nexthop group stats */ NHA_GROUP_STATS, + /* u32; nexthop hardware stats enable */ + NHA_HW_STATS_ENABLE, + __NHA_MAX, }; -- cgit From 5072ae00aea434d922cabd1c3e6236350a77c4d7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 6 Mar 2024 13:49:21 +0100 Subject: net: nexthop: Expose nexthop group HW stats to user space Add netlink support for reading NH group hardware stats. Stats collection is done through a new notifier, NEXTHOP_EVENT_HW_STATS_REPORT_DELTA. Drivers that implement HW counters for a given NH group are thereby asked to collect the stats and report back to core by calling nh_grp_hw_stats_report_delta(). This is similar to what netdevice L3 stats do. Besides exposing number of packets that passed in the HW datapath, also include information on whether any driver actually realizes the counters. The core can tell based on whether it got any _report_delta() reports from the drivers. This allows enabling the statistics at the group at any time, with drivers opting into supporting them. This is also in line with what netdevice L3 stats are doing. So as not to waste time and space, tie the collection and reporting of HW stats with a new op flag, NHA_OP_FLAG_DUMP_HW_STATS. Co-developed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Kees Cook # For the __counted_by bits Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/uapi/linux/nexthop.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index b86af80d4e09..dd8787f9cf39 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -31,6 +31,7 @@ enum { #define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1) #define NHA_OP_FLAG_DUMP_STATS BIT(0) +#define NHA_OP_FLAG_DUMP_HW_STATS BIT(1) enum { NHA_UNSPEC, @@ -71,6 +72,9 @@ enum { /* u32; nexthop hardware stats enable */ NHA_HW_STATS_ENABLE, + /* u32; read-only; whether any driver collects HW stats */ + NHA_HW_STATS_USED, + __NHA_MAX, }; @@ -132,6 +136,11 @@ enum { /* uint; number of packets forwarded via the nexthop group entry */ NHA_GROUP_STATS_ENTRY_PACKETS, + /* uint; number of packets forwarded via the nexthop group entry in + * hardware + */ + NHA_GROUP_STATS_ENTRY_PACKETS_HW, + __NHA_GROUP_STATS_ENTRY_MAX, }; -- cgit From 317460317a02a1af512697e6e964298dedd8a163 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:07:59 -0800 Subject: bpf: Introduce bpf_arena. Introduce bpf_arena, which is a sparse shared memory region between the bpf program and user space. Use cases: 1. User space mmap-s bpf_arena and uses it as a traditional mmap-ed anonymous region, like memcached or any key/value storage. The bpf program implements an in-kernel accelerator. XDP prog can search for a key in bpf_arena and return a value without going to user space. 2. The bpf program builds arbitrary data structures in bpf_arena (hash tables, rb-trees, sparse arrays), while user space consumes it. 3. bpf_arena is a "heap" of memory from the bpf program's point of view. The user space may mmap it, but bpf program will not convert pointers to user base at run-time to improve bpf program speed. Initially, the kernel vm_area and user vma are not populated. User space can fault in pages within the range. While servicing a page fault, bpf_arena logic will insert a new page into the kernel and user vmas. The bpf program can allocate pages from that region via bpf_arena_alloc_pages(). This kernel function will insert pages into the kernel vm_area. The subsequent fault-in from user space will populate that page into the user vma. The BPF_F_SEGV_ON_FAULT flag at arena creation time can be used to prevent fault-in from user space. In such a case, if a page is not allocated by the bpf program and not present in the kernel vm_area, the user process will segfault. This is useful for use cases 2 and 3 above. bpf_arena_alloc_pages() is similar to user space mmap(). It allocates pages either at a specific address within the arena or allocates a range with the maple tree. bpf_arena_free_pages() is analogous to munmap(), which frees pages and removes the range from the kernel vm_area and from user process vmas. bpf_arena can be used as a bpf program "heap" of up to 4GB. The speed of bpf program is more important than ease of sharing with user space. This is use case 3. In such a case, the BPF_F_NO_USER_CONV flag is recommended. It will tell the verifier to treat the rX = bpf_arena_cast_user(rY) instruction as a 32-bit move wX = wY, which will improve bpf prog performance. Otherwise, bpf_arena_cast_user is translated by JIT to conditionally add the upper 32 bits of user vm_start (if the pointer is not NULL) to arena pointers before they are stored into memory. This way, user space sees them as valid 64-bit pointers. Diff https://github.com/llvm/llvm-project/pull/84410 enables LLVM BPF backend generate the bpf_addr_space_cast() instruction to cast pointers between address_space(1) which is reserved for bpf_arena pointers and default address space zero. All arena pointers in a bpf program written in C language are tagged as __attribute__((address_space(1))). Hence, clang provides helpful diagnostics when pointers cross address space. Libbpf and the kernel support only address_space == 1. All other address space identifiers are reserved. rX = bpf_addr_space_cast(rY, /* dst_as */ 1, /* src_as */ 0) tells the verifier that rX->type = PTR_TO_ARENA. Any further operations on PTR_TO_ARENA register have to be in the 32-bit domain. The verifier will mark load/store through PTR_TO_ARENA with PROBE_MEM32. JIT will generate them as kern_vm_start + 32bit_addr memory accesses. The behavior is similar to copy_from_kernel_nofault() except that no address checks are necessary. The address is guaranteed to be in the 4GB range. If the page is not present, the destination register is zeroed on read, and the operation is ignored on write. rX = bpf_addr_space_cast(rY, 0, 1) tells the verifier that rX->type = unknown scalar. If arena->map_flags has BPF_F_NO_USER_CONV set, then the verifier converts such cast instructions to mov32. Otherwise, JIT will emit native code equivalent to: rX = (u32)rY; if (rY) rX |= clear_lo32_bits(arena->user_vm_start); /* replace hi32 bits in rX */ After such conversion, the pointer becomes a valid user pointer within bpf_arena range. The user process can access data structures created in bpf_arena without any additional computations. For example, a linked list built by a bpf program can be walked natively by user space. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Reviewed-by: Barret Rhoden Link: https://lore.kernel.org/bpf/20240308010812.89848-2-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 85ec7fc799d7..e30d943db8a4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1009,6 +1009,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + BPF_MAP_TYPE_ARENA, __MAX_BPF_MAP_TYPE }; @@ -1396,6 +1397,12 @@ enum { /* BPF token FD is passed in a corresponding command's token_fd field */ BPF_F_TOKEN_FD = (1U << 16), + +/* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */ + BPF_F_SEGV_ON_FAULT = (1U << 17), + +/* Do not translate kernel bpf_arena pointers to user pointers */ + BPF_F_NO_USER_CONV = (1U << 18), }; /* Flags for BPF_PROG_QUERY. */ @@ -1467,6 +1474,9 @@ union bpf_attr { * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the * number of hash functions (if 0, the bloom filter will default * to using 5 hash functions). + * + * BPF_MAP_TYPE_ARENA - contains the address where user space + * is going to mmap() the arena. It has to be page aligned. */ __u64 map_extra; -- cgit From 667a86ad9b71d934c444eec193cf3508016f35c5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 7 Mar 2024 17:08:00 -0800 Subject: bpf: Disasm support for addr_space_cast instruction. LLVM generates rX = addr_space_cast(rY, dst_addr_space, src_addr_space) instruction when pointers in non-zero address space are used by the bpf program. Recognize this insn in uapi and in bpf disassembler. Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20240308010812.89848-3-alexei.starovoitov@gmail.com --- include/uapi/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e30d943db8a4..3c42b9f1bada 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1339,6 +1339,10 @@ enum { */ #define BPF_PSEUDO_KFUNC_CALL 2 +enum bpf_addr_space_cast { + BPF_ADDR_SPACE_CAST = 1, +}; + /* flags for BPF_MAP_UPDATE_ELEM command */ enum { BPF_ANY = 0, /* create new element or update existing */ -- cgit From eaf657f7adba8984509db7403ac6bdaa219e5722 Mon Sep 17 00:00:00 2001 From: William Tu Date: Sun, 10 Mar 2024 16:55:03 +0200 Subject: devlink: Add comments to use netlink gen tool Add the comment to remind people not to manually modify the net/devlink/netlink_gen.c, but to use tools/net/ynl/ynl-regen.sh to generate it. Signed-off-by: William Tu Suggested-by: Jiri Pirko Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240310145503.32721-1-witu@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 130cae0d3e20..2da0c7eb6710 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -614,7 +614,10 @@ enum devlink_attr { DEVLINK_ATTR_REGION_DIRECT, /* flag */ - /* add new attributes above here, update the policy in devlink.c */ + /* Add new attributes above here, update the spec in + * Documentation/netlink/specs/devlink.yaml and re-generate + * net/devlink/netlink_gen.c. + */ __DEVLINK_ATTR_MAX, DEVLINK_ATTR_MAX = __DEVLINK_ATTR_MAX - 1 -- cgit