summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/cpumasks.rst2
-rw-r--r--Documentation/bpf/fs_kfuncs.rst21
-rw-r--r--Documentation/bpf/index.rst1
-rw-r--r--Documentation/netlink/specs/netdev.yaml4
-rw-r--r--Documentation/networking/xdp-rx-metadata.rst8
-rw-r--r--Documentation/networking/xsk-tx-metadata.rst2
-rw-r--r--arch/arm64/net/bpf_jit_comp.c55
-rw-r--r--arch/riscv/include/asm/cfi.h3
-rw-r--r--arch/riscv/kernel/cfi.c2
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c25
-rw-r--r--arch/s390/net/bpf_jit_comp.c59
-rw-r--r--arch/x86/include/asm/cfi.h126
-rw-r--r--arch/x86/kernel/alternative.c87
-rw-r--r--arch/x86/kernel/cfi.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c264
-rw-r--r--drivers/media/rc/bpf-lirc.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c15
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h412
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c21
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.c22
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp.h16
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c19
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h32
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.c207
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx_lib.h18
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c15
-rw-r--r--drivers/net/veth.c19
-rw-r--r--fs/verity/fsverity_private.h10
-rw-r--r--fs/verity/init.c1
-rw-r--r--fs/verity/measure.c84
-rw-r--r--include/asm-generic/Kbuild1
-rw-r--r--include/asm-generic/cfi.h5
-rw-r--r--include/linux/bpf.h143
-rw-r--r--include/linux/bpf_verifier.h66
-rw-r--r--include/linux/cfi.h12
-rw-r--r--include/linux/filter.h4
-rw-r--r--include/linux/if_vlan.h4
-rw-r--r--include/linux/lsm_hook_defs.h15
-rw-r--r--include/linux/mlx5/device.h2
-rw-r--r--include/linux/security.h43
-rw-r--r--include/linux/skbuff.h13
-rw-r--r--include/net/xdp.h20
-rw-r--r--include/net/xdp_sock_drv.h17
-rw-r--r--include/net/xfrm.h9
-rw-r--r--include/net/xsk_buff_pool.h2
-rw-r--r--include/uapi/linux/bpf.h46
-rw-r--r--include/uapi/linux/netdev.h3
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/arraymap.c37
-rw-r--r--kernel/bpf/bpf_cgrp_storage.c6
-rw-r--r--kernel/bpf/bpf_lsm.c27
-rw-r--r--kernel/bpf/bpf_struct_ops.c35
-rw-r--r--kernel/bpf/btf.c11
-rw-r--r--kernel/bpf/cgroup.c6
-rw-r--r--kernel/bpf/core.c53
-rw-r--r--kernel/bpf/cpumask.c20
-rw-r--r--kernel/bpf/dispatcher.c7
-rw-r--r--kernel/bpf/hashtab.c12
-rw-r--r--kernel/bpf/helpers.c38
-rw-r--r--kernel/bpf/inode.c326
-rw-r--r--kernel/bpf/log.c42
-rw-r--r--kernel/bpf/map_in_map.c17
-rw-r--r--kernel/bpf/map_in_map.h2
-rw-r--r--kernel/bpf/syscall.c294
-rw-r--r--kernel/bpf/tnum.c6
-rw-r--r--kernel/bpf/token.c271
-rw-r--r--kernel/bpf/trampoline.c101
-rw-r--r--kernel/bpf/verifier.c558
-rw-r--r--kernel/trace/bpf_trace.c84
-rw-r--r--lib/test_bpf.c2
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c38
-rw-r--r--net/bpf/test_run.c15
-rw-r--r--net/core/filter.c36
-rw-r--r--net/core/xdp.c33
-rw-r--r--net/ipv4/bpf_tcp_ca.c71
-rw-r--r--net/netfilter/nf_bpf_link.c2
-rw-r--r--net/xdp/xsk_buff_pool.c12
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/xfrm_policy.c2
-rw-r--r--net/xfrm/xfrm_state_bpf.c134
-rw-r--r--security/security.c101
-rw-r--r--security/selinux/hooks.c47
-rw-r--r--tools/include/uapi/linux/bpf.h46
-rw-r--r--tools/include/uapi/linux/netdev.h3
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/bpf.c37
-rw-r--r--tools/lib/bpf/bpf.h35
-rw-r--r--tools/lib/bpf/bpf_core_read.h32
-rw-r--r--tools/lib/bpf/btf.c7
-rw-r--r--tools/lib/bpf/elf.c2
-rw-r--r--tools/lib/bpf/features.c478
-rw-r--r--tools/lib/bpf/libbpf.c584
-rw-r--r--tools/lib/bpf/libbpf.h37
-rw-r--r--tools/lib/bpf/libbpf.map1
-rw-r--r--tools/lib/bpf/libbpf_internal.h36
-rw-r--r--tools/lib/bpf/libbpf_probes.c8
-rw-r--r--tools/lib/bpf/linker.c24
-rw-r--r--tools/lib/bpf/str_error.h3
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h10
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c16
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/config3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c142
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c60
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_probes.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_str.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_btf.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_in_map.c141
-rw-r--r--tools/testing/selftests/bpf/prog_tests/syscall.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c162
-rw-r--r--tools/testing/selftests/bpf/prog_tests/time_tai.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/token.c1031
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c175
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c165
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c132
-rw-r--r--tools/testing/selftests/bpf/progs/access_map_in_map.c93
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h1
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c84
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c61
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c82
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h1
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_success.c43
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_assert.c2
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_dead_global_func.c11
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c2
-rw-r--r--tools/testing/selftests/bpf/progs/local_kptr_stash.c53
-rw-r--r--tools/testing/selftests/bpf/progs/map_in_map_btf.c73
-rw-r--r--tools/testing/selftests/bpf/progs/normal_map_btf.c56
-rw-r--r--tools/testing/selftests/bpf/progs/priv_map.c13
-rw-r--r--tools/testing/selftests/bpf/progs/priv_prog.c13
-rw-r--r--tools/testing/selftests/bpf/progs/syscall.c96
-rw-r--r--tools/testing/selftests/bpf/progs/test_fsverity.c48
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_xattr.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func15.c34
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func16.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sig_in_xattr.c83
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c138
-rw-r--r--tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c8
-rw-r--r--tools/testing/selftests/bpf/progs/timer_failure.c37
-rw-r--r--tools/testing/selftests/bpf/progs/user_ringbuf_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_basic_stack.c8
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bitfield_write.c100
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c8
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_subprogs.c15
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_int_ptr.c7
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_raw_stack.c5
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c287
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_stack_ptr.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c137
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_var_off.c91
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_hw_metadata.c38
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_metadata.c36
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c4
-rw-r--r--tools/testing/selftests/bpf/test_loader.c7
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh92
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h3
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c11
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c38
-rwxr-xr-xtools/testing/selftests/bpf/verify_sig_setup.sh25
-rw-r--r--tools/testing/selftests/bpf/veristat.c2
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c36
-rw-r--r--tools/testing/selftests/bpf/xdp_metadata.h34
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c25
177 files changed, 8407 insertions, 1798 deletions
diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst
index a22b6ad105fb..b5d47a04da5d 100644
--- a/Documentation/bpf/cpumasks.rst
+++ b/Documentation/bpf/cpumasks.rst
@@ -352,7 +352,7 @@ can be used to query the contents of cpumasks.
.. kernel-doc:: kernel/bpf/cpumask.c
:identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_first_and
- bpf_cpumask_test_cpu
+ bpf_cpumask_test_cpu bpf_cpumask_weight
.. kernel-doc:: kernel/bpf/cpumask.c
:identifiers: bpf_cpumask_equal bpf_cpumask_intersects bpf_cpumask_subset
diff --git a/Documentation/bpf/fs_kfuncs.rst b/Documentation/bpf/fs_kfuncs.rst
new file mode 100644
index 000000000000..8762c3233a3d
--- /dev/null
+++ b/Documentation/bpf/fs_kfuncs.rst
@@ -0,0 +1,21 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. _fs_kfuncs-header-label:
+
+=====================
+BPF filesystem kfuncs
+=====================
+
+BPF LSM programs need to access filesystem data from LSM hooks. The following
+BPF kfuncs can be used to get these data.
+
+ * ``bpf_get_file_xattr()``
+
+ * ``bpf_get_fsverity_digest()``
+
+To avoid recursions, these kfuncs follow the following rules:
+
+1. These kfuncs are only permitted from BPF LSM function.
+2. These kfuncs should not call into other LSM hooks, i.e. security_*(). For
+ example, ``bpf_get_file_xattr()`` does not use ``vfs_getxattr()``, because
+ the latter calls LSM hook ``security_inode_getxattr``.
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index aeaeb35e6d4a..0bb5cb8157f1 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -21,6 +21,7 @@ that goes into great technical depth about the BPF Architecture.
helpers
kfuncs
cpumasks
+ fs_kfuncs
programs
maps
bpf_prog_run
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index f2c76d103bd8..3addac970680 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -54,6 +54,10 @@ definitions:
name: hash
doc:
Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
+ -
+ name: vlan-tag
+ doc:
+ Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
-
type: flags
name: xsk-flags
diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
index e3e9420fd817..a6e0ece18be5 100644
--- a/Documentation/networking/xdp-rx-metadata.rst
+++ b/Documentation/networking/xdp-rx-metadata.rst
@@ -20,7 +20,13 @@ Currently, the following kfuncs are supported. In the future, as more
metadata is supported, this set will grow:
.. kernel-doc:: net/core/xdp.c
- :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
+ :identifiers: bpf_xdp_metadata_rx_timestamp
+
+.. kernel-doc:: net/core/xdp.c
+ :identifiers: bpf_xdp_metadata_rx_hash
+
+.. kernel-doc:: net/core/xdp.c
+ :identifiers: bpf_xdp_metadata_rx_vlan_tag
An XDP program can use these kfuncs to read the metadata into stack
variables for its own consumption. Or, to pass the metadata on to other
diff --git a/Documentation/networking/xsk-tx-metadata.rst b/Documentation/networking/xsk-tx-metadata.rst
index 97ecfa480d00..bd033fe95cca 100644
--- a/Documentation/networking/xsk-tx-metadata.rst
+++ b/Documentation/networking/xsk-tx-metadata.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
==================
AF_XDP TX Metadata
==================
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 7d4af64e3982..8955da5c47cf 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1828,7 +1828,7 @@ static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
*
*/
static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
- struct bpf_tramp_links *tlinks, void *orig_call,
+ struct bpf_tramp_links *tlinks, void *func_addr,
int nregs, u32 flags)
{
int i;
@@ -1926,7 +1926,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (flags & BPF_TRAMP_F_IP_ARG) {
/* save ip address of the traced function */
- emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx);
+ emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx);
emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
}
@@ -2026,18 +2026,10 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
return ctx->idx;
}
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
- void *image_end, const struct btf_func_model *m,
- u32 flags, struct bpf_tramp_links *tlinks,
- void *orig_call)
+static int btf_func_model_nregs(const struct btf_func_model *m)
{
- int i, ret;
int nregs = m->nr_args;
- int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
- struct jit_ctx ctx = {
- .image = NULL,
- .idx = 0,
- };
+ int i;
/* extra registers needed for struct argument */
for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
@@ -2046,22 +2038,49 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
nregs += (m->arg_size[i] + 7) / 8 - 1;
}
+ return nregs;
+}
+
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks, void *func_addr)
+{
+ struct jit_ctx ctx = {
+ .image = NULL,
+ .idx = 0,
+ };
+ struct bpf_tramp_image im;
+ int nregs, ret;
+
+ nregs = btf_func_model_nregs(m);
/* the first 8 registers are used for arguments */
if (nregs > 8)
return -ENOTSUPP;
- ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
+ ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
if (ret < 0)
return ret;
- if (ret > max_insns)
- return -EFBIG;
+ return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;
+}
- ctx.image = image;
- ctx.idx = 0;
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+ void *image_end, const struct btf_func_model *m,
+ u32 flags, struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ int ret, nregs;
+ struct jit_ctx ctx = {
+ .image = image,
+ .idx = 0,
+ };
+
+ nregs = btf_func_model_nregs(m);
+ /* the first 8 registers are used for arguments */
+ if (nregs > 8)
+ return -ENOTSUPP;
jit_fill_hole(image, (unsigned int)(image_end - image));
- ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nregs, flags);
+ ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
if (ret > 0 && validate_code(&ctx) < 0)
ret = -EINVAL;
diff --git a/arch/riscv/include/asm/cfi.h b/arch/riscv/include/asm/cfi.h
index 56bf9d69d5e3..8f7a62257044 100644
--- a/arch/riscv/include/asm/cfi.h
+++ b/arch/riscv/include/asm/cfi.h
@@ -7,8 +7,9 @@
*
* Copyright (C) 2023 Google LLC
*/
+#include <linux/bug.h>
-#include <linux/cfi.h>
+struct pt_regs;
#ifdef CONFIG_CFI_CLANG
enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
diff --git a/arch/riscv/kernel/cfi.c b/arch/riscv/kernel/cfi.c
index 820158d7a291..6ec9dbd7292e 100644
--- a/arch/riscv/kernel/cfi.c
+++ b/arch/riscv/kernel/cfi.c
@@ -4,7 +4,7 @@
*
* Copyright (C) 2023 Google LLC
*/
-#include <asm/cfi.h>
+#include <linux/cfi.h>
#include <asm/insn.h>
/*
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 8581693e62d3..58dc64dd94a8 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1029,23 +1029,28 @@ out:
return ret;
}
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
- void *image_end, const struct btf_func_model *m,
- u32 flags, struct bpf_tramp_links *tlinks,
- void *func_addr)
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks, void *func_addr)
{
- int ret;
+ struct bpf_tramp_image im;
struct rv_jit_context ctx;
+ int ret;
ctx.ninsns = 0;
ctx.insns = NULL;
ctx.ro_insns = NULL;
- ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
- if (ret < 0)
- return ret;
+ ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx);
- if (ninsns_rvoff(ret) > (long)image_end - (long)image)
- return -EFBIG;
+ return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns);
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+ void *image_end, const struct btf_func_model *m,
+ u32 flags, struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ int ret;
+ struct rv_jit_context ctx;
ctx.ninsns = 0;
/*
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index bf06b7283f0c..7f0a7b97ef4c 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2362,7 +2362,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
return -ENOTSUPP;
/* Return to %r14, since func_addr and %r0 are not available. */
- if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK))
+ if ((!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK)) ||
+ (flags & BPF_TRAMP_F_INDIRECT))
flags |= BPF_TRAMP_F_SKIP_FRAME;
/*
@@ -2637,6 +2638,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
return 0;
}
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks, void *orig_call)
+{
+ struct bpf_tramp_image im;
+ struct bpf_tramp_jit tjit;
+ int ret;
+
+ memset(&tjit, 0, sizeof(tjit));
+
+ ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
+ tlinks, orig_call);
+
+ return ret < 0 ? ret : tjit.common.prg;
+}
+
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
void *image_end, const struct btf_func_model *m,
u32 flags, struct bpf_tramp_links *tlinks,
@@ -2644,30 +2660,27 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
{
struct bpf_tramp_jit tjit;
int ret;
- int i;
- for (i = 0; i < 2; i++) {
- if (i == 0) {
- /* Compute offsets, check whether the code fits. */
- memset(&tjit, 0, sizeof(tjit));
- } else {
- /* Generate the code. */
- tjit.common.prg = 0;
- tjit.common.prg_buf = image;
- }
- ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
- tlinks, func_addr);
- if (ret < 0)
- return ret;
- if (tjit.common.prg > (char *)image_end - (char *)image)
- /*
- * Use the same error code as for exceeding
- * BPF_MAX_TRAMP_LINKS.
- */
- return -E2BIG;
- }
+ /* Compute offsets, check whether the code fits. */
+ memset(&tjit, 0, sizeof(tjit));
+ ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+ tlinks, func_addr);
+
+ if (ret < 0)
+ return ret;
+ if (tjit.common.prg > (char *)image_end - (char *)image)
+ /*
+ * Use the same error code as for exceeding
+ * BPF_MAX_TRAMP_LINKS.
+ */
+ return -E2BIG;
+
+ tjit.common.prg = 0;
+ tjit.common.prg_buf = image;
+ ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+ tlinks, func_addr);
- return tjit.common.prg;
+ return ret < 0 ? ret : tjit.common.prg;
}
bool bpf_jit_supports_subprog_tailcalls(void)
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index 58dacd90daef..7cd752557905 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -7,16 +7,140 @@
*
* Copyright (C) 2022 Google LLC
*/
+#include <linux/bug.h>
+#include <asm/ibt.h>
-#include <linux/cfi.h>
+/*
+ * An overview of the various calling conventions...
+ *
+ * Traditional:
+ *
+ * foo:
+ * ... code here ...
+ * ret
+ *
+ * direct caller:
+ * call foo
+ *
+ * indirect caller:
+ * lea foo(%rip), %r11
+ * ...
+ * call *%r11
+ *
+ *
+ * IBT:
+ *
+ * foo:
+ * endbr64
+ * ... code here ...
+ * ret
+ *
+ * direct caller:
+ * call foo / call foo+4
+ *
+ * indirect caller:
+ * lea foo(%rip), %r11
+ * ...
+ * call *%r11
+ *
+ *
+ * kCFI:
+ *
+ * __cfi_foo:
+ * movl $0x12345678, %eax
+ * # 11 nops when CONFIG_CALL_PADDING
+ * foo:
+ * endbr64 # when IBT
+ * ... code here ...
+ * ret
+ *
+ * direct call:
+ * call foo # / call foo+4 when IBT
+ *
+ * indirect call:
+ * lea foo(%rip), %r11
+ * ...
+ * movl $(-0x12345678), %r10d
+ * addl -4(%r11), %r10d # -15 when CONFIG_CALL_PADDING
+ * jz 1f
+ * ud2
+ * 1:call *%r11
+ *
+ *
+ * FineIBT (builds as kCFI + CALL_PADDING + IBT + RETPOLINE and runtime patches into):
+ *
+ * __cfi_foo:
+ * endbr64
+ * subl 0x12345678, %r10d
+ * jz foo
+ * ud2
+ * nop
+ * foo:
+ * osp nop3 # was endbr64
+ * ... code here ...
+ * ret
+ *
+ * direct caller:
+ * call foo / call foo+4
+ *
+ * indirect caller:
+ * lea foo(%rip), %r11
+ * ...
+ * movl $0x12345678, %r10d
+ * subl $16, %r11
+ * nop4
+ * call *%r11
+ *
+ */
+enum cfi_mode {
+ CFI_DEFAULT, /* FineIBT if hardware has IBT, otherwise kCFI */
+ CFI_OFF, /* Taditional / IBT depending on .config */
+ CFI_KCFI, /* Optionally CALL_PADDING, IBT, RETPOLINE */
+ CFI_FINEIBT, /* see arch/x86/kernel/alternative.c */
+};
+
+extern enum cfi_mode cfi_mode;
+
+struct pt_regs;
#ifdef CONFIG_CFI_CLANG
enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
+#define __bpfcall
+extern u32 cfi_bpf_hash;
+extern u32 cfi_bpf_subprog_hash;
+
+static inline int cfi_get_offset(void)
+{
+ switch (cfi_mode) {
+ case CFI_FINEIBT:
+ return 16;
+ case CFI_KCFI:
+ if (IS_ENABLED(CONFIG_CALL_PADDING))
+ return 16;
+ return 5;
+ default:
+ return 0;
+ }
+}
+#define cfi_get_offset cfi_get_offset
+
+extern u32 cfi_get_func_hash(void *func);
+
#else
static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
{
return BUG_TRAP_TYPE_NONE;
}
+#define cfi_bpf_hash 0U
+#define cfi_bpf_subprog_hash 0U
+static inline u32 cfi_get_func_hash(void *func)
+{
+ return 0;
+}
#endif /* CONFIG_CFI_CLANG */
+#if HAS_KERNEL_IBT == 1
+#define CFI_NOSEAL(x) asm(IBT_NOSEAL(__stringify(x)))
+#endif
+
#endif /* _ASM_X86_CFI_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 73be3931e4f0..49c2a62ba5e4 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -30,6 +30,7 @@
#include <asm/fixmap.h>
#include <asm/paravirt.h>
#include <asm/asm-prototypes.h>
+#include <asm/cfi.h>
int __read_mostly alternatives_patched;
@@ -832,15 +833,82 @@ void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
#endif /* CONFIG_X86_KERNEL_IBT */
#ifdef CONFIG_FINEIBT
+#define __CFI_DEFAULT CFI_DEFAULT
+#elif defined(CONFIG_CFI_CLANG)
+#define __CFI_DEFAULT CFI_KCFI
+#else
+#define __CFI_DEFAULT CFI_OFF
+#endif
-enum cfi_mode {
- CFI_DEFAULT,
- CFI_OFF,
- CFI_KCFI,
- CFI_FINEIBT,
-};
+enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT;
+
+#ifdef CONFIG_CFI_CLANG
+struct bpf_insn;
+
+/* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */
+extern unsigned int __bpf_prog_runX(const void *ctx,
+ const struct bpf_insn *insn);
+
+/*
+ * Force a reference to the external symbol so the compiler generates
+ * __kcfi_typid.
+ */
+__ADDRESSABLE(__bpf_prog_runX);
+
+/* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */
+asm (
+" .pushsection .data..ro_after_init,\"aw\",@progbits \n"
+" .type cfi_bpf_hash,@object \n"
+" .globl cfi_bpf_hash \n"
+" .p2align 2, 0x0 \n"
+"cfi_bpf_hash: \n"
+" .long __kcfi_typeid___bpf_prog_runX \n"
+" .size cfi_bpf_hash, 4 \n"
+" .popsection \n"
+);
+
+/* Must match bpf_callback_t */
+extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64);
+
+__ADDRESSABLE(__bpf_callback_fn);
+
+/* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */
+asm (
+" .pushsection .data..ro_after_init,\"aw\",@progbits \n"
+" .type cfi_bpf_subprog_hash,@object \n"
+" .globl cfi_bpf_subprog_hash \n"
+" .p2align 2, 0x0 \n"
+"cfi_bpf_subprog_hash: \n"
+" .long __kcfi_typeid___bpf_callback_fn \n"
+" .size cfi_bpf_subprog_hash, 4 \n"
+" .popsection \n"
+);
+
+u32 cfi_get_func_hash(void *func)
+{
+ u32 hash;
+
+ func -= cfi_get_offset();
+ switch (cfi_mode) {
+ case CFI_FINEIBT:
+ func += 7;
+ break;
+ case CFI_KCFI:
+ func += 1;
+ break;
+ default:
+ return 0;
+ }
+
+ if (get_kernel_nofault(hash, func))
+ return 0;
+
+ return hash;
+}
+#endif
+
+#ifdef CONFIG_FINEIBT
-static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT;
static bool cfi_rand __ro_after_init = true;
static u32 cfi_seed __ro_after_init;
@@ -1149,8 +1217,11 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
goto err;
if (cfi_rand) {
- if (builtin)
+ if (builtin) {
cfi_seed = get_random_u32();
+ cfi_bpf_hash = cfi_rehash(cfi_bpf_hash);
+ cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash);
+ }
ret = cfi_rand_preamble(start_cfi, end_cfi);
if (ret)
diff --git a/arch/x86/kernel/cfi.c b/arch/x86/kernel/cfi.c
index 8674a5c0c031..e6bf78fac146 100644
--- a/arch/x86/kernel/cfi.c
+++ b/arch/x86/kernel/cfi.c
@@ -4,10 +4,10 @@
*
* Copyright (C) 2022 Google LLC
*/
-#include <asm/cfi.h>
+#include <linux/string.h>
+#include <linux/cfi.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
-#include <linux/string.h>
/*
* Returns the target address and the expected type when regs->ip points
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e89e415aa743..bdacbb84456d 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -17,6 +17,7 @@
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
#include <asm/unwind.h>
+#include <asm/cfi.h>
static bool all_callee_regs_used[4] = {true, true, true, true};
@@ -51,9 +52,11 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
#ifdef CONFIG_X86_KERNEL_IBT
-#define EMIT_ENDBR() EMIT(gen_endbr(), 4)
+#define EMIT_ENDBR() EMIT(gen_endbr(), 4)
+#define EMIT_ENDBR_POISON() EMIT(gen_endbr_poison(), 4)
#else
#define EMIT_ENDBR()
+#define EMIT_ENDBR_POISON()
#endif
static bool is_imm8(int value)
@@ -305,6 +308,69 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
}
/*
+ * Emit the various CFI preambles, see asm/cfi.h and the comments about FineIBT
+ * in arch/x86/kernel/alternative.c
+ */
+
+static void emit_fineibt(u8 **pprog, u32 hash)
+{
+ u8 *prog = *pprog;
+
+ EMIT_ENDBR();
+ EMIT3_off32(0x41, 0x81, 0xea, hash); /* subl $hash, %r10d */
+ EMIT2(0x74, 0x07); /* jz.d8 +7 */
+ EMIT2(0x0f, 0x0b); /* ud2 */
+ EMIT1(0x90); /* nop */
+ EMIT_ENDBR_POISON();
+
+ *pprog = prog;
+}
+
+static void emit_kcfi(u8 **pprog, u32 hash)
+{
+ u8 *prog = *pprog;
+
+ EMIT1_off32(0xb8, hash); /* movl $hash, %eax */
+#ifdef CONFIG_CALL_PADDING
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+ EMIT1(0x90);
+#endif
+ EMIT_ENDBR();
+
+ *pprog = prog;
+}
+
+static void emit_cfi(u8 **pprog, u32 hash)
+{
+ u8 *prog = *pprog;
+
+ switch (cfi_mode) {
+ case CFI_FINEIBT:
+ emit_fineibt(&prog, hash);
+ break;
+
+ case CFI_KCFI:
+ emit_kcfi(&prog, hash);
+ break;
+
+ default:
+ EMIT_ENDBR();
+ break;
+ }
+
+ *pprog = prog;
+}
+
+/*
* Emit x86-64 prologue code for BPF program.
* bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
* while jumping to another program
@@ -315,10 +381,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
{
u8 *prog = *pprog;
+ emit_cfi(&prog, is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash);
/* BPF trampoline can be made to work without these nops,
* but let's waste 5 bytes for now and optimize later
*/
- EMIT_ENDBR();
memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
if (!ebpf_from_cbpf) {
@@ -2198,7 +2264,8 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog,
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_link *l, int stack_size,
- int run_ctx_off, bool save_ret)
+ int run_ctx_off, bool save_ret,
+ void *image, void *rw_image)
{
u8 *prog = *pprog;
u8 *jmp_insn;
@@ -2226,7 +2293,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
else
EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
- if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
+ if (emit_rsb_call(&prog, bpf_trampoline_enter(p), image + (prog - (u8 *)rw_image)))
return -EINVAL;
/* remember prog start time returned by __bpf_prog_enter */
emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
@@ -2250,7 +2317,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
(long) p->insnsi >> 32,
(u32) (long) p->insnsi);
/* call JITed bpf program or interpreter */
- if (emit_rsb_call(&prog, p->bpf_func, prog))
+ if (emit_rsb_call(&prog, p->bpf_func, image + (prog - (u8 *)rw_image)))
return -EINVAL;
/*
@@ -2277,7 +2344,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off);
else
EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
- if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
+ if (emit_rsb_call(&prog, bpf_trampoline_exit(p), image + (prog - (u8 *)rw_image)))
return -EINVAL;
*pprog = prog;
@@ -2312,14 +2379,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_links *tl, int stack_size,
- int run_ctx_off, bool save_ret)
+ int run_ctx_off, bool save_ret,
+ void *image, void *rw_image)
{
int i;
u8 *prog = *pprog;
for (i = 0; i < tl->nr_links; i++) {
if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
- run_ctx_off, save_ret))
+ run_ctx_off, save_ret, image, rw_image))
return -EINVAL;
}
*pprog = prog;
@@ -2328,7 +2396,8 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_links *tl, int stack_size,
- int run_ctx_off, u8 **branches)
+ int run_ctx_off, u8 **branches,
+ void *image, void *rw_image)
{
u8 *prog = *pprog;
int i;
@@ -2339,7 +2408,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
emit_mov_imm32(&prog, false, BPF_REG_0, 0);
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
for (i = 0; i < tl->nr_links; i++) {
- if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true))
+ if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
+ image, rw_image))
return -EINVAL;
/* mod_ret prog stored return value into [rbp - 8]. Emit:
@@ -2422,10 +2492,11 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
* add rsp, 8 // skip eth_type_trans's frame
* ret // return to its caller
*/
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
- const struct btf_func_model *m, u32 flags,
- struct bpf_tramp_links *tlinks,
- void *func_addr)
+static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image,
+ void *rw_image_end, void *image,
+ const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks,
+ void *func_addr)
{
int i, ret, nr_regs = m->nr_args, stack_size = 0;
int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
@@ -2437,10 +2508,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
u8 *prog;
bool save_ret;
+ /*
+ * F_INDIRECT is only compatible with F_RET_FENTRY_RET, it is
+ * explicitly incompatible with F_CALL_ORIG | F_SKIP_FRAME | F_IP_ARG
+ * because @func_addr.
+ */
+ WARN_ON_ONCE((flags & BPF_TRAMP_F_INDIRECT) &&
+ (flags & ~(BPF_TRAMP_F_INDIRECT | BPF_TRAMP_F_RET_FENTRY_RET)));
+
/* extra registers for struct arguments */
- for (i = 0; i < m->nr_args; i++)
+ for (i = 0; i < m->nr_args; i++) {
if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
nr_regs += (m->arg_size[i] + 7) / 8 - 1;
+ }
/* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6
* are passed through regs, the remains are through stack.
@@ -2521,22 +2601,29 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
orig_call += X86_PATCH_SIZE;
}
- prog = image;
+ prog = rw_image;
- EMIT_ENDBR();
- /*
- * This is the direct-call trampoline, as such it needs accounting
- * for the __fentry__ call.
- */
- x86_call_depth_emit_accounting(&prog, NULL);
+ if (flags & BPF_TRAMP_F_INDIRECT) {
+ /*
+ * Indirect call for bpf_struct_ops
+ */
+ emit_cfi(&prog, cfi_get_func_hash(func_addr));
+ } else {
+ /*
+ * Direct-call fentry stub, as such it needs accounting for the
+ * __fentry__ call.
+ */
+ x86_call_depth_emit_accounting(&prog, NULL);
+ }
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
- if (!is_imm8(stack_size))
+ if (!is_imm8(stack_size)) {
/* sub rsp, stack_size */
EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
- else
+ } else {
/* sub rsp, stack_size */
EMIT4(0x48, 0x83, 0xEC, stack_size);
+ }
if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
EMIT1(0x50); /* push rax */
/* mov QWORD PTR [rbp - rbx_off], rbx */
@@ -2563,16 +2650,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* arg1: mov rdi, im */
emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
- if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) {
+ if (emit_rsb_call(&prog, __bpf_tramp_enter,
+ image + (prog - (u8 *)rw_image))) {
ret = -EINVAL;
goto cleanup;
}
}
- if (fentry->nr_links)
+ if (fentry->nr_links) {
if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
- flags & BPF_TRAMP_F_RET_FENTRY_RET))
+ flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image))
return -EINVAL;
+ }
if (fmod_ret->nr_links) {
branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
@@ -2581,7 +2670,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
return -ENOMEM;
if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off,
- run_ctx_off, branches)) {
+ run_ctx_off, branches, image, rw_image)) {
ret = -EINVAL;
goto cleanup;
}
@@ -2591,25 +2680,26 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
restore_regs(m, &prog, regs_off);
save_args(m, &prog, arg_stack_off, true);
- if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+ if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
/* Before calling the original function, restore the
* tail_call_cnt from stack to rax.
*/
RESTORE_TAIL_CALL_CNT(stack_size);
+ }
if (flags & BPF_TRAMP_F_ORIG_STACK) {
emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8);
EMIT2(0xff, 0xd3); /* call *rbx */
} else {
/* call original function */
- if (emit_rsb_call(&prog, orig_call, prog)) {
+ if (emit_rsb_call(&prog, orig_call, image + (prog - (u8 *)rw_image))) {
ret = -EINVAL;
goto cleanup;
}
}
/* remember return value in a stack for bpf prog to access */
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
- im->ip_after_call = prog;
+ im->ip_after_call = image + (prog - (u8 *)rw_image);
memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
}
@@ -2624,16 +2714,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
/* Update the branches saved in invoke_bpf_mod_ret with the
* aligned address of do_fexit.
*/
- for (i = 0; i < fmod_ret->nr_links; i++)
- emit_cond_near_jump(&branches[i], prog, branches[i],
- X86_JNE);
+ for (i = 0; i < fmod_ret->nr_links; i++) {
+ emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image),
+ image + (branches[i] - (u8 *)rw_image), X86_JNE);
+ }
}
- if (fexit->nr_links)
- if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) {
+ if (fexit->nr_links) {
+ if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off,
+ false, image, rw_image)) {
ret = -EINVAL;
goto cleanup;
}
+ }
if (flags & BPF_TRAMP_F_RESTORE_REGS)
restore_regs(m, &prog, regs_off);
@@ -2643,18 +2736,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
* restored to R0.
*/
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- im->ip_epilogue = prog;
+ im->ip_epilogue = image + (prog - (u8 *)rw_image);
/* arg1: mov rdi, im */
emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
- if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) {
+ if (emit_rsb_call(&prog, __bpf_tramp_exit, image + (prog - (u8 *)rw_image))) {
ret = -EINVAL;
goto cleanup;
}
- } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+ } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
/* Before running the original function, restore the
* tail_call_cnt from stack to rax.
*/
RESTORE_TAIL_CALL_CNT(stack_size);
+ }
/* restore return value of orig_call or fentry prog back into RAX */
if (save_ret)
@@ -2662,22 +2756,94 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
EMIT1(0xC9); /* leave */
- if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ if (flags & BPF_TRAMP_F_SKIP_FRAME) {
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
- emit_return(&prog, prog);
+ }
+ emit_return(&prog, image + (prog - (u8 *)rw_image));
/* Make sure the trampoline generation logic doesn't overflow */
- if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
+ if (WARN_ON_ONCE(prog > (u8 *)rw_image_end - BPF_INSN_SAFETY)) {
ret = -EFAULT;
goto cleanup;
}
- ret = prog - (u8 *)image;
+ ret = prog - (u8 *)rw_image + BPF_INSN_SAFETY;
cleanup:
kfree(branches);
return ret;
}
+void *arch_alloc_bpf_trampoline(unsigned int size)
+{
+ return bpf_prog_pack_alloc(size, jit_fill_hole);
+}
+
+void arch_free_bpf_trampoline(void *image, unsigned int size)
+{
+ bpf_prog_pack_free(image, size);
+}
+
+void arch_protect_bpf_trampoline(void *image, unsigned int size)
+{
+}
+
+void arch_unprotect_bpf_trampoline(void *image, unsigned int size)
+{
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
+ const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ void *rw_image, *tmp;
+ int ret;
+ u32 size = image_end - image;
+
+ /* rw_image doesn't need to be in module memory range, so we can
+ * use kvmalloc.
+ */
+ rw_image = kvmalloc(size, GFP_KERNEL);
+ if (!rw_image)
+ return -ENOMEM;
+
+ ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m,
+ flags, tlinks, func_addr);
+ if (ret < 0)
+ goto out;
+
+ tmp = bpf_arch_text_copy(image, rw_image, size);
+ if (IS_ERR(tmp))
+ ret = PTR_ERR(tmp);
+out:
+ kvfree(rw_image);
+ return ret;
+}
+
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks, void *func_addr)
+{
+ struct bpf_tramp_image im;
+ void *image;
+ int ret;
+
+ /* Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
+ * This will NOT cause fragmentation in direct map, as we do not
+ * call set_memory_*() on this buffer.
+ *
+ * We cannot use kvmalloc here, because we need image to be in
+ * module memory range.
+ */
+ image = bpf_jit_alloc_exec(PAGE_SIZE);
+ if (!image)
+ return -ENOMEM;
+
+ ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
+ m, flags, tlinks, func_addr);
+ bpf_jit_free_exec(image);
+ return ret;
+}
+
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image, u8 *buf)
{
u8 *jg_reloc, *prog = *pprog;
@@ -2935,9 +3101,16 @@ out_image:
jit_data->header = header;
jit_data->rw_header = rw_header;
}
- prog->bpf_func = (void *)image;
+ /*
+ * ctx.prog_offset is used when CFI preambles put code *before*
+ * the function. See emit_cfi(). For FineIBT specifically this code
+ * can also be executed and bpf_prog_kallsyms_add() will
+ * generate an additional symbol to cover this, hence also
+ * decrement proglen.
+ */
+ prog->bpf_func = (void *)image + cfi_get_offset();
prog->jited = 1;
- prog->jited_len = proglen;
+ prog->jited_len = proglen - cfi_get_offset();
} else {
prog = orig_prog;
}
@@ -2992,6 +3165,7 @@ void bpf_jit_free(struct bpf_prog *prog)
kvfree(jit_data->addrs);
kfree(jit_data);
}
+ prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
hdr = bpf_jit_binary_pack_hdr(prog);
bpf_jit_binary_pack_free(hdr, NULL);
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index fe17c7f98e81..6d07693c6b9f 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -110,7 +110,7 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_trace_printk:
- if (perfmon_capable())
+ if (bpf_token_capable(prog->aux->token, CAP_PERFMON))
return bpf_get_trace_printk_proto();
fallthrough;
default:
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 50304e4a4fb0..2defac6d9168 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -1008,4 +1008,6 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf)
set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
}
+
+extern const struct xdp_metadata_ops ice_xdp_md_ops;
#endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index edad5f9ab16c..b25b7f415965 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -527,6 +527,19 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
return 0;
}
+static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring)
+{
+ void *ctx_ptr = &ring->pkt_ctx;
+ struct xsk_cb_desc desc = {};
+
+ XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
+ desc.src = &ctx_ptr;
+ desc.off = offsetof(struct ice_xdp_buff, pkt_ctx) -
+ sizeof(struct xdp_buff);
+ desc.bytes = sizeof(ctx_ptr);
+ xsk_pool_fill_cb(ring->xsk_pool, &desc);
+}
+
/**
* ice_vsi_cfg_rxq - Configure an Rx queue
* @ring: the ring being configured
@@ -561,6 +574,7 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
if (err)
return err;
xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+ ice_xsk_pool_fill_cb(ring);
dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
ring->q_index);
@@ -583,6 +597,7 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq);
ring->xdp.data = NULL;
+ ring->xdp_ext.pkt_ctx = &ring->pkt_ctx;
err = ice_setup_rx_ctx(ring);
if (err) {
dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 89f986a75cc8..d384ddfcb83e 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -673,6 +673,212 @@ struct ice_tlan_ctx {
* Use the enum ice_rx_l2_ptype to decode the packet type
* ENDIF
*/
+#define ICE_PTYPES \
+ /* L2 Packet types */ \
+ ICE_PTT_UNUSED_ENTRY(0), \
+ ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), \
+ ICE_PTT_UNUSED_ENTRY(2), \
+ ICE_PTT_UNUSED_ENTRY(3), \
+ ICE_PTT_UNUSED_ENTRY(4), \
+ ICE_PTT_UNUSED_ENTRY(5), \
+ ICE_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \
+ ICE_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \
+ ICE_PTT_UNUSED_ENTRY(8), \
+ ICE_PTT_UNUSED_ENTRY(9), \
+ ICE_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \
+ ICE_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \
+ ICE_PTT_UNUSED_ENTRY(12), \
+ ICE_PTT_UNUSED_ENTRY(13), \
+ ICE_PTT_UNUSED_ENTRY(14), \
+ ICE_PTT_UNUSED_ENTRY(15), \
+ ICE_PTT_UNUSED_ENTRY(16), \
+ ICE_PTT_UNUSED_ENTRY(17), \
+ ICE_PTT_UNUSED_ENTRY(18), \
+ ICE_PTT_UNUSED_ENTRY(19), \
+ ICE_PTT_UNUSED_ENTRY(20), \
+ ICE_PTT_UNUSED_ENTRY(21), \
+ \
+ /* Non Tunneled IPv4 */ \
+ ICE_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), \
+ ICE_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), \
+ ICE_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(25), \
+ ICE_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), \
+ ICE_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), \
+ ICE_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), \
+ \
+ /* IPv4 --> IPv4 */ \
+ ICE_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), \
+ ICE_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), \
+ ICE_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(32), \
+ ICE_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), \
+ ICE_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), \
+ ICE_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), \
+ \
+ /* IPv4 --> IPv6 */ \
+ ICE_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), \
+ ICE_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), \
+ ICE_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(39), \
+ ICE_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), \
+ ICE_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), \
+ ICE_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), \
+ \
+ /* IPv4 --> GRE/NAT */ \
+ ICE_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), \
+ \
+ /* IPv4 --> GRE/NAT --> IPv4 */ \
+ ICE_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), \
+ ICE_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), \
+ ICE_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(47), \
+ ICE_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), \
+ ICE_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), \
+ ICE_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), \
+ \
+ /* IPv4 --> GRE/NAT --> IPv6 */ \
+ ICE_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), \
+ ICE_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), \
+ ICE_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(54), \
+ ICE_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), \
+ ICE_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), \
+ ICE_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), \
+ \
+ /* IPv4 --> GRE/NAT --> MAC */ \
+ ICE_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), \
+ \
+ /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ \
+ ICE_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), \
+ ICE_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), \
+ ICE_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(62), \
+ ICE_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), \
+ ICE_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), \
+ ICE_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), \
+ \
+ /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ \
+ ICE_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), \
+ ICE_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), \
+ ICE_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(69), \
+ ICE_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), \
+ ICE_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), \
+ ICE_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), \
+ \
+ /* IPv4 --> GRE/NAT --> MAC/VLAN */ \
+ ICE_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), \
+ \
+ /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ \
+ ICE_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), \
+ ICE_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), \
+ ICE_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(77), \
+ ICE_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), \
+ ICE_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), \
+ ICE_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), \
+ \
+ /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ \
+ ICE_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), \
+ ICE_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), \
+ ICE_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(84), \
+ ICE_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), \
+ ICE_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), \
+ ICE_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), \
+ \
+ /* Non Tunneled IPv6 */ \
+ ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), \
+ ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), \
+ ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(91), \
+ ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), \
+ ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), \
+ ICE_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), \
+ \
+ /* IPv6 --> IPv4 */ \
+ ICE_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), \
+ ICE_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), \
+ ICE_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(98), \
+ ICE_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), \
+ ICE_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), \
+ ICE_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), \
+ \
+ /* IPv6 --> IPv6 */ \
+ ICE_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), \
+ ICE_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), \
+ ICE_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(105), \
+ ICE_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), \
+ ICE_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), \
+ ICE_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), \
+ \
+ /* IPv6 --> GRE/NAT */ \
+ ICE_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), \
+ \
+ /* IPv6 --> GRE/NAT -> IPv4 */ \
+ ICE_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), \
+ ICE_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), \
+ ICE_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(113), \
+ ICE_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), \
+ ICE_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), \
+ ICE_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), \
+ \
+ /* IPv6 --> GRE/NAT -> IPv6 */ \
+ ICE_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), \
+ ICE_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), \
+ ICE_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(120), \
+ ICE_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), \
+ ICE_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), \
+ ICE_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), \
+ \
+ /* IPv6 --> GRE/NAT -> MAC */ \
+ ICE_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), \
+ \
+ /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ \
+ ICE_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), \
+ ICE_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), \
+ ICE_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(128), \
+ ICE_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), \
+ ICE_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), \
+ ICE_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), \
+ \
+ /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ \
+ ICE_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), \
+ ICE_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), \
+ ICE_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(135), \
+ ICE_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), \
+ ICE_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), \
+ ICE_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), \
+ \
+ /* IPv6 --> GRE/NAT -> MAC/VLAN */ \
+ ICE_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), \
+ \
+ /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ \
+ ICE_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), \
+ ICE_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), \
+ ICE_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(143), \
+ ICE_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), \
+ ICE_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), \
+ ICE_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), \
+ \
+ /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ \
+ ICE_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), \
+ ICE_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), \
+ ICE_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), \
+ ICE_PTT_UNUSED_ENTRY(150), \
+ ICE_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), \
+ ICE_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), \
+ ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+#define ICE_NUM_DEFINED_PTYPES 154
/* macro to make the table lines short, use explicit indexing with [PTYPE] */
#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
@@ -695,212 +901,10 @@ struct ice_tlan_ctx {
/* Lookup table mapping in the 10-bit HW PTYPE to the bit field for decoding */
static const struct ice_rx_ptype_decoded ice_ptype_lkup[BIT(10)] = {
- /* L2 Packet types */
- ICE_PTT_UNUSED_ENTRY(0),
- ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
- ICE_PTT_UNUSED_ENTRY(2),
- ICE_PTT_UNUSED_ENTRY(3),
- ICE_PTT_UNUSED_ENTRY(4),
- ICE_PTT_UNUSED_ENTRY(5),
- ICE_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
- ICE_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
- ICE_PTT_UNUSED_ENTRY(8),
- ICE_PTT_UNUSED_ENTRY(9),
- ICE_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
- ICE_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
- ICE_PTT_UNUSED_ENTRY(12),
- ICE_PTT_UNUSED_ENTRY(13),
- ICE_PTT_UNUSED_ENTRY(14),
- ICE_PTT_UNUSED_ENTRY(15),
- ICE_PTT_UNUSED_ENTRY(16),
- ICE_PTT_UNUSED_ENTRY(17),
- ICE_PTT_UNUSED_ENTRY(18),
- ICE_PTT_UNUSED_ENTRY(19),
- ICE_PTT_UNUSED_ENTRY(20),
- ICE_PTT_UNUSED_ENTRY(21),
-
- /* Non Tunneled IPv4 */
- ICE_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
- ICE_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
- ICE_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(25),
- ICE_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4),
- ICE_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
- ICE_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
-
- /* IPv4 --> IPv4 */
- ICE_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
- ICE_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
- ICE_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(32),
- ICE_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4),
- ICE_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
- ICE_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
-
- /* IPv4 --> IPv6 */
- ICE_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
- ICE_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
- ICE_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(39),
- ICE_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4),
- ICE_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
- ICE_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
-
- /* IPv4 --> GRE/NAT */
- ICE_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
-
- /* IPv4 --> GRE/NAT --> IPv4 */
- ICE_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
- ICE_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
- ICE_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(47),
- ICE_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4),
- ICE_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
- ICE_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
-
- /* IPv4 --> GRE/NAT --> IPv6 */
- ICE_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
- ICE_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
- ICE_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(54),
- ICE_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4),
- ICE_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
- ICE_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
-
- /* IPv4 --> GRE/NAT --> MAC */
- ICE_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
-
- /* IPv4 --> GRE/NAT --> MAC --> IPv4 */
- ICE_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
- ICE_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
- ICE_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(62),
- ICE_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4),
- ICE_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
- ICE_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
-
- /* IPv4 --> GRE/NAT -> MAC --> IPv6 */
- ICE_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
- ICE_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
- ICE_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(69),
- ICE_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4),
- ICE_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
- ICE_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
-
- /* IPv4 --> GRE/NAT --> MAC/VLAN */
- ICE_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
-
- /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */
- ICE_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
- ICE_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
- ICE_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(77),
- ICE_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4),
- ICE_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
- ICE_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
-
- /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */
- ICE_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
- ICE_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
- ICE_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(84),
- ICE_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4),
- ICE_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
- ICE_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
-
- /* Non Tunneled IPv6 */
- ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
- ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
- ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(91),
- ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4),
- ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
- ICE_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
-
- /* IPv6 --> IPv4 */
- ICE_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
- ICE_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
- ICE_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(98),
- ICE_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4),
- ICE_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
- ICE_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
-
- /* IPv6 --> IPv6 */
- ICE_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
- ICE_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
- ICE_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(105),
- ICE_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4),
- ICE_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
- ICE_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
-
- /* IPv6 --> GRE/NAT */
- ICE_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
-
- /* IPv6 --> GRE/NAT -> IPv4 */
- ICE_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
- ICE_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
- ICE_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(113),
- ICE_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4),
- ICE_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
- ICE_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
-
- /* IPv6 --> GRE/NAT -> IPv6 */
- ICE_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
- ICE_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
- ICE_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(120),
- ICE_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4),
- ICE_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
- ICE_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
-
- /* IPv6 --> GRE/NAT -> MAC */
- ICE_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
-
- /* IPv6 --> GRE/NAT -> MAC -> IPv4 */
- ICE_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
- ICE_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
- ICE_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(128),
- ICE_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4),
- ICE_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
- ICE_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
-
- /* IPv6 --> GRE/NAT -> MAC -> IPv6 */
- ICE_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
- ICE_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
- ICE_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(135),
- ICE_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4),
- ICE_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
- ICE_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
-
- /* IPv6 --> GRE/NAT -> MAC/VLAN */
- ICE_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
-
- /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
- ICE_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
- ICE_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
- ICE_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(143),
- ICE_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4),
- ICE_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
- ICE_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
-
- /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
- ICE_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
- ICE_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
- ICE_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4),
- ICE_PTT_UNUSED_ENTRY(150),
- ICE_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4),
- ICE_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
- ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+ ICE_PTYPES
/* unused entries */
- [154 ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+ [ICE_NUM_DEFINED_PTYPES ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};
static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype)
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 9b0c04d595ce..b97d116650be 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3426,6 +3426,7 @@ static void ice_set_ops(struct ice_vsi *vsi)
netdev->netdev_ops = &ice_netdev_ops;
netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
+ netdev->xdp_metadata_ops = &ice_xdp_md_ops;
ice_set_ethtool_ops(netdev);
if (vsi->type != ICE_VSI_PF)
@@ -6094,6 +6095,23 @@ ice_fix_features(struct net_device *netdev, netdev_features_t features)
}
/**
+ * ice_set_rx_rings_vlan_proto - update rings with new stripped VLAN proto
+ * @vsi: PF's VSI
+ * @vlan_ethertype: VLAN ethertype (802.1Q or 802.1ad) in network byte order
+ *
+ * Store current stripped VLAN proto in ring packet context,
+ * so it can be accessed more efficiently by packet processing code.
+ */
+static void
+ice_set_rx_rings_vlan_proto(struct ice_vsi *vsi, __be16 vlan_ethertype)
+{
+ u16 i;
+
+ ice_for_each_alloc_rxq(vsi, i)
+ vsi->rx_rings[i]->pkt_ctx.vlan_proto = vlan_ethertype;
+}
+
+/**
* ice_set_vlan_offload_features - set VLAN offload features for the PF VSI
* @vsi: PF's VSI
* @features: features used to determine VLAN offload settings
@@ -6135,6 +6153,9 @@ ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features)
if (strip_err || insert_err)
return -EIO;
+ ice_set_rx_rings_vlan_proto(vsi, enable_stripping ?
+ htons(vlan_ethertype) : 0);
+
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index e9e59f4b5580..239cf8a2ee80 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -2129,30 +2129,26 @@ int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
}
/**
- * ice_ptp_rx_hwtstamp - Check for an Rx timestamp
- * @rx_ring: Ring to get the VSI info
+ * ice_ptp_get_rx_hwts - Get packet Rx timestamp in ns
* @rx_desc: Receive descriptor
- * @skb: Particular skb to send timestamp with
+ * @pkt_ctx: Packet context to get the cached time
*
* The driver receives a notification in the receive descriptor with timestamp.
- * The timestamp is in ns, so we must convert the result first.
*/
-void
-ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
- union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb)
+u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc,
+ const struct ice_pkt_ctx *pkt_ctx)
{
- struct skb_shared_hwtstamps *hwtstamps;
u64 ts_ns, cached_time;
u32 ts_high;
if (!(rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID))
- return;
+ return 0;
- cached_time = READ_ONCE(rx_ring->cached_phctime);
+ cached_time = READ_ONCE(pkt_ctx->cached_phctime);
/* Do not report a timestamp if we don't have a cached PHC time */
if (!cached_time)
- return;
+ return 0;
/* Use ice_ptp_extend_32b_ts directly, using the ring-specific cached
* PHC value, rather than accessing the PF. This also allows us to
@@ -2163,9 +2159,7 @@ ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high);
ts_ns = ice_ptp_extend_32b_ts(cached_time, ts_high);
- hwtstamps = skb_hwtstamps(skb);
- memset(hwtstamps, 0, sizeof(*hwtstamps));
- hwtstamps->hwtstamp = ns_to_ktime(ts_ns);
+ return ts_ns;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index d79281061409..032653a7a133 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -298,9 +298,8 @@ void ice_ptp_extts_event(struct ice_pf *pf);
s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb);
enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf);
-void
-ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
- union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb);
+u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc,
+ const struct ice_pkt_ctx *pkt_ctx);
void ice_ptp_reset(struct ice_pf *pf);
void ice_ptp_prepare_for_reset(struct ice_pf *pf);
void ice_ptp_init(struct ice_pf *pf);
@@ -329,9 +328,14 @@ static inline bool ice_ptp_process_ts(struct ice_pf *pf)
{
return true;
}
-static inline void
-ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
- union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { }
+
+static inline u64
+ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc,
+ const struct ice_pkt_ctx *pkt_ctx)
+{
+ return 0;
+}
+
static inline void ice_ptp_reset(struct ice_pf *pf) { }
static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { }
static inline void ice_ptp_init(struct ice_pf *pf) { }
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 9e97ea863068..59617f055e35 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -557,13 +557,14 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
* @xdp_prog: XDP program to run
* @xdp_ring: ring to be used for XDP_TX action
* @rx_buf: Rx buffer to store the XDP action
+ * @eop_desc: Last descriptor in packet to read metadata from
*
* Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
*/
static void
ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
- struct ice_rx_buf *rx_buf)
+ struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc)
{
unsigned int ret = ICE_XDP_PASS;
u32 act;
@@ -571,6 +572,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
if (!xdp_prog)
goto exit;
+ ice_xdp_meta_set_desc(xdp, eop_desc);
+
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
@@ -1180,8 +1183,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
struct sk_buff *skb;
unsigned int size;
u16 stat_err_bits;
- u16 vlan_tag = 0;
- u16 rx_ptype;
+ u16 vlan_tci;
/* get the Rx desc from Rx ring based on 'next_to_clean' */
rx_desc = ICE_RX_DESC(rx_ring, ntc);
@@ -1241,7 +1243,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
if (ice_is_non_eop(rx_ring, rx_desc))
continue;
- ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf);
+ ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc);
if (rx_buf->act == ICE_XDP_PASS)
goto construct_skb;
total_rx_bytes += xdp_get_buff_len(xdp);
@@ -1276,7 +1278,7 @@ construct_skb:
continue;
}
- vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);
+ vlan_tci = ice_get_vlan_tci(rx_desc);
/* pad the skb if needed, to make a valid ethernet frame */
if (eth_skb_pad(skb))
@@ -1286,14 +1288,11 @@ construct_skb:
total_rx_bytes += skb->len;
/* populate checksum, VLAN, and protocol */
- rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
- ICE_RX_FLEX_DESC_PTYPE_M;
-
- ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+ ice_process_skb_fields(rx_ring, rx_desc, skb);
ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb);
/* send completed skb up the stack */
- ice_receive_skb(rx_ring, skb, vlan_tag);
+ ice_receive_skb(rx_ring, skb, vlan_tci);
/* update budget accounting */
total_rx_pkts++;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index daf7b9dbb143..b3379ff73674 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -257,6 +257,20 @@ enum ice_rx_dtype {
ICE_RX_DTYPE_SPLIT_ALWAYS = 2,
};
+struct ice_pkt_ctx {
+ u64 cached_phctime;
+ __be16 vlan_proto;
+};
+
+struct ice_xdp_buff {
+ struct xdp_buff xdp_buff;
+ const union ice_32b_rx_flex_desc *eop_desc;
+ const struct ice_pkt_ctx *pkt_ctx;
+};
+
+/* Required for compatibility with xdp_buffs from xsk_pool */
+static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0);
+
/* indices into GLINT_ITR registers */
#define ICE_RX_ITR ICE_IDX_ITR0
#define ICE_TX_ITR ICE_IDX_ITR1
@@ -298,7 +312,6 @@ enum ice_dynamic_itr {
/* descriptor ring, associated with a VSI */
struct ice_rx_ring {
/* CL1 - 1st cacheline starts here */
- struct ice_rx_ring *next; /* pointer to next ring in q_vector */
void *desc; /* Descriptor ring memory */
struct device *dev; /* Used for DMA mapping */
struct net_device *netdev; /* netdev ring maps to */
@@ -310,13 +323,24 @@ struct ice_rx_ring {
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
u16 next_to_alloc;
- /* CL2 - 2nd cacheline starts here */
+
union {
struct ice_rx_buf *rx_buf;
struct xdp_buff **xdp_buf;
};
- struct xdp_buff xdp;
+ /* CL2 - 2nd cacheline starts here */
+ union {
+ struct ice_xdp_buff xdp_ext;
+ struct xdp_buff xdp;
+ };
/* CL3 - 3rd cacheline starts here */
+ union {
+ struct ice_pkt_ctx pkt_ctx;
+ struct {
+ u64 cached_phctime;
+ __be16 vlan_proto;
+ };
+ };
struct bpf_prog *xdp_prog;
u16 rx_offset;
@@ -332,9 +356,9 @@ struct ice_rx_ring {
/* CL4 - 4th cacheline starts here */
struct ice_channel *ch;
struct ice_tx_ring *xdp_ring;
+ struct ice_rx_ring *next; /* pointer to next ring in q_vector */
struct xsk_buff_pool *xsk_pool;
dma_addr_t dma; /* physical address of ring */
- u64 cached_phctime;
u16 rx_buf_len;
u8 dcb_tc; /* Traffic class of ring */
u8 ptp_rx;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 7e06373e14d9..839e5da24ad5 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -63,28 +63,42 @@ static enum pkt_hash_types ice_ptype_to_htype(u16 ptype)
}
/**
- * ice_rx_hash - set the hash value in the skb
+ * ice_get_rx_hash - get RX hash value from descriptor
+ * @rx_desc: specific descriptor
+ *
+ * Returns hash, if present, 0 otherwise.
+ */
+static u32 ice_get_rx_hash(const union ice_32b_rx_flex_desc *rx_desc)
+{
+ const struct ice_32b_rx_flex_desc_nic *nic_mdid;
+
+ if (unlikely(rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC))
+ return 0;
+
+ nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+ return le32_to_cpu(nic_mdid->rss_hash);
+}
+
+/**
+ * ice_rx_hash_to_skb - set the hash value in the skb
* @rx_ring: descriptor ring
* @rx_desc: specific descriptor
* @skb: pointer to current skb
* @rx_ptype: the ptype value from the descriptor
*/
static void
-ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
- struct sk_buff *skb, u16 rx_ptype)
+ice_rx_hash_to_skb(const struct ice_rx_ring *rx_ring,
+ const union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb, u16 rx_ptype)
{
- struct ice_32b_rx_flex_desc_nic *nic_mdid;
u32 hash;
if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
return;
- if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
- return;
-
- nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
- hash = le32_to_cpu(nic_mdid->rss_hash);
- skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+ hash = ice_get_rx_hash(rx_desc);
+ if (likely(hash))
+ skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
}
/**
@@ -171,11 +185,38 @@ checksum_fail:
}
/**
+ * ice_ptp_rx_hwts_to_skb - Put RX timestamp into skb
+ * @rx_ring: Ring to get the VSI info
+ * @rx_desc: Receive descriptor
+ * @skb: Particular skb to send timestamp with
+ *
+ * The timestamp is in ns, so we must convert the result first.
+ */
+static void
+ice_ptp_rx_hwts_to_skb(struct ice_rx_ring *rx_ring,
+ const union ice_32b_rx_flex_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ u64 ts_ns = ice_ptp_get_rx_hwts(rx_desc, &rx_ring->pkt_ctx);
+
+ skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ts_ns);
+}
+
+/**
+ * ice_get_ptype - Read HW packet type from the descriptor
+ * @rx_desc: RX descriptor
+ */
+static u16 ice_get_ptype(const union ice_32b_rx_flex_desc *rx_desc)
+{
+ return le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+ ICE_RX_FLEX_DESC_PTYPE_M;
+}
+
+/**
* ice_process_skb_fields - Populate skb header fields from Rx descriptor
* @rx_ring: Rx descriptor ring packet is being transacted on
* @rx_desc: pointer to the EOP Rx descriptor
* @skb: pointer to current skb being populated
- * @ptype: the packet type decoded by hardware
*
* This function checks the ring, descriptor, and packet information in
* order to populate the hash, checksum, VLAN, protocol, and
@@ -184,9 +225,11 @@ checksum_fail:
void
ice_process_skb_fields(struct ice_rx_ring *rx_ring,
union ice_32b_rx_flex_desc *rx_desc,
- struct sk_buff *skb, u16 ptype)
+ struct sk_buff *skb)
{
- ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+ u16 ptype = ice_get_ptype(rx_desc);
+
+ ice_rx_hash_to_skb(rx_ring, rx_desc, skb, ptype);
/* modifies the skb - consumes the enet header */
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
@@ -194,28 +237,24 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring,
ice_rx_csum(rx_ring, skb, rx_desc, ptype);
if (rx_ring->ptp_rx)
- ice_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
+ ice_ptp_rx_hwts_to_skb(rx_ring, rx_desc, skb);
}
/**
* ice_receive_skb - Send a completed packet up the stack
* @rx_ring: Rx ring in play
* @skb: packet to send up
- * @vlan_tag: VLAN tag for packet
+ * @vlan_tci: VLAN TCI for packet
*
* This function sends the completed packet (via. skb) up the stack using
* gro receive functions (with/without VLAN tag)
*/
void
-ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
+ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tci)
{
- netdev_features_t features = rx_ring->netdev->features;
- bool non_zero_vlan = !!(vlan_tag & VLAN_VID_MASK);
-
- if ((features & NETIF_F_HW_VLAN_CTAG_RX) && non_zero_vlan)
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
- else if ((features & NETIF_F_HW_VLAN_STAG_RX) && non_zero_vlan)
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), vlan_tag);
+ if ((vlan_tci & VLAN_VID_MASK) && rx_ring->vlan_proto)
+ __vlan_hwaccel_put_tag(skb, rx_ring->vlan_proto,
+ vlan_tci);
napi_gro_receive(&rx_ring->q_vector->napi, skb);
}
@@ -464,3 +503,125 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
spin_unlock(&xdp_ring->tx_lock);
}
}
+
+/**
+ * ice_xdp_rx_hw_ts - HW timestamp XDP hint handler
+ * @ctx: XDP buff pointer
+ * @ts_ns: destination address
+ *
+ * Copy HW timestamp (if available) to the destination address.
+ */
+static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns)
+{
+ const struct ice_xdp_buff *xdp_ext = (void *)ctx;
+
+ *ts_ns = ice_ptp_get_rx_hwts(xdp_ext->eop_desc,
+ xdp_ext->pkt_ctx);
+ if (!*ts_ns)
+ return -ENODATA;
+
+ return 0;
+}
+
+/* Define a ptype index -> XDP hash type lookup table.
+ * It uses the same ptype definitions as ice_decode_rx_desc_ptype[],
+ * avoiding possible copy-paste errors.
+ */
+#undef ICE_PTT
+#undef ICE_PTT_UNUSED_ENTRY
+
+#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+ [PTYPE] = XDP_RSS_L3_##OUTER_IP_VER | XDP_RSS_L4_##I | XDP_RSS_TYPE_##PL
+
+#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = 0
+
+/* A few supplementary definitions for when XDP hash types do not coincide
+ * with what can be generated from ptype definitions
+ * by means of preprocessor concatenation.
+ */
+#define XDP_RSS_L3_NONE XDP_RSS_TYPE_NONE
+#define XDP_RSS_L4_NONE XDP_RSS_TYPE_NONE
+#define XDP_RSS_TYPE_PAY2 XDP_RSS_TYPE_L2
+#define XDP_RSS_TYPE_PAY3 XDP_RSS_TYPE_NONE
+#define XDP_RSS_TYPE_PAY4 XDP_RSS_L4
+
+static const enum xdp_rss_hash_type
+ice_ptype_to_xdp_hash[ICE_NUM_DEFINED_PTYPES] = {
+ ICE_PTYPES
+};
+
+#undef XDP_RSS_L3_NONE
+#undef XDP_RSS_L4_NONE
+#undef XDP_RSS_TYPE_PAY2
+#undef XDP_RSS_TYPE_PAY3
+#undef XDP_RSS_TYPE_PAY4
+
+#undef ICE_PTT
+#undef ICE_PTT_UNUSED_ENTRY
+
+/**
+ * ice_xdp_rx_hash_type - Get XDP-specific hash type from the RX descriptor
+ * @eop_desc: End of Packet descriptor
+ */
+static enum xdp_rss_hash_type
+ice_xdp_rx_hash_type(const union ice_32b_rx_flex_desc *eop_desc)
+{
+ u16 ptype = ice_get_ptype(eop_desc);
+
+ if (unlikely(ptype >= ICE_NUM_DEFINED_PTYPES))
+ return 0;
+
+ return ice_ptype_to_xdp_hash[ptype];
+}
+
+/**
+ * ice_xdp_rx_hash - RX hash XDP hint handler
+ * @ctx: XDP buff pointer
+ * @hash: hash destination address
+ * @rss_type: XDP hash type destination address
+ *
+ * Copy RX hash (if available) and its type to the destination address.
+ */
+static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
+ enum xdp_rss_hash_type *rss_type)
+{
+ const struct ice_xdp_buff *xdp_ext = (void *)ctx;
+
+ *hash = ice_get_rx_hash(xdp_ext->eop_desc);
+ *rss_type = ice_xdp_rx_hash_type(xdp_ext->eop_desc);
+ if (!likely(*hash))
+ return -ENODATA;
+
+ return 0;
+}
+
+/**
+ * ice_xdp_rx_vlan_tag - VLAN tag XDP hint handler
+ * @ctx: XDP buff pointer
+ * @vlan_proto: destination address for VLAN protocol
+ * @vlan_tci: destination address for VLAN TCI
+ *
+ * Copy VLAN tag (if was stripped) and corresponding protocol
+ * to the destination address.
+ */
+static int ice_xdp_rx_vlan_tag(const struct xdp_md *ctx, __be16 *vlan_proto,
+ u16 *vlan_tci)
+{
+ const struct ice_xdp_buff *xdp_ext = (void *)ctx;
+
+ *vlan_proto = xdp_ext->pkt_ctx->vlan_proto;
+ if (!*vlan_proto)
+ return -ENODATA;
+
+ *vlan_tci = ice_get_vlan_tci(xdp_ext->eop_desc);
+ if (!*vlan_tci)
+ return -ENODATA;
+
+ return 0;
+}
+
+const struct xdp_metadata_ops ice_xdp_md_ops = {
+ .xmo_rx_timestamp = ice_xdp_rx_hw_ts,
+ .xmo_rx_hash = ice_xdp_rx_hash,
+ .xmo_rx_vlan_tag = ice_xdp_rx_vlan_tag,
+};
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index 115969ecdf7b..762047508619 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -84,7 +84,7 @@ ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
}
/**
- * ice_get_vlan_tag_from_rx_desc - get VLAN from Rx flex descriptor
+ * ice_get_vlan_tci - get VLAN TCI from Rx flex descriptor
* @rx_desc: Rx 32b flex descriptor with RXDID=2
*
* The OS and current PF implementation only support stripping a single VLAN tag
@@ -92,7 +92,7 @@ ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
* one is found return the tag, else return 0 to mean no VLAN tag was found.
*/
static inline u16
-ice_get_vlan_tag_from_rx_desc(union ice_32b_rx_flex_desc *rx_desc)
+ice_get_vlan_tci(const union ice_32b_rx_flex_desc *rx_desc)
{
u16 stat_err_bits;
@@ -148,7 +148,17 @@ void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val);
void
ice_process_skb_fields(struct ice_rx_ring *rx_ring,
union ice_32b_rx_flex_desc *rx_desc,
- struct sk_buff *skb, u16 ptype);
+ struct sk_buff *skb);
void
-ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tci);
+
+static inline void
+ice_xdp_meta_set_desc(struct xdp_buff *xdp,
+ union ice_32b_rx_flex_desc *eop_desc)
+{
+ struct ice_xdp_buff *xdp_ext = container_of(xdp, struct ice_xdp_buff,
+ xdp_buff);
+
+ xdp_ext->eop_desc = eop_desc;
+}
#endif /* !_ICE_TXRX_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 99954508184f..5d1ae8e4058a 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -458,6 +458,11 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
rx_desc->read.pkt_addr = cpu_to_le64(dma);
rx_desc->wb.status_error0 = 0;
+ /* Put private info that changes on a per-packet basis
+ * into xdp_buff_xsk->cb.
+ */
+ ice_xdp_meta_set_desc(*xdp, rx_desc);
+
rx_desc++;
xdp++;
}
@@ -863,8 +868,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
struct xdp_buff *xdp;
struct sk_buff *skb;
u16 stat_err_bits;
- u16 vlan_tag = 0;
- u16 rx_ptype;
+ u16 vlan_tci;
rx_desc = ICE_RX_DESC(rx_ring, ntc);
@@ -942,13 +946,10 @@ construct_skb:
total_rx_bytes += skb->len;
total_rx_packets++;
- vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);
-
- rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
- ICE_RX_FLEX_DESC_PTYPE_M;
+ vlan_tci = ice_get_vlan_tci(rx_desc);
- ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
- ice_receive_skb(rx_ring, skb, vlan_tag);
+ ice_process_skb_fields(rx_ring, rx_desc, skb);
+ ice_receive_skb(rx_ring, skb, vlan_tci);
}
rx_ring->next_to_clean = ntc;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index e2e7d82cfca4..9e695ed122ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -256,9 +256,24 @@ static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
return 0;
}
+static int mlx5e_xdp_rx_vlan_tag(const struct xdp_md *ctx, __be16 *vlan_proto,
+ u16 *vlan_tci)
+{
+ const struct mlx5e_xdp_buff *_ctx = (void *)ctx;
+ const struct mlx5_cqe64 *cqe = _ctx->cqe;
+
+ if (!cqe_has_vlan(cqe))
+ return -ENODATA;
+
+ *vlan_proto = htons(ETH_P_8021Q);
+ *vlan_tci = be16_to_cpu(cqe->vlan_info);
+ return 0;
+}
+
const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = {
.xmo_rx_timestamp = mlx5e_xdp_rx_timestamp,
.xmo_rx_hash = mlx5e_xdp_rx_hash,
+ .xmo_rx_vlan_tag = mlx5e_xdp_rx_vlan_tag,
};
struct mlx5e_xsk_tx_complete {
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 977861c46b1f..578e36ea1589 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1723,6 +1723,24 @@ static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
return 0;
}
+static int veth_xdp_rx_vlan_tag(const struct xdp_md *ctx, __be16 *vlan_proto,
+ u16 *vlan_tci)
+{
+ const struct veth_xdp_buff *_ctx = (void *)ctx;
+ const struct sk_buff *skb = _ctx->skb;
+ int err;
+
+ if (!skb)
+ return -ENODATA;
+
+ err = __vlan_hwaccel_get_tag(skb, vlan_tci);
+ if (err)
+ return err;
+
+ *vlan_proto = skb->vlan_proto;
+ return err;
+}
+
static const struct net_device_ops veth_netdev_ops = {
.ndo_init = veth_dev_init,
.ndo_open = veth_open,
@@ -1747,6 +1765,7 @@ static const struct net_device_ops veth_netdev_ops = {
static const struct xdp_metadata_ops veth_xdp_metadata_ops = {
.xmo_rx_timestamp = veth_xdp_rx_timestamp,
.xmo_rx_hash = veth_xdp_rx_hash,
+ .xmo_rx_vlan_tag = veth_xdp_rx_vlan_tag,
};
#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index d071a6e32581..a6a6b2749241 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -100,6 +100,16 @@ fsverity_msg(const struct inode *inode, const char *level,
#define fsverity_err(inode, fmt, ...) \
fsverity_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__)
+/* measure.c */
+
+#ifdef CONFIG_BPF_SYSCALL
+void __init fsverity_init_bpf(void);
+#else
+static inline void fsverity_init_bpf(void)
+{
+}
+#endif
+
/* open.c */
int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
diff --git a/fs/verity/init.c b/fs/verity/init.c
index a29f062f6047..1e207c0f71de 100644
--- a/fs/verity/init.c
+++ b/fs/verity/init.c
@@ -69,6 +69,7 @@ static int __init fsverity_init(void)
fsverity_init_workqueue();
fsverity_init_sysctl();
fsverity_init_signature();
+ fsverity_init_bpf();
return 0;
}
late_initcall(fsverity_init)
diff --git a/fs/verity/measure.c b/fs/verity/measure.c
index eec5956141da..bf7a5f4cccaf 100644
--- a/fs/verity/measure.c
+++ b/fs/verity/measure.c
@@ -7,6 +7,8 @@
#include "fsverity_private.h"
+#include <linux/bpf.h>
+#include <linux/btf.h>
#include <linux/uaccess.h>
/**
@@ -100,3 +102,85 @@ int fsverity_get_digest(struct inode *inode,
return hash_alg->digest_size;
}
EXPORT_SYMBOL_GPL(fsverity_get_digest);
+
+#ifdef CONFIG_BPF_SYSCALL
+
+/* bpf kfuncs */
+__bpf_kfunc_start_defs();
+
+/**
+ * bpf_get_fsverity_digest: read fsverity digest of file
+ * @file: file to get digest from
+ * @digest_ptr: (out) dynptr for struct fsverity_digest
+ *
+ * Read fsverity_digest of *file* into *digest_ptr*.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+__bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr_kern *digest_ptr)
+{
+ const struct inode *inode = file_inode(file);
+ u32 dynptr_sz = __bpf_dynptr_size(digest_ptr);
+ struct fsverity_digest *arg;
+ const struct fsverity_info *vi;
+ const struct fsverity_hash_alg *hash_alg;
+ int out_digest_sz;
+
+ if (dynptr_sz < sizeof(struct fsverity_digest))
+ return -EINVAL;
+
+ arg = __bpf_dynptr_data_rw(digest_ptr, dynptr_sz);
+ if (!arg)
+ return -EINVAL;
+
+ if (!IS_ALIGNED((uintptr_t)arg, __alignof__(*arg)))
+ return -EINVAL;
+
+ vi = fsverity_get_info(inode);
+ if (!vi)
+ return -ENODATA; /* not a verity file */
+
+ hash_alg = vi->tree_params.hash_alg;
+
+ arg->digest_algorithm = hash_alg - fsverity_hash_algs;
+ arg->digest_size = hash_alg->digest_size;
+
+ out_digest_sz = dynptr_sz - sizeof(struct fsverity_digest);
+
+ /* copy digest */
+ memcpy(arg->digest, vi->file_digest, min_t(int, hash_alg->digest_size, out_digest_sz));
+
+ /* fill the extra buffer with zeros */
+ if (out_digest_sz > hash_alg->digest_size)
+ memset(arg->digest + arg->digest_size, 0, out_digest_sz - hash_alg->digest_size);
+
+ return 0;
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_SET8_START(fsverity_set_ids)
+BTF_ID_FLAGS(func, bpf_get_fsverity_digest, KF_TRUSTED_ARGS)
+BTF_SET8_END(fsverity_set_ids)
+
+static int bpf_get_fsverity_digest_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+ if (!btf_id_set8_contains(&fsverity_set_ids, kfunc_id))
+ return 0;
+
+ /* Only allow to attach from LSM hooks, to avoid recursion */
+ return prog->type != BPF_PROG_TYPE_LSM ? -EACCES : 0;
+}
+
+static const struct btf_kfunc_id_set bpf_fsverity_set = {
+ .owner = THIS_MODULE,
+ .set = &fsverity_set_ids,
+ .filter = bpf_get_fsverity_digest_filter,
+};
+
+void __init fsverity_init_bpf(void)
+{
+ register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fsverity_set);
+}
+
+#endif /* CONFIG_BPF_SYSCALL */
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index def242528b1d..d436bee4d129 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -11,6 +11,7 @@ mandatory-y += bitops.h
mandatory-y += bug.h
mandatory-y += bugs.h
mandatory-y += cacheflush.h
+mandatory-y += cfi.h
mandatory-y += checksum.h
mandatory-y += compat.h
mandatory-y += current.h
diff --git a/include/asm-generic/cfi.h b/include/asm-generic/cfi.h
new file mode 100644
index 000000000000..41fac3537bf9
--- /dev/null
+++ b/include/asm-generic/cfi.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_GENERIC_CFI_H
+#define __ASM_GENERIC_CFI_H
+
+#endif /* __ASM_GENERIC_CFI_H */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d5d8e2083610..2f54cc0436c4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -29,6 +29,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/static_call.h>
#include <linux/memcontrol.h>
+#include <linux/cfi.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -51,6 +52,10 @@ struct module;
struct bpf_func_state;
struct ftrace_ops;
struct cgroup;
+struct bpf_token;
+struct user_namespace;
+struct super_block;
+struct inode;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -106,7 +111,11 @@ struct bpf_map_ops {
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
int fd);
- void (*map_fd_put_ptr)(void *ptr);
+ /* If need_defer is true, the implementation should guarantee that
+ * the to-be-put element is still alive before the bpf program, which
+ * may manipulate it, exists.
+ */
+ void (*map_fd_put_ptr)(struct bpf_map *map, void *ptr, bool need_defer);
int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
u32 (*map_fd_sys_lookup_elem)(void *ptr);
void (*map_seq_show_elem)(struct bpf_map *map, void *key,
@@ -272,7 +281,11 @@ struct bpf_map {
*/
atomic64_t refcnt ____cacheline_aligned;
atomic64_t usercnt;
- struct work_struct work;
+ /* rcu is used before freeing and work is only used during freeing */
+ union {
+ struct work_struct work;
+ struct rcu_head rcu;
+ };
struct mutex freeze_mutex;
atomic64_t writecnt;
/* 'Ownership' of program-containing map is claimed by the first program
@@ -288,6 +301,9 @@ struct bpf_map {
} owner;
bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */
+ bool free_after_mult_rcu_gp;
+ bool free_after_rcu_gp;
+ atomic64_t sleepable_refcnt;
s64 __percpu *elem_count;
};
@@ -1044,6 +1060,17 @@ struct btf_func_model {
*/
#define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7)
+/*
+ * Indicate the trampoline should be suitable to receive indirect calls;
+ * without this indirectly calling the generated code can result in #UD/#CP,
+ * depending on the CFI options.
+ *
+ * Used by bpf_struct_ops.
+ *
+ * Incompatible with FENTRY usage, overloads @func_addr argument.
+ */
+#define BPF_TRAMP_F_INDIRECT BIT(8)
+
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86.
*/
@@ -1083,10 +1110,17 @@ struct bpf_tramp_run_ctx;
* fexit = a set of program to run after original function
*/
struct bpf_tramp_image;
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks,
- void *orig_call);
+ void *func_addr);
+void *arch_alloc_bpf_trampoline(unsigned int size);
+void arch_free_bpf_trampoline(void *image, unsigned int size);
+void arch_protect_bpf_trampoline(void *image, unsigned int size);
+void arch_unprotect_bpf_trampoline(void *image, unsigned int size);
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks, void *func_addr);
+
u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
@@ -1119,6 +1153,7 @@ enum bpf_tramp_prog_type {
struct bpf_tramp_image {
void *image;
+ int size;
struct bpf_ksym ksym;
struct percpu_ref pcref;
void *ip_after_call;
@@ -1188,7 +1223,11 @@ struct bpf_dispatcher {
#endif
};
-static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
+#ifndef __bpfcall
+#define __bpfcall __nocfi
+#endif
+
+static __always_inline __bpfcall unsigned int bpf_dispatcher_nop_func(
const void *ctx,
const struct bpf_insn *insnsi,
bpf_func_t bpf_func)
@@ -1280,7 +1319,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
#define DEFINE_BPF_DISPATCHER(name) \
__BPF_DISPATCHER_SC(name); \
- noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \
+ noinline __bpfcall unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
bpf_func_t bpf_func) \
@@ -1303,7 +1342,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
struct bpf_prog *to);
/* Called only from JIT-enabled code, so there's no need for stubs. */
-void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
+void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
@@ -1430,6 +1469,9 @@ struct bpf_prog_aux {
struct bpf_kfunc_desc_tab *kfunc_tab;
struct bpf_kfunc_btf_tab *kfunc_btf_tab;
u32 size_poke_tab;
+#ifdef CONFIG_FINEIBT
+ struct bpf_ksym ksym_prefix;
+#endif
struct bpf_ksym ksym;
const struct bpf_prog_ops *ops;
struct bpf_map **used_maps;
@@ -1442,10 +1484,11 @@ struct bpf_prog_aux {
int cgroup_atype; /* enum cgroup_bpf_attach_type */
struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
char name[BPF_OBJ_NAME_LEN];
- unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp);
+ u64 (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp, u64, u64);
#ifdef CONFIG_SECURITY
void *security;
#endif
+ struct bpf_token *token;
struct bpf_prog_offload *offload;
struct btf *btf;
struct bpf_func_info *func_info;
@@ -1570,6 +1613,31 @@ struct bpf_link_primer {
u32 id;
};
+struct bpf_mount_opts {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
+
+ /* BPF token-related delegation options */
+ u64 delegate_cmds;
+ u64 delegate_maps;
+ u64 delegate_progs;
+ u64 delegate_attachs;
+};
+
+struct bpf_token {
+ struct work_struct work;
+ atomic64_t refcnt;
+ struct user_namespace *userns;
+ u64 allowed_cmds;
+ u64 allowed_maps;
+ u64 allowed_progs;
+ u64 allowed_attachs;
+#ifdef CONFIG_SECURITY
+ void *security;
+#endif
+};
+
struct bpf_struct_ops_value;
struct btf_member;
@@ -1640,6 +1708,7 @@ struct bpf_struct_ops {
struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
u32 type_id;
u32 value_id;
+ void *cfi_stubs;
};
#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
@@ -1653,6 +1722,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct btf_func_model *model,
+ void *stub_func,
void *image, void *image_end);
static inline bool bpf_try_module_get(const void *data, struct module *owner)
{
@@ -2027,6 +2097,7 @@ static inline void bpf_enable_instrumentation(void)
migrate_enable();
}
+extern const struct super_operations bpf_super_ops;
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
extern const struct file_operations bpf_iter_fops;
@@ -2161,24 +2232,26 @@ static inline void bpf_map_dec_elem_count(struct bpf_map *map)
extern int sysctl_unprivileged_bpf_disabled;
-static inline bool bpf_allow_ptr_leaks(void)
+bool bpf_token_capable(const struct bpf_token *token, int cap);
+
+static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token)
{
- return perfmon_capable();
+ return bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_allow_uninit_stack(void)
+static inline bool bpf_allow_uninit_stack(const struct bpf_token *token)
{
- return perfmon_capable();
+ return bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_bypass_spec_v1(void)
+static inline bool bpf_bypass_spec_v1(const struct bpf_token *token)
{
- return cpu_mitigations_off() || perfmon_capable();
+ return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON);
}
-static inline bool bpf_bypass_spec_v4(void)
+static inline bool bpf_bypass_spec_v4(const struct bpf_token *token)
{
- return cpu_mitigations_off() || perfmon_capable();
+ return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON);
}
int bpf_map_new_fd(struct bpf_map *map, int flags);
@@ -2195,8 +2268,21 @@ int bpf_link_new_fd(struct bpf_link *link);
struct bpf_link *bpf_link_get_from_fd(u32 ufd);
struct bpf_link *bpf_link_get_curr_or_next(u32 *id);
+void bpf_token_inc(struct bpf_token *token);
+void bpf_token_put(struct bpf_token *token);
+int bpf_token_create(union bpf_attr *attr);
+struct bpf_token *bpf_token_get_from_fd(u32 ufd);
+
+bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
+bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type);
+bool bpf_token_allow_prog_type(const struct bpf_token *token,
+ enum bpf_prog_type prog_type,
+ enum bpf_attach_type attach_type);
+
int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname);
int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags);
+struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir,
+ umode_t mode);
#define BPF_ITER_FUNC_PREFIX "bpf_iter_"
#define DEFINE_BPF_ITER_FUNC(target, args...) \
@@ -2431,7 +2517,7 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *regs);
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *reg, bool is_ex_cb);
+ struct bpf_reg_state *reg, u32 *nargs);
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
struct btf *btf, const struct btf_type *t);
const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
@@ -2440,7 +2526,8 @@ const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type
struct bpf_prog *bpf_prog_by_id(u32 id);
struct bpf_link *bpf_link_by_id(u32 id);
-const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog);
void bpf_task_storage_free(struct task_struct *task);
void bpf_cgrp_storage_free(struct cgroup *cgroup);
bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
@@ -2559,6 +2646,24 @@ static inline int bpf_obj_get_user(const char __user *pathname, int flags)
return -EOPNOTSUPP;
}
+static inline bool bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN));
+}
+
+static inline void bpf_token_inc(struct bpf_token *token)
+{
+}
+
+static inline void bpf_token_put(struct bpf_token *token)
+{
+}
+
+static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd)
+{
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static inline void __dev_flush(void)
{
}
@@ -2682,7 +2787,7 @@ static inline int btf_struct_access(struct bpf_verifier_log *log,
}
static inline const struct bpf_func_proto *
-bpf_base_func_proto(enum bpf_func_id func_id)
+bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
return NULL;
}
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d99a636d36a7..c2819a6579a5 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -275,6 +275,11 @@ struct bpf_reference_state {
int callback_ref;
};
+struct bpf_retval_range {
+ s32 minval;
+ s32 maxval;
+};
+
/* state of the program:
* type of all registers and stack info
*/
@@ -297,8 +302,8 @@ struct bpf_func_state {
* void foo(void) { bpf_timer_set_callback(,foo); }
*/
u32 async_entry_cnt;
+ struct bpf_retval_range callback_ret_range;
bool in_callback_fn;
- struct tnum callback_ret_range;
bool in_async_callback_fn;
bool in_exception_callback_fn;
/* For callback calling functions that limit number of possible
@@ -316,16 +321,48 @@ struct bpf_func_state {
/* The following fields should be last. See copy_func_state() */
int acquired_refs;
struct bpf_reference_state *refs;
- int allocated_stack;
+ /* The state of the stack. Each element of the array describes BPF_REG_SIZE
+ * (i.e. 8) bytes worth of stack memory.
+ * stack[0] represents bytes [*(r10-8)..*(r10-1)]
+ * stack[1] represents bytes [*(r10-16)..*(r10-9)]
+ * ...
+ * stack[allocated_stack/8 - 1] represents [*(r10-allocated_stack)..*(r10-allocated_stack+7)]
+ */
struct bpf_stack_state *stack;
+ /* Size of the current stack, in bytes. The stack state is tracked below, in
+ * `stack`. allocated_stack is always a multiple of BPF_REG_SIZE.
+ */
+ int allocated_stack;
+};
+
+#define MAX_CALL_FRAMES 8
+
+/* instruction history flags, used in bpf_jmp_history_entry.flags field */
+enum {
+ /* instruction references stack slot through PTR_TO_STACK register;
+ * we also store stack's frame number in lower 3 bits (MAX_CALL_FRAMES is 8)
+ * and accessed stack slot's index in next 6 bits (MAX_BPF_STACK is 512,
+ * 8 bytes per slot, so slot index (spi) is [0, 63])
+ */
+ INSN_F_FRAMENO_MASK = 0x7, /* 3 bits */
+
+ INSN_F_SPI_MASK = 0x3f, /* 6 bits */
+ INSN_F_SPI_SHIFT = 3, /* shifted 3 bits to the left */
+
+ INSN_F_STACK_ACCESS = BIT(9), /* we need 10 bits total */
};
-struct bpf_idx_pair {
- u32 prev_idx;
+static_assert(INSN_F_FRAMENO_MASK + 1 >= MAX_CALL_FRAMES);
+static_assert(INSN_F_SPI_MASK + 1 >= MAX_BPF_STACK / 8);
+
+struct bpf_jmp_history_entry {
u32 idx;
+ /* insn idx can't be bigger than 1 million */
+ u32 prev_idx : 22;
+ /* special flags, e.g., whether insn is doing register stack spill/load */
+ u32 flags : 10;
};
-#define MAX_CALL_FRAMES 8
/* Maximum number of register states that can exist at once */
#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
struct bpf_verifier_state {
@@ -408,7 +445,7 @@ struct bpf_verifier_state {
* For most states jmp_history_cnt is [0-3].
* For loops can go up to ~40.
*/
- struct bpf_idx_pair *jmp_history;
+ struct bpf_jmp_history_entry *jmp_history;
u32 jmp_history_cnt;
u32 dfs_depth;
u32 callback_unroll_depth;
@@ -574,12 +611,12 @@ struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
u16 stack_depth; /* max. stack depth used by this function */
- bool has_tail_call;
- bool tail_call_reachable;
- bool has_ld_abs;
- bool is_cb;
- bool is_async_cb;
- bool is_exception_cb;
+ bool has_tail_call: 1;
+ bool tail_call_reachable: 1;
+ bool has_ld_abs: 1;
+ bool is_cb: 1;
+ bool is_async_cb: 1;
+ bool is_exception_cb: 1;
};
struct bpf_verifier_env;
@@ -631,6 +668,10 @@ struct bpf_verifier_env {
int exception_callback_subprog;
bool explore_alu_limits;
bool allow_ptr_leaks;
+ /* Allow access to uninitialized stack memory. Writes with fixed offset are
+ * always allowed, so this refers to reads (with fixed or variable offset),
+ * to writes with variable offset and to indirect (helper) accesses.
+ */
bool allow_uninit_stack;
bool bpf_capable;
bool bypass_spec_v1;
@@ -651,6 +692,7 @@ struct bpf_verifier_env {
int cur_stack;
} cfg;
struct backtrack_state bt;
+ struct bpf_jmp_history_entry *cur_hist_ent;
u32 pass_cnt; /* number of times do_check() was called */
u32 subprog_cnt;
/* number of instructions analyzed by the verifier */
diff --git a/include/linux/cfi.h b/include/linux/cfi.h
index 3552ec82b725..f0df518e11dd 100644
--- a/include/linux/cfi.h
+++ b/include/linux/cfi.h
@@ -9,6 +9,14 @@
#include <linux/bug.h>
#include <linux/module.h>
+#include <asm/cfi.h>
+
+#ifndef cfi_get_offset
+static inline int cfi_get_offset(void)
+{
+ return 0;
+}
+#endif
#ifdef CONFIG_CFI_CLANG
enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr,
@@ -38,4 +46,8 @@ static inline void module_cfi_finalize(const Elf_Ehdr *hdr,
#endif /* CONFIG_ARCH_USES_CFI_TRAPS */
#endif /* CONFIG_MODULES */
+#ifndef CFI_NOSEAL
+#define CFI_NOSEAL(x)
+#endif
+
#endif /* _LINUX_CFI_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a4953fafc8cb..12d907f17d36 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1067,7 +1067,7 @@ struct bpf_binary_header *
bpf_jit_binary_pack_hdr(const struct bpf_prog *fp);
void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns);
-void bpf_prog_pack_free(struct bpf_binary_header *hdr);
+void bpf_prog_pack_free(void *ptr, u32 size);
static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
{
@@ -1139,7 +1139,7 @@ static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog)
return false;
if (!bpf_jit_harden)
return false;
- if (bpf_jit_harden == 1 && bpf_capable())
+ if (bpf_jit_harden == 1 && bpf_token_capable(prog->aux->token, CAP_BPF))
return false;
return true;
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 3028af87716e..c1645c86eed9 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -540,7 +540,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
struct vlan_ethhdr *veth = skb_vlan_eth_hdr(skb);
if (!eth_type_vlan(veth->h_vlan_proto))
- return -EINVAL;
+ return -ENODATA;
*vlan_tci = ntohs(veth->h_vlan_TCI);
return 0;
@@ -561,7 +561,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
return 0;
} else {
*vlan_tci = 0;
- return -EINVAL;
+ return -ENODATA;
}
}
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index ff217a5ce552..3fdd00b452ac 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -398,10 +398,17 @@ LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule)
LSM_HOOK(int, 0, bpf, int cmd, union bpf_attr *attr, unsigned int size)
LSM_HOOK(int, 0, bpf_map, struct bpf_map *map, fmode_t fmode)
LSM_HOOK(int, 0, bpf_prog, struct bpf_prog *prog)
-LSM_HOOK(int, 0, bpf_map_alloc_security, struct bpf_map *map)
-LSM_HOOK(void, LSM_RET_VOID, bpf_map_free_security, struct bpf_map *map)
-LSM_HOOK(int, 0, bpf_prog_alloc_security, struct bpf_prog_aux *aux)
-LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux)
+LSM_HOOK(int, 0, bpf_map_create, struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token)
+LSM_HOOK(void, LSM_RET_VOID, bpf_map_free, struct bpf_map *map)
+LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token)
+LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog)
+LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr,
+ struct path *path)
+LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token)
+LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd)
+LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap)
#endif /* CONFIG_BPF_SYSCALL */
LSM_HOOK(int, 0, locked_down, enum lockdown_reason what)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 820bca965fb6..01275c6e8468 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -918,7 +918,7 @@ static inline u8 get_cqe_tls_offload(struct mlx5_cqe64 *cqe)
return (cqe->tls_outer_l3_tunneled >> 3) & 0x3;
}
-static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe)
+static inline bool cqe_has_vlan(const struct mlx5_cqe64 *cqe)
{
return cqe->l4_l3_hdr_type & 0x1;
}
diff --git a/include/linux/security.h b/include/linux/security.h
index 1d1df326c881..00809d2d5c38 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -32,6 +32,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/sockptr.h>
+#include <linux/bpf.h>
struct linux_binprm;
struct cred;
@@ -2020,15 +2021,22 @@ static inline void securityfs_remove(struct dentry *dentry)
union bpf_attr;
struct bpf_map;
struct bpf_prog;
-struct bpf_prog_aux;
+struct bpf_token;
#ifdef CONFIG_SECURITY
extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size);
extern int security_bpf_map(struct bpf_map *map, fmode_t fmode);
extern int security_bpf_prog(struct bpf_prog *prog);
-extern int security_bpf_map_alloc(struct bpf_map *map);
+extern int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token);
extern void security_bpf_map_free(struct bpf_map *map);
-extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux);
-extern void security_bpf_prog_free(struct bpf_prog_aux *aux);
+extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token);
+extern void security_bpf_prog_free(struct bpf_prog *prog);
+extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
+ struct path *path);
+extern void security_bpf_token_free(struct bpf_token *token);
+extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
+extern int security_bpf_token_capable(const struct bpf_token *token, int cap);
#else
static inline int security_bpf(int cmd, union bpf_attr *attr,
unsigned int size)
@@ -2046,7 +2054,8 @@ static inline int security_bpf_prog(struct bpf_prog *prog)
return 0;
}
-static inline int security_bpf_map_alloc(struct bpf_map *map)
+static inline int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token)
{
return 0;
}
@@ -2054,13 +2063,33 @@ static inline int security_bpf_map_alloc(struct bpf_map *map)
static inline void security_bpf_map_free(struct bpf_map *map)
{ }
-static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux)
+static inline int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token)
{
return 0;
}
-static inline void security_bpf_prog_free(struct bpf_prog_aux *aux)
+static inline void security_bpf_prog_free(struct bpf_prog *prog)
{ }
+
+static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
+ struct path *path)
+{
+ return 0;
+}
+
+static inline void security_bpf_token_free(struct bpf_token *token)
+{ }
+
+static inline int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd)
+{
+ return 0;
+}
+
+static inline int security_bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ return 0;
+}
#endif /* CONFIG_SECURITY */
#endif /* CONFIG_BPF_SYSCALL */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7ce38874dbd1..ea5c8ab3ed00 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4247,10 +4247,13 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
{
const void *a = skb_metadata_end(skb_a);
const void *b = skb_metadata_end(skb_b);
- /* Using more efficient variant than plain call to memcmp(). */
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
u64 diffs = 0;
+ if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+ BITS_PER_LONG != 64)
+ goto slow;
+
+ /* Using more efficient variant than plain call to memcmp(). */
switch (meta_len) {
#define __it(x, op) (x -= sizeof(u##op))
#define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
@@ -4270,11 +4273,11 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
fallthrough;
case 4: diffs |= __it_diff(a, b, 32);
break;
+ default:
+slow:
+ return memcmp(a - meta_len, b - meta_len, meta_len);
}
return diffs;
-#else
- return memcmp(a - meta_len, b - meta_len, meta_len);
-#endif
}
static inline bool skb_metadata_differs(const struct sk_buff *skb_a,
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 349c36fb5fd8..e6770dd40c91 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -16,7 +16,7 @@
*
* The XDP RX-queue info (xdp_rxq_info) is associated with the driver
* level RX-ring queues. It is information that is specific to how
- * the driver have configured a given RX-ring queue.
+ * the driver has configured a given RX-ring queue.
*
* Each xdp_buff frame received in the driver carries a (pointer)
* reference to this xdp_rxq_info structure. This provides the XDP
@@ -32,7 +32,7 @@
* The struct is not directly tied to the XDP prog. A new XDP prog
* can be attached as long as it doesn't change the underlying
* RX-ring. If the RX-ring does change significantly, the NIC driver
- * naturally need to stop the RX-ring before purging and reallocating
+ * naturally needs to stop the RX-ring before purging and reallocating
* memory. In that process the driver MUST call unregister (which
* also applies for driver shutdown and unload). The register API is
* also mandatory during RX-ring setup.
@@ -369,7 +369,12 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp)
static inline bool xdp_metalen_invalid(unsigned long metalen)
{
- return (metalen & (sizeof(__u32) - 1)) || (metalen > 32);
+ unsigned long meta_max;
+
+ meta_max = type_max(typeof_member(struct skb_shared_info, meta_len));
+ BUILD_BUG_ON(!__builtin_constant_p(meta_max));
+
+ return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max;
}
struct xdp_attachment_info {
@@ -399,6 +404,10 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
NETDEV_XDP_RX_METADATA_HASH, \
bpf_xdp_metadata_rx_hash, \
xmo_rx_hash) \
+ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \
+ NETDEV_XDP_RX_METADATA_VLAN_TAG, \
+ bpf_xdp_metadata_rx_vlan_tag, \
+ xmo_rx_vlan_tag) \
enum xdp_rx_metadata {
#define XDP_METADATA_KFUNC(name, _, __, ___) name,
@@ -427,6 +436,7 @@ enum xdp_rss_hash_type {
XDP_RSS_L4_UDP = BIT(5),
XDP_RSS_L4_SCTP = BIT(6),
XDP_RSS_L4_IPSEC = BIT(7), /* L4 based hash include IPSEC SPI */
+ XDP_RSS_L4_ICMP = BIT(8),
/* Second part: RSS hash type combinations used for driver HW mapping */
XDP_RSS_TYPE_NONE = 0,
@@ -442,11 +452,13 @@ enum xdp_rss_hash_type {
XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
+ XDP_RSS_TYPE_L4_IPV4_ICMP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_ICMP,
XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
+ XDP_RSS_TYPE_L4_IPV6_ICMP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_ICMP,
XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR,
XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR,
@@ -457,6 +469,8 @@ struct xdp_metadata_ops {
int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type);
+ int (*xmo_rx_vlan_tag)(const struct xdp_md *ctx, __be16 *vlan_proto,
+ u16 *vlan_tci);
};
#ifdef CONFIG_NET
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 81e02de3f453..b62bb8525a5f 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -14,6 +14,12 @@
#ifdef CONFIG_XDP_SOCKETS
+struct xsk_cb_desc {
+ void *src;
+ u8 off;
+ u8 bytes;
+};
+
void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries);
bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max);
@@ -47,6 +53,12 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
xp_set_rxq_info(pool, rxq);
}
+static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
+ struct xsk_cb_desc *desc)
+{
+ xp_fill_cb(pool, desc);
+}
+
static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
@@ -274,6 +286,11 @@ static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool,
{
}
+static inline void xsk_pool_fill_cb(struct xsk_buff_pool *pool,
+ struct xsk_cb_desc *desc)
+{
+}
+
static inline unsigned int xsk_pool_get_napi_id(struct xsk_buff_pool *pool)
{
return 0;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index c9bb0f892f55..1d107241b901 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -2190,4 +2190,13 @@ static inline int register_xfrm_interface_bpf(void)
#endif
+#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF)
+int register_xfrm_state_bpf(void);
+#else
+static inline int register_xfrm_state_bpf(void)
+{
+ return 0;
+}
+#endif
+
#endif /* _NET_XFRM_H */
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 8d48d37ab7c0..99dd7376df6a 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -12,6 +12,7 @@
struct xsk_buff_pool;
struct xdp_rxq_info;
+struct xsk_cb_desc;
struct xsk_queue;
struct xdp_desc;
struct xdp_umem;
@@ -135,6 +136,7 @@ static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_p
/* AF_XDP ZC drivers, via xdp_sock_buff.h */
void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
+void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc);
int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
unsigned long attrs, struct page **pages, u32 nr_pages);
void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b1e8c5bdfc82..42f4d3090efe 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -847,6 +847,36 @@ union bpf_iter_link_info {
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
+ * BPF_TOKEN_CREATE
+ * Description
+ * Create BPF token with embedded information about what
+ * BPF-related functionality it allows:
+ * - a set of allowed bpf() syscall commands;
+ * - a set of allowed BPF map types to be created with
+ * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
+ * - a set of allowed BPF program types and BPF program attach
+ * types to be loaded with BPF_PROG_LOAD command, if
+ * BPF_PROG_LOAD itself is allowed.
+ *
+ * BPF token is created (derived) from an instance of BPF FS,
+ * assuming it has necessary delegation mount options specified.
+ * This BPF token can be passed as an extra parameter to various
+ * bpf() syscall commands to grant BPF subsystem functionality to
+ * unprivileged processes.
+ *
+ * When created, BPF token is "associated" with the owning
+ * user namespace of BPF FS instance (super block) that it was
+ * derived from, and subsequent BPF operations performed with
+ * BPF token would be performing capabilities checks (i.e.,
+ * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
+ * that user namespace. Without BPF token, such capabilities
+ * have to be granted in init user namespace, making bpf()
+ * syscall incompatible with user namespace, for the most part.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -901,6 +931,8 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_TOKEN_CREATE,
+ __MAX_BPF_CMD,
};
enum bpf_map_type {
@@ -951,6 +983,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_BLOOM_FILTER,
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
+ __MAX_BPF_MAP_TYPE
};
/* Note that tracing related programs such as
@@ -995,6 +1028,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
BPF_PROG_TYPE_NETFILTER,
+ __MAX_BPF_PROG_TYPE
};
enum bpf_attach_type {
@@ -1074,9 +1108,11 @@ enum bpf_link_type {
BPF_LINK_TYPE_TCX = 11,
BPF_LINK_TYPE_UPROBE_MULTI = 12,
BPF_LINK_TYPE_NETKIT = 13,
- MAX_BPF_LINK_TYPE,
+ __MAX_BPF_LINK_TYPE,
};
+#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE
+
enum bpf_perf_event_type {
BPF_PERF_EVENT_UNSPEC = 0,
BPF_PERF_EVENT_UPROBE = 1,
@@ -1401,6 +1437,7 @@ union bpf_attr {
* to using 5 hash functions).
*/
__u64 map_extra;
+ __u32 map_token_fd;
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -1470,6 +1507,7 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 log_true_size;
+ __u32 prog_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1582,6 +1620,7 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 btf_log_true_size;
+ __u32 btf_token_fd;
};
struct {
@@ -1712,6 +1751,11 @@ union bpf_attr {
__u32 flags; /* extra flags */
} prog_bind_map;
+ struct { /* struct used by BPF_TOKEN_CREATE command */
+ __u32 flags;
+ __u32 bpffs_fd;
+ } token_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index 424c5e28f495..93cb411adf72 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -44,10 +44,13 @@ enum netdev_xdp_act {
* timestamp via bpf_xdp_metadata_rx_timestamp().
* @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet
* hash via bpf_xdp_metadata_rx_hash().
+ * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive
+ * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
*/
enum netdev_xdp_rx_metadata {
NETDEV_XDP_RX_METADATA_TIMESTAMP = 1,
NETDEV_XDP_RX_METADATA_HASH = 2,
+ NETDEV_XDP_RX_METADATA_VLAN_TAG = 4,
};
/**
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index f526b7573e97..4ce95acfcaa7 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse
endif
CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c85ff9162a5c..13358675ff2e 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -82,7 +82,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
- bool bypass_spec_v1 = bpf_bypass_spec_v1();
+ bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL);
u64 array_size, mask64;
struct bpf_array *array;
@@ -867,11 +867,11 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
}
if (old_ptr)
- map->ops->map_fd_put_ptr(old_ptr);
+ map->ops->map_fd_put_ptr(map, old_ptr, true);
return 0;
}
-static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
+static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
void *old_ptr;
@@ -890,13 +890,18 @@ static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
}
if (old_ptr) {
- map->ops->map_fd_put_ptr(old_ptr);
+ map->ops->map_fd_put_ptr(map, old_ptr, need_defer);
return 0;
} else {
return -ENOENT;
}
}
+static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
+{
+ return __fd_array_map_delete_elem(map, key, true);
+}
+
static void *prog_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
@@ -913,8 +918,9 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map,
return prog;
}
-static void prog_fd_array_put_ptr(void *ptr)
+static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
+ /* bpf_prog is freed after one RCU or tasks trace grace period */
bpf_prog_put(ptr);
}
@@ -924,13 +930,13 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr)
}
/* decrement refcnt of all bpf_progs that are stored in this map */
-static void bpf_fd_array_map_clear(struct bpf_map *map)
+static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
for (i = 0; i < array->map.max_entries; i++)
- fd_array_map_delete_elem(map, &i);
+ __fd_array_map_delete_elem(map, &i, need_defer);
}
static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
@@ -1071,7 +1077,7 @@ static void prog_array_map_clear_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_array_aux,
work)->map;
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, true);
bpf_map_put(map);
}
@@ -1151,7 +1157,7 @@ static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
{
struct bpf_event_entry *ee;
- ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
+ ee = kzalloc(sizeof(*ee), GFP_KERNEL);
if (ee) {
ee->event = perf_file->private_data;
ee->perf_file = perf_file;
@@ -1201,8 +1207,9 @@ err_out:
return ee;
}
-static void perf_event_fd_array_put_ptr(void *ptr)
+static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
+ /* bpf_perf_event is freed after one RCU grace period */
bpf_event_entry_free_rcu(ptr);
}
@@ -1220,7 +1227,7 @@ static void perf_event_fd_array_release(struct bpf_map *map,
for (i = 0; i < array->map.max_entries; i++) {
ee = READ_ONCE(array->ptrs[i]);
if (ee && ee->map_file == map_file)
- fd_array_map_delete_elem(map, &i);
+ __fd_array_map_delete_elem(map, &i, true);
}
rcu_read_unlock();
}
@@ -1228,7 +1235,7 @@ static void perf_event_fd_array_release(struct bpf_map *map,
static void perf_event_fd_array_map_free(struct bpf_map *map)
{
if (map->map_flags & BPF_F_PRESERVE_ELEMS)
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
@@ -1256,7 +1263,7 @@ static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
return cgroup_get_from_fd(fd);
}
-static void cgroup_fd_array_put_ptr(void *ptr)
+static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* cgroup_put free cgrp after a rcu grace period */
cgroup_put(ptr);
@@ -1264,7 +1271,7 @@ static void cgroup_fd_array_put_ptr(void *ptr)
static void cgroup_fd_array_free(struct bpf_map *map)
{
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
@@ -1309,7 +1316,7 @@ static void array_of_map_free(struct bpf_map *map)
* is protected by fdget/fdput.
*/
bpf_map_meta_free(map->inner_map_meta);
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index d44fe8dd9732..28efd0a3f220 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -82,7 +82,7 @@ static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key)
int fd;
fd = *(int *)key;
- cgroup = cgroup_get_from_fd(fd);
+ cgroup = cgroup_v1v2_get_from_fd(fd);
if (IS_ERR(cgroup))
return ERR_CAST(cgroup);
@@ -101,7 +101,7 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
int fd;
fd = *(int *)key;
- cgroup = cgroup_get_from_fd(fd);
+ cgroup = cgroup_v1v2_get_from_fd(fd);
if (IS_ERR(cgroup))
return PTR_ERR(cgroup);
@@ -131,7 +131,7 @@ static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
int err, fd;
fd = *(int *)key;
- cgroup = cgroup_get_from_fd(fd);
+ cgroup = cgroup_v1v2_get_from_fd(fd);
if (IS_ERR(cgroup))
return PTR_ERR(cgroup);
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index e14c822f8911..63b4dc495125 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -260,9 +260,15 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
BTF_SET_START(sleepable_lsm_hooks)
BTF_ID(func, bpf_lsm_bpf)
BTF_ID(func, bpf_lsm_bpf_map)
-BTF_ID(func, bpf_lsm_bpf_map_alloc_security)
-BTF_ID(func, bpf_lsm_bpf_map_free_security)
+BTF_ID(func, bpf_lsm_bpf_map_create)
+BTF_ID(func, bpf_lsm_bpf_map_free)
BTF_ID(func, bpf_lsm_bpf_prog)
+BTF_ID(func, bpf_lsm_bpf_prog_load)
+BTF_ID(func, bpf_lsm_bpf_prog_free)
+BTF_ID(func, bpf_lsm_bpf_token_create)
+BTF_ID(func, bpf_lsm_bpf_token_free)
+BTF_ID(func, bpf_lsm_bpf_token_cmd)
+BTF_ID(func, bpf_lsm_bpf_token_capable)
BTF_ID(func, bpf_lsm_bprm_check_security)
BTF_ID(func, bpf_lsm_bprm_committed_creds)
BTF_ID(func, bpf_lsm_bprm_committing_creds)
@@ -298,6 +304,18 @@ BTF_ID(func, bpf_lsm_kernel_module_request)
BTF_ID(func, bpf_lsm_kernel_read_file)
BTF_ID(func, bpf_lsm_kernfs_init_security)
+#ifdef CONFIG_SECURITY_PATH
+BTF_ID(func, bpf_lsm_path_unlink)
+BTF_ID(func, bpf_lsm_path_mkdir)
+BTF_ID(func, bpf_lsm_path_rmdir)
+BTF_ID(func, bpf_lsm_path_truncate)
+BTF_ID(func, bpf_lsm_path_symlink)
+BTF_ID(func, bpf_lsm_path_link)
+BTF_ID(func, bpf_lsm_path_rename)
+BTF_ID(func, bpf_lsm_path_chmod)
+BTF_ID(func, bpf_lsm_path_chown)
+#endif /* CONFIG_SECURITY_PATH */
+
#ifdef CONFIG_KEYS
BTF_ID(func, bpf_lsm_key_free)
#endif /* CONFIG_KEYS */
@@ -345,9 +363,8 @@ BTF_ID(func, bpf_lsm_userns_create)
BTF_SET_END(sleepable_lsm_hooks)
BTF_SET_START(untrusted_lsm_hooks)
-BTF_ID(func, bpf_lsm_bpf_map_free_security)
-BTF_ID(func, bpf_lsm_bpf_prog_alloc_security)
-BTF_ID(func, bpf_lsm_bpf_prog_free_security)
+BTF_ID(func, bpf_lsm_bpf_map_free)
+BTF_ID(func, bpf_lsm_bpf_prog_free)
BTF_ID(func, bpf_lsm_file_alloc_security)
BTF_ID(func, bpf_lsm_file_free_security)
#ifdef CONFIG_SECURITY_NETWORK
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index db6176fb64dc..02068bd0e4d9 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -352,18 +352,24 @@ const struct bpf_link_ops bpf_struct_ops_link_lops = {
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct btf_func_model *model,
- void *image, void *image_end)
+ void *stub_func, void *image, void *image_end)
{
- u32 flags;
+ u32 flags = BPF_TRAMP_F_INDIRECT;
+ int size;
tlinks[BPF_TRAMP_FENTRY].links[0] = link;
tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
- /* BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
- * and it must be used alone.
- */
- flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
+
+ if (model->ret_size > 0)
+ flags |= BPF_TRAMP_F_RET_FENTRY_RET;
+
+ size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
+ if (size < 0)
+ return size;
+ if (size > (unsigned long)image_end - (unsigned long)image)
+ return -E2BIG;
return arch_prepare_bpf_trampoline(NULL, image, image_end,
- model, flags, tlinks, NULL);
+ model, flags, tlinks, stub_func);
}
static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
@@ -497,11 +503,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[i],
+ *(void **)(st_ops->cfi_stubs + moff),
image, image_end);
if (err < 0)
goto reset_unlock;
- *(void **)(kdata + moff) = image;
+ *(void **)(kdata + moff) = image + cfi_get_offset();
image += err;
/* put prog_id to udata */
@@ -515,7 +522,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
if (err)
goto reset_unlock;
}
- set_memory_rox((long)st_map->image, 1);
+ arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
/* Let bpf_link handle registration & unregistration.
*
* Pair with smp_load_acquire() during lookup_elem().
@@ -524,7 +531,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto unlock;
}
- set_memory_rox((long)st_map->image, 1);
+ arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
err = st_ops->reg(kdata);
if (likely(!err)) {
/* This refcnt increment on the map here after
@@ -547,8 +554,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
* there was a race in registering the struct_ops (under the same name) to
* a sub-system through different struct_ops's maps.
*/
- set_memory_nx((long)st_map->image, 1);
- set_memory_rw((long)st_map->image, 1);
+ arch_unprotect_bpf_trampoline(st_map->image, PAGE_SIZE);
reset_unlock:
bpf_struct_ops_map_put_progs(st_map);
@@ -616,7 +622,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
bpf_struct_ops_map_put_progs(st_map);
bpf_map_area_free(st_map->links);
if (st_map->image) {
- bpf_jit_free_exec(st_map->image);
+ arch_free_bpf_trampoline(st_map->image, PAGE_SIZE);
bpf_jit_uncharge_modmem(PAGE_SIZE);
}
bpf_map_area_free(st_map->uvalue);
@@ -691,7 +697,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
return ERR_PTR(ret);
}
- st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
+ st_map->image = arch_alloc_bpf_trampoline(PAGE_SIZE);
if (!st_map->image) {
/* __bpf_struct_ops_map_free() uses st_map->image as flag
* for "charged or not". In this case, we need to unchange
@@ -711,7 +717,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
}
mutex_init(&st_map->lock);
- set_vm_flush_reset_perms(st_map->image);
bpf_map_init_from_attr(map, attr);
return map;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 63cf4128fc05..d56433bf8aba 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6956,7 +6956,7 @@ int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
* (either PTR_TO_CTX or SCALAR_VALUE).
*/
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
- struct bpf_reg_state *regs, bool is_ex_cb)
+ struct bpf_reg_state *regs, u32 *arg_cnt)
{
struct bpf_verifier_log *log = &env->log;
struct bpf_prog *prog = env->prog;
@@ -7013,6 +7013,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
tname, nargs, MAX_BPF_FUNC_REG_ARGS);
return -EINVAL;
}
+ *arg_cnt = nargs;
/* check that function returns int, exception cb also requires this */
t = btf_type_by_id(btf, t->type);
while (btf_type_is_modifier(t))
@@ -7062,14 +7063,6 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
i, btf_type_str(t), tname);
return -EINVAL;
}
- /* We have already ensured that the callback returns an integer, just
- * like all global subprogs. We need to determine it only has a single
- * scalar argument.
- */
- if (is_ex_cb && (nargs != 1 || regs[BPF_REG_1].type != SCALAR_VALUE)) {
- bpf_log(log, "exception cb only supports single integer argument\n");
- return -EINVAL;
- }
return 0;
}
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 491d20038cbe..98e0e3835b28 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1630,7 +1630,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -2191,7 +2191,7 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -2348,7 +2348,7 @@ cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_event_output_data_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index fe254ae035fe..14ace23d517b 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -121,6 +121,9 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
#endif
INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
+#ifdef CONFIG_FINEIBT
+ INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode);
+#endif
mutex_init(&fp->aux->used_maps_mutex);
mutex_init(&fp->aux->dst_mutex);
@@ -679,7 +682,7 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
if (!bpf_prog_kallsyms_candidate(fp) ||
- !bpf_capable())
+ !bpf_token_capable(fp->aux->token, CAP_BPF))
return;
bpf_prog_ksym_set_addr(fp);
@@ -687,6 +690,23 @@ void bpf_prog_kallsyms_add(struct bpf_prog *fp)
fp->aux->ksym.prog = true;
bpf_ksym_add(&fp->aux->ksym);
+
+#ifdef CONFIG_FINEIBT
+ /*
+ * When FineIBT, code in the __cfi_foo() symbols can get executed
+ * and hence unwinder needs help.
+ */
+ if (cfi_mode != CFI_FINEIBT)
+ return;
+
+ snprintf(fp->aux->ksym_prefix.name, KSYM_NAME_LEN,
+ "__cfi_%s", fp->aux->ksym.name);
+
+ fp->aux->ksym_prefix.start = (unsigned long) fp->bpf_func - 16;
+ fp->aux->ksym_prefix.end = (unsigned long) fp->bpf_func;
+
+ bpf_ksym_add(&fp->aux->ksym_prefix);
+#endif
}
void bpf_prog_kallsyms_del(struct bpf_prog *fp)
@@ -695,6 +715,11 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp)
return;
bpf_ksym_del(&fp->aux->ksym);
+#ifdef CONFIG_FINEIBT
+ if (cfi_mode != CFI_FINEIBT)
+ return;
+ bpf_ksym_del(&fp->aux->ksym_prefix);
+#endif
}
static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
@@ -932,20 +957,20 @@ out:
return ptr;
}
-void bpf_prog_pack_free(struct bpf_binary_header *hdr)
+void bpf_prog_pack_free(void *ptr, u32 size)
{
struct bpf_prog_pack *pack = NULL, *tmp;
unsigned int nbits;
unsigned long pos;
mutex_lock(&pack_mutex);
- if (hdr->size > BPF_PROG_PACK_SIZE) {
- bpf_jit_free_exec(hdr);
+ if (size > BPF_PROG_PACK_SIZE) {
+ bpf_jit_free_exec(ptr);
goto out;
}
list_for_each_entry(tmp, &pack_list, list) {
- if ((void *)hdr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > (void *)hdr) {
+ if (ptr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > ptr) {
pack = tmp;
break;
}
@@ -954,10 +979,10 @@ void bpf_prog_pack_free(struct bpf_binary_header *hdr)
if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
goto out;
- nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
- pos = ((unsigned long)hdr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT;
+ nbits = BPF_PROG_SIZE_TO_NBITS(size);
+ pos = ((unsigned long)ptr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT;
- WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size),
+ WARN_ONCE(bpf_arch_text_invalidate(ptr, size),
"bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");
bitmap_clear(pack->bitmap, pos, nbits);
@@ -1104,8 +1129,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
*rw_header = kvmalloc(size, GFP_KERNEL);
if (!*rw_header) {
- bpf_arch_text_copy(&ro_header->size, &size, sizeof(size));
- bpf_prog_pack_free(ro_header);
+ bpf_prog_pack_free(ro_header, size);
bpf_jit_uncharge_modmem(size);
return NULL;
}
@@ -1136,7 +1160,7 @@ int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
kvfree(rw_header);
if (IS_ERR(ptr)) {
- bpf_prog_pack_free(ro_header);
+ bpf_prog_pack_free(ro_header, ro_header->size);
return PTR_ERR(ptr);
}
return 0;
@@ -1157,7 +1181,7 @@ void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
{
u32 size = ro_header->size;
- bpf_prog_pack_free(ro_header);
+ bpf_prog_pack_free(ro_header, size);
kvfree(rw_header);
bpf_jit_uncharge_modmem(size);
}
@@ -2668,12 +2692,16 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
struct bpf_map **used_maps, u32 len)
{
struct bpf_map *map;
+ bool sleepable;
u32 i;
+ sleepable = aux->sleepable;
for (i = 0; i < len; i++) {
map = used_maps[i];
if (map->ops->map_poke_untrack)
map->ops->map_poke_untrack(map, aux);
+ if (sleepable)
+ atomic64_dec(&map->sleepable_refcnt);
bpf_map_put(map);
}
}
@@ -2751,6 +2779,7 @@ void bpf_prog_free(struct bpf_prog *fp)
if (aux->dst_prog)
bpf_prog_put(aux->dst_prog);
+ bpf_token_put(aux->token);
INIT_WORK(&aux->work, bpf_prog_free_deferred);
schedule_work(&aux->work);
}
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index e01c741e54e7..2e73533a3811 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -96,6 +96,12 @@ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask)
migrate_enable();
}
+__bpf_kfunc void bpf_cpumask_release_dtor(void *cpumask)
+{
+ bpf_cpumask_release(cpumask);
+}
+CFI_NOSEAL(bpf_cpumask_release_dtor);
+
/**
* bpf_cpumask_first() - Get the index of the first nonzero bit in the cpumask.
* @cpumask: The cpumask being queried.
@@ -405,6 +411,17 @@ __bpf_kfunc u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
return cpumask_any_and_distribute(src1, src2);
}
+/**
+ * bpf_cpumask_weight() - Return the number of bits in @cpumask.
+ * @cpumask: The cpumask being queried.
+ *
+ * Count the number of set bits in the given cpumask.
+ */
+__bpf_kfunc u32 bpf_cpumask_weight(const struct cpumask *cpumask)
+{
+ return cpumask_weight(cpumask);
+}
+
__bpf_kfunc_end_defs();
BTF_SET8_START(cpumask_kfunc_btf_ids)
@@ -432,6 +449,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_weight, KF_RCU)
BTF_SET8_END(cpumask_kfunc_btf_ids)
static const struct btf_kfunc_id_set cpumask_kfunc_set = {
@@ -441,7 +459,7 @@ static const struct btf_kfunc_id_set cpumask_kfunc_set = {
BTF_ID_LIST(cpumask_dtor_ids)
BTF_ID(struct, bpf_cpumask)
-BTF_ID(func, bpf_cpumask_release)
+BTF_ID(func, bpf_cpumask_release_dtor)
static int __init cpumask_kfunc_init(void)
{
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index fa3e9225aedc..70fb82bf1637 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -150,14 +150,11 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
goto out;
d->rw_image = bpf_jit_alloc_exec(PAGE_SIZE);
if (!d->rw_image) {
- u32 size = PAGE_SIZE;
-
- bpf_arch_text_copy(d->image, &size, sizeof(size));
- bpf_prog_pack_free((struct bpf_binary_header *)d->image);
+ bpf_prog_pack_free(d->image, PAGE_SIZE);
d->image = NULL;
goto out;
}
- bpf_image_ksym_add(d->image, &d->ksym);
+ bpf_image_ksym_add(d->image, PAGE_SIZE, &d->ksym);
}
prev_num_progs = d->num_progs;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fd8d4b0addfc..ec3bdcc6a3cf 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -897,7 +897,7 @@ static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
if (map->ops->map_fd_put_ptr) {
ptr = fd_htab_map_get_ptr(map, l);
- map->ops->map_fd_put_ptr(ptr);
+ map->ops->map_fd_put_ptr(map, ptr, true);
}
}
@@ -2484,7 +2484,7 @@ static void fd_htab_map_free(struct bpf_map *map)
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
void *ptr = fd_htab_map_get_ptr(map, l);
- map->ops->map_fd_put_ptr(ptr);
+ map->ops->map_fd_put_ptr(map, ptr, false);
}
}
@@ -2523,9 +2523,15 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
if (IS_ERR(ptr))
return PTR_ERR(ptr);
+ /* The htab bucket lock is always held during update operations in fd
+ * htab map, and the following rcu_read_lock() is only used to avoid
+ * the WARN_ON_ONCE in htab_map_update_elem().
+ */
+ rcu_read_lock();
ret = htab_map_update_elem(map, key, &ptr, map_flags);
+ rcu_read_unlock();
if (ret)
- map->ops->map_fd_put_ptr(ptr);
+ map->ops->map_fd_put_ptr(map, ptr, false);
return ret;
}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index b45a8381f9bd..07fd4b5704f3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -32,12 +32,13 @@
*
* Different map implementations will rely on rcu in map methods
* lookup/update/delete, therefore eBPF programs must run under rcu lock
- * if program is allowed to access maps, so check rcu_read_lock_held in
- * all three functions.
+ * if program is allowed to access maps, so check rcu_read_lock_held() or
+ * rcu_read_lock_trace_held() in all three functions.
*/
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
return (unsigned long) map->ops->map_lookup_elem(map, key);
}
@@ -53,7 +54,8 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = {
BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
void *, value, u64, flags)
{
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
return map->ops->map_update_elem(map, key, value, flags);
}
@@ -70,7 +72,8 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
- WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+ !rcu_read_lock_bh_held());
return map->ops->map_delete_elem(map, key);
}
@@ -1676,7 +1679,7 @@ const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
const struct bpf_func_proto *
-bpf_base_func_proto(enum bpf_func_id func_id)
+bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
@@ -1727,7 +1730,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
break;
}
- if (!bpf_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_BPF))
return NULL;
switch (func_id) {
@@ -1785,7 +1788,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
break;
}
- if (!perfmon_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_PERFMON))
return NULL;
switch (func_id) {
@@ -2147,6 +2150,12 @@ __bpf_kfunc void bpf_task_release(struct task_struct *p)
put_task_struct_rcu_user(p);
}
+__bpf_kfunc void bpf_task_release_dtor(void *p)
+{
+ put_task_struct_rcu_user(p);
+}
+CFI_NOSEAL(bpf_task_release_dtor);
+
#ifdef CONFIG_CGROUPS
/**
* bpf_cgroup_acquire - Acquire a reference to a cgroup. A cgroup acquired by
@@ -2171,6 +2180,12 @@ __bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp)
cgroup_put(cgrp);
}
+__bpf_kfunc void bpf_cgroup_release_dtor(void *cgrp)
+{
+ cgroup_put(cgrp);
+}
+CFI_NOSEAL(bpf_cgroup_release_dtor);
+
/**
* bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor
* array. A cgroup returned by this kfunc which is not subsequently stored in a
@@ -2522,7 +2537,7 @@ __bpf_kfunc void bpf_throw(u64 cookie)
* which skips compiler generated instrumentation to do the same.
*/
kasan_unpoison_task_stack_below((void *)(long)ctx.sp);
- ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp);
+ ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp, 0, 0);
WARN(1, "A call to BPF exception callback should never return\n");
}
@@ -2567,10 +2582,10 @@ static const struct btf_kfunc_id_set generic_kfunc_set = {
BTF_ID_LIST(generic_dtor_ids)
BTF_ID(struct, task_struct)
-BTF_ID(func, bpf_task_release)
+BTF_ID(func, bpf_task_release_dtor)
#ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup)
-BTF_ID(func, bpf_cgroup_release)
+BTF_ID(func, bpf_cgroup_release_dtor)
#endif
BTF_SET8_START(common_btf_ids)
@@ -2627,6 +2642,7 @@ static int __init kfunc_init(void)
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
ARRAY_SIZE(generic_dtors),
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 1aafb2ff2e95..4383b3d13a55 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -20,6 +20,7 @@
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
+#include <linux/kstrtox.h>
#include "preload/bpf_preload.h"
enum bpf_type {
@@ -98,9 +99,9 @@ static const struct inode_operations bpf_prog_iops = { };
static const struct inode_operations bpf_map_iops = { };
static const struct inode_operations bpf_link_iops = { };
-static struct inode *bpf_get_inode(struct super_block *sb,
- const struct inode *dir,
- umode_t mode)
+struct inode *bpf_get_inode(struct super_block *sb,
+ const struct inode *dir,
+ umode_t mode)
{
struct inode *inode;
@@ -594,15 +595,183 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ
}
EXPORT_SYMBOL(bpf_prog_get_type_path);
+struct bpffs_btf_enums {
+ const struct btf *btf;
+ const struct btf_type *cmd_t;
+ const struct btf_type *map_t;
+ const struct btf_type *prog_t;
+ const struct btf_type *attach_t;
+};
+
+static int find_bpffs_btf_enums(struct bpffs_btf_enums *info)
+{
+ const struct btf *btf;
+ const struct btf_type *t;
+ const char *name;
+ int i, n;
+
+ memset(info, 0, sizeof(*info));
+
+ btf = bpf_get_btf_vmlinux();
+ if (IS_ERR(btf))
+ return PTR_ERR(btf);
+ if (!btf)
+ return -ENOENT;
+
+ info->btf = btf;
+
+ for (i = 1, n = btf_nr_types(btf); i < n; i++) {
+ t = btf_type_by_id(btf, i);
+ if (!btf_type_is_enum(t))
+ continue;
+
+ name = btf_name_by_offset(btf, t->name_off);
+ if (!name)
+ continue;
+
+ if (strcmp(name, "bpf_cmd") == 0)
+ info->cmd_t = t;
+ else if (strcmp(name, "bpf_map_type") == 0)
+ info->map_t = t;
+ else if (strcmp(name, "bpf_prog_type") == 0)
+ info->prog_t = t;
+ else if (strcmp(name, "bpf_attach_type") == 0)
+ info->attach_t = t;
+ else
+ continue;
+
+ if (info->cmd_t && info->map_t && info->prog_t && info->attach_t)
+ return 0;
+ }
+
+ return -ESRCH;
+}
+
+static bool find_btf_enum_const(const struct btf *btf, const struct btf_type *enum_t,
+ const char *prefix, const char *str, int *value)
+{
+ const struct btf_enum *e;
+ const char *name;
+ int i, n, pfx_len = strlen(prefix);
+
+ *value = 0;
+
+ if (!btf || !enum_t)
+ return false;
+
+ for (i = 0, n = btf_vlen(enum_t); i < n; i++) {
+ e = &btf_enum(enum_t)[i];
+
+ name = btf_name_by_offset(btf, e->name_off);
+ if (!name || strncasecmp(name, prefix, pfx_len) != 0)
+ continue;
+
+ /* match symbolic name case insensitive and ignoring prefix */
+ if (strcasecmp(name + pfx_len, str) == 0) {
+ *value = e->val;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void seq_print_delegate_opts(struct seq_file *m,
+ const char *opt_name,
+ const struct btf *btf,
+ const struct btf_type *enum_t,
+ const char *prefix,
+ u64 delegate_msk, u64 any_msk)
+{
+ const struct btf_enum *e;
+ bool first = true;
+ const char *name;
+ u64 msk;
+ int i, n, pfx_len = strlen(prefix);
+
+ delegate_msk &= any_msk; /* clear unknown bits */
+
+ if (delegate_msk == 0)
+ return;
+
+ seq_printf(m, ",%s", opt_name);
+ if (delegate_msk == any_msk) {
+ seq_printf(m, "=any");
+ return;
+ }
+
+ if (btf && enum_t) {
+ for (i = 0, n = btf_vlen(enum_t); i < n; i++) {
+ e = &btf_enum(enum_t)[i];
+ name = btf_name_by_offset(btf, e->name_off);
+ if (!name || strncasecmp(name, prefix, pfx_len) != 0)
+ continue;
+ msk = 1ULL << e->val;
+ if (delegate_msk & msk) {
+ /* emit lower-case name without prefix */
+ seq_printf(m, "%c", first ? '=' : ':');
+ name += pfx_len;
+ while (*name) {
+ seq_printf(m, "%c", tolower(*name));
+ name++;
+ }
+
+ delegate_msk &= ~msk;
+ first = false;
+ }
+ }
+ }
+ if (delegate_msk)
+ seq_printf(m, "%c0x%llx", first ? '=' : ':', delegate_msk);
+}
+
/*
* Display the mount options in /proc/mounts.
*/
static int bpf_show_options(struct seq_file *m, struct dentry *root)
{
- umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX;
-
+ struct bpf_mount_opts *opts = root->d_sb->s_fs_info;
+ struct inode *inode = d_inode(root);
+ umode_t mode = inode->i_mode & S_IALLUGO & ~S_ISVTX;
+ u64 mask;
+
+ if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID))
+ seq_printf(m, ",uid=%u",
+ from_kuid_munged(&init_user_ns, inode->i_uid));
+ if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID))
+ seq_printf(m, ",gid=%u",
+ from_kgid_munged(&init_user_ns, inode->i_gid));
if (mode != S_IRWXUGO)
seq_printf(m, ",mode=%o", mode);
+
+ if (opts->delegate_cmds || opts->delegate_maps ||
+ opts->delegate_progs || opts->delegate_attachs) {
+ struct bpffs_btf_enums info;
+
+ /* ignore errors, fallback to hex */
+ (void)find_bpffs_btf_enums(&info);
+
+ mask = (1ULL << __MAX_BPF_CMD) - 1;
+ seq_print_delegate_opts(m, "delegate_cmds",
+ info.btf, info.cmd_t, "BPF_",
+ opts->delegate_cmds, mask);
+
+ mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1;
+ seq_print_delegate_opts(m, "delegate_maps",
+ info.btf, info.map_t, "BPF_MAP_TYPE_",
+ opts->delegate_maps, mask);
+
+ mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1;
+ seq_print_delegate_opts(m, "delegate_progs",
+ info.btf, info.prog_t, "BPF_PROG_TYPE_",
+ opts->delegate_progs, mask);
+
+ mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1;
+ seq_print_delegate_opts(m, "delegate_attachs",
+ info.btf, info.attach_t, "BPF_",
+ opts->delegate_attachs, mask);
+ }
+
return 0;
}
@@ -617,7 +786,7 @@ static void bpf_free_inode(struct inode *inode)
free_inode_nonrcu(inode);
}
-static const struct super_operations bpf_super_ops = {
+const struct super_operations bpf_super_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.show_options = bpf_show_options,
@@ -625,23 +794,33 @@ static const struct super_operations bpf_super_ops = {
};
enum {
+ OPT_UID,
+ OPT_GID,
OPT_MODE,
+ OPT_DELEGATE_CMDS,
+ OPT_DELEGATE_MAPS,
+ OPT_DELEGATE_PROGS,
+ OPT_DELEGATE_ATTACHS,
};
static const struct fs_parameter_spec bpf_fs_parameters[] = {
+ fsparam_u32 ("uid", OPT_UID),
+ fsparam_u32 ("gid", OPT_GID),
fsparam_u32oct ("mode", OPT_MODE),
+ fsparam_string ("delegate_cmds", OPT_DELEGATE_CMDS),
+ fsparam_string ("delegate_maps", OPT_DELEGATE_MAPS),
+ fsparam_string ("delegate_progs", OPT_DELEGATE_PROGS),
+ fsparam_string ("delegate_attachs", OPT_DELEGATE_ATTACHS),
{}
};
-struct bpf_mount_opts {
- umode_t mode;
-};
-
static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- struct bpf_mount_opts *opts = fc->fs_private;
+ struct bpf_mount_opts *opts = fc->s_fs_info;
struct fs_parse_result result;
- int opt;
+ kuid_t uid;
+ kgid_t gid;
+ int opt, err;
opt = fs_parse(fc, bpf_fs_parameters, param, &result);
if (opt < 0) {
@@ -662,12 +841,104 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param)
}
switch (opt) {
+ case OPT_UID:
+ uid = make_kuid(current_user_ns(), result.uint_32);
+ if (!uid_valid(uid))
+ goto bad_value;
+
+ /*
+ * The requested uid must be representable in the
+ * filesystem's idmapping.
+ */
+ if (!kuid_has_mapping(fc->user_ns, uid))
+ goto bad_value;
+
+ opts->uid = uid;
+ break;
+ case OPT_GID:
+ gid = make_kgid(current_user_ns(), result.uint_32);
+ if (!gid_valid(gid))
+ goto bad_value;
+
+ /*
+ * The requested gid must be representable in the
+ * filesystem's idmapping.
+ */
+ if (!kgid_has_mapping(fc->user_ns, gid))
+ goto bad_value;
+
+ opts->gid = gid;
+ break;
case OPT_MODE:
opts->mode = result.uint_32 & S_IALLUGO;
break;
+ case OPT_DELEGATE_CMDS:
+ case OPT_DELEGATE_MAPS:
+ case OPT_DELEGATE_PROGS:
+ case OPT_DELEGATE_ATTACHS: {
+ struct bpffs_btf_enums info;
+ const struct btf_type *enum_t;
+ const char *enum_pfx;
+ u64 *delegate_msk, msk = 0;
+ char *p;
+ int val;
+
+ /* ignore errors, fallback to hex */
+ (void)find_bpffs_btf_enums(&info);
+
+ switch (opt) {
+ case OPT_DELEGATE_CMDS:
+ delegate_msk = &opts->delegate_cmds;
+ enum_t = info.cmd_t;
+ enum_pfx = "BPF_";
+ break;
+ case OPT_DELEGATE_MAPS:
+ delegate_msk = &opts->delegate_maps;
+ enum_t = info.map_t;
+ enum_pfx = "BPF_MAP_TYPE_";
+ break;
+ case OPT_DELEGATE_PROGS:
+ delegate_msk = &opts->delegate_progs;
+ enum_t = info.prog_t;
+ enum_pfx = "BPF_PROG_TYPE_";
+ break;
+ case OPT_DELEGATE_ATTACHS:
+ delegate_msk = &opts->delegate_attachs;
+ enum_t = info.attach_t;
+ enum_pfx = "BPF_";
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ while ((p = strsep(&param->string, ":"))) {
+ if (strcmp(p, "any") == 0) {
+ msk |= ~0ULL;
+ } else if (find_btf_enum_const(info.btf, enum_t, enum_pfx, p, &val)) {
+ msk |= 1ULL << val;
+ } else {
+ err = kstrtou64(p, 0, &msk);
+ if (err)
+ return err;
+ }
+ }
+
+ /* Setting delegation mount options requires privileges */
+ if (msk && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ *delegate_msk |= msk;
+ break;
+ }
+ default:
+ /* ignore unknown mount options */
+ break;
}
return 0;
+
+bad_value:
+ return invalfc(fc, "Bad value for '%s'", param->key);
}
struct bpf_preload_ops *bpf_preload_ops;
@@ -739,10 +1010,14 @@ out:
static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
{
static const struct tree_descr bpf_rfiles[] = { { "" } };
- struct bpf_mount_opts *opts = fc->fs_private;
+ struct bpf_mount_opts *opts = sb->s_fs_info;
struct inode *inode;
int ret;
+ /* Mounting an instance of BPF FS requires privileges */
+ if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
if (ret)
return ret;
@@ -750,6 +1025,8 @@ static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_op = &bpf_super_ops;
inode = sb->s_root->d_inode;
+ inode->i_uid = opts->uid;
+ inode->i_gid = opts->gid;
inode->i_op = &bpf_dir_iops;
inode->i_mode &= ~S_IALLUGO;
populate_bpffs(sb->s_root);
@@ -764,7 +1041,7 @@ static int bpf_get_tree(struct fs_context *fc)
static void bpf_free_fc(struct fs_context *fc)
{
- kfree(fc->fs_private);
+ kfree(fc->s_fs_info);
}
static const struct fs_context_operations bpf_context_ops = {
@@ -785,18 +1062,35 @@ static int bpf_init_fs_context(struct fs_context *fc)
return -ENOMEM;
opts->mode = S_IRWXUGO;
+ opts->uid = current_fsuid();
+ opts->gid = current_fsgid();
+
+ /* start out with no BPF token delegation enabled */
+ opts->delegate_cmds = 0;
+ opts->delegate_maps = 0;
+ opts->delegate_progs = 0;
+ opts->delegate_attachs = 0;
- fc->fs_private = opts;
+ fc->s_fs_info = opts;
fc->ops = &bpf_context_ops;
return 0;
}
+static void bpf_kill_super(struct super_block *sb)
+{
+ struct bpf_mount_opts *opts = sb->s_fs_info;
+
+ kill_litter_super(sb);
+ kfree(opts);
+}
+
static struct file_system_type bpf_fs_type = {
.owner = THIS_MODULE,
.name = "bpf",
.init_fs_context = bpf_init_fs_context,
.parameters = bpf_fs_parameters,
- .kill_sb = kill_litter_super,
+ .kill_sb = bpf_kill_super,
+ .fs_flags = FS_USERNS_MOUNT,
};
static int __init bpf_init(void)
diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c
index 3505f3e5ae96..594a234f122b 100644
--- a/kernel/bpf/log.c
+++ b/kernel/bpf/log.c
@@ -539,6 +539,19 @@ static void verbose_snum(struct bpf_verifier_env *env, s64 num)
verbose(env, "%#llx", num);
}
+int tnum_strn(char *str, size_t size, struct tnum a)
+{
+ /* print as a constant, if tnum is fully known */
+ if (a.mask == 0) {
+ if (is_unum_decimal(a.value))
+ return snprintf(str, size, "%llu", a.value);
+ else
+ return snprintf(str, size, "%#llx", a.value);
+ }
+ return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask);
+}
+EXPORT_SYMBOL_GPL(tnum_strn);
+
static void print_scalar_ranges(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
const char **sep)
@@ -615,6 +628,12 @@ static bool type_is_map_ptr(enum bpf_reg_type t) {
}
}
+/*
+ * _a stands for append, was shortened to avoid multiline statements below.
+ * This macro is used to output a comma separated list of attributes.
+ */
+#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, ##__VA_ARGS__); sep = ","; })
+
static void print_reg_state(struct bpf_verifier_env *env,
const struct bpf_func_state *state,
const struct bpf_reg_state *reg)
@@ -630,11 +649,6 @@ static void print_reg_state(struct bpf_verifier_env *env,
verbose_snum(env, reg->var_off.value + reg->off);
return;
}
-/*
- * _a stands for append, was shortened to avoid multiline statements below.
- * This macro is used to output a comma separated list of attributes.
- */
-#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, ##__VA_ARGS__); sep = ","; })
verbose(env, "%s", reg_type_str(env, t));
if (t == PTR_TO_STACK) {
@@ -669,6 +683,12 @@ static void print_reg_state(struct bpf_verifier_env *env,
verbose_a("r=");
verbose_unum(env, reg->range);
}
+ if (base_type(t) == PTR_TO_MEM) {
+ verbose_a("sz=");
+ verbose_unum(env, reg->mem_size);
+ }
+ if (t == CONST_PTR_TO_DYNPTR)
+ verbose_a("type=%s", dynptr_type_str(reg->dynptr.type));
if (tnum_is_const(reg->var_off)) {
/* a pointer register with fixed offset */
if (reg->var_off.value) {
@@ -685,8 +705,6 @@ static void print_reg_state(struct bpf_verifier_env *env,
}
}
verbose(env, ")");
-
-#undef verbose_a
}
void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state,
@@ -710,6 +728,7 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_st
}
for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
char types_buf[BPF_REG_SIZE + 1];
+ const char *sep = "";
bool valid = false;
u8 slot_type;
int j;
@@ -748,9 +767,14 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_st
verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
print_liveness(env, reg->live);
- verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type));
+ verbose(env, "=dynptr_%s(", dynptr_type_str(reg->dynptr.type));
+ if (reg->id)
+ verbose_a("id=%d", reg->id);
if (reg->ref_obj_id)
- verbose(env, "(ref_id=%d)", reg->ref_obj_id);
+ verbose_a("ref_id=%d", reg->ref_obj_id);
+ if (reg->dynptr_id)
+ verbose_a("dynptr_id=%d", reg->dynptr_id);
+ verbose(env, ")");
break;
case STACK_ITER:
/* only main slot has ref_obj_id set; skip others */
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index cd5eafaba97e..8ef269e66ba5 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -127,12 +127,21 @@ void *bpf_map_fd_get_ptr(struct bpf_map *map,
return inner_map;
}
-void bpf_map_fd_put_ptr(void *ptr)
+void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
- /* ptr->ops->map_free() has to go through one
- * rcu grace period by itself.
+ struct bpf_map *inner_map = ptr;
+
+ /* Defer the freeing of inner map according to the sleepable attribute
+ * of bpf program which owns the outer map, so unnecessary waiting for
+ * RCU tasks trace grace period can be avoided.
*/
- bpf_map_put(ptr);
+ if (need_defer) {
+ if (atomic64_read(&map->sleepable_refcnt))
+ WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true);
+ else
+ WRITE_ONCE(inner_map->free_after_rcu_gp, true);
+ }
+ bpf_map_put(inner_map);
}
u32 bpf_map_fd_sys_lookup_elem(void *ptr)
diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h
index bcb7534afb3c..7d61602354de 100644
--- a/kernel/bpf/map_in_map.h
+++ b/kernel/bpf/map_in_map.h
@@ -13,7 +13,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd);
void bpf_map_meta_free(struct bpf_map *map_meta);
void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file,
int ufd);
-void bpf_map_fd_put_ptr(void *ptr);
+void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer);
u32 bpf_map_fd_sys_lookup_elem(void *ptr);
#endif
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5e43ddd1b83f..8faa1a20edf8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -142,9 +142,13 @@ static u32 bpf_map_value_size(const struct bpf_map *map)
static void maybe_wait_bpf_programs(struct bpf_map *map)
{
- /* Wait for any running BPF programs to complete so that
- * userspace, when we return to it, knows that all programs
- * that could be running use the new map value.
+ /* Wait for any running non-sleepable BPF programs to complete so that
+ * userspace, when we return to it, knows that all non-sleepable
+ * programs that could be running use the new map value. For sleepable
+ * BPF programs, synchronize_rcu_tasks_trace() should be used to wait
+ * for the completions of these programs, but considering the waiting
+ * time can be very long and userspace may think it will hang forever,
+ * so don't handle sleepable BPF programs now.
*/
if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
@@ -180,15 +184,11 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
err = bpf_percpu_cgroup_storage_update(map, key, value,
flags);
} else if (IS_FD_ARRAY(map)) {
- rcu_read_lock();
err = bpf_fd_array_map_update_elem(map, map_file, key, value,
flags);
- rcu_read_unlock();
} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
- rcu_read_lock();
err = bpf_fd_htab_map_update_elem(map, map_file, key, value,
flags);
- rcu_read_unlock();
} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
/* rcu_read_lock() is not needed */
err = bpf_fd_reuseport_array_update_elem(map, key, value,
@@ -203,7 +203,6 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
rcu_read_unlock();
}
bpf_enable_instrumentation();
- maybe_wait_bpf_programs(map);
return err;
}
@@ -264,7 +263,6 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
}
bpf_enable_instrumentation();
- maybe_wait_bpf_programs(map);
return err;
}
@@ -694,6 +692,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_map, work);
struct btf_record *rec = map->record;
+ struct btf *btf = map->btf;
security_bpf_map_free(map);
bpf_map_release_memcg(map);
@@ -709,6 +708,10 @@ static void bpf_map_free_deferred(struct work_struct *work)
* template bpf_map struct used during verification.
*/
btf_record_free(rec);
+ /* Delay freeing of btf for maps, as map_free callback may need
+ * struct_meta info which will be freed with btf_put().
+ */
+ btf_put(btf);
}
static void bpf_map_put_uref(struct bpf_map *map)
@@ -719,6 +722,28 @@ static void bpf_map_put_uref(struct bpf_map *map)
}
}
+static void bpf_map_free_in_work(struct bpf_map *map)
+{
+ INIT_WORK(&map->work, bpf_map_free_deferred);
+ /* Avoid spawning kworkers, since they all might contend
+ * for the same mutex like slab_mutex.
+ */
+ queue_work(system_unbound_wq, &map->work);
+}
+
+static void bpf_map_free_rcu_gp(struct rcu_head *rcu)
+{
+ bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu));
+}
+
+static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu)
+{
+ if (rcu_trace_implies_rcu_gp())
+ bpf_map_free_rcu_gp(rcu);
+ else
+ call_rcu(rcu, bpf_map_free_rcu_gp);
+}
+
/* decrement map refcnt and schedule it for freeing via workqueue
* (underlying map implementation ops->map_free() might sleep)
*/
@@ -727,12 +752,14 @@ void bpf_map_put(struct bpf_map *map)
if (atomic64_dec_and_test(&map->refcnt)) {
/* bpf_map_free_id() must be called first */
bpf_map_free_id(map);
- btf_put(map->btf);
- INIT_WORK(&map->work, bpf_map_free_deferred);
- /* Avoid spawning kworkers, since they all might contend
- * for the same mutex like slab_mutex.
- */
- queue_work(system_unbound_wq, &map->work);
+
+ WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt));
+ if (READ_ONCE(map->free_after_mult_rcu_gp))
+ call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp);
+ else if (READ_ONCE(map->free_after_rcu_gp))
+ call_rcu(&map->rcu, bpf_map_free_rcu_gp);
+ else
+ bpf_map_free_in_work(map);
}
}
EXPORT_SYMBOL_GPL(bpf_map_put);
@@ -984,8 +1011,8 @@ int map_check_no_btf(const struct bpf_map *map,
return -ENOTSUPP;
}
-static int map_check_btf(struct bpf_map *map, const struct btf *btf,
- u32 btf_key_id, u32 btf_value_id)
+static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
+ const struct btf *btf, u32 btf_key_id, u32 btf_value_id)
{
const struct btf_type *key_type, *value_type;
u32 key_size, value_size;
@@ -1013,7 +1040,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
if (!IS_ERR_OR_NULL(map->record)) {
int i;
- if (!bpf_capable()) {
+ if (!bpf_token_capable(token, CAP_BPF)) {
ret = -EPERM;
goto free_map_tab;
}
@@ -1096,11 +1123,17 @@ free_map_tab:
return ret;
}
-#define BPF_MAP_CREATE_LAST_FIELD map_extra
+static bool bpf_net_capable(void)
+{
+ return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN);
+}
+
+#define BPF_MAP_CREATE_LAST_FIELD map_token_fd
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
const struct bpf_map_ops *ops;
+ struct bpf_token *token = NULL;
int numa_node = bpf_map_attr_numa_node(attr);
u32 map_type = attr->map_type;
struct bpf_map *map;
@@ -1151,14 +1184,32 @@ static int map_create(union bpf_attr *attr)
if (!ops->map_mem_usage)
return -EINVAL;
+ if (attr->map_token_fd) {
+ token = bpf_token_get_from_fd(attr->map_token_fd);
+ if (IS_ERR(token))
+ return PTR_ERR(token);
+
+ /* if current token doesn't grant map creation permissions,
+ * then we can't use this token, so ignore it and rely on
+ * system-wide capabilities checks
+ */
+ if (!bpf_token_allow_cmd(token, BPF_MAP_CREATE) ||
+ !bpf_token_allow_map_type(token, attr->map_type)) {
+ bpf_token_put(token);
+ token = NULL;
+ }
+ }
+
+ err = -EPERM;
+
/* Intent here is for unprivileged_bpf_disabled to block BPF map
* creation for unprivileged users; other actions depend
* on fd availability and access to bpffs, so are dependent on
* object creation success. Even with unprivileged BPF disabled,
* capability checks are still carried out.
*/
- if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
- return -EPERM;
+ if (sysctl_unprivileged_bpf_disabled && !bpf_token_capable(token, CAP_BPF))
+ goto put_token;
/* check privileged map type permissions */
switch (map_type) {
@@ -1191,25 +1242,27 @@ static int map_create(union bpf_attr *attr)
case BPF_MAP_TYPE_LRU_PERCPU_HASH:
case BPF_MAP_TYPE_STRUCT_OPS:
case BPF_MAP_TYPE_CPUMAP:
- if (!bpf_capable())
- return -EPERM;
+ if (!bpf_token_capable(token, CAP_BPF))
+ goto put_token;
break;
case BPF_MAP_TYPE_SOCKMAP:
case BPF_MAP_TYPE_SOCKHASH:
case BPF_MAP_TYPE_DEVMAP:
case BPF_MAP_TYPE_DEVMAP_HASH:
case BPF_MAP_TYPE_XSKMAP:
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
+ if (!bpf_token_capable(token, CAP_NET_ADMIN))
+ goto put_token;
break;
default:
WARN(1, "unsupported map type %d", map_type);
- return -EPERM;
+ goto put_token;
}
map = ops->map_alloc(attr);
- if (IS_ERR(map))
- return PTR_ERR(map);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto put_token;
+ }
map->ops = ops;
map->map_type = map_type;
@@ -1246,7 +1299,7 @@ static int map_create(union bpf_attr *attr)
map->btf = btf;
if (attr->btf_value_type_id) {
- err = map_check_btf(map, btf, attr->btf_key_type_id,
+ err = map_check_btf(map, token, btf, attr->btf_key_type_id,
attr->btf_value_type_id);
if (err)
goto free_map;
@@ -1258,15 +1311,16 @@ static int map_create(union bpf_attr *attr)
attr->btf_vmlinux_value_type_id;
}
- err = security_bpf_map_alloc(map);
+ err = security_bpf_map_create(map, attr, token);
if (err)
- goto free_map;
+ goto free_map_sec;
err = bpf_map_alloc_id(map);
if (err)
goto free_map_sec;
bpf_map_save_memcg(map);
+ bpf_token_put(token);
err = bpf_map_new_fd(map, f_flags);
if (err < 0) {
@@ -1287,6 +1341,8 @@ free_map_sec:
free_map:
btf_put(map->btf);
map->ops->map_free(map);
+put_token:
+ bpf_token_put(token);
return err;
}
@@ -1524,6 +1580,8 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
}
err = bpf_map_update_value(map, f.file, key, value, attr->flags);
+ if (!err)
+ maybe_wait_bpf_programs(map);
kvfree(value);
free_key:
@@ -1579,7 +1637,8 @@ static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
err = map->ops->map_delete_elem(map, key);
rcu_read_unlock();
bpf_enable_instrumentation();
- maybe_wait_bpf_programs(map);
+ if (!err)
+ maybe_wait_bpf_programs(map);
out:
kvfree(key);
err_put:
@@ -1676,6 +1735,9 @@ int generic_map_delete_batch(struct bpf_map *map,
if (!max_count)
return 0;
+ if (put_user(0, &uattr->batch.count))
+ return -EFAULT;
+
key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
if (!key)
return -ENOMEM;
@@ -1705,7 +1767,6 @@ int generic_map_delete_batch(struct bpf_map *map,
kvfree(key);
- maybe_wait_bpf_programs(map);
return err;
}
@@ -1733,6 +1794,9 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
if (!max_count)
return 0;
+ if (put_user(0, &uattr->batch.count))
+ return -EFAULT;
+
key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
if (!key)
return -ENOMEM;
@@ -1763,6 +1827,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
kvfree(value);
kvfree(key);
+
return err;
}
@@ -2108,7 +2173,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
kvfree(aux->func_info);
kfree(aux->func_info_aux);
free_uid(aux->user);
- security_bpf_prog_free(aux);
+ security_bpf_prog_free(aux->prog);
bpf_prog_free(aux->prog);
}
@@ -2554,13 +2619,15 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
}
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD log_true_size
+#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd
static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
{
enum bpf_prog_type type = attr->prog_type;
struct bpf_prog *prog, *dst_prog = NULL;
struct btf *attach_btf = NULL;
+ struct bpf_token *token = NULL;
+ bool bpf_cap;
int err;
char license[128];
@@ -2577,10 +2644,31 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
BPF_F_TEST_REG_INVARIANTS))
return -EINVAL;
+ bpf_prog_load_fixup_attach_type(attr);
+
+ if (attr->prog_token_fd) {
+ token = bpf_token_get_from_fd(attr->prog_token_fd);
+ if (IS_ERR(token))
+ return PTR_ERR(token);
+ /* if current token doesn't grant prog loading permissions,
+ * then we can't use this token, so ignore it and rely on
+ * system-wide capabilities checks
+ */
+ if (!bpf_token_allow_cmd(token, BPF_PROG_LOAD) ||
+ !bpf_token_allow_prog_type(token, attr->prog_type,
+ attr->expected_attach_type)) {
+ bpf_token_put(token);
+ token = NULL;
+ }
+ }
+
+ bpf_cap = bpf_token_capable(token, CAP_BPF);
+ err = -EPERM;
+
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
(attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
- !bpf_capable())
- return -EPERM;
+ !bpf_cap)
+ goto put_token;
/* Intent here is for unprivileged_bpf_disabled to block BPF program
* creation for unprivileged users; other actions depend
@@ -2589,21 +2677,23 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
* capability checks are still carried out for these
* and other operations.
*/
- if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
- return -EPERM;
+ if (sysctl_unprivileged_bpf_disabled && !bpf_cap)
+ goto put_token;
if (attr->insn_cnt == 0 ||
- attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
- return -E2BIG;
+ attr->insn_cnt > (bpf_cap ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) {
+ err = -E2BIG;
+ goto put_token;
+ }
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
type != BPF_PROG_TYPE_CGROUP_SKB &&
- !bpf_capable())
- return -EPERM;
+ !bpf_cap)
+ goto put_token;
- if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN))
- return -EPERM;
- if (is_perfmon_prog_type(type) && !perfmon_capable())
- return -EPERM;
+ if (is_net_admin_prog_type(type) && !bpf_token_capable(token, CAP_NET_ADMIN))
+ goto put_token;
+ if (is_perfmon_prog_type(type) && !bpf_token_capable(token, CAP_PERFMON))
+ goto put_token;
/* attach_prog_fd/attach_btf_obj_fd can specify fd of either bpf_prog
* or btf, we need to check which one it is
@@ -2613,27 +2703,33 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (IS_ERR(dst_prog)) {
dst_prog = NULL;
attach_btf = btf_get_by_fd(attr->attach_btf_obj_fd);
- if (IS_ERR(attach_btf))
- return -EINVAL;
+ if (IS_ERR(attach_btf)) {
+ err = -EINVAL;
+ goto put_token;
+ }
if (!btf_is_kernel(attach_btf)) {
/* attaching through specifying bpf_prog's BTF
* objects directly might be supported eventually
*/
btf_put(attach_btf);
- return -ENOTSUPP;
+ err = -ENOTSUPP;
+ goto put_token;
}
}
} else if (attr->attach_btf_id) {
/* fall back to vmlinux BTF, if BTF type ID is specified */
attach_btf = bpf_get_btf_vmlinux();
- if (IS_ERR(attach_btf))
- return PTR_ERR(attach_btf);
- if (!attach_btf)
- return -EINVAL;
+ if (IS_ERR(attach_btf)) {
+ err = PTR_ERR(attach_btf);
+ goto put_token;
+ }
+ if (!attach_btf) {
+ err = -EINVAL;
+ goto put_token;
+ }
btf_get(attach_btf);
}
- bpf_prog_load_fixup_attach_type(attr);
if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
attach_btf, attr->attach_btf_id,
dst_prog)) {
@@ -2641,7 +2737,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
bpf_prog_put(dst_prog);
if (attach_btf)
btf_put(attach_btf);
- return -EINVAL;
+ err = -EINVAL;
+ goto put_token;
}
/* plain bpf_prog allocation */
@@ -2651,7 +2748,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
bpf_prog_put(dst_prog);
if (attach_btf)
btf_put(attach_btf);
- return -ENOMEM;
+ err = -EINVAL;
+ goto put_token;
}
prog->expected_attach_type = attr->expected_attach_type;
@@ -2662,9 +2760,9 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
- err = security_bpf_prog_alloc(prog->aux);
- if (err)
- goto free_prog;
+ /* move token into prog->aux, reuse taken refcnt */
+ prog->aux->token = token;
+ token = NULL;
prog->aux->user = get_current_user();
prog->len = attr->insn_cnt;
@@ -2673,12 +2771,12 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (copy_from_bpfptr(prog->insns,
make_bpfptr(attr->insns, uattr.is_kernel),
bpf_prog_insn_size(prog)) != 0)
- goto free_prog_sec;
+ goto free_prog;
/* copy eBPF program license from user space */
if (strncpy_from_bpfptr(license,
make_bpfptr(attr->license, uattr.is_kernel),
sizeof(license) - 1) < 0)
- goto free_prog_sec;
+ goto free_prog;
license[sizeof(license) - 1] = 0;
/* eBPF programs must be GPL compatible to use GPL-ed functions */
@@ -2692,25 +2790,29 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
if (bpf_prog_is_dev_bound(prog->aux)) {
err = bpf_prog_dev_bound_init(prog, attr);
if (err)
- goto free_prog_sec;
+ goto free_prog;
}
if (type == BPF_PROG_TYPE_EXT && dst_prog &&
bpf_prog_is_dev_bound(dst_prog->aux)) {
err = bpf_prog_dev_bound_inherit(prog, dst_prog);
if (err)
- goto free_prog_sec;
+ goto free_prog;
}
/* find program type: socket_filter vs tracing_filter */
err = find_prog_type(type, prog);
if (err < 0)
- goto free_prog_sec;
+ goto free_prog;
prog->aux->load_time = ktime_get_boottime_ns();
err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name,
sizeof(attr->prog_name));
if (err < 0)
+ goto free_prog;
+
+ err = security_bpf_prog_load(prog, attr, token);
+ if (err)
goto free_prog_sec;
/* run eBPF verifier */
@@ -2756,13 +2858,16 @@ free_used_maps:
*/
__bpf_prog_put_noref(prog, prog->aux->real_func_cnt);
return err;
+
free_prog_sec:
- free_uid(prog->aux->user);
- security_bpf_prog_free(prog->aux);
+ security_bpf_prog_free(prog);
free_prog:
+ free_uid(prog->aux->user);
if (prog->aux->attach_btf)
btf_put(prog->aux->attach_btf);
bpf_prog_free(prog);
+put_token:
+ bpf_token_put(token);
return err;
}
@@ -3752,7 +3857,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
case BPF_PROG_TYPE_SK_LOOKUP:
return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
case BPF_PROG_TYPE_CGROUP_SKB:
- if (!capable(CAP_NET_ADMIN))
+ if (!bpf_token_capable(prog->aux->token, CAP_NET_ADMIN))
/* cg-skb progs can be loaded by unpriv user.
* check permissions at attach time.
*/
@@ -3955,7 +4060,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
static int bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
- if (!capable(CAP_NET_ADMIN))
+ if (!bpf_net_capable())
return -EPERM;
if (CHECK_ATTR(BPF_PROG_QUERY))
return -EINVAL;
@@ -4723,15 +4828,31 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
return err;
}
-#define BPF_BTF_LOAD_LAST_FIELD btf_log_true_size
+#define BPF_BTF_LOAD_LAST_FIELD btf_token_fd
static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
+ struct bpf_token *token = NULL;
+
if (CHECK_ATTR(BPF_BTF_LOAD))
return -EINVAL;
- if (!bpf_capable())
+ if (attr->btf_token_fd) {
+ token = bpf_token_get_from_fd(attr->btf_token_fd);
+ if (IS_ERR(token))
+ return PTR_ERR(token);
+ if (!bpf_token_allow_cmd(token, BPF_BTF_LOAD)) {
+ bpf_token_put(token);
+ token = NULL;
+ }
+ }
+
+ if (!bpf_token_capable(token, CAP_BPF)) {
+ bpf_token_put(token);
return -EPERM;
+ }
+
+ bpf_token_put(token);
return btf_new_fd(attr, uattr, uattr_size);
}
@@ -4920,8 +5041,10 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
else
BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr);
err_put:
- if (has_write)
+ if (has_write) {
+ maybe_wait_bpf_programs(map);
bpf_map_write_active_dec(map);
+ }
fdput(f);
return err;
}
@@ -5323,6 +5446,11 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
goto out_unlock;
}
+ /* The bpf program will not access the bpf map, but for the sake of
+ * simplicity, increase sleepable_refcnt for sleepable program as well.
+ */
+ if (prog->aux->sleepable)
+ atomic64_inc(&map->sleepable_refcnt);
memcpy(used_maps_new, used_maps_old,
sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
used_maps_new[prog->aux->used_map_cnt] = map;
@@ -5342,6 +5470,20 @@ out_prog_put:
return ret;
}
+#define BPF_TOKEN_CREATE_LAST_FIELD token_create.bpffs_fd
+
+static int token_create(union bpf_attr *attr)
+{
+ if (CHECK_ATTR(BPF_TOKEN_CREATE))
+ return -EINVAL;
+
+ /* no flags are supported yet */
+ if (attr->token_create.flags)
+ return -EINVAL;
+
+ return bpf_token_create(attr);
+}
+
static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
@@ -5475,6 +5617,9 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
case BPF_PROG_BIND_MAP:
err = bpf_prog_bind_map(&attr);
break;
+ case BPF_TOKEN_CREATE:
+ err = token_create(&attr);
+ break;
default:
err = -EINVAL;
break;
@@ -5581,7 +5726,7 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = {
const struct bpf_func_proto * __weak
tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
BPF_CALL_1(bpf_sys_close, u32, fd)
@@ -5631,7 +5776,8 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_sys_bpf:
- return !perfmon_capable() ? NULL : &bpf_sys_bpf_proto;
+ return !bpf_token_capable(prog->aux->token, CAP_PERFMON)
+ ? NULL : &bpf_sys_bpf_proto;
case BPF_FUNC_btf_find_by_name_kind:
return &bpf_btf_find_by_name_kind_proto;
case BPF_FUNC_sys_close:
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index f4c91c9b27d7..9dbc31b25e3d 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -172,12 +172,6 @@ bool tnum_in(struct tnum a, struct tnum b)
return a.value == b.value;
}
-int tnum_strn(char *str, size_t size, struct tnum a)
-{
- return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask);
-}
-EXPORT_SYMBOL_GPL(tnum_strn);
-
int tnum_sbin(char *str, size_t size, struct tnum a)
{
size_t n;
diff --git a/kernel/bpf/token.c b/kernel/bpf/token.c
new file mode 100644
index 000000000000..a86fccd57e2d
--- /dev/null
+++ b/kernel/bpf/token.c
@@ -0,0 +1,271 @@
+#include <linux/bpf.h>
+#include <linux/vmalloc.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/idr.h>
+#include <linux/namei.h>
+#include <linux/user_namespace.h>
+#include <linux/security.h>
+
+bool bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ /* BPF token allows ns_capable() level of capabilities, but only if
+ * token's userns is *exactly* the same as current user's userns
+ */
+ if (token && current_user_ns() == token->userns) {
+ if (ns_capable(token->userns, cap) ||
+ (cap != CAP_SYS_ADMIN && ns_capable(token->userns, CAP_SYS_ADMIN)))
+ return security_bpf_token_capable(token, cap) == 0;
+ }
+ /* otherwise fallback to capable() checks */
+ return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN));
+}
+
+void bpf_token_inc(struct bpf_token *token)
+{
+ atomic64_inc(&token->refcnt);
+}
+
+static void bpf_token_free(struct bpf_token *token)
+{
+ security_bpf_token_free(token);
+ put_user_ns(token->userns);
+ kvfree(token);
+}
+
+static void bpf_token_put_deferred(struct work_struct *work)
+{
+ struct bpf_token *token = container_of(work, struct bpf_token, work);
+
+ bpf_token_free(token);
+}
+
+void bpf_token_put(struct bpf_token *token)
+{
+ if (!token)
+ return;
+
+ if (!atomic64_dec_and_test(&token->refcnt))
+ return;
+
+ INIT_WORK(&token->work, bpf_token_put_deferred);
+ schedule_work(&token->work);
+}
+
+static int bpf_token_release(struct inode *inode, struct file *filp)
+{
+ struct bpf_token *token = filp->private_data;
+
+ bpf_token_put(token);
+ return 0;
+}
+
+static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+ struct bpf_token *token = filp->private_data;
+ u64 mask;
+
+ BUILD_BUG_ON(__MAX_BPF_CMD >= 64);
+ mask = (1ULL << __MAX_BPF_CMD) - 1;
+ if ((token->allowed_cmds & mask) == mask)
+ seq_printf(m, "allowed_cmds:\tany\n");
+ else
+ seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds);
+
+ BUILD_BUG_ON(__MAX_BPF_MAP_TYPE >= 64);
+ mask = (1ULL << __MAX_BPF_MAP_TYPE) - 1;
+ if ((token->allowed_maps & mask) == mask)
+ seq_printf(m, "allowed_maps:\tany\n");
+ else
+ seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps);
+
+ BUILD_BUG_ON(__MAX_BPF_PROG_TYPE >= 64);
+ mask = (1ULL << __MAX_BPF_PROG_TYPE) - 1;
+ if ((token->allowed_progs & mask) == mask)
+ seq_printf(m, "allowed_progs:\tany\n");
+ else
+ seq_printf(m, "allowed_progs:\t0x%llx\n", token->allowed_progs);
+
+ BUILD_BUG_ON(__MAX_BPF_ATTACH_TYPE >= 64);
+ mask = (1ULL << __MAX_BPF_ATTACH_TYPE) - 1;
+ if ((token->allowed_attachs & mask) == mask)
+ seq_printf(m, "allowed_attachs:\tany\n");
+ else
+ seq_printf(m, "allowed_attachs:\t0x%llx\n", token->allowed_attachs);
+}
+
+#define BPF_TOKEN_INODE_NAME "bpf-token"
+
+static const struct inode_operations bpf_token_iops = { };
+
+static const struct file_operations bpf_token_fops = {
+ .release = bpf_token_release,
+ .show_fdinfo = bpf_token_show_fdinfo,
+};
+
+int bpf_token_create(union bpf_attr *attr)
+{
+ struct bpf_mount_opts *mnt_opts;
+ struct bpf_token *token = NULL;
+ struct user_namespace *userns;
+ struct inode *inode;
+ struct file *file;
+ struct path path;
+ struct fd f;
+ umode_t mode;
+ int err, fd;
+
+ f = fdget(attr->token_create.bpffs_fd);
+ if (!f.file)
+ return -EBADF;
+
+ path = f.file->f_path;
+ path_get(&path);
+ fdput(f);
+
+ if (path.dentry != path.mnt->mnt_sb->s_root) {
+ err = -EINVAL;
+ goto out_path;
+ }
+ if (path.mnt->mnt_sb->s_op != &bpf_super_ops) {
+ err = -EINVAL;
+ goto out_path;
+ }
+ err = path_permission(&path, MAY_ACCESS);
+ if (err)
+ goto out_path;
+
+ userns = path.dentry->d_sb->s_user_ns;
+ /*
+ * Enforce that creators of BPF tokens are in the same user
+ * namespace as the BPF FS instance. This makes reasoning about
+ * permissions a lot easier and we can always relax this later.
+ */
+ if (current_user_ns() != userns) {
+ err = -EPERM;
+ goto out_path;
+ }
+ if (!ns_capable(userns, CAP_BPF)) {
+ err = -EPERM;
+ goto out_path;
+ }
+
+ mnt_opts = path.dentry->d_sb->s_fs_info;
+ if (mnt_opts->delegate_cmds == 0 &&
+ mnt_opts->delegate_maps == 0 &&
+ mnt_opts->delegate_progs == 0 &&
+ mnt_opts->delegate_attachs == 0) {
+ err = -ENOENT; /* no BPF token delegation is set up */
+ goto out_path;
+ }
+
+ mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
+ inode = bpf_get_inode(path.mnt->mnt_sb, NULL, mode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out_path;
+ }
+
+ inode->i_op = &bpf_token_iops;
+ inode->i_fop = &bpf_token_fops;
+ clear_nlink(inode); /* make sure it is unlinked */
+
+ file = alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME, O_RDWR, &bpf_token_fops);
+ if (IS_ERR(file)) {
+ iput(inode);
+ err = PTR_ERR(file);
+ goto out_path;
+ }
+
+ token = kvzalloc(sizeof(*token), GFP_USER);
+ if (!token) {
+ err = -ENOMEM;
+ goto out_file;
+ }
+
+ atomic64_set(&token->refcnt, 1);
+
+ /* remember bpffs owning userns for future ns_capable() checks */
+ token->userns = get_user_ns(userns);
+
+ token->allowed_cmds = mnt_opts->delegate_cmds;
+ token->allowed_maps = mnt_opts->delegate_maps;
+ token->allowed_progs = mnt_opts->delegate_progs;
+ token->allowed_attachs = mnt_opts->delegate_attachs;
+
+ err = security_bpf_token_create(token, attr, &path);
+ if (err)
+ goto out_token;
+
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0) {
+ err = fd;
+ goto out_token;
+ }
+
+ file->private_data = token;
+ fd_install(fd, file);
+
+ path_put(&path);
+ return fd;
+
+out_token:
+ bpf_token_free(token);
+out_file:
+ fput(file);
+out_path:
+ path_put(&path);
+ return err;
+}
+
+struct bpf_token *bpf_token_get_from_fd(u32 ufd)
+{
+ struct fd f = fdget(ufd);
+ struct bpf_token *token;
+
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+ if (f.file->f_op != &bpf_token_fops) {
+ fdput(f);
+ return ERR_PTR(-EINVAL);
+ }
+
+ token = f.file->private_data;
+ bpf_token_inc(token);
+ fdput(f);
+
+ return token;
+}
+
+bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd)
+{
+ /* BPF token can be used only within exactly the same userns in which
+ * it was created
+ */
+ if (!token || current_user_ns() != token->userns)
+ return false;
+ if (!(token->allowed_cmds & (1ULL << cmd)))
+ return false;
+ return security_bpf_token_cmd(token, cmd) == 0;
+}
+
+bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type)
+{
+ if (!token || type >= __MAX_BPF_MAP_TYPE)
+ return false;
+
+ return token->allowed_maps & (1ULL << type);
+}
+
+bool bpf_token_allow_prog_type(const struct bpf_token *token,
+ enum bpf_prog_type prog_type,
+ enum bpf_attach_type attach_type)
+{
+ if (!token || prog_type >= __MAX_BPF_PROG_TYPE || attach_type >= __MAX_BPF_ATTACH_TYPE)
+ return false;
+
+ return (token->allowed_progs & (1ULL << prog_type)) &&
+ (token->allowed_attachs & (1ULL << attach_type));
+}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index e97aeda3a86b..d382f5ebe06c 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -115,10 +115,10 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
(ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC);
}
-void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
+void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym)
{
ksym->start = (unsigned long) data;
- ksym->end = ksym->start + PAGE_SIZE;
+ ksym->end = ksym->start + size;
bpf_ksym_add(ksym);
perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
PAGE_SIZE, false, ksym->name);
@@ -254,8 +254,8 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_a
static void bpf_tramp_image_free(struct bpf_tramp_image *im)
{
bpf_image_ksym_del(&im->ksym);
- bpf_jit_free_exec(im->image);
- bpf_jit_uncharge_modmem(PAGE_SIZE);
+ arch_free_bpf_trampoline(im->image, im->size);
+ bpf_jit_uncharge_modmem(im->size);
percpu_ref_exit(&im->pcref);
kfree_rcu(im, rcu);
}
@@ -349,7 +349,7 @@ static void bpf_tramp_image_put(struct bpf_tramp_image *im)
call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
}
-static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key)
+static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, int size)
{
struct bpf_tramp_image *im;
struct bpf_ksym *ksym;
@@ -360,15 +360,15 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key)
if (!im)
goto out;
- err = bpf_jit_charge_modmem(PAGE_SIZE);
+ err = bpf_jit_charge_modmem(size);
if (err)
goto out_free_im;
+ im->size = size;
err = -ENOMEM;
- im->image = image = bpf_jit_alloc_exec(PAGE_SIZE);
+ im->image = image = arch_alloc_bpf_trampoline(size);
if (!image)
goto out_uncharge;
- set_vm_flush_reset_perms(image);
err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
if (err)
@@ -377,13 +377,13 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key)
ksym = &im->ksym;
INIT_LIST_HEAD_RCU(&ksym->lnode);
snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key);
- bpf_image_ksym_add(image, ksym);
+ bpf_image_ksym_add(image, size, ksym);
return im;
out_free_image:
- bpf_jit_free_exec(im->image);
+ arch_free_bpf_trampoline(im->image, im->size);
out_uncharge:
- bpf_jit_uncharge_modmem(PAGE_SIZE);
+ bpf_jit_uncharge_modmem(size);
out_free_im:
kfree(im);
out:
@@ -396,7 +396,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
struct bpf_tramp_links *tlinks;
u32 orig_flags = tr->flags;
bool ip_arg = false;
- int err, total;
+ int err, total, size;
tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg);
if (IS_ERR(tlinks))
@@ -409,12 +409,6 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut
goto out;
}
- im = bpf_tramp_image_alloc(tr->key);
- if (IS_ERR(im)) {
- err = PTR_ERR(im);
- goto out;
- }
-
/* clear all bits except SHARE_IPMODIFY and TAIL_CALL_CTX */
tr->flags &= (BPF_TRAMP_F_SHARE_IPMODIFY | BPF_TRAMP_F_TAIL_CALL_CTX);
@@ -438,13 +432,31 @@ again:
tr->flags |= BPF_TRAMP_F_ORIG_STACK;
#endif
- err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
+ size = arch_bpf_trampoline_size(&tr->func.model, tr->flags,
+ tlinks, tr->func.addr);
+ if (size < 0) {
+ err = size;
+ goto out;
+ }
+
+ if (size > PAGE_SIZE) {
+ err = -E2BIG;
+ goto out;
+ }
+
+ im = bpf_tramp_image_alloc(tr->key, size);
+ if (IS_ERR(im)) {
+ err = PTR_ERR(im);
+ goto out;
+ }
+
+ err = arch_prepare_bpf_trampoline(im, im->image, im->image + size,
&tr->func.model, tr->flags, tlinks,
tr->func.addr);
if (err < 0)
goto out_free;
- set_memory_rox((long)im->image, 1);
+ arch_protect_bpf_trampoline(im->image, im->size);
WARN_ON(tr->cur_image && total == 0);
if (tr->cur_image)
@@ -464,9 +476,8 @@ again:
tr->fops->func = NULL;
tr->fops->trampoline = 0;
- /* reset im->image memory attr for arch_prepare_bpf_trampoline */
- set_memory_nx((long)im->image, 1);
- set_memory_rw((long)im->image, 1);
+ /* free im memory and reallocate later */
+ bpf_tramp_image_free(im);
goto again;
}
#endif
@@ -1032,10 +1043,50 @@ bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
}
int __weak
-arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
+arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks,
- void *orig_call)
+ void *func_addr)
+{
+ return -ENOTSUPP;
+}
+
+void * __weak arch_alloc_bpf_trampoline(unsigned int size)
+{
+ void *image;
+
+ if (WARN_ON_ONCE(size > PAGE_SIZE))
+ return NULL;
+ image = bpf_jit_alloc_exec(PAGE_SIZE);
+ if (image)
+ set_vm_flush_reset_perms(image);
+ return image;
+}
+
+void __weak arch_free_bpf_trampoline(void *image, unsigned int size)
+{
+ WARN_ON_ONCE(size > PAGE_SIZE);
+ /* bpf_jit_free_exec doesn't need "size", but
+ * bpf_prog_pack_free() needs it.
+ */
+ bpf_jit_free_exec(image);
+}
+
+void __weak arch_protect_bpf_trampoline(void *image, unsigned int size)
+{
+ WARN_ON_ONCE(size > PAGE_SIZE);
+ set_memory_rox((long)image, 1);
+}
+
+void __weak arch_unprotect_bpf_trampoline(void *image, unsigned int size)
+{
+ WARN_ON_ONCE(size > PAGE_SIZE);
+ set_memory_nx((long)image, 1);
+ set_memory_rw((long)image, 1);
+}
+
+int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks, void *func_addr)
{
return -ENOTSUPP;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8e7b6072e3f4..9456ee0ad129 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -362,20 +362,23 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
static void verbose_invalid_scalar(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
- struct tnum *range, const char *ctx,
+ struct bpf_retval_range range, const char *ctx,
const char *reg_name)
{
- char tn_buf[48];
+ bool unknown = true;
- verbose(env, "At %s the register %s ", ctx, reg_name);
- if (!tnum_is_unknown(reg->var_off)) {
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "has value %s", tn_buf);
- } else {
- verbose(env, "has unknown scalar value");
+ verbose(env, "%s the register %s has", ctx, reg_name);
+ if (reg->smin_value > S64_MIN) {
+ verbose(env, " smin=%lld", reg->smin_value);
+ unknown = false;
+ }
+ if (reg->smax_value < S64_MAX) {
+ verbose(env, " smax=%lld", reg->smax_value);
+ unknown = false;
}
- tnum_strn(tn_buf, sizeof(tn_buf), *range);
- verbose(env, " should have been in %s\n", tn_buf);
+ if (unknown)
+ verbose(env, " unknown scalar value");
+ verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval);
}
static bool type_may_be_null(u32 type)
@@ -439,6 +442,25 @@ static struct bpf_func_info_aux *subprog_aux(const struct bpf_verifier_env *env,
return &env->prog->aux->func_info_aux[subprog];
}
+static struct bpf_subprog_info *subprog_info(struct bpf_verifier_env *env, int subprog)
+{
+ return &env->subprog_info[subprog];
+}
+
+static void mark_subprog_exc_cb(struct bpf_verifier_env *env, int subprog)
+{
+ struct bpf_subprog_info *info = subprog_info(env, subprog);
+
+ info->is_cb = true;
+ info->is_async_cb = true;
+ info->is_exception_cb = true;
+}
+
+static bool subprog_is_exc_cb(struct bpf_verifier_env *env, int subprog)
+{
+ return subprog_info(env, subprog)->is_exception_cb;
+}
+
static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
@@ -1141,6 +1163,21 @@ static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
stack->spilled_ptr.type == SCALAR_VALUE;
}
+/* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
+ * case they are equivalent, or it's STACK_ZERO, in which case we preserve
+ * more precise STACK_ZERO.
+ * Note, in uprivileged mode leaving STACK_INVALID is wrong, so we take
+ * env->allow_ptr_leaks into account and force STACK_MISC, if necessary.
+ */
+static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
+{
+ if (*stype == STACK_ZERO)
+ return;
+ if (env->allow_ptr_leaks && *stype == STACK_INVALID)
+ return;
+ *stype = STACK_MISC;
+}
+
static void scrub_spilled_slot(u8 *stype)
{
if (*stype != STACK_INVALID)
@@ -1241,9 +1278,16 @@ static int resize_reference_state(struct bpf_func_state *state, size_t n)
return 0;
}
-static int grow_stack_state(struct bpf_func_state *state, int size)
+/* Possibly update state->allocated_stack to be at least size bytes. Also
+ * possibly update the function's high-water mark in its bpf_subprog_info.
+ */
+static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
{
- size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
+ size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;
+
+ /* The stack size is always a multiple of BPF_REG_SIZE. */
+ size = round_up(size, BPF_REG_SIZE);
+ n = size / BPF_REG_SIZE;
if (old_n >= n)
return 0;
@@ -1253,6 +1297,11 @@ static int grow_stack_state(struct bpf_func_state *state, int size)
return -ENOMEM;
state->allocated_stack = size;
+
+ /* update known max for given subprogram */
+ if (env->subprog_info[state->subprogno].stack_depth < size)
+ env->subprog_info[state->subprogno].stack_depth = size;
+
return 0;
}
@@ -1352,8 +1401,8 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
int i, err;
dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
- src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
- GFP_USER);
+ src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
+ GFP_USER);
if (!dst_state->jmp_history)
return -ENOMEM;
dst_state->jmp_history_cnt = src->jmp_history_cnt;
@@ -1728,10 +1777,14 @@ static void __mark_reg_known_zero(struct bpf_reg_state *reg)
__mark_reg_known(reg, 0);
}
-static void __mark_reg_const_zero(struct bpf_reg_state *reg)
+static void __mark_reg_const_zero(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
__mark_reg_known(reg, 0);
reg->type = SCALAR_VALUE;
+ /* all scalars are assumed imprecise initially (unless unprivileged,
+ * in which case everything is forced to be precise)
+ */
+ reg->precise = !env->bpf_capable;
}
static void mark_reg_known_zero(struct bpf_verifier_env *env,
@@ -2305,6 +2358,11 @@ static void init_reg_state(struct bpf_verifier_env *env,
regs[BPF_REG_FP].frameno = state->frameno;
}
+static struct bpf_retval_range retval_range(s32 minval, s32 maxval)
+{
+ return (struct bpf_retval_range){ minval, maxval };
+}
+
#define BPF_MAIN_FUNC (-1)
static void init_func_state(struct bpf_verifier_env *env,
struct bpf_func_state *state,
@@ -2313,7 +2371,7 @@ static void init_func_state(struct bpf_verifier_env *env,
state->callsite = callsite;
state->frameno = frameno;
state->subprogno = subprogno;
- state->callback_ret_range = tnum_range(0, 0);
+ state->callback_ret_range = retval_range(0, 0);
init_reg_state(env, state);
mark_verifier_state_scratched(env);
}
@@ -2857,6 +2915,7 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
if (env->subprog_info[i].start != ex_cb_insn)
continue;
env->exception_callback_subprog = i;
+ mark_subprog_exc_cb(env, i);
break;
}
}
@@ -3213,6 +3272,21 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
return __check_reg_arg(env, state->regs, regno, t);
}
+static int insn_stack_access_flags(int frameno, int spi)
+{
+ return INSN_F_STACK_ACCESS | (spi << INSN_F_SPI_SHIFT) | frameno;
+}
+
+static int insn_stack_access_spi(int insn_flags)
+{
+ return (insn_flags >> INSN_F_SPI_SHIFT) & INSN_F_SPI_MASK;
+}
+
+static int insn_stack_access_frameno(int insn_flags)
+{
+ return insn_flags & INSN_F_FRAMENO_MASK;
+}
+
static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
{
env->insn_aux_data[idx].jmp_point = true;
@@ -3224,28 +3298,51 @@ static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
}
/* for any branch, call, exit record the history of jmps in the given state */
-static int push_jmp_history(struct bpf_verifier_env *env,
- struct bpf_verifier_state *cur)
+static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
+ int insn_flags)
{
u32 cnt = cur->jmp_history_cnt;
- struct bpf_idx_pair *p;
+ struct bpf_jmp_history_entry *p;
size_t alloc_size;
- if (!is_jmp_point(env, env->insn_idx))
+ /* combine instruction flags if we already recorded this instruction */
+ if (env->cur_hist_ent) {
+ /* atomic instructions push insn_flags twice, for READ and
+ * WRITE sides, but they should agree on stack slot
+ */
+ WARN_ONCE((env->cur_hist_ent->flags & insn_flags) &&
+ (env->cur_hist_ent->flags & insn_flags) != insn_flags,
+ "verifier insn history bug: insn_idx %d cur flags %x new flags %x\n",
+ env->insn_idx, env->cur_hist_ent->flags, insn_flags);
+ env->cur_hist_ent->flags |= insn_flags;
return 0;
+ }
cnt++;
alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
if (!p)
return -ENOMEM;
- p[cnt - 1].idx = env->insn_idx;
- p[cnt - 1].prev_idx = env->prev_insn_idx;
cur->jmp_history = p;
+
+ p = &cur->jmp_history[cnt - 1];
+ p->idx = env->insn_idx;
+ p->prev_idx = env->prev_insn_idx;
+ p->flags = insn_flags;
cur->jmp_history_cnt = cnt;
+ env->cur_hist_ent = p;
+
return 0;
}
+static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
+ u32 hist_end, int insn_idx)
+{
+ if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx)
+ return &st->jmp_history[hist_end - 1];
+ return NULL;
+}
+
/* Backtrack one insn at a time. If idx is not at the top of recorded
* history then previous instruction came from straight line execution.
* Return -ENOENT if we exhausted all instructions within given state.
@@ -3372,16 +3469,6 @@ static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u3
bt->stack_masks[frame] &= ~(1ull << slot);
}
-static inline void bt_set_slot(struct backtrack_state *bt, u32 slot)
-{
- bt_set_frame_slot(bt, bt->frame, slot);
-}
-
-static inline void bt_clear_slot(struct backtrack_state *bt, u32 slot)
-{
- bt_clear_frame_slot(bt, bt->frame, slot);
-}
-
static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
{
return bt->reg_masks[frame];
@@ -3407,9 +3494,9 @@ static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
return bt->reg_masks[bt->frame] & (1 << reg);
}
-static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot)
+static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot)
{
- return bt->stack_masks[bt->frame] & (1ull << slot);
+ return bt->stack_masks[frame] & (1ull << slot);
}
/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
@@ -3463,7 +3550,7 @@ static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
* - *was* processed previously during backtracking.
*/
static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
- struct backtrack_state *bt)
+ struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
{
const struct bpf_insn_cbs cbs = {
.cb_call = disasm_kfunc_name,
@@ -3476,7 +3563,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
u8 mode = BPF_MODE(insn->code);
u32 dreg = insn->dst_reg;
u32 sreg = insn->src_reg;
- u32 spi, i;
+ u32 spi, i, fr;
if (insn->code == 0)
return 0;
@@ -3537,20 +3624,15 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* by 'precise' mark in corresponding register of this state.
* No further tracking necessary.
*/
- if (insn->src_reg != BPF_REG_FP)
+ if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
return 0;
-
/* dreg = *(u64 *)[fp - off] was a fill from the stack.
* that [fp - off] slot contains scalar that needs to be
* tracked with precision
*/
- spi = (-insn->off - 1) / BPF_REG_SIZE;
- if (spi >= 64) {
- verbose(env, "BUG spi %d\n", spi);
- WARN_ONCE(1, "verifier backtracking bug");
- return -EFAULT;
- }
- bt_set_slot(bt, spi);
+ spi = insn_stack_access_spi(hist->flags);
+ fr = insn_stack_access_frameno(hist->flags);
+ bt_set_frame_slot(bt, fr, spi);
} else if (class == BPF_STX || class == BPF_ST) {
if (bt_is_reg_set(bt, dreg))
/* stx & st shouldn't be using _scalar_ dst_reg
@@ -3559,17 +3641,13 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
*/
return -ENOTSUPP;
/* scalars can only be spilled into stack */
- if (insn->dst_reg != BPF_REG_FP)
+ if (!hist || !(hist->flags & INSN_F_STACK_ACCESS))
return 0;
- spi = (-insn->off - 1) / BPF_REG_SIZE;
- if (spi >= 64) {
- verbose(env, "BUG spi %d\n", spi);
- WARN_ONCE(1, "verifier backtracking bug");
- return -EFAULT;
- }
- if (!bt_is_slot_set(bt, spi))
+ spi = insn_stack_access_spi(hist->flags);
+ fr = insn_stack_access_frameno(hist->flags);
+ if (!bt_is_frame_slot_set(bt, fr, spi))
return 0;
- bt_clear_slot(bt, spi);
+ bt_clear_frame_slot(bt, fr, spi);
if (class == BPF_STX)
bt_set_reg(bt, sreg);
} else if (class == BPF_JMP || class == BPF_JMP32) {
@@ -3613,10 +3691,14 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
WARN_ONCE(1, "verifier backtracking bug");
return -EFAULT;
}
- /* we don't track register spills perfectly,
- * so fallback to force-precise instead of failing */
- if (bt_stack_mask(bt) != 0)
- return -ENOTSUPP;
+ /* we are now tracking register spills correctly,
+ * so any instance of leftover slots is a bug
+ */
+ if (bt_stack_mask(bt) != 0) {
+ verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
+ WARN_ONCE(1, "verifier backtracking bug (subprog leftover stack slots)");
+ return -EFAULT;
+ }
/* propagate r1-r5 to the caller */
for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
if (bt_is_reg_set(bt, i)) {
@@ -3641,8 +3723,11 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
WARN_ONCE(1, "verifier backtracking bug");
return -EFAULT;
}
- if (bt_stack_mask(bt) != 0)
- return -ENOTSUPP;
+ if (bt_stack_mask(bt) != 0) {
+ verbose(env, "BUG stack slots %llx\n", bt_stack_mask(bt));
+ WARN_ONCE(1, "verifier backtracking bug (callback leftover stack slots)");
+ return -EFAULT;
+ }
/* clear r1-r5 in callback subprog's mask */
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
bt_clear_reg(bt, i);
@@ -4079,6 +4164,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
for (;;) {
DECLARE_BITMAP(mask, 64);
u32 history = st->jmp_history_cnt;
+ struct bpf_jmp_history_entry *hist;
if (env->log.level & BPF_LOG_LEVEL2) {
verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
@@ -4142,7 +4228,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
err = 0;
skip_first = false;
} else {
- err = backtrack_insn(env, i, subseq_idx, bt);
+ hist = get_jmp_hist_entry(st, history, i);
+ err = backtrack_insn(env, i, subseq_idx, hist, bt);
}
if (err == -ENOTSUPP) {
mark_all_scalars_precise(env, env->cur_state);
@@ -4195,22 +4282,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
for_each_set_bit(i, mask, 64) {
if (i >= func->allocated_stack / BPF_REG_SIZE) {
- /* the sequence of instructions:
- * 2: (bf) r3 = r10
- * 3: (7b) *(u64 *)(r3 -8) = r0
- * 4: (79) r4 = *(u64 *)(r10 -8)
- * doesn't contain jmps. It's backtracked
- * as a single block.
- * During backtracking insn 3 is not recognized as
- * stack access, so at the end of backtracking
- * stack slot fp-8 is still marked in stack_mask.
- * However the parent state may not have accessed
- * fp-8 and it's "unallocated" stack space.
- * In such case fallback to conservative.
- */
- mark_all_scalars_precise(env, env->cur_state);
- bt_reset(bt);
- return 0;
+ verbose(env, "BUG backtracking (stack slot %d, total slots %d)\n",
+ i, func->allocated_stack / BPF_REG_SIZE);
+ WARN_ONCE(1, "verifier backtracking bug (stack slot out of bounds)");
+ return -EFAULT;
}
if (!is_spilled_scalar_reg(&func->stack[i])) {
@@ -4347,7 +4422,8 @@ static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_
dst->live = live;
}
-static void save_register_state(struct bpf_func_state *state,
+static void save_register_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *state,
int spi, struct bpf_reg_state *reg,
int size)
{
@@ -4362,7 +4438,7 @@ static void save_register_state(struct bpf_func_state *state,
/* size < 8 bytes spill */
for (; i; i--)
- scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
+ mark_stack_slot_misc(env, &state->stack[spi].slot_type[i - 1]);
}
static bool is_bpf_st_mem(struct bpf_insn *insn)
@@ -4383,16 +4459,13 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
struct bpf_reg_state *reg = NULL;
- u32 dst_reg = insn->dst_reg;
+ int insn_flags = insn_stack_access_flags(state->frameno, spi);
- err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
- if (err)
- return err;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
* so it's aligned access and [off, off + size) are within stack limits
*/
if (!env->allow_ptr_leaks &&
- state->stack[spi].slot_type[0] == STACK_SPILL &&
+ is_spilled_reg(&state->stack[spi]) &&
size != BPF_REG_SIZE) {
verbose(env, "attempt to corrupt spilled pointer on stack\n");
return -EACCES;
@@ -4422,20 +4495,8 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
return err;
mark_stack_slot_scratched(env, spi);
- if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
- !register_is_null(reg) && env->bpf_capable) {
- if (dst_reg != BPF_REG_FP) {
- /* The backtracking logic can only recognize explicit
- * stack slot address like [fp - 8]. Other spill of
- * scalar via different register has to be conservative.
- * Backtrack from here and mark all registers as precise
- * that contributed into 'reg' being a constant.
- */
- err = mark_chain_precision(env, value_regno);
- if (err)
- return err;
- }
- save_register_state(state, spi, reg, size);
+ if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && env->bpf_capable) {
+ save_register_state(env, state, spi, reg, size);
/* Break the relation on a narrowing spill. */
if (fls64(reg->umax_value) > BITS_PER_BYTE * size)
state->stack[spi].spilled_ptr.id = 0;
@@ -4445,7 +4506,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
__mark_reg_known(&fake_reg, insn->imm);
fake_reg.type = SCALAR_VALUE;
- save_register_state(state, spi, &fake_reg, size);
+ save_register_state(env, state, spi, &fake_reg, size);
} else if (reg && is_spillable_regtype(reg->type)) {
/* register containing pointer is being spilled into stack */
if (size != BPF_REG_SIZE) {
@@ -4457,7 +4518,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
return -EINVAL;
}
- save_register_state(state, spi, reg, size);
+ save_register_state(env, state, spi, reg, size);
} else {
u8 type = STACK_MISC;
@@ -4482,7 +4543,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
/* when we zero initialize stack slots mark them as such */
if ((reg && register_is_null(reg)) ||
(!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
- /* backtracking doesn't work for STACK_ZERO yet. */
+ /* STACK_ZERO case happened because register spill
+ * wasn't properly aligned at the stack slot boundary,
+ * so it's not a register spill anymore; force
+ * originating register to be precise to make
+ * STACK_ZERO correct for subsequent states
+ */
err = mark_chain_precision(env, value_regno);
if (err)
return err;
@@ -4491,9 +4557,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
/* Mark slots affected by this stack write. */
for (i = 0; i < size; i++)
- state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
- type;
+ state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
+ insn_flags = 0; /* not a register spill */
}
+
+ if (insn_flags)
+ return push_jmp_history(env, env->cur_state, insn_flags);
return 0;
}
@@ -4543,10 +4612,6 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
(!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
writing_zero = true;
- err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
- if (err)
- return err;
-
for (i = min_off; i < max_off; i++) {
int spi;
@@ -4645,21 +4710,10 @@ static void mark_reg_stack_read(struct bpf_verifier_env *env,
zeros++;
}
if (zeros == max_off - min_off) {
- /* any access_size read into register is zero extended,
- * so the whole register == const_zero
+ /* Any access_size read into register is zero extended,
+ * so the whole register == const_zero.
*/
- __mark_reg_const_zero(&state->regs[dst_regno]);
- /* backtracking doesn't support STACK_ZERO yet,
- * so mark it precise here, so that later
- * backtracking can stop here.
- * Backtracking may not need this if this register
- * doesn't participate in pointer adjustment.
- * Forward propagation of precise flag is not
- * necessary either. This mark is only to stop
- * backtracking. Any register that contributed
- * to const 0 was marked precise before spill.
- */
- state->regs[dst_regno].precise = true;
+ __mark_reg_const_zero(env, &state->regs[dst_regno]);
} else {
/* have read misc data from the stack */
mark_reg_unknown(env, state->regs, dst_regno);
@@ -4686,6 +4740,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
struct bpf_reg_state *reg;
u8 *stype, type;
+ int insn_flags = insn_stack_access_flags(reg_state->frameno, spi);
stype = reg_state->stack[spi].slot_type;
reg = &reg_state->stack[spi].spilled_ptr;
@@ -4718,25 +4773,42 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
copy_register_state(&state->regs[dst_regno], reg);
state->regs[dst_regno].subreg_def = subreg_def;
} else {
+ int spill_cnt = 0, zero_cnt = 0;
+
for (i = 0; i < size; i++) {
type = stype[(slot - i) % BPF_REG_SIZE];
- if (type == STACK_SPILL)
+ if (type == STACK_SPILL) {
+ spill_cnt++;
continue;
+ }
if (type == STACK_MISC)
continue;
+ if (type == STACK_ZERO) {
+ zero_cnt++;
+ continue;
+ }
if (type == STACK_INVALID && env->allow_uninit_stack)
continue;
verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
}
- mark_reg_unknown(env, state->regs, dst_regno);
+
+ if (spill_cnt == size &&
+ tnum_is_const(reg->var_off) && reg->var_off.value == 0) {
+ __mark_reg_const_zero(env, &state->regs[dst_regno]);
+ /* this IS register fill, so keep insn_flags */
+ } else if (zero_cnt == size) {
+ /* similarly to mark_reg_stack_read(), preserve zeroes */
+ __mark_reg_const_zero(env, &state->regs[dst_regno]);
+ insn_flags = 0; /* not restoring original register state */
+ } else {
+ mark_reg_unknown(env, state->regs, dst_regno);
+ insn_flags = 0; /* not restoring original register state */
+ }
}
state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
- return 0;
- }
-
- if (dst_regno >= 0) {
+ } else if (dst_regno >= 0) {
/* restore register state from stack */
copy_register_state(&state->regs[dst_regno], reg);
/* mark reg as written since spilled pointer state likely
@@ -4772,7 +4844,10 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
if (dst_regno >= 0)
mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
+ insn_flags = 0; /* we are not restoring spilled register */
}
+ if (insn_flags)
+ return push_jmp_history(env, env->cur_state, insn_flags);
return 0;
}
@@ -5701,20 +5776,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
strict);
}
-static int update_stack_depth(struct bpf_verifier_env *env,
- const struct bpf_func_state *func,
- int off)
-{
- u16 stack = env->subprog_info[func->subprogno].stack_depth;
-
- if (stack >= -off)
- return 0;
-
- /* update known max for given subprogram */
- env->subprog_info[func->subprogno].stack_depth = -off;
- return 0;
-}
-
/* starting from main bpf function walk all instructions of the function
* and recursively walk all callees that given function can call.
* Ignore jump and exit insns.
@@ -6504,13 +6565,14 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
* The minimum valid offset is -MAX_BPF_STACK for writes, and
* -state->allocated_stack for reads.
*/
-static int check_stack_slot_within_bounds(int off,
- struct bpf_func_state *state,
- enum bpf_access_type t)
+static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
+ s64 off,
+ struct bpf_func_state *state,
+ enum bpf_access_type t)
{
int min_valid_off;
- if (t == BPF_WRITE)
+ if (t == BPF_WRITE || env->allow_uninit_stack)
min_valid_off = -MAX_BPF_STACK;
else
min_valid_off = -state->allocated_stack;
@@ -6533,7 +6595,7 @@ static int check_stack_access_within_bounds(
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = regs + regno;
struct bpf_func_state *state = func(env, reg);
- int min_off, max_off;
+ s64 min_off, max_off;
int err;
char *err_extra;
@@ -6546,11 +6608,8 @@ static int check_stack_access_within_bounds(
err_extra = " write to";
if (tnum_is_const(reg->var_off)) {
- min_off = reg->var_off.value + off;
- if (access_size > 0)
- max_off = min_off + access_size - 1;
- else
- max_off = min_off;
+ min_off = (s64)reg->var_off.value + off;
+ max_off = min_off + access_size;
} else {
if (reg->smax_value >= BPF_MAX_VAR_OFF ||
reg->smin_value <= -BPF_MAX_VAR_OFF) {
@@ -6559,15 +6618,12 @@ static int check_stack_access_within_bounds(
return -EACCES;
}
min_off = reg->smin_value + off;
- if (access_size > 0)
- max_off = reg->smax_value + off + access_size - 1;
- else
- max_off = min_off;
+ max_off = reg->smax_value + off + access_size;
}
- err = check_stack_slot_within_bounds(min_off, state, type);
- if (!err)
- err = check_stack_slot_within_bounds(max_off, state, type);
+ err = check_stack_slot_within_bounds(env, min_off, state, type);
+ if (!err && max_off > 0)
+ err = -EINVAL; /* out of stack access into non-negative offsets */
if (err) {
if (tnum_is_const(reg->var_off)) {
@@ -6577,11 +6633,16 @@ static int check_stack_access_within_bounds(
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
- err_extra, regno, tn_buf, access_size);
+ verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n",
+ err_extra, regno, tn_buf, off, access_size);
}
+ return err;
}
- return err;
+
+ /* Note that there is no stack access with offset zero, so the needed stack
+ * size is -min_off, not -min_off+1.
+ */
+ return grow_stack_state(env, state, -min_off /* size */);
}
/* check whether memory at (regno + off) is accessible for t = (read | write)
@@ -6596,7 +6657,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
{
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = regs + regno;
- struct bpf_func_state *state;
int size, err = 0;
size = bpf_size_to_bytes(bpf_size);
@@ -6739,11 +6799,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err)
return err;
- state = func(env, reg);
- err = update_stack_depth(env, state, off);
- if (err)
- return err;
-
if (t == BPF_READ)
err = check_stack_read(env, regno, off, size,
value_regno);
@@ -6932,13 +6987,13 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
if (err)
return err;
-
return 0;
}
/* When register 'regno' is used to read the stack (either directly or through
* a helper function) make sure that it's within stack boundary and, depending
- * on the access type, that all elements of the stack are initialized.
+ * on the access type and privileges, that all elements of the stack are
+ * initialized.
*
* 'off' includes 'regno->off', but not its dynamic part (if any).
*
@@ -7046,8 +7101,11 @@ static int check_stack_range_initialized(
slot = -i - 1;
spi = slot / BPF_REG_SIZE;
- if (state->allocated_stack <= slot)
- goto err;
+ if (state->allocated_stack <= slot) {
+ verbose(env, "verifier bug: allocated_stack too small");
+ return -EFAULT;
+ }
+
stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
if (*stype == STACK_MISC)
goto mark;
@@ -7071,7 +7129,6 @@ static int check_stack_range_initialized(
goto mark;
}
-err:
if (tnum_is_const(reg->var_off)) {
verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
err_extra, regno, min_off, i - min_off, access_size);
@@ -7096,7 +7153,7 @@ mark:
* helper may write to the entire memory range.
*/
}
- return update_stack_depth(env, state, min_off);
+ return 0;
}
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
@@ -7192,6 +7249,12 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
}
}
+/* verify arguments to helpers or kfuncs consisting of a pointer and an access
+ * size.
+ *
+ * @regno is the register containing the access size. regno-1 is the register
+ * containing the pointer.
+ */
static int check_mem_size_reg(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, u32 regno,
bool zero_size_allowed,
@@ -7893,7 +7956,7 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
/* switch to DRAINED state, but keep the depth unchanged */
/* mark current iter state as drained and assume returned NULL */
cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
- __mark_reg_const_zero(&cur_fr->regs[BPF_REG_0]);
+ __mark_reg_const_zero(env, &cur_fr->regs[BPF_REG_0]);
return 0;
}
@@ -9396,7 +9459,7 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
return err;
callee->in_callback_fn = true;
- callee->callback_ret_range = tnum_range(0, 1);
+ callee->callback_ret_range = retval_range(0, 1);
return 0;
}
@@ -9418,7 +9481,7 @@ static int set_loop_callback_state(struct bpf_verifier_env *env,
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_callback_fn = true;
- callee->callback_ret_range = tnum_range(0, 1);
+ callee->callback_ret_range = retval_range(0, 1);
return 0;
}
@@ -9448,7 +9511,7 @@ static int set_timer_callback_state(struct bpf_verifier_env *env,
__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_async_callback_fn = true;
- callee->callback_ret_range = tnum_range(0, 1);
+ callee->callback_ret_range = retval_range(0, 1);
return 0;
}
@@ -9476,7 +9539,7 @@ static int set_find_vma_callback_state(struct bpf_verifier_env *env,
__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_callback_fn = true;
- callee->callback_ret_range = tnum_range(0, 1);
+ callee->callback_ret_range = retval_range(0, 1);
return 0;
}
@@ -9499,7 +9562,7 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_callback_fn = true;
- callee->callback_ret_range = tnum_range(0, 1);
+ callee->callback_ret_range = retval_range(0, 1);
return 0;
}
@@ -9531,7 +9594,7 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
callee->in_callback_fn = true;
- callee->callback_ret_range = tnum_range(0, 1);
+ callee->callback_ret_range = retval_range(0, 1);
return 0;
}
@@ -9560,6 +9623,11 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
return is_rbtree_lock_required_kfunc(kfunc_btf_id);
}
+static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
+{
+ return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
+}
+
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
struct bpf_verifier_state *state = env->cur_state, *prev_st;
@@ -9583,15 +9651,21 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
caller = state->frame[state->curframe - 1];
if (callee->in_callback_fn) {
- /* enforce R0 return value range [0, 1]. */
- struct tnum range = callee->callback_ret_range;
-
if (r0->type != SCALAR_VALUE) {
verbose(env, "R0 not a scalar value\n");
return -EACCES;
}
- if (!tnum_in(range, r0->var_off)) {
- verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
+
+ /* we are going to rely on register's precise value */
+ err = mark_reg_read(env, r0, r0->parent, REG_LIVE_READ64);
+ err = err ?: mark_chain_precision(env, BPF_REG_0);
+ if (err)
+ return err;
+
+ /* enforce R0 return value range */
+ if (!retval_range_within(callee->callback_ret_range, r0)) {
+ verbose_invalid_scalar(env, r0, callee->callback_ret_range,
+ "At callback return", "R0");
return -EINVAL;
}
if (!calls_callback(env, callee->callsite)) {
@@ -11805,7 +11879,7 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
return 0;
}
-static int check_return_code(struct bpf_verifier_env *env, int regno);
+static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
@@ -11942,7 +12016,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
* to bpf_throw becomes the return value of the program.
*/
if (!env->exception_callback_subprog) {
- err = check_return_code(env, BPF_REG_1);
+ err = check_return_code(env, BPF_REG_1, "R1");
if (err < 0)
return err;
}
@@ -14972,12 +15046,13 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
-static int check_return_code(struct bpf_verifier_env *env, int regno)
+static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name)
{
+ const char *exit_ctx = "At program exit";
struct tnum enforce_attach_type_range = tnum_unknown;
const struct bpf_prog *prog = env->prog;
struct bpf_reg_state *reg;
- struct tnum range = tnum_range(0, 1), const_0 = tnum_const(0);
+ struct bpf_retval_range range = retval_range(0, 1);
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
int err;
struct bpf_func_state *frame = env->cur_state->frame[0];
@@ -15019,17 +15094,9 @@ static int check_return_code(struct bpf_verifier_env *env, int regno)
if (frame->in_async_callback_fn) {
/* enforce return zero from async callbacks like timer */
- if (reg->type != SCALAR_VALUE) {
- verbose(env, "In async callback the register R%d is not a known value (%s)\n",
- regno, reg_type_str(env, reg->type));
- return -EINVAL;
- }
-
- if (!tnum_in(const_0, reg->var_off)) {
- verbose_invalid_scalar(env, reg, &const_0, "async callback", "R0");
- return -EINVAL;
- }
- return 0;
+ exit_ctx = "At async callback return";
+ range = retval_range(0, 0);
+ goto enforce_retval;
}
if (is_subprog && !frame->in_exception_callback_fn) {
@@ -15052,14 +15119,14 @@ static int check_return_code(struct bpf_verifier_env *env, int regno)
env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME ||
env->prog->expected_attach_type == BPF_CGROUP_UNIX_GETSOCKNAME)
- range = tnum_range(1, 1);
+ range = retval_range(1, 1);
if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
- range = tnum_range(0, 3);
+ range = retval_range(0, 3);
break;
case BPF_PROG_TYPE_CGROUP_SKB:
if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
- range = tnum_range(0, 3);
+ range = retval_range(0, 3);
enforce_attach_type_range = tnum_range(2, 3);
}
break;
@@ -15072,13 +15139,13 @@ static int check_return_code(struct bpf_verifier_env *env, int regno)
case BPF_PROG_TYPE_RAW_TRACEPOINT:
if (!env->prog->aux->attach_btf_id)
return 0;
- range = tnum_const(0);
+ range = retval_range(0, 0);
break;
case BPF_PROG_TYPE_TRACING:
switch (env->prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
- range = tnum_const(0);
+ range = retval_range(0, 0);
break;
case BPF_TRACE_RAW_TP:
case BPF_MODIFY_RETURN:
@@ -15090,7 +15157,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno)
}
break;
case BPF_PROG_TYPE_SK_LOOKUP:
- range = tnum_range(SK_DROP, SK_PASS);
+ range = retval_range(SK_DROP, SK_PASS);
break;
case BPF_PROG_TYPE_LSM:
@@ -15104,12 +15171,12 @@ static int check_return_code(struct bpf_verifier_env *env, int regno)
/* Make sure programs that attach to void
* hooks don't try to modify return value.
*/
- range = tnum_range(1, 1);
+ range = retval_range(1, 1);
}
break;
case BPF_PROG_TYPE_NETFILTER:
- range = tnum_range(NF_DROP, NF_ACCEPT);
+ range = retval_range(NF_DROP, NF_ACCEPT);
break;
case BPF_PROG_TYPE_EXT:
/* freplace program can return anything as its return value
@@ -15119,15 +15186,21 @@ static int check_return_code(struct bpf_verifier_env *env, int regno)
return 0;
}
+enforce_retval:
if (reg->type != SCALAR_VALUE) {
- verbose(env, "At program exit the register R%d is not a known value (%s)\n",
- regno, reg_type_str(env, reg->type));
+ verbose(env, "%s the register R%d is not a known value (%s)\n",
+ exit_ctx, regno, reg_type_str(env, reg->type));
return -EINVAL;
}
- if (!tnum_in(range, reg->var_off)) {
- verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
- if (prog->expected_attach_type == BPF_LSM_CGROUP &&
+ err = mark_chain_precision(env, regno);
+ if (err)
+ return err;
+
+ if (!retval_range_within(range, reg)) {
+ verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
+ if (!is_subprog &&
+ prog->expected_attach_type == BPF_LSM_CGROUP &&
prog_type == BPF_PROG_TYPE_LSM &&
!prog->aux->attach_func_proto->type)
verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
@@ -16892,7 +16965,8 @@ hit:
* the precision needs to be propagated back in
* the current state.
*/
- err = err ? : push_jmp_history(env, cur);
+ if (is_jmp_point(env, env->insn_idx))
+ err = err ? : push_jmp_history(env, cur, 0);
err = err ? : propagate_precision(env, &sl->state);
if (err)
return err;
@@ -17117,6 +17191,9 @@ static int do_check(struct bpf_verifier_env *env)
u8 class;
int err;
+ /* reset current history entry on each new instruction */
+ env->cur_hist_ent = NULL;
+
env->prev_insn_idx = prev_insn_idx;
if (env->insn_idx >= insn_cnt) {
verbose(env, "invalid insn idx %d insn_cnt %d\n",
@@ -17156,7 +17233,7 @@ static int do_check(struct bpf_verifier_env *env)
}
if (is_jmp_point(env, env->insn_idx)) {
- err = push_jmp_history(env, state);
+ err = push_jmp_history(env, state, 0);
if (err)
return err;
}
@@ -17410,7 +17487,7 @@ process_bpf_exit_full:
continue;
}
- err = check_return_code(env, BPF_REG_0);
+ err = check_return_code(env, BPF_REG_0, "R0");
if (err)
return err;
process_bpf_exit:
@@ -17871,10 +17948,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
return -E2BIG;
}
+ if (env->prog->aux->sleepable)
+ atomic64_inc(&map->sleepable_refcnt);
/* hold the map. If the program is rejected by verifier,
* the map will be released by release_maps() or it
* will be used by the valid program until it's unloaded
- * and all maps are released in free_used_maps()
+ * and all maps are released in bpf_free_used_maps()
*/
bpf_map_inc(map);
@@ -19091,9 +19170,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
env->exception_callback_subprog = env->subprog_cnt - 1;
/* Don't update insn_cnt, as add_hidden_subprog always appends insns */
- env->subprog_info[env->exception_callback_subprog].is_cb = true;
- env->subprog_info[env->exception_callback_subprog].is_async_cb = true;
- env->subprog_info[env->exception_callback_subprog].is_exception_cb = true;
+ mark_subprog_exc_cb(env, env->exception_callback_subprog);
}
for (i = 0; i < insn_cnt; i++, insn++) {
@@ -19793,7 +19870,7 @@ static void free_states(struct bpf_verifier_env *env)
}
}
-static int do_check_common(struct bpf_verifier_env *env, int subprog, bool is_ex_cb)
+static int do_check_common(struct bpf_verifier_env *env, int subprog)
{
bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
struct bpf_verifier_state *state;
@@ -19824,9 +19901,23 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog, bool is_ex
regs = state->frame[state->curframe]->regs;
if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
- ret = btf_prepare_func_args(env, subprog, regs, is_ex_cb);
+ u32 nargs;
+
+ ret = btf_prepare_func_args(env, subprog, regs, &nargs);
if (ret)
goto out;
+ if (subprog_is_exc_cb(env, subprog)) {
+ state->frame[0]->in_exception_callback_fn = true;
+ /* We have already ensured that the callback returns an integer, just
+ * like all global subprogs. We need to determine it only has a single
+ * scalar argument.
+ */
+ if (nargs != 1 || regs[BPF_REG_1].type != SCALAR_VALUE) {
+ verbose(env, "exception cb only supports single integer argument\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
if (regs[i].type == PTR_TO_CTX)
mark_reg_known_zero(env, regs, i);
@@ -19840,12 +19931,6 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog, bool is_ex
regs[i].id = ++env->id_gen;
}
}
- if (is_ex_cb) {
- state->frame[0]->in_exception_callback_fn = true;
- env->subprog_info[subprog].is_cb = true;
- env->subprog_info[subprog].is_async_cb = true;
- env->subprog_info[subprog].is_exception_cb = true;
- }
} else {
/* 1st arg to a function */
regs[BPF_REG_1].type = PTR_TO_CTX;
@@ -19925,7 +20010,7 @@ again:
env->insn_idx = env->subprog_info[i].start;
WARN_ON_ONCE(env->insn_idx == 0);
- ret = do_check_common(env, i, env->exception_callback_subprog == i);
+ ret = do_check_common(env, i);
if (ret) {
return ret;
} else if (env->log.level & BPF_LOG_LEVEL) {
@@ -19955,7 +20040,7 @@ static int do_check_main(struct bpf_verifier_env *env)
int ret;
env->insn_idx = 0;
- ret = do_check_common(env, 0, false);
+ ret = do_check_common(env, 0);
if (!ret)
env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
return ret;
@@ -20509,7 +20594,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
env->prog = *prog;
env->ops = bpf_verifier_ops[env->prog->type];
env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
- is_priv = bpf_capable();
+
+ env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
+ env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
+ env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token);
+ env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token);
+ env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF);
bpf_get_btf_vmlinux();
@@ -20541,12 +20631,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
env->strict_alignment = false;
- env->allow_ptr_leaks = bpf_allow_ptr_leaks();
- env->allow_uninit_stack = bpf_allow_uninit_stack();
- env->bypass_spec_v1 = bpf_bypass_spec_v1();
- env->bypass_spec_v4 = bpf_bypass_spec_v4();
- env->bpf_capable = bpf_capable();
-
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index c284a4ad0315..492d60e9c480 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -24,6 +24,7 @@
#include <linux/key.h>
#include <linux/verification.h>
#include <linux/namei.h>
+#include <linux/fileattr.h>
#include <net/bpf_sk_storage.h>
@@ -41,6 +42,9 @@
#define bpf_event_rcu_dereference(p) \
rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
+#define MAX_UPROBE_MULTI_CNT (1U << 20)
+#define MAX_KPROBE_MULTI_CNT (1U << 20)
+
#ifdef CONFIG_MODULES
struct bpf_trace_module {
struct module *module;
@@ -1431,6 +1435,72 @@ static int __init bpf_key_sig_kfuncs_init(void)
late_initcall(bpf_key_sig_kfuncs_init);
#endif /* CONFIG_KEYS */
+/* filesystem kfuncs */
+__bpf_kfunc_start_defs();
+
+/**
+ * bpf_get_file_xattr - get xattr of a file
+ * @file: file to get xattr from
+ * @name__str: name of the xattr
+ * @value_ptr: output buffer of the xattr value
+ *
+ * Get xattr *name__str* of *file* and store the output in *value_ptr*.
+ *
+ * For security reasons, only *name__str* with prefix "user." is allowed.
+ *
+ * Return: 0 on success, a negative value on error.
+ */
+__bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
+ struct bpf_dynptr_kern *value_ptr)
+{
+ struct dentry *dentry;
+ u32 value_len;
+ void *value;
+ int ret;
+
+ if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ return -EPERM;
+
+ value_len = __bpf_dynptr_size(value_ptr);
+ value = __bpf_dynptr_data_rw(value_ptr, value_len);
+ if (!value)
+ return -EINVAL;
+
+ dentry = file_dentry(file);
+ ret = inode_permission(&nop_mnt_idmap, dentry->d_inode, MAY_READ);
+ if (ret)
+ return ret;
+ return __vfs_getxattr(dentry, dentry->d_inode, name__str, value, value_len);
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_SET8_START(fs_kfunc_set_ids)
+BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
+BTF_SET8_END(fs_kfunc_set_ids)
+
+static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+ if (!btf_id_set8_contains(&fs_kfunc_set_ids, kfunc_id))
+ return 0;
+
+ /* Only allow to attach from LSM hooks, to avoid recursion */
+ return prog->type != BPF_PROG_TYPE_LSM ? -EACCES : 0;
+}
+
+static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &fs_kfunc_set_ids,
+ .filter = bpf_get_file_xattr_filter,
+};
+
+static int __init bpf_fs_kfuncs_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set);
+}
+
+late_initcall(bpf_fs_kfuncs_init);
+
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1559,7 +1629,7 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto();
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -2903,6 +2973,8 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
cnt = attr->link_create.kprobe_multi.cnt;
if (!cnt)
return -EINVAL;
+ if (cnt > MAX_KPROBE_MULTI_CNT)
+ return -E2BIG;
size = cnt * sizeof(*addrs);
addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
@@ -3277,6 +3349,8 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (!upath || !uoffsets || !cnt)
return -EINVAL;
+ if (cnt > MAX_UPROBE_MULTI_CNT)
+ return -E2BIG;
uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets);
ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies);
@@ -3317,15 +3391,19 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
goto error_free;
for (i = 0; i < cnt; i++) {
- if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) {
+ if (__get_user(uprobes[i].offset, uoffsets + i)) {
err = -EFAULT;
goto error_free;
}
+ if (uprobes[i].offset < 0) {
+ err = -EINVAL;
+ goto error_free;
+ }
if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) {
err = -EFAULT;
goto error_free;
}
- if (__get_user(uprobes[i].offset, uoffsets + i)) {
+ if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) {
err = -EFAULT;
goto error_free;
}
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index e380fdf756db..569e6d2dc55c 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6277,7 +6277,7 @@ static struct bpf_test tests[] = {
},
/* BPF_ALU64 | BPF_MOD | BPF_K off=1 (SMOD64) */
{
- "ALU64_SMOD_X: -7 % 2 = -1",
+ "ALU64_SMOD_K: -7 % 2 = -1",
.u.insns_int = {
BPF_LD_IMM64(R0, -7),
BPF_ALU64_IMM_OFF(BPF_MOD, R0, 2, 1),
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 5918d1b32e19..8906f7bdf4a9 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -12,6 +12,11 @@ extern struct bpf_struct_ops bpf_bpf_dummy_ops;
/* A common type for test_N with return value in bpf_dummy_ops */
typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...);
+static int dummy_ops_test_ret_function(struct bpf_dummy_ops_state *state, ...)
+{
+ return 0;
+}
+
struct bpf_dummy_ops_test_args {
u64 args[MAX_BPF_FUNC_ARGS];
struct bpf_dummy_ops_state state;
@@ -62,7 +67,7 @@ static int dummy_ops_copy_args(struct bpf_dummy_ops_test_args *args)
static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
{
- dummy_ops_test_ret_fn test = (void *)image;
+ dummy_ops_test_ret_fn test = (void *)image + cfi_get_offset();
struct bpf_dummy_ops_state *state = NULL;
/* state needs to be NULL if args[0] is 0 */
@@ -101,12 +106,11 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
goto out;
}
- image = bpf_jit_alloc_exec(PAGE_SIZE);
+ image = arch_alloc_bpf_trampoline(PAGE_SIZE);
if (!image) {
err = -ENOMEM;
goto out;
}
- set_vm_flush_reset_perms(image);
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
@@ -120,11 +124,12 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
op_idx = prog->expected_attach_type;
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[op_idx],
+ &dummy_ops_test_ret_function,
image, image + PAGE_SIZE);
if (err < 0)
goto out;
- set_memory_rox((long)image, 1);
+ arch_protect_bpf_trampoline(image, PAGE_SIZE);
prog_ret = dummy_ops_call_op(image, args);
err = dummy_ops_copy_args(args);
@@ -134,7 +139,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
err = -EFAULT;
out:
kfree(args);
- bpf_jit_free_exec(image);
+ arch_free_bpf_trampoline(image, PAGE_SIZE);
if (link)
bpf_link_put(&link->link);
kfree(tlinks);
@@ -220,6 +225,28 @@ static void bpf_dummy_unreg(void *kdata)
{
}
+static int bpf_dummy_test_1(struct bpf_dummy_ops_state *cb)
+{
+ return 0;
+}
+
+static int bpf_dummy_test_2(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2,
+ char a3, unsigned long a4)
+{
+ return 0;
+}
+
+static int bpf_dummy_test_sleepable(struct bpf_dummy_ops_state *cb)
+{
+ return 0;
+}
+
+static struct bpf_dummy_ops __bpf_bpf_dummy_ops = {
+ .test_1 = bpf_dummy_test_1,
+ .test_2 = bpf_dummy_test_2,
+ .test_sleepable = bpf_dummy_test_sleepable,
+};
+
struct bpf_struct_ops bpf_bpf_dummy_ops = {
.verifier_ops = &bpf_dummy_verifier_ops,
.init = bpf_dummy_init,
@@ -228,4 +255,5 @@ struct bpf_struct_ops bpf_bpf_dummy_ops = {
.reg = bpf_dummy_reg,
.unreg = bpf_dummy_unreg,
.name = "bpf_dummy_ops",
+ .cfi_stubs = &__bpf_bpf_dummy_ops,
};
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 711cf5d59816..dfd919374017 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -600,10 +600,21 @@ __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
refcount_dec(&p->cnt);
}
+__bpf_kfunc void bpf_kfunc_call_test_release_dtor(void *p)
+{
+ bpf_kfunc_call_test_release(p);
+}
+CFI_NOSEAL(bpf_kfunc_call_test_release_dtor);
+
__bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p)
{
}
+__bpf_kfunc void bpf_kfunc_call_memb_release_dtor(void *p)
+{
+}
+CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor);
+
__bpf_kfunc_end_defs();
BTF_SET8_START(bpf_test_modify_return_ids)
@@ -1671,9 +1682,9 @@ static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids)
BTF_ID(struct, prog_test_ref_kfunc)
-BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_test_release_dtor)
BTF_ID(struct, prog_test_member)
-BTF_ID(func, bpf_kfunc_call_memb_release)
+BTF_ID(func, bpf_kfunc_call_memb_release_dtor)
static int __init bpf_prog_test_run_init(void)
{
diff --git a/net/core/filter.c b/net/core/filter.c
index 6d89a9cf33c9..4ff6100c6a27 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -87,7 +87,7 @@
#include "dev.h"
static const struct bpf_func_proto *
-bpf_sk_base_func_proto(enum bpf_func_id func_id);
+bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
@@ -7862,7 +7862,7 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -7955,7 +7955,7 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return NULL;
}
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -7974,7 +7974,7 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8161,7 +8161,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8220,7 +8220,7 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
#if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)
@@ -8281,7 +8281,7 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_tcp_sock_proto;
#endif /* CONFIG_INET */
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8323,7 +8323,7 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_cgroup_classid_curr_proto;
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8367,7 +8367,7 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skc_lookup_tcp_proto;
#endif
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8378,7 +8378,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_load_bytes:
return &bpf_flow_dissector_load_bytes_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8405,7 +8405,7 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -8580,7 +8580,7 @@ static bool cg_skb_is_valid_access(int off, int size,
return false;
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
- if (!bpf_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_BPF))
return false;
break;
}
@@ -8592,7 +8592,7 @@ static bool cg_skb_is_valid_access(int off, int size,
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
case bpf_ctx_range(struct __sk_buff, tstamp):
- if (!bpf_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_BPF))
return false;
break;
default:
@@ -11236,7 +11236,7 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -11418,7 +11418,7 @@ sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_sk_release:
return &bpf_sk_release_proto;
default:
- return bpf_sk_base_func_proto(func_id);
+ return bpf_sk_base_func_proto(func_id, prog);
}
}
@@ -11752,7 +11752,7 @@ const struct bpf_func_proto bpf_sock_from_file_proto = {
};
static const struct bpf_func_proto *
-bpf_sk_base_func_proto(enum bpf_func_id func_id)
+bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
const struct bpf_func_proto *func;
@@ -11781,10 +11781,10 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
- if (!perfmon_capable())
+ if (!bpf_token_capable(prog->aux->token, CAP_PERFMON))
return NULL;
return func;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index b6f1d6dab3f2..4869c1c2d8f3 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -736,6 +736,39 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
return -EOPNOTSUPP;
}
+/**
+ * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag
+ * @ctx: XDP context pointer.
+ * @vlan_proto: Destination pointer for VLAN Tag protocol identifier (TPID).
+ * @vlan_tci: Destination pointer for VLAN TCI (VID + DEI + PCP)
+ *
+ * In case of success, ``vlan_proto`` contains *Tag protocol identifier (TPID)*,
+ * usually ``ETH_P_8021Q`` or ``ETH_P_8021AD``, but some networks can use
+ * custom TPIDs. ``vlan_proto`` is stored in **network byte order (BE)**
+ * and should be used as follows:
+ * ``if (vlan_proto == bpf_htons(ETH_P_8021Q)) do_something();``
+ *
+ * ``vlan_tci`` contains the remaining 16 bits of a VLAN tag.
+ * Driver is expected to provide those in **host byte order (usually LE)**,
+ * so the bpf program should not perform byte conversion.
+ * According to 802.1Q standard, *VLAN TCI (Tag control information)*
+ * is a bit field that contains:
+ * *VLAN identifier (VID)* that can be read with ``vlan_tci & 0xfff``,
+ * *Drop eligible indicator (DEI)* - 1 bit,
+ * *Priority code point (PCP)* - 3 bits.
+ * For detailed meaning of DEI and PCP, please refer to other sources.
+ *
+ * Return:
+ * * Returns 0 on success or ``-errno`` on error.
+ * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc
+ * * ``-ENODATA`` : VLAN tag was not stripped or is not available
+ */
+__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
+ __be16 *vlan_proto, u16 *vlan_tci)
+{
+ return -EOPNOTSUPP;
+}
+
__bpf_kfunc_end_defs();
BTF_SET8_START(xdp_metadata_kfunc_ids)
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 39dcccf0f174..634cfafa583d 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -191,7 +191,7 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
}
@@ -271,6 +271,74 @@ static int bpf_tcp_ca_validate(void *kdata)
return tcp_validate_congestion_control(kdata);
}
+static u32 bpf_tcp_ca_ssthresh(struct sock *sk)
+{
+ return 0;
+}
+
+static void bpf_tcp_ca_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+{
+}
+
+static void bpf_tcp_ca_set_state(struct sock *sk, u8 new_state)
+{
+}
+
+static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
+{
+}
+
+static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags)
+{
+}
+
+static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *sample)
+{
+}
+
+static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
+{
+ return 0;
+}
+
+static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs)
+{
+}
+
+static u32 bpf_tcp_ca_undo_cwnd(struct sock *sk)
+{
+ return 0;
+}
+
+static u32 bpf_tcp_ca_sndbuf_expand(struct sock *sk)
+{
+ return 0;
+}
+
+static void __bpf_tcp_ca_init(struct sock *sk)
+{
+}
+
+static void __bpf_tcp_ca_release(struct sock *sk)
+{
+}
+
+static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
+ .ssthresh = bpf_tcp_ca_ssthresh,
+ .cong_avoid = bpf_tcp_ca_cong_avoid,
+ .set_state = bpf_tcp_ca_set_state,
+ .cwnd_event = bpf_tcp_ca_cwnd_event,
+ .in_ack_event = bpf_tcp_ca_in_ack_event,
+ .pkts_acked = bpf_tcp_ca_pkts_acked,
+ .min_tso_segs = bpf_tcp_ca_min_tso_segs,
+ .cong_control = bpf_tcp_ca_cong_control,
+ .undo_cwnd = bpf_tcp_ca_undo_cwnd,
+ .sndbuf_expand = bpf_tcp_ca_sndbuf_expand,
+
+ .init = __bpf_tcp_ca_init,
+ .release = __bpf_tcp_ca_release,
+};
+
struct bpf_struct_ops bpf_tcp_congestion_ops = {
.verifier_ops = &bpf_tcp_ca_verifier_ops,
.reg = bpf_tcp_ca_reg,
@@ -281,6 +349,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
.init = bpf_tcp_ca_init,
.validate = bpf_tcp_ca_validate,
.name = "tcp_congestion_ops",
+ .cfi_stubs = &__bpf_ops_tcp_congestion_ops,
};
static int __init bpf_tcp_ca_kfunc_init(void)
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index 0e4beae421f8..5257d5e7eb09 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -314,7 +314,7 @@ static bool nf_is_valid_access(int off, int size, enum bpf_access_type type,
static const struct bpf_func_proto *
bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
- return bpf_base_func_proto(func_id);
+ return bpf_base_func_proto(func_id, prog);
}
const struct bpf_verifier_ops netfilter_verifier_ops = {
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 4f6f538a5462..28711cc44ced 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -125,6 +125,18 @@ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
}
EXPORT_SYMBOL(xp_set_rxq_info);
+void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc)
+{
+ u32 i;
+
+ for (i = 0; i < pool->heads_cnt; i++) {
+ struct xdp_buff_xsk *xskb = &pool->heads[i];
+
+ memcpy(xskb->cb + desc->off, desc->src, desc->bytes);
+ }
+}
+EXPORT_SYMBOL(xp_fill_cb);
+
static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
{
struct netdev_bpf bpf;
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index cd47f88921f5..547cec77ba03 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
+obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c13dc3ef7910..1b7e75159727 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -4218,6 +4218,8 @@ void __init xfrm_init(void)
#ifdef CONFIG_XFRM_ESPINTCP
espintcp_init();
#endif
+
+ register_xfrm_state_bpf();
}
#ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c
new file mode 100644
index 000000000000..9e20d4a377f7
--- /dev/null
+++ b/net/xfrm/xfrm_state_bpf.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable XFRM state BPF helpers.
+ *
+ * Note that it is allowed to break compatibility for these functions since the
+ * interface they are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
+#include <net/xdp.h>
+#include <net/xfrm.h>
+
+/* bpf_xfrm_state_opts - Options for XFRM state lookup helpers
+ *
+ * Members:
+ * @error - Out parameter, set for any errors encountered
+ * Values:
+ * -EINVAL - netns_id is less than -1
+ * -EINVAL - opts__sz isn't BPF_XFRM_STATE_OPTS_SZ
+ * -ENONET - No network namespace found for netns_id
+ * -ENOENT - No xfrm_state found
+ * @netns_id - Specify the network namespace for lookup
+ * Values:
+ * BPF_F_CURRENT_NETNS (-1)
+ * Use namespace associated with ctx
+ * [0, S32_MAX]
+ * Network Namespace ID
+ * @mark - XFRM mark to match on
+ * @daddr - Destination address to match on
+ * @spi - Security parameter index to match on
+ * @proto - IP protocol to match on (eg. IPPROTO_ESP)
+ * @family - Protocol family to match on (AF_INET/AF_INET6)
+ */
+struct bpf_xfrm_state_opts {
+ s32 error;
+ s32 netns_id;
+ u32 mark;
+ xfrm_address_t daddr;
+ __be32 spi;
+ u8 proto;
+ u16 family;
+};
+
+enum {
+ BPF_XFRM_STATE_OPTS_SZ = sizeof(struct bpf_xfrm_state_opts),
+};
+
+__bpf_kfunc_start_defs();
+
+/* bpf_xdp_get_xfrm_state - Get XFRM state
+ *
+ * A `struct xfrm_state *`, if found, must be released with a corresponding
+ * bpf_xdp_xfrm_state_release.
+ *
+ * Parameters:
+ * @ctx - Pointer to ctx (xdp_md) in XDP program
+ * Cannot be NULL
+ * @opts - Options for lookup (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_xfrm_state_opts structure
+ * Must be BPF_XFRM_STATE_OPTS_SZ
+ */
+__bpf_kfunc struct xfrm_state *
+bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32 opts__sz)
+{
+ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+ struct net *net = dev_net(xdp->rxq->dev);
+ struct xfrm_state *x;
+
+ if (!opts || opts__sz < sizeof(opts->error))
+ return NULL;
+
+ if (opts__sz != BPF_XFRM_STATE_OPTS_SZ) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+
+ if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+
+ if (opts->netns_id >= 0) {
+ net = get_net_ns_by_id(net, opts->netns_id);
+ if (unlikely(!net)) {
+ opts->error = -ENONET;
+ return NULL;
+ }
+ }
+
+ x = xfrm_state_lookup(net, opts->mark, &opts->daddr, opts->spi,
+ opts->proto, opts->family);
+
+ if (opts->netns_id >= 0)
+ put_net(net);
+ if (!x)
+ opts->error = -ENOENT;
+
+ return x;
+}
+
+/* bpf_xdp_xfrm_state_release - Release acquired xfrm_state object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any references remain in the program in all of the explored
+ * states.
+ *
+ * Parameters:
+ * @x - Pointer to referenced xfrm_state object, obtained using
+ * bpf_xdp_get_xfrm_state.
+ */
+__bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x)
+{
+ xfrm_state_put(x);
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_SET8_START(xfrm_state_kfunc_set)
+BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE)
+BTF_SET8_END(xfrm_state_kfunc_set)
+
+static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &xfrm_state_kfunc_set,
+};
+
+int __init register_xfrm_state_bpf(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
+ &xfrm_state_xdp_kfunc_set);
+}
diff --git a/security/security.c b/security/security.c
index dcb3e7014f9b..088a79c35c26 100644
--- a/security/security.c
+++ b/security/security.c
@@ -5167,29 +5167,87 @@ int security_bpf_prog(struct bpf_prog *prog)
}
/**
- * security_bpf_map_alloc() - Allocate a bpf map LSM blob
- * @map: bpf map
+ * security_bpf_map_create() - Check if BPF map creation is allowed
+ * @map: BPF map object
+ * @attr: BPF syscall attributes used to create BPF map
+ * @token: BPF token used to grant user access
+ *
+ * Do a check when the kernel creates a new BPF map. This is also the
+ * point where LSM blob is allocated for LSMs that need them.
+ *
+ * Return: Returns 0 on success, error on failure.
+ */
+int security_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token)
+{
+ return call_int_hook(bpf_map_create, 0, map, attr, token);
+}
+
+/**
+ * security_bpf_prog_load() - Check if loading of BPF program is allowed
+ * @prog: BPF program object
+ * @attr: BPF syscall attributes used to create BPF program
+ * @token: BPF token used to grant user access to BPF subsystem
*
- * Initialize the security field inside bpf map.
+ * Perform an access control check when the kernel loads a BPF program and
+ * allocates associated BPF program object. This hook is also responsible for
+ * allocating any required LSM state for the BPF program.
*
* Return: Returns 0 on success, error on failure.
*/
-int security_bpf_map_alloc(struct bpf_map *map)
+int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token)
{
- return call_int_hook(bpf_map_alloc_security, 0, map);
+ return call_int_hook(bpf_prog_load, 0, prog, attr, token);
}
/**
- * security_bpf_prog_alloc() - Allocate a bpf program LSM blob
- * @aux: bpf program aux info struct
+ * security_bpf_token_create() - Check if creating of BPF token is allowed
+ * @token: BPF token object
+ * @attr: BPF syscall attributes used to create BPF token
+ * @path: path pointing to BPF FS mount point from which BPF token is created
*
- * Initialize the security field inside bpf program.
+ * Do a check when the kernel instantiates a new BPF token object from BPF FS
+ * instance. This is also the point where LSM blob can be allocated for LSMs.
*
* Return: Returns 0 on success, error on failure.
*/
-int security_bpf_prog_alloc(struct bpf_prog_aux *aux)
+int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
+ struct path *path)
{
- return call_int_hook(bpf_prog_alloc_security, 0, aux);
+ return call_int_hook(bpf_token_create, 0, token, attr, path);
+}
+
+/**
+ * security_bpf_token_cmd() - Check if BPF token is allowed to delegate
+ * requested BPF syscall command
+ * @token: BPF token object
+ * @cmd: BPF syscall command requested to be delegated by BPF token
+ *
+ * Do a check when the kernel decides whether provided BPF token should allow
+ * delegation of requested BPF syscall command.
+ *
+ * Return: Returns 0 on success, error on failure.
+ */
+int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd)
+{
+ return call_int_hook(bpf_token_cmd, 0, token, cmd);
+}
+
+/**
+ * security_bpf_token_capable() - Check if BPF token is allowed to delegate
+ * requested BPF-related capability
+ * @token: BPF token object
+ * @cap: capabilities requested to be delegated by BPF token
+ *
+ * Do a check when the kernel decides whether provided BPF token should allow
+ * delegation of requested BPF-related capabilities.
+ *
+ * Return: Returns 0 on success, error on failure.
+ */
+int security_bpf_token_capable(const struct bpf_token *token, int cap)
+{
+ return call_int_hook(bpf_token_capable, 0, token, cap);
}
/**
@@ -5200,18 +5258,29 @@ int security_bpf_prog_alloc(struct bpf_prog_aux *aux)
*/
void security_bpf_map_free(struct bpf_map *map)
{
- call_void_hook(bpf_map_free_security, map);
+ call_void_hook(bpf_map_free, map);
+}
+
+/**
+ * security_bpf_prog_free() - Free a BPF program's LSM blob
+ * @prog: BPF program struct
+ *
+ * Clean up the security information stored inside BPF program.
+ */
+void security_bpf_prog_free(struct bpf_prog *prog)
+{
+ call_void_hook(bpf_prog_free, prog);
}
/**
- * security_bpf_prog_free() - Free a bpf program's LSM blob
- * @aux: bpf program aux info struct
+ * security_bpf_token_free() - Free a BPF token's LSM blob
+ * @token: BPF token struct
*
- * Clean up the security information stored inside bpf prog.
+ * Clean up the security information stored inside BPF token.
*/
-void security_bpf_prog_free(struct bpf_prog_aux *aux)
+void security_bpf_token_free(struct bpf_token *token)
{
- call_void_hook(bpf_prog_free_security, aux);
+ call_void_hook(bpf_token_free, token);
}
#endif /* CONFIG_BPF_SYSCALL */
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index feda711c6b7b..1501e95366a1 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6783,7 +6783,8 @@ static int selinux_bpf_prog(struct bpf_prog *prog)
BPF__PROG_RUN, NULL);
}
-static int selinux_bpf_map_alloc(struct bpf_map *map)
+static int selinux_bpf_map_create(struct bpf_map *map, union bpf_attr *attr,
+ struct bpf_token *token)
{
struct bpf_security_struct *bpfsec;
@@ -6805,7 +6806,8 @@ static void selinux_bpf_map_free(struct bpf_map *map)
kfree(bpfsec);
}
-static int selinux_bpf_prog_alloc(struct bpf_prog_aux *aux)
+static int selinux_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
+ struct bpf_token *token)
{
struct bpf_security_struct *bpfsec;
@@ -6814,16 +6816,39 @@ static int selinux_bpf_prog_alloc(struct bpf_prog_aux *aux)
return -ENOMEM;
bpfsec->sid = current_sid();
- aux->security = bpfsec;
+ prog->aux->security = bpfsec;
return 0;
}
-static void selinux_bpf_prog_free(struct bpf_prog_aux *aux)
+static void selinux_bpf_prog_free(struct bpf_prog *prog)
{
- struct bpf_security_struct *bpfsec = aux->security;
+ struct bpf_security_struct *bpfsec = prog->aux->security;
- aux->security = NULL;
+ prog->aux->security = NULL;
+ kfree(bpfsec);
+}
+
+static int selinux_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
+ struct path *path)
+{
+ struct bpf_security_struct *bpfsec;
+
+ bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL);
+ if (!bpfsec)
+ return -ENOMEM;
+
+ bpfsec->sid = current_sid();
+ token->security = bpfsec;
+
+ return 0;
+}
+
+static void selinux_bpf_token_free(struct bpf_token *token)
+{
+ struct bpf_security_struct *bpfsec = token->security;
+
+ token->security = NULL;
kfree(bpfsec);
}
#endif
@@ -7179,8 +7204,9 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
LSM_HOOK_INIT(bpf, selinux_bpf),
LSM_HOOK_INIT(bpf_map, selinux_bpf_map),
LSM_HOOK_INIT(bpf_prog, selinux_bpf_prog),
- LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free),
- LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free),
+ LSM_HOOK_INIT(bpf_map_free, selinux_bpf_map_free),
+ LSM_HOOK_INIT(bpf_prog_free, selinux_bpf_prog_free),
+ LSM_HOOK_INIT(bpf_token_free, selinux_bpf_token_free),
#endif
#ifdef CONFIG_PERF_EVENTS
@@ -7237,8 +7263,9 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
LSM_HOOK_INIT(audit_rule_init, selinux_audit_rule_init),
#endif
#ifdef CONFIG_BPF_SYSCALL
- LSM_HOOK_INIT(bpf_map_alloc_security, selinux_bpf_map_alloc),
- LSM_HOOK_INIT(bpf_prog_alloc_security, selinux_bpf_prog_alloc),
+ LSM_HOOK_INIT(bpf_map_create, selinux_bpf_map_create),
+ LSM_HOOK_INIT(bpf_prog_load, selinux_bpf_prog_load),
+ LSM_HOOK_INIT(bpf_token_create, selinux_bpf_token_create),
#endif
#ifdef CONFIG_PERF_EVENTS
LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc),
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e88746ba7d21..e0545201b55f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -847,6 +847,36 @@ union bpf_iter_link_info {
* Returns zero on success. On error, -1 is returned and *errno*
* is set appropriately.
*
+ * BPF_TOKEN_CREATE
+ * Description
+ * Create BPF token with embedded information about what
+ * BPF-related functionality it allows:
+ * - a set of allowed bpf() syscall commands;
+ * - a set of allowed BPF map types to be created with
+ * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
+ * - a set of allowed BPF program types and BPF program attach
+ * types to be loaded with BPF_PROG_LOAD command, if
+ * BPF_PROG_LOAD itself is allowed.
+ *
+ * BPF token is created (derived) from an instance of BPF FS,
+ * assuming it has necessary delegation mount options specified.
+ * This BPF token can be passed as an extra parameter to various
+ * bpf() syscall commands to grant BPF subsystem functionality to
+ * unprivileged processes.
+ *
+ * When created, BPF token is "associated" with the owning
+ * user namespace of BPF FS instance (super block) that it was
+ * derived from, and subsequent BPF operations performed with
+ * BPF token would be performing capabilities checks (i.e.,
+ * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
+ * that user namespace. Without BPF token, such capabilities
+ * have to be granted in init user namespace, making bpf()
+ * syscall incompatible with user namespace, for the most part.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
* NOTES
* eBPF objects (maps and programs) can be shared between processes.
*
@@ -901,6 +931,8 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
+ BPF_TOKEN_CREATE,
+ __MAX_BPF_CMD,
};
enum bpf_map_type {
@@ -951,6 +983,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_BLOOM_FILTER,
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
+ __MAX_BPF_MAP_TYPE
};
/* Note that tracing related programs such as
@@ -995,6 +1028,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
BPF_PROG_TYPE_NETFILTER,
+ __MAX_BPF_PROG_TYPE
};
enum bpf_attach_type {
@@ -1074,9 +1108,11 @@ enum bpf_link_type {
BPF_LINK_TYPE_TCX = 11,
BPF_LINK_TYPE_UPROBE_MULTI = 12,
BPF_LINK_TYPE_NETKIT = 13,
- MAX_BPF_LINK_TYPE,
+ __MAX_BPF_LINK_TYPE,
};
+#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE
+
enum bpf_perf_event_type {
BPF_PERF_EVENT_UNSPEC = 0,
BPF_PERF_EVENT_UPROBE = 1,
@@ -1401,6 +1437,7 @@ union bpf_attr {
* to using 5 hash functions).
*/
__u64 map_extra;
+ __u32 map_token_fd;
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -1470,6 +1507,7 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 log_true_size;
+ __u32 prog_token_fd;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -1582,6 +1620,7 @@ union bpf_attr {
* truncated), or smaller (if log buffer wasn't filled completely).
*/
__u32 btf_log_true_size;
+ __u32 btf_token_fd;
};
struct {
@@ -1712,6 +1751,11 @@ union bpf_attr {
__u32 flags; /* extra flags */
} prog_bind_map;
+ struct { /* struct used by BPF_TOKEN_CREATE command */
+ __u32 flags;
+ __u32 bpffs_fd;
+ } token_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 424c5e28f495..93cb411adf72 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -44,10 +44,13 @@ enum netdev_xdp_act {
* timestamp via bpf_xdp_metadata_rx_timestamp().
* @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet
* hash via bpf_xdp_metadata_rx_hash().
+ * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive
+ * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
*/
enum netdev_xdp_rx_metadata {
NETDEV_XDP_RX_METADATA_TIMESTAMP = 1,
NETDEV_XDP_RX_METADATA_HASH = 2,
+ NETDEV_XDP_RX_METADATA_VLAN_TAG = 4,
};
/**
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 2d0c282c8588..b6619199a706 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
- usdt.o zip.o elf.o
+ usdt.o zip.o elf.o features.o
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 9dc9625651dc..0ad8e532b3cf 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
* [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/
* [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper")
*/
-int probe_memcg_account(void)
+int probe_memcg_account(int token_fd)
{
const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
struct bpf_insn insns[] = {
@@ -120,6 +120,7 @@ int probe_memcg_account(void)
attr.insns = ptr_to_u64(insns);
attr.insn_cnt = insn_cnt;
attr.license = ptr_to_u64("GPL");
+ attr.prog_token_fd = token_fd;
prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz);
if (prog_fd >= 0) {
@@ -146,7 +147,7 @@ int bump_rlimit_memlock(void)
struct rlimit rlim;
/* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
- if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT))
+ if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT))
return 0;
memlock_bumped = true;
@@ -169,7 +170,7 @@ int bpf_map_create(enum bpf_map_type map_type,
__u32 max_entries,
const struct bpf_map_create_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+ const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd);
union bpf_attr attr;
int fd;
@@ -181,7 +182,7 @@ int bpf_map_create(enum bpf_map_type map_type,
return libbpf_err(-EINVAL);
attr.map_type = map_type;
- if (map_name && kernel_supports(NULL, FEAT_PROG_NAME))
+ if (map_name && feat_supported(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.key_size = key_size;
attr.value_size = value_size;
@@ -198,6 +199,8 @@ int bpf_map_create(enum bpf_map_type map_type,
attr.numa_node = OPTS_GET(opts, numa_node, 0);
attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
+ attr.map_token_fd = OPTS_GET(opts, token_fd, 0);
+
fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
return libbpf_err_errno(fd);
}
@@ -232,7 +235,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, size_t insn_cnt,
struct bpf_prog_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, log_true_size);
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
void *finfo = NULL, *linfo = NULL;
const char *func_info, *line_info;
__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
@@ -261,8 +264,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
attr.prog_flags = OPTS_GET(opts, prog_flags, 0);
attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0);
attr.kern_version = OPTS_GET(opts, kern_version, 0);
+ attr.prog_token_fd = OPTS_GET(opts, token_fd, 0);
- if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME))
+ if (prog_name && feat_supported(NULL, FEAT_PROG_NAME))
libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
attr.license = ptr_to_u64(license);
@@ -1182,7 +1186,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size);
+ const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd);
union bpf_attr attr;
char *log_buf;
size_t log_size;
@@ -1207,6 +1211,8 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts
attr.btf = ptr_to_u64(btf_data);
attr.btf_size = btf_size;
+ attr.btf_token_fd = OPTS_GET(opts, token_fd, 0);
+
/* log_level == 0 and log_buf != NULL means "try loading without
* log_buf, but retry with log_buf and log_level=1 on error", which is
* consistent across low-level and high-level BTF and program loading
@@ -1287,3 +1293,20 @@ int bpf_prog_bind_map(int prog_fd, int map_fd,
ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz);
return libbpf_err_errno(ret);
}
+
+int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, token_create);
+ union bpf_attr attr;
+ int fd;
+
+ if (!OPTS_VALID(opts, bpf_token_create_opts))
+ return libbpf_err(-EINVAL);
+
+ memset(&attr, 0, attr_sz);
+ attr.token_create.bpffs_fd = bpffs_fd;
+ attr.token_create.flags = OPTS_GET(opts, flags, 0);
+
+ fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz);
+ return libbpf_err_errno(fd);
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index d0f53772bdc0..991b86bfe7e4 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -51,8 +51,11 @@ struct bpf_map_create_opts {
__u32 numa_node;
__u32 map_ifindex;
+
+ __u32 token_fd;
+ size_t :0;
};
-#define bpf_map_create_opts__last_field map_ifindex
+#define bpf_map_create_opts__last_field token_fd
LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
const char *map_name,
@@ -102,9 +105,10 @@ struct bpf_prog_load_opts {
* If kernel doesn't support this feature, log_size is left unchanged.
*/
__u32 log_true_size;
+ __u32 token_fd;
size_t :0;
};
-#define bpf_prog_load_opts__last_field log_true_size
+#define bpf_prog_load_opts__last_field token_fd
LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
const char *prog_name, const char *license,
@@ -130,9 +134,10 @@ struct bpf_btf_load_opts {
* If kernel doesn't support this feature, log_size is left unchanged.
*/
__u32 log_true_size;
+ __u32 token_fd;
size_t :0;
};
-#define bpf_btf_load_opts__last_field log_true_size
+#define bpf_btf_load_opts__last_field token_fd
LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
struct bpf_btf_load_opts *opts);
@@ -640,6 +645,30 @@ struct bpf_test_run_opts {
LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
struct bpf_test_run_opts *opts);
+struct bpf_token_create_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags;
+ size_t :0;
+};
+#define bpf_token_create_opts__last_field flags
+
+/**
+ * @brief **bpf_token_create()** creates a new instance of BPF token derived
+ * from specified BPF FS mount point.
+ *
+ * BPF token created with this API can be passed to bpf() syscall for
+ * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc.
+ *
+ * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token
+ * instance.
+ * @param opts optional BPF token creation options, can be NULL
+ *
+ * @return BPF token FD > 0, on success; negative error code, otherwise (errno
+ * is also set to the error code)
+ */
+LIBBPF_API int bpf_token_create(int bpffs_fd,
+ struct bpf_token_create_opts *opts);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index 1ac57bb7ac55..7325a12692a3 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -111,6 +111,38 @@ enum bpf_enum_value_kind {
val; \
})
+/*
+ * Write to a bitfield, identified by s->field.
+ * This is the inverse of BPF_CORE_WRITE_BITFIELD().
+ */
+#define BPF_CORE_WRITE_BITFIELD(s, field, new_val) ({ \
+ void *p = (void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \
+ unsigned int byte_size = __CORE_RELO(s, field, BYTE_SIZE); \
+ unsigned int lshift = __CORE_RELO(s, field, LSHIFT_U64); \
+ unsigned int rshift = __CORE_RELO(s, field, RSHIFT_U64); \
+ unsigned long long mask, val, nval = new_val; \
+ unsigned int rpad = rshift - lshift; \
+ \
+ asm volatile("" : "+r"(p)); \
+ \
+ switch (byte_size) { \
+ case 1: val = *(unsigned char *)p; break; \
+ case 2: val = *(unsigned short *)p; break; \
+ case 4: val = *(unsigned int *)p; break; \
+ case 8: val = *(unsigned long long *)p; break; \
+ } \
+ \
+ mask = (~0ULL << rshift) >> lshift; \
+ val = (val & ~mask) | ((nval << rpad) & mask); \
+ \
+ switch (byte_size) { \
+ case 1: *(unsigned char *)p = val; break; \
+ case 2: *(unsigned short *)p = val; break; \
+ case 4: *(unsigned int *)p = val; break; \
+ case 8: *(unsigned long long *)p = val; break; \
+ } \
+})
+
#define ___bpf_field_ref1(field) (field)
#define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field)
#define ___bpf_field_ref(args...) \
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index ee95fd379d4d..63033c334320 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1317,7 +1317,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf)
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
-int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level)
+int btf_load_into_kernel(struct btf *btf,
+ char *log_buf, size_t log_sz, __u32 log_level,
+ int token_fd)
{
LIBBPF_OPTS(bpf_btf_load_opts, opts);
__u32 buf_sz = 0, raw_size;
@@ -1367,6 +1369,7 @@ retry_load:
opts.log_level = log_level;
}
+ opts.token_fd = token_fd;
btf->fd = bpf_btf_load(raw_data, raw_size, &opts);
if (btf->fd < 0) {
/* time to turn on verbose mode and try again */
@@ -1394,7 +1397,7 @@ done:
int btf__load_into_kernel(struct btf *btf)
{
- return btf_load_into_kernel(btf, NULL, 0, 0);
+ return btf_load_into_kernel(btf, NULL, 0, 0, 0);
}
int btf__fd(const struct btf *btf)
diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
index b02faec748a5..c92e02394159 100644
--- a/tools/lib/bpf/elf.c
+++ b/tools/lib/bpf/elf.c
@@ -11,8 +11,6 @@
#include "libbpf_internal.h"
#include "str_error.h"
-#define STRERR_BUFSIZE 128
-
/* A SHT_GNU_versym section holds 16-bit words. This bit is set if
* the symbol is hidden and can only be seen when referenced using an
* explicit version number. This is a GNU extension.
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
new file mode 100644
index 000000000000..ce98a334be21
--- /dev/null
+++ b/tools/lib/bpf/features.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <linux/kernel.h>
+#include <linux/filter.h>
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_common.h"
+#include "libbpf_internal.h"
+#include "str_error.h"
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64)(unsigned long)ptr;
+}
+
+static int probe_fd(int fd)
+{
+ if (fd >= 0)
+ close(fd);
+ return fd >= 0;
+}
+
+static int probe_kern_prog_name(int token_fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ union bpf_attr attr;
+ int ret;
+
+ memset(&attr, 0, attr_sz);
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.license = ptr_to_u64("GPL");
+ attr.insns = ptr_to_u64(insns);
+ attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
+ attr.prog_token_fd = token_fd;
+ libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
+
+ /* make sure loading with name works */
+ ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
+ return probe_fd(ret);
+}
+
+static int probe_kern_global_data(int token_fd)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd);
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd);
+ int ret, map, insn_cnt = ARRAY_SIZE(insns);
+
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts);
+ if (map < 0) {
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, -ret);
+ return ret;
+ }
+
+ insns[0].imm = map;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts);
+ close(map);
+ return probe_fd(ret);
+}
+
+static int probe_kern_btf(int token_fd)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_func(int token_fd)
+{
+ static const char strs[] = "\0int\0x\0a";
+ /* void x(int a) {} */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* FUNC_PROTO */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+ BTF_PARAM_ENC(7, 1),
+ /* FUNC x */ /* [3] */
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_func_global(int token_fd)
+{
+ static const char strs[] = "\0int\0x\0a";
+ /* static void x(int a) {} */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* FUNC_PROTO */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+ BTF_PARAM_ENC(7, 1),
+ /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_datasec(int token_fd)
+{
+ static const char strs[] = "\0x\0.data";
+ /* static int a; */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC val */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_float(int token_fd)
+{
+ static const char strs[] = "\0float";
+ __u32 types[] = {
+ /* float */
+ BTF_TYPE_FLOAT_ENC(1, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_decl_tag(int token_fd)
+{
+ static const char strs[] = "\0tag";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* attr */
+ BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_btf_type_tag(int token_fd)
+{
+ static const char strs[] = "\0tag";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* attr */
+ BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */
+ /* ptr */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+static int probe_kern_array_mmap(int token_fd)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .map_flags = BPF_F_MMAPABLE,
+ .token_fd = token_fd,
+ );
+ int fd;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_kern_exp_attach_type(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .token_fd = token_fd,
+ );
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int fd, insn_cnt = ARRAY_SIZE(insns);
+
+ /* use any valid combination of program type and (optional)
+ * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
+ * to see if kernel supports expected_attach_type field for
+ * BPF_PROG_LOAD command
+ */
+ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_kern_probe_read_kernel(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd);
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
+ BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
+ BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
+ BPF_EXIT_INSN(),
+ };
+ int fd, insn_cnt = ARRAY_SIZE(insns);
+
+ fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(fd);
+}
+
+static int probe_prog_bind_map(int token_fd)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd);
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd);
+ int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
+
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts);
+ if (map < 0) {
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, -ret);
+ return ret;
+ }
+
+ prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts);
+ if (prog < 0) {
+ close(map);
+ return 0;
+ }
+
+ ret = bpf_prog_bind_map(prog, map, NULL);
+
+ close(map);
+ close(prog);
+
+ return ret >= 0;
+}
+
+static int probe_module_btf(int token_fd)
+{
+ static const char strs[] = "\0int";
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+ };
+ struct bpf_btf_info info;
+ __u32 len = sizeof(info);
+ char name[16];
+ int fd, err;
+
+ fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd);
+ if (fd < 0)
+ return 0; /* BTF not supported at all */
+
+ memset(&info, 0, sizeof(info));
+ info.name = ptr_to_u64(name);
+ info.name_len = sizeof(name);
+
+ /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
+ * kernel's module BTF support coincides with support for
+ * name/name_len fields in struct bpf_btf_info.
+ */
+ err = bpf_btf_get_info_by_fd(fd, &info, &len);
+ close(fd);
+ return !err;
+}
+
+static int probe_perf_link(int token_fd)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd);
+ int prog_fd, link_fd, err;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
+ insns, ARRAY_SIZE(insns), &opts);
+ if (prog_fd < 0)
+ return -errno;
+
+ /* use invalid perf_event FD to get EBADF, if link is supported;
+ * otherwise EINVAL should be returned
+ */
+ link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
+ err = -errno; /* close() can clobber errno */
+
+ if (link_fd >= 0)
+ close(link_fd);
+ close(prog_fd);
+
+ return link_fd < 0 && err == -EBADF;
+}
+
+static int probe_uprobe_multi_link(int token_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
+ .expected_attach_type = BPF_TRACE_UPROBE_MULTI,
+ .token_fd = token_fd,
+ );
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, link_fd, err;
+ unsigned long offset = 0;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL",
+ insns, ARRAY_SIZE(insns), &load_opts);
+ if (prog_fd < 0)
+ return -errno;
+
+ /* Creating uprobe in '/' binary should fail with -EBADF. */
+ link_opts.uprobe_multi.path = "/";
+ link_opts.uprobe_multi.offsets = &offset;
+ link_opts.uprobe_multi.cnt = 1;
+
+ link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
+ err = -errno; /* close() can clobber errno */
+
+ if (link_fd >= 0)
+ close(link_fd);
+ close(prog_fd);
+
+ return link_fd < 0 && err == -EBADF;
+}
+
+static int probe_kern_bpf_cookie(int token_fd)
+{
+ struct bpf_insn insns[] = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd);
+ int ret, insn_cnt = ARRAY_SIZE(insns);
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+ return probe_fd(ret);
+}
+
+static int probe_kern_btf_enum64(int token_fd)
+{
+ static const char strs[] = "\0enum64";
+ __u32 types[] = {
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs), token_fd));
+}
+
+typedef int (*feature_probe_fn)(int /* token_fd */);
+
+static struct kern_feature_cache feature_cache;
+
+static struct kern_feature_desc {
+ const char *desc;
+ feature_probe_fn probe;
+} feature_probes[__FEAT_CNT] = {
+ [FEAT_PROG_NAME] = {
+ "BPF program name", probe_kern_prog_name,
+ },
+ [FEAT_GLOBAL_DATA] = {
+ "global variables", probe_kern_global_data,
+ },
+ [FEAT_BTF] = {
+ "minimal BTF", probe_kern_btf,
+ },
+ [FEAT_BTF_FUNC] = {
+ "BTF functions", probe_kern_btf_func,
+ },
+ [FEAT_BTF_GLOBAL_FUNC] = {
+ "BTF global function", probe_kern_btf_func_global,
+ },
+ [FEAT_BTF_DATASEC] = {
+ "BTF data section and variable", probe_kern_btf_datasec,
+ },
+ [FEAT_ARRAY_MMAP] = {
+ "ARRAY map mmap()", probe_kern_array_mmap,
+ },
+ [FEAT_EXP_ATTACH_TYPE] = {
+ "BPF_PROG_LOAD expected_attach_type attribute",
+ probe_kern_exp_attach_type,
+ },
+ [FEAT_PROBE_READ_KERN] = {
+ "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
+ },
+ [FEAT_PROG_BIND_MAP] = {
+ "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
+ },
+ [FEAT_MODULE_BTF] = {
+ "module BTF support", probe_module_btf,
+ },
+ [FEAT_BTF_FLOAT] = {
+ "BTF_KIND_FLOAT support", probe_kern_btf_float,
+ },
+ [FEAT_PERF_LINK] = {
+ "BPF perf link support", probe_perf_link,
+ },
+ [FEAT_BTF_DECL_TAG] = {
+ "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
+ },
+ [FEAT_BTF_TYPE_TAG] = {
+ "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
+ },
+ [FEAT_MEMCG_ACCOUNT] = {
+ "memcg-based memory accounting", probe_memcg_account,
+ },
+ [FEAT_BPF_COOKIE] = {
+ "BPF cookie support", probe_kern_bpf_cookie,
+ },
+ [FEAT_BTF_ENUM64] = {
+ "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
+ },
+ [FEAT_SYSCALL_WRAPPER] = {
+ "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
+ },
+ [FEAT_UPROBE_MULTI_LINK] = {
+ "BPF multi-uprobe link support", probe_uprobe_multi_link,
+ },
+};
+
+bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
+{
+ struct kern_feature_desc *feat = &feature_probes[feat_id];
+ int ret;
+
+ /* assume global feature cache, unless custom one is provided */
+ if (!cache)
+ cache = &feature_cache;
+
+ if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) {
+ ret = feat->probe(cache->token_fd);
+ if (ret > 0) {
+ WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED);
+ } else if (ret == 0) {
+ WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
+ } else {
+ pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
+ WRITE_ONCE(cache->res[feat_id], FEAT_MISSING);
+ }
+ }
+
+ return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED;
+}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ea9b8158c20d..4b5ff9508e18 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -59,6 +59,8 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
+
#define BPF_INSN_SZ (sizeof(struct bpf_insn))
/* vsprintf() in __base_pr() uses nonliteral format string. It may break
@@ -693,6 +695,10 @@ struct bpf_object {
struct usdt_manager *usdt_man;
+ struct kern_feature_cache *feat_cache;
+ char *token_path;
+ int token_fd;
+
char path[];
};
@@ -2192,7 +2198,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
int err;
if (!path)
- path = "/sys/fs/bpf";
+ path = BPF_FS_DEFAULT_PATH;
err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
if (err)
@@ -3054,9 +3060,15 @@ static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
return false;
}
+static bool map_needs_vmlinux_btf(struct bpf_map *map)
+{
+ return bpf_map__is_struct_ops(map);
+}
+
static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
{
struct bpf_program *prog;
+ struct bpf_map *map;
int i;
/* CO-RE relocations need kernel BTF, only when btf_custom_path
@@ -3081,6 +3093,11 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
return true;
}
+ bpf_object__for_each_map(map, obj) {
+ if (map_needs_vmlinux_btf(map))
+ return true;
+ }
+
return false;
}
@@ -3268,7 +3285,7 @@ skip_exception_cb:
} else {
/* currently BPF_BTF_LOAD only supports log_level 1 */
err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
- obj->log_level ? 1 : 0);
+ obj->log_level ? 1 : 0, obj->token_fd);
}
if (sanitize) {
if (!err) {
@@ -4591,6 +4608,63 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
return 0;
}
+static int bpf_object_prepare_token(struct bpf_object *obj)
+{
+ const char *bpffs_path;
+ int bpffs_fd = -1, token_fd, err;
+ bool mandatory;
+ enum libbpf_print_level level;
+
+ /* token is already set up */
+ if (obj->token_fd > 0)
+ return 0;
+ /* token is explicitly prevented */
+ if (obj->token_fd < 0) {
+ pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
+ /* reset to zero to avoid extra checks during map_create and prog_load steps */
+ obj->token_fd = 0;
+ return 0;
+ }
+
+ mandatory = obj->token_path != NULL;
+ level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
+
+ bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
+ bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
+ if (bpffs_fd < 0) {
+ err = -errno;
+ __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n",
+ obj->name, err, bpffs_path,
+ mandatory ? "" : ", skipping optional step...");
+ return mandatory ? err : 0;
+ }
+
+ token_fd = bpf_token_create(bpffs_fd, 0);
+ close(bpffs_fd);
+ if (token_fd < 0) {
+ if (!mandatory && token_fd == -ENOENT) {
+ pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
+ obj->name, bpffs_path);
+ return 0;
+ }
+ __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
+ obj->name, token_fd, bpffs_path,
+ mandatory ? "" : ", skipping optional step...");
+ return mandatory ? token_fd : 0;
+ }
+
+ obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
+ if (!obj->feat_cache) {
+ close(token_fd);
+ return -ENOMEM;
+ }
+
+ obj->token_fd = token_fd;
+ obj->feat_cache->token_fd = token_fd;
+
+ return 0;
+}
+
static int
bpf_object__probe_loading(struct bpf_object *obj)
{
@@ -4600,6 +4674,7 @@ bpf_object__probe_loading(struct bpf_object *obj)
BPF_EXIT_INSN(),
};
int ret, insn_cnt = ARRAY_SIZE(insns);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = obj->token_fd);
if (obj->gen_loader)
return 0;
@@ -4609,9 +4684,9 @@ bpf_object__probe_loading(struct bpf_object *obj)
pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
/* make sure basic loading works */
- ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
+ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
if (ret < 0)
- ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
if (ret < 0) {
ret = errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
@@ -4626,462 +4701,18 @@ bpf_object__probe_loading(struct bpf_object *obj)
return 0;
}
-static int probe_fd(int fd)
-{
- if (fd >= 0)
- close(fd);
- return fd >= 0;
-}
-
-static int probe_kern_prog_name(void)
-{
- const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- union bpf_attr attr;
- int ret;
-
- memset(&attr, 0, attr_sz);
- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- attr.license = ptr_to_u64("GPL");
- attr.insns = ptr_to_u64(insns);
- attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
- libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
-
- /* make sure loading with name works */
- ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
- return probe_fd(ret);
-}
-
-static int probe_kern_global_data(void)
-{
- char *cp, errmsg[STRERR_BUFSIZE];
- struct bpf_insn insns[] = {
- BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int ret, map, insn_cnt = ARRAY_SIZE(insns);
-
- map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL);
- if (map < 0) {
- ret = -errno;
- cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
- pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
- __func__, cp, -ret);
- return ret;
- }
-
- insns[0].imm = map;
-
- ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
- close(map);
- return probe_fd(ret);
-}
-
-static int probe_kern_btf(void)
-{
- static const char strs[] = "\0int";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_func(void)
-{
- static const char strs[] = "\0int\0x\0a";
- /* void x(int a) {} */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* FUNC_PROTO */ /* [2] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
- BTF_PARAM_ENC(7, 1),
- /* FUNC x */ /* [3] */
- BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_func_global(void)
-{
- static const char strs[] = "\0int\0x\0a";
- /* static void x(int a) {} */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* FUNC_PROTO */ /* [2] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
- BTF_PARAM_ENC(7, 1),
- /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */
- BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_datasec(void)
-{
- static const char strs[] = "\0x\0.data";
- /* static int a; */
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* VAR x */ /* [2] */
- BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
- BTF_VAR_STATIC,
- /* DATASEC val */ /* [3] */
- BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
- BTF_VAR_SECINFO_ENC(2, 0, 4),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_float(void)
-{
- static const char strs[] = "\0float";
- __u32 types[] = {
- /* float */
- BTF_TYPE_FLOAT_ENC(1, 4),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_decl_tag(void)
-{
- static const char strs[] = "\0tag";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* VAR x */ /* [2] */
- BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
- BTF_VAR_STATIC,
- /* attr */
- BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_btf_type_tag(void)
-{
- static const char strs[] = "\0tag";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- /* attr */
- BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */
- /* ptr */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_array_mmap(void)
-{
- LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
- int fd;
-
- fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
- return probe_fd(fd);
-}
-
-static int probe_kern_exp_attach_type(void)
-{
- LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int fd, insn_cnt = ARRAY_SIZE(insns);
-
- /* use any valid combination of program type and (optional)
- * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
- * to see if kernel supports expected_attach_type field for
- * BPF_PROG_LOAD command
- */
- fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
- return probe_fd(fd);
-}
-
-static int probe_kern_probe_read_kernel(void)
-{
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */
- BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */
- BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- };
- int fd, insn_cnt = ARRAY_SIZE(insns);
-
- fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
- return probe_fd(fd);
-}
-
-static int probe_prog_bind_map(void)
-{
- char *cp, errmsg[STRERR_BUFSIZE];
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
-
- map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL);
- if (map < 0) {
- ret = -errno;
- cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
- pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
- __func__, cp, -ret);
- return ret;
- }
-
- prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
- if (prog < 0) {
- close(map);
- return 0;
- }
-
- ret = bpf_prog_bind_map(prog, map, NULL);
-
- close(map);
- close(prog);
-
- return ret >= 0;
-}
-
-static int probe_module_btf(void)
-{
- static const char strs[] = "\0int";
- __u32 types[] = {
- /* int */
- BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
- };
- struct bpf_btf_info info;
- __u32 len = sizeof(info);
- char name[16];
- int fd, err;
-
- fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
- if (fd < 0)
- return 0; /* BTF not supported at all */
-
- memset(&info, 0, sizeof(info));
- info.name = ptr_to_u64(name);
- info.name_len = sizeof(name);
-
- /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
- * kernel's module BTF support coincides with support for
- * name/name_len fields in struct bpf_btf_info.
- */
- err = bpf_btf_get_info_by_fd(fd, &info, &len);
- close(fd);
- return !err;
-}
-
-static int probe_perf_link(void)
-{
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int prog_fd, link_fd, err;
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
- insns, ARRAY_SIZE(insns), NULL);
- if (prog_fd < 0)
- return -errno;
-
- /* use invalid perf_event FD to get EBADF, if link is supported;
- * otherwise EINVAL should be returned
- */
- link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
- err = -errno; /* close() can clobber errno */
-
- if (link_fd >= 0)
- close(link_fd);
- close(prog_fd);
-
- return link_fd < 0 && err == -EBADF;
-}
-
-static int probe_uprobe_multi_link(void)
-{
- LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
- .expected_attach_type = BPF_TRACE_UPROBE_MULTI,
- );
- LIBBPF_OPTS(bpf_link_create_opts, link_opts);
- struct bpf_insn insns[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
- int prog_fd, link_fd, err;
- unsigned long offset = 0;
-
- prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL",
- insns, ARRAY_SIZE(insns), &load_opts);
- if (prog_fd < 0)
- return -errno;
-
- /* Creating uprobe in '/' binary should fail with -EBADF. */
- link_opts.uprobe_multi.path = "/";
- link_opts.uprobe_multi.offsets = &offset;
- link_opts.uprobe_multi.cnt = 1;
-
- link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
- err = -errno; /* close() can clobber errno */
-
- if (link_fd >= 0)
- close(link_fd);
- close(prog_fd);
-
- return link_fd < 0 && err == -EBADF;
-}
-
-static int probe_kern_bpf_cookie(void)
-{
- struct bpf_insn insns[] = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
- BPF_EXIT_INSN(),
- };
- int ret, insn_cnt = ARRAY_SIZE(insns);
-
- ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
- return probe_fd(ret);
-}
-
-static int probe_kern_btf_enum64(void)
-{
- static const char strs[] = "\0enum64";
- __u32 types[] = {
- BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
- };
-
- return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs)));
-}
-
-static int probe_kern_syscall_wrapper(void);
-
-enum kern_feature_result {
- FEAT_UNKNOWN = 0,
- FEAT_SUPPORTED = 1,
- FEAT_MISSING = 2,
-};
-
-typedef int (*feature_probe_fn)(void);
-
-static struct kern_feature_desc {
- const char *desc;
- feature_probe_fn probe;
- enum kern_feature_result res;
-} feature_probes[__FEAT_CNT] = {
- [FEAT_PROG_NAME] = {
- "BPF program name", probe_kern_prog_name,
- },
- [FEAT_GLOBAL_DATA] = {
- "global variables", probe_kern_global_data,
- },
- [FEAT_BTF] = {
- "minimal BTF", probe_kern_btf,
- },
- [FEAT_BTF_FUNC] = {
- "BTF functions", probe_kern_btf_func,
- },
- [FEAT_BTF_GLOBAL_FUNC] = {
- "BTF global function", probe_kern_btf_func_global,
- },
- [FEAT_BTF_DATASEC] = {
- "BTF data section and variable", probe_kern_btf_datasec,
- },
- [FEAT_ARRAY_MMAP] = {
- "ARRAY map mmap()", probe_kern_array_mmap,
- },
- [FEAT_EXP_ATTACH_TYPE] = {
- "BPF_PROG_LOAD expected_attach_type attribute",
- probe_kern_exp_attach_type,
- },
- [FEAT_PROBE_READ_KERN] = {
- "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
- },
- [FEAT_PROG_BIND_MAP] = {
- "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
- },
- [FEAT_MODULE_BTF] = {
- "module BTF support", probe_module_btf,
- },
- [FEAT_BTF_FLOAT] = {
- "BTF_KIND_FLOAT support", probe_kern_btf_float,
- },
- [FEAT_PERF_LINK] = {
- "BPF perf link support", probe_perf_link,
- },
- [FEAT_BTF_DECL_TAG] = {
- "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
- },
- [FEAT_BTF_TYPE_TAG] = {
- "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
- },
- [FEAT_MEMCG_ACCOUNT] = {
- "memcg-based memory accounting", probe_memcg_account,
- },
- [FEAT_BPF_COOKIE] = {
- "BPF cookie support", probe_kern_bpf_cookie,
- },
- [FEAT_BTF_ENUM64] = {
- "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
- },
- [FEAT_SYSCALL_WRAPPER] = {
- "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
- },
- [FEAT_UPROBE_MULTI_LINK] = {
- "BPF multi-uprobe link support", probe_uprobe_multi_link,
- },
-};
-
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
{
- struct kern_feature_desc *feat = &feature_probes[feat_id];
- int ret;
-
if (obj && obj->gen_loader)
/* To generate loader program assume the latest kernel
* to avoid doing extra prog_load, map_create syscalls.
*/
return true;
- if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
- ret = feat->probe();
- if (ret > 0) {
- WRITE_ONCE(feat->res, FEAT_SUPPORTED);
- } else if (ret == 0) {
- WRITE_ONCE(feat->res, FEAT_MISSING);
- } else {
- pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
- WRITE_ONCE(feat->res, FEAT_MISSING);
- }
- }
+ if (obj->token_fd)
+ return feat_supported(obj->feat_cache, feat_id);
- return READ_ONCE(feat->res) == FEAT_SUPPORTED;
+ return feat_supported(NULL, feat_id);
}
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
@@ -5200,6 +4831,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
create_attr.map_flags = def->map_flags;
create_attr.numa_node = map->numa_node;
create_attr.map_extra = map->map_extra;
+ create_attr.token_fd = obj->token_fd;
if (bpf_map__is_struct_ops(map))
create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
@@ -7035,6 +6667,7 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog
load_attr.attach_btf_id = prog->attach_btf_id;
load_attr.kern_version = kern_version;
load_attr.prog_ifindex = prog->prog_ifindex;
+ load_attr.token_fd = obj->token_fd;
/* specify func_info/line_info only if kernel supports them */
btf_fd = bpf_object__btf_fd(obj);
@@ -7496,10 +7129,10 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object
static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
{
- const char *obj_name, *kconfig, *btf_tmp_path;
+ const char *obj_name, *kconfig, *btf_tmp_path, *token_path;
struct bpf_object *obj;
char tmp_name[64];
- int err;
+ int err, token_fd;
char *log_buf;
size_t log_size;
__u32 log_level;
@@ -7533,6 +7166,28 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf,
if (log_size && !log_buf)
return ERR_PTR(-EINVAL);
+ token_path = OPTS_GET(opts, bpf_token_path, NULL);
+ token_fd = OPTS_GET(opts, bpf_token_fd, -1);
+ /* non-empty token path can't be combined with invalid token FD */
+ if (token_path && token_path[0] != '\0' && token_fd < 0)
+ return ERR_PTR(-EINVAL);
+ /* empty token path can't be combined with valid token FD */
+ if (token_path && token_path[0] == '\0' && token_fd > 0)
+ return ERR_PTR(-EINVAL);
+ /* if user didn't specify bpf_token_path/bpf_token_fd explicitly,
+ * check if LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as
+ * bpf_token_path option
+ */
+ if (token_fd == 0 && !token_path)
+ token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
+ /* empty token_path is equivalent to invalid token_fd */
+ if (token_path && token_path[0] == '\0') {
+ token_path = NULL;
+ token_fd = -1;
+ }
+ if (token_path && strlen(token_path) >= PATH_MAX)
+ return ERR_PTR(-ENAMETOOLONG);
+
obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
if (IS_ERR(obj))
return obj;
@@ -7541,6 +7196,19 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf,
obj->log_size = log_size;
obj->log_level = log_level;
+ obj->token_fd = token_fd <= 0 ? token_fd : dup_good_fd(token_fd);
+ if (token_fd > 0 && obj->token_fd < 0) {
+ err = -errno;
+ goto out;
+ }
+ if (token_path) {
+ obj->token_path = strdup(token_path);
+ if (!obj->token_path) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
if (btf_tmp_path) {
if (strlen(btf_tmp_path) >= PATH_MAX) {
@@ -8051,7 +7719,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
if (obj->gen_loader)
bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
- err = bpf_object__probe_loading(obj);
+ err = bpf_object_prepare_token(obj);
+ err = err ? : bpf_object__probe_loading(obj);
err = err ? : bpf_object__load_vmlinux_btf(obj, false);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
err = err ? : bpf_object__sanitize_and_load_btf(obj);
@@ -8588,6 +8257,11 @@ void bpf_object__close(struct bpf_object *obj)
}
zfree(&obj->programs);
+ zfree(&obj->feat_cache);
+ zfree(&obj->token_path);
+ if (obj->token_fd > 0)
+ close(obj->token_fd);
+
free(obj);
}
@@ -10601,7 +10275,7 @@ static const char *arch_specific_syscall_pfx(void)
#endif
}
-static int probe_kern_syscall_wrapper(void)
+int probe_kern_syscall_wrapper(int token_fd)
{
char syscall_name[64];
const char *ksys_pfx;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6cd9c501624f..916904bd2a7a 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -177,10 +177,45 @@ struct bpf_object_open_opts {
* logs through its print callback.
*/
__u32 kernel_log_level;
+ /* FD of a BPF token instantiated by user through bpf_token_create()
+ * API. BPF object will keep dup()'ed FD internally, so passed token
+ * FD can be closed after BPF object/skeleton open step.
+ *
+ * Setting bpf_token_fd to negative value disables libbpf's automatic
+ * attempt to create BPF token from default BPF FS mount point
+ * (/sys/fs/bpf), in case this default behavior is undesirable.
+ *
+ * If bpf_token_path and bpf_token_fd are not specified, libbpf will
+ * consult LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will
+ * be taken as a value of bpf_token_path option and will force libbpf
+ * to either create BPF token from provided custom BPF FS path, or
+ * will disable implicit BPF token creation, if envvar value is an
+ * empty string.
+ *
+ * bpf_token_path and bpf_token_fd are mutually exclusive and only one
+ * of those options should be set. Either of them overrides
+ * LIBBPF_BPF_TOKEN_PATH envvar.
+ */
+ int bpf_token_fd;
+ /* Path to BPF FS mount point to derive BPF token from.
+ *
+ * Created BPF token will be used for all bpf() syscall operations
+ * that accept BPF token (e.g., map creation, BTF and program loads,
+ * etc) automatically within instantiated BPF object.
+ *
+ * Setting bpf_token_path option to empty string disables libbpf's
+ * automatic attempt to create BPF token from default BPF FS mount
+ * point (/sys/fs/bpf), in case this default behavior is undesirable.
+ *
+ * bpf_token_path and bpf_token_fd are mutually exclusive and only one
+ * of those options should be set. Either of them overrides
+ * LIBBPF_BPF_TOKEN_PATH envvar.
+ */
+ const char *bpf_token_path;
size_t :0;
};
-#define bpf_object_open_opts__last_field kernel_log_level
+#define bpf_object_open_opts__last_field bpf_token_path
/**
* @brief **bpf_object__open()** creates a bpf_object by opening
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 91c5aef7dae7..df7657b65c47 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -401,6 +401,7 @@ LIBBPF_1.3.0 {
bpf_program__attach_netkit;
bpf_program__attach_tcx;
bpf_program__attach_uprobe_multi;
+ bpf_token_create;
ring__avail_data_size;
ring__consume;
ring__consumer_pos;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index b5d334754e5d..4cda32298c49 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -360,15 +360,32 @@ enum kern_feature_id {
__FEAT_CNT,
};
-int probe_memcg_account(void);
+enum kern_feature_result {
+ FEAT_UNKNOWN = 0,
+ FEAT_SUPPORTED = 1,
+ FEAT_MISSING = 2,
+};
+
+struct kern_feature_cache {
+ enum kern_feature_result res[__FEAT_CNT];
+ int token_fd;
+};
+
+bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id);
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
+
+int probe_kern_syscall_wrapper(int token_fd);
+int probe_memcg_account(int token_fd);
int bump_rlimit_memlock(void);
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
- const char *str_sec, size_t str_len);
-int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level);
+ const char *str_sec, size_t str_len,
+ int token_fd);
+int btf_load_into_kernel(struct btf *btf,
+ char *log_buf, size_t log_sz, __u32 log_level,
+ int token_fd);
struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
@@ -532,6 +549,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn)
return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
}
+/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range.
+ * Original FD is not closed or altered in any other way.
+ * Preserves original FD value, if it's invalid (negative).
+ */
+static inline int dup_good_fd(int fd)
+{
+ if (fd < 0)
+ return fd;
+ return fcntl(fd, F_DUPFD_CLOEXEC, 3);
+}
+
/* if fd is stdin, stdout, or stderr, dup to a fd greater than 2
* Takes ownership of the fd passed in, and closes it if calling
* fcntl(fd, F_DUPFD_CLOEXEC, 3).
@@ -543,7 +571,7 @@ static inline int ensure_good_fd(int fd)
if (fd < 0)
return fd;
if (fd < 3) {
- fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
+ fd = dup_good_fd(fd);
saved_errno = errno;
close(old_fd);
errno = saved_errno;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 9c4db90b92b6..8e7437006639 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
}
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
- const char *str_sec, size_t str_len)
+ const char *str_sec, size_t str_len,
+ int token_fd)
{
struct btf_header hdr = {
.magic = BTF_MAGIC,
@@ -229,6 +230,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
.str_off = types_len,
.str_len = str_len,
};
+ LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd);
int btf_fd, btf_len;
__u8 *raw_btf;
@@ -241,7 +243,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);
- btf_fd = bpf_btf_load(raw_btf, btf_len, NULL);
+ btf_fd = bpf_btf_load(raw_btf, btf_len, &opts);
free(raw_btf);
return btf_fd;
@@ -271,7 +273,7 @@ static int load_local_storage_btf(void)
};
return libbpf__load_raw_btf((char *)types, sizeof(types),
- strs, sizeof(strs));
+ strs, sizeof(strs), 0);
}
static int probe_map_create(enum bpf_map_type map_type)
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 5ced96d99f8c..52a2901e8bd0 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -719,13 +719,25 @@ static int linker_sanity_check_elf(struct src_obj *obj)
return -EINVAL;
}
- if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign))
+ if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) {
+ pr_warn("ELF section #%zu alignment %llu is non pow-of-2 alignment in %s\n",
+ sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign,
+ obj->filename);
return -EINVAL;
- if (sec->shdr->sh_addralign != sec->data->d_align)
+ }
+ if (sec->shdr->sh_addralign != sec->data->d_align) {
+ pr_warn("ELF section #%zu has inconsistent alignment addr=%llu != d=%llu in %s\n",
+ sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign,
+ (long long unsigned)sec->data->d_align, obj->filename);
return -EINVAL;
+ }
- if (sec->shdr->sh_size != sec->data->d_size)
+ if (sec->shdr->sh_size != sec->data->d_size) {
+ pr_warn("ELF section #%zu has inconsistent section size sh=%llu != d=%llu in %s\n",
+ sec->sec_idx, (long long unsigned)sec->shdr->sh_size,
+ (long long unsigned)sec->data->d_size, obj->filename);
return -EINVAL;
+ }
switch (sec->shdr->sh_type) {
case SHT_SYMTAB:
@@ -737,8 +749,12 @@ static int linker_sanity_check_elf(struct src_obj *obj)
break;
case SHT_PROGBITS:
if (sec->shdr->sh_flags & SHF_EXECINSTR) {
- if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0)
+ if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0) {
+ pr_warn("ELF section #%zu has unexpected size alignment %llu in %s\n",
+ sec->sec_idx, (long long unsigned)sec->shdr->sh_size,
+ obj->filename);
return -EINVAL;
+ }
}
break;
case SHT_NOBITS:
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
index a139334d57b6..626d7ffb03d6 100644
--- a/tools/lib/bpf/str_error.h
+++ b/tools/lib/bpf/str_error.h
@@ -2,5 +2,8 @@
#ifndef __LIBBPF_STR_ERROR_H
#define __LIBBPF_STR_ERROR_H
+#define STRERR_BUFSIZE 128
+
char *libbpf_strerror_r(int err, char *dst, int len);
+
#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 5ca68ff0b59f..b4e78c1eb37b 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -55,4 +55,14 @@ void *bpf_cast_to_kern_ctx(void *) __ksym;
void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym;
+extern int bpf_get_file_xattr(struct file *file, const char *name,
+ struct bpf_dynptr *value_ptr) __ksym;
+extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym;
+
+extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
+ struct bpf_dynptr *sig_ptr,
+ struct bpf_key *trusted_keyring) __ksym;
#endif
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 5aa133bf3688..19be9c63d5e8 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -689,3 +689,19 @@ int get_cgroup1_hierarchy_id(const char *subsys_name)
fclose(file);
return found ? id : -1;
}
+
+/**
+ * open_classid() - Open a cgroupv1 net_cls classid
+ *
+ * This function expects the cgroup work dir to be already created, as we
+ * open it here.
+ *
+ * On success, it returns the file descriptor. On failure it returns -1.
+ */
+int open_classid(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ return open(cgroup_workdir, O_RDONLY);
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index ee053641c026..502845160d88 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -33,6 +33,7 @@ void cleanup_cgroup_environment(void);
int set_classid(void);
int join_classid(void);
unsigned long long get_classid_cgroup_id(void);
+int open_classid(void);
int setup_classid_environment(void);
void cleanup_classid_environment(void);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 3ec5927ec3e5..c125c441abc7 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -23,6 +23,7 @@ CONFIG_FPROBE=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FUNCTION_ERROR_INJECTION=y
CONFIG_FUNCTION_TRACER=y
+CONFIG_FS_VERITY=y
CONFIG_GENEVE=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -82,7 +83,7 @@ CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_TEST_BPF=m
CONFIG_USERFAULTFD=y
+CONFIG_VSOCKETS=y
CONFIG_VXLAN=y
CONFIG_XDP_SOCKETS=y
CONFIG_XFRM_INTERFACE=y
-CONFIG_VSOCKETS=y
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 8fb4a04fbbc0..816145bcb647 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -4630,11 +4630,6 @@ static int test_btf_id(unsigned int test_num)
/* The map holds the last ref to BTF and its btf_id */
close(map_fd);
map_fd = -1;
- btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
- if (CHECK(btf_fd[0] >= 0, "BTF lingers")) {
- err = -1;
- goto done;
- }
fprintf(stderr, "OK");
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
index 63e776f4176e..747761572098 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
@@ -19,6 +19,21 @@ struct socket_cookie {
__u64 cookie_value;
};
+static bool is_cgroup1;
+static int target_hid;
+
+#define CGROUP_MODE_SET(skel) \
+{ \
+ skel->bss->is_cgroup1 = is_cgroup1; \
+ skel->bss->target_hid = target_hid; \
+}
+
+static void cgroup_mode_value_init(bool cgroup, int hid)
+{
+ is_cgroup1 = cgroup;
+ target_hid = hid;
+}
+
static void test_tp_btf(int cgroup_fd)
{
struct cgrp_ls_tp_btf *skel;
@@ -29,6 +44,8 @@ static void test_tp_btf(int cgroup_fd)
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
+ CGROUP_MODE_SET(skel);
+
/* populate a value in map_b */
err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY);
if (!ASSERT_OK(err, "map_update_elem"))
@@ -130,6 +147,8 @@ static void test_recursion(int cgroup_fd)
if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
+ CGROUP_MODE_SET(skel);
+
err = cgrp_ls_recursion__attach(skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
@@ -165,6 +184,8 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
+ CGROUP_MODE_SET(skel);
+
bpf_program__set_autoload(skel->progs.cgroup_iter, true);
err = cgrp_ls_sleepable__load(skel);
if (!ASSERT_OK(err, "skel_load"))
@@ -202,6 +223,7 @@ static void test_yes_rcu_lock(__u64 cgroup_id)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
+ CGROUP_MODE_SET(skel);
skel->bss->target_pid = syscall(SYS_gettid);
bpf_program__set_autoload(skel->progs.yes_rcu_lock, true);
@@ -229,6 +251,8 @@ static void test_no_rcu_lock(void)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
+ CGROUP_MODE_SET(skel);
+
bpf_program__set_autoload(skel->progs.no_rcu_lock, true);
err = cgrp_ls_sleepable__load(skel);
ASSERT_ERR(err, "skel_load");
@@ -236,7 +260,25 @@ static void test_no_rcu_lock(void)
cgrp_ls_sleepable__destroy(skel);
}
-void test_cgrp_local_storage(void)
+static void test_cgrp1_no_rcu_lock(void)
+{
+ struct cgrp_ls_sleepable *skel;
+ int err;
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+
+ bpf_program__set_autoload(skel->progs.cgrp1_no_rcu_lock, true);
+ err = cgrp_ls_sleepable__load(skel);
+ ASSERT_OK(err, "skel_load");
+
+ cgrp_ls_sleepable__destroy(skel);
+}
+
+static void cgrp2_local_storage(void)
{
__u64 cgroup_id;
int cgroup_fd;
@@ -245,6 +287,8 @@ void test_cgrp_local_storage(void)
if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage"))
return;
+ cgroup_mode_value_init(0, -1);
+
cgroup_id = get_cgroup_id("/cgrp_local_storage");
if (test__start_subtest("tp_btf"))
test_tp_btf(cgroup_fd);
@@ -263,3 +307,55 @@ void test_cgrp_local_storage(void)
close(cgroup_fd);
}
+
+static void cgrp1_local_storage(void)
+{
+ int cgrp1_fd, cgrp1_hid, cgrp1_id, err;
+
+ /* Setup cgroup1 hierarchy */
+ err = setup_classid_environment();
+ if (!ASSERT_OK(err, "setup_classid_environment"))
+ return;
+
+ err = join_classid();
+ if (!ASSERT_OK(err, "join_cgroup1"))
+ goto cleanup;
+
+ cgrp1_fd = open_classid();
+ if (!ASSERT_GE(cgrp1_fd, 0, "cgroup1 fd"))
+ goto cleanup;
+
+ cgrp1_id = get_classid_cgroup_id();
+ if (!ASSERT_GE(cgrp1_id, 0, "cgroup1 id"))
+ goto close_fd;
+
+ cgrp1_hid = get_cgroup1_hierarchy_id("net_cls");
+ if (!ASSERT_GE(cgrp1_hid, 0, "cgroup1 hid"))
+ goto close_fd;
+
+ cgroup_mode_value_init(1, cgrp1_hid);
+
+ if (test__start_subtest("cgrp1_tp_btf"))
+ test_tp_btf(cgrp1_fd);
+ if (test__start_subtest("cgrp1_recursion"))
+ test_recursion(cgrp1_fd);
+ if (test__start_subtest("cgrp1_negative"))
+ test_negative();
+ if (test__start_subtest("cgrp1_iter_sleepable"))
+ test_cgroup_iter_sleepable(cgrp1_fd, cgrp1_id);
+ if (test__start_subtest("cgrp1_yes_rcu_lock"))
+ test_yes_rcu_lock(cgrp1_id);
+ if (test__start_subtest("cgrp1_no_rcu_lock"))
+ test_cgrp1_no_rcu_lock();
+
+close_fd:
+ close(cgrp1_fd);
+cleanup:
+ cleanup_classid_environment();
+}
+
+void test_cgrp_local_storage(void)
+{
+ cgrp2_local_storage();
+ cgrp1_local_storage();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
index 756ea8b590b6..c2e886399e3c 100644
--- a/tools/testing/selftests/bpf/prog_tests/cpumask.c
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -18,6 +18,7 @@ static const char * const cpumask_success_testcases[] = {
"test_insert_leave",
"test_insert_remove_release",
"test_global_mask_rcu",
+ "test_cpumask_weight",
};
static void verify_success(const char *prog_name)
diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
new file mode 100644
index 000000000000..37056ba73847
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <linux/fsverity.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "test_get_xattr.skel.h"
+#include "test_fsverity.skel.h"
+
+static const char testfile[] = "/tmp/test_progs_fs_kfuncs";
+
+static void test_xattr(void)
+{
+ struct test_get_xattr *skel = NULL;
+ int fd = -1, err;
+
+ fd = open(testfile, O_CREAT | O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return;
+
+ close(fd);
+ fd = -1;
+
+ err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0);
+ if (err && errno == EOPNOTSUPP) {
+ printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
+ "To run this test, make sure /tmp filesystem supports xattr.\n",
+ __func__, errno);
+ test__skip();
+ goto out;
+ }
+
+ if (!ASSERT_OK(err, "setxattr"))
+ goto out;
+
+ skel = test_get_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_get_xattr__open_and_load"))
+ goto out;
+
+ skel->bss->monitored_pid = getpid();
+ err = test_get_xattr__attach(skel);
+
+ if (!ASSERT_OK(err, "test_get_xattr__attach"))
+ goto out;
+
+ fd = open(testfile, O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "open_file"))
+ goto out;
+
+ ASSERT_EQ(skel->bss->found_xattr, 1, "found_xattr");
+
+out:
+ close(fd);
+ test_get_xattr__destroy(skel);
+ remove(testfile);
+}
+
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
+static void test_fsverity(void)
+{
+ struct fsverity_enable_arg arg = {0};
+ struct test_fsverity *skel = NULL;
+ struct fsverity_digest *d;
+ int fd, err;
+ char buffer[4096];
+
+ fd = open(testfile, O_CREAT | O_RDWR, 0644);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return;
+
+ /* Write random buffer, so the file is not empty */
+ err = write(fd, buffer, 4096);
+ if (!ASSERT_EQ(err, 4096, "write_file"))
+ goto out;
+ close(fd);
+
+ /* Reopen read-only, otherwise FS_IOC_ENABLE_VERITY will fail */
+ fd = open(testfile, O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "open_file1"))
+ return;
+
+ /* Enable fsverity for the file.
+ * If the file system doesn't support verity, this will fail. Skip
+ * the test in such case.
+ */
+ arg.version = 1;
+ arg.hash_algorithm = FS_VERITY_HASH_ALG_SHA256;
+ arg.block_size = 4096;
+ err = ioctl(fd, FS_IOC_ENABLE_VERITY, &arg);
+ if (err) {
+ printf("%s:SKIP:local fs doesn't support fsverity (%d)\n"
+ "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n",
+ __func__, errno);
+ test__skip();
+ goto out;
+ }
+
+ skel = test_fsverity__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_fsverity__open_and_load"))
+ goto out;
+
+ /* Get fsverity_digest from ioctl */
+ d = (struct fsverity_digest *)skel->bss->expected_digest;
+ d->digest_algorithm = FS_VERITY_HASH_ALG_SHA256;
+ d->digest_size = SHA256_DIGEST_SIZE;
+ err = ioctl(fd, FS_IOC_MEASURE_VERITY, skel->bss->expected_digest);
+ if (!ASSERT_OK(err, "ioctl_FS_IOC_MEASURE_VERITY"))
+ goto out;
+
+ skel->bss->monitored_pid = getpid();
+ err = test_fsverity__attach(skel);
+ if (!ASSERT_OK(err, "test_fsverity__attach"))
+ goto out;
+
+ /* Reopen the file to trigger the program */
+ close(fd);
+ fd = open(testfile, O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open_file2"))
+ goto out;
+
+ ASSERT_EQ(skel->bss->got_fsverity, 1, "got_fsverity");
+ ASSERT_EQ(skel->bss->digest_matches, 1, "digest_matches");
+out:
+ close(fd);
+ test_fsverity__destroy(skel);
+ remove(testfile);
+}
+
+void test_fs_kfuncs(void)
+{
+ if (test__start_subtest("xattr"))
+ test_xattr();
+
+ if (test__start_subtest("fsverity"))
+ test_fsverity();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c
new file mode 100644
index 000000000000..65309894b27a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "verifier_global_subprogs.skel.h"
+#include "freplace_dead_global_func.skel.h"
+
+void test_global_func_dead_code(void)
+{
+ struct verifier_global_subprogs *tgt_skel = NULL;
+ struct freplace_dead_global_func *skel = NULL;
+ char log_buf[4096];
+ int err, tgt_fd;
+
+ /* first, try to load target with good global subprog */
+ tgt_skel = verifier_global_subprogs__open();
+ if (!ASSERT_OK_PTR(tgt_skel, "tgt_skel_good_open"))
+ return;
+
+ bpf_program__set_autoload(tgt_skel->progs.chained_global_func_calls_success, true);
+
+ err = verifier_global_subprogs__load(tgt_skel);
+ if (!ASSERT_OK(err, "tgt_skel_good_load"))
+ goto out;
+
+ tgt_fd = bpf_program__fd(tgt_skel->progs.chained_global_func_calls_success);
+
+ /* Attach to good non-eliminated subprog */
+ skel = freplace_dead_global_func__open();
+ if (!ASSERT_OK_PTR(skel, "skel_good_open"))
+ goto out;
+
+ err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_good");
+ ASSERT_OK(err, "attach_target_good");
+
+ err = freplace_dead_global_func__load(skel);
+ if (!ASSERT_OK(err, "skel_good_load"))
+ goto out;
+
+ freplace_dead_global_func__destroy(skel);
+
+ /* Try attaching to dead code-eliminated subprog */
+ skel = freplace_dead_global_func__open();
+ if (!ASSERT_OK_PTR(skel, "skel_dead_open"))
+ goto out;
+
+ bpf_program__set_log_buf(skel->progs.freplace_prog, log_buf, sizeof(log_buf));
+ err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_dead");
+ ASSERT_OK(err, "attach_target_dead");
+
+ err = freplace_dead_global_func__load(skel);
+ if (!ASSERT_ERR(err, "skel_dead_load"))
+ goto out;
+
+ ASSERT_HAS_SUBSTR(log_buf, "Subprog global_dead doesn't exist", "dead_subprog_missing_msg");
+
+out:
+ verifier_global_subprogs__destroy(tgt_skel);
+ freplace_dead_global_func__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 4041cfa670eb..05000810e28e 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -222,6 +222,7 @@ static void test_attach_api_fails(void)
"bpf_fentry_test2",
};
__u64 cookies[2];
+ int saved_error;
addrs[0] = ksym_get_addr("bpf_fentry_test1");
addrs[1] = ksym_get_addr("bpf_fentry_test2");
@@ -238,10 +239,11 @@ static void test_attach_api_fails(void)
/* fail_1 - pattern and opts NULL */
link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
NULL, NULL);
+ saved_error = -errno;
if (!ASSERT_ERR_PTR(link, "fail_1"))
goto cleanup;
- if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_1_error"))
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_1_error"))
goto cleanup;
/* fail_2 - both addrs and syms set */
@@ -252,10 +254,11 @@ static void test_attach_api_fails(void)
link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
NULL, &opts);
+ saved_error = -errno;
if (!ASSERT_ERR_PTR(link, "fail_2"))
goto cleanup;
- if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_2_error"))
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_2_error"))
goto cleanup;
/* fail_3 - pattern and addrs set */
@@ -266,10 +269,11 @@ static void test_attach_api_fails(void)
link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
"ksys_*", &opts);
+ saved_error = -errno;
if (!ASSERT_ERR_PTR(link, "fail_3"))
goto cleanup;
- if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_3_error"))
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_3_error"))
goto cleanup;
/* fail_4 - pattern and cnt set */
@@ -280,10 +284,11 @@ static void test_attach_api_fails(void)
link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
"ksys_*", &opts);
+ saved_error = -errno;
if (!ASSERT_ERR_PTR(link, "fail_4"))
goto cleanup;
- if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_4_error"))
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_4_error"))
goto cleanup;
/* fail_5 - pattern and cookies */
@@ -294,10 +299,26 @@ static void test_attach_api_fails(void)
link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
"ksys_*", &opts);
+ saved_error = -errno;
if (!ASSERT_ERR_PTR(link, "fail_5"))
goto cleanup;
- if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_5_error"))
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_5_error"))
+ goto cleanup;
+
+ /* fail_6 - abnormal cnt */
+ opts.addrs = (const unsigned long *) addrs;
+ opts.syms = NULL;
+ opts.cnt = INT_MAX;
+ opts.cookies = NULL;
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ NULL, &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_6"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -E2BIG, "fail_6_error"))
goto cleanup;
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
index 9f766ddd946a..4ed46ed58a7b 100644
--- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
@@ -30,6 +30,8 @@ void test_libbpf_probe_prog_types(void)
if (prog_type == BPF_PROG_TYPE_UNSPEC)
continue;
+ if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0)
+ continue;
if (!test__start_subtest(prog_type_name))
continue;
@@ -68,6 +70,8 @@ void test_libbpf_probe_map_types(void)
if (map_type == BPF_MAP_TYPE_UNSPEC)
continue;
+ if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0)
+ continue;
if (!test__start_subtest(map_type_name))
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
index c440ea3311ed..62ea855ec4d0 100644
--- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
@@ -87,7 +87,7 @@ static void test_libbpf_bpf_link_type_str(void)
const char *link_type_str;
char buf[256];
- if (link_type == MAX_BPF_LINK_TYPE)
+ if (link_type == __MAX_BPF_LINK_TYPE)
continue;
link_type_name = btf__str_by_offset(btf, e->name_off);
@@ -132,6 +132,9 @@ static void test_libbpf_bpf_map_type_str(void)
const char *map_type_str;
char buf[256];
+ if (map_type == __MAX_BPF_MAP_TYPE)
+ continue;
+
map_type_name = btf__str_by_offset(btf, e->name_off);
map_type_str = libbpf_bpf_map_type_str(map_type);
ASSERT_OK_PTR(map_type_str, map_type_name);
@@ -186,6 +189,9 @@ static void test_libbpf_bpf_prog_type_str(void)
const char *prog_type_str;
char buf[256];
+ if (prog_type == __MAX_BPF_PROG_TYPE)
+ continue;
+
prog_type_name = btf__str_by_offset(btf, e->name_off);
prog_type_str = libbpf_bpf_prog_type_str(prog_type);
ASSERT_OK_PTR(prog_type_str, prog_type_name);
diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
index e6e50a394472..827e713f6cf1 100644
--- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
@@ -48,6 +48,27 @@ static void test_local_kptr_stash_plain(void)
local_kptr_stash__destroy(skel);
}
+static void test_local_kptr_stash_local_with_root(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_local_with_root), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_local_with_root run");
+ ASSERT_OK(opts.retval, "local_kptr_stash_add_local_with_root retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
static void test_local_kptr_stash_unstash(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts,
@@ -115,6 +136,8 @@ void test_local_kptr_stash(void)
test_local_kptr_stash_simple();
if (test__start_subtest("local_kptr_stash_plain"))
test_local_kptr_stash_plain();
+ if (test__start_subtest("local_kptr_stash_local_with_root"))
+ test_local_kptr_stash_local_with_root();
if (test__start_subtest("local_kptr_stash_unstash"))
test_local_kptr_stash_unstash();
if (test__start_subtest("refcount_acquire_without_unstash"))
diff --git a/tools/testing/selftests/bpf/prog_tests/map_btf.c b/tools/testing/selftests/bpf/prog_tests/map_btf.c
new file mode 100644
index 000000000000..2c4ef6037573
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_btf.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+
+#include "normal_map_btf.skel.h"
+#include "map_in_map_btf.skel.h"
+
+static void do_test_normal_map_btf(void)
+{
+ struct normal_map_btf *skel;
+ int i, err, new_fd = -1;
+ int map_fd_arr[64];
+
+ skel = normal_map_btf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ err = normal_map_btf__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out;
+
+ skel->bss->pid = getpid();
+ usleep(1);
+ ASSERT_TRUE(skel->bss->done, "done");
+
+ /* Use percpu_array to slow bpf_map_free_deferred() down.
+ * The memory allocation may fail, so doesn't check the returned fd.
+ */
+ for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++)
+ map_fd_arr[i] = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, 4, 4, 256, NULL);
+
+ /* Close array fd later */
+ new_fd = dup(bpf_map__fd(skel->maps.array));
+out:
+ normal_map_btf__destroy(skel);
+ if (new_fd < 0)
+ return;
+ /* Use kern_sync_rcu() to wait for the start of the free of the bpf
+ * program and use an assumed delay to wait for the release of the map
+ * btf which is held by other maps (e.g, bss). After that, array map
+ * holds the last reference of map btf.
+ */
+ kern_sync_rcu();
+ usleep(4000);
+ /* Spawn multiple kworkers to delay the invocation of
+ * bpf_map_free_deferred() for array map.
+ */
+ for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++) {
+ if (map_fd_arr[i] < 0)
+ continue;
+ close(map_fd_arr[i]);
+ }
+ close(new_fd);
+}
+
+static void do_test_map_in_map_btf(void)
+{
+ int err, zero = 0, new_fd = -1;
+ struct map_in_map_btf *skel;
+
+ skel = map_in_map_btf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ err = map_in_map_btf__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out;
+
+ skel->bss->pid = getpid();
+ usleep(1);
+ ASSERT_TRUE(skel->bss->done, "done");
+
+ /* Close inner_array fd later */
+ new_fd = dup(bpf_map__fd(skel->maps.inner_array));
+ /* Defer the free of inner_array */
+ err = bpf_map__delete_elem(skel->maps.outer_array, &zero, sizeof(zero), 0);
+ ASSERT_OK(err, "delete inner map");
+out:
+ map_in_map_btf__destroy(skel);
+ if (new_fd < 0)
+ return;
+ /* Use kern_sync_rcu() to wait for the start of the free of the bpf
+ * program and use an assumed delay to wait for the free of the outer
+ * map and the release of map btf. After that, inner map holds the last
+ * reference of map btf.
+ */
+ kern_sync_rcu();
+ usleep(10000);
+ close(new_fd);
+}
+
+void test_map_btf(void)
+{
+ if (test__start_subtest("array_btf"))
+ do_test_normal_map_btf();
+ if (test__start_subtest("inner_array_btf"))
+ do_test_map_in_map_btf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_in_map.c b/tools/testing/selftests/bpf/prog_tests/map_in_map.c
new file mode 100644
index 000000000000..d2a10eb4e5b5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_in_map.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "access_map_in_map.skel.h"
+
+struct thread_ctx {
+ pthread_barrier_t barrier;
+ int outer_map_fd;
+ int start, abort;
+ int loop, err;
+};
+
+static int wait_for_start_or_abort(struct thread_ctx *ctx)
+{
+ while (!ctx->start && !ctx->abort)
+ usleep(1);
+ return ctx->abort ? -1 : 0;
+}
+
+static void *update_map_fn(void *data)
+{
+ struct thread_ctx *ctx = data;
+ int loop = ctx->loop, err = 0;
+
+ if (wait_for_start_or_abort(ctx) < 0)
+ return NULL;
+ pthread_barrier_wait(&ctx->barrier);
+
+ while (loop-- > 0) {
+ int fd, zero = 0;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (fd < 0) {
+ err |= 1;
+ pthread_barrier_wait(&ctx->barrier);
+ continue;
+ }
+
+ /* Remove the old inner map */
+ if (bpf_map_update_elem(ctx->outer_map_fd, &zero, &fd, 0) < 0)
+ err |= 2;
+ close(fd);
+ pthread_barrier_wait(&ctx->barrier);
+ }
+
+ ctx->err = err;
+
+ return NULL;
+}
+
+static void *access_map_fn(void *data)
+{
+ struct thread_ctx *ctx = data;
+ int loop = ctx->loop;
+
+ if (wait_for_start_or_abort(ctx) < 0)
+ return NULL;
+ pthread_barrier_wait(&ctx->barrier);
+
+ while (loop-- > 0) {
+ /* Access the old inner map */
+ syscall(SYS_getpgid);
+ pthread_barrier_wait(&ctx->barrier);
+ }
+
+ return NULL;
+}
+
+static void test_map_in_map_access(const char *prog_name, const char *map_name)
+{
+ struct access_map_in_map *skel;
+ struct bpf_map *outer_map;
+ struct bpf_program *prog;
+ struct thread_ctx ctx;
+ pthread_t tid[2];
+ int err;
+
+ skel = access_map_in_map__open();
+ if (!ASSERT_OK_PTR(skel, "access_map_in_map open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "find program"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+
+ outer_map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(outer_map, "find map"))
+ goto out;
+
+ err = access_map_in_map__load(skel);
+ if (!ASSERT_OK(err, "access_map_in_map load"))
+ goto out;
+
+ err = access_map_in_map__attach(skel);
+ if (!ASSERT_OK(err, "access_map_in_map attach"))
+ goto out;
+
+ skel->bss->tgid = getpid();
+
+ memset(&ctx, 0, sizeof(ctx));
+ pthread_barrier_init(&ctx.barrier, NULL, 2);
+ ctx.outer_map_fd = bpf_map__fd(outer_map);
+ ctx.loop = 4;
+
+ err = pthread_create(&tid[0], NULL, update_map_fn, &ctx);
+ if (!ASSERT_OK(err, "close_thread"))
+ goto out;
+
+ err = pthread_create(&tid[1], NULL, access_map_fn, &ctx);
+ if (!ASSERT_OK(err, "read_thread")) {
+ ctx.abort = 1;
+ pthread_join(tid[0], NULL);
+ goto out;
+ }
+
+ ctx.start = 1;
+ pthread_join(tid[0], NULL);
+ pthread_join(tid[1], NULL);
+
+ ASSERT_OK(ctx.err, "err");
+out:
+ access_map_in_map__destroy(skel);
+}
+
+void test_map_in_map(void)
+{
+ if (test__start_subtest("acc_map_in_array"))
+ test_map_in_map_access("access_map_in_array", "outer_array_map");
+ if (test__start_subtest("sleepable_acc_map_in_array"))
+ test_map_in_map_access("sleepable_access_map_in_array", "outer_array_map");
+ if (test__start_subtest("acc_map_in_htab"))
+ test_map_in_map_access("access_map_in_htab", "outer_htab_map");
+ if (test__start_subtest("sleepable_acc_map_in_htab"))
+ test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map");
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c
index f4d40001155a..0be8301c0ffd 100644
--- a/tools/testing/selftests/bpf/prog_tests/syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/syscall.c
@@ -12,7 +12,7 @@ struct args {
int btf_fd;
};
-void test_syscall(void)
+static void test_syscall_load_prog(void)
{
static char verifier_log[8192];
struct args ctx = {
@@ -32,7 +32,7 @@ void test_syscall(void)
if (!ASSERT_OK_PTR(skel, "skel_load"))
goto cleanup;
- prog_fd = bpf_program__fd(skel->progs.bpf_prog);
+ prog_fd = bpf_program__fd(skel->progs.load_prog);
err = bpf_prog_test_run_opts(prog_fd, &tattr);
ASSERT_EQ(err, 0, "err");
ASSERT_EQ(tattr.retval, 1, "retval");
@@ -53,3 +53,29 @@ cleanup:
if (ctx.btf_fd > 0)
close(ctx.btf_fd);
}
+
+static void test_syscall_update_outer_map(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct syscall *skel;
+ int err, prog_fd;
+
+ skel = syscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.update_outer_map);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(err, 0, "err");
+ ASSERT_EQ(opts.retval, 1, "retval");
+cleanup:
+ syscall__destroy(skel);
+}
+
+void test_syscall(void)
+{
+ if (test__start_subtest("load_prog"))
+ test_syscall_load_prog();
+ if (test__start_subtest("update_outer_map"))
+ test_syscall_update_outer_map();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index d149ab98798d..2b3c6dd66259 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -50,6 +50,7 @@
*/
#include <arpa/inet.h>
+#include <linux/if_link.h>
#include <linux/if_tun.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
@@ -92,6 +93,11 @@
#define IPIP_TUNL_DEV0 "ipip00"
#define IPIP_TUNL_DEV1 "ipip11"
+#define XFRM_AUTH "0x1111111111111111111111111111111111111111"
+#define XFRM_ENC "0x22222222222222222222222222222222"
+#define XFRM_SPI_IN_TO_OUT 0x1
+#define XFRM_SPI_OUT_TO_IN 0x2
+
#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
static int config_device(void)
@@ -264,6 +270,92 @@ static void delete_ipip_tunnel(void)
SYS_NOFAIL("ip fou del port 5555 2> /dev/null");
}
+static int add_xfrm_tunnel(void)
+{
+ /* at_ns0 namespace
+ * at_ns0 -> root
+ */
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 1 mode tunnel replay-window 42 "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm policy add src %s/32 dst %s/32 dir out "
+ "tmpl src %s dst %s proto esp reqid 1 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ /* root -> at_ns0 */
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 2 mode tunnel "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm policy add src %s/32 dst %s/32 dir in "
+ "tmpl src %s dst %s proto esp reqid 2 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0);
+
+ /* address & route */
+ SYS(fail, "ip netns exec at_ns0 ip addr add dev veth0 %s/32",
+ IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip route add %s dev veth0 via %s src %s",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR1_VETH1, IP4_ADDR_TUNL_DEV0);
+
+ /* root namespace
+ * at_ns0 -> root
+ */
+ SYS(fail,
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 1 mode tunnel replay-window 42 "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip xfrm policy add src %s/32 dst %s/32 dir in "
+ "tmpl src %s dst %s proto esp reqid 1 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ /* root -> at_ns0 */
+ SYS(fail,
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 2 mode tunnel "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip xfrm policy add src %s/32 dst %s/32 dir out "
+ "tmpl src %s dst %s proto esp reqid 2 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0);
+
+ /* address & route */
+ SYS(fail, "ip addr add dev veth1 %s/32", IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip route add %s dev veth1 via %s src %s",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR_TUNL_DEV1);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void delete_xfrm_tunnel(void)
+{
+ SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0);
+ SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1);
+ SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null",
+ IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT);
+ SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null",
+ IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN);
+}
+
static int test_ping(int family, const char *addr)
{
SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
@@ -532,25 +624,85 @@ done:
test_tunnel_kern__destroy(skel);
}
+static void test_xfrm_tunnel(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ struct test_tunnel_kern *skel = NULL;
+ struct nstoken *nstoken;
+ int xdp_prog_fd;
+ int tc_prog_fd;
+ int ifindex;
+ int err;
+
+ err = add_xfrm_tunnel();
+ if (!ASSERT_OK(err, "add_xfrm_tunnel"))
+ return;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ goto done;
+
+ ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
+ goto done;
+
+ /* attach tc prog to tunnel dev */
+ tc_hook.ifindex = ifindex;
+ tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state);
+ if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, tc_prog_fd, -1))
+ goto done;
+
+ /* attach xdp prog to tunnel dev */
+ xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp);
+ if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ err = bpf_xdp_attach(ifindex, xdp_prog_fd, XDP_FLAGS_REPLACE, &opts);
+ if (!ASSERT_OK(err, "bpf_xdp_attach"))
+ goto done;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
+ if (!ASSERT_OK(err, "test_ping"))
+ goto done;
+
+ if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id"))
+ goto done;
+ if (!ASSERT_EQ(skel->bss->xfrm_spi, XFRM_SPI_IN_TO_OUT, "spi"))
+ goto done;
+ if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip"))
+ goto done;
+ if (!ASSERT_EQ(skel->bss->xfrm_replay_window, 42, "replay_window"))
+ goto done;
+
+done:
+ delete_xfrm_tunnel();
+ if (skel)
+ test_tunnel_kern__destroy(skel);
+}
+
#define RUN_TEST(name, ...) \
({ \
if (test__start_subtest(#name)) { \
+ config_device(); \
test_ ## name(__VA_ARGS__); \
+ cleanup(); \
} \
})
static void *test_tunnel_run_tests(void *arg)
{
- cleanup();
- config_device();
-
RUN_TEST(vxlan_tunnel);
RUN_TEST(ip6vxlan_tunnel);
RUN_TEST(ipip_tunnel, NONE);
RUN_TEST(ipip_tunnel, FOU);
RUN_TEST(ipip_tunnel, GUE);
-
- cleanup();
+ RUN_TEST(xfrm_tunnel);
return NULL;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/time_tai.c b/tools/testing/selftests/bpf/prog_tests/time_tai.c
index a31119823666..f45af1b0ef2c 100644
--- a/tools/testing/selftests/bpf/prog_tests/time_tai.c
+++ b/tools/testing/selftests/bpf/prog_tests/time_tai.c
@@ -56,7 +56,7 @@ void test_time_tai(void)
ASSERT_NEQ(ts2, 0, "tai_ts2");
/* TAI is moving forward only */
- ASSERT_GT(ts2, ts1, "tai_forward");
+ ASSERT_GE(ts2, ts1, "tai_forward");
/* Check for future */
ret = clock_gettime(CLOCK_TAI, &now_tai);
diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c
new file mode 100644
index 000000000000..b5dce630e0e1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/token.c
@@ -0,0 +1,1031 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "cap_helpers.h"
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+#include <linux/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/un.h>
+#include "priv_map.skel.h"
+#include "priv_prog.skel.h"
+#include "dummy_st_ops_success.skel.h"
+
+static inline int sys_mount(const char *dev_name, const char *dir_name,
+ const char *type, unsigned long flags,
+ const void *data)
+{
+ return syscall(__NR_mount, dev_name, dir_name, type, flags, data);
+}
+
+static inline int sys_fsopen(const char *fsname, unsigned flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fspick(int dfd, const char *path, unsigned flags)
+{
+ return syscall(__NR_fspick, dfd, path, flags);
+}
+
+static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
+{
+ return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
+}
+
+static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
+{
+ return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
+}
+
+static inline int sys_move_mount(int from_dfd, const char *from_path,
+ int to_dfd, const char *to_path,
+ unsigned flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags);
+}
+
+static int drop_priv_caps(__u64 *old_caps)
+{
+ return cap_disable_effective((1ULL << CAP_BPF) |
+ (1ULL << CAP_PERFMON) |
+ (1ULL << CAP_NET_ADMIN) |
+ (1ULL << CAP_SYS_ADMIN), old_caps);
+}
+
+static int restore_priv_caps(__u64 old_caps)
+{
+ return cap_enable_effective(old_caps, NULL);
+}
+
+static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str)
+{
+ char buf[32];
+ int err;
+
+ if (!mask_str) {
+ if (mask == ~0ULL) {
+ mask_str = "any";
+ } else {
+ snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask);
+ mask_str = buf;
+ }
+ }
+
+ err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key,
+ mask_str, 0);
+ if (err < 0)
+ err = -errno;
+ return err;
+}
+
+#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0)
+
+struct bpffs_opts {
+ __u64 cmds;
+ __u64 maps;
+ __u64 progs;
+ __u64 attachs;
+ const char *cmds_str;
+ const char *maps_str;
+ const char *progs_str;
+ const char *attachs_str;
+};
+
+static int create_bpffs_fd(void)
+{
+ int fs_fd;
+
+ /* create VFS context */
+ fs_fd = sys_fsopen("bpf", 0);
+ ASSERT_GE(fs_fd, 0, "fs_fd");
+
+ return fs_fd;
+}
+
+static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
+{
+ int mnt_fd, err;
+
+ /* set up token delegation mount options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
+ if (!ASSERT_OK(err, "fs_cfg_cmds"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str);
+ if (!ASSERT_OK(err, "fs_cfg_maps"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str);
+ if (!ASSERT_OK(err, "fs_cfg_progs"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str);
+ if (!ASSERT_OK(err, "fs_cfg_attachs"))
+ return err;
+
+ /* instantiate FS object */
+ err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+ if (err < 0)
+ return -errno;
+
+ /* create O_PATH fd for detached mount */
+ mnt_fd = sys_fsmount(fs_fd, 0, 0);
+ if (err < 0)
+ return -errno;
+
+ return mnt_fd;
+}
+
+/* send FD over Unix domain (AF_UNIX) socket */
+static int sendfd(int sockfd, int fd)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ int fds[1] = { fd }, err;
+ char iobuf[1];
+ struct iovec io = {
+ .iov_base = iobuf,
+ .iov_len = sizeof(iobuf),
+ };
+ union {
+ char buf[CMSG_SPACE(sizeof(fds))];
+ struct cmsghdr align;
+ } u;
+
+ msg.msg_iov = &io;
+ msg.msg_iovlen = 1;
+ msg.msg_control = u.buf;
+ msg.msg_controllen = sizeof(u.buf);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));
+
+ err = sendmsg(sockfd, &msg, 0);
+ if (err < 0)
+ err = -errno;
+ if (!ASSERT_EQ(err, 1, "sendmsg"))
+ return -EINVAL;
+
+ return 0;
+}
+
+/* receive FD over Unix domain (AF_UNIX) socket */
+static int recvfd(int sockfd, int *fd)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ int fds[1], err;
+ char iobuf[1];
+ struct iovec io = {
+ .iov_base = iobuf,
+ .iov_len = sizeof(iobuf),
+ };
+ union {
+ char buf[CMSG_SPACE(sizeof(fds))];
+ struct cmsghdr align;
+ } u;
+
+ msg.msg_iov = &io;
+ msg.msg_iovlen = 1;
+ msg.msg_control = u.buf;
+ msg.msg_controllen = sizeof(u.buf);
+
+ err = recvmsg(sockfd, &msg, 0);
+ if (err < 0)
+ err = -errno;
+ if (!ASSERT_EQ(err, 1, "recvmsg"))
+ return -EINVAL;
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (!ASSERT_OK_PTR(cmsg, "cmsg_null") ||
+ !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") ||
+ !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") ||
+ !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type"))
+ return -EINVAL;
+
+ memcpy(fds, CMSG_DATA(cmsg), sizeof(fds));
+ *fd = fds[0];
+
+ return 0;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+typedef int (*child_callback_fn)(int);
+
+static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts);
+ int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1;
+
+ /* setup userns with root mappings */
+ err = create_and_enter_userns();
+ if (!ASSERT_OK(err, "create_and_enter_userns"))
+ goto cleanup;
+
+ /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "create_mountns"))
+ goto cleanup;
+
+ err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
+ if (!ASSERT_OK(err, "remount_root"))
+ goto cleanup;
+
+ fs_fd = create_bpffs_fd();
+ if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* ensure unprivileged child cannot set delegation options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_maps_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_progs_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm");
+
+ /* pass BPF FS context object to parent */
+ err = sendfd(sock_fd, fs_fd);
+ if (!ASSERT_OK(err, "send_fs_fd"))
+ goto cleanup;
+ zclose(fs_fd);
+
+ /* avoid mucking around with mount namespaces and mounting at
+ * well-known path, just get detach-mounted BPF FS fd back from parent
+ */
+ err = recvfd(sock_fd, &mnt_fd);
+ if (!ASSERT_OK(err, "recv_mnt_fd"))
+ goto cleanup;
+
+ /* try to fspick() BPF FS and try to add some delegation options */
+ fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH);
+ if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* ensure unprivileged child cannot reconfigure to set delegation options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ zclose(fs_fd);
+
+ bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR);
+ if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* do custom test logic with customly set up BPF FS instance */
+ err = callback(bpffs_fd);
+ if (!ASSERT_OK(err, "test_callback"))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ zclose(sock_fd);
+ zclose(mnt_fd);
+ zclose(fs_fd);
+ zclose(bpffs_fd);
+
+ exit(-err);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
+{
+ int fs_fd = -1, mnt_fd = -1, err;
+
+ err = recvfd(sock_fd, &fs_fd);
+ if (!ASSERT_OK(err, "recv_bpffs_fd"))
+ goto cleanup;
+
+ mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
+ if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ zclose(fs_fd);
+
+ /* pass BPF FS context object to parent */
+ err = sendfd(sock_fd, mnt_fd);
+ if (!ASSERT_OK(err, "send_mnt_fd"))
+ goto cleanup;
+ zclose(mnt_fd);
+
+ err = wait_for_pid(child_pid);
+ ASSERT_OK(err, "waitpid_child");
+
+cleanup:
+ zclose(sock_fd);
+ zclose(fs_fd);
+ zclose(mnt_fd);
+
+ if (child_pid > 0)
+ (void)kill(child_pid, SIGKILL);
+}
+
+static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb)
+{
+ int sock_fds[2] = { -1, -1 };
+ int child_pid = 0, err;
+
+ err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds);
+ if (!ASSERT_OK(err, "socketpair"))
+ goto cleanup;
+
+ child_pid = fork();
+ if (!ASSERT_GE(child_pid, 0, "fork"))
+ goto cleanup;
+
+ if (child_pid == 0) {
+ zclose(sock_fds[0]);
+ return child(sock_fds[1], bpffs_opts, cb);
+
+ } else {
+ zclose(sock_fds[1]);
+ return parent(child_pid, bpffs_opts, sock_fds[0]);
+ }
+
+cleanup:
+ zclose(sock_fds[0]);
+ zclose(sock_fds[1]);
+ if (child_pid > 0)
+ (void)kill(child_pid, SIGKILL);
+}
+
+static int userns_map_create(int mnt_fd)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts);
+ int err, token_fd = -1, map_fd = -1;
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* while inside non-init userns, we need both a BPF token *and*
+ * CAP_BPF inside current userns to create privileged map; let's test
+ * that neither BPF token alone nor namespaced CAP_BPF is sufficient
+ */
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* no token, no CAP_BPF -> fail */
+ map_opts.token_fd = 0;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* token without CAP_BPF -> fail */
+ map_opts.token_fd = token_fd;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
+ err = restore_priv_caps(old_caps);
+ if (!ASSERT_OK(err, "restore_caps"))
+ goto cleanup;
+
+ /* CAP_BPF without token -> fail */
+ map_opts.token_fd = 0;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* finally, namespaced CAP_BPF + token -> success */
+ map_opts.token_fd = token_fd;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+cleanup:
+ zclose(token_fd);
+ zclose(map_fd);
+ return err;
+}
+
+static int userns_btf_load(int mnt_fd)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, btf_opts);
+ int err, token_fd = -1, btf_fd = -1;
+ const void *raw_btf_data;
+ struct btf *btf = NULL;
+ __u32 raw_btf_size;
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* while inside non-init userns, we need both a BPF token *and*
+ * CAP_BPF inside current userns to create privileged map; let's test
+ * that neither BPF token alone nor namespaced CAP_BPF is sufficient
+ */
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* setup a trivial BTF data to load to the kernel */
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ goto cleanup;
+
+ ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data"))
+ goto cleanup;
+
+ /* no token + no CAP_BPF -> failure */
+ btf_opts.token_fd = 0;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail"))
+ goto cleanup;
+
+ /* token + no CAP_BPF -> failure */
+ btf_opts.token_fd = token_fd;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail"))
+ goto cleanup;
+
+ /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
+ err = restore_priv_caps(old_caps);
+ if (!ASSERT_OK(err, "restore_caps"))
+ goto cleanup;
+
+ /* token + CAP_BPF -> success */
+ btf_opts.token_fd = token_fd;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success"))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ btf__free(btf);
+ zclose(btf_fd);
+ zclose(token_fd);
+ return err;
+}
+
+static int userns_prog_load(int mnt_fd)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts);
+ int err, token_fd = -1, prog_fd = -1;
+ struct bpf_insn insns[] = {
+ /* bpf_jiffies64() requires CAP_BPF */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ /* bpf_get_current_task() requires CAP_PERFMON */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task),
+ /* r0 = 0; exit; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ size_t insn_cnt = ARRAY_SIZE(insns);
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* validate we can successfully load BPF program with token; this
+ * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF)
+ * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have
+ * BPF token wired properly in a bunch of places in the kernel
+ */
+ prog_opts.token_fd = token_fd;
+ prog_opts.expected_attach_type = BPF_XDP;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_GT(prog_fd, 0, "prog_fd")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ /* no token + caps -> failure */
+ prog_opts.token_fd = 0;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* no caps + token -> failure */
+ prog_opts.token_fd = token_fd;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ /* no caps + no token -> definitely a failure */
+ prog_opts.token_fd = 0;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ err = 0;
+cleanup:
+ zclose(prog_fd);
+ zclose(token_fd);
+ return err;
+}
+
+static int userns_obj_priv_map(int mnt_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct priv_map *skel;
+ int err, token_fd;
+
+ skel = priv_map__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ priv_map__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = priv_map__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+
+ err = priv_map__load(skel);
+ priv_map__destroy(skel);
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+
+ /* create token and pass it through bpf_token_fd */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "create_token"))
+ return -EINVAL;
+
+ opts.bpf_token_path = NULL;
+ opts.bpf_token_fd = token_fd;
+ skel = priv_map__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_fd_open"))
+ return -EINVAL;
+
+ /* we can close our token FD, bpf_object owns dup()'ed FD now */
+ close(token_fd);
+
+ err = priv_map__load(skel);
+ priv_map__destroy(skel);
+ if (!ASSERT_OK(err, "obj_token_fd_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int userns_obj_priv_prog(int mnt_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct priv_prog *skel;
+ int err;
+
+ skel = priv_prog__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ priv_prog__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
+ * which should cause struct_ops application to fail, as BTF won't be uploaded
+ * into the kernel, even if STRUCT_OPS programs themselves are allowed
+ */
+static int validate_struct_ops_load(int mnt_fd, bool expect_success)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (expect_success) {
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+ } else /* expect failure */ {
+ if (!ASSERT_ERR(err, "obj_token_path_load"))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int userns_obj_priv_btf_fail(int mnt_fd)
+{
+ return validate_struct_ops_load(mnt_fd, false /* should fail */);
+}
+
+static int userns_obj_priv_btf_success(int mnt_fd)
+{
+ return validate_struct_ops_load(mnt_fd, true /* should succeed */);
+}
+
+#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
+#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs"
+
+static int userns_obj_priv_implicit_token(int mnt_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ /* before we mount BPF FS with token delegation, struct_ops skeleton
+ * should fail to load
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF
+ * token automatically and implicitly
+ */
+ err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH);
+ if (!ASSERT_OK(err, "move_mount_bpffs"))
+ return -EINVAL;
+
+ /* disable implicit BPF token creation by setting
+ * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail
+ */
+ err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/);
+ if (!ASSERT_OK(err, "setenv_token_path"))
+ return -EINVAL;
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) {
+ unsetenv(TOKEN_ENVVAR);
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+ unsetenv(TOKEN_ENVVAR);
+
+ /* now the same struct_ops skeleton should succeed thanks to libppf
+ * creating BPF token from /sys/fs/bpf mount point
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
+ return -EINVAL;
+
+ dummy_st_ops_success__destroy(skel);
+
+ /* now disable implicit token through empty bpf_token_path, should fail */
+ opts.bpf_token_path = "";
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
+ return -EINVAL;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
+ return -EINVAL;
+
+ /* now disable implicit token through negative bpf_token_fd, should fail */
+ opts.bpf_token_path = NULL;
+ opts.bpf_token_fd = -1;
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open"))
+ return -EINVAL;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_neg_token_fd_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int userns_obj_priv_implicit_token_envvar(int mnt_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ /* before we mount BPF FS with token delegation, struct_ops skeleton
+ * should fail to load
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* mount custom BPF FS over custom location, so libbpf can't create
+ * BPF token implicitly, unless pointed to it through
+ * LIBBPF_BPF_TOKEN_PATH envvar
+ */
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom"))
+ goto err_out;
+ err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH);
+ if (!ASSERT_OK(err, "move_mount_bpffs"))
+ goto err_out;
+
+ /* even though we have BPF FS with delegation, it's not at default
+ * /sys/fs/bpf location, so we still fail to load until envvar is set up
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) {
+ dummy_st_ops_success__destroy(skel);
+ goto err_out;
+ }
+
+ err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/);
+ if (!ASSERT_OK(err, "setenv_token_path"))
+ goto err_out;
+
+ /* now the same struct_ops skeleton should succeed thanks to libppf
+ * creating BPF token from custom mount point
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
+ goto err_out;
+
+ dummy_st_ops_success__destroy(skel);
+
+ /* now disable implicit token through empty bpf_token_path, envvar
+ * will be ignored, should fail
+ */
+ opts.bpf_token_path = "";
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
+ goto err_out;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
+ goto err_out;
+
+ /* now disable implicit token through negative bpf_token_fd, envvar
+ * will be ignored, should fail
+ */
+ opts.bpf_token_path = NULL;
+ opts.bpf_token_fd = -1;
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open"))
+ goto err_out;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_neg_token_fd_load"))
+ goto err_out;
+
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ unsetenv(TOKEN_ENVVAR);
+ return 0;
+err_out:
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ unsetenv(TOKEN_ENVVAR);
+ return -EINVAL;
+}
+
+#define bit(n) (1ULL << (n))
+
+void test_token(void)
+{
+ if (test__start_subtest("map_token")) {
+ struct bpffs_opts opts = {
+ .cmds_str = "map_create",
+ .maps_str = "stack",
+ };
+
+ subtest_userns(&opts, userns_map_create);
+ }
+ if (test__start_subtest("btf_token")) {
+ struct bpffs_opts opts = {
+ .cmds = 1ULL << BPF_BTF_LOAD,
+ };
+
+ subtest_userns(&opts, userns_btf_load);
+ }
+ if (test__start_subtest("prog_token")) {
+ struct bpffs_opts opts = {
+ .cmds_str = "PROG_LOAD",
+ .progs_str = "XDP",
+ .attachs_str = "xdp",
+ };
+
+ subtest_userns(&opts, userns_prog_load);
+ }
+ if (test__start_subtest("obj_priv_map")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_MAP_CREATE),
+ .maps = bit(BPF_MAP_TYPE_QUEUE),
+ };
+
+ subtest_userns(&opts, userns_obj_priv_map);
+ }
+ if (test__start_subtest("obj_priv_prog")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_PROG_LOAD),
+ .progs = bit(BPF_PROG_TYPE_KPROBE),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_prog);
+ }
+ if (test__start_subtest("obj_priv_btf_fail")) {
+ struct bpffs_opts opts = {
+ /* disallow BTF loading */
+ .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_btf_fail);
+ }
+ if (test__start_subtest("obj_priv_btf_success")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_btf_success);
+ }
+ if (test__start_subtest("obj_priv_implicit_token")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_implicit_token);
+ }
+ if (test__start_subtest("obj_priv_implicit_token_envvar")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
index ece260cf2c0b..8269cdee33ae 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
@@ -234,6 +234,177 @@ static void test_attach_api_syms(void)
test_attach_api("/proc/self/exe", NULL, &opts);
}
+static void test_attach_api_fails(void)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ const char *path = "/proc/self/exe";
+ struct uprobe_multi *skel = NULL;
+ int prog_fd, link_fd = -1;
+ unsigned long offset = 0;
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.uprobe_extra);
+
+ /* abnormal cnt */
+ opts.uprobe_multi.path = path;
+ opts.uprobe_multi.offsets = &offset;
+ opts.uprobe_multi.cnt = INT_MAX;
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -E2BIG, "big cnt"))
+ goto cleanup;
+
+ /* cnt is 0 */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "cnt_is_zero"))
+ goto cleanup;
+
+ /* negative offset */
+ offset = -1;
+ opts.uprobe_multi.path = path;
+ opts.uprobe_multi.offsets = (unsigned long *) &offset;
+ opts.uprobe_multi.cnt = 1;
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "offset_is_negative"))
+ goto cleanup;
+
+ /* offsets is NULL */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "offsets_is_null"))
+ goto cleanup;
+
+ /* wrong offsets pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) 1,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "offsets_is_wrong"))
+ goto cleanup;
+
+ /* path is NULL */
+ offset = 1;
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "path_is_null"))
+ goto cleanup;
+
+ /* wrong path pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = (const char *) 1,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "path_is_wrong"))
+ goto cleanup;
+
+ /* wrong path type */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = "/",
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EBADF, "path_is_wrong_type"))
+ goto cleanup;
+
+ /* wrong cookies pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cookies = (__u64 *) 1ULL,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "cookies_is_wrong"))
+ goto cleanup;
+
+ /* wrong ref_ctr_offsets pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cookies = (__u64 *) &offset,
+ .uprobe_multi.ref_ctr_offsets = (unsigned long *) 1,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "ref_ctr_offsets_is_wrong"))
+ goto cleanup;
+
+ /* wrong flags */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.flags = 1 << 31,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "wrong_flags"))
+ goto cleanup;
+
+ /* wrong pid */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ .uprobe_multi.pid = -2,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ uprobe_multi__destroy(skel);
+}
+
static void __test_link_api(struct child *child)
{
int prog_fd, link1_fd = -1, link2_fd = -1, link3_fd = -1, link4_fd = -1;
@@ -311,7 +482,7 @@ cleanup:
free(offsets);
}
-void test_link_api(void)
+static void test_link_api(void)
{
struct child *child;
@@ -412,4 +583,6 @@ void test_uprobe_multi_test(void)
test_bench_attach_uprobe();
if (test__start_subtest("bench_usdt"))
test_bench_attach_usdt();
+ if (test__start_subtest("attach_api_fails"))
+ test_attach_api_fails();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 8d746642cbd7..ac49ec25211d 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -6,6 +6,7 @@
#include "verifier_and.skel.h"
#include "verifier_array_access.skel.h"
#include "verifier_basic_stack.skel.h"
+#include "verifier_bitfield_write.skel.h"
#include "verifier_bounds.skel.h"
#include "verifier_bounds_deduction.skel.h"
#include "verifier_bounds_deduction_non_const.skel.h"
@@ -116,6 +117,7 @@ static void run_tests_aux(const char *skel_name,
void test_verifier_and(void) { RUN(verifier_and); }
void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
+void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
void test_verifier_bounds(void) { RUN(verifier_bounds); }
void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); }
void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); }
diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
index dd7f2bc70048..ab0f02faa80c 100644
--- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
@@ -16,9 +16,12 @@
#include <sys/wait.h>
#include <sys/mman.h>
#include <linux/keyctl.h>
+#include <sys/xattr.h>
+#include <linux/fsverity.h>
#include <test_progs.h>
#include "test_verify_pkcs7_sig.skel.h"
+#include "test_sig_in_xattr.skel.h"
#define MAX_DATA_SIZE (1024 * 1024)
#define MAX_SIG_SIZE 1024
@@ -26,6 +29,10 @@
#define VERIFY_USE_SECONDARY_KEYRING (1UL)
#define VERIFY_USE_PLATFORM_KEYRING (2UL)
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
#define MODULE_SIG_STRING "~Module signature appended~\n"
@@ -254,7 +261,7 @@ out:
return ret;
}
-void test_verify_pkcs7_sig(void)
+static void test_verify_pkcs7_sig_from_map(void)
{
libbpf_print_fn_t old_print_cb;
char tmp_dir_template[] = "/tmp/verify_sigXXXXXX";
@@ -400,3 +407,159 @@ close_prog:
skel->bss->monitored_pid = 0;
test_verify_pkcs7_sig__destroy(skel);
}
+
+static int get_signature_size(const char *sig_path)
+{
+ struct stat st;
+
+ if (stat(sig_path, &st) == -1)
+ return -1;
+
+ return st.st_size;
+}
+
+static int add_signature_to_xattr(const char *data_path, const char *sig_path)
+{
+ char sig[MAX_SIG_SIZE] = {0};
+ int fd, size, ret;
+
+ if (sig_path) {
+ fd = open(sig_path, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ size = read(fd, sig, MAX_SIG_SIZE);
+ close(fd);
+ if (size <= 0)
+ return -1;
+ } else {
+ /* no sig_path, just write 32 bytes of zeros */
+ size = 32;
+ }
+ ret = setxattr(data_path, "user.sig", sig, size, 0);
+ if (!ASSERT_OK(ret, "setxattr"))
+ return -1;
+
+ return 0;
+}
+
+static int test_open_file(struct test_sig_in_xattr *skel, char *data_path,
+ pid_t pid, bool should_success, char *name)
+{
+ int ret;
+
+ skel->bss->monitored_pid = pid;
+ ret = open(data_path, O_RDONLY);
+ close(ret);
+ skel->bss->monitored_pid = 0;
+
+ if (should_success) {
+ if (!ASSERT_GE(ret, 0, name))
+ return -1;
+ } else {
+ if (!ASSERT_LT(ret, 0, name))
+ return -1;
+ }
+ return 0;
+}
+
+static void test_pkcs7_sig_fsverity(void)
+{
+ char data_path[PATH_MAX];
+ char sig_path[PATH_MAX];
+ char tmp_dir_template[] = "/tmp/verify_sigXXXXXX";
+ char *tmp_dir;
+ struct test_sig_in_xattr *skel = NULL;
+ pid_t pid;
+ int ret;
+
+ tmp_dir = mkdtemp(tmp_dir_template);
+ if (!ASSERT_OK_PTR(tmp_dir, "mkdtemp"))
+ return;
+
+ snprintf(data_path, PATH_MAX, "%s/data-file", tmp_dir);
+ snprintf(sig_path, PATH_MAX, "%s/sig-file", tmp_dir);
+
+ ret = _run_setup_process(tmp_dir, "setup");
+ if (!ASSERT_OK(ret, "_run_setup_process"))
+ goto out;
+
+ ret = _run_setup_process(tmp_dir, "fsverity-create-sign");
+
+ if (ret) {
+ printf("%s: SKIP: fsverity [sign|enable] doesn't work.\n"
+ "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n",
+ __func__);
+ test__skip();
+ goto out;
+ }
+
+ skel = test_sig_in_xattr__open();
+ if (!ASSERT_OK_PTR(skel, "test_sig_in_xattr__open"))
+ goto out;
+ ret = get_signature_size(sig_path);
+ if (!ASSERT_GT(ret, 0, "get_signature_size"))
+ goto out;
+ skel->bss->sig_size = ret;
+ skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring",
+ "ebpf_testing_keyring", NULL,
+ KEY_SPEC_SESSION_KEYRING);
+ memcpy(skel->bss->digest, "FSVerity", 8);
+
+ ret = test_sig_in_xattr__load(skel);
+ if (!ASSERT_OK(ret, "test_sig_in_xattr__load"))
+ goto out;
+
+ ret = test_sig_in_xattr__attach(skel);
+ if (!ASSERT_OK(ret, "test_sig_in_xattr__attach"))
+ goto out;
+
+ pid = getpid();
+
+ /* Case 1: fsverity is not enabled, open should succeed */
+ if (test_open_file(skel, data_path, pid, true, "open_1"))
+ goto out;
+
+ /* Case 2: fsverity is enabled, xattr is missing, open should
+ * fail
+ */
+ ret = _run_setup_process(tmp_dir, "fsverity-enable");
+ if (!ASSERT_OK(ret, "fsverity-enable"))
+ goto out;
+ if (test_open_file(skel, data_path, pid, false, "open_2"))
+ goto out;
+
+ /* Case 3: fsverity is enabled, xattr has valid signature, open
+ * should succeed
+ */
+ ret = add_signature_to_xattr(data_path, sig_path);
+ if (!ASSERT_OK(ret, "add_signature_to_xattr_1"))
+ goto out;
+
+ if (test_open_file(skel, data_path, pid, true, "open_3"))
+ goto out;
+
+ /* Case 4: fsverity is enabled, xattr has invalid signature, open
+ * should fail
+ */
+ ret = add_signature_to_xattr(data_path, NULL);
+ if (!ASSERT_OK(ret, "add_signature_to_xattr_2"))
+ goto out;
+ test_open_file(skel, data_path, pid, false, "open_4");
+
+out:
+ _run_setup_process(tmp_dir, "cleanup");
+ if (!skel)
+ return;
+
+ skel->bss->monitored_pid = 0;
+ test_sig_in_xattr__destroy(skel);
+}
+
+void test_verify_pkcs7_sig(void)
+{
+ if (test__start_subtest("pkcs7_sig_from_map"))
+ test_verify_pkcs7_sig_from_map();
+ if (test__start_subtest("pkcs7_sig_fsverity"))
+ test_pkcs7_sig_fsverity();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index ab4952b9fb1d..e6a783c7f5db 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -77,8 +77,8 @@ void test_xdp_context_test_run(void)
test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data),
0, 0, 0);
- /* Meta data must be 32 bytes or smaller */
- test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0);
+ /* Meta data must be 255 bytes or smaller */
+ test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);
/* Total size of data must match data_end - data_meta */
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 33cdf88efa6b..05edcf32f528 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -20,7 +20,7 @@
#define UDP_PAYLOAD_BYTES 4
-#define AF_XDP_SOURCE_PORT 1234
+#define UDP_SOURCE_PORT 1234
#define AF_XDP_CONSUMER_PORT 8080
#define UMEM_NUM 16
@@ -33,6 +33,18 @@
#define RX_ADDR "10.0.0.2"
#define PREFIX_LEN "8"
#define FAMILY AF_INET
+#define TX_NETNS_NAME "xdp_metadata_tx"
+#define RX_NETNS_NAME "xdp_metadata_rx"
+#define TX_MAC "00:00:00:00:00:01"
+#define RX_MAC "00:00:00:00:00:02"
+
+#define VLAN_ID 59
+#define VLAN_PROTO "802.1Q"
+#define VLAN_PID htons(ETH_P_8021Q)
+#define TX_NAME_VLAN TX_NAME "." TO_STR(VLAN_ID)
+
+#define XDP_RSS_TYPE_L4 BIT(3)
+#define VLAN_VID_MASK 0xfff
struct xsk {
void *umem_area;
@@ -181,7 +193,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
ip_csum(iph);
- udph->source = htons(AF_XDP_SOURCE_PORT);
+ udph->source = htons(UDP_SOURCE_PORT);
udph->dest = htons(dst_port);
udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES);
udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
@@ -204,6 +216,30 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
return 0;
}
+static int generate_packet_inet(void)
+{
+ char udp_payload[UDP_PAYLOAD_BYTES];
+ struct sockaddr_in rx_addr;
+ int sock_fd, err = 0;
+
+ /* Build a packet */
+ memset(udp_payload, 0xAA, UDP_PAYLOAD_BYTES);
+ rx_addr.sin_addr.s_addr = inet_addr(RX_ADDR);
+ rx_addr.sin_family = AF_INET;
+ rx_addr.sin_port = htons(AF_XDP_CONSUMER_PORT);
+
+ sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (!ASSERT_GE(sock_fd, 0, "socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)"))
+ return sock_fd;
+
+ err = sendto(sock_fd, udp_payload, UDP_PAYLOAD_BYTES, MSG_DONTWAIT,
+ (void *)&rx_addr, sizeof(rx_addr));
+ ASSERT_GE(err, 0, "sendto");
+
+ close(sock_fd);
+ return err;
+}
+
static void complete_tx(struct xsk *xsk)
{
struct xsk_tx_metadata *meta;
@@ -236,7 +272,7 @@ static void refill_rx(struct xsk *xsk, __u64 addr)
}
}
-static int verify_xsk_metadata(struct xsk *xsk)
+static int verify_xsk_metadata(struct xsk *xsk, bool sent_from_af_xdp)
{
const struct xdp_desc *rx_desc;
struct pollfd fds = {};
@@ -290,17 +326,42 @@ static int verify_xsk_metadata(struct xsk *xsk)
if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
return -1;
+ if (!sent_from_af_xdp) {
+ if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type"))
+ return -1;
+
+ if (!ASSERT_EQ(meta->rx_vlan_tci & VLAN_VID_MASK, VLAN_ID, "rx_vlan_tci"))
+ return -1;
+
+ if (!ASSERT_EQ(meta->rx_vlan_proto, VLAN_PID, "rx_vlan_proto"))
+ return -1;
+ goto done;
+ }
+
ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");
/* checksum offload */
ASSERT_EQ(udph->check, htons(0x721c), "csum");
+done:
xsk_ring_cons__release(&xsk->rx, 1);
refill_rx(xsk, comp_addr);
return 0;
}
+static void switch_ns_to_rx(struct nstoken **tok)
+{
+ close_netns(*tok);
+ *tok = open_netns(RX_NETNS_NAME);
+}
+
+static void switch_ns_to_tx(struct nstoken **tok)
+{
+ close_netns(*tok);
+ *tok = open_netns(TX_NETNS_NAME);
+}
+
void test_xdp_metadata(void)
{
struct xdp_metadata2 *bpf_obj2 = NULL;
@@ -318,27 +379,35 @@ void test_xdp_metadata(void)
int sock_fd;
int ret;
- /* Setup new networking namespace, with a veth pair. */
+ /* Setup new networking namespaces, with a veth pair. */
+ SYS(out, "ip netns add " TX_NETNS_NAME);
+ SYS(out, "ip netns add " RX_NETNS_NAME);
- SYS(out, "ip netns add xdp_metadata");
- tok = open_netns("xdp_metadata");
+ tok = open_netns(TX_NETNS_NAME);
SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
" type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
- SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01");
- SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02");
+ SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME);
+
+ SYS(out, "ip link set dev " TX_NAME " address " TX_MAC);
SYS(out, "ip link set dev " TX_NAME " up");
+
+ SYS(out, "ip link add link " TX_NAME " " TX_NAME_VLAN
+ " type vlan proto " VLAN_PROTO " id " TO_STR(VLAN_ID));
+ SYS(out, "ip link set dev " TX_NAME_VLAN " up");
+ SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME_VLAN);
+
+ /* Avoid ARP calls */
+ SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN);
+
+ switch_ns_to_rx(&tok);
+
+ SYS(out, "ip link set dev " RX_NAME " address " RX_MAC);
SYS(out, "ip link set dev " RX_NAME " up");
- SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
rx_ifindex = if_nametoindex(RX_NAME);
- tx_ifindex = if_nametoindex(TX_NAME);
-
- /* Setup separate AF_XDP for TX and RX interfaces. */
- ret = open_xsk(tx_ifindex, &tx_xsk);
- if (!ASSERT_OK(ret, "open_xsk(TX_NAME)"))
- goto out;
+ /* Setup separate AF_XDP for RX interface. */
ret = open_xsk(rx_ifindex, &rx_xsk);
if (!ASSERT_OK(ret, "open_xsk(RX_NAME)"))
@@ -379,18 +448,38 @@ void test_xdp_metadata(void)
if (!ASSERT_GE(ret, 0, "bpf_map_update_elem"))
goto out;
- /* Send packet destined to RX AF_XDP socket. */
+ switch_ns_to_tx(&tok);
+
+ /* Setup separate AF_XDP for TX interface nad send packet to the RX socket. */
+ tx_ifindex = if_nametoindex(TX_NAME);
+ ret = open_xsk(tx_ifindex, &tx_xsk);
+ if (!ASSERT_OK(ret, "open_xsk(TX_NAME)"))
+ goto out;
+
if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
"generate AF_XDP_CONSUMER_PORT"))
goto out;
- /* Verify AF_XDP RX packet has proper metadata. */
- if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0,
+ switch_ns_to_rx(&tok);
+
+ /* Verify packet sent from AF_XDP has proper metadata. */
+ if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0,
"verify_xsk_metadata"))
goto out;
+ switch_ns_to_tx(&tok);
complete_tx(&tx_xsk);
+ /* Now check metadata of packet, generated with network stack */
+ if (!ASSERT_GE(generate_packet_inet(), 0, "generate UDP packet"))
+ goto out;
+
+ switch_ns_to_rx(&tok);
+
+ if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0,
+ "verify_xsk_metadata"))
+ goto out;
+
/* Make sure freplace correctly picks up original bound device
* and doesn't crash.
*/
@@ -408,11 +497,15 @@ void test_xdp_metadata(void)
if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace"))
goto out;
+ switch_ns_to_tx(&tok);
+
/* Send packet to trigger . */
if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
"generate freplace packet"))
goto out;
+ switch_ns_to_rx(&tok);
+
while (!retries--) {
if (bpf_obj2->bss->called)
break;
@@ -427,5 +520,6 @@ out:
xdp_metadata__destroy(bpf_obj);
if (tok)
close_netns(tok);
- SYS_NOFAIL("ip netns del xdp_metadata");
+ SYS_NOFAIL("ip netns del " RX_NETNS_NAME);
+ SYS_NOFAIL("ip netns del " TX_NETNS_NAME);
}
diff --git a/tools/testing/selftests/bpf/progs/access_map_in_map.c b/tools/testing/selftests/bpf/progs/access_map_in_map.c
new file mode 100644
index 000000000000..1126871c2ebd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/access_map_in_map.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+ __array(values, struct inner_map_type);
+} outer_array_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+ __array(values, struct inner_map_type);
+} outer_htab_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+char _license[] SEC("license") = "GPL";
+
+int tgid = 0;
+
+static int acc_map_in_map(void *outer_map)
+{
+ int i, key, value = 0xdeadbeef;
+ void *inner_map;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != tgid)
+ return 0;
+
+ /* Find nonexistent inner map */
+ key = 1;
+ inner_map = bpf_map_lookup_elem(outer_map, &key);
+ if (inner_map)
+ return 0;
+
+ /* Find the old inner map */
+ key = 0;
+ inner_map = bpf_map_lookup_elem(outer_map, &key);
+ if (!inner_map)
+ return 0;
+
+ /* Wait for the old inner map to be replaced */
+ for (i = 0; i < 2048; i++)
+ bpf_map_update_elem(inner_map, &key, &value, 0);
+
+ return 0;
+}
+
+SEC("?kprobe/" SYS_PREFIX "sys_getpgid")
+int access_map_in_array(void *ctx)
+{
+ return acc_map_in_map(&outer_array_map);
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int sleepable_access_map_in_array(void *ctx)
+{
+ return acc_map_in_map(&outer_array_map);
+}
+
+SEC("?kprobe/" SYS_PREFIX "sys_getpgid")
+int access_map_in_htab(void *ctx)
+{
+ return acc_map_in_map(&outer_htab_map);
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int sleepable_access_map_in_htab(void *ctx)
+{
+ return acc_map_in_map(&outer_htab_map);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 799fff4995d8..2fd59970c43a 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -71,6 +71,7 @@
#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val)))
#define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary")))
#define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv")))
+#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path)))
/* Convenience macro for use with 'asm volatile' blocks */
#define __naked __attribute__((naked))
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 0b793a102791..1bdc680b0e0e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -26,6 +26,7 @@
#define IPV6_AUTOFLOWLABEL 70
#define TC_ACT_UNSPEC (-1)
+#define TC_ACT_OK 0
#define TC_ACT_SHOT 2
#define SOL_TCP 6
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
index a043d8fefdac..610c2427fd93 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
@@ -21,50 +21,100 @@ struct {
__type(value, long);
} map_b SEC(".maps");
+int target_hid = 0;
+bool is_cgroup1 = 0;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+static void __on_lookup(struct cgroup *cgrp)
+{
+ bpf_cgrp_storage_delete(&map_a, cgrp);
+ bpf_cgrp_storage_delete(&map_b, cgrp);
+}
+
SEC("fentry/bpf_local_storage_lookup")
int BPF_PROG(on_lookup)
{
struct task_struct *task = bpf_get_current_task_btf();
+ struct cgroup *cgrp;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
- bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp);
- bpf_cgrp_storage_delete(&map_b, task->cgroups->dfl_cgrp);
+ __on_lookup(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_lookup(task->cgroups->dfl_cgrp);
return 0;
}
-SEC("fentry/bpf_local_storage_update")
-int BPF_PROG(on_update)
+static void __on_update(struct cgroup *cgrp)
{
- struct task_struct *task = bpf_get_current_task_btf();
long *ptr;
- ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr += 1;
- ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
+ ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr += 1;
+}
+SEC("fentry/bpf_local_storage_update")
+int BPF_PROG(on_update)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct cgroup *cgrp;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_update(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_update(task->cgroups->dfl_cgrp);
return 0;
}
-SEC("tp_btf/sys_enter")
-int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp)
{
- struct task_struct *task;
long *ptr;
- task = bpf_get_current_task_btf();
- ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr = 200;
- ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
+ ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
*ptr = 100;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct cgroup *cgrp;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_enter(regs, id, cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_enter(regs, id, task->cgroups->dfl_cgrp);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
index 4c7844e1dbfa..facedd8b8250 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
@@ -17,7 +17,11 @@ struct {
__u32 target_pid;
__u64 cgroup_id;
+int target_hid;
+bool is_cgroup1;
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
@@ -37,23 +41,50 @@ int cgroup_iter(struct bpf_iter__cgroup *ctx)
return 0;
}
+static void __no_rcu_lock(struct cgroup *cgrp)
+{
+ long *ptr;
+
+ /* Note that trace rcu is held in sleepable prog, so we can use
+ * bpf_cgrp_storage_get() in sleepable prog.
+ */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+}
+
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-int no_rcu_lock(void *ctx)
+int cgrp1_no_rcu_lock(void *ctx)
{
struct task_struct *task;
struct cgroup *cgrp;
- long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ /* bpf_task_get_cgroup1 can work in sleepable prog */
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __no_rcu_lock(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int no_rcu_lock(void *ctx)
+{
+ struct task_struct *task;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
return 0;
/* task->cgroups is untrusted in sleepable prog outside of RCU CS */
- cgrp = task->cgroups->dfl_cgrp;
- ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
- if (ptr)
- cgroup_id = cgrp->kn->id;
+ __no_rcu_lock(task->cgroups->dfl_cgrp);
return 0;
}
@@ -68,6 +99,22 @@ int yes_rcu_lock(void *ctx)
if (task->pid != target_pid)
return 0;
+ if (is_cgroup1) {
+ bpf_rcu_read_lock();
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+ bpf_cgroup_release(cgrp);
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
bpf_rcu_read_lock();
cgrp = task->cgroups->dfl_cgrp;
/* cgrp is trusted under RCU CS */
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c
index 9ebb8e2fe541..1c348f000f38 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c
@@ -27,62 +27,100 @@ pid_t target_pid = 0;
int mismatch_cnt = 0;
int enter_cnt = 0;
int exit_cnt = 0;
+int target_hid = 0;
+bool is_cgroup1 = 0;
-SEC("tp_btf/sys_enter")
-int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp)
{
- struct task_struct *task;
long *ptr;
int err;
- task = bpf_get_current_task_btf();
- if (task->pid != target_pid)
- return 0;
-
/* populate value 0 */
- ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0,
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
- return 0;
+ return;
/* delete value 0 */
- err = bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp);
+ err = bpf_cgrp_storage_delete(&map_a, cgrp);
if (err)
- return 0;
+ return;
/* value is not available */
- ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, 0);
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, 0);
if (ptr)
- return 0;
+ return;
/* re-populate the value */
- ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0,
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
- return 0;
+ return;
__sync_fetch_and_add(&enter_cnt, 1);
*ptr = MAGIC_VALUE + enter_cnt;
-
- return 0;
}
-SEC("tp_btf/sys_exit")
-int BPF_PROG(on_exit, struct pt_regs *regs, long id)
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
{
struct task_struct *task;
- long *ptr;
+ struct cgroup *cgrp;
task = bpf_get_current_task_btf();
if (task->pid != target_pid)
return 0;
- ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0,
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_enter(regs, id, cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_enter(regs, id, task->cgroups->dfl_cgrp);
+ return 0;
+}
+
+static void __on_exit(struct pt_regs *regs, long id, struct cgroup *cgrp)
+{
+ long *ptr;
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!ptr)
- return 0;
+ return;
__sync_fetch_and_add(&exit_cnt, 1);
if (*ptr != MAGIC_VALUE + exit_cnt)
__sync_fetch_and_add(&mismatch_cnt, 1);
+}
+
+SEC("tp_btf/sys_exit")
+int BPF_PROG(on_exit, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_exit(regs, id, cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_exit(regs, id, task->cgroups->dfl_cgrp);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index b15c588ace15..0cd4aebb97cf 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -54,6 +54,7 @@ bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym;
void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym;
u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym;
u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym;
+u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c
index 674a63424dee..fc3666edf456 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_success.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c
@@ -461,6 +461,49 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags)
}
SEC("tp_btf/task_newtask")
+int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local;
+
+ if (!is_test_task())
+ return 0;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ if (bpf_cpumask_weight(cast(local)) != 0) {
+ err = 3;
+ goto out;
+ }
+
+ bpf_cpumask_set_cpu(0, local);
+ if (bpf_cpumask_weight(cast(local)) != 1) {
+ err = 4;
+ goto out;
+ }
+
+ /*
+ * Make sure that adding additional CPUs changes the weight. Test to
+ * see whether the CPU was set to account for running on UP machines.
+ */
+ bpf_cpumask_set_cpu(1, local);
+ if (bpf_cpumask_test_cpu(1, cast(local)) && bpf_cpumask_weight(cast(local)) != 2) {
+ err = 5;
+ goto out;
+ }
+
+ bpf_cpumask_clear(local);
+ if (bpf_cpumask_weight(cast(local)) != 0) {
+ err = 6;
+ goto out;
+ }
+out:
+ bpf_cpumask_release(local);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
__success
int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags)
{
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
index 49efaed143fc..0ef81040da59 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_assert.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -125,7 +125,7 @@ int check_assert_generic(struct __sk_buff *ctx)
}
SEC("?fentry/bpf_check")
-__failure __msg("At program exit the register R0 has value (0x40; 0x0)")
+__failure __msg("At program exit the register R1 has smin=64 smax=64")
int check_assert_with_return(void *ctx)
{
bpf_assert_with(!ctx, 64);
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c
index 8c0ef2742208..9cceb6521143 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_fail.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -308,7 +308,7 @@ int reject_set_exception_cb_bad_ret1(void *ctx)
}
SEC("?fentry/bpf_check")
-__failure __msg("At program exit the register R0 has value (0x40; 0x0) should")
+__failure __msg("At program exit the register R1 has smin=64 smax=64 should")
int reject_set_exception_cb_bad_ret2(void *ctx)
{
bpf_throw(64);
diff --git a/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c
new file mode 100644
index 000000000000..e6a75f86cac6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("freplace")
+int freplace_prog(void)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index b2181f850d3e..3aca3dc145b5 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -846,7 +846,7 @@ __naked int delayed_precision_mark(void)
"call %[bpf_iter_num_next];"
"if r0 == 0 goto 2f;"
"if r6 != 42 goto 3f;"
- "r7 = -32;"
+ "r7 = -33;"
"call %[bpf_get_prandom_u32];"
"r6 = r0;"
"goto 1b;\n"
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
index 1769fdff6aea..75043ffc5dad 100644
--- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
@@ -37,11 +37,18 @@ struct plain_local {
long data;
};
+struct local_with_root {
+ long key;
+ struct bpf_spin_lock l;
+ struct bpf_rb_root r __contains(node_data, node);
+};
+
struct map_value {
struct prog_test_ref_kfunc *not_kptr;
struct prog_test_ref_kfunc __kptr *val;
struct node_data __kptr *node;
struct plain_local __kptr *plain;
+ struct local_with_root __kptr *local_root;
};
/* This is necessary so that LLVM generates BTF for node_data struct
@@ -65,6 +72,17 @@ struct {
__uint(max_entries, 2);
} some_nodes SEC(".maps");
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+
+ return node_a->key < node_b->key;
+}
+
static int create_and_stash(int idx, int val)
{
struct map_value *mapval;
@@ -114,6 +132,41 @@ long stash_plain(void *ctx)
}
SEC("tc")
+long stash_local_with_root(void *ctx)
+{
+ struct local_with_root *res;
+ struct map_value *mapval;
+ struct node_data *n;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res)
+ return 2;
+ res->key = 41;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n) {
+ bpf_obj_drop(res);
+ return 3;
+ }
+
+ bpf_spin_lock(&res->l);
+ bpf_rbtree_add(&res->r, &n->node, less);
+ bpf_spin_unlock(&res->l);
+
+ res = bpf_kptr_xchg(&mapval->local_root, res);
+ if (res) {
+ bpf_obj_drop(res);
+ return 4;
+ }
+ return 0;
+}
+
+SEC("tc")
long unstash_rb_node(void *ctx)
{
struct map_value *mapval;
diff --git a/tools/testing/selftests/bpf/progs/map_in_map_btf.c b/tools/testing/selftests/bpf/progs/map_in_map_btf.c
new file mode 100644
index 000000000000..7a1336d7b16a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_in_map_btf.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ __u64 data;
+ struct bpf_list_node node;
+};
+
+struct map_value {
+ struct bpf_list_head head __contains(node_data, node);
+ struct bpf_spin_lock lock;
+};
+
+struct inner_array_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} inner_array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+ __array(values, struct inner_array_type);
+} outer_array SEC(".maps") = {
+ .values = {
+ [0] = &inner_array,
+ },
+};
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+bool done = false;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int add_to_list_in_inner_array(void *ctx)
+{
+ struct map_value *value;
+ struct node_data *new;
+ struct bpf_map *map;
+ int zero = 0;
+
+ if (done || (u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ map = bpf_map_lookup_elem(&outer_array, &zero);
+ if (!map)
+ return 0;
+
+ value = bpf_map_lookup_elem(map, &zero);
+ if (!value)
+ return 0;
+
+ new = bpf_obj_new(typeof(*new));
+ if (!new)
+ return 0;
+
+ bpf_spin_lock(&value->lock);
+ bpf_list_push_back(&value->head, &new->node);
+ bpf_spin_unlock(&value->lock);
+ done = true;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/normal_map_btf.c b/tools/testing/selftests/bpf/progs/normal_map_btf.c
new file mode 100644
index 000000000000..66cde82aa86d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/normal_map_btf.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ __u64 data;
+ struct bpf_list_node node;
+};
+
+struct map_value {
+ struct bpf_list_head head __contains(node_data, node);
+ struct bpf_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} array SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+bool done = false;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int add_to_list_in_array(void *ctx)
+{
+ struct map_value *value;
+ struct node_data *new;
+ int zero = 0;
+
+ if (done || (u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ value = bpf_map_lookup_elem(&array, &zero);
+ if (!value)
+ return 0;
+
+ new = bpf_obj_new(typeof(*new));
+ if (!new)
+ return 0;
+
+ bpf_spin_lock(&value->lock);
+ bpf_list_push_back(&value->head, &new->node);
+ bpf_spin_unlock(&value->lock);
+ done = true;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c
new file mode 100644
index 000000000000..9085be50f03b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_map.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_QUEUE);
+ __uint(max_entries, 1);
+ __type(value, __u32);
+} priv_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c
new file mode 100644
index 000000000000..3c7b2b618c8a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_prog.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe")
+int kprobe_prog(void *ctx)
+{
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c
index e550f728962d..3d3cafdebe72 100644
--- a/tools/testing/selftests/bpf/progs/syscall.c
+++ b/tools/testing/selftests/bpf/progs/syscall.c
@@ -6,9 +6,15 @@
#include <bpf/bpf_tracing.h>
#include <../../../tools/include/linux/filter.h>
#include <linux/btf.h>
+#include <string.h>
+#include <errno.h>
char _license[] SEC("license") = "GPL";
+struct bpf_map {
+ int id;
+} __attribute__((preserve_access_index));
+
struct args {
__u64 log_buf;
__u32 log_size;
@@ -27,6 +33,37 @@ struct args {
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
BTF_INT_ENC(encoding, bits_offset, bits)
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, union bpf_attr);
+ __uint(max_entries, 1);
+} bpf_attr_array SEC(".maps");
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+ __array(values, struct inner_map_type);
+} outer_array_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
static int btf_load(void)
{
struct btf_blob {
@@ -58,7 +95,7 @@ static int btf_load(void)
}
SEC("syscall")
-int bpf_prog(struct args *ctx)
+int load_prog(struct args *ctx)
{
static char license[] = "GPL";
static struct bpf_insn insns[] = {
@@ -94,8 +131,8 @@ int bpf_prog(struct args *ctx)
map_create_attr.max_entries = ctx->max_entries;
map_create_attr.btf_fd = ret;
- prog_load_attr.license = (long) license;
- prog_load_attr.insns = (long) insns;
+ prog_load_attr.license = ptr_to_u64(license);
+ prog_load_attr.insns = ptr_to_u64(insns);
prog_load_attr.log_buf = ctx->log_buf;
prog_load_attr.log_size = ctx->log_size;
prog_load_attr.log_level = 1;
@@ -107,8 +144,8 @@ int bpf_prog(struct args *ctx)
insns[3].imm = ret;
map_update_attr.map_fd = ret;
- map_update_attr.key = (long) &key;
- map_update_attr.value = (long) &value;
+ map_update_attr.key = ptr_to_u64(&key);
+ map_update_attr.value = ptr_to_u64(&value);
ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr));
if (ret < 0)
return ret;
@@ -119,3 +156,52 @@ int bpf_prog(struct args *ctx)
ctx->prog_fd = ret;
return 1;
}
+
+SEC("syscall")
+int update_outer_map(void *ctx)
+{
+ int zero = 0, ret = 0, outer_fd = -1, inner_fd = -1, err;
+ const int attr_sz = sizeof(union bpf_attr);
+ union bpf_attr *attr;
+
+ attr = bpf_map_lookup_elem((struct bpf_map *)&bpf_attr_array, &zero);
+ if (!attr)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_id = ((struct bpf_map *)&outer_array_map)->id;
+ outer_fd = bpf_sys_bpf(BPF_MAP_GET_FD_BY_ID, attr, attr_sz);
+ if (outer_fd < 0)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_type = BPF_MAP_TYPE_ARRAY;
+ attr->key_size = 4;
+ attr->value_size = 4;
+ attr->max_entries = 1;
+ inner_fd = bpf_sys_bpf(BPF_MAP_CREATE, attr, attr_sz);
+ if (inner_fd < 0)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_fd = outer_fd;
+ attr->key = ptr_to_u64(&zero);
+ attr->value = ptr_to_u64(&inner_fd);
+ err = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, attr, attr_sz);
+ if (err)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_fd = outer_fd;
+ attr->key = ptr_to_u64(&zero);
+ err = bpf_sys_bpf(BPF_MAP_DELETE_ELEM, attr, attr_sz);
+ if (err)
+ goto out;
+ ret = 1;
+out:
+ if (inner_fd >= 0)
+ bpf_sys_close(inner_fd);
+ if (outer_fd >= 0)
+ bpf_sys_close(outer_fd);
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_fsverity.c b/tools/testing/selftests/bpf/progs/test_fsverity.c
new file mode 100644
index 000000000000..3975495b75c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_fsverity.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
+#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */
+
+char expected_digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
+char digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
+__u32 monitored_pid;
+__u32 got_fsverity;
+__u32 digest_matches;
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open, struct file *f)
+{
+ struct bpf_dynptr digest_ptr;
+ __u32 pid;
+ int ret;
+ int i;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr);
+ ret = bpf_get_fsverity_digest(f, &digest_ptr);
+ if (ret < 0)
+ return 0;
+ got_fsverity = 1;
+
+ for (i = 0; i < sizeof(digest); i++) {
+ if (digest[i] != expected_digest[i])
+ return 0;
+ }
+
+ digest_matches = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c
new file mode 100644
index 000000000000..7eb2a4e5a3e5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+__u32 found_xattr;
+
+static const char expected_value[] = "hello";
+char value[32];
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open, struct file *f)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr);
+
+ ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr);
+ if (ret != sizeof(expected_value))
+ return 0;
+ if (bpf_strncmp(value, ret, expected_value))
+ return 0;
+ found_xattr = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c
index b512d6a6c75e..b4e089d6981d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func15.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func15.c
@@ -13,7 +13,7 @@ __noinline int foo(unsigned int *v)
}
SEC("cgroup_skb/ingress")
-__failure __msg("At program exit the register R0 has value")
+__failure __msg("At program exit the register R0 has ")
int global_func15(struct __sk_buff *skb)
{
unsigned int v = 1;
@@ -22,3 +22,35 @@ int global_func15(struct __sk_buff *skb)
return v;
}
+
+SEC("cgroup_skb/ingress")
+__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
+__failure
+/* check that fallthrough code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before 2: (b7) r0 = 1")
+/* check that branch code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7")
+__msg("At program exit the register R0 has ")
+__naked int global_func15_tricky_pruning(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 s> 1000 goto 1f;"
+ "r0 = 1;"
+ "1:"
+ "goto +0;" /* checkpoint */
+ /* cgroup_skb/ingress program is expected to return [0, 1]
+ * values, so branch above makes sure that in a fallthrough
+ * case we have a valid 1 stored in R0 register, but in
+ * a branch case we assign some random value to R0. So if
+ * there is something wrong with precision tracking for R0 at
+ * program exit, we might erronenously prune branch case,
+ * because R0 in fallthrough case is imprecise (and thus any
+ * value is valid from POV of verifier is_state_equal() logic)
+ */
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_common
+ );
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func16.c b/tools/testing/selftests/bpf/progs/test_global_func16.c
index e7206304632e..e3e64bc472cd 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func16.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func16.c
@@ -13,7 +13,7 @@ __noinline int foo(int (*arr)[10])
}
SEC("cgroup_skb/ingress")
-__failure __msg("invalid indirect read from stack")
+__success
int global_func16(struct __sk_buff *skb)
{
int array[10];
diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
new file mode 100644
index 000000000000..2f0eb1334d65
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
+#define MAX_SIG_SIZE 1024
+
+/* By default, "fsverity sign" signs a file with fsverity_formatted_digest
+ * of the file. fsverity_formatted_digest on the kernel side is only used
+ * with CONFIG_FS_VERITY_BUILTIN_SIGNATURES. However, BPF LSM doesn't not
+ * require CONFIG_FS_VERITY_BUILTIN_SIGNATURES, so vmlinux.h may not have
+ * fsverity_formatted_digest. In this test, we intentionally avoid using
+ * fsverity_formatted_digest.
+ *
+ * Luckily, fsverity_formatted_digest is simply 8-byte magic followed by
+ * fsverity_digest. We use a char array of size fsverity_formatted_digest
+ * plus SHA256_DIGEST_SIZE. The magic part of it is filled by user space,
+ * and the rest of it is filled by bpf_get_fsverity_digest.
+ *
+ * Note that, generating signatures based on fsverity_formatted_digest is
+ * the design choice of this selftest (and "fsverity sign"). With BPF
+ * LSM, we have the flexibility to generate signature based on other data
+ * sets, for example, fsverity_digest or only the digest[] part of it.
+ */
+#define MAGIC_SIZE 8
+#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */
+char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
+
+__u32 monitored_pid;
+char sig[MAX_SIG_SIZE];
+__u32 sig_size;
+__u32 user_keyring_serial;
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open, struct file *f)
+{
+ struct bpf_dynptr digest_ptr, sig_ptr;
+ struct bpf_key *trusted_keyring;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* digest_ptr points to fsverity_digest */
+ bpf_dynptr_from_mem(digest + MAGIC_SIZE, sizeof(digest) - MAGIC_SIZE, 0, &digest_ptr);
+
+ ret = bpf_get_fsverity_digest(f, &digest_ptr);
+ /* No verity, allow access */
+ if (ret < 0)
+ return 0;
+
+ /* Move digest_ptr to fsverity_formatted_digest */
+ bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr);
+
+ /* Read signature from xattr */
+ bpf_dynptr_from_mem(sig, sizeof(sig), 0, &sig_ptr);
+ ret = bpf_get_file_xattr(f, "user.sig", &sig_ptr);
+ /* No signature, reject access */
+ if (ret < 0)
+ return -EPERM;
+
+ trusted_keyring = bpf_lookup_user_key(user_keyring_serial, 0);
+ if (!trusted_keyring)
+ return -ENOENT;
+
+ /* Verify signature */
+ ret = bpf_verify_pkcs7_signature(&digest_ptr, &sig_ptr, trusted_keyring);
+
+ bpf_key_put(trusted_keyring);
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index f66af753bbbb..3e436e6f7312 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -6,66 +6,34 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <stddef.h>
-#include <string.h>
-#include <arpa/inet.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_tunnel.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/icmp.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/pkt_cls.h>
-#include <linux/erspan.h>
-#include <linux/udp.h>
+#include "vmlinux.h"
+#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_kfuncs.h"
+#include "bpf_tracing_net.h"
#define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret)
-#define VXLAN_UDP_PORT 4789
+#define VXLAN_UDP_PORT 4789
+#define ETH_P_IP 0x0800
+#define PACKET_HOST 0
+#define TUNNEL_CSUM bpf_htons(0x01)
+#define TUNNEL_KEY bpf_htons(0x04)
/* Only IPv4 address assigned to veth1.
* 172.16.1.200
*/
#define ASSIGNED_ADDR_VETH1 0xac1001c8
-struct geneve_opt {
- __be16 opt_class;
- __u8 type;
- __u8 length:5;
- __u8 r3:1;
- __u8 r2:1;
- __u8 r1:1;
- __u8 opt_data[8]; /* hard-coded to 8 byte */
-};
-
-struct vxlanhdr {
- __be32 vx_flags;
- __be32 vx_vni;
-} __attribute__((packed));
-
-struct vxlan_metadata {
- __u32 gbp;
-};
-
-struct bpf_fou_encap {
- __be16 sport;
- __be16 dport;
-};
-
-enum bpf_fou_encap_type {
- FOU_BPF_ENCAP_FOU,
- FOU_BPF_ENCAP_GUE,
-};
-
int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
struct bpf_fou_encap *encap, int type) __ksym;
int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
struct bpf_fou_encap *encap) __ksym;
+struct xfrm_state *
+bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts,
+ u32 opts__sz) __ksym;
+void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym;
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -205,9 +173,9 @@ int erspan_set_tunnel(struct __sk_buff *skb)
__u8 hwid = 7;
md.version = 2;
- md.u.md2.dir = direction;
- md.u.md2.hwid = hwid & 0xf;
- md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
#endif
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
@@ -246,8 +214,9 @@ int erspan_get_tunnel(struct __sk_buff *skb)
bpf_printk("\tindex %x\n", index);
#else
bpf_printk("\tdirection %d hwid %x timestamp %u\n",
- md.u.md2.dir,
- (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+ BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
+ (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
+ BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
bpf_ntohl(md.u.md2.timestamp));
#endif
@@ -284,9 +253,9 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
__u8 hwid = 17;
md.version = 2;
- md.u.md2.dir = direction;
- md.u.md2.hwid = hwid & 0xf;
- md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
#endif
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
@@ -326,8 +295,9 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
bpf_printk("\tindex %x\n", index);
#else
bpf_printk("\tdirection %d hwid %x timestamp %u\n",
- md.u.md2.dir,
- (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+ BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
+ (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
+ BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
bpf_ntohl(md.u.md2.timestamp));
#endif
@@ -963,6 +933,10 @@ int ip6ip6_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
+volatile int xfrm_reqid = 0;
+volatile int xfrm_spi = 0;
+volatile int xfrm_remote_ip = 0;
+
SEC("tc")
int xfrm_get_state(struct __sk_buff *skb)
{
@@ -973,10 +947,58 @@ int xfrm_get_state(struct __sk_buff *skb)
if (ret < 0)
return TC_ACT_OK;
- bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n",
- x.reqid, bpf_ntohl(x.spi),
- bpf_ntohl(x.remote_ipv4));
+ xfrm_reqid = x.reqid;
+ xfrm_spi = bpf_ntohl(x.spi);
+ xfrm_remote_ip = bpf_ntohl(x.remote_ipv4);
+
return TC_ACT_OK;
}
+volatile int xfrm_replay_window = 0;
+
+SEC("xdp")
+int xfrm_get_state_xdp(struct xdp_md *xdp)
+{
+ struct bpf_xfrm_state_opts opts = {};
+ struct xfrm_state *x = NULL;
+ struct ip_esp_hdr *esph;
+ struct bpf_dynptr ptr;
+ u8 esph_buf[8] = {};
+ u8 iph_buf[20] = {};
+ struct iphdr *iph;
+ u32 off;
+
+ if (bpf_dynptr_from_xdp(xdp, 0, &ptr))
+ goto out;
+
+ off = sizeof(struct ethhdr);
+ iph = bpf_dynptr_slice(&ptr, off, iph_buf, sizeof(iph_buf));
+ if (!iph || iph->protocol != IPPROTO_ESP)
+ goto out;
+
+ off += sizeof(struct iphdr);
+ esph = bpf_dynptr_slice(&ptr, off, esph_buf, sizeof(esph_buf));
+ if (!esph)
+ goto out;
+
+ opts.netns_id = BPF_F_CURRENT_NETNS;
+ opts.daddr.a4 = iph->daddr;
+ opts.spi = esph->spi;
+ opts.proto = IPPROTO_ESP;
+ opts.family = AF_INET;
+
+ x = bpf_xdp_get_xfrm_state(xdp, &opts, sizeof(opts));
+ if (!x)
+ goto out;
+
+ if (!x->replay_esn)
+ goto out;
+
+ xfrm_replay_window = x->replay_esn->replay_window;
+out:
+ if (x)
+ bpf_xdp_xfrm_state_release(x);
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
index 7748cc23de8a..f42e9f3831a1 100644
--- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -10,17 +10,11 @@
#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
#define MAX_DATA_SIZE (1024 * 1024)
#define MAX_SIG_SIZE 1024
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
-extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
-extern void bpf_key_put(struct bpf_key *key) __ksym;
-extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
- struct bpf_dynptr *sig_ptr,
- struct bpf_key *trusted_keyring) __ksym;
-
__u32 monitored_pid;
__u32 user_keyring_serial;
__u64 system_keyring_id;
diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c
index 226d33b5a05c..0996c2486f05 100644
--- a/tools/testing/selftests/bpf/progs/timer_failure.c
+++ b/tools/testing/selftests/bpf/progs/timer_failure.c
@@ -21,17 +21,38 @@ struct {
__type(value, struct elem);
} timer_map SEC(".maps");
-static int timer_cb_ret1(void *map, int *key, struct bpf_timer *timer)
+__naked __noinline __used
+static unsigned long timer_cb_ret_bad()
{
- if (bpf_get_smp_processor_id() % 2)
- return 1;
- else
- return 0;
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 s> 1000 goto 1f;"
+ "r0 = 0;"
+ "1:"
+ "goto +0;" /* checkpoint */
+ /* async callback is expected to return 0, so branch above
+ * skipping r0 = 0; should lead to a failure, but if exit
+ * instruction doesn't enforce r0's precision, this callback
+ * will be successfully verified
+ */
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_common
+ );
}
SEC("fentry/bpf_fentry_test1")
-__failure __msg("should have been in (0x0; 0x0)")
-int BPF_PROG2(test_ret_1, int, a)
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+/* check that fallthrough code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before")
+__msg(": (85) call bpf_get_prandom_u32#7") /* anchor message */
+/* check that branch code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before ") __msg(": (85) call bpf_get_prandom_u32#7")
+__msg("should have been in [0, 0]")
+long BPF_PROG2(test_bad_ret, int, a)
{
int key = 0;
struct bpf_timer *timer;
@@ -39,7 +60,7 @@ int BPF_PROG2(test_ret_1, int, a)
timer = bpf_map_lookup_elem(&timer_map, &key);
if (timer) {
bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME);
- bpf_timer_set_callback(timer, timer_cb_ret1);
+ bpf_timer_set_callback(timer, timer_cb_ret_bad);
bpf_timer_start(timer, 1000, 0);
}
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
index 03ee946c6bf7..11ab25c42c36 100644
--- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
@@ -184,7 +184,7 @@ invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context)
* not be able to write to that pointer.
*/
SEC("?raw_tp")
-__failure __msg("At callback return the register R0 has value")
+__failure __msg("At callback return the register R0 has ")
int user_ringbuf_callback_invalid_return(void *ctx)
{
bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0);
diff --git a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
index 359df865a8f3..8d77cc5323d3 100644
--- a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
@@ -27,8 +27,8 @@ __naked void stack_out_of_bounds(void)
SEC("socket")
__description("uninitialized stack1")
-__failure __msg("invalid indirect read from stack")
-__failure_unpriv
+__success __log_level(4) __msg("stack depth 8")
+__failure_unpriv __msg_unpriv("invalid indirect read from stack")
__naked void uninitialized_stack1(void)
{
asm volatile (" \
@@ -45,8 +45,8 @@ __naked void uninitialized_stack1(void)
SEC("socket")
__description("uninitialized stack2")
-__failure __msg("invalid read from stack")
-__failure_unpriv
+__success __log_level(4) __msg("stack depth 8")
+__failure_unpriv __msg_unpriv("invalid read from stack")
__naked void uninitialized_stack2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c
new file mode 100644
index 000000000000..623f130a3198
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <stdint.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#include "bpf_misc.h"
+
+struct core_reloc_bitfields {
+ /* unsigned bitfields */
+ uint8_t ub1: 1;
+ uint8_t ub2: 2;
+ uint32_t ub7: 7;
+ /* signed bitfields */
+ int8_t sb4: 4;
+ int32_t sb20: 20;
+ /* non-bitfields */
+ uint32_t u32;
+ int32_t s32;
+} __attribute__((preserve_access_index));
+
+SEC("tc")
+__description("single CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(3)
+int single_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3);
+ return BPF_CORE_READ_BITFIELD(&bitfields, ub2);
+}
+
+SEC("tc")
+__description("multiple CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(0x3FD)
+int multiple_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+ uint8_t ub2;
+ int8_t sb4;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 1);
+ BPF_CORE_WRITE_BITFIELD(&bitfields, sb4, -1);
+
+ ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2);
+ sb4 = BPF_CORE_READ_BITFIELD(&bitfields, sb4);
+
+ return (((uint8_t)sb4) << 2) | ub2;
+}
+
+SEC("tc")
+__description("adjacent CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(7)
+int adjacent_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+ uint8_t ub1, ub2;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1);
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3);
+
+ ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1);
+ ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2);
+
+ return (ub2 << 1) | ub1;
+}
+
+SEC("tc")
+__description("multibyte CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(0x21)
+int multibyte_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+ uint32_t ub7;
+ uint8_t ub1;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1);
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub7, 16);
+
+ ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1);
+ ub7 = BPF_CORE_READ_BITFIELD(&bitfields, ub7);
+
+ return (ub7 << 1) | ub1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
index d6c4a7f3f790..6e0f349f8f15 100644
--- a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
@@ -7,7 +7,7 @@
SEC("cgroup/sock")
__description("bpf_exit with invalid return code. test1")
-__failure __msg("R0 has value (0x0; 0xffffffff)")
+__failure __msg("smin=0 smax=4294967295 should have been in [0, 1]")
__naked void with_invalid_return_code_test1(void)
{
asm volatile (" \
@@ -30,7 +30,7 @@ __naked void with_invalid_return_code_test2(void)
SEC("cgroup/sock")
__description("bpf_exit with invalid return code. test3")
-__failure __msg("R0 has value (0x0; 0x3)")
+__failure __msg("smin=0 smax=3 should have been in [0, 1]")
__naked void with_invalid_return_code_test3(void)
{
asm volatile (" \
@@ -53,7 +53,7 @@ __naked void with_invalid_return_code_test4(void)
SEC("cgroup/sock")
__description("bpf_exit with invalid return code. test5")
-__failure __msg("R0 has value (0x2; 0x0)")
+__failure __msg("smin=2 smax=2 should have been in [0, 1]")
__naked void with_invalid_return_code_test5(void)
{
asm volatile (" \
@@ -75,7 +75,7 @@ __naked void with_invalid_return_code_test6(void)
SEC("cgroup/sock")
__description("bpf_exit with invalid return code. test7")
-__failure __msg("R0 has unknown scalar value")
+__failure __msg("R0 has unknown scalar value should have been in [0, 1]")
__naked void with_invalid_return_code_test7(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
index 99a23dea8233..be95570ab382 100644
--- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
@@ -411,7 +411,7 @@ l0_%=: r0 = 0; \
SEC("tc")
__description("direct packet access: test17 (pruning, alignment)")
-__failure __msg("misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4")
+__failure __msg("misaligned packet access off 2+0+15+-4 size 4")
__flag(BPF_F_STRICT_ALIGNMENT)
__naked void packet_access_test17_pruning_alignment(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
index a0a5efd1caa1..bd696a431244 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
@@ -10,6 +10,7 @@
int arr[1];
int unkn_idx;
+const volatile bool call_dead_subprog = false;
__noinline long global_bad(void)
{
@@ -31,23 +32,31 @@ __noinline long global_calls_good_only(void)
return global_good();
}
+__noinline long global_dead(void)
+{
+ return arr[0] * 2;
+}
+
SEC("?raw_tp")
__success __log_level(2)
/* main prog is validated completely first */
__msg("('global_calls_good_only') is global and assumed valid.")
-__msg("1: (95) exit")
/* eventually global_good() is transitively validated as well */
__msg("Validating global_good() func")
__msg("('global_good') is safe for any args that match its prototype")
int chained_global_func_calls_success(void)
{
- return global_calls_good_only();
+ int sum = 0;
+
+ if (call_dead_subprog)
+ sum += global_dead();
+ return global_calls_good_only() + sum;
}
SEC("?raw_tp")
__failure __log_level(2)
/* main prog validated successfully first */
-__msg("1: (95) exit")
+__msg("('global_calls_bad') is global and assumed valid.")
/* eventually we validate global_bad() and fail */
__msg("Validating global_bad() func")
__msg("math between map_value pointer and register") /* BOOM */
diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
index b054f9c48143..9fc3fae5cd83 100644
--- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
@@ -5,9 +5,10 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-SEC("cgroup/sysctl")
+SEC("socket")
__description("ARG_PTR_TO_LONG uninitialized")
-__failure __msg("invalid indirect read from stack R4 off -16+0 size 8")
+__success
+__failure_unpriv __msg_unpriv("invalid indirect read from stack R4 off -16+0 size 8")
__naked void arg_ptr_to_long_uninitialized(void)
{
asm volatile (" \
@@ -67,7 +68,7 @@ __naked void ptr_to_long_half_uninitialized(void)
SEC("cgroup/sysctl")
__description("ARG_PTR_TO_LONG misaligned")
-__failure __msg("misaligned stack access off (0x0; 0x0)+-20+0 size 8")
+__failure __msg("misaligned stack access off 0+-20+0 size 8")
__naked void arg_ptr_to_long_misaligned(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c
index 353ae6da00e1..e1ffa5d32ff0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c
+++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c
@@ -39,7 +39,7 @@ __naked void with_valid_return_code_test3(void)
SEC("netfilter")
__description("bpf_exit with invalid return code. test4")
-__failure __msg("R0 has value (0x2; 0x0)")
+__failure __msg("R0 has smin=2 smax=2 should have been in [0, 1]")
__naked void with_invalid_return_code_test4(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
index efbfc3a4ad6a..f67390224a9c 100644
--- a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
@@ -5,9 +5,10 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-SEC("tc")
+SEC("socket")
__description("raw_stack: no skb_load_bytes")
-__failure __msg("invalid read from stack R6 off=-8 size=8")
+__success
+__failure_unpriv __msg_unpriv("invalid read from stack R6 off=-8 size=8")
__naked void stack_no_skb_load_bytes(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 6115520154e3..39fe3372e0e0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include <../../../tools/include/linux/filter.h>
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
@@ -450,4 +451,290 @@ l0_%=: r1 >>= 16; \
: __clobber_all);
}
+SEC("raw_tp")
+__log_level(2)
+__success
+__msg("fp-8=0m??mmmm")
+__msg("fp-16=00mm??mm")
+__msg("fp-24=00mm???m")
+__naked void spill_subregs_preserve_stack_zero(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+
+ /* 32-bit subreg spill with ZERO, MISC, and INVALID */
+ ".8byte %[fp1_u8_st_zero];" /* ZERO, LLVM-18+: *(u8 *)(r10 -1) = 0; */
+ "*(u8 *)(r10 -2) = r0;" /* MISC */
+ /* fp-3 and fp-4 stay INVALID */
+ "*(u32 *)(r10 -8) = r0;"
+
+ /* 16-bit subreg spill with ZERO, MISC, and INVALID */
+ ".8byte %[fp10_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r10 -10) = 0; */
+ "*(u16 *)(r10 -12) = r0;" /* MISC */
+ /* fp-13 and fp-14 stay INVALID */
+ "*(u16 *)(r10 -16) = r0;"
+
+ /* 8-bit subreg spill with ZERO, MISC, and INVALID */
+ ".8byte %[fp18_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r18 -10) = 0; */
+ "*(u16 *)(r10 -20) = r0;" /* MISC */
+ /* fp-21, fp-22, and fp-23 stay INVALID */
+ "*(u8 *)(r10 -24) = r0;"
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm_insn(fp1_u8_st_zero, BPF_ST_MEM(BPF_B, BPF_REG_FP, -1, 0)),
+ __imm_insn(fp10_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -10, 0)),
+ __imm_insn(fp18_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -18, 0))
+ : __clobber_all);
+}
+
+char single_byte_buf[1] SEC(".data.single_byte_buf");
+
+SEC("raw_tp")
+__log_level(2)
+__success
+/* make sure fp-8 is all STACK_ZERO */
+__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000")
+/* but fp-16 is spilled IMPRECISE zero const reg */
+__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0")
+/* validate that assigning R2 from STACK_ZERO doesn't mark register
+ * precise immediately; if necessary, it will be marked precise later
+ */
+__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=00000000")
+/* similarly, when R2 is assigned from spilled register, it is initially
+ * imprecise, but will be marked precise later once it is used in precise context
+ */
+__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0")
+__msg("11: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 8: (73) *(u8 *)(r1 +0) = r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 7: (0f) r1 += r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 6: (71) r2 = *(u8 *)(r10 -1)")
+__msg("mark_precise: frame0: regs= stack=-16 before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 4: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0")
+__naked void partial_stack_load_preserves_zeros(void)
+{
+ asm volatile (
+ /* fp-8 is all STACK_ZERO */
+ ".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */
+
+ /* fp-16 is const zero register */
+ "r0 = 0;"
+ "*(u64 *)(r10 -16) = r0;"
+
+ /* load single U8 from non-aligned STACK_ZERO slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u8 *)(r10 -1);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U8 from non-aligned ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u8 *)(r10 -9);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U16 from non-aligned STACK_ZERO slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u16 *)(r10 -2);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U16 from non-aligned ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u16 *)(r10 -10);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from non-aligned STACK_ZERO slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u32 *)(r10 -4);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from non-aligned ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u32 *)(r10 -12);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* for completeness, load U64 from STACK_ZERO slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u64 *)(r10 -8);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* for completeness, load U64 from ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u64 *)(r10 -16);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(single_byte_buf),
+ __imm_insn(fp8_st_zero, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0))
+ : __clobber_common);
+}
+
+char two_byte_buf[2] SEC(".data.two_byte_buf");
+
+SEC("raw_tp")
+__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
+__success
+/* make sure fp-8 is IMPRECISE fake register spill */
+__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1")
+/* and fp-16 is spilled IMPRECISE const reg */
+__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1")
+/* validate load from fp-8, which was initialized using BPF_ST_MEM */
+__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1")
+__msg("9: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
+/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1")
+__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+/* validate load from fp-16, which was initialized using BPF_STX_MEM */
+__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1")
+__msg("13: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
+/* now both fp-8 and fp-16 are precise, very good */
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
+__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__naked void stack_load_preserves_const_precision(void)
+{
+ asm volatile (
+ /* establish checkpoint with state that has no stack slots;
+ * if we bubble up to this state without finding desired stack
+ * slot, then it's a bug and should be caught
+ */
+ "goto +0;"
+
+ /* fp-8 is const 1 *fake* register */
+ ".8byte %[fp8_st_one];" /* LLVM-18+: *(u64 *)(r10 -8) = 1; */
+
+ /* fp-16 is const 1 register */
+ "r0 = 1;"
+ "*(u64 *)(r10 -16) = r0;"
+
+ /* force checkpoint to check precision marks preserved in parent states */
+ "goto +0;"
+
+ /* load single U64 from aligned FAKE_REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u64 *)(r10 -8);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U64 from aligned REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u64 *)(r10 -16);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(two_byte_buf),
+ __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 1))
+ : __clobber_common);
+}
+
+SEC("raw_tp")
+__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
+__success
+/* make sure fp-8 is 32-bit FAKE subregister spill */
+__msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1")
+/* but fp-16 is spilled IMPRECISE zero const reg */
+__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1")
+/* validate load from fp-8, which was initialized using BPF_ST_MEM */
+__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1")
+__msg("9: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1")
+__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+/* validate load from fp-16, which was initialized using BPF_STX_MEM */
+__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1")
+__msg("13: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
+__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__naked void stack_load_preserves_const_precision_subreg(void)
+{
+ asm volatile (
+ /* establish checkpoint with state that has no stack slots;
+ * if we bubble up to this state without finding desired stack
+ * slot, then it's a bug and should be caught
+ */
+ "goto +0;"
+
+ /* fp-8 is const 1 *fake* SUB-register */
+ ".8byte %[fp8_st_one];" /* LLVM-18+: *(u32 *)(r10 -8) = 1; */
+
+ /* fp-16 is const 1 SUB-register */
+ "r0 = 1;"
+ "*(u32 *)(r10 -16) = r0;"
+
+ /* force checkpoint to check precision marks preserved in parent states */
+ "goto +0;"
+
+ /* load single U32 from aligned FAKE_REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u32 *)(r10 -8);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from aligned REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u32 *)(r10 -16);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(two_byte_buf),
+ __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_W, BPF_REG_FP, -8, 1)) /* 32-bit spill */
+ : __clobber_common);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
index e0f77e3e7869..417c61cd4b19 100644
--- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
@@ -37,7 +37,7 @@ __naked void ptr_to_stack_store_load(void)
SEC("socket")
__description("PTR_TO_STACK store/load - bad alignment on off")
-__failure __msg("misaligned stack access off (0x0; 0x0)+-8+2 size 8")
+__failure __msg("misaligned stack access off 0+-8+2 size 8")
__failure_unpriv
__naked void load_bad_alignment_on_off(void)
{
@@ -53,7 +53,7 @@ __naked void load_bad_alignment_on_off(void)
SEC("socket")
__description("PTR_TO_STACK store/load - bad alignment on reg")
-__failure __msg("misaligned stack access off (0x0; 0x0)+-10+8 size 8")
+__failure __msg("misaligned stack access off 0+-10+8 size 8")
__failure_unpriv
__naked void load_bad_alignment_on_reg(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index b5efcaeaa1ae..6f5d19665cf6 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -117,6 +117,56 @@ __naked int global_subprog_result_precise(void)
);
}
+__naked __noinline __used
+static unsigned long loop_callback_bad()
+{
+ /* bpf_loop() callback that can return values outside of [0, 1] range */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 s> 1000 goto 1f;"
+ "r0 = 0;"
+ "1:"
+ "goto +0;" /* checkpoint */
+ /* bpf_loop() expects [0, 1] values, so branch above skipping
+ * r0 = 0; should lead to a failure, but if exit instruction
+ * doesn't enforce r0's precision, this callback will be
+ * successfully verified
+ */
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_common
+ );
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+/* check that fallthrough code path marks r0 as precise */
+__msg("mark_precise: frame1: regs=r0 stack= before 11: (b7) r0 = 0")
+/* check that we have branch code path doing its own validation */
+__msg("from 10 to 12: frame1: R0=scalar(smin=umin=1001")
+/* check that branch code path marks r0 as precise, before failing */
+__msg("mark_precise: frame1: regs=r0 stack= before 9: (85) call bpf_get_prandom_u32#7")
+__msg("At callback return the register R0 has smin=1001 should have been in [0, 1]")
+__naked int callback_precise_return_fail(void)
+{
+ asm volatile (
+ "r1 = 1;" /* nr_loops */
+ "r2 = %[loop_callback_bad];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(loop_callback_bad),
+ __imm(bpf_loop)
+ : __clobber_common
+ );
+}
+
SEC("?raw_tp")
__success __log_level(2)
/* First simulated path does not include callback body,
@@ -539,11 +589,24 @@ static __u64 subprog_spill_reg_precise(void)
SEC("?raw_tp")
__success __log_level(2)
-/* precision backtracking can't currently handle stack access not through r10,
- * so we won't be able to mark stack slot fp-8 as precise, and so will
- * fallback to forcing all as precise
- */
-__msg("mark_precise: frame0: falling back to forcing all scalars precise")
+__msg("10: (0f) r1 += r7")
+__msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8")
+__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4")
+__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1")
+__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2")
+__msg("mark_precise: frame1: regs= stack= before 16: (79) r2 = *(u64 *)(r1 +0)")
+__msg("mark_precise: frame1: regs= stack= before 15: (79) r0 = *(u64 *)(r10 -16)")
+__msg("mark_precise: frame1: regs= stack= before 14: (7b) *(u64 *)(r10 -16) = r2")
+__msg("mark_precise: frame1: regs= stack= before 13: (7b) *(u64 *)(r1 +0) = r2")
+__msg("mark_precise: frame1: regs=r2 stack= before 6: (85) call pc+6")
+__msg("mark_precise: frame0: regs=r2 stack= before 5: (bf) r2 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (07) r1 += -8")
+__msg("mark_precise: frame0: regs=r6 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1")
__naked int subprog_spill_into_parent_stack_slot_precise(void)
{
asm volatile (
@@ -578,14 +641,68 @@ __naked int subprog_spill_into_parent_stack_slot_precise(void)
);
}
-__naked __noinline __used
-static __u64 subprog_with_checkpoint(void)
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("17: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 17 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 16: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r0 stack= before 15: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 14: (79) r0 = *(u64 *)(r10 -16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 13: (7b) *(u64 *)(r7 -8) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 12: (79) r0 = *(u64 *)(r8 +16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 11: (7b) *(u64 *)(r8 +16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 10: (79) r0 = *(u64 *)(r7 -8)")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 8: (07) r8 += -32")
+__msg("mark_precise: frame0: regs=r0 stack= before 7: (bf) r8 = r10")
+__msg("mark_precise: frame0: regs=r0 stack= before 6: (07) r7 += -8")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r7 = r10")
+__msg("mark_precise: frame0: regs=r0 stack= before 21: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 20: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+15")
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1")
+__naked int stack_slot_aliases_precision(void)
{
asm volatile (
- "r0 = 0;"
- /* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */
- "goto +0;"
+ "r6 = 1;"
+ /* pass r6 through r1 into subprog to get it back as r0;
+ * this whole chain will have to be marked as precise later
+ */
+ "r1 = r6;"
+ "call identity_subprog;"
+ /* let's setup two registers that are aliased to r10 */
+ "r7 = r10;"
+ "r7 += -8;" /* r7 = r10 - 8 */
+ "r8 = r10;"
+ "r8 += -32;" /* r8 = r10 - 32 */
+ /* now spill subprog's return value (a r6 -> r1 -> r0 chain)
+ * a few times through different stack pointer regs, making
+ * sure to use r10, r7, and r8 both in LDX and STX insns, and
+ * *importantly* also using a combination of const var_off and
+ * insn->off to validate that we record final stack slot
+ * correctly, instead of relying on just insn->off derivation,
+ * which is only valid for r10-based stack offset
+ */
+ "*(u64 *)(r10 - 16) = r0;"
+ "r0 = *(u64 *)(r7 - 8);" /* r7 - 8 == r10 - 16 */
+ "*(u64 *)(r8 + 16) = r0;" /* r8 + 16 = r10 - 16 */
+ "r0 = *(u64 *)(r8 + 16);"
+ "*(u64 *)(r7 - 8) = r0;"
+ "r0 = *(u64 *)(r10 - 16);"
+ /* get ready to use r0 as an index into array to force precision */
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* here r0->r1->r6 chain is forced to be precise and has to be
+ * propagated back to the beginning, including through the
+ * subprog call and all the stack spills and loads
+ */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
"exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
);
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c
index 83a90afba785..c810f4f6f479 100644
--- a/tools/testing/selftests/bpf/progs/verifier_var_off.c
+++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c
@@ -59,9 +59,10 @@ __naked void stack_read_priv_vs_unpriv(void)
" ::: __clobber_all);
}
-SEC("lwt_in")
+SEC("cgroup/skb")
__description("variable-offset stack read, uninitialized")
-__failure __msg("invalid variable-offset read from stack R2")
+__success
+__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root")
__naked void variable_offset_stack_read_uninitialized(void)
{
asm volatile (" \
@@ -83,13 +84,56 @@ __naked void variable_offset_stack_read_uninitialized(void)
SEC("socket")
__description("variable-offset stack write, priv vs unpriv")
-__success __failure_unpriv
+__success
+/* Check that the maximum stack depth is correctly maintained according to the
+ * maximum possible variable offset.
+ */
+__log_level(4) __msg("stack depth 16")
+__failure_unpriv
/* Variable stack access is rejected for unprivileged.
*/
__msg_unpriv("R2 variable stack access prohibited for !root")
__retval(0)
__naked void stack_write_priv_vs_unpriv(void)
{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 8-byte aligned */ \
+ r2 &= 8; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-8 or \
+ * fp-16, but we don't know which \
+ */ \
+ r2 += r10; \
+ /* Dereference it for a stack write */ \
+ r0 = 0; \
+ *(u64*)(r2 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+/* Similar to the previous test, but this time also perform a read from the
+ * address written to with a variable offset. The read is allowed, showing that,
+ * after a variable-offset write, a priviledged program can read the slots that
+ * were in the range of that write (even if the verifier doesn't actually know if
+ * the slot being read was really written to or not.
+ *
+ * Despite this test being mostly a superset, the previous test is also kept for
+ * the sake of it checking the stack depth in the case where there is no read.
+ */
+SEC("socket")
+__description("variable-offset stack write followed by read")
+__success
+/* Check that the maximum stack depth is correctly maintained according to the
+ * maximum possible variable offset.
+ */
+__log_level(4) __msg("stack depth 16")
+__failure_unpriv
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_write_followed_by_read(void)
+{
asm volatile (" \
/* Get an unknown value */ \
r2 = *(u32*)(r1 + 0); \
@@ -103,12 +147,7 @@ __naked void stack_write_priv_vs_unpriv(void)
/* Dereference it for a stack write */ \
r0 = 0; \
*(u64*)(r2 + 0) = r0; \
- /* Now read from the address we just wrote. This shows\
- * that, after a variable-offset write, a priviledged\
- * program can read the slots that were in the range of\
- * that write (even if the verifier doesn't actually know\
- * if the slot being read was really written to or not.\
- */ \
+ /* Now read from the address we just wrote. */ \
r3 = *(u64*)(r2 + 0); \
r0 = 0; \
exit; \
@@ -224,6 +263,35 @@ __naked void access_max_out_of_bound(void)
: __clobber_all);
}
+/* Similar to the test above, but this time check the special case of a
+ * zero-sized stack access. We used to have a bug causing crashes for zero-sized
+ * out-of-bounds accesses.
+ */
+SEC("socket")
+__description("indirect variable-offset stack access, zero-sized, max out of bound")
+__failure __msg("invalid variable-offset indirect access to stack R1")
+__naked void zero_sized_access_max_out_of_bound(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ /* Fill some stack */ \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ /* Get an unknown value */ \
+ r1 = *(u32*)(r1 + 0); \
+ r1 &= 63; \
+ r1 += -16; \
+ /* r1 is now anywhere in [-16,48) */ \
+ r1 += r10; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
SEC("lwt_in")
__description("indirect variable-offset stack access, min out of bound")
__failure __msg("invalid variable-offset indirect access to stack R2")
@@ -253,9 +321,10 @@ __naked void access_min_out_of_bound(void)
: __clobber_all);
}
-SEC("lwt_in")
+SEC("cgroup/skb")
__description("indirect variable-offset stack access, min_off < min_initialized")
-__failure __msg("invalid indirect read from stack R2 var_off")
+__success
+__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root")
__naked void access_min_off_min_initialized(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
index f6d1cc9ad892..330ece2eabdb 100644
--- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -20,21 +20,32 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
enum xdp_rss_hash_type *rss_type) __ksym;
+extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
+ __be16 *vlan_proto,
+ __u16 *vlan_tci) __ksym;
SEC("xdp.frags")
int rx(struct xdp_md *ctx)
{
void *data, *data_meta, *data_end;
struct ipv6hdr *ip6h = NULL;
- struct ethhdr *eth = NULL;
struct udphdr *udp = NULL;
struct iphdr *iph = NULL;
struct xdp_meta *meta;
+ struct ethhdr *eth;
int err;
data = (void *)(long)ctx->data;
data_end = (void *)(long)ctx->data_end;
eth = data;
+
+ if (eth + 1 < data_end && (eth->h_proto == bpf_htons(ETH_P_8021AD) ||
+ eth->h_proto == bpf_htons(ETH_P_8021Q)))
+ eth = (void *)eth + sizeof(struct vlan_hdr);
+
+ if (eth + 1 < data_end && eth->h_proto == bpf_htons(ETH_P_8021Q))
+ eth = (void *)eth + sizeof(struct vlan_hdr);
+
if (eth + 1 < data_end) {
if (eth->h_proto == bpf_htons(ETH_P_IP)) {
iph = (void *)(eth + 1);
@@ -76,15 +87,28 @@ int rx(struct xdp_md *ctx)
return XDP_PASS;
}
+ meta->hint_valid = 0;
+
+ meta->xdp_timestamp = bpf_ktime_get_tai_ns();
err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp);
- if (!err)
- meta->xdp_timestamp = bpf_ktime_get_tai_ns();
+ if (err)
+ meta->rx_timestamp_err = err;
else
- meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */
+ meta->hint_valid |= XDP_META_FIELD_TS;
- err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
- if (err < 0)
- meta->rx_hash_err = err; /* Used by AF_XDP as no hash signal */
+ err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash,
+ &meta->rx_hash_type);
+ if (err)
+ meta->rx_hash_err = err;
+ else
+ meta->hint_valid |= XDP_META_FIELD_RSS;
+
+ err = bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto,
+ &meta->rx_vlan_tci);
+ if (err)
+ meta->rx_vlan_tag_err = err;
+ else
+ meta->hint_valid |= XDP_META_FIELD_VLAN_TAG;
__sync_add_and_fetch(&pkts_redir, 1);
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
index d151d406a123..31ca229bb3c0 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
@@ -23,15 +23,47 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
enum xdp_rss_hash_type *rss_type) __ksym;
+extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
+ __be16 *vlan_proto,
+ __u16 *vlan_tci) __ksym;
SEC("xdp")
int rx(struct xdp_md *ctx)
{
- void *data, *data_meta;
+ void *data, *data_meta, *data_end;
+ struct ipv6hdr *ip6h = NULL;
+ struct ethhdr *eth = NULL;
+ struct udphdr *udp = NULL;
+ struct iphdr *iph = NULL;
struct xdp_meta *meta;
u64 timestamp = -1;
int ret;
+ data = (void *)(long)ctx->data;
+ data_end = (void *)(long)ctx->data_end;
+ eth = data;
+ if (eth + 1 < data_end) {
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ iph = (void *)(eth + 1);
+ if (iph + 1 < data_end && iph->protocol == IPPROTO_UDP)
+ udp = (void *)(iph + 1);
+ }
+ if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ ip6h = (void *)(eth + 1);
+ if (ip6h + 1 < data_end && ip6h->nexthdr == IPPROTO_UDP)
+ udp = (void *)(ip6h + 1);
+ }
+ if (udp && udp + 1 > data_end)
+ udp = NULL;
+ }
+
+ if (!udp)
+ return XDP_PASS;
+
+ /* Forwarding UDP:8080 to AF_XDP */
+ if (udp->dest != bpf_htons(8080))
+ return XDP_PASS;
+
/* Reserve enough for all custom metadata. */
ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
@@ -57,6 +89,8 @@ int rx(struct xdp_md *ctx)
meta->rx_timestamp = 1;
bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
+ bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto,
+ &meta->rx_vlan_tci);
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
index 80f620602d50..518329c666e9 100644
--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -467,13 +467,13 @@ static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bo
unsigned long status = ct->status;
bpf_ct_release(ct);
- if (status & IPS_CONFIRMED_BIT)
+ if (status & IPS_CONFIRMED)
return XDP_PASS;
} else if (ct_lookup_opts.error != -ENOENT) {
return XDP_ABORTED;
}
- /* error == -ENOENT || !(status & IPS_CONFIRMED_BIT) */
+ /* error == -ENOENT || !(status & IPS_CONFIRMED) */
return XDP_TX;
}
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index a350ecdfba4a..74ceb7877ae2 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -27,6 +27,7 @@
#define TEST_TAG_RETVAL_PFX_UNPRIV "comment:test_retval_unpriv="
#define TEST_TAG_AUXILIARY "comment:test_auxiliary"
#define TEST_TAG_AUXILIARY_UNPRIV "comment:test_auxiliary_unpriv"
+#define TEST_BTF_PATH "comment:test_btf_path="
/* Warning: duplicated in bpf_misc.h */
#define POINTER_VALUE 0xcafe4all
@@ -58,6 +59,7 @@ struct test_spec {
const char *prog_name;
struct test_subspec priv;
struct test_subspec unpriv;
+ const char *btf_custom_path;
int log_level;
int prog_flags;
int mode_mask;
@@ -288,6 +290,8 @@ static int parse_test_spec(struct test_loader *tester,
goto cleanup;
update_flags(&spec->prog_flags, flags, clear);
}
+ } else if (str_has_pfx(s, TEST_BTF_PATH)) {
+ spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1;
}
}
@@ -578,6 +582,9 @@ void run_subtest(struct test_loader *tester,
}
}
+ /* Implicitly reset to NULL if next test case doesn't specify */
+ open_opts->btf_custom_path = spec->btf_custom_path;
+
tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, open_opts);
if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */
goto subtest_cleanup;
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index 2dec7dbf29a2..d9661b9988ba 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -517,90 +517,6 @@ test_ip6ip6()
echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
}
-setup_xfrm_tunnel()
-{
- auth=0x$(printf '1%.0s' {1..40})
- enc=0x$(printf '2%.0s' {1..32})
- spi_in_to_out=0x1
- spi_out_to_in=0x2
- # at_ns0 namespace
- # at_ns0 -> root
- ip netns exec at_ns0 \
- ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
- spi $spi_in_to_out reqid 1 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip netns exec at_ns0 \
- ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
- tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
- mode tunnel
- # root -> at_ns0
- ip netns exec at_ns0 \
- ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
- spi $spi_out_to_in reqid 2 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip netns exec at_ns0 \
- ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
- tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
- mode tunnel
- # address & route
- ip netns exec at_ns0 \
- ip addr add dev veth0 10.1.1.100/32
- ip netns exec at_ns0 \
- ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
- src 10.1.1.100
-
- # root namespace
- # at_ns0 -> root
- ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
- spi $spi_in_to_out reqid 1 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
- tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
- mode tunnel
- # root -> at_ns0
- ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
- spi $spi_out_to_in reqid 2 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
- tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
- mode tunnel
- # address & route
- ip addr add dev veth1 10.1.1.200/32
- ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
-}
-
-test_xfrm_tunnel()
-{
- if [[ -e /sys/kernel/tracing/trace ]]; then
- TRACE=/sys/kernel/tracing/trace
- else
- TRACE=/sys/kernel/debug/tracing/trace
- fi
- config_device
- > ${TRACE}
- setup_xfrm_tunnel
- mkdir -p ${BPF_PIN_TUNNEL_DIR}
- bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}
- tc qdisc add dev veth1 clsact
- tc filter add dev veth1 proto ip ingress bpf da object-pinned \
- ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- sleep 1
- grep "reqid 1" ${TRACE}
- check_err $?
- grep "spi 0x1" ${TRACE}
- check_err $?
- grep "remote ip 0xac100164" ${TRACE}
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: xfrm tunnel"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
-}
-
attach_bpf()
{
DEV=$1
@@ -630,10 +546,6 @@ cleanup()
ip link del ip6geneve11 2> /dev/null
ip link del erspan11 2> /dev/null
ip link del ip6erspan11 2> /dev/null
- ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
- ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
- ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
- ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
}
cleanup_exit()
@@ -716,10 +628,6 @@ bpf_tunnel_test()
test_ip6ip6
errors=$(( $errors + $? ))
- echo "Testing IPSec tunnel..."
- test_xfrm_tunnel
- errors=$(( $errors + $? ))
-
return $errors
}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 5b7a55136741..35284faff4f2 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -9,6 +9,9 @@
#include <bpf/libbpf.h>
#include <time.h>
+#define __TO_STR(x) #x
+#define TO_STR(x) __TO_STR(x)
+
int parse_num_list(const char *s, bool **set, int *set_len);
__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info);
int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
index 319337bdcfc8..9a7b1106fda8 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
@@ -84,17 +84,6 @@
.errstr = "!read_ok",
},
{
- "Can't use cmpxchg on uninit memory",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 3),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid read from stack",
-},
-{
"BPF_W cmpxchg should zero top 32 bits",
.insns = {
/* r0 = U64_MAX; */
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 3d5cd51071f0..ab25a81fd3a1 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -1505,7 +1505,9 @@
.prog_type = BPF_PROG_TYPE_XDP,
.fixup_map_hash_8b = { 23 },
.result = REJECT,
- .errstr = "invalid read from stack R7 off=-16 size=8",
+ .errstr = "R0 invalid mem access 'scalar'",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "invalid read from stack R7 off=-16 size=8",
},
{
"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 0d84dd1f38b6..8a2ff81d8350 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -140,10 +140,11 @@
.result = REJECT,
},
{
- "precise: ST insn causing spi > allocated_stack",
+ "precise: ST zero to stack insn is supported",
.insns = {
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
+ /* not a register spill, so we stop precision propagation for R4 here */
BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0),
BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
BPF_MOV64_IMM(BPF_REG_0, -1),
@@ -157,11 +158,11 @@
mark_precise: frame0: last_idx 4 first_idx 2\
mark_precise: frame0: regs=r4 stack= before 4\
mark_precise: frame0: regs=r4 stack= before 3\
- mark_precise: frame0: regs= stack=-8 before 2\
- mark_precise: frame0: falling back to forcing all scalars precise\
- force_precise: frame0: forcing r0 to be precise\
mark_precise: frame0: last_idx 5 first_idx 5\
- mark_precise: frame0: parent state regs= stack=:",
+ mark_precise: frame0: parent state regs=r0 stack=:\
+ mark_precise: frame0: last_idx 4 first_idx 2\
+ mark_precise: frame0: regs=r0 stack= before 4\
+ 5: R0=-1 R4=0",
.result = VERBOSE_ACCEPT,
.retval = -1,
},
@@ -169,6 +170,8 @@
"precise: STX insn causing spi > allocated_stack",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ /* make later reg spill more interesting by having somewhat known scalar */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8),
@@ -179,18 +182,21 @@
},
.prog_type = BPF_PROG_TYPE_XDP,
.flags = BPF_F_TEST_STATE_FREQ,
- .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\
+ .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\
mark_precise: frame0: parent state regs=r4 stack=:\
- mark_precise: frame0: last_idx 5 first_idx 3\
- mark_precise: frame0: regs=r4 stack= before 5\
- mark_precise: frame0: regs=r4 stack= before 4\
- mark_precise: frame0: regs= stack=-8 before 3\
- mark_precise: frame0: falling back to forcing all scalars precise\
- force_precise: frame0: forcing r0 to be precise\
- force_precise: frame0: forcing r0 to be precise\
- force_precise: frame0: forcing r0 to be precise\
- force_precise: frame0: forcing r0 to be precise\
- mark_precise: frame0: last_idx 6 first_idx 6\
+ mark_precise: frame0: last_idx 6 first_idx 4\
+ mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\
+ mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\
+ mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\
+ mark_precise: frame0: parent state regs=r0 stack=:\
+ mark_precise: frame0: last_idx 3 first_idx 3\
+ mark_precise: frame0: regs=r0 stack= before 3: (55) if r3 != 0x7b goto pc+0\
+ mark_precise: frame0: regs=r0 stack= before 2: (bf) r3 = r10\
+ mark_precise: frame0: regs=r0 stack= before 1: (57) r0 &= 255\
+ mark_precise: frame0: parent state regs=r0 stack=:\
+ mark_precise: frame0: last_idx 0 first_idx 0\
+ mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7\
+ mark_precise: frame0: last_idx 7 first_idx 7\
mark_precise: frame0: parent state regs= stack=:",
.result = VERBOSE_ACCEPT,
.retval = -1,
diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh
index ba08922b4a27..f2cac42298ba 100755
--- a/tools/testing/selftests/bpf/verify_sig_setup.sh
+++ b/tools/testing/selftests/bpf/verify_sig_setup.sh
@@ -60,6 +60,27 @@ cleanup() {
rm -rf ${tmp_dir}
}
+fsverity_create_sign_file() {
+ local tmp_dir="$1"
+
+ data_file=${tmp_dir}/data-file
+ sig_file=${tmp_dir}/sig-file
+ dd if=/dev/urandom of=$data_file bs=1 count=12345 2> /dev/null
+ fsverity sign --key ${tmp_dir}/signing_key.pem $data_file $sig_file
+
+ # We do not want to enable fsverity on $data_file yet. Try whether
+ # the file system support fsverity on a different file.
+ touch ${tmp_dir}/tmp-file
+ fsverity enable ${tmp_dir}/tmp-file
+}
+
+fsverity_enable_file() {
+ local tmp_dir="$1"
+
+ data_file=${tmp_dir}/data-file
+ fsverity enable $data_file
+}
+
catch()
{
local exit_code="$1"
@@ -86,6 +107,10 @@ main()
setup "${tmp_dir}"
elif [[ "${action}" == "cleanup" ]]; then
cleanup "${tmp_dir}"
+ elif [[ "${action}" == "fsverity-create-sign" ]]; then
+ fsverity_create_sign_file "${tmp_dir}"
+ elif [[ "${action}" == "fsverity-enable" ]]; then
+ fsverity_enable_file "${tmp_dir}"
else
echo "Unknown action: ${action}"
exit 1
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 1d418d66e375..244d4996e06e 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -1254,7 +1254,7 @@ static int cmp_join_stat(const struct verif_stats_join *s1,
bool asc, bool abs)
{
const char *str1 = NULL, *str2 = NULL;
- double v1, v2;
+ double v1 = 0.0, v2 = 0.0;
int cmp = 0;
fetch_join_stat_value(s1, id, var, &str1, &v1);
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 3291625ba4fb..878d68db0325 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -21,6 +21,9 @@
#include "xsk.h"
#include <error.h>
+#include <linux/kernel.h>
+#include <linux/bits.h>
+#include <linux/bitfield.h>
#include <linux/errqueue.h>
#include <linux/if_link.h>
#include <linux/net_tstamp.h>
@@ -79,7 +82,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
.flags = XSK_UMEM__DEFAULT_FLAGS,
.tx_metadata_len = sizeof(struct xsk_tx_metadata),
};
- __u32 idx;
+ __u32 idx = 0;
u64 addr;
int ret;
int i;
@@ -182,19 +185,31 @@ static void print_tstamp_delta(const char *name, const char *refname,
(double)delta / 1000);
}
+#define VLAN_PRIO_MASK GENMASK(15, 13) /* Priority Code Point */
+#define VLAN_DEI_MASK GENMASK(12, 12) /* Drop Eligible Indicator */
+#define VLAN_VID_MASK GENMASK(11, 0) /* VLAN Identifier */
+static void print_vlan_tci(__u16 tag)
+{
+ __u16 vlan_id = FIELD_GET(VLAN_VID_MASK, tag);
+ __u8 pcp = FIELD_GET(VLAN_PRIO_MASK, tag);
+ bool dei = FIELD_GET(VLAN_DEI_MASK, tag);
+
+ printf("PCP=%u, DEI=%d, VID=0x%X\n", pcp, dei, vlan_id);
+}
+
static void verify_xdp_metadata(void *data, clockid_t clock_id)
{
struct xdp_meta *meta;
meta = data - sizeof(*meta);
- if (meta->rx_hash_err < 0)
- printf("No rx_hash err=%d\n", meta->rx_hash_err);
- else
+ if (meta->hint_valid & XDP_META_FIELD_RSS)
printf("rx_hash: 0x%X with RSS type:0x%X\n",
meta->rx_hash, meta->rx_hash_type);
+ else
+ printf("No rx_hash, err=%d\n", meta->rx_hash_err);
- if (meta->rx_timestamp) {
+ if (meta->hint_valid & XDP_META_FIELD_TS) {
__u64 ref_tstamp = gettime(clock_id);
/* store received timestamps to calculate a delta at tx */
@@ -206,7 +221,16 @@ static void verify_xdp_metadata(void *data, clockid_t clock_id)
print_tstamp_delta("XDP RX-time", "User RX-time",
meta->xdp_timestamp, ref_tstamp);
} else {
- printf("No rx_timestamp\n");
+ printf("No rx_timestamp, err=%d\n", meta->rx_timestamp_err);
+ }
+
+ if (meta->hint_valid & XDP_META_FIELD_VLAN_TAG) {
+ printf("rx_vlan_proto: 0x%X\n", ntohs(meta->rx_vlan_proto));
+ printf("rx_vlan_tci: ");
+ print_vlan_tci(meta->rx_vlan_tci);
+ } else {
+ printf("No rx_vlan_tci or rx_vlan_proto, err=%d\n",
+ meta->rx_vlan_tag_err);
}
}
diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h
index 938a729bd307..87318ad1117a 100644
--- a/tools/testing/selftests/bpf/xdp_metadata.h
+++ b/tools/testing/selftests/bpf/xdp_metadata.h
@@ -9,12 +9,44 @@
#define ETH_P_IPV6 0x86DD
#endif
+#ifndef ETH_P_8021Q
+#define ETH_P_8021Q 0x8100
+#endif
+
+#ifndef ETH_P_8021AD
+#define ETH_P_8021AD 0x88A8
+#endif
+
+#ifndef BIT
+#define BIT(nr) (1 << (nr))
+#endif
+
+/* Non-existent checksum status */
+#define XDP_CHECKSUM_MAGIC BIT(2)
+
+enum xdp_meta_field {
+ XDP_META_FIELD_TS = BIT(0),
+ XDP_META_FIELD_RSS = BIT(1),
+ XDP_META_FIELD_VLAN_TAG = BIT(2),
+};
+
struct xdp_meta {
- __u64 rx_timestamp;
+ union {
+ __u64 rx_timestamp;
+ __s32 rx_timestamp_err;
+ };
__u64 xdp_timestamp;
__u32 rx_hash;
union {
__u32 rx_hash_type;
__s32 rx_hash_err;
};
+ union {
+ struct {
+ __be16 rx_vlan_proto;
+ __u16 rx_vlan_tci;
+ };
+ __s32 rx_vlan_tag_err;
+ };
+ enum xdp_meta_field hint_valid;
};
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index b604c570309a..b1102ee13faa 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -634,16 +634,24 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk
return nb_frags;
}
+static bool set_pkt_valid(int offset, u32 len)
+{
+ return len <= MAX_ETH_JUMBO_SIZE;
+}
+
static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
{
pkt->offset = offset;
pkt->len = len;
- if (len > MAX_ETH_JUMBO_SIZE) {
- pkt->valid = false;
- } else {
- pkt->valid = true;
- pkt_stream->nb_valid_entries++;
- }
+ pkt->valid = set_pkt_valid(offset, len);
+}
+
+static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+ bool prev_pkt_valid = pkt->valid;
+
+ pkt_set(pkt_stream, pkt, offset, len);
+ pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid;
}
static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
@@ -665,7 +673,7 @@ static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb
for (i = 0; i < nb_pkts; i++) {
struct pkt *pkt = &pkt_stream->pkts[i];
- pkt_set(pkt_stream, pkt, 0, pkt_len);
+ pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len);
pkt->pkt_nb = nb_start + i * nb_off;
}
@@ -700,10 +708,9 @@ static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
- pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
+ pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
ifobj->xsk->pkt_stream = pkt_stream;
- pkt_stream->nb_valid_entries /= 2;
}
static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)