summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/bpf_design_QA.rst4
-rw-r--r--Documentation/bpf/bpf_devel_QA.rst14
-rw-r--r--Documentation/bpf/cpumasks.rst4
-rw-r--r--Documentation/bpf/instruction-set.rst40
-rw-r--r--Documentation/bpf/kfuncs.rst41
-rw-r--r--Documentation/bpf/maps.rst7
-rw-r--r--arch/loongarch/net/bpf_jit.c6
-rw-r--r--arch/mips/Kconfig5
-rw-r--r--arch/mips/net/bpf_jit_comp.c4
-rw-r--r--arch/mips/net/bpf_jit_comp64.c3
-rw-r--r--arch/riscv/net/bpf_jit_comp64.c5
-rw-r--r--include/linux/bpf.h97
-rw-r--r--include/linux/bpf_mem_alloc.h7
-rw-r--r--include/linux/bpf_verifier.h4
-rw-r--r--include/linux/btf.h2
-rw-r--r--include/linux/filter.h46
-rw-r--r--include/uapi/linux/bpf.h33
-rw-r--r--kernel/bpf/bpf_local_storage.c85
-rw-r--r--kernel/bpf/btf.c42
-rw-r--r--kernel/bpf/cgroup.c53
-rw-r--r--kernel/bpf/cpumask.c46
-rw-r--r--kernel/bpf/hashtab.c59
-rw-r--r--kernel/bpf/helpers.c257
-rw-r--r--kernel/bpf/syscall.c8
-rw-r--r--kernel/bpf/verifier.c974
-rw-r--r--kernel/trace/bpf_trace.c4
-rw-r--r--net/bpf/test_run.c3
-rw-r--r--net/core/filter.c193
-rw-r--r--tools/arch/arm64/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--tools/arch/s390/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--tools/arch/s390/include/uapi/asm/ptrace.h458
-rw-r--r--tools/bpf/bpftool/json_writer.c3
-rw-r--r--tools/bpf/resolve_btfids/.gitignore1
-rw-r--r--tools/include/uapi/linux/bpf.h33
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/bpf.h69
-rw-r--r--tools/lib/bpf/bpf_helpers.h2
-rw-r--r--tools/lib/bpf/bpf_tracing.h3
-rw-r--r--tools/lib/bpf/btf.c2
-rw-r--r--tools/lib/bpf/libbpf.c197
-rw-r--r--tools/lib/bpf/libbpf.h50
-rw-r--r--tools/lib/bpf/linker.c11
-rw-r--r--tools/lib/bpf/netlink.c8
-rw-r--r--tools/lib/bpf/relo_core.c3
-rw-r--r--tools/lib/bpf/zip.c328
-rw-r--r--tools/lib/bpf/zip.h47
-rw-r--r--tools/scripts/Makefile.include2
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x2
-rw-r--r--tools/testing/selftests/bpf/Makefile7
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h38
l---------tools/testing/selftests/bpf/disasm.c1
l---------tools/testing/selftests/bpf/disasm.h1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c291
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c917
-rw-r--r--tools/testing/selftests/bpf/prog_tests/decap_sanity.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/empty_skb.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fib_lookup.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/l4lb_all.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_fixup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_kptr.c136
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c19
-rw-r--r--tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c93
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c100
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uninit_stack.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/user_ringbuf.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_attach.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c41
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xfrm_info.c67
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h23
-rw-r--r--tools/testing/selftests/bpf/progs/cb_refs.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h3
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c54
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c4
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_failure.c2
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c287
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c55
-rw-r--r--tools/testing/selftests/bpf/progs/find_vma_fail1.c2
-rw-r--r--tools/testing/selftests/bpf/progs/jit_probe_mem.c2
-rw-r--r--tools/testing/selftests/bpf/progs/lru_bug.c2
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr.c360
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr_fail.c10
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_failure.c2
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree.c2
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_fail.c7
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c6
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c36
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c35
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe_manual.c53
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c980
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func10.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c487
-rw-r--r--tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c119
-rw-r--r--tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c114
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_dynptr.c257
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c45
-rw-r--r--tools/testing/selftests/bpf/progs/uninit_stack.c87
-rw-r--r--tools/testing/selftests/bpf/progs/user_ringbuf_success.c2
-rw-r--r--tools/testing/selftests/bpf/test_loader.c69
-rw-r--r--tools/testing/selftests/bpf/test_progs.h16
-rw-r--r--tools/testing/selftests/bpf/test_tcp_hdr_options.h1
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c22
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c17
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx.c11
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_access_var_len.c104
-rw-r--r--tools/testing/selftests/bpf/verifier/int_ptr.c9
-rw-r--r--tools/testing/selftests/bpf/verifier/map_kptr.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/search_pruning.c13
-rw-r--r--tools/testing/selftests/bpf/verifier/sock.c27
-rw-r--r--tools/testing/selftests/bpf/verifier/spill_fill.c7
-rw-r--r--tools/testing/selftests/bpf/verifier/unpriv.c23
-rw-r--r--tools/testing/selftests/bpf/verifier/var_off.c52
131 files changed, 7102 insertions, 1792 deletions
diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index bfff0e7e37c2..38372a956d65 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -314,7 +314,7 @@ Q: What is the compatibility story for special BPF types in map values?
Q: Users are allowed to embed bpf_spin_lock, bpf_timer fields in their BPF map
values (when using BTF support for BPF maps). This allows to use helpers for
such objects on these fields inside map values. Users are also allowed to embed
-pointers to some kernel types (with __kptr and __kptr_ref BTF tags). Will the
+pointers to some kernel types (with __kptr_untrusted and __kptr BTF tags). Will the
kernel preserve backwards compatibility for these features?
A: It depends. For bpf_spin_lock, bpf_timer: YES, for kptr and everything else:
@@ -324,7 +324,7 @@ For struct types that have been added already, like bpf_spin_lock and bpf_timer,
the kernel will preserve backwards compatibility, as they are part of UAPI.
For kptrs, they are also part of UAPI, but only with respect to the kptr
-mechanism. The types that you can use with a __kptr and __kptr_ref tagged
+mechanism. The types that you can use with a __kptr_untrusted and __kptr tagged
pointer in your struct are NOT part of the UAPI contract. The supported types can
and will change across kernel releases. However, operations like accessing kptr
fields and bpf_kptr_xchg() helper will continue to be supported across kernel
diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 03d4993eda6f..5f5f9ccc3862 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -128,7 +128,7 @@ into the bpf-next tree will make their way into net-next tree. net and
net-next are both run by David S. Miller. From there, they will go
into the kernel mainline tree run by Linus Torvalds. To read up on the
process of net and net-next being merged into the mainline tree, see
-the :ref:`netdev-FAQ`
+the `netdev-FAQ`_.
@@ -147,7 +147,7 @@ request)::
Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to?
---------------------------------------------------------------------------------
-A: The process is the very same as described in the :ref:`netdev-FAQ`,
+A: The process is the very same as described in the `netdev-FAQ`_,
so please read up on it. The subject line must indicate whether the
patch is a fix or rather "next-like" content in order to let the
maintainers know whether it is targeted at bpf or bpf-next.
@@ -206,7 +206,7 @@ ii) run extensive BPF test suite and
Once the BPF pull request was accepted by David S. Miller, then
the patches end up in net or net-next tree, respectively, and
make their way from there further into mainline. Again, see the
-:ref:`netdev-FAQ` for additional information e.g. on how often they are
+`netdev-FAQ`_ for additional information e.g. on how often they are
merged to mainline.
Q: How long do I need to wait for feedback on my BPF patches?
@@ -230,7 +230,7 @@ Q: Are patches applied to bpf-next when the merge window is open?
-----------------------------------------------------------------
A: For the time when the merge window is open, bpf-next will not be
processed. This is roughly analogous to net-next patch processing,
-so feel free to read up on the :ref:`netdev-FAQ` about further details.
+so feel free to read up on the `netdev-FAQ`_ about further details.
During those two weeks of merge window, we might ask you to resend
your patch series once bpf-next is open again. Once Linus released
@@ -394,7 +394,7 @@ netdev kernel mailing list in Cc and ask for the fix to be queued up:
netdev@vger.kernel.org
The process in general is the same as on netdev itself, see also the
-:ref:`netdev-FAQ`.
+`netdev-FAQ`_.
Q: Do you also backport to kernels not currently maintained as stable?
----------------------------------------------------------------------
@@ -410,7 +410,7 @@ Q: The BPF patch I am about to submit needs to go to stable as well
What should I do?
A: The same rules apply as with netdev patch submissions in general, see
-the :ref:`netdev-FAQ`.
+the `netdev-FAQ`_.
Never add "``Cc: stable@vger.kernel.org``" to the patch description, but
ask the BPF maintainers to queue the patches instead. This can be done
@@ -685,7 +685,7 @@ when:
.. Links
.. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
-.. _netdev-FAQ: Documentation/process/maintainer-netdev.rst
+.. _netdev-FAQ: https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html
.. _selftests:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
.. _Documentation/dev-tools/kselftest.rst:
diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst
index 24bef9cbbeee..75344cd230e5 100644
--- a/Documentation/bpf/cpumasks.rst
+++ b/Documentation/bpf/cpumasks.rst
@@ -51,7 +51,7 @@ For example:
.. code-block:: c
struct cpumask_map_value {
- struct bpf_cpumask __kptr_ref * cpumask;
+ struct bpf_cpumask __kptr * cpumask;
};
struct array_map {
@@ -128,7 +128,7 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
/* struct containing the struct bpf_cpumask kptr which is stored in the map. */
struct cpumasks_kfunc_map_value {
- struct bpf_cpumask __kptr_ref * bpf_cpumask;
+ struct bpf_cpumask __kptr * bpf_cpumask;
};
/* The map containing struct cpumasks_kfunc_map_value entries. */
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index af515de5fc38..db8789e6969e 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -38,14 +38,11 @@ eBPF has two instruction encodings:
* the wide instruction encoding, which appends a second 64-bit immediate (i.e.,
constant) value after the basic instruction for a total of 128 bits.
-The basic instruction encoding is as follows, where MSB and LSB mean the most significant
-bits and least significant bits, respectively:
+The fields conforming an encoded basic instruction are stored in the
+following order::
-============= ======= ======= ======= ============
-32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB)
-============= ======= ======= ======= ============
-imm offset src_reg dst_reg opcode
-============= ======= ======= ======= ============
+ opcode:8 src_reg:4 dst_reg:4 offset:16 imm:32 // In little-endian BPF.
+ opcode:8 dst_reg:4 src_reg:4 offset:16 imm:32 // In big-endian BPF.
**imm**
signed integer immediate value
@@ -63,6 +60,18 @@ imm offset src_reg dst_reg opcode
**opcode**
operation to perform
+Note that the contents of multi-byte fields ('imm' and 'offset') are
+stored using big-endian byte ordering in big-endian BPF and
+little-endian byte ordering in little-endian BPF.
+
+For example::
+
+ opcode offset imm assembly
+ src_reg dst_reg
+ 07 0 1 00 00 44 33 22 11 r1 += 0x11223344 // little
+ dst_reg src_reg
+ 07 1 0 00 00 11 22 33 44 r1 += 0x11223344 // big
+
Note that most instructions do not use all of the fields.
Unused fields shall be cleared to zero.
@@ -72,18 +81,23 @@ The 64 bits following the basic instruction contain a pseudo instruction
using the same format but with opcode, dst_reg, src_reg, and offset all set to zero,
and imm containing the high 32 bits of the immediate value.
-================= ==================
-64 bits (MSB) 64 bits (LSB)
-================= ==================
-basic instruction pseudo instruction
-================= ==================
+This is depicted in the following figure::
+
+ basic_instruction
+ .-----------------------------.
+ | |
+ code:8 regs:8 offset:16 imm:32 unused:32 imm:32
+ | |
+ '--------------'
+ pseudo instruction
Thus the 64-bit immediate value is constructed as follows:
imm64 = (next_imm << 32) | imm
where 'next_imm' refers to the imm value of the pseudo instruction
-following the basic instruction.
+following the basic instruction. The unused bytes in the pseudo
+instruction are reserved and shall be cleared to zero.
Instruction classes
-------------------
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index ca96ef3f6896..69eccf6f98ef 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -100,6 +100,23 @@ Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
size parameter, and the value of the constant matters for program safety, __k
suffix should be used.
+2.2.2 __uninit Annotation
+-------------------------
+
+This annotation is used to indicate that the argument will be treated as
+uninitialized.
+
+An example is given below::
+
+ __bpf_kfunc int bpf_dynptr_from_skb(..., struct bpf_dynptr_kern *ptr__uninit)
+ {
+ ...
+ }
+
+Here, the dynptr will be treated as an uninitialized dynptr. Without this
+annotation, the verifier will reject the program if the dynptr passed in is
+not initialized.
+
.. _BPF_kfunc_nodef:
2.3 Using an existing kernel function
@@ -232,11 +249,13 @@ added later.
2.4.8 KF_RCU flag
-----------------
-The KF_RCU flag is used for kfuncs which have a rcu ptr as its argument.
-When used together with KF_ACQUIRE, it indicates the kfunc should have a
-single argument which must be a trusted argument or a MEM_RCU pointer.
-The argument may have reference count of 0 and the kfunc must take this
-into consideration.
+The KF_RCU flag is a weaker version of KF_TRUSTED_ARGS. The kfuncs marked with
+KF_RCU expect either PTR_TRUSTED or MEM_RCU arguments. The verifier guarantees
+that the objects are valid and there is no use-after-free. The pointers are not
+NULL, but the object's refcount could have reached zero. The kfuncs need to
+consider doing refcnt != 0 check, especially when returning a KF_ACQUIRE
+pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should very likely
+also be KF_RET_NULL.
.. _KF_deprecated_flag:
@@ -527,7 +546,7 @@ Here's an example of how it can be used:
/* struct containing the struct task_struct kptr which is actually stored in the map. */
struct __cgroups_kfunc_map_value {
- struct cgroup __kptr_ref * cgroup;
+ struct cgroup __kptr * cgroup;
};
/* The map containing struct __cgroups_kfunc_map_value entries. */
@@ -583,13 +602,17 @@ Here's an example of how it can be used:
----
-Another kfunc available for interacting with ``struct cgroup *`` objects is
-bpf_cgroup_ancestor(). This allows callers to access the ancestor of a cgroup,
-and return it as a cgroup kptr.
+Other kfuncs available for interacting with ``struct cgroup *`` objects are
+bpf_cgroup_ancestor() and bpf_cgroup_from_id(), allowing callers to access
+the ancestor of a cgroup and find a cgroup by its ID, respectively. Both
+return a cgroup kptr.
.. kernel-doc:: kernel/bpf/helpers.c
:identifiers: bpf_cgroup_ancestor
+.. kernel-doc:: kernel/bpf/helpers.c
+ :identifiers: bpf_cgroup_from_id
+
Eventually, BPF should be updated to allow this to happen with a normal memory
load in the program itself. This is currently not possible without more work in
the verifier. bpf_cgroup_ancestor() can be used as follows:
diff --git a/Documentation/bpf/maps.rst b/Documentation/bpf/maps.rst
index 4906ff0f8382..6f069f3d6f4b 100644
--- a/Documentation/bpf/maps.rst
+++ b/Documentation/bpf/maps.rst
@@ -11,9 +11,9 @@ maps are accessed from BPF programs via BPF helpers which are documented in the
`man-pages`_ for `bpf-helpers(7)`_.
BPF maps are accessed from user space via the ``bpf`` syscall, which provides
-commands to create maps, lookup elements, update elements and delete
-elements. More details of the BPF syscall are available in
-:doc:`/userspace-api/ebpf/syscall` and in the `man-pages`_ for `bpf(2)`_.
+commands to create maps, lookup elements, update elements and delete elements.
+More details of the BPF syscall are available in `ebpf-syscall`_ and in the
+`man-pages`_ for `bpf(2)`_.
Map Types
=========
@@ -79,3 +79,4 @@ Find and delete element by key in a given map using ``attr->map_fd``,
.. _man-pages: https://www.kernel.org/doc/man-pages/
.. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html
.. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html
+.. _ebpf-syscall: https://docs.kernel.org/userspace-api/ebpf/syscall.html
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 288003a9f0ca..e70c846efaa1 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -1248,3 +1248,9 @@ out:
return prog;
}
+
+/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+ return true;
+}
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 37072e15b263..5ea07c833c5b 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -63,10 +63,7 @@ config MIPS
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
- select HAVE_EBPF_JIT if !CPU_MICROMIPS && \
- !CPU_DADDI_WORKAROUNDS && \
- !CPU_R4000_WORKAROUNDS && \
- !CPU_R4400_WORKAROUNDS
+ select HAVE_EBPF_JIT if !CPU_MICROMIPS
select HAVE_EXIT_THREAD
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/mips/net/bpf_jit_comp.c b/arch/mips/net/bpf_jit_comp.c
index b17130d510d4..a40d926b6513 100644
--- a/arch/mips/net/bpf_jit_comp.c
+++ b/arch/mips/net/bpf_jit_comp.c
@@ -218,9 +218,13 @@ bool valid_alu_i(u8 op, s32 imm)
/* All legal eBPF values are valid */
return true;
case BPF_ADD:
+ if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS))
+ return false;
/* imm must be 16 bits */
return imm >= -0x8000 && imm <= 0x7fff;
case BPF_SUB:
+ if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS))
+ return false;
/* -imm must be 16 bits */
return imm >= -0x7fff && imm <= 0x8000;
case BPF_AND:
diff --git a/arch/mips/net/bpf_jit_comp64.c b/arch/mips/net/bpf_jit_comp64.c
index 0e7c1bdcf914..fa7e9aa37f49 100644
--- a/arch/mips/net/bpf_jit_comp64.c
+++ b/arch/mips/net/bpf_jit_comp64.c
@@ -228,6 +228,9 @@ static void emit_alu_r64(struct jit_context *ctx, u8 dst, u8 src, u8 op)
} else {
emit(ctx, dmultu, dst, src);
emit(ctx, mflo, dst);
+ /* Ensure multiplication is completed */
+ if (IS_ENABLED(CONFIG_CPU_R4000_WORKAROUNDS))
+ emit(ctx, mfhi, MIPS_R_ZERO);
}
break;
/* dst = dst / src */
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index f5a668736c79..a9270366dc57 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1751,3 +1751,8 @@ void bpf_jit_build_epilogue(struct rv_jit_context *ctx)
{
__build_epilogue(false, ctx);
}
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+ return true;
+}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 520b238abd5a..d3456804f7aa 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -607,11 +607,18 @@ enum bpf_type_flag {
*/
NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS),
+ /* DYNPTR points to sk_buff */
+ DYNPTR_TYPE_SKB = BIT(15 + BPF_BASE_TYPE_BITS),
+
+ /* DYNPTR points to xdp_buff */
+ DYNPTR_TYPE_XDP = BIT(16 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
-#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)
+#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
+ | DYNPTR_TYPE_XDP)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -1124,6 +1131,37 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
return bpf_func(ctx, insnsi);
}
+/* the implementation of the opaque uapi struct bpf_dynptr */
+struct bpf_dynptr_kern {
+ void *data;
+ /* Size represents the number of usable bytes of dynptr data.
+ * If for example the offset is at 4 for a local dynptr whose data is
+ * of type u64, the number of usable bytes is 4.
+ *
+ * The upper 8 bits are reserved. It is as follows:
+ * Bits 0 - 23 = size
+ * Bits 24 - 30 = dynptr type
+ * Bit 31 = whether dynptr is read-only
+ */
+ u32 size;
+ u32 offset;
+} __aligned(8);
+
+enum bpf_dynptr_type {
+ BPF_DYNPTR_TYPE_INVALID,
+ /* Points to memory that is local to the bpf program */
+ BPF_DYNPTR_TYPE_LOCAL,
+ /* Underlying data is a ringbuf record */
+ BPF_DYNPTR_TYPE_RINGBUF,
+ /* Underlying data is a sk_buff */
+ BPF_DYNPTR_TYPE_SKB,
+ /* Underlying data is a xdp_buff */
+ BPF_DYNPTR_TYPE_XDP,
+};
+
+int bpf_dynptr_check_size(u32 size);
+u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
+
#ifdef CONFIG_BPF_JIT
int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
@@ -2241,7 +2279,7 @@ struct bpf_core_ctx {
bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off);
+ int off, const char *suffix);
bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
const struct btf *reg_btf, u32 reg_id,
@@ -2266,6 +2304,11 @@ static inline bool has_current_bpf_ctx(void)
}
void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog);
+
+void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+ enum bpf_dynptr_type type, u32 offset, u32 size);
+void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
+void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -2495,6 +2538,19 @@ static inline void bpf_prog_inc_misses_counter(struct bpf_prog *prog)
static inline void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
}
+
+static inline void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
+ enum bpf_dynptr_type type, u32 offset, u32 size)
+{
+}
+
+static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
+{
+}
+
+static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@ -2801,6 +2857,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
struct bpf_insn *insn_buf,
struct bpf_prog *prog,
u32 *target_size);
+int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr);
#else
static inline bool bpf_sock_common_is_valid_access(int off, int size,
enum bpf_access_type type,
@@ -2822,6 +2880,11 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
{
return 0;
}
+static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr)
+{
+ return -EOPNOTSUPP;
+}
#endif
#ifdef CONFIG_INET
@@ -2913,36 +2976,6 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 num_args, struct bpf_bprintf_data *data);
void bpf_bprintf_cleanup(struct bpf_bprintf_data *data);
-/* the implementation of the opaque uapi struct bpf_dynptr */
-struct bpf_dynptr_kern {
- void *data;
- /* Size represents the number of usable bytes of dynptr data.
- * If for example the offset is at 4 for a local dynptr whose data is
- * of type u64, the number of usable bytes is 4.
- *
- * The upper 8 bits are reserved. It is as follows:
- * Bits 0 - 23 = size
- * Bits 24 - 30 = dynptr type
- * Bit 31 = whether dynptr is read-only
- */
- u32 size;
- u32 offset;
-} __aligned(8);
-
-enum bpf_dynptr_type {
- BPF_DYNPTR_TYPE_INVALID,
- /* Points to memory that is local to the bpf program */
- BPF_DYNPTR_TYPE_LOCAL,
- /* Underlying data is a kernel-produced ringbuf record */
- BPF_DYNPTR_TYPE_RINGBUF,
-};
-
-void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
- enum bpf_dynptr_type type, u32 offset, u32 size);
-void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
-int bpf_dynptr_check_size(u32 size);
-u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
-
#ifdef CONFIG_BPF_LSM
void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
void bpf_cgroup_atype_put(int cgroup_atype);
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index 3e164b8efaa9..a7104af61ab4 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -14,6 +14,13 @@ struct bpf_mem_alloc {
struct work_struct work;
};
+/* 'size != 0' is for bpf_mem_alloc which manages fixed-size objects.
+ * Alloc and free are done with bpf_mem_cache_{alloc,free}().
+ *
+ * 'size = 0' is for bpf_mem_alloc which manages many fixed-size objects.
+ * Alloc and free are done with bpf_mem_{alloc,free}() and the size of
+ * the returned object is given by the size argument of bpf_mem_alloc().
+ */
int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu);
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index cf1bb1cf4a7b..18538bad2b8c 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -537,7 +537,6 @@ struct bpf_verifier_env {
bool bypass_spec_v1;
bool bypass_spec_v4;
bool seen_direct_write;
- bool rcu_tag_supported;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
const struct bpf_line_info *prev_linfo;
struct bpf_verifier_log log;
@@ -616,9 +615,6 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
enum bpf_arg_type arg_type);
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
u32 regno, u32 mem_size);
-struct bpf_call_arg_meta;
-int process_dynptr_func(struct bpf_verifier_env *env, int regno,
- enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta);
/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 49e0fe6d8274..556b3e2e7471 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -70,7 +70,7 @@
#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
-#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */
+#define KF_RCU (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */
/*
* Tag marking a kernel function as a kfunc. This is meant to minimize the
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1727898f1641..efa5d4a1677e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1542,4 +1542,50 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u64 index
return XDP_REDIRECT;
}
+#ifdef CONFIG_NET
+int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len);
+int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
+ u32 len, u64 flags);
+int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
+int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
+void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
+void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+ void *buf, unsigned long len, bool flush);
+#else /* CONFIG_NET */
+static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
+ void *to, u32 len)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset,
+ const void *from, u32 len, u64 flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset,
+ void *buf, u32 len)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset,
+ void *buf, u32 len)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+{
+ return NULL;
+}
+
+static inline void *bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *buf,
+ unsigned long len, bool flush)
+{
+ return NULL;
+}
+#endif /* CONFIG_NET */
+
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 62ce1f5d1b1d..976b194eb775 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4969,6 +4969,12 @@ union bpf_attr {
* different maps if key/value layout matches across maps.
* Every bpf_timer_set_callback() can have different callback_fn.
*
+ * *flags* can be one of:
+ *
+ * **BPF_F_TIMER_ABS**
+ * Start the timer in absolute expire value instead of the
+ * default relative one.
+ *
* Return
* 0 on success.
* **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
@@ -5325,11 +5331,22 @@ union bpf_attr {
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
- * *flags* is currently unused.
+ *
+ * *flags* must be 0 except for skb-type dynptrs.
+ *
+ * For skb-type dynptrs:
+ * * All data slices of the dynptr are automatically
+ * invalidated after **bpf_dynptr_write**\ (). This is
+ * because writing may pull the skb and change the
+ * underlying packet buffer.
+ *
+ * * For *flags*, please see the flags accepted by
+ * **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr or if *flags* is not 0.
+ * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
+ * other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
@@ -5337,6 +5354,9 @@ union bpf_attr {
*
* *len* must be a statically known value. The returned data slice
* is invalidated whenever the dynptr is invalidated.
+ *
+ * skb and xdp type dynptrs may not use bpf_dynptr_data. They should
+ * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
* Return
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length
@@ -7083,4 +7103,13 @@ struct bpf_core_relo {
enum bpf_core_relo_kind kind;
};
+/*
+ * Flags to control bpf_timer_start() behaviour.
+ * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
+ * relative to current time.
+ */
+enum {
+ BPF_F_TIMER_ABS = (1ULL << 0),
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 35f4138a54dc..3d320393a12c 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -51,11 +51,21 @@ owner_storage(struct bpf_local_storage_map *smap, void *owner)
return map->ops->map_owner_storage_ptr(owner);
}
+static bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem)
+{
+ return !hlist_unhashed_lockless(&selem->snode);
+}
+
static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
{
return !hlist_unhashed(&selem->snode);
}
+static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem)
+{
+ return !hlist_unhashed_lockless(&selem->map_node);
+}
+
static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
{
return !hlist_unhashed(&selem->map_node);
@@ -75,6 +85,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (selem) {
if (value)
copy_map_value(&smap->map, SDATA(selem)->data, value);
+ /* No need to call check_and_init_map_value as memory is zero init */
return selem;
}
@@ -98,7 +109,28 @@ void bpf_local_storage_free_rcu(struct rcu_head *rcu)
kfree_rcu(local_storage, rcu);
}
-static void bpf_selem_free_rcu(struct rcu_head *rcu)
+static void bpf_selem_free_fields_rcu(struct rcu_head *rcu)
+{
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage_map *smap;
+
+ selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+ /* protected by the rcu_barrier*() */
+ smap = rcu_dereference_protected(SDATA(selem)->smap, true);
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ kfree(selem);
+}
+
+static void bpf_selem_free_fields_trace_rcu(struct rcu_head *rcu)
+{
+ /* Free directly if Tasks Trace RCU GP also implies RCU GP */
+ if (rcu_trace_implies_rcu_gp())
+ bpf_selem_free_fields_rcu(rcu);
+ else
+ call_rcu(rcu, bpf_selem_free_fields_rcu);
+}
+
+static void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
{
struct bpf_local_storage_elem *selem;
@@ -119,6 +151,7 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
{
struct bpf_local_storage_map *smap;
bool free_local_storage;
+ struct btf_record *rec;
void *owner;
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
@@ -159,10 +192,26 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
SDATA(selem))
RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
- if (use_trace_rcu)
- call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
- else
- kfree_rcu(selem, rcu);
+ /* A different RCU callback is chosen whenever we need to free
+ * additional fields in selem data before freeing selem.
+ * bpf_local_storage_map_free only executes rcu_barrier to wait for RCU
+ * callbacks when it has special fields, hence we can only conditionally
+ * dereference smap, as by this time the map might have already been
+ * freed without waiting for our call_rcu callback if it did not have
+ * any special fields.
+ */
+ rec = smap->map.record;
+ if (use_trace_rcu) {
+ if (!IS_ERR_OR_NULL(rec))
+ call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_fields_trace_rcu);
+ else
+ call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
+ } else {
+ if (!IS_ERR_OR_NULL(rec))
+ call_rcu(&selem->rcu, bpf_selem_free_fields_rcu);
+ else
+ kfree_rcu(selem, rcu);
+ }
return free_local_storage;
}
@@ -174,7 +223,7 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
bool free_local_storage = false;
unsigned long flags;
- if (unlikely(!selem_linked_to_storage(selem)))
+ if (unlikely(!selem_linked_to_storage_lockless(selem)))
/* selem has already been unlinked from sk */
return;
@@ -208,7 +257,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
struct bpf_local_storage_map_bucket *b;
unsigned long flags;
- if (unlikely(!selem_linked_to_map(selem)))
+ if (unlikely(!selem_linked_to_map_lockless(selem)))
/* selem has already be unlinked from smap */
return;
@@ -420,7 +469,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
err = check_flags(old_sdata, map_flags);
if (err)
return ERR_PTR(err);
- if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
+ if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
copy_map_value_locked(&smap->map, old_sdata->data,
value, false);
return old_sdata;
@@ -713,6 +762,26 @@ void bpf_local_storage_map_free(struct bpf_map *map,
*/
synchronize_rcu();
+ /* Only delay freeing of smap, buckets are not needed anymore */
kvfree(smap->buckets);
+
+ /* When local storage has special fields, callbacks for
+ * bpf_selem_free_fields_rcu and bpf_selem_free_fields_trace_rcu will
+ * keep using the map BTF record, we need to execute an RCU barrier to
+ * wait for them as the record will be freed right after our map_free
+ * callback.
+ */
+ if (!IS_ERR_OR_NULL(smap->map.record)) {
+ rcu_barrier_tasks_trace();
+ /* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp()
+ * is true, because while call_rcu invocation is skipped in that
+ * case in bpf_selem_free_fields_trace_rcu (and all local
+ * storage maps pass use_trace_rcu = true), there can be
+ * call_rcu callbacks based on use_trace_rcu = false in the
+ * while ((selem = ...)) loop above or when owner's free path
+ * calls bpf_local_storage_unlink_nolock.
+ */
+ rcu_barrier();
+ }
bpf_map_area_free(smap);
}
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index fa22ec79ac0e..a8cb09e5973b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -207,6 +207,11 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_TRACING,
BTF_KFUNC_HOOK_SYSCALL,
BTF_KFUNC_HOOK_FMODRET,
+ BTF_KFUNC_HOOK_CGROUP_SKB,
+ BTF_KFUNC_HOOK_SCHED_ACT,
+ BTF_KFUNC_HOOK_SK_SKB,
+ BTF_KFUNC_HOOK_SOCKET_FILTER,
+ BTF_KFUNC_HOOK_LWT,
BTF_KFUNC_HOOK_MAX,
};
@@ -3283,9 +3288,9 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
/* Reject extra tags */
if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
return -EINVAL;
- if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
+ if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off)))
type = BPF_KPTR_UNREF;
- else if (!strcmp("kptr_ref", __btf_name_by_offset(btf, t->name_off)))
+ else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
type = BPF_KPTR_REF;
else
return -EINVAL;
@@ -5683,6 +5688,10 @@ again:
* int socket_filter_bpf_prog(struct __sk_buff *skb)
* { // no fields of skb are ever used }
*/
+ if (strcmp(ctx_tname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0)
+ return ctx_type;
+ if (strcmp(ctx_tname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0)
+ return ctx_type;
if (strcmp(ctx_tname, tname)) {
/* bpf_user_pt_regs_t is a typedef, so resolve it to
* underlying struct and check name again
@@ -6154,6 +6163,7 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
const char *tname, *mname, *tag_value;
u32 vlen, elem_id, mid;
+ *flag = 0;
again:
tname = __btf_name_by_offset(btf, t->name_off);
if (!btf_type_is_struct(t)) {
@@ -6320,6 +6330,15 @@ error:
* of this field or inside of this struct
*/
if (btf_type_is_struct(mtype)) {
+ if (BTF_INFO_KIND(mtype->info) == BTF_KIND_UNION &&
+ btf_type_vlen(mtype) != 1)
+ /*
+ * walking unions yields untrusted pointers
+ * with exception of __bpf_md_ptr and other
+ * unions with a single member
+ */
+ *flag |= PTR_UNTRUSTED;
+
/* our field must be inside that union or struct */
t = mtype;
@@ -6364,7 +6383,7 @@ error:
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
if (btf_type_is_struct(stype)) {
*next_btf_id = id;
- *flag = tmp_flag;
+ *flag |= tmp_flag;
return WALK_PTR;
}
}
@@ -7704,6 +7723,19 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
return BTF_KFUNC_HOOK_TRACING;
case BPF_PROG_TYPE_SYSCALL:
return BTF_KFUNC_HOOK_SYSCALL;
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ return BTF_KFUNC_HOOK_CGROUP_SKB;
+ case BPF_PROG_TYPE_SCHED_ACT:
+ return BTF_KFUNC_HOOK_SCHED_ACT;
+ case BPF_PROG_TYPE_SK_SKB:
+ return BTF_KFUNC_HOOK_SK_SKB;
+ case BPF_PROG_TYPE_SOCKET_FILTER:
+ return BTF_KFUNC_HOOK_SOCKET_FILTER;
+ case BPF_PROG_TYPE_LWT_OUT:
+ case BPF_PROG_TYPE_LWT_IN:
+ case BPF_PROG_TYPE_LWT_XMIT:
+ case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+ return BTF_KFUNC_HOOK_LWT;
default:
return BTF_KFUNC_HOOK_MAX;
}
@@ -8335,7 +8367,7 @@ out:
bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
- int off)
+ int off, const char *suffix)
{
struct btf *btf = reg->btf;
const struct btf_type *walk_type, *safe_type;
@@ -8352,7 +8384,7 @@ bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
tname = btf_name_by_offset(btf, walk_type->name_off);
- ret = snprintf(safe_tname, sizeof(safe_tname), "%s__safe_fields", tname);
+ ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix);
if (ret < 0)
return false;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index bf2fdb33fb31..53edb8ad2471 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -2223,10 +2223,12 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
treg, si->dst_reg,
offsetof(struct bpf_sysctl_kern, ppos));
- *insn++ = BPF_STX_MEM(
- BPF_SIZEOF(u32), treg, si->src_reg,
+ *insn++ = BPF_RAW_INSN(
+ BPF_CLASS(si->code) | BPF_MEM | BPF_SIZEOF(u32),
+ treg, si->src_reg,
bpf_ctx_narrow_access_offset(
- 0, sizeof(u32), sizeof(loff_t)));
+ 0, sizeof(u32), sizeof(loff_t)),
+ si->imm);
*insn++ = BPF_LDX_MEM(
BPF_DW, treg, si->dst_reg,
offsetof(struct bpf_sysctl_kern, tmp_reg));
@@ -2376,10 +2378,17 @@ static bool cg_sockopt_is_valid_access(int off, int size,
return true;
}
-#define CG_SOCKOPT_ACCESS_FIELD(T, F) \
- T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \
- si->dst_reg, si->src_reg, \
- offsetof(struct bpf_sockopt_kern, F))
+#define CG_SOCKOPT_READ_FIELD(F) \
+ BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F), \
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sockopt_kern, F))
+
+#define CG_SOCKOPT_WRITE_FIELD(F) \
+ BPF_RAW_INSN((BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F) | \
+ BPF_MEM | BPF_CLASS(si->code)), \
+ si->dst_reg, si->src_reg, \
+ offsetof(struct bpf_sockopt_kern, F), \
+ si->imm)
static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
@@ -2391,25 +2400,25 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
switch (si->off) {
case offsetof(struct bpf_sockopt, sk):
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk);
+ *insn++ = CG_SOCKOPT_READ_FIELD(sk);
break;
case offsetof(struct bpf_sockopt, level):
if (type == BPF_WRITE)
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level);
+ *insn++ = CG_SOCKOPT_WRITE_FIELD(level);
else
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level);
+ *insn++ = CG_SOCKOPT_READ_FIELD(level);
break;
case offsetof(struct bpf_sockopt, optname):
if (type == BPF_WRITE)
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname);
+ *insn++ = CG_SOCKOPT_WRITE_FIELD(optname);
else
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname);
+ *insn++ = CG_SOCKOPT_READ_FIELD(optname);
break;
case offsetof(struct bpf_sockopt, optlen):
if (type == BPF_WRITE)
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen);
+ *insn++ = CG_SOCKOPT_WRITE_FIELD(optlen);
else
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
+ *insn++ = CG_SOCKOPT_READ_FIELD(optlen);
break;
case offsetof(struct bpf_sockopt, retval):
BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
@@ -2429,9 +2438,11 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
treg, treg,
offsetof(struct task_struct, bpf_ctx));
- *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
- treg, si->src_reg,
- offsetof(struct bpf_cg_run_ctx, retval));
+ *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_MEM |
+ BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+ treg, si->src_reg,
+ offsetof(struct bpf_cg_run_ctx, retval),
+ si->imm);
*insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
offsetof(struct bpf_sockopt_kern, tmp_reg));
} else {
@@ -2447,10 +2458,10 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
}
break;
case offsetof(struct bpf_sockopt, optval):
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
+ *insn++ = CG_SOCKOPT_READ_FIELD(optval);
break;
case offsetof(struct bpf_sockopt, optval_end):
- *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end);
+ *insn++ = CG_SOCKOPT_READ_FIELD(optval_end);
break;
}
@@ -2529,10 +2540,6 @@ cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_pid_tgid_proto;
case BPF_FUNC_get_current_comm:
return &bpf_get_current_comm_proto;
- case BPF_FUNC_get_current_cgroup_id:
- return &bpf_get_current_cgroup_id_proto;
- case BPF_FUNC_get_current_ancestor_cgroup_id:
- return &bpf_get_current_ancestor_cgroup_id_proto;
#ifdef CONFIG_CGROUP_NET_CLASSID
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index 52b981512a35..b6587ec40f1b 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -55,7 +55,7 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_create(void)
/* cpumask must be the first element so struct bpf_cpumask be cast to struct cpumask. */
BUILD_BUG_ON(offsetof(struct bpf_cpumask, cpumask) != 0);
- cpumask = bpf_mem_alloc(&bpf_cpumask_ma, sizeof(*cpumask));
+ cpumask = bpf_mem_cache_alloc(&bpf_cpumask_ma);
if (!cpumask)
return NULL;
@@ -123,7 +123,7 @@ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask)
if (refcount_dec_and_test(&cpumask->usage)) {
migrate_disable();
- bpf_mem_free(&bpf_cpumask_ma, cpumask);
+ bpf_mem_cache_free(&bpf_cpumask_ma, cpumask);
migrate_enable();
}
}
@@ -427,26 +427,26 @@ BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_cpumask_first, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_and, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_or, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_full, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_any, KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_and, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_or, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_RCU)
BTF_SET8_END(cpumask_kfunc_btf_ids)
static const struct btf_kfunc_id_set cpumask_kfunc_set = {
@@ -468,7 +468,7 @@ static int __init cpumask_kfunc_init(void)
},
};
- ret = bpf_mem_alloc_init(&bpf_cpumask_ma, 0, false);
+ ret = bpf_mem_alloc_init(&bpf_cpumask_ma, sizeof(struct bpf_cpumask), false);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &cpumask_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &cpumask_kfunc_set);
return ret ?: register_btf_id_dtor_kfuncs(cpumask_dtors,
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 5dfcb5ad0d06..653aeb481c79 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -249,7 +249,18 @@ static void htab_free_prealloced_fields(struct bpf_htab *htab)
struct htab_elem *elem;
elem = get_htab_elem(htab, i);
- bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
+ if (htab_is_percpu(htab)) {
+ void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
+ cond_resched();
+ }
+ } else {
+ bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
+ cond_resched();
+ }
cond_resched();
}
}
@@ -759,9 +770,17 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
static void check_and_free_fields(struct bpf_htab *htab,
struct htab_elem *elem)
{
- void *map_value = elem->key + round_up(htab->map.key_size, 8);
+ if (htab_is_percpu(htab)) {
+ void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
+ int cpu;
- bpf_obj_free_fields(htab->map.record, map_value);
+ for_each_possible_cpu(cpu)
+ bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
+ } else {
+ void *map_value = elem->key + round_up(htab->map.key_size, 8);
+
+ bpf_obj_free_fields(htab->map.record, map_value);
+ }
}
/* It is called from the bpf_lru_list when the LRU needs to delete
@@ -858,9 +877,9 @@ find_first_elem:
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
+ check_and_free_fields(htab, l);
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
- check_and_free_fields(htab, l);
bpf_mem_cache_free(&htab->ma, l);
}
@@ -918,14 +937,13 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
{
if (!onallcpus) {
/* copy true value_size bytes */
- memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
+ copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
} else {
u32 size = round_up(htab->map.value_size, 8);
int off = 0, cpu;
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
- value + off, size);
+ copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
off += size;
}
}
@@ -940,16 +958,14 @@ static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
* (onallcpus=false always when coming from bpf prog).
*/
if (!onallcpus) {
- u32 size = round_up(htab->map.value_size, 8);
int current_cpu = raw_smp_processor_id();
int cpu;
for_each_possible_cpu(cpu) {
if (cpu == current_cpu)
- bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value,
- size);
- else
- memset(per_cpu_ptr(pptr, cpu), 0, size);
+ copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value);
+ else /* Since elem is preallocated, we cannot touch special fields */
+ zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
}
} else {
pcpu_copy_value(htab, pptr, value, onallcpus);
@@ -1575,9 +1591,8 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
pptr = htab_elem_get_ptr(l, key_size);
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(value + off,
- per_cpu_ptr(pptr, cpu),
- roundup_value_size);
+ copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(&htab->map, value + off);
off += roundup_value_size;
}
} else {
@@ -1772,8 +1787,8 @@ again_nocopy:
pptr = htab_elem_get_ptr(l, map->key_size);
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(dst_val + off,
- per_cpu_ptr(pptr, cpu), size);
+ copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(&htab->map, dst_val + off);
off += size;
}
} else {
@@ -2046,9 +2061,9 @@ static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
roundup_value_size = round_up(map->value_size, 8);
pptr = htab_elem_get_ptr(elem, map->key_size);
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(info->percpu_value_buf + off,
- per_cpu_ptr(pptr, cpu),
- roundup_value_size);
+ copy_map_value_long(map, info->percpu_value_buf + off,
+ per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(map, info->percpu_value_buf + off);
off += roundup_value_size;
}
ctx.value = info->percpu_value_buf;
@@ -2292,8 +2307,8 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
*/
pptr = htab_elem_get_ptr(l, map->key_size);
for_each_possible_cpu(cpu) {
- bpf_long_memcpy(value + off,
- per_cpu_ptr(pptr, cpu), size);
+ copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
+ check_and_init_map_value(map, value + off);
off += size;
}
ret = 0;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5b278a38ae58..637ac4e92e75 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1264,10 +1264,11 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
{
struct bpf_hrtimer *t;
int ret = 0;
+ enum hrtimer_mode mode;
if (in_nmi())
return -EOPNOTSUPP;
- if (flags)
+ if (flags > BPF_F_TIMER_ABS)
return -EINVAL;
__bpf_spin_lock_irqsave(&timer->lock);
t = timer->timer;
@@ -1275,7 +1276,13 @@ BPF_CALL_3(bpf_timer_start, struct bpf_timer_kern *, timer, u64, nsecs, u64, fla
ret = -EINVAL;
goto out;
}
- hrtimer_start(&t->timer, ns_to_ktime(nsecs), HRTIMER_MODE_REL_SOFT);
+
+ if (flags & BPF_F_TIMER_ABS)
+ mode = HRTIMER_MODE_ABS_SOFT;
+ else
+ mode = HRTIMER_MODE_REL_SOFT;
+
+ hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
out:
__bpf_spin_unlock_irqrestore(&timer->lock);
return ret;
@@ -1420,11 +1427,21 @@ static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
return ptr->size & DYNPTR_RDONLY_BIT;
}
+void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
+{
+ ptr->size |= DYNPTR_RDONLY_BIT;
+}
+
static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
{
ptr->size |= type << DYNPTR_TYPE_SHIFT;
}
+static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
+{
+ return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
+}
+
u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
{
return ptr->size & DYNPTR_SIZE_MASK;
@@ -1497,6 +1514,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
u32, offset, u64, flags)
{
+ enum bpf_dynptr_type type;
int err;
if (!src->data || flags)
@@ -1506,13 +1524,25 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern
if (err)
return err;
- /* Source and destination may possibly overlap, hence use memmove to
- * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
- * pointing to overlapping PTR_TO_MAP_VALUE regions.
- */
- memmove(dst, src->data + src->offset + offset, len);
+ type = bpf_dynptr_get_type(src);
- return 0;
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ /* Source and destination may possibly overlap, hence use memmove to
+ * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
+ * pointing to overlapping PTR_TO_MAP_VALUE regions.
+ */
+ memmove(dst, src->data + src->offset + offset, len);
+ return 0;
+ case BPF_DYNPTR_TYPE_SKB:
+ return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
+ case BPF_DYNPTR_TYPE_XDP:
+ return __bpf_xdp_load_bytes(src->data, src->offset + offset, dst, len);
+ default:
+ WARN_ONCE(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
+ return -EFAULT;
+ }
}
static const struct bpf_func_proto bpf_dynptr_read_proto = {
@@ -1529,22 +1559,40 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
u32, len, u64, flags)
{
+ enum bpf_dynptr_type type;
int err;
- if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
+ if (!dst->data || bpf_dynptr_is_rdonly(dst))
return -EINVAL;
err = bpf_dynptr_check_off_len(dst, offset, len);
if (err)
return err;
- /* Source and destination may possibly overlap, hence use memmove to
- * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
- * pointing to overlapping PTR_TO_MAP_VALUE regions.
- */
- memmove(dst->data + dst->offset + offset, src, len);
+ type = bpf_dynptr_get_type(dst);
- return 0;
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ if (flags)
+ return -EINVAL;
+ /* Source and destination may possibly overlap, hence use memmove to
+ * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
+ * pointing to overlapping PTR_TO_MAP_VALUE regions.
+ */
+ memmove(dst->data + dst->offset + offset, src, len);
+ return 0;
+ case BPF_DYNPTR_TYPE_SKB:
+ return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
+ flags);
+ case BPF_DYNPTR_TYPE_XDP:
+ if (flags)
+ return -EINVAL;
+ return __bpf_xdp_store_bytes(dst->data, dst->offset + offset, src, len);
+ default:
+ WARN_ONCE(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
+ return -EFAULT;
+ }
}
static const struct bpf_func_proto bpf_dynptr_write_proto = {
@@ -1560,6 +1608,7 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
{
+ enum bpf_dynptr_type type;
int err;
if (!ptr->data)
@@ -1572,7 +1621,20 @@ BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u3
if (bpf_dynptr_is_rdonly(ptr))
return 0;
- return (unsigned long)(ptr->data + ptr->offset + offset);
+ type = bpf_dynptr_get_type(ptr);
+
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ return (unsigned long)(ptr->data + ptr->offset + offset);
+ case BPF_DYNPTR_TYPE_SKB:
+ case BPF_DYNPTR_TYPE_XDP:
+ /* skb and xdp dynptrs should use bpf_dynptr_slice / bpf_dynptr_slice_rdwr */
+ return 0;
+ default:
+ WARN_ONCE(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
+ return 0;
+ }
}
static const struct bpf_func_proto bpf_dynptr_data_proto = {
@@ -1693,6 +1755,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_cgrp_storage_get_proto;
case BPF_FUNC_cgrp_storage_delete:
return &bpf_cgrp_storage_delete_proto;
+ case BPF_FUNC_get_current_cgroup_id:
+ return &bpf_get_current_cgroup_id_proto;
+ case BPF_FUNC_get_current_ancestor_cgroup_id:
+ return &bpf_get_current_ancestor_cgroup_id_proto;
#endif
default:
break;
@@ -2097,10 +2163,28 @@ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
if (level > cgrp->level || level < 0)
return NULL;
+ /* cgrp's refcnt could be 0 here, but ancestors can still be accessed */
ancestor = cgrp->ancestors[level];
- cgroup_get(ancestor);
+ if (!cgroup_tryget(ancestor))
+ return NULL;
return ancestor;
}
+
+/**
+ * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this
+ * kfunc which is not subsequently stored in a map, must be released by calling
+ * bpf_cgroup_release().
+ * @cgid: cgroup id.
+ */
+__bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
+{
+ struct cgroup *cgrp;
+
+ cgrp = cgroup_get_from_id(cgid);
+ if (IS_ERR(cgrp))
+ return NULL;
+ return cgrp;
+}
#endif /* CONFIG_CGROUPS */
/**
@@ -2122,6 +2206,140 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
return p;
}
+/**
+ * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
+ * @ptr: The dynptr whose data slice to retrieve
+ * @offset: Offset into the dynptr
+ * @buffer: User-provided buffer to copy contents into
+ * @buffer__szk: Size (in bytes) of the buffer. This is the length of the
+ * requested slice. This must be a constant.
+ *
+ * For non-skb and non-xdp type dynptrs, there is no difference between
+ * bpf_dynptr_slice and bpf_dynptr_data.
+ *
+ * If the intention is to write to the data slice, please use
+ * bpf_dynptr_slice_rdwr.
+ *
+ * The user must check that the returned pointer is not null before using it.
+ *
+ * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice
+ * does not change the underlying packet data pointers, so a call to
+ * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in
+ * the bpf program.
+ *
+ * Return: NULL if the call failed (eg invalid dynptr), pointer to a read-only
+ * data slice (can be either direct pointer to the data or a pointer to the user
+ * provided buffer, with its contents containing the data, if unable to obtain
+ * direct pointer)
+ */
+__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
+ void *buffer, u32 buffer__szk)
+{
+ enum bpf_dynptr_type type;
+ u32 len = buffer__szk;
+ int err;
+
+ if (!ptr->data)
+ return NULL;
+
+ err = bpf_dynptr_check_off_len(ptr, offset, len);
+ if (err)
+ return NULL;
+
+ type = bpf_dynptr_get_type(ptr);
+
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ return ptr->data + ptr->offset + offset;
+ case BPF_DYNPTR_TYPE_SKB:
+ return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer);
+ case BPF_DYNPTR_TYPE_XDP:
+ {
+ void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
+ if (xdp_ptr)
+ return xdp_ptr;
+
+ bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer, len, false);
+ return buffer;
+ }
+ default:
+ WARN_ONCE(true, "unknown dynptr type %d\n", type);
+ return NULL;
+ }
+}
+
+/**
+ * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
+ * @ptr: The dynptr whose data slice to retrieve
+ * @offset: Offset into the dynptr
+ * @buffer: User-provided buffer to copy contents into
+ * @buffer__szk: Size (in bytes) of the buffer. This is the length of the
+ * requested slice. This must be a constant.
+ *
+ * For non-skb and non-xdp type dynptrs, there is no difference between
+ * bpf_dynptr_slice and bpf_dynptr_data.
+ *
+ * The returned pointer is writable and may point to either directly the dynptr
+ * data at the requested offset or to the buffer if unable to obtain a direct
+ * data pointer to (example: the requested slice is to the paged area of an skb
+ * packet). In the case where the returned pointer is to the buffer, the user
+ * is responsible for persisting writes through calling bpf_dynptr_write(). This
+ * usually looks something like this pattern:
+ *
+ * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer));
+ * if (!eth)
+ * return TC_ACT_SHOT;
+ *
+ * // mutate eth header //
+ *
+ * if (eth == buffer)
+ * bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0);
+ *
+ * Please note that, as in the example above, the user must check that the
+ * returned pointer is not null before using it.
+ *
+ * Please also note that in the case of skb and xdp dynptrs, bpf_dynptr_slice_rdwr
+ * does not change the underlying packet data pointers, so a call to
+ * bpf_dynptr_slice_rdwr will not invalidate any ctx->data/data_end pointers in
+ * the bpf program.
+ *
+ * Return: NULL if the call failed (eg invalid dynptr), pointer to a
+ * data slice (can be either direct pointer to the data or a pointer to the user
+ * provided buffer, with its contents containing the data, if unable to obtain
+ * direct pointer)
+ */
+__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset,
+ void *buffer, u32 buffer__szk)
+{
+ if (!ptr->data || bpf_dynptr_is_rdonly(ptr))
+ return NULL;
+
+ /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice.
+ *
+ * For skb-type dynptrs, it is safe to write into the returned pointer
+ * if the bpf program allows skb data writes. There are two possiblities
+ * that may occur when calling bpf_dynptr_slice_rdwr:
+ *
+ * 1) The requested slice is in the head of the skb. In this case, the
+ * returned pointer is directly to skb data, and if the skb is cloned, the
+ * verifier will have uncloned it (see bpf_unclone_prologue()) already.
+ * The pointer can be directly written into.
+ *
+ * 2) Some portion of the requested slice is in the paged buffer area.
+ * In this case, the requested data will be copied out into the buffer
+ * and the returned pointer will be a pointer to the buffer. The skb
+ * will not be pulled. To persist the write, the user will need to call
+ * bpf_dynptr_write(), which will pull the skb and commit the write.
+ *
+ * Similarly for xdp programs, if the requested slice is not across xdp
+ * fragments, then a direct pointer will be returned, otherwise the data
+ * will be copied out into the buffer and the user will need to call
+ * bpf_dynptr_write() to commit changes.
+ */
+ return bpf_dynptr_slice(ptr, offset, buffer, buffer__szk);
+}
+
__bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
{
return obj;
@@ -2166,7 +2384,8 @@ BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_SET8_END(generic_btf_ids)
@@ -2190,6 +2409,8 @@ BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
BTF_ID_FLAGS(func, bpf_rdonly_cast)
BTF_ID_FLAGS(func, bpf_rcu_read_lock)
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
+BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
BTF_SET8_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index adc83cb82f37..ede5f987484f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1059,9 +1059,15 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY &&
- map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) {
+ map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
+ map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_TASK_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_CGRP_STORAGE) {
ret = -EOPNOTSUPP;
goto free_map_tab;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 272563a0b770..b2116ca78d9a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -268,7 +268,41 @@ struct bpf_call_arg_meta {
u32 ret_btf_id;
u32 subprogno;
struct btf_field *kptr_field;
- u8 uninit_dynptr_regno;
+};
+
+struct bpf_kfunc_call_arg_meta {
+ /* In parameters */
+ struct btf *btf;
+ u32 func_id;
+ u32 kfunc_flags;
+ const struct btf_type *func_proto;
+ const char *func_name;
+ /* Out parameters */
+ u32 ref_obj_id;
+ u8 release_regno;
+ bool r0_rdonly;
+ u32 ret_btf_id;
+ u64 r0_size;
+ u32 subprogno;
+ struct {
+ u64 value;
+ bool found;
+ } arg_constant;
+ struct {
+ struct btf *btf;
+ u32 btf_id;
+ } arg_obj_drop;
+ struct {
+ struct btf_field *field;
+ } arg_list_head;
+ struct {
+ struct btf_field *field;
+ } arg_rbtree_root;
+ struct {
+ enum bpf_dynptr_type type;
+ u32 id;
+ } initialized_dynptr;
+ u64 mem_size;
};
struct btf *btf_vmlinux;
@@ -453,7 +487,8 @@ static bool reg_type_not_null(enum bpf_reg_type type)
type == PTR_TO_TCP_SOCK ||
type == PTR_TO_MAP_VALUE ||
type == PTR_TO_MAP_KEY ||
- type == PTR_TO_SOCK_COMMON;
+ type == PTR_TO_SOCK_COMMON ||
+ type == PTR_TO_MEM;
}
static bool type_is_ptr_alloc_obj(u32 type)
@@ -675,37 +710,62 @@ static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_sl
return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
}
-static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ const char *obj_kind, int nr_slots)
{
int off, spi;
if (!tnum_is_const(reg->var_off)) {
- verbose(env, "dynptr has to be at a constant offset\n");
+ verbose(env, "%s has to be at a constant offset\n", obj_kind);
return -EINVAL;
}
off = reg->off + reg->var_off.value;
if (off % BPF_REG_SIZE) {
- verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
+ verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
return -EINVAL;
}
spi = __get_spi(off);
- if (spi < 1) {
- verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
+ if (spi + 1 < nr_slots) {
+ verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
return -EINVAL;
}
- if (!is_spi_bounds_valid(func(env, reg), spi, BPF_DYNPTR_NR_SLOTS))
+ if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
return -ERANGE;
return spi;
}
+static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
+}
+
static const char *kernel_type_name(const struct btf* btf, u32 id)
{
return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
}
+static const char *dynptr_type_str(enum bpf_dynptr_type type)
+{
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ return "local";
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ return "ringbuf";
+ case BPF_DYNPTR_TYPE_SKB:
+ return "skb";
+ case BPF_DYNPTR_TYPE_XDP:
+ return "xdp";
+ case BPF_DYNPTR_TYPE_INVALID:
+ return "<invalid>";
+ default:
+ WARN_ONCE(1, "unknown dynptr type %d\n", type);
+ return "<unknown>";
+ }
+}
+
static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
{
env->scratched_regs |= 1U << regno;
@@ -751,11 +811,31 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
return BPF_DYNPTR_TYPE_LOCAL;
case DYNPTR_TYPE_RINGBUF:
return BPF_DYNPTR_TYPE_RINGBUF;
+ case DYNPTR_TYPE_SKB:
+ return BPF_DYNPTR_TYPE_SKB;
+ case DYNPTR_TYPE_XDP:
+ return BPF_DYNPTR_TYPE_XDP;
default:
return BPF_DYNPTR_TYPE_INVALID;
}
}
+static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
+{
+ switch (type) {
+ case BPF_DYNPTR_TYPE_LOCAL:
+ return DYNPTR_TYPE_LOCAL;
+ case BPF_DYNPTR_TYPE_RINGBUF:
+ return DYNPTR_TYPE_RINGBUF;
+ case BPF_DYNPTR_TYPE_SKB:
+ return DYNPTR_TYPE_SKB;
+ case BPF_DYNPTR_TYPE_XDP:
+ return DYNPTR_TYPE_XDP;
+ default:
+ return 0;
+ }
+}
+
static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
{
return type == BPF_DYNPTR_TYPE_RINGBUF;
@@ -895,6 +975,14 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
+static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+ if (!env->allow_ptr_leaks)
+ __mark_reg_not_init(env, reg);
+ else
+ __mark_reg_unknown(env, reg);
+}
+
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
struct bpf_func_state *state, int spi)
{
@@ -934,12 +1022,8 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
/* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
continue;
- if (dreg->dynptr_id == dynptr_id) {
- if (!env->allow_ptr_leaks)
- __mark_reg_not_init(env, dreg);
- else
- __mark_reg_unknown(env, dreg);
- }
+ if (dreg->dynptr_id == dynptr_id)
+ mark_reg_invalid(env, dreg);
}));
/* Do not release reference state, we are destroying dynptr on stack,
@@ -955,39 +1039,49 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
return 0;
}
-static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
- int spi)
+static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
+ int spi;
+
if (reg->type == CONST_PTR_TO_DYNPTR)
return false;
- /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
- * will do check_mem_access to check and update stack bounds later, so
- * return true for that case.
+ spi = dynptr_get_spi(env, reg);
+
+ /* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
+ * error because this just means the stack state hasn't been updated yet.
+ * We will do check_mem_access to check and update stack bounds later.
*/
- if (spi < 0)
- return spi == -ERANGE;
- /* We allow overwriting existing unreferenced STACK_DYNPTR slots, see
- * mark_stack_slots_dynptr which calls destroy_if_dynptr_stack_slot to
- * ensure dynptr objects at the slots we are touching are completely
- * destructed before we reinitialize them for a new one. For referenced
- * ones, destroy_if_dynptr_stack_slot returns an error early instead of
- * delaying it until the end where the user will get "Unreleased
+ if (spi < 0 && spi != -ERANGE)
+ return false;
+
+ /* We don't need to check if the stack slots are marked by previous
+ * dynptr initializations because we allow overwriting existing unreferenced
+ * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
+ * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
+ * touching are completely destructed before we reinitialize them for a new
+ * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
+ * instead of delaying it until the end where the user will get "Unreleased
* reference" error.
*/
return true;
}
-static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
- int spi)
+static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
- int i;
+ int i, spi;
- /* This already represents first slot of initialized bpf_dynptr */
+ /* This already represents first slot of initialized bpf_dynptr.
+ *
+ * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
+ * check_func_arg_reg_off's logic, so we don't need to check its
+ * offset and alignment.
+ */
if (reg->type == CONST_PTR_TO_DYNPTR)
return true;
+ spi = dynptr_get_spi(env, reg);
if (spi < 0)
return false;
if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
@@ -1143,26 +1237,49 @@ static void print_verifier_state(struct bpf_verifier_env *env,
for (j = 0; j < BPF_REG_SIZE; j++) {
if (state->stack[i].slot_type[j] != STACK_INVALID)
valid = true;
- types_buf[j] = slot_type_char[
- state->stack[i].slot_type[j]];
+ types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
}
types_buf[BPF_REG_SIZE] = 0;
if (!valid)
continue;
if (!print_all && !stack_slot_scratched(env, i))
continue;
- verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
- print_liveness(env, state->stack[i].spilled_ptr.live);
- if (is_spilled_reg(&state->stack[i])) {
+ switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) {
+ case STACK_SPILL:
reg = &state->stack[i].spilled_ptr;
t = reg->type;
+
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, reg->live);
verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
if (t == SCALAR_VALUE && reg->precise)
verbose(env, "P");
if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
verbose(env, "%lld", reg->var_off.value + reg->off);
- } else {
+ break;
+ case STACK_DYNPTR:
+ i += BPF_DYNPTR_NR_SLOTS - 1;
+ reg = &state->stack[i].spilled_ptr;
+
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, reg->live);
+ verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type));
+ if (reg->ref_obj_id)
+ verbose(env, "(ref_id=%d)", reg->ref_obj_id);
+ break;
+ case STACK_MISC:
+ case STACK_ZERO:
+ default:
+ reg = &state->stack[i].spilled_ptr;
+
+ for (j = 0; j < BPF_REG_SIZE; j++)
+ types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
+ types_buf[BPF_REG_SIZE] = 0;
+
+ verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
+ print_liveness(env, reg->live);
verbose(env, "=%s", types_buf);
+ break;
}
}
if (state->acquired_refs && state->refs[0].id) {
@@ -1664,6 +1781,12 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
reg->type == PTR_TO_PACKET_END;
}
+static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
+{
+ return base_type(reg->type) == PTR_TO_MEM &&
+ (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
+}
+
/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
enum bpf_reg_type which)
@@ -2475,8 +2598,8 @@ static int check_subprogs(struct bpf_verifier_env *env)
u8 code = insn[i].code;
if (code == (BPF_JMP | BPF_CALL) &&
- insn[i].imm == BPF_FUNC_tail_call &&
- insn[i].src_reg != BPF_PSEUDO_CALL)
+ insn[i].src_reg == 0 &&
+ insn[i].imm == BPF_FUNC_tail_call)
subprog[cur_subprog].has_tail_call = true;
if (BPF_CLASS(code) == BPF_LD &&
(BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
@@ -3826,6 +3949,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
continue;
if (type == STACK_MISC)
continue;
+ if (type == STACK_INVALID && env->allow_uninit_stack)
+ continue;
verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
@@ -3863,6 +3988,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
continue;
if (type == STACK_ZERO)
continue;
+ if (type == STACK_INVALID && env->allow_uninit_stack)
+ continue;
verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
@@ -4175,7 +4302,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
struct bpf_reg_state *reg, u32 regno)
{
const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
- int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED;
+ int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
const char *reg_name = "";
/* Only unreferenced case accepts untrusted pointers */
@@ -4242,6 +4369,34 @@ bad_type:
return -EINVAL;
}
+/* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
+ * can dereference RCU protected pointers and result is PTR_TRUSTED.
+ */
+static bool in_rcu_cs(struct bpf_verifier_env *env)
+{
+ return env->cur_state->active_rcu_lock || !env->prog->aux->sleepable;
+}
+
+/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
+BTF_SET_START(rcu_protected_types)
+BTF_ID(struct, prog_test_ref_kfunc)
+BTF_ID(struct, cgroup)
+BTF_SET_END(rcu_protected_types)
+
+static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
+{
+ if (!btf_is_kernel(btf))
+ return false;
+ return btf_id_set_contains(&rcu_protected_types, btf_id);
+}
+
+static bool rcu_safe_kptr(const struct btf_field *field)
+{
+ const struct btf_field_kptr *kptr = &field->kptr;
+
+ return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
+}
+
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
int value_regno, int insn_idx,
struct btf_field *kptr_field)
@@ -4276,7 +4431,10 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
* value from map as PTR_TO_BTF_ID, with the correct type.
*/
mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
- kptr_field->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
+ kptr_field->kptr.btf_id,
+ rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
+ PTR_MAYBE_NULL | MEM_RCU :
+ PTR_MAYBE_NULL | PTR_UNTRUSTED);
/* For mark_ptr_or_null_reg */
val_reg->id = ++env->id_gen;
} else if (class == BPF_STX) {
@@ -4999,23 +5157,76 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
return 0;
}
-#define BTF_TYPE_SAFE_NESTED(__type) __PASTE(__type, __safe_fields)
+#define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu)
+#define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted)
-BTF_TYPE_SAFE_NESTED(struct task_struct) {
+/*
+ * Allow list few fields as RCU trusted or full trusted.
+ * This logic doesn't allow mix tagging and will be removed once GCC supports
+ * btf_type_tag.
+ */
+
+/* RCU trusted: these fields are trusted in RCU CS and never NULL */
+BTF_TYPE_SAFE_RCU(struct task_struct) {
const cpumask_t *cpus_ptr;
+ struct css_set __rcu *cgroups;
+ struct task_struct __rcu *real_parent;
+ struct task_struct *group_leader;
};
-static bool nested_ptr_is_trusted(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg,
- int off)
+BTF_TYPE_SAFE_RCU(struct css_set) {
+ struct cgroup *dfl_cgrp;
+};
+
+/* full trusted: these fields are trusted even outside of RCU CS and never NULL */
+BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
+ __bpf_md_ptr(struct seq_file *, seq);
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct task_struct *, task);
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
+ struct file *file;
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct file) {
+ struct inode *f_inode;
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct dentry) {
+ /* no negative dentry-s in places where bpf can see it */
+ struct inode *d_inode;
+};
+
+BTF_TYPE_SAFE_TRUSTED(struct socket) {
+ struct sock *sk;
+};
+
+static bool type_is_rcu(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ int off)
{
- /* If its parent is not trusted, it can't regain its trusted status. */
- if (!is_trusted_reg(reg))
- return false;
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
- BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct task_struct));
+ return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_rcu");
+}
- return btf_nested_type_is_trusted(&env->log, reg, off);
+static bool type_is_trusted(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg,
+ int off)
+{
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
+
+ return btf_nested_type_is_trusted(&env->log, reg, off, "__safe_trusted");
}
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
@@ -5101,41 +5312,56 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (ret < 0)
return ret;
- /* If this is an untrusted pointer, all pointers formed by walking it
- * also inherit the untrusted flag.
- */
- if (type_flag(reg->type) & PTR_UNTRUSTED)
- flag |= PTR_UNTRUSTED;
-
- /* By default any pointer obtained from walking a trusted pointer is no
- * longer trusted, unless the field being accessed has explicitly been
- * marked as inheriting its parent's state of trust.
- *
- * An RCU-protected pointer can also be deemed trusted if we are in an
- * RCU read region. This case is handled below.
- */
- if (nested_ptr_is_trusted(env, reg, off))
- flag |= PTR_TRUSTED;
- else
- flag &= ~PTR_TRUSTED;
+ if (ret != PTR_TO_BTF_ID) {
+ /* just mark; */
- if (flag & MEM_RCU) {
- /* Mark value register as MEM_RCU only if it is protected by
- * bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU
- * itself can already indicate trustedness inside the rcu
- * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since
- * it could be null in some cases.
+ } else if (type_flag(reg->type) & PTR_UNTRUSTED) {
+ /* If this is an untrusted pointer, all pointers formed by walking it
+ * also inherit the untrusted flag.
*/
- if (!env->cur_state->active_rcu_lock ||
- !(is_trusted_reg(reg) || is_rcu_reg(reg)))
- flag &= ~MEM_RCU;
- else
- flag |= PTR_MAYBE_NULL;
- } else if (reg->type & MEM_RCU) {
- /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
- * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
+ flag = PTR_UNTRUSTED;
+
+ } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
+ /* By default any pointer obtained from walking a trusted pointer is no
+ * longer trusted, unless the field being accessed has explicitly been
+ * marked as inheriting its parent's state of trust (either full or RCU).
+ * For example:
+ * 'cgroups' pointer is untrusted if task->cgroups dereference
+ * happened in a sleepable program outside of bpf_rcu_read_lock()
+ * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
+ * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
+ *
+ * A regular RCU-protected pointer with __rcu tag can also be deemed
+ * trusted if we are in an RCU CS. Such pointer can be NULL.
*/
- flag |= PTR_UNTRUSTED;
+ if (type_is_trusted(env, reg, off)) {
+ flag |= PTR_TRUSTED;
+ } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
+ if (type_is_rcu(env, reg, off)) {
+ /* ignore __rcu tag and mark it MEM_RCU */
+ flag |= MEM_RCU;
+ } else if (flag & MEM_RCU) {
+ /* __rcu tagged pointers can be NULL */
+ flag |= PTR_MAYBE_NULL;
+ } else if (flag & (MEM_PERCPU | MEM_USER)) {
+ /* keep as-is */
+ } else {
+ /* walking unknown pointers yields untrusted pointer */
+ flag = PTR_UNTRUSTED;
+ }
+ } else {
+ /*
+ * If not in RCU CS or MEM_RCU pointer can be NULL then
+ * aggressively mark as untrusted otherwise such
+ * pointers will be plain PTR_TO_BTF_ID without flags
+ * and will be allowed to be passed into helpers for
+ * compat reasons.
+ */
+ flag = PTR_UNTRUSTED;
+ }
+ } else {
+ /* Old compat. Deprecated */
+ flag &= ~PTR_TRUSTED;
}
if (atype == BPF_READ && value_regno >= 0)
@@ -5754,7 +5980,8 @@ static int check_stack_range_initialized(
stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
if (*stype == STACK_MISC)
goto mark;
- if (*stype == STACK_ZERO) {
+ if ((*stype == STACK_ZERO) ||
+ (*stype == STACK_INVALID && env->allow_uninit_stack)) {
if (clobber) {
/* helper can write anything into the stack */
*stype = STACK_MISC;
@@ -6206,11 +6433,11 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
* Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
* type, and declare it as 'const struct bpf_dynptr *' in their prototype.
*/
-int process_dynptr_func(struct bpf_verifier_env *env, int regno,
- enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta)
+static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
+ enum bpf_arg_type arg_type)
{
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- int spi = 0;
+ int err;
/* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
* ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
@@ -6219,15 +6446,6 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
return -EFAULT;
}
- /* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
- * check_func_arg_reg_off's logic. We only need to check offset
- * and its alignment for PTR_TO_STACK.
- */
- if (reg->type == PTR_TO_STACK) {
- spi = dynptr_get_spi(env, reg);
- if (spi < 0 && spi != -ERANGE)
- return spi;
- }
/* MEM_UNINIT - Points to memory that is an appropriate candidate for
* constructing a mutable bpf_dynptr object.
@@ -6245,30 +6463,30 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
* to.
*/
if (arg_type & MEM_UNINIT) {
- if (!is_dynptr_reg_valid_uninit(env, reg, spi)) {
+ int i;
+
+ if (!is_dynptr_reg_valid_uninit(env, reg)) {
verbose(env, "Dynptr has to be an uninitialized dynptr\n");
return -EINVAL;
}
- /* We only support one dynptr being uninitialized at the moment,
- * which is sufficient for the helper functions we have right now.
- */
- if (meta->uninit_dynptr_regno) {
- verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
- return -EFAULT;
+ /* we write BPF_DW bits (8 bytes) at a time */
+ for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
+ err = check_mem_access(env, insn_idx, regno,
+ i, BPF_DW, BPF_WRITE, -1, false);
+ if (err)
+ return err;
}
- meta->uninit_dynptr_regno = regno;
+ err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx);
} else /* MEM_RDONLY and None case from above */ {
- int err;
-
/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
return -EINVAL;
}
- if (!is_dynptr_reg_valid_init(env, reg, spi)) {
+ if (!is_dynptr_reg_valid_init(env, reg)) {
verbose(env,
"Expected an initialized dynptr as arg #%d\n",
regno);
@@ -6277,30 +6495,15 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
/* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
- const char *err_extra = "";
-
- switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
- case DYNPTR_TYPE_LOCAL:
- err_extra = "local";
- break;
- case DYNPTR_TYPE_RINGBUF:
- err_extra = "ringbuf";
- break;
- default:
- err_extra = "<unknown>";
- break;
- }
verbose(env,
"Expected a dynptr of type %s as arg #%d\n",
- err_extra, regno);
+ dynptr_type_str(arg_to_dynptr_type(arg_type)), regno);
return -EINVAL;
}
err = mark_dynptr_read(env, reg);
- if (err)
- return err;
}
- return 0;
+ return err;
}
static bool arg_type_is_mem_size(enum bpf_arg_type type)
@@ -6522,7 +6725,14 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
found:
- if (reg->type == PTR_TO_BTF_ID || reg->type & PTR_TRUSTED) {
+ if (base_type(reg->type) != PTR_TO_BTF_ID)
+ return 0;
+
+ switch ((int)reg->type) {
+ case PTR_TO_BTF_ID:
+ case PTR_TO_BTF_ID | PTR_TRUSTED:
+ case PTR_TO_BTF_ID | MEM_RCU:
+ {
/* For bpf_sk_release, it needs to match against first member
* 'struct sock_common', hence make an exception for it. This
* allows bpf_sk_release to work for multiple socket types.
@@ -6558,13 +6768,23 @@ found:
return -EACCES;
}
}
- } else if (type_is_alloc(reg->type)) {
+ break;
+ }
+ case PTR_TO_BTF_ID | MEM_ALLOC:
if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) {
verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
return -EFAULT;
}
+ /* Handled by helper specific checks */
+ break;
+ case PTR_TO_BTF_ID | MEM_PERCPU:
+ case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
+ /* Handled by helper specific checks */
+ break;
+ default:
+ verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
+ return -EFAULT;
}
-
return 0;
}
@@ -6651,7 +6871,6 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BTF_ID | MEM_ALLOC:
case PTR_TO_BTF_ID | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_RCU:
- case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* its fixed offset must be 0. In the other cases, fixed offset
@@ -6666,6 +6885,28 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
}
}
+static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
+ const struct bpf_func_proto *fn,
+ struct bpf_reg_state *regs)
+{
+ struct bpf_reg_state *state = NULL;
+ int i;
+
+ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
+ if (arg_type_is_dynptr(fn->arg_type[i])) {
+ if (state) {
+ verbose(env, "verifier internal error: multiple dynptr args\n");
+ return NULL;
+ }
+ state = &regs[BPF_REG_1 + i];
+ }
+
+ if (!state)
+ verbose(env, "verifier internal error: no dynptr arg found\n");
+
+ return state;
+}
+
static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
@@ -6692,9 +6933,28 @@ static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state
return state->stack[spi].spilled_ptr.ref_obj_id;
}
+static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg)
+{
+ struct bpf_func_state *state = func(env, reg);
+ int spi;
+
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ return reg->dynptr.type;
+
+ spi = __get_spi(reg->off);
+ if (spi < 0) {
+ verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
+ return BPF_DYNPTR_TYPE_INVALID;
+ }
+
+ return state->stack[spi].spilled_ptr.dynptr.type;
+}
+
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
- const struct bpf_func_proto *fn)
+ const struct bpf_func_proto *fn,
+ int insn_idx)
{
u32 regno = BPF_REG_1 + arg;
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
@@ -6907,7 +7167,7 @@ skip_type_check:
err = check_mem_size_reg(env, reg, regno, true, meta);
break;
case ARG_PTR_TO_DYNPTR:
- err = process_dynptr_func(env, regno, arg_type, meta);
+ err = process_dynptr_func(env, regno, insn_idx, arg_type);
if (err)
return err;
break;
@@ -7126,22 +7386,26 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
break;
case BPF_MAP_TYPE_SK_STORAGE:
if (func_id != BPF_FUNC_sk_storage_get &&
- func_id != BPF_FUNC_sk_storage_delete)
+ func_id != BPF_FUNC_sk_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_INODE_STORAGE:
if (func_id != BPF_FUNC_inode_storage_get &&
- func_id != BPF_FUNC_inode_storage_delete)
+ func_id != BPF_FUNC_inode_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_TASK_STORAGE:
if (func_id != BPF_FUNC_task_storage_get &&
- func_id != BPF_FUNC_task_storage_delete)
+ func_id != BPF_FUNC_task_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_CGRP_STORAGE:
if (func_id != BPF_FUNC_cgrp_storage_get &&
- func_id != BPF_FUNC_cgrp_storage_delete)
+ func_id != BPF_FUNC_cgrp_storage_delete &&
+ func_id != BPF_FUNC_kptr_xchg)
goto error;
break;
case BPF_MAP_TYPE_BLOOM_FILTER:
@@ -7355,6 +7619,9 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
* are now invalid, so turn them into unknown SCALAR_VALUE.
+ *
+ * This also applies to dynptr slices belonging to skb and xdp dynptrs,
+ * since these slices point to packet data.
*/
static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
@@ -7362,8 +7629,8 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
struct bpf_reg_state *reg;
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
- if (reg_is_pkt_pointer_any(reg))
- __mark_reg_unknown(env, reg);
+ if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
+ mark_reg_invalid(env, reg);
}));
}
@@ -7408,12 +7675,8 @@ static int release_reference(struct bpf_verifier_env *env,
return err;
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
- if (reg->ref_obj_id == ref_obj_id) {
- if (!env->allow_ptr_leaks)
- __mark_reg_not_init(env, reg);
- else
- __mark_reg_unknown(env, reg);
- }
+ if (reg->ref_obj_id == ref_obj_id)
+ mark_reg_invalid(env, reg);
}));
return 0;
@@ -7426,7 +7689,7 @@ static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
if (type_is_non_owning_ref(reg->type))
- __mark_reg_unknown(env, reg);
+ mark_reg_invalid(env, reg);
}));
}
@@ -8197,7 +8460,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
meta.func_id = func_id;
/* check args */
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
- err = check_func_arg(env, i, &meta, fn);
+ err = check_func_arg(env, i, &meta, fn, insn_idx);
if (err)
return err;
}
@@ -8222,30 +8485,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs = cur_regs(env);
- /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
- * be reinitialized by any dynptr helper. Hence, mark_stack_slots_dynptr
- * is safe to do directly.
- */
- if (meta.uninit_dynptr_regno) {
- if (regs[meta.uninit_dynptr_regno].type == CONST_PTR_TO_DYNPTR) {
- verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be initialized\n");
- return -EFAULT;
- }
- /* we write BPF_DW bits (8 bytes) at a time */
- for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
- err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
- i, BPF_DW, BPF_WRITE, -1, false);
- if (err)
- return err;
- }
-
- err = mark_stack_slots_dynptr(env, &regs[meta.uninit_dynptr_regno],
- fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1],
- insn_idx);
- if (err)
- return err;
- }
-
if (meta.release_regno) {
err = -EINVAL;
/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
@@ -8330,43 +8569,62 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
break;
case BPF_FUNC_dynptr_data:
- for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
- if (arg_type_is_dynptr(fn->arg_type[i])) {
- struct bpf_reg_state *reg = &regs[BPF_REG_1 + i];
- int id, ref_obj_id;
+ {
+ struct bpf_reg_state *reg;
+ int id, ref_obj_id;
- if (meta.dynptr_id) {
- verbose(env, "verifier internal error: meta.dynptr_id already set\n");
- return -EFAULT;
- }
+ reg = get_dynptr_arg_reg(env, fn, regs);
+ if (!reg)
+ return -EFAULT;
- if (meta.ref_obj_id) {
- verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
- return -EFAULT;
- }
- id = dynptr_id(env, reg);
- if (id < 0) {
- verbose(env, "verifier internal error: failed to obtain dynptr id\n");
- return id;
- }
+ if (meta.dynptr_id) {
+ verbose(env, "verifier internal error: meta.dynptr_id already set\n");
+ return -EFAULT;
+ }
+ if (meta.ref_obj_id) {
+ verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
+ return -EFAULT;
+ }
- ref_obj_id = dynptr_ref_obj_id(env, reg);
- if (ref_obj_id < 0) {
- verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
- return ref_obj_id;
- }
+ id = dynptr_id(env, reg);
+ if (id < 0) {
+ verbose(env, "verifier internal error: failed to obtain dynptr id\n");
+ return id;
+ }
- meta.dynptr_id = id;
- meta.ref_obj_id = ref_obj_id;
- break;
- }
+ ref_obj_id = dynptr_ref_obj_id(env, reg);
+ if (ref_obj_id < 0) {
+ verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
+ return ref_obj_id;
}
- if (i == MAX_BPF_FUNC_REG_ARGS) {
- verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");
+
+ meta.dynptr_id = id;
+ meta.ref_obj_id = ref_obj_id;
+
+ break;
+ }
+ case BPF_FUNC_dynptr_write:
+ {
+ enum bpf_dynptr_type dynptr_type;
+ struct bpf_reg_state *reg;
+
+ reg = get_dynptr_arg_reg(env, fn, regs);
+ if (!reg)
return -EFAULT;
- }
+
+ dynptr_type = dynptr_get_type(env, reg);
+ if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
+ return -EFAULT;
+
+ if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
+ /* this will trigger clear_all_pkt_pointers(), which will
+ * invalidate all dynptr slices associated with the skb
+ */
+ changes_data = true;
+
break;
+ }
case BPF_FUNC_user_ringbuf_drain:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_user_ringbuf_callback_state);
@@ -8595,36 +8853,6 @@ static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
}
}
-struct bpf_kfunc_call_arg_meta {
- /* In parameters */
- struct btf *btf;
- u32 func_id;
- u32 kfunc_flags;
- const struct btf_type *func_proto;
- const char *func_name;
- /* Out parameters */
- u32 ref_obj_id;
- u8 release_regno;
- bool r0_rdonly;
- u32 ret_btf_id;
- u64 r0_size;
- u32 subprogno;
- struct {
- u64 value;
- bool found;
- } arg_constant;
- struct {
- struct btf *btf;
- u32 btf_id;
- } arg_obj_drop;
- struct {
- struct btf_field *field;
- } arg_list_head;
- struct {
- struct btf_field *field;
- } arg_rbtree_root;
-};
-
static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->kfunc_flags & KF_ACQUIRE;
@@ -8696,6 +8924,19 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf,
return __kfunc_param_match_suffix(btf, arg, "__sz");
}
+static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
+ const struct btf_param *arg,
+ const struct bpf_reg_state *reg)
+{
+ const struct btf_type *t;
+
+ t = btf_type_skip_modifiers(btf, arg->type, NULL);
+ if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
+ return false;
+
+ return __kfunc_param_match_suffix(btf, arg, "__szk");
+}
+
static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
{
return __kfunc_param_match_suffix(btf, arg, "__k");
@@ -8711,6 +8952,11 @@ static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param
return __kfunc_param_match_suffix(btf, arg, "__alloc");
}
+static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
+{
+ return __kfunc_param_match_suffix(btf, arg, "__uninit");
+}
+
static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
const struct btf_param *arg,
const char *name)
@@ -8877,6 +9123,10 @@ enum special_kfunc_type {
KF_bpf_rbtree_remove,
KF_bpf_rbtree_add,
KF_bpf_rbtree_first,
+ KF_bpf_dynptr_from_skb,
+ KF_bpf_dynptr_from_xdp,
+ KF_bpf_dynptr_slice,
+ KF_bpf_dynptr_slice_rdwr,
};
BTF_SET_START(special_kfunc_set)
@@ -8891,6 +9141,10 @@ BTF_ID(func, bpf_rdonly_cast)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add)
BTF_ID(func, bpf_rbtree_first)
+BTF_ID(func, bpf_dynptr_from_skb)
+BTF_ID(func, bpf_dynptr_from_xdp)
+BTF_ID(func, bpf_dynptr_slice)
+BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
@@ -8907,6 +9161,10 @@ BTF_ID(func, bpf_rcu_read_unlock)
BTF_ID(func, bpf_rbtree_remove)
BTF_ID(func, bpf_rbtree_add)
BTF_ID(func, bpf_rbtree_first)
+BTF_ID(func, bpf_dynptr_from_skb)
+BTF_ID(func, bpf_dynptr_from_xdp)
+BTF_ID(func, bpf_dynptr_slice)
+BTF_ID(func, bpf_dynptr_slice_rdwr)
static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -8986,7 +9244,10 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
return KF_ARG_PTR_TO_CALLBACK;
- if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]))
+
+ if (argno + 1 < nargs &&
+ (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
+ is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
arg_mem_size = true;
/* This is the catch all argument type of register types supported by
@@ -9206,7 +9467,6 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_
ptr = reg->map_ptr;
break;
case PTR_TO_BTF_ID | MEM_ALLOC:
- case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
ptr = reg->btf;
break;
default:
@@ -9455,7 +9715,8 @@ static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
&meta->arg_rbtree_root.field);
}
-static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta)
+static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
+ int insn_idx)
{
const char *func_name = meta->func_name, *ref_tname;
const struct btf *btf = meta->btf;
@@ -9538,7 +9799,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return -EINVAL;
}
- if (is_kfunc_trusted_args(meta) &&
+ if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
(register_is_null(reg) || type_may_be_null(reg->type))) {
verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
return -EACCES;
@@ -9646,16 +9907,43 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return ret;
break;
case KF_ARG_PTR_TO_DYNPTR:
+ {
+ enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
+
if (reg->type != PTR_TO_STACK &&
reg->type != CONST_PTR_TO_DYNPTR) {
verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
return -EINVAL;
}
- ret = process_dynptr_func(env, regno, ARG_PTR_TO_DYNPTR | MEM_RDONLY, NULL);
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ dynptr_arg_type |= MEM_RDONLY;
+
+ if (is_kfunc_arg_uninit(btf, &args[i]))
+ dynptr_arg_type |= MEM_UNINIT;
+
+ if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb])
+ dynptr_arg_type |= DYNPTR_TYPE_SKB;
+ else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp])
+ dynptr_arg_type |= DYNPTR_TYPE_XDP;
+
+ ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type);
if (ret < 0)
return ret;
+
+ if (!(dynptr_arg_type & MEM_UNINIT)) {
+ int id = dynptr_id(env, reg);
+
+ if (id < 0) {
+ verbose(env, "verifier internal error: failed to obtain dynptr id\n");
+ return id;
+ }
+ meta->initialized_dynptr.id = id;
+ meta->initialized_dynptr.type = dynptr_get_type(env, reg);
+ }
+
break;
+ }
case KF_ARG_PTR_TO_LIST_HEAD:
if (reg->type != PTR_TO_MAP_VALUE &&
reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
@@ -9749,14 +10037,33 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return ret;
break;
case KF_ARG_PTR_TO_MEM_SIZE:
- ret = check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1);
+ {
+ struct bpf_reg_state *size_reg = &regs[regno + 1];
+ const struct btf_param *size_arg = &args[i + 1];
+
+ ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
if (ret < 0) {
verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
return ret;
}
- /* Skip next '__sz' argument */
+
+ if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
+ if (meta->arg_constant.found) {
+ verbose(env, "verifier internal error: only one constant argument permitted\n");
+ return -EFAULT;
+ }
+ if (!tnum_is_const(size_reg->var_off)) {
+ verbose(env, "R%d must be a known constant\n", regno + 1);
+ return -EINVAL;
+ }
+ meta->arg_constant.found = true;
+ meta->arg_constant.value = size_reg->var_off.value;
+ }
+
+ /* Skip next '__sz' or '__szk' argument */
i++;
break;
+ }
case KF_ARG_PTR_TO_CALLBACK:
meta->subprogno = reg->subprogno;
break;
@@ -9828,10 +10135,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
- if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) {
- verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name);
- return -EACCES;
- }
if (env->cur_state->active_rcu_lock) {
struct bpf_func_state *state;
@@ -9860,7 +10163,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
/* Check the arguments */
- err = check_kfunc_args(env, &meta);
+ err = check_kfunc_args(env, &meta, insn_idx);
if (err < 0)
return err;
/* In case of release function, we get register number of refcounted
@@ -9991,6 +10294,42 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].btf_id = meta.arg_constant.value;
+ } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
+ meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
+ enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+
+ if (!meta.arg_constant.found) {
+ verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
+ return -EFAULT;
+ }
+
+ regs[BPF_REG_0].mem_size = meta.arg_constant.value;
+
+ /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
+ regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
+
+ if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
+ regs[BPF_REG_0].type |= MEM_RDONLY;
+ } else {
+ /* this will set env->seen_direct_write to true */
+ if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
+ verbose(env, "the prog does not allow writes to packet data\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!meta.initialized_dynptr.id) {
+ verbose(env, "verifier internal error: no dynptr id\n");
+ return -EFAULT;
+ }
+ regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
+
+ /* we don't need to set BPF_REG_0's ref obj id
+ * because packet slices are not refcounted (see
+ * dynptr_type_refcounted)
+ */
} else {
verbose(env, "kernel function %s unhandled dynamic return type\n",
meta.func_name);
@@ -9998,6 +10337,14 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
} else if (!__btf_type_is_struct(ptr_type)) {
if (!meta.r0_size) {
+ __u32 sz;
+
+ if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
+ meta.r0_size = sz;
+ meta.r0_rdonly = true;
+ }
+ }
+ if (!meta.r0_size) {
ptr_type_name = btf_name_by_offset(desc_btf,
ptr_type->name_off);
verbose(env,
@@ -13152,44 +13499,43 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns,
*/
static int visit_insn(int t, struct bpf_verifier_env *env)
{
- struct bpf_insn *insns = env->prog->insnsi;
+ struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
int ret;
- if (bpf_pseudo_func(insns + t))
+ if (bpf_pseudo_func(insn))
return visit_func_call_insn(t, insns, env, true);
/* All non-branch instructions have a single fall-through edge. */
- if (BPF_CLASS(insns[t].code) != BPF_JMP &&
- BPF_CLASS(insns[t].code) != BPF_JMP32)
+ if (BPF_CLASS(insn->code) != BPF_JMP &&
+ BPF_CLASS(insn->code) != BPF_JMP32)
return push_insn(t, t + 1, FALLTHROUGH, env, false);
- switch (BPF_OP(insns[t].code)) {
+ switch (BPF_OP(insn->code)) {
case BPF_EXIT:
return DONE_EXPLORING;
case BPF_CALL:
- if (insns[t].imm == BPF_FUNC_timer_set_callback)
+ if (insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback)
/* Mark this call insn as a prune point to trigger
* is_state_visited() check before call itself is
* processed by __check_func_call(). Otherwise new
* async state will be pushed for further exploration.
*/
mark_prune_point(env, t);
- return visit_func_call_insn(t, insns, env,
- insns[t].src_reg == BPF_PSEUDO_CALL);
+ return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
case BPF_JA:
- if (BPF_SRC(insns[t].code) != BPF_K)
+ if (BPF_SRC(insn->code) != BPF_K)
return -EINVAL;
/* unconditional jump with single edge */
- ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
+ ret = push_insn(t, t + insn->off + 1, FALLTHROUGH, env,
true);
if (ret)
return ret;
- mark_prune_point(env, t + insns[t].off + 1);
- mark_jmp_point(env, t + insns[t].off + 1);
+ mark_prune_point(env, t + insn->off + 1);
+ mark_jmp_point(env, t + insn->off + 1);
return ret;
@@ -13201,7 +13547,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
if (ret)
return ret;
- return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
+ return push_insn(t, t + insn->off + 1, BRANCH, env, true);
}
}
@@ -13877,13 +14223,17 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
tnum_in(rold->var_off, rcur->var_off);
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
+ case PTR_TO_MEM:
+ case PTR_TO_BUF:
+ case PTR_TO_TP_BUFFER:
/* If the new min/max/var_off satisfy the old ones and
* everything else matches, we are OK.
*/
return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
range_within(rold, rcur) &&
tnum_in(rold->var_off, rcur->var_off) &&
- check_ids(rold->id, rcur->id, idmap);
+ check_ids(rold->id, rcur->id, idmap) &&
+ check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
case PTR_TO_PACKET_META:
case PTR_TO_PACKET:
/* We must have at least as much range as the old ptr
@@ -13936,6 +14286,10 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
continue;
+ if (env->allow_uninit_stack &&
+ old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
+ continue;
+
/* explored stack has more populated slots than current stack
* and these slots were used
*/
@@ -14311,7 +14665,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
* This threshold shouldn't be too high either, since states
* at the end of the loop are likely to be useful in pruning.
*/
- if (env->jmps_processed - env->prev_jmps_processed < 20 &&
+ if (!env->test_state_freq &&
+ env->jmps_processed - env->prev_jmps_processed < 20 &&
env->insn_processed - env->prev_insn_processed < 100)
add_new_state = false;
goto miss;
@@ -14500,6 +14855,44 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
!reg_type_mismatch_ok(prev));
}
+static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
+ bool allow_trust_missmatch)
+{
+ enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
+
+ if (*prev_type == NOT_INIT) {
+ /* Saw a valid insn
+ * dst_reg = *(u32 *)(src_reg + off)
+ * save type to validate intersecting paths
+ */
+ *prev_type = type;
+ } else if (reg_type_mismatch(type, *prev_type)) {
+ /* Abuser program is trying to use the same insn
+ * dst_reg = *(u32*) (src_reg + off)
+ * with different pointer types:
+ * src_reg == ctx in one branch and
+ * src_reg == stack|map in some other branch.
+ * Reject it.
+ */
+ if (allow_trust_missmatch &&
+ base_type(type) == PTR_TO_BTF_ID &&
+ base_type(*prev_type) == PTR_TO_BTF_ID) {
+ /*
+ * Have to support a use case when one path through
+ * the program yields TRUSTED pointer while another
+ * is UNTRUSTED. Fallback to UNTRUSTED to generate
+ * BPF_PROBE_MEM.
+ */
+ *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
+ } else {
+ verbose(env, "same insn cannot be used with different pointers\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int do_check(struct bpf_verifier_env *env)
{
bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
@@ -14609,7 +15002,7 @@ static int do_check(struct bpf_verifier_env *env)
return err;
} else if (class == BPF_LDX) {
- enum bpf_reg_type *prev_src_type, src_reg_type;
+ enum bpf_reg_type src_reg_type;
/* check for reserved fields is already done */
@@ -14633,29 +15026,11 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
- prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
-
- if (*prev_src_type == NOT_INIT) {
- /* saw a valid insn
- * dst_reg = *(u32 *)(src_reg + off)
- * save type to validate intersecting paths
- */
- *prev_src_type = src_reg_type;
-
- } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
- /* ABuser program is trying to use the same insn
- * dst_reg = *(u32*) (src_reg + off)
- * with different pointer types:
- * src_reg == ctx in one branch and
- * src_reg == stack|map in some other branch.
- * Reject it.
- */
- verbose(env, "same insn cannot be used with different pointers\n");
- return -EINVAL;
- }
-
+ err = save_aux_ptr_type(env, src_reg_type, true);
+ if (err)
+ return err;
} else if (class == BPF_STX) {
- enum bpf_reg_type *prev_dst_type, dst_reg_type;
+ enum bpf_reg_type dst_reg_type;
if (BPF_MODE(insn->code) == BPF_ATOMIC) {
err = check_atomic(env, env->insn_idx, insn);
@@ -14688,16 +15063,12 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
- prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
-
- if (*prev_dst_type == NOT_INIT) {
- *prev_dst_type = dst_reg_type;
- } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
- verbose(env, "same insn cannot be used with different pointers\n");
- return -EINVAL;
- }
-
+ err = save_aux_ptr_type(env, dst_reg_type, false);
+ if (err)
+ return err;
} else if (class == BPF_ST) {
+ enum bpf_reg_type dst_reg_type;
+
if (BPF_MODE(insn->code) != BPF_MEM ||
insn->src_reg != BPF_REG_0) {
verbose(env, "BPF_ST uses reserved fields\n");
@@ -14708,12 +15079,7 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
- if (is_ctx_reg(env, insn->dst_reg)) {
- verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
- insn->dst_reg,
- reg_type_str(env, reg_state(env, insn->dst_reg)->type));
- return -EACCES;
- }
+ dst_reg_type = regs[insn->dst_reg].type;
/* check that memory (dst_reg + off) is writeable */
err = check_mem_access(env, env->insn_idx, insn->dst_reg,
@@ -14722,6 +15088,9 @@ static int do_check(struct bpf_verifier_env *env)
if (err)
return err;
+ err = save_aux_ptr_type(env, dst_reg_type, false);
+ if (err)
+ return err;
} else if (class == BPF_JMP || class == BPF_JMP32) {
u8 opcode = BPF_OP(insn->code);
@@ -14756,6 +15125,8 @@ static int do_check(struct bpf_verifier_env *env)
err = check_helper_call(env, insn, &env->insn_idx);
if (err)
return err;
+
+ mark_reg_scratched(env, BPF_REG_0);
} else if (opcode == BPF_JA) {
if (BPF_SRC(insn->code) != BPF_K ||
insn->imm != 0 ||
@@ -15830,14 +16201,12 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++, insn++) {
bpf_convert_ctx_access_t convert_ctx_access;
- bool ctx_access;
if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
type = BPF_READ;
- ctx_access = true;
} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
@@ -15847,7 +16216,6 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
type = BPF_WRITE;
- ctx_access = BPF_CLASS(insn->code) == BPF_STX;
} else {
continue;
}
@@ -15870,9 +16238,6 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
continue;
}
- if (!ctx_access)
- continue;
-
switch ((int)env->insn_aux_data[i + delta].ptr_type) {
case PTR_TO_CTX:
if (!ops->convert_ctx_access)
@@ -16321,6 +16686,17 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
+ } else if (desc->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
+ bool seen_direct_write = env->seen_direct_write;
+ bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
+
+ if (is_rdonly)
+ insn->imm = BPF_CALL_IMM(bpf_dynptr_from_skb_rdonly);
+
+ /* restore env->seen_direct_write to its original value, since
+ * may_access_direct_pkt_data mutates it
+ */
+ env->seen_direct_write = seen_direct_write;
}
return 0;
}
@@ -17712,8 +18088,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
env->bypass_spec_v1 = bpf_bypass_spec_v1();
env->bypass_spec_v4 = bpf_bypass_spec_v4();
env->bpf_capable = bpf_capable();
- env->rcu_tag_supported = btf_vmlinux &&
- btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0;
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e8da032bb6fc..bcf91bc7bf71 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1453,10 +1453,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
NULL : &bpf_probe_read_compat_str_proto;
#endif
#ifdef CONFIG_CGROUPS
- case BPF_FUNC_get_current_cgroup_id:
- return &bpf_get_current_cgroup_id_proto;
- case BPF_FUNC_get_current_ancestor_cgroup_id:
- return &bpf_get_current_ancestor_cgroup_id_proto;
case BPF_FUNC_cgrp_storage_get:
return &bpf_cgrp_storage_get_proto;
case BPF_FUNC_cgrp_storage_delete:
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 6f3d654b3339..6a8b33a103a4 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -737,6 +737,7 @@ __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
{
+ /* p != NULL, but p->cnt could be 0 */
}
__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
@@ -784,7 +785,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
BTF_SET8_END(test_sk_check_kfunc_ids)
diff --git a/net/core/filter.c b/net/core/filter.c
index 1d6f165923bf..50f649f1b4a9 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1721,6 +1721,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.arg5_type = ARG_ANYTHING,
};
+int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
+ u32 len, u64 flags)
+{
+ return ____bpf_skb_store_bytes(skb, offset, from, len, flags);
+}
+
BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
void *, to, u32, len)
{
@@ -1751,6 +1757,11 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+{
+ return ____bpf_skb_load_bytes(skb, offset, to, len);
+}
+
BPF_CALL_4(bpf_flow_dissector_load_bytes,
const struct bpf_flow_dissector *, ctx, u32, offset,
void *, to, u32, len)
@@ -3828,7 +3839,7 @@ static const struct bpf_func_proto sk_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
-BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
+BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
{
return xdp_get_buff_len(xdp);
}
@@ -3883,8 +3894,8 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
-static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
- void *buf, unsigned long len, bool flush)
+void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
+ void *buf, unsigned long len, bool flush)
{
unsigned long ptr_len, ptr_off = 0;
skb_frag_t *next_frag, *end_frag;
@@ -3930,7 +3941,7 @@ static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
}
}
-static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
u32 size = xdp->data_end - xdp->data;
@@ -3988,6 +3999,11 @@ static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len)
+{
+ return ____bpf_xdp_load_bytes(xdp, offset, buf, len);
+}
+
BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
void *, buf, u32, len)
{
@@ -4015,6 +4031,11 @@ static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len)
+{
+ return ____bpf_xdp_store_bytes(xdp, offset, buf, len);
+}
+
static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
@@ -8144,12 +8165,6 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_delete_proto;
case BPF_FUNC_get_netns_cookie:
return &bpf_get_netns_cookie_sk_msg_proto;
-#ifdef CONFIG_CGROUPS
- case BPF_FUNC_get_current_cgroup_id:
- return &bpf_get_current_cgroup_id_proto;
- case BPF_FUNC_get_current_ancestor_cgroup_id:
- return &bpf_get_current_ancestor_cgroup_id_proto;
-#endif
#ifdef CONFIG_CGROUP_NET_CLASSID
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_curr_proto;
@@ -9264,11 +9279,15 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
#endif
/* <store>: skb->tstamp = tstamp */
- *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg,
- offsetof(struct sk_buff, tstamp));
+ *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_DW | BPF_MEM,
+ skb_reg, value_reg, offsetof(struct sk_buff, tstamp), si->imm);
return insn;
}
+#define BPF_EMIT_STORE(size, si, off) \
+ BPF_RAW_INSN(BPF_CLASS((si)->code) | (size) | BPF_MEM, \
+ (si)->dst_reg, (si)->src_reg, (off), (si)->imm)
+
static u32 bpf_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
struct bpf_insn *insn_buf,
@@ -9298,9 +9317,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, priority):
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff, priority, 4,
- target_size));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ bpf_target_off(struct sk_buff, priority, 4,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff, priority, 4,
@@ -9331,9 +9350,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, mark):
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff, mark, 4,
- target_size));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ bpf_target_off(struct sk_buff, mark, 4,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff, mark, 4,
@@ -9352,11 +9371,16 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, queue_mapping):
if (type == BPF_WRITE) {
- *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
- *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff,
- queue_mapping,
- 2, target_size));
+ u32 off = bpf_target_off(struct sk_buff, queue_mapping, 2, target_size);
+
+ if (BPF_CLASS(si->code) == BPF_ST && si->imm >= NO_QUEUE_MAPPING) {
+ *insn++ = BPF_JMP_A(0); /* noop */
+ break;
+ }
+
+ if (BPF_CLASS(si->code) == BPF_STX)
+ *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
+ *insn++ = BPF_EMIT_STORE(BPF_H, si, off);
} else {
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff,
@@ -9392,8 +9416,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct sk_buff, cb);
off += offsetof(struct qdisc_skb_cb, data);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
- si->src_reg, off);
+ *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off);
else
*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
si->src_reg, off);
@@ -9408,8 +9431,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct qdisc_skb_cb, tc_classid);
*target_size = 2;
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
- si->src_reg, off);
+ *insn++ = BPF_EMIT_STORE(BPF_H, si, off);
else
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
si->src_reg, off);
@@ -9442,9 +9464,9 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
- bpf_target_off(struct sk_buff, tc_index, 2,
- target_size));
+ *insn++ = BPF_EMIT_STORE(BPF_H, si,
+ bpf_target_off(struct sk_buff, tc_index, 2,
+ target_size));
else
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff, tc_index, 2,
@@ -9645,8 +9667,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, sk_bound_dev_if));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ offsetof(struct sock, sk_bound_dev_if));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_bound_dev_if));
@@ -9656,8 +9678,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, sk_mark));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ offsetof(struct sock, sk_mark));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_mark));
@@ -9667,8 +9689,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, sk_priority));
+ *insn++ = BPF_EMIT_STORE(BPF_W, si,
+ offsetof(struct sock, sk_priority));
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
offsetof(struct sock, sk_priority));
@@ -9933,10 +9955,12 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
offsetof(S, TF)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
si->dst_reg, offsetof(S, F)); \
- *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
+ *insn++ = BPF_RAW_INSN(SIZE | BPF_MEM | BPF_CLASS(si->code), \
+ tmp_reg, si->src_reg, \
bpf_target_off(NS, NF, sizeof_field(NS, NF), \
target_size) \
- + OFF); \
+ + OFF, \
+ si->imm); \
*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
offsetof(S, TF)); \
} while (0)
@@ -10171,9 +10195,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
struct bpf_sock_ops_kern, sk),\
reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, sk));\
- *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
- reg, si->src_reg, \
- offsetof(OBJ, OBJ_FIELD)); \
+ *insn++ = BPF_RAW_INSN(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD) | \
+ BPF_MEM | BPF_CLASS(si->code), \
+ reg, si->src_reg, \
+ offsetof(OBJ, OBJ_FIELD), \
+ si->imm); \
*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
offsetof(struct bpf_sock_ops_kern, \
temp)); \
@@ -10205,8 +10231,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
off -= offsetof(struct bpf_sock_ops, replylong[0]);
off += offsetof(struct bpf_sock_ops_kern, replylong[0]);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
- off);
+ *insn++ = BPF_EMIT_STORE(BPF_W, si, off);
else
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
off);
@@ -10563,8 +10588,7 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
off += offsetof(struct sk_buff, cb);
off += offsetof(struct sk_skb_cb, data);
if (type == BPF_WRITE)
- *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
- si->src_reg, off);
+ *insn++ = BPF_EMIT_STORE(BPF_SIZE(si->code), si, off);
else
*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
si->src_reg, off);
@@ -11621,3 +11645,82 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
return func;
}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr__uninit)
+{
+ if (flags) {
+ bpf_dynptr_set_null(ptr__uninit);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len);
+
+ return 0;
+}
+
+__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags,
+ struct bpf_dynptr_kern *ptr__uninit)
+{
+ if (flags) {
+ bpf_dynptr_set_null(ptr__uninit);
+ return -EINVAL;
+ }
+
+ bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp));
+
+ return 0;
+}
+__diag_pop();
+
+int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags,
+ struct bpf_dynptr_kern *ptr__uninit)
+{
+ int err;
+
+ err = bpf_dynptr_from_skb(skb, flags, ptr__uninit);
+ if (err)
+ return err;
+
+ bpf_dynptr_set_rdonly(ptr__uninit);
+
+ return 0;
+}
+
+BTF_SET8_START(bpf_kfunc_check_set_skb)
+BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
+BTF_SET8_END(bpf_kfunc_check_set_skb)
+
+BTF_SET8_START(bpf_kfunc_check_set_xdp)
+BTF_ID_FLAGS(func, bpf_dynptr_from_xdp)
+BTF_SET8_END(bpf_kfunc_check_set_xdp)
+
+static const struct btf_kfunc_id_set bpf_kfunc_set_skb = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_skb,
+};
+
+static const struct btf_kfunc_id_set bpf_kfunc_set_xdp = {
+ .owner = THIS_MODULE,
+ .set = &bpf_kfunc_check_set_xdp,
+};
+
+static int __init bpf_kfunc_init(void)
+{
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SK_SKB, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCKET_FILTER, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_OUT, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_IN, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
+ return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
+}
+late_initcall(bpf_kfunc_init);
diff --git a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h
deleted file mode 100644
index b551b741653d..000000000000
--- a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
-#define _UAPI__ASM_BPF_PERF_EVENT_H__
-
-#include <asm/ptrace.h>
-
-typedef struct user_pt_regs bpf_user_pt_regs_t;
-
-#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
deleted file mode 100644
index 0a8e37a519f2..000000000000
--- a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
-#define _UAPI__ASM_BPF_PERF_EVENT_H__
-
-#include "ptrace.h"
-
-typedef user_pt_regs bpf_user_pt_regs_t;
-
-#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h
deleted file mode 100644
index ad64d673b5e6..000000000000
--- a/tools/arch/s390/include/uapi/asm/ptrace.h
+++ /dev/null
@@ -1,458 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * S390 version
- * Copyright IBM Corp. 1999, 2000
- * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
- */
-
-#ifndef _UAPI_S390_PTRACE_H
-#define _UAPI_S390_PTRACE_H
-
-/*
- * Offsets in the user_regs_struct. They are used for the ptrace
- * system call and in entry.S
- */
-#ifndef __s390x__
-
-#define PT_PSWMASK 0x00
-#define PT_PSWADDR 0x04
-#define PT_GPR0 0x08
-#define PT_GPR1 0x0C
-#define PT_GPR2 0x10
-#define PT_GPR3 0x14
-#define PT_GPR4 0x18
-#define PT_GPR5 0x1C
-#define PT_GPR6 0x20
-#define PT_GPR7 0x24
-#define PT_GPR8 0x28
-#define PT_GPR9 0x2C
-#define PT_GPR10 0x30
-#define PT_GPR11 0x34
-#define PT_GPR12 0x38
-#define PT_GPR13 0x3C
-#define PT_GPR14 0x40
-#define PT_GPR15 0x44
-#define PT_ACR0 0x48
-#define PT_ACR1 0x4C
-#define PT_ACR2 0x50
-#define PT_ACR3 0x54
-#define PT_ACR4 0x58
-#define PT_ACR5 0x5C
-#define PT_ACR6 0x60
-#define PT_ACR7 0x64
-#define PT_ACR8 0x68
-#define PT_ACR9 0x6C
-#define PT_ACR10 0x70
-#define PT_ACR11 0x74
-#define PT_ACR12 0x78
-#define PT_ACR13 0x7C
-#define PT_ACR14 0x80
-#define PT_ACR15 0x84
-#define PT_ORIGGPR2 0x88
-#define PT_FPC 0x90
-/*
- * A nasty fact of life that the ptrace api
- * only supports passing of longs.
- */
-#define PT_FPR0_HI 0x98
-#define PT_FPR0_LO 0x9C
-#define PT_FPR1_HI 0xA0
-#define PT_FPR1_LO 0xA4
-#define PT_FPR2_HI 0xA8
-#define PT_FPR2_LO 0xAC
-#define PT_FPR3_HI 0xB0
-#define PT_FPR3_LO 0xB4
-#define PT_FPR4_HI 0xB8
-#define PT_FPR4_LO 0xBC
-#define PT_FPR5_HI 0xC0
-#define PT_FPR5_LO 0xC4
-#define PT_FPR6_HI 0xC8
-#define PT_FPR6_LO 0xCC
-#define PT_FPR7_HI 0xD0
-#define PT_FPR7_LO 0xD4
-#define PT_FPR8_HI 0xD8
-#define PT_FPR8_LO 0XDC
-#define PT_FPR9_HI 0xE0
-#define PT_FPR9_LO 0xE4
-#define PT_FPR10_HI 0xE8
-#define PT_FPR10_LO 0xEC
-#define PT_FPR11_HI 0xF0
-#define PT_FPR11_LO 0xF4
-#define PT_FPR12_HI 0xF8
-#define PT_FPR12_LO 0xFC
-#define PT_FPR13_HI 0x100
-#define PT_FPR13_LO 0x104
-#define PT_FPR14_HI 0x108
-#define PT_FPR14_LO 0x10C
-#define PT_FPR15_HI 0x110
-#define PT_FPR15_LO 0x114
-#define PT_CR_9 0x118
-#define PT_CR_10 0x11C
-#define PT_CR_11 0x120
-#define PT_IEEE_IP 0x13C
-#define PT_LASTOFF PT_IEEE_IP
-#define PT_ENDREGS 0x140-1
-
-#define GPR_SIZE 4
-#define CR_SIZE 4
-
-#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */
-
-#else /* __s390x__ */
-
-#define PT_PSWMASK 0x00
-#define PT_PSWADDR 0x08
-#define PT_GPR0 0x10
-#define PT_GPR1 0x18
-#define PT_GPR2 0x20
-#define PT_GPR3 0x28
-#define PT_GPR4 0x30
-#define PT_GPR5 0x38
-#define PT_GPR6 0x40
-#define PT_GPR7 0x48
-#define PT_GPR8 0x50
-#define PT_GPR9 0x58
-#define PT_GPR10 0x60
-#define PT_GPR11 0x68
-#define PT_GPR12 0x70
-#define PT_GPR13 0x78
-#define PT_GPR14 0x80
-#define PT_GPR15 0x88
-#define PT_ACR0 0x90
-#define PT_ACR1 0x94
-#define PT_ACR2 0x98
-#define PT_ACR3 0x9C
-#define PT_ACR4 0xA0
-#define PT_ACR5 0xA4
-#define PT_ACR6 0xA8
-#define PT_ACR7 0xAC
-#define PT_ACR8 0xB0
-#define PT_ACR9 0xB4
-#define PT_ACR10 0xB8
-#define PT_ACR11 0xBC
-#define PT_ACR12 0xC0
-#define PT_ACR13 0xC4
-#define PT_ACR14 0xC8
-#define PT_ACR15 0xCC
-#define PT_ORIGGPR2 0xD0
-#define PT_FPC 0xD8
-#define PT_FPR0 0xE0
-#define PT_FPR1 0xE8
-#define PT_FPR2 0xF0
-#define PT_FPR3 0xF8
-#define PT_FPR4 0x100
-#define PT_FPR5 0x108
-#define PT_FPR6 0x110
-#define PT_FPR7 0x118
-#define PT_FPR8 0x120
-#define PT_FPR9 0x128
-#define PT_FPR10 0x130
-#define PT_FPR11 0x138
-#define PT_FPR12 0x140
-#define PT_FPR13 0x148
-#define PT_FPR14 0x150
-#define PT_FPR15 0x158
-#define PT_CR_9 0x160
-#define PT_CR_10 0x168
-#define PT_CR_11 0x170
-#define PT_IEEE_IP 0x1A8
-#define PT_LASTOFF PT_IEEE_IP
-#define PT_ENDREGS 0x1B0-1
-
-#define GPR_SIZE 8
-#define CR_SIZE 8
-
-#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */
-
-#endif /* __s390x__ */
-
-#define NUM_GPRS 16
-#define NUM_FPRS 16
-#define NUM_CRS 16
-#define NUM_ACRS 16
-
-#define NUM_CR_WORDS 3
-
-#define FPR_SIZE 8
-#define FPC_SIZE 4
-#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */
-#define ACR_SIZE 4
-
-
-#define PTRACE_OLDSETOPTIONS 21
-#define PTRACE_SYSEMU 31
-#define PTRACE_SYSEMU_SINGLESTEP 32
-#ifndef __ASSEMBLY__
-#include <linux/stddef.h>
-#include <linux/types.h>
-
-typedef union {
- float f;
- double d;
- __u64 ui;
- struct
- {
- __u32 hi;
- __u32 lo;
- } fp;
-} freg_t;
-
-typedef struct {
- __u32 fpc;
- __u32 pad;
- freg_t fprs[NUM_FPRS];
-} s390_fp_regs;
-
-#define FPC_EXCEPTION_MASK 0xF8000000
-#define FPC_FLAGS_MASK 0x00F80000
-#define FPC_DXC_MASK 0x0000FF00
-#define FPC_RM_MASK 0x00000003
-
-/* this typedef defines how a Program Status Word looks like */
-typedef struct {
- unsigned long mask;
- unsigned long addr;
-} __attribute__ ((aligned(8))) psw_t;
-
-#ifndef __s390x__
-
-#define PSW_MASK_PER 0x40000000UL
-#define PSW_MASK_DAT 0x04000000UL
-#define PSW_MASK_IO 0x02000000UL
-#define PSW_MASK_EXT 0x01000000UL
-#define PSW_MASK_KEY 0x00F00000UL
-#define PSW_MASK_BASE 0x00080000UL /* always one */
-#define PSW_MASK_MCHECK 0x00040000UL
-#define PSW_MASK_WAIT 0x00020000UL
-#define PSW_MASK_PSTATE 0x00010000UL
-#define PSW_MASK_ASC 0x0000C000UL
-#define PSW_MASK_CC 0x00003000UL
-#define PSW_MASK_PM 0x00000F00UL
-#define PSW_MASK_RI 0x00000000UL
-#define PSW_MASK_EA 0x00000000UL
-#define PSW_MASK_BA 0x00000000UL
-
-#define PSW_MASK_USER 0x0000FF00UL
-
-#define PSW_ADDR_AMODE 0x80000000UL
-#define PSW_ADDR_INSN 0x7FFFFFFFUL
-
-#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20)
-
-#define PSW_ASC_PRIMARY 0x00000000UL
-#define PSW_ASC_ACCREG 0x00004000UL
-#define PSW_ASC_SECONDARY 0x00008000UL
-#define PSW_ASC_HOME 0x0000C000UL
-
-#else /* __s390x__ */
-
-#define PSW_MASK_PER 0x4000000000000000UL
-#define PSW_MASK_DAT 0x0400000000000000UL
-#define PSW_MASK_IO 0x0200000000000000UL
-#define PSW_MASK_EXT 0x0100000000000000UL
-#define PSW_MASK_BASE 0x0000000000000000UL
-#define PSW_MASK_KEY 0x00F0000000000000UL
-#define PSW_MASK_MCHECK 0x0004000000000000UL
-#define PSW_MASK_WAIT 0x0002000000000000UL
-#define PSW_MASK_PSTATE 0x0001000000000000UL
-#define PSW_MASK_ASC 0x0000C00000000000UL
-#define PSW_MASK_CC 0x0000300000000000UL
-#define PSW_MASK_PM 0x00000F0000000000UL
-#define PSW_MASK_RI 0x0000008000000000UL
-#define PSW_MASK_EA 0x0000000100000000UL
-#define PSW_MASK_BA 0x0000000080000000UL
-
-#define PSW_MASK_USER 0x0000FF0180000000UL
-
-#define PSW_ADDR_AMODE 0x0000000000000000UL
-#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
-
-#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52)
-
-#define PSW_ASC_PRIMARY 0x0000000000000000UL
-#define PSW_ASC_ACCREG 0x0000400000000000UL
-#define PSW_ASC_SECONDARY 0x0000800000000000UL
-#define PSW_ASC_HOME 0x0000C00000000000UL
-
-#endif /* __s390x__ */
-
-
-/*
- * The s390_regs structure is used to define the elf_gregset_t.
- */
-typedef struct {
- psw_t psw;
- unsigned long gprs[NUM_GPRS];
- unsigned int acrs[NUM_ACRS];
- unsigned long orig_gpr2;
-} s390_regs;
-
-/*
- * The user_pt_regs structure exports the beginning of
- * the in-kernel pt_regs structure to user space.
- */
-typedef struct {
- unsigned long args[1];
- psw_t psw;
- unsigned long gprs[NUM_GPRS];
-} user_pt_regs;
-
-/*
- * Now for the user space program event recording (trace) definitions.
- * The following structures are used only for the ptrace interface, don't
- * touch or even look at it if you don't want to modify the user-space
- * ptrace interface. In particular stay away from it for in-kernel PER.
- */
-typedef struct {
- unsigned long cr[NUM_CR_WORDS];
-} per_cr_words;
-
-#define PER_EM_MASK 0xE8000000UL
-
-typedef struct {
-#ifdef __s390x__
- unsigned : 32;
-#endif /* __s390x__ */
- unsigned em_branching : 1;
- unsigned em_instruction_fetch : 1;
- /*
- * Switching on storage alteration automatically fixes
- * the storage alteration event bit in the users std.
- */
- unsigned em_storage_alteration : 1;
- unsigned em_gpr_alt_unused : 1;
- unsigned em_store_real_address : 1;
- unsigned : 3;
- unsigned branch_addr_ctl : 1;
- unsigned : 1;
- unsigned storage_alt_space_ctl : 1;
- unsigned : 21;
- unsigned long starting_addr;
- unsigned long ending_addr;
-} per_cr_bits;
-
-typedef struct {
- unsigned short perc_atmid;
- unsigned long address;
- unsigned char access_id;
-} per_lowcore_words;
-
-typedef struct {
- unsigned perc_branching : 1;
- unsigned perc_instruction_fetch : 1;
- unsigned perc_storage_alteration : 1;
- unsigned perc_gpr_alt_unused : 1;
- unsigned perc_store_real_address : 1;
- unsigned : 3;
- unsigned atmid_psw_bit_31 : 1;
- unsigned atmid_validity_bit : 1;
- unsigned atmid_psw_bit_32 : 1;
- unsigned atmid_psw_bit_5 : 1;
- unsigned atmid_psw_bit_16 : 1;
- unsigned atmid_psw_bit_17 : 1;
- unsigned si : 2;
- unsigned long address;
- unsigned : 4;
- unsigned access_id : 4;
-} per_lowcore_bits;
-
-typedef struct {
- union {
- per_cr_words words;
- per_cr_bits bits;
- } control_regs;
- /*
- * The single_step and instruction_fetch bits are obsolete,
- * the kernel always sets them to zero. To enable single
- * stepping use ptrace(PTRACE_SINGLESTEP) instead.
- */
- unsigned single_step : 1;
- unsigned instruction_fetch : 1;
- unsigned : 30;
- /*
- * These addresses are copied into cr10 & cr11 if single
- * stepping is switched off
- */
- unsigned long starting_addr;
- unsigned long ending_addr;
- union {
- per_lowcore_words words;
- per_lowcore_bits bits;
- } lowcore;
-} per_struct;
-
-typedef struct {
- unsigned int len;
- unsigned long kernel_addr;
- unsigned long process_addr;
-} ptrace_area;
-
-/*
- * S/390 specific non posix ptrace requests. I chose unusual values so
- * they are unlikely to clash with future ptrace definitions.
- */
-#define PTRACE_PEEKUSR_AREA 0x5000
-#define PTRACE_POKEUSR_AREA 0x5001
-#define PTRACE_PEEKTEXT_AREA 0x5002
-#define PTRACE_PEEKDATA_AREA 0x5003
-#define PTRACE_POKETEXT_AREA 0x5004
-#define PTRACE_POKEDATA_AREA 0x5005
-#define PTRACE_GET_LAST_BREAK 0x5006
-#define PTRACE_PEEK_SYSTEM_CALL 0x5007
-#define PTRACE_POKE_SYSTEM_CALL 0x5008
-#define PTRACE_ENABLE_TE 0x5009
-#define PTRACE_DISABLE_TE 0x5010
-#define PTRACE_TE_ABORT_RAND 0x5011
-
-/*
- * The numbers chosen here are somewhat arbitrary but absolutely MUST
- * not overlap with any of the number assigned in <linux/ptrace.h>.
- */
-#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */
-
-/*
- * PT_PROT definition is loosely based on hppa bsd definition in
- * gdb/hppab-nat.c
- */
-#define PTRACE_PROT 21
-
-typedef enum {
- ptprot_set_access_watchpoint,
- ptprot_set_write_watchpoint,
- ptprot_disable_watchpoint
-} ptprot_flags;
-
-typedef struct {
- unsigned long lowaddr;
- unsigned long hiaddr;
- ptprot_flags prot;
-} ptprot_area;
-
-/* Sequence of bytes for breakpoint illegal instruction. */
-#define S390_BREAKPOINT {0x0,0x1}
-#define S390_BREAKPOINT_U16 ((__u16)0x0001)
-#define S390_SYSCALL_OPCODE ((__u16)0x0a00)
-#define S390_SYSCALL_SIZE 2
-
-/*
- * The user_regs_struct defines the way the user registers are
- * store on the stack for signal handling.
- */
-struct user_regs_struct {
- psw_t psw;
- unsigned long gprs[NUM_GPRS];
- unsigned int acrs[NUM_ACRS];
- unsigned long orig_gpr2;
- s390_fp_regs fp_regs;
- /*
- * These per registers are in here so that gdb can modify them
- * itself as there is no "official" ptrace interface for hardware
- * watchpoints. This is the way intel does it.
- */
- per_struct per_info;
- unsigned long ieee_instruction_pointer; /* obsolete, always 0 */
-};
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _UAPI_S390_PTRACE_H */
diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c
index 7fea83bedf48..bca5dd0a59e3 100644
--- a/tools/bpf/bpftool/json_writer.c
+++ b/tools/bpf/bpftool/json_writer.c
@@ -80,9 +80,6 @@ static void jsonw_puts(json_writer_t *self, const char *str)
case '"':
fputs("\\\"", self->out);
break;
- case '\'':
- fputs("\\\'", self->out);
- break;
default:
putc(*str, self->out);
}
diff --git a/tools/bpf/resolve_btfids/.gitignore b/tools/bpf/resolve_btfids/.gitignore
index 16913fffc985..52d5e9721d92 100644
--- a/tools/bpf/resolve_btfids/.gitignore
+++ b/tools/bpf/resolve_btfids/.gitignore
@@ -1,3 +1,4 @@
/fixdep
/resolve_btfids
/libbpf/
+/libsubcmd/
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 62ce1f5d1b1d..976b194eb775 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4969,6 +4969,12 @@ union bpf_attr {
* different maps if key/value layout matches across maps.
* Every bpf_timer_set_callback() can have different callback_fn.
*
+ * *flags* can be one of:
+ *
+ * **BPF_F_TIMER_ABS**
+ * Start the timer in absolute expire value instead of the
+ * default relative one.
+ *
* Return
* 0 on success.
* **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
@@ -5325,11 +5331,22 @@ union bpf_attr {
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
- * *flags* is currently unused.
+ *
+ * *flags* must be 0 except for skb-type dynptrs.
+ *
+ * For skb-type dynptrs:
+ * * All data slices of the dynptr are automatically
+ * invalidated after **bpf_dynptr_write**\ (). This is
+ * because writing may pull the skb and change the
+ * underlying packet buffer.
+ *
+ * * For *flags*, please see the flags accepted by
+ * **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr or if *flags* is not 0.
+ * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
+ * other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
@@ -5337,6 +5354,9 @@ union bpf_attr {
*
* *len* must be a statically known value. The returned data slice
* is invalidated whenever the dynptr is invalidated.
+ *
+ * skb and xdp type dynptrs may not use bpf_dynptr_data. They should
+ * instead use bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
* Return
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only, if the dynptr is invalid, or if the offset and length
@@ -7083,4 +7103,13 @@ struct bpf_core_relo {
enum bpf_core_relo_kind kind;
};
+/*
+ * Flags to control bpf_timer_start() behaviour.
+ * - BPF_F_TIMER_ABS: Timeout passed is absolute time, by default it is
+ * relative to current time.
+ */
+enum {
+ BPF_F_TIMER_ABS = (1ULL << 0),
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 5a3dfb56d78f..b8b0a6369363 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
- usdt.o
+ usdt.o zip.o
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 9ed9bceb4111..f0f786373238 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/*
- * common eBPF ELF operations.
+ * Common BPF ELF operations.
*
* Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
* Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
@@ -386,14 +386,73 @@ LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_link_get_fd_by_id_opts(__u32 id,
const struct bpf_get_fd_by_id_opts *opts);
LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
-/* Type-safe variants of bpf_obj_get_info_by_fd(). The callers still needs to
- * pass info_len, which should normally be
- * sizeof(struct bpf_{prog,map,btf,link}_info), in order to be compatible with
- * different libbpf and kernel versions.
+
+/**
+ * @brief **bpf_prog_get_info_by_fd()** obtains information about the BPF
+ * program corresponding to *prog_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param prog_fd BPF program file descriptor
+ * @param info pointer to **struct bpf_prog_info** that will be populated with
+ * BPF program information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
*/
LIBBPF_API int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_map_get_info_by_fd()** obtains information about the BPF
+ * map corresponding to *map_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param map_fd BPF map file descriptor
+ * @param info pointer to **struct bpf_map_info** that will be populated with
+ * BPF map information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_btf_get_info_by_fd()** obtains information about the
+ * BTF object corresponding to *btf_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param btf_fd BTF object file descriptor
+ * @param info pointer to **struct bpf_btf_info** that will be populated with
+ * BTF object information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u32 *info_len);
+
+/**
+ * @brief **bpf_btf_get_info_by_fd()** obtains information about the BPF
+ * link corresponding to *link_fd*.
+ *
+ * Populates up to *info_len* bytes of *info* and updates *info_len* with the
+ * actual number of bytes written to *info*.
+ *
+ * @param link_fd BPF link file descriptor
+ * @param info pointer to **struct bpf_link_info** that will be populated with
+ * BPF link information
+ * @param info_len pointer to the size of *info*; on success updated with the
+ * number of bytes written to *info*
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
LIBBPF_API int bpf_link_get_info_by_fd(int link_fd, struct bpf_link_info *info, __u32 *info_len);
struct bpf_prog_query_opts {
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 5ec1871acb2f..7d12d3e620cc 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -174,8 +174,8 @@ enum libbpf_tristate {
#define __kconfig __attribute__((section(".kconfig")))
#define __ksym __attribute__((section(".ksyms")))
+#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
#define __kptr __attribute__((btf_type_tag("kptr")))
-#define __kptr_ref __attribute__((btf_type_tag("kptr_ref")))
#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 6db88f41fa0d..6fb3d0f9af17 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -204,6 +204,7 @@ struct pt_regs___s390 {
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
+#define __PT_PARM5_SYSCALL_REG uregs[4]
#define __PT_PARM6_SYSCALL_REG uregs[5]
#define __PT_PARM7_SYSCALL_REG uregs[6]
@@ -415,6 +416,8 @@ struct pt_regs___arm64 {
* https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html
*/
+/* loongarch provides struct user_pt_regs instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x))
#define __PT_PARM1_REG regs[4]
#define __PT_PARM2_REG regs[5]
#define __PT_PARM3_REG regs[6]
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 9181d36118d2..0a2c079244b6 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1000,8 +1000,6 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
}
}
- err = 0;
-
if (!btf_data) {
pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path);
err = -ENODATA;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 05c4db355f28..a557718401e4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -53,6 +53,7 @@
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
+#include "zip.h"
#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
@@ -798,7 +799,6 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
progs = obj->programs;
nr_progs = obj->nr_programs;
nr_syms = symbols->d_size / sizeof(Elf64_Sym);
- sec_off = 0;
for (i = 0; i < nr_syms; i++) {
sym = elf_sym_by_idx(obj, i);
@@ -2615,7 +2615,7 @@ static int bpf_object__init_maps(struct bpf_object *obj,
strict = !OPTS_GET(opts, relaxed_maps, false);
pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
- err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
+ err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
err = err ?: bpf_object__init_global_data_maps(obj);
err = err ?: bpf_object__init_kconfig_map(obj);
err = err ?: bpf_object__init_struct_ops_maps(obj);
@@ -9724,6 +9724,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p
char errmsg[STRERR_BUFSIZE];
struct bpf_link_perf *link;
int prog_fd, link_fd = -1, err;
+ bool force_ioctl_attach;
if (!OPTS_VALID(opts, bpf_perf_event_opts))
return libbpf_err_ptr(-EINVAL);
@@ -9747,7 +9748,8 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p
link->link.dealloc = &bpf_link_perf_dealloc;
link->perf_event_fd = pfd;
- if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
+ force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
+ if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
.perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
@@ -10106,6 +10108,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
const struct bpf_kprobe_opts *opts)
{
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
+ enum probe_attach_mode attach_mode;
char errmsg[STRERR_BUFSIZE];
char *legacy_probe = NULL;
struct bpf_link *link;
@@ -10116,11 +10119,32 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
if (!OPTS_VALID(opts, bpf_kprobe_opts))
return libbpf_err_ptr(-EINVAL);
+ attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
retprobe = OPTS_GET(opts, retprobe, false);
offset = OPTS_GET(opts, offset, 0);
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
legacy = determine_kprobe_perf_type() < 0;
+ switch (attach_mode) {
+ case PROBE_ATTACH_MODE_LEGACY:
+ legacy = true;
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_PERF:
+ if (legacy)
+ return libbpf_err_ptr(-ENOTSUP);
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_LINK:
+ if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
+ return libbpf_err_ptr(-ENOTSUP);
+ break;
+ case PROBE_ATTACH_MODE_DEFAULT:
+ break;
+ default:
+ return libbpf_err_ptr(-EINVAL);
+ }
+
if (!legacy) {
pfd = perf_event_open_probe(false /* uprobe */, retprobe,
func_name, offset,
@@ -10531,32 +10555,19 @@ static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
return NULL;
}
-/* Find offset of function name in object specified by path. "name" matches
- * symbol name or name@@LIB for library functions.
+/* Find offset of function name in the provided ELF object. "binary_path" is
+ * the path to the ELF binary represented by "elf", and only used for error
+ * reporting matters. "name" matches symbol name or name@@LIB for library
+ * functions.
*/
-static long elf_find_func_offset(const char *binary_path, const char *name)
+static long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
{
- int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
+ int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
bool is_shared_lib, is_name_qualified;
- char errmsg[STRERR_BUFSIZE];
long ret = -ENOENT;
size_t name_len;
GElf_Ehdr ehdr;
- Elf *elf;
- fd = open(binary_path, O_RDONLY | O_CLOEXEC);
- if (fd < 0) {
- ret = -errno;
- pr_warn("failed to open %s: %s\n", binary_path,
- libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
- return ret;
- }
- elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
- if (!elf) {
- pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
- close(fd);
- return -LIBBPF_ERRNO__FORMAT;
- }
if (!gelf_getehdr(elf, &ehdr)) {
pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
ret = -LIBBPF_ERRNO__FORMAT;
@@ -10569,7 +10580,7 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
/* Does name specify "@@LIB"? */
is_name_qualified = strstr(name, "@@") != NULL;
- /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
+ /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
* a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
* linked binary may not have SHT_DYMSYM, so absence of a section should not be
* reported as a warning/error.
@@ -10682,11 +10693,101 @@ static long elf_find_func_offset(const char *binary_path, const char *name)
}
}
out:
+ return ret;
+}
+
+/* Find offset of function name in ELF object specified by path. "name" matches
+ * symbol name or name@@LIB for library functions.
+ */
+static long elf_find_func_offset_from_file(const char *binary_path, const char *name)
+{
+ char errmsg[STRERR_BUFSIZE];
+ long ret = -ENOENT;
+ Elf *elf;
+ int fd;
+
+ fd = open(binary_path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ ret = -errno;
+ pr_warn("failed to open %s: %s\n", binary_path,
+ libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
+ return ret;
+ }
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!elf) {
+ pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
+ close(fd);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ ret = elf_find_func_offset(elf, binary_path, name);
elf_end(elf);
close(fd);
return ret;
}
+/* Find offset of function name in archive specified by path. Currently
+ * supported are .zip files that do not compress their contents, as used on
+ * Android in the form of APKs, for example. "file_name" is the name of the ELF
+ * file inside the archive. "func_name" matches symbol name or name@@LIB for
+ * library functions.
+ *
+ * An overview of the APK format specifically provided here:
+ * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
+ */
+static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
+ const char *func_name)
+{
+ struct zip_archive *archive;
+ struct zip_entry entry;
+ long ret;
+ Elf *elf;
+
+ archive = zip_archive_open(archive_path);
+ if (IS_ERR(archive)) {
+ ret = PTR_ERR(archive);
+ pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
+ return ret;
+ }
+
+ ret = zip_archive_find_entry(archive, file_name, &entry);
+ if (ret) {
+ pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
+ archive_path, ret);
+ goto out;
+ }
+ pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
+ (unsigned long)entry.data_offset);
+
+ if (entry.compression) {
+ pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
+ archive_path);
+ ret = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ }
+
+ elf = elf_memory((void *)entry.data, entry.data_length);
+ if (!elf) {
+ pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
+ elf_errmsg(-1));
+ ret = -LIBBPF_ERRNO__LIBELF;
+ goto out;
+ }
+
+ ret = elf_find_func_offset(elf, file_name, func_name);
+ if (ret > 0) {
+ pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
+ func_name, file_name, archive_path, entry.data_offset, ret,
+ ret + entry.data_offset);
+ ret += entry.data_offset;
+ }
+ elf_end(elf);
+
+out:
+ zip_archive_close(archive);
+ return ret;
+}
+
static const char *arch_specific_lib_paths(void)
{
/*
@@ -10772,9 +10873,11 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
const char *binary_path, size_t func_offset,
const struct bpf_uprobe_opts *opts)
{
- DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
+ const char *archive_path = NULL, *archive_sep = NULL;
char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
- char full_binary_path[PATH_MAX];
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
+ enum probe_attach_mode attach_mode;
+ char full_path[PATH_MAX];
struct bpf_link *link;
size_t ref_ctr_off;
int pfd, err;
@@ -10784,6 +10887,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
if (!OPTS_VALID(opts, bpf_uprobe_opts))
return libbpf_err_ptr(-EINVAL);
+ attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
retprobe = OPTS_GET(opts, retprobe, false);
ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
@@ -10791,27 +10895,60 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
if (!binary_path)
return libbpf_err_ptr(-EINVAL);
- if (!strchr(binary_path, '/')) {
- err = resolve_full_path(binary_path, full_binary_path,
- sizeof(full_binary_path));
+ /* Check if "binary_path" refers to an archive. */
+ archive_sep = strstr(binary_path, "!/");
+ if (archive_sep) {
+ full_path[0] = '\0';
+ libbpf_strlcpy(full_path, binary_path,
+ min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
+ archive_path = full_path;
+ binary_path = archive_sep + 2;
+ } else if (!strchr(binary_path, '/')) {
+ err = resolve_full_path(binary_path, full_path, sizeof(full_path));
if (err) {
pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
prog->name, binary_path, err);
return libbpf_err_ptr(err);
}
- binary_path = full_binary_path;
+ binary_path = full_path;
}
func_name = OPTS_GET(opts, func_name, NULL);
if (func_name) {
long sym_off;
- sym_off = elf_find_func_offset(binary_path, func_name);
+ if (archive_path) {
+ sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
+ func_name);
+ binary_path = archive_path;
+ } else {
+ sym_off = elf_find_func_offset_from_file(binary_path, func_name);
+ }
if (sym_off < 0)
return libbpf_err_ptr(sym_off);
func_offset += sym_off;
}
legacy = determine_uprobe_perf_type() < 0;
+ switch (attach_mode) {
+ case PROBE_ATTACH_MODE_LEGACY:
+ legacy = true;
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_PERF:
+ if (legacy)
+ return libbpf_err_ptr(-ENOTSUP);
+ pe_opts.force_ioctl_attach = true;
+ break;
+ case PROBE_ATTACH_MODE_LINK:
+ if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
+ return libbpf_err_ptr(-ENOTSUP);
+ break;
+ case PROBE_ATTACH_MODE_DEFAULT:
+ break;
+ default:
+ return libbpf_err_ptr(-EINVAL);
+ }
+
if (!legacy) {
pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
func_offset, pid, ref_ctr_off);
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 2efd80f6f7b9..db4992a036f8 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -447,12 +447,15 @@ LIBBPF_API struct bpf_link *
bpf_program__attach(const struct bpf_program *prog);
struct bpf_perf_event_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* custom user-provided value fetchable through bpf_get_attach_cookie() */
__u64 bpf_cookie;
+ /* don't use BPF link when attach BPF program */
+ bool force_ioctl_attach;
+ size_t :0;
};
-#define bpf_perf_event_opts__last_field bpf_cookie
+#define bpf_perf_event_opts__last_field force_ioctl_attach
LIBBPF_API struct bpf_link *
bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd);
@@ -461,8 +464,25 @@ LIBBPF_API struct bpf_link *
bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
const struct bpf_perf_event_opts *opts);
+/**
+ * enum probe_attach_mode - the mode to attach kprobe/uprobe
+ *
+ * force libbpf to attach kprobe/uprobe in specific mode, -ENOTSUP will
+ * be returned if it is not supported by the kernel.
+ */
+enum probe_attach_mode {
+ /* attach probe in latest supported mode by kernel */
+ PROBE_ATTACH_MODE_DEFAULT = 0,
+ /* attach probe in legacy mode, using debugfs/tracefs */
+ PROBE_ATTACH_MODE_LEGACY,
+ /* create perf event with perf_event_open() syscall */
+ PROBE_ATTACH_MODE_PERF,
+ /* attach probe with BPF link */
+ PROBE_ATTACH_MODE_LINK,
+};
+
struct bpf_kprobe_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* custom user-provided value fetchable through bpf_get_attach_cookie() */
__u64 bpf_cookie;
@@ -470,9 +490,11 @@ struct bpf_kprobe_opts {
size_t offset;
/* kprobe is return probe */
bool retprobe;
+ /* kprobe attach mode */
+ enum probe_attach_mode attach_mode;
size_t :0;
};
-#define bpf_kprobe_opts__last_field retprobe
+#define bpf_kprobe_opts__last_field attach_mode
LIBBPF_API struct bpf_link *
bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe,
@@ -506,7 +528,7 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
const struct bpf_kprobe_multi_opts *opts);
struct bpf_ksyscall_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* custom user-provided value fetchable through bpf_get_attach_cookie() */
__u64 bpf_cookie;
@@ -552,7 +574,7 @@ bpf_program__attach_ksyscall(const struct bpf_program *prog,
const struct bpf_ksyscall_opts *opts);
struct bpf_uprobe_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* offset of kernel reference counted USDT semaphore, added in
* a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
@@ -570,9 +592,11 @@ struct bpf_uprobe_opts {
* binary_path.
*/
const char *func_name;
+ /* uprobe attach mode */
+ enum probe_attach_mode attach_mode;
size_t :0;
};
-#define bpf_uprobe_opts__last_field func_name
+#define bpf_uprobe_opts__last_field attach_mode
/**
* @brief **bpf_program__attach_uprobe()** attaches a BPF program
@@ -646,7 +670,7 @@ bpf_program__attach_usdt(const struct bpf_program *prog,
const struct bpf_usdt_opts *opts);
struct bpf_tracepoint_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* custom user-provided value fetchable through bpf_get_attach_cookie() */
__u64 bpf_cookie;
@@ -1110,7 +1134,7 @@ struct user_ring_buffer;
typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
struct ring_buffer_opts {
- size_t sz; /* size of this struct, for forward/backward compatiblity */
+ size_t sz; /* size of this struct, for forward/backward compatibility */
};
#define ring_buffer_opts__last_field sz
@@ -1475,7 +1499,7 @@ LIBBPF_API void
bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s);
struct gen_loader_opts {
- size_t sz; /* size of this struct, for forward/backward compatiblity */
+ size_t sz; /* size of this struct, for forward/backward compatibility */
const char *data;
const char *insns;
__u32 data_sz;
@@ -1493,13 +1517,13 @@ enum libbpf_tristate {
};
struct bpf_linker_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
};
#define bpf_linker_opts__last_field sz
struct bpf_linker_file_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
};
#define bpf_linker_file_opts__last_field sz
@@ -1542,7 +1566,7 @@ typedef int (*libbpf_prog_attach_fn_t)(const struct bpf_program *prog, long cook
struct bpf_link **link);
struct libbpf_prog_handler_opts {
- /* size of this struct, for forward/backward compatiblity */
+ /* size of this struct, for forward/backward compatibility */
size_t sz;
/* User-provided value that is passed to prog_setup_fn,
* prog_prepare_load_fn, and prog_attach_fn callbacks. Allows user to
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index 4ac02c28e152..d7069780984a 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -1997,7 +1997,6 @@ add_sym:
static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj)
{
struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx];
- struct dst_sec *dst_symtab;
int i, err;
for (i = 1; i < obj->sec_cnt; i++) {
@@ -2030,9 +2029,6 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
return -1;
}
- /* add_dst_sec() above could have invalidated linker->secs */
- dst_symtab = &linker->secs[linker->symtab_sec_idx];
-
/* shdr->sh_link points to SYMTAB */
dst_sec->shdr->sh_link = linker->symtab_sec_idx;
@@ -2049,16 +2045,13 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
dst_rel = dst_sec->raw_data + src_sec->dst_off;
n = src_sec->shdr->sh_size / src_sec->shdr->sh_entsize;
for (j = 0; j < n; j++, src_rel++, dst_rel++) {
- size_t src_sym_idx = ELF64_R_SYM(src_rel->r_info);
- size_t sym_type = ELF64_R_TYPE(src_rel->r_info);
- Elf64_Sym *src_sym, *dst_sym;
- size_t dst_sym_idx;
+ size_t src_sym_idx, dst_sym_idx, sym_type;
+ Elf64_Sym *src_sym;
src_sym_idx = ELF64_R_SYM(src_rel->r_info);
src_sym = src_symtab->data->d_buf + sizeof(*src_sym) * src_sym_idx;
dst_sym_idx = obj->sym_map[src_sym_idx];
- dst_sym = dst_symtab->raw_data + sizeof(*dst_sym) * dst_sym_idx;
dst_rel->r_offset += src_linked_sec->dst_off;
sym_type = ELF64_R_TYPE(src_rel->r_info);
dst_rel->r_info = ELF64_R_INFO(dst_sym_idx, sym_type);
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 1653e7a8b0a1..84dd5fa14905 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -468,8 +468,13 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
return 0;
err = libbpf_netlink_resolve_genl_family_id("netdev", sizeof("netdev"), &id);
- if (err < 0)
+ if (err < 0) {
+ if (err == -ENOENT) {
+ opts->feature_flags = 0;
+ goto skip_feature_flags;
+ }
return libbpf_err(err);
+ }
memset(&req, 0, sizeof(req));
req.nh.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
@@ -489,6 +494,7 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
opts->feature_flags = md.flags;
+skip_feature_flags:
return 0;
}
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index c4b0e81ae293..a26b2f5fa0fc 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -1551,9 +1551,6 @@ int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const st
if (level <= 0)
return -EINVAL;
- local_t = btf_type_by_id(local_btf, local_id);
- targ_t = btf_type_by_id(targ_btf, targ_id);
-
recur:
depth--;
if (depth < 0)
diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c
new file mode 100644
index 000000000000..8458c2dd0e3b
--- /dev/null
+++ b/tools/lib/bpf/zip.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * Routines for dealing with .zip archives.
+ *
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "libbpf_internal.h"
+#include "zip.h"
+
+/* Specification of ZIP file format can be found here:
+ * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+ * For a high level overview of the structure of a ZIP file see
+ * sections 4.3.1 - 4.3.6.
+ *
+ * Data structures appearing in ZIP files do not contain any
+ * padding and they might be misaligned. To allow us to safely
+ * operate on pointers to such structures and their members, we
+ * declare the types as packed.
+ */
+
+#define END_OF_CD_RECORD_MAGIC 0x06054b50
+
+/* See section 4.3.16 of the spec. */
+struct end_of_cd_record {
+ /* Magic value equal to END_OF_CD_RECORD_MAGIC */
+ __u32 magic;
+
+ /* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
+ * Zip archive might span multiple files (disks).
+ */
+ __u16 this_disk;
+
+ /* Number of the file containing the beginning of the central directory or
+ * 0xFFFF if ZIP64 archive.
+ */
+ __u16 cd_disk;
+
+ /* Number of central directory records on this disk or 0xFFFF if ZIP64
+ * archive.
+ */
+ __u16 cd_records;
+
+ /* Number of central directory records on all disks or 0xFFFF if ZIP64
+ * archive.
+ */
+ __u16 cd_records_total;
+
+ /* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */
+ __u32 cd_size;
+
+ /* Offset of the central directory from the beginning of the archive or
+ * 0xFFFFFFFF if ZIP64 archive.
+ */
+ __u32 cd_offset;
+
+ /* Length of comment data following end of central directory record. */
+ __u16 comment_length;
+
+ /* Up to 64k of arbitrary bytes. */
+ /* uint8_t comment[comment_length] */
+} __attribute__((packed));
+
+#define CD_FILE_HEADER_MAGIC 0x02014b50
+#define FLAG_ENCRYPTED (1 << 0)
+#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
+
+/* See section 4.3.12 of the spec. */
+struct cd_file_header {
+ /* Magic value equal to CD_FILE_HEADER_MAGIC. */
+ __u32 magic;
+ __u16 version;
+ /* Minimum zip version needed to extract the file. */
+ __u16 min_version;
+ __u16 flags;
+ __u16 compression;
+ __u16 last_modified_time;
+ __u16 last_modified_date;
+ __u32 crc;
+ __u32 compressed_size;
+ __u32 uncompressed_size;
+ __u16 file_name_length;
+ __u16 extra_field_length;
+ __u16 file_comment_length;
+ /* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
+ __u16 disk;
+ __u16 internal_attributes;
+ __u32 external_attributes;
+ /* Offset from the start of the disk containing the local file header to the
+ * start of the local file header.
+ */
+ __u32 offset;
+} __attribute__((packed));
+
+#define LOCAL_FILE_HEADER_MAGIC 0x04034b50
+
+/* See section 4.3.7 of the spec. */
+struct local_file_header {
+ /* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
+ __u32 magic;
+ /* Minimum zip version needed to extract the file. */
+ __u16 min_version;
+ __u16 flags;
+ __u16 compression;
+ __u16 last_modified_time;
+ __u16 last_modified_date;
+ __u32 crc;
+ __u32 compressed_size;
+ __u32 uncompressed_size;
+ __u16 file_name_length;
+ __u16 extra_field_length;
+} __attribute__((packed));
+
+struct zip_archive {
+ void *data;
+ __u32 size;
+ __u32 cd_offset;
+ __u32 cd_records;
+};
+
+static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size)
+{
+ if (offset + size > archive->size || offset > offset + size)
+ return NULL;
+
+ return archive->data + offset;
+}
+
+/* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the
+ * archive uses features which are not supported.
+ */
+static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset)
+{
+ __u16 comment_length, cd_records;
+ struct end_of_cd_record *eocd;
+ __u32 cd_offset, cd_size;
+
+ eocd = check_access(archive, offset, sizeof(*eocd));
+ if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC)
+ return -EINVAL;
+
+ comment_length = eocd->comment_length;
+ if (offset + sizeof(*eocd) + comment_length != archive->size)
+ return -EINVAL;
+
+ cd_records = eocd->cd_records;
+ if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records)
+ /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
+ return -ENOTSUP;
+
+ cd_offset = eocd->cd_offset;
+ cd_size = eocd->cd_size;
+ if (!check_access(archive, cd_offset, cd_size))
+ return -EINVAL;
+
+ archive->cd_offset = cd_offset;
+ archive->cd_records = cd_records;
+ return 0;
+}
+
+static int find_cd(struct zip_archive *archive)
+{
+ int rc = -EINVAL;
+ int64_t limit;
+ __u32 offset;
+
+ if (archive->size <= sizeof(struct end_of_cd_record))
+ return -EINVAL;
+
+ /* Because the end of central directory ends with a variable length array of
+ * up to 0xFFFF bytes we can't know exactly where it starts and need to
+ * search for it at the end of the file, scanning the (limit, offset] range.
+ */
+ offset = archive->size - sizeof(struct end_of_cd_record);
+ limit = (int64_t)offset - (1 << 16);
+
+ for (; offset >= 0 && offset > limit && rc != 0; offset--) {
+ rc = try_parse_end_of_cd(archive, offset);
+ if (rc == -ENOTSUP)
+ break;
+ }
+ return rc;
+}
+
+struct zip_archive *zip_archive_open(const char *path)
+{
+ struct zip_archive *archive;
+ int err, fd;
+ off_t size;
+ void *data;
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return ERR_PTR(-errno);
+
+ size = lseek(fd, 0, SEEK_END);
+ if (size == (off_t)-1 || size > UINT32_MAX) {
+ close(fd);
+ return ERR_PTR(-EINVAL);
+ }
+
+ data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+ err = -errno;
+ close(fd);
+
+ if (data == MAP_FAILED)
+ return ERR_PTR(err);
+
+ archive = malloc(sizeof(*archive));
+ if (!archive) {
+ munmap(data, size);
+ return ERR_PTR(-ENOMEM);
+ };
+
+ archive->data = data;
+ archive->size = size;
+
+ err = find_cd(archive);
+ if (err) {
+ munmap(data, size);
+ free(archive);
+ return ERR_PTR(err);
+ }
+
+ return archive;
+}
+
+void zip_archive_close(struct zip_archive *archive)
+{
+ munmap(archive->data, archive->size);
+ free(archive);
+}
+
+static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
+ __u32 offset)
+{
+ struct local_file_header *lfh;
+
+ lfh = check_access(archive, offset, sizeof(*lfh));
+ if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC)
+ return NULL;
+
+ return lfh;
+}
+
+static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out)
+{
+ struct local_file_header *lfh;
+ __u32 compressed_size;
+ const char *name;
+ void *data;
+
+ lfh = local_file_header_at_offset(archive, offset);
+ if (!lfh)
+ return -EINVAL;
+
+ offset += sizeof(*lfh);
+ if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR))
+ return -EINVAL;
+
+ name = check_access(archive, offset, lfh->file_name_length);
+ if (!name)
+ return -EINVAL;
+
+ offset += lfh->file_name_length;
+ if (!check_access(archive, offset, lfh->extra_field_length))
+ return -EINVAL;
+
+ offset += lfh->extra_field_length;
+ compressed_size = lfh->compressed_size;
+ data = check_access(archive, offset, compressed_size);
+ if (!data)
+ return -EINVAL;
+
+ out->compression = lfh->compression;
+ out->name_length = lfh->file_name_length;
+ out->name = name;
+ out->data = data;
+ out->data_length = compressed_size;
+ out->data_offset = offset;
+
+ return 0;
+}
+
+int zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
+ struct zip_entry *out)
+{
+ size_t file_name_length = strlen(file_name);
+ __u32 i, offset = archive->cd_offset;
+
+ for (i = 0; i < archive->cd_records; ++i) {
+ __u16 cdfh_name_length, cdfh_flags;
+ struct cd_file_header *cdfh;
+ const char *cdfh_name;
+
+ cdfh = check_access(archive, offset, sizeof(*cdfh));
+ if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC)
+ return -EINVAL;
+
+ offset += sizeof(*cdfh);
+ cdfh_name_length = cdfh->file_name_length;
+ cdfh_name = check_access(archive, offset, cdfh_name_length);
+ if (!cdfh_name)
+ return -EINVAL;
+
+ cdfh_flags = cdfh->flags;
+ if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
+ (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
+ file_name_length == cdfh_name_length &&
+ memcmp(file_name, archive->data + offset, file_name_length) == 0) {
+ return get_entry_at_offset(archive, cdfh->offset, out);
+ }
+
+ offset += cdfh_name_length;
+ offset += cdfh->extra_field_length;
+ offset += cdfh->file_comment_length;
+ }
+
+ return -ENOENT;
+}
diff --git a/tools/lib/bpf/zip.h b/tools/lib/bpf/zip.h
new file mode 100644
index 000000000000..1c1bb21fba76
--- /dev/null
+++ b/tools/lib/bpf/zip.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LIBBPF_ZIP_H
+#define __LIBBPF_ZIP_H
+
+#include <linux/types.h>
+
+/* Represents an open zip archive.
+ * Only basic ZIP files are supported, in particular the following are not
+ * supported:
+ * - encryption
+ * - streaming
+ * - multi-part ZIP files
+ * - ZIP64
+ */
+struct zip_archive;
+
+/* Carries information on name, compression method, and data corresponding to a
+ * file in a zip archive.
+ */
+struct zip_entry {
+ /* Compression method as defined in pkzip spec. 0 means data is uncompressed. */
+ __u16 compression;
+
+ /* Non-null terminated name of the file. */
+ const char *name;
+ /* Length of the file name. */
+ __u16 name_length;
+
+ /* Pointer to the file data. */
+ const void *data;
+ /* Length of the file data. */
+ __u32 data_length;
+ /* Offset of the file data within the archive. */
+ __u32 data_offset;
+};
+
+/* Open a zip archive. Returns NULL in case of an error. */
+struct zip_archive *zip_archive_open(const char *path);
+
+/* Close a zip archive and release resources. */
+void zip_archive_close(struct zip_archive *archive);
+
+/* Look up an entry corresponding to a file in given zip archive. */
+int zip_archive_find_entry(struct zip_archive *archive, const char *name, struct zip_entry *out);
+
+#endif
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 0efb8f2b33ce..ff527ac065cf 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -108,6 +108,8 @@ endif # GCC_TOOLCHAIN_DIR
endif # CLANG_CROSS_FLAGS
CFLAGS += $(CLANG_CROSS_FLAGS)
AFLAGS += $(CLANG_CROSS_FLAGS)
+else
+CLANG_CROSS_FLAGS :=
endif # CROSS_COMPILE
# Hack to avoid type-punned warnings on old systems such as RHEL5:
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index b89eb87034e4..a02a085e7f32 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -4,6 +4,8 @@ bloom_filter_map # failed to find kernel BTF type ID of
bpf_cookie # failed to open_and_load program: -524 (trampoline)
bpf_loop # attaches to __x64_sys_nanosleep
cgrp_local_storage # prog_attach unexpected error: -524 (trampoline)
+dynptr/test_dynptr_skb_data
+dynptr/test_skb_readonly
fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index b677dcd0b77a..16f404aa1b23 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -338,7 +338,8 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \
define get_sys_includes
$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__loongarch_grlen ' | awk '{printf("-D__BITS_PER_LONG=%d", $$3)}')
endef
# Determine target endianness.
@@ -356,7 +357,7 @@ BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(abspath $(OUTPUT)/../usr/include)
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
- -Wno-compare-distinct-pointer-types
+ -Wno-compare-distinct-pointer-types -Wuninitialized
$(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline
@@ -558,7 +559,7 @@ TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
network_helpers.c testing_helpers.c \
btf_helpers.c flow_dissector_load.h \
- cap_helpers.c test_loader.c xsk.c
+ cap_helpers.c test_loader.c xsk.c disasm.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
$(OUTPUT)/liburandom_read.so \
$(OUTPUT)/xdp_synproxy \
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
new file mode 100644
index 000000000000..8c993ec8ceea
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -0,0 +1,38 @@
+#ifndef __BPF_KFUNCS__
+#define __BPF_KFUNCS__
+
+/* Description
+ * Initializes an skb-type dynptr
+ * Returns
+ * Error code
+ */
+extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym;
+
+/* Description
+ * Initializes an xdp-type dynptr
+ * Returns
+ * Error code
+ */
+extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym;
+
+/* Description
+ * Obtain a read-only pointer to the dynptr's data
+ * Returns
+ * Either a direct pointer to the dynptr data or a pointer to the user-provided
+ * buffer if unable to obtain a direct pointer
+ */
+extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
+ void *buffer, __u32 buffer__szk) __ksym;
+
+/* Description
+ * Obtain a read-write pointer to the dynptr's data
+ * Returns
+ * Either a direct pointer to the dynptr data or a pointer to the user-provided
+ * buffer if unable to obtain a direct pointer
+ */
+extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
+ void *buffer, __u32 buffer__szk) __ksym;
+
+#endif
diff --git a/tools/testing/selftests/bpf/disasm.c b/tools/testing/selftests/bpf/disasm.c
new file mode 120000
index 000000000000..b1571927bd54
--- /dev/null
+++ b/tools/testing/selftests/bpf/disasm.c
@@ -0,0 +1 @@
+../../../../kernel/bpf/disasm.c \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/disasm.h b/tools/testing/selftests/bpf/disasm.h
new file mode 120000
index 000000000000..8054fd497340
--- /dev/null
+++ b/tools/testing/selftests/bpf/disasm.h
@@ -0,0 +1 @@
+../../../../kernel/bpf/disasm.h \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 4666f88f2bb4..c94fa8d6c4f6 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -660,16 +660,22 @@ static int do_test_single(struct bpf_align_test *test)
* func#0 @0
* 0: R1=ctx(off=0,imm=0) R10=fp0
* 0: (b7) r3 = 2 ; R3_w=2
+ *
+ * Sometimes it's actually two lines below, e.g. when
+ * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))":
+ * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
+ * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
+ * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
*/
- if (!strstr(line_ptr, m.match)) {
+ while (!strstr(line_ptr, m.match)) {
cur_line = -1;
line_ptr = strtok(NULL, "\n");
- sscanf(line_ptr, "%u: ", &cur_line);
+ sscanf(line_ptr ?: "", "%u: ", &cur_line);
+ if (!line_ptr || cur_line != m.line)
+ break;
}
- if (cur_line != m.line || !line_ptr ||
- !strstr(line_ptr, m.match)) {
- printf("Failed to find match %u: %s\n",
- m.line, m.match);
+ if (cur_line != m.line || !line_ptr || !strstr(line_ptr, m.match)) {
+ printf("Failed to find match %u: %s\n", m.line, m.match);
ret = 1;
printf("%s", bpf_vlog);
break;
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 56374c8b5436..7175af39134f 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_attach_kprobe_sleepable.skel.h"
+#include "test_attach_probe_manual.skel.h"
#include "test_attach_probe.skel.h"
/* this is how USDT semaphore is actually defined, except volatile modifier */
@@ -23,81 +25,54 @@ static noinline void trigger_func3(void)
asm volatile ("");
}
+/* attach point for ref_ctr */
+static noinline void trigger_func4(void)
+{
+ asm volatile ("");
+}
+
static char test_data[] = "test_data";
-void test_attach_probe(void)
+/* manual attach kprobe/kretprobe/uprobe/uretprobe testings */
+static void test_attach_probe_manual(enum probe_attach_mode attach_mode)
{
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
- struct test_attach_probe* skel;
- ssize_t uprobe_offset, ref_ctr_offset;
- struct bpf_link *uprobe_err_link;
- FILE *devnull;
- bool legacy;
-
- /* Check if new-style kprobe/uprobe API is supported.
- * Kernels that support new FD-based kprobe and uprobe BPF attachment
- * through perf_event_open() syscall expose
- * /sys/bus/event_source/devices/kprobe/type and
- * /sys/bus/event_source/devices/uprobe/type files, respectively. They
- * contain magic numbers that are passed as "type" field of
- * perf_event_attr. Lack of such file in the system indicates legacy
- * kernel with old-style kprobe/uprobe attach interface through
- * creating per-probe event through tracefs. For such cases
- * ref_ctr_offset feature is not supported, so we don't test it.
- */
- legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0;
+ struct test_attach_probe_manual *skel;
+ ssize_t uprobe_offset;
- uprobe_offset = get_uprobe_offset(&trigger_func);
- if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
return;
- ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
- if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
- return;
-
- skel = test_attach_probe__open();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
-
- /* sleepable kprobe test case needs flags set before loading */
- if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable,
- BPF_F_SLEEPABLE), "kprobe_sleepable_flags"))
- goto cleanup;
-
- if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load"))
- goto cleanup;
- if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
goto cleanup;
/* manual-attach kprobe/kretprobe */
- kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
- false /* retprobe */,
- SYS_NANOSLEEP_KPROBE_NAME);
+ kprobe_opts.attach_mode = attach_mode;
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME,
+ &kprobe_opts);
if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
goto cleanup;
skel->links.handle_kprobe = kprobe_link;
- kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
- true /* retprobe */,
- SYS_NANOSLEEP_KPROBE_NAME);
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME,
+ &kprobe_opts);
if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe"))
goto cleanup;
skel->links.handle_kretprobe = kretprobe_link;
- /* auto-attachable kprobe and kretprobe */
- skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto);
- ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto");
-
- skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto);
- ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto");
-
- if (!legacy)
- ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
-
+ /* manual-attach uprobe/uretprobe */
+ uprobe_opts.attach_mode = attach_mode;
+ uprobe_opts.ref_ctr_offset = 0;
uprobe_opts.retprobe = false;
- uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset;
uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
0 /* self pid */,
"/proc/self/exe",
@@ -107,12 +82,7 @@ void test_attach_probe(void)
goto cleanup;
skel->links.handle_uprobe = uprobe_link;
- if (!legacy)
- ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
-
- /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
uprobe_opts.retprobe = true;
- uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset;
uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
-1 /* any pid */,
"/proc/self/exe",
@@ -121,12 +91,7 @@ void test_attach_probe(void)
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
- /* verify auto-attach fails for old-style uprobe definition */
- uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname);
- if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP,
- "auto-attach should fail for old-style name"))
- goto cleanup;
-
+ /* attach uprobe by function name manually */
uprobe_opts.func_name = "trigger_func2";
uprobe_opts.retprobe = false;
uprobe_opts.ref_ctr_offset = 0;
@@ -138,11 +103,63 @@ void test_attach_probe(void)
if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname"))
goto cleanup;
+ /* trigger & validate kprobe && kretprobe */
+ usleep(1);
+
+ /* trigger & validate uprobe & uretprobe */
+ trigger_func();
+
+ /* trigger & validate uprobe attached by name */
+ trigger_func2();
+
+ ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res");
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+static void test_attach_probe_auto(struct test_attach_probe *skel)
+{
+ struct bpf_link *uprobe_err_link;
+
+ /* auto-attachable kprobe and kretprobe */
+ skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto);
+ ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto");
+
+ skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto);
+ ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto");
+
+ /* verify auto-attach fails for old-style uprobe definition */
+ uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname);
+ if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP,
+ "auto-attach should fail for old-style name"))
+ return;
+
/* verify auto-attach works */
skel->links.handle_uretprobe_byname =
bpf_program__attach(skel->progs.handle_uretprobe_byname);
if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname"))
- goto cleanup;
+ return;
+
+ /* trigger & validate kprobe && kretprobe */
+ usleep(1);
+
+ /* trigger & validate uprobe attached by name */
+ trigger_func2();
+
+ ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res");
+ ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
+}
+
+static void test_uprobe_lib(struct test_attach_probe *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ FILE *devnull;
/* test attach by name for a library function, using the library
* as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo().
@@ -155,7 +172,7 @@ void test_attach_probe(void)
"libc.so.6",
0, &uprobe_opts);
if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2"))
- goto cleanup;
+ return;
uprobe_opts.func_name = "fclose";
uprobe_opts.retprobe = true;
@@ -165,62 +182,144 @@ void test_attach_probe(void)
"libc.so.6",
0, &uprobe_opts);
if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2"))
+ return;
+
+ /* trigger & validate shared library u[ret]probes attached by name */
+ devnull = fopen("/dev/null", "r");
+ fclose(devnull);
+
+ ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
+}
+
+static void test_uprobe_ref_ctr(struct test_attach_probe *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ ssize_t uprobe_offset, ref_ctr_offset;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func4);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset_ref_ctr"))
+ return;
+
+ ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
+ if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
+ return;
+
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
+
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_ref_ctr,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_ref_ctr"))
+ return;
+ skel->links.handle_uprobe_ref_ctr = uprobe_link;
+
+ ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
+
+ /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
+ uprobe_opts.retprobe = true;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_ref_ctr,
+ -1 /* any pid */,
+ "/proc/self/exe",
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_ref_ctr"))
+ return;
+ skel->links.handle_uretprobe_ref_ctr = uretprobe_link;
+}
+
+static void test_kprobe_sleepable(void)
+{
+ struct test_attach_kprobe_sleepable *skel;
+
+ skel = test_attach_kprobe_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_sleepable_open"))
+ return;
+
+ /* sleepable kprobe test case needs flags set before loading */
+ if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable,
+ BPF_F_SLEEPABLE), "kprobe_sleepable_flags"))
+ goto cleanup;
+
+ if (!ASSERT_OK(test_attach_kprobe_sleepable__load(skel),
+ "skel_kprobe_sleepable_load"))
goto cleanup;
/* sleepable kprobes should not attach successfully */
skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable);
- if (!ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable"))
- goto cleanup;
+ ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable");
+cleanup:
+ test_attach_kprobe_sleepable__destroy(skel);
+}
+
+static void test_uprobe_sleepable(struct test_attach_probe *skel)
+{
/* test sleepable uprobe and uretprobe variants */
skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable);
if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable"))
- goto cleanup;
+ return;
skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3);
if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3"))
- goto cleanup;
+ return;
skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable);
if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable"))
- goto cleanup;
+ return;
skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3);
if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3"))
- goto cleanup;
+ return;
skel->bss->user_ptr = test_data;
- /* trigger & validate kprobe && kretprobe */
- usleep(1);
-
- /* trigger & validate shared library u[ret]probes attached by name */
- devnull = fopen("/dev/null", "r");
- fclose(devnull);
-
- /* trigger & validate uprobe & uretprobe */
- trigger_func();
-
- /* trigger & validate uprobe attached by name */
- trigger_func2();
-
/* trigger & validate sleepable uprobe attached by name */
trigger_func3();
- ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
- ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res");
- ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
- ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res");
- ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res");
- ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res");
- ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res");
- ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
- ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
- ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res");
ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res");
ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res");
ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res");
+}
+
+void test_attach_probe(void)
+{
+ struct test_attach_probe *skel;
+
+ skel = test_attach_probe__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load"))
+ goto cleanup;
+ if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+ goto cleanup;
+
+ if (test__start_subtest("manual-default"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_DEFAULT);
+ if (test__start_subtest("manual-legacy"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_LEGACY);
+ if (test__start_subtest("manual-perf"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_PERF);
+ if (test__start_subtest("manual-link"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_LINK);
+
+ if (test__start_subtest("auto"))
+ test_attach_probe_auto(skel);
+ if (test__start_subtest("kprobe-sleepable"))
+ test_kprobe_sleepable();
+ if (test__start_subtest("uprobe-lib"))
+ test_uprobe_lib(skel);
+ if (test__start_subtest("uprobe-sleepable"))
+ test_uprobe_sleepable(skel);
+ if (test__start_subtest("uprobe-ref_ctr"))
+ test_uprobe_ref_ctr(skel);
cleanup:
test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
index b3f7985c8504..adda85f97058 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -84,6 +84,7 @@ static const char * const success_tests[] = {
"test_cgrp_xchg_release",
"test_cgrp_get_release",
"test_cgrp_get_ancestors",
+ "test_cgrp_from_id",
};
void test_cgrp_kfunc(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
index 2cc759956e3b..63e776f4176e 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
@@ -193,7 +193,7 @@ out:
cgrp_ls_sleepable__destroy(skel);
}
-static void test_no_rcu_lock(__u64 cgroup_id)
+static void test_yes_rcu_lock(__u64 cgroup_id)
{
struct cgrp_ls_sleepable *skel;
int err;
@@ -204,7 +204,7 @@ static void test_no_rcu_lock(__u64 cgroup_id)
skel->bss->target_pid = syscall(SYS_gettid);
- bpf_program__set_autoload(skel->progs.no_rcu_lock, true);
+ bpf_program__set_autoload(skel->progs.yes_rcu_lock, true);
err = cgrp_ls_sleepable__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto out;
@@ -220,7 +220,7 @@ out:
cgrp_ls_sleepable__destroy(skel);
}
-static void test_rcu_lock(void)
+static void test_no_rcu_lock(void)
{
struct cgrp_ls_sleepable *skel;
int err;
@@ -229,7 +229,7 @@ static void test_rcu_lock(void)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
- bpf_program__set_autoload(skel->progs.yes_rcu_lock, true);
+ bpf_program__set_autoload(skel->progs.no_rcu_lock, true);
err = cgrp_ls_sleepable__load(skel);
ASSERT_ERR(err, "skel_load");
@@ -256,10 +256,10 @@ void test_cgrp_local_storage(void)
test_negative();
if (test__start_subtest("cgroup_iter_sleepable"))
test_cgroup_iter_sleepable(cgroup_fd, cgroup_id);
+ if (test__start_subtest("yes_rcu_lock"))
+ test_yes_rcu_lock(cgroup_id);
if (test__start_subtest("no_rcu_lock"))
- test_no_rcu_lock(cgroup_id);
- if (test__start_subtest("rcu_lock"))
- test_rcu_lock();
+ test_no_rcu_lock();
close(cgroup_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index 224f016b0a53..2a55f717fc07 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -13,6 +13,7 @@
#include "progs/test_cls_redirect.h"
#include "test_cls_redirect.skel.h"
+#include "test_cls_redirect_dynptr.skel.h"
#include "test_cls_redirect_subprogs.skel.h"
#define ENCAP_IP INADDR_LOOPBACK
@@ -446,6 +447,28 @@ cleanup:
close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
}
+static void test_cls_redirect_dynptr(void)
+{
+ struct test_cls_redirect_dynptr *skel;
+ int err;
+
+ skel = test_cls_redirect_dynptr__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ err = test_cls_redirect_dynptr__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+ test_cls_redirect_dynptr__destroy(skel);
+}
+
static void test_cls_redirect_inlined(void)
{
struct test_cls_redirect *skel;
@@ -496,4 +519,6 @@ void test_cls_redirect(void)
test_cls_redirect_inlined();
if (test__start_subtest("cls_redirect_subprogs"))
test_cls_redirect_subprogs();
+ if (test__start_subtest("cls_redirect_dynptr"))
+ test_cls_redirect_dynptr();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
new file mode 100644
index 000000000000..d5fe3d4b936c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -0,0 +1,917 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <regex.h>
+#include <test_progs.h>
+
+#include "bpf/btf.h"
+#include "bpf_util.h"
+#include "linux/filter.h"
+#include "disasm.h"
+
+#define MAX_PROG_TEXT_SZ (32 * 1024)
+
+/* The code in this file serves the sole purpose of executing test cases
+ * specified in the test_cases array. Each test case specifies a program
+ * type, context field offset, and disassembly patterns that correspond
+ * to read and write instructions generated by
+ * verifier.c:convert_ctx_access() for accessing that field.
+ *
+ * For each test case, up to three programs are created:
+ * - One that uses BPF_LDX_MEM to read the context field.
+ * - One that uses BPF_STX_MEM to write to the context field.
+ * - One that uses BPF_ST_MEM to write to the context field.
+ *
+ * The disassembly of each program is then compared with the pattern
+ * specified in the test case.
+ */
+struct test_case {
+ char *name;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ int field_offset;
+ int field_sz;
+ /* Program generated for BPF_ST_MEM uses value 42 by default,
+ * this field allows to specify custom value.
+ */
+ struct {
+ bool use;
+ int value;
+ } st_value;
+ /* Pattern for BPF_LDX_MEM(field_sz, dst, ctx, field_offset) */
+ char *read;
+ /* Pattern for BPF_STX_MEM(field_sz, ctx, src, field_offset) and
+ * BPF_ST_MEM (field_sz, ctx, src, field_offset)
+ */
+ char *write;
+ /* Pattern for BPF_ST_MEM(field_sz, ctx, src, field_offset),
+ * takes priority over `write`.
+ */
+ char *write_st;
+ /* Pattern for BPF_STX_MEM (field_sz, ctx, src, field_offset),
+ * takes priority over `write`.
+ */
+ char *write_stx;
+};
+
+#define N(_prog_type, type, field, name_extra...) \
+ .name = #_prog_type "." #field name_extra, \
+ .prog_type = BPF_PROG_TYPE_##_prog_type, \
+ .field_offset = offsetof(type, field), \
+ .field_sz = sizeof(typeof(((type *)NULL)->field))
+
+static struct test_case test_cases[] = {
+/* Sign extension on s390 changes the pattern */
+#if defined(__x86_64__) || defined(__aarch64__)
+ {
+ N(SCHED_CLS, struct __sk_buff, tstamp),
+ .read = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);"
+ "w11 &= 160;"
+ "if w11 != 0xa0 goto pc+2;"
+ "$dst = 0;"
+ "goto pc+1;"
+ "$dst = *(u64 *)($ctx + sk_buff::tstamp);",
+ .write = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);"
+ "if w11 & 0x80 goto pc+1;"
+ "goto pc+2;"
+ "w11 &= -33;"
+ "*(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset) = r11;"
+ "*(u64 *)($ctx + sk_buff::tstamp) = $src;",
+ },
+#endif
+ {
+ N(SCHED_CLS, struct __sk_buff, priority),
+ .read = "$dst = *(u32 *)($ctx + sk_buff::priority);",
+ .write = "*(u32 *)($ctx + sk_buff::priority) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, mark),
+ .read = "$dst = *(u32 *)($ctx + sk_buff::mark);",
+ .write = "*(u32 *)($ctx + sk_buff::mark) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, cb[0]),
+ .read = "$dst = *(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data));",
+ .write = "*(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data)) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, tc_classid),
+ .read = "$dst = *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid));",
+ .write = "*(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, tc_index),
+ .read = "$dst = *(u16 *)($ctx + sk_buff::tc_index);",
+ .write = "*(u16 *)($ctx + sk_buff::tc_index) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, queue_mapping),
+ .read = "$dst = *(u16 *)($ctx + sk_buff::queue_mapping);",
+ .write_stx = "if $src >= 0xffff goto pc+1;"
+ "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;",
+ .write_st = "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;",
+ },
+ {
+ /* This is a corner case in filter.c:bpf_convert_ctx_access() */
+ N(SCHED_CLS, struct __sk_buff, queue_mapping, ".ushrt_max"),
+ .st_value = { true, USHRT_MAX },
+ .write_st = "goto pc+0;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, bound_dev_if),
+ .read = "$dst = *(u32 *)($ctx + sock_common::skc_bound_dev_if);",
+ .write = "*(u32 *)($ctx + sock_common::skc_bound_dev_if) = $src;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, mark),
+ .read = "$dst = *(u32 *)($ctx + sock::sk_mark);",
+ .write = "*(u32 *)($ctx + sock::sk_mark) = $src;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, priority),
+ .read = "$dst = *(u32 *)($ctx + sock::sk_priority);",
+ .write = "*(u32 *)($ctx + sock::sk_priority) = $src;",
+ },
+ {
+ N(SOCK_OPS, struct bpf_sock_ops, replylong[0]),
+ .read = "$dst = *(u32 *)($ctx + bpf_sock_ops_kern::replylong);",
+ .write = "*(u32 *)($ctx + bpf_sock_ops_kern::replylong) = $src;",
+ },
+ {
+ N(CGROUP_SYSCTL, struct bpf_sysctl, file_pos),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "$dst = *(u32 *)($dst +0);",
+ .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "*(u32 *)(r9 +0) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);",
+#else
+ .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "$dst = *(u32 *)($dst +4);",
+ .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "*(u32 *)(r9 +4) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);",
+#endif
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, sk),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::sk);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, level),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::level);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::level) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optname),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optname);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::optname) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optlen),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optlen);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::optlen) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, retval),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::current_task);"
+ "$dst = *(u64 *)($dst + task_struct::bpf_ctx);"
+ "$dst = *(u32 *)($dst + bpf_cg_run_ctx::retval);",
+ .write = "*(u64 *)($ctx + bpf_sockopt_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sockopt_kern::current_task);"
+ "r9 = *(u64 *)(r9 + task_struct::bpf_ctx);"
+ "*(u32 *)(r9 + bpf_cg_run_ctx::retval) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sockopt_kern::tmp_reg);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optval),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optval_end),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval_end);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+};
+
+#undef N
+
+static regex_t *ident_regex;
+static regex_t *field_regex;
+
+static char *skip_space(char *str)
+{
+ while (*str && isspace(*str))
+ ++str;
+ return str;
+}
+
+static char *skip_space_and_semi(char *str)
+{
+ while (*str && (isspace(*str) || *str == ';'))
+ ++str;
+ return str;
+}
+
+static char *match_str(char *str, char *prefix)
+{
+ while (*str && *prefix && *str == *prefix) {
+ ++str;
+ ++prefix;
+ }
+ if (*prefix)
+ return NULL;
+ return str;
+}
+
+static char *match_number(char *str, int num)
+{
+ char *next;
+ int snum = strtol(str, &next, 10);
+
+ if (next - str == 0 || num != snum)
+ return NULL;
+
+ return next;
+}
+
+static int find_field_offset_aux(struct btf *btf, int btf_id, char *field_name, int off)
+{
+ const struct btf_type *type = btf__type_by_id(btf, btf_id);
+ const struct btf_member *m;
+ __u16 mnum;
+ int i;
+
+ if (!type) {
+ PRINT_FAIL("Can't find btf_type for id %d\n", btf_id);
+ return -1;
+ }
+
+ if (!btf_is_struct(type) && !btf_is_union(type)) {
+ PRINT_FAIL("BTF id %d is not struct or union\n", btf_id);
+ return -1;
+ }
+
+ m = btf_members(type);
+ mnum = btf_vlen(type);
+
+ for (i = 0; i < mnum; ++i, ++m) {
+ const char *mname = btf__name_by_offset(btf, m->name_off);
+
+ if (strcmp(mname, "") == 0) {
+ int msize = find_field_offset_aux(btf, m->type, field_name,
+ off + m->offset);
+ if (msize >= 0)
+ return msize;
+ }
+
+ if (strcmp(mname, field_name))
+ continue;
+
+ return (off + m->offset) / 8;
+ }
+
+ return -1;
+}
+
+static int find_field_offset(struct btf *btf, char *pattern, regmatch_t *matches)
+{
+ int type_sz = matches[1].rm_eo - matches[1].rm_so;
+ int field_sz = matches[2].rm_eo - matches[2].rm_so;
+ char *type = pattern + matches[1].rm_so;
+ char *field = pattern + matches[2].rm_so;
+ char field_str[128] = {};
+ char type_str[128] = {};
+ int btf_id, field_offset;
+
+ if (type_sz >= sizeof(type_str)) {
+ PRINT_FAIL("Malformed pattern: type ident is too long: %d\n", type_sz);
+ return -1;
+ }
+
+ if (field_sz >= sizeof(field_str)) {
+ PRINT_FAIL("Malformed pattern: field ident is too long: %d\n", field_sz);
+ return -1;
+ }
+
+ strncpy(type_str, type, type_sz);
+ strncpy(field_str, field, field_sz);
+ btf_id = btf__find_by_name(btf, type_str);
+ if (btf_id < 0) {
+ PRINT_FAIL("No BTF info for type %s\n", type_str);
+ return -1;
+ }
+
+ field_offset = find_field_offset_aux(btf, btf_id, field_str, 0);
+ if (field_offset < 0) {
+ PRINT_FAIL("No BTF info for field %s::%s\n", type_str, field_str);
+ return -1;
+ }
+
+ return field_offset;
+}
+
+static regex_t *compile_regex(char *pat)
+{
+ regex_t *re;
+ int err;
+
+ re = malloc(sizeof(regex_t));
+ if (!re) {
+ PRINT_FAIL("Can't alloc regex\n");
+ return NULL;
+ }
+
+ err = regcomp(re, pat, REG_EXTENDED);
+ if (err) {
+ char errbuf[512];
+
+ regerror(err, re, errbuf, sizeof(errbuf));
+ PRINT_FAIL("Can't compile regex: %s\n", errbuf);
+ free(re);
+ return NULL;
+ }
+
+ return re;
+}
+
+static void free_regex(regex_t *re)
+{
+ if (!re)
+ return;
+
+ regfree(re);
+ free(re);
+}
+
+static u32 max_line_len(char *str)
+{
+ u32 max_line = 0;
+ char *next = str;
+
+ while (next) {
+ next = strchr(str, '\n');
+ if (next) {
+ max_line = max_t(u32, max_line, (next - str));
+ str = next + 1;
+ } else {
+ max_line = max_t(u32, max_line, strlen(str));
+ }
+ }
+
+ return min(max_line, 60u);
+}
+
+/* Print strings `pattern_origin` and `text_origin` side by side,
+ * assume `pattern_pos` and `text_pos` designate location within
+ * corresponding origin string where match diverges.
+ * The output should look like:
+ *
+ * Can't match disassembly(left) with pattern(right):
+ * r2 = *(u64 *)(r1 +0) ; $dst = *(u64 *)($ctx + bpf_sockopt_kern::sk1)
+ * ^ ^
+ * r0 = 0 ;
+ * exit ;
+ */
+static void print_match_error(FILE *out,
+ char *pattern_origin, char *text_origin,
+ char *pattern_pos, char *text_pos)
+{
+ char *pattern = pattern_origin;
+ char *text = text_origin;
+ int middle = max_line_len(text) + 2;
+
+ fprintf(out, "Can't match disassembly(left) with pattern(right):\n");
+ while (*pattern || *text) {
+ int column = 0;
+ int mark1 = -1;
+ int mark2 = -1;
+
+ /* Print one line from text */
+ while (*text && *text != '\n') {
+ if (text == text_pos)
+ mark1 = column;
+ fputc(*text, out);
+ ++text;
+ ++column;
+ }
+ if (text == text_pos)
+ mark1 = column;
+
+ /* Pad to the middle */
+ while (column < middle) {
+ fputc(' ', out);
+ ++column;
+ }
+ fputs("; ", out);
+ column += 3;
+
+ /* Print one line from pattern, pattern lines are terminated by ';' */
+ while (*pattern && *pattern != ';') {
+ if (pattern == pattern_pos)
+ mark2 = column;
+ fputc(*pattern, out);
+ ++pattern;
+ ++column;
+ }
+ if (pattern == pattern_pos)
+ mark2 = column;
+
+ fputc('\n', out);
+ if (*pattern)
+ ++pattern;
+ if (*text)
+ ++text;
+
+ /* If pattern and text diverge at this line, print an
+ * additional line with '^' marks, highlighting
+ * positions where match fails.
+ */
+ if (mark1 > 0 || mark2 > 0) {
+ for (column = 0; column <= max(mark1, mark2); ++column) {
+ if (column == mark1 || column == mark2)
+ fputc('^', out);
+ else
+ fputc(' ', out);
+ }
+ fputc('\n', out);
+ }
+ }
+}
+
+/* Test if `text` matches `pattern`. Pattern consists of the following elements:
+ *
+ * - Field offset references:
+ *
+ * <type>::<field>
+ *
+ * When such reference is encountered BTF is used to compute numerical
+ * value for the offset of <field> in <type>. The `text` is expected to
+ * contain matching numerical value.
+ *
+ * - Field groups:
+ *
+ * $(<type>::<field> [+ <type>::<field>]*)
+ *
+ * Allows to specify an offset that is a sum of multiple field offsets.
+ * The `text` is expected to contain matching numerical value.
+ *
+ * - Variable references, e.g. `$src`, `$dst`, `$ctx`.
+ * These are substitutions specified in `reg_map` array.
+ * If a substring of pattern is equal to `reg_map[i][0]` the `text` is
+ * expected to contain `reg_map[i][1]` in the matching position.
+ *
+ * - Whitespace is ignored, ';' counts as whitespace for `pattern`.
+ *
+ * - Any other characters, `pattern` and `text` should match one-to-one.
+ *
+ * Example of a pattern:
+ *
+ * __________ fields group ________________
+ * ' '
+ * *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src;
+ * ^^^^ '______________________'
+ * variable reference field offset reference
+ */
+static bool match_pattern(struct btf *btf, char *pattern, char *text, char *reg_map[][2])
+{
+ char *pattern_origin = pattern;
+ char *text_origin = text;
+ regmatch_t matches[3];
+
+_continue:
+ while (*pattern) {
+ if (!*text)
+ goto err;
+
+ /* Skip whitespace */
+ if (isspace(*pattern) || *pattern == ';') {
+ if (!isspace(*text) && text != text_origin && isalnum(text[-1]))
+ goto err;
+ pattern = skip_space_and_semi(pattern);
+ text = skip_space(text);
+ continue;
+ }
+
+ /* Check for variable references */
+ for (int i = 0; reg_map[i][0]; ++i) {
+ char *pattern_next, *text_next;
+
+ pattern_next = match_str(pattern, reg_map[i][0]);
+ if (!pattern_next)
+ continue;
+
+ text_next = match_str(text, reg_map[i][1]);
+ if (!text_next)
+ goto err;
+
+ pattern = pattern_next;
+ text = text_next;
+ goto _continue;
+ }
+
+ /* Match field group:
+ * $(sk_buff::cb + qdisc_skb_cb::tc_classid)
+ */
+ if (strncmp(pattern, "$(", 2) == 0) {
+ char *group_start = pattern, *text_next;
+ int acc_offset = 0;
+
+ pattern += 2;
+
+ for (;;) {
+ int field_offset;
+
+ pattern = skip_space(pattern);
+ if (!*pattern) {
+ PRINT_FAIL("Unexpected end of pattern\n");
+ goto err;
+ }
+
+ if (*pattern == ')') {
+ ++pattern;
+ break;
+ }
+
+ if (*pattern == '+') {
+ ++pattern;
+ continue;
+ }
+
+ printf("pattern: %s\n", pattern);
+ if (regexec(field_regex, pattern, 3, matches, 0) != 0) {
+ PRINT_FAIL("Field reference expected\n");
+ goto err;
+ }
+
+ field_offset = find_field_offset(btf, pattern, matches);
+ if (field_offset < 0)
+ goto err;
+
+ pattern += matches[0].rm_eo;
+ acc_offset += field_offset;
+ }
+
+ text_next = match_number(text, acc_offset);
+ if (!text_next) {
+ PRINT_FAIL("No match for group offset %.*s (%d)\n",
+ (int)(pattern - group_start),
+ group_start,
+ acc_offset);
+ goto err;
+ }
+ text = text_next;
+ }
+
+ /* Match field reference:
+ * sk_buff::cb
+ */
+ if (regexec(field_regex, pattern, 3, matches, 0) == 0) {
+ int field_offset;
+ char *text_next;
+
+ field_offset = find_field_offset(btf, pattern, matches);
+ if (field_offset < 0)
+ goto err;
+
+ text_next = match_number(text, field_offset);
+ if (!text_next) {
+ PRINT_FAIL("No match for field offset %.*s (%d)\n",
+ (int)matches[0].rm_eo, pattern, field_offset);
+ goto err;
+ }
+
+ pattern += matches[0].rm_eo;
+ text = text_next;
+ continue;
+ }
+
+ /* If pattern points to identifier not followed by '::'
+ * skip the identifier to avoid n^2 application of the
+ * field reference rule.
+ */
+ if (regexec(ident_regex, pattern, 1, matches, 0) == 0) {
+ if (strncmp(pattern, text, matches[0].rm_eo) != 0)
+ goto err;
+
+ pattern += matches[0].rm_eo;
+ text += matches[0].rm_eo;
+ continue;
+ }
+
+ /* Match literally */
+ if (*pattern != *text)
+ goto err;
+
+ ++pattern;
+ ++text;
+ }
+
+ return true;
+
+err:
+ test__fail();
+ print_match_error(stdout, pattern_origin, text_origin, pattern, text);
+ return false;
+}
+
+/* Request BPF program instructions after all rewrites are applied,
+ * e.g. verifier.c:convert_ctx_access() is done.
+ */
+static int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ __u32 xlated_prog_len;
+ __u32 buf_element_size = sizeof(struct bpf_insn);
+
+ if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("bpf_prog_get_info_by_fd failed");
+ return -1;
+ }
+
+ xlated_prog_len = info.xlated_prog_len;
+ if (xlated_prog_len % buf_element_size) {
+ printf("Program length %d is not multiple of %d\n",
+ xlated_prog_len, buf_element_size);
+ return -1;
+ }
+
+ *cnt = xlated_prog_len / buf_element_size;
+ *buf = calloc(*cnt, buf_element_size);
+ if (!buf) {
+ perror("can't allocate xlated program buffer");
+ return -ENOMEM;
+ }
+
+ bzero(&info, sizeof(info));
+ info.xlated_prog_len = xlated_prog_len;
+ info.xlated_prog_insns = (__u64)(unsigned long)*buf;
+ if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("second bpf_prog_get_info_by_fd failed");
+ goto out_free_buf;
+ }
+
+ return 0;
+
+out_free_buf:
+ free(*buf);
+ return -1;
+}
+
+static void print_insn(void *private_data, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vfprintf((FILE *)private_data, fmt, args);
+ va_end(args);
+}
+
+/* Disassemble instructions to a stream */
+static void print_xlated(FILE *out, struct bpf_insn *insn, __u32 len)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn,
+ .cb_call = NULL,
+ .cb_imm = NULL,
+ .private_data = out,
+ };
+ bool double_insn = false;
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
+
+ double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+ print_bpf_insn(&cbs, insn + i, true);
+ }
+}
+
+/* We share code with kernel BPF disassembler, it adds '(FF) ' prefix
+ * for each instruction (FF stands for instruction `code` byte).
+ * This function removes the prefix inplace for each line in `str`.
+ */
+static void remove_insn_prefix(char *str, int size)
+{
+ const int prefix_size = 5;
+
+ int write_pos = 0, read_pos = prefix_size;
+ int len = strlen(str);
+ char c;
+
+ size = min(size, len);
+
+ while (read_pos < size) {
+ c = str[read_pos++];
+ if (c == 0)
+ break;
+ str[write_pos++] = c;
+ if (c == '\n')
+ read_pos += prefix_size;
+ }
+ str[write_pos] = 0;
+}
+
+struct prog_info {
+ char *prog_kind;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ struct bpf_insn *prog;
+ u32 prog_len;
+};
+
+static void match_program(struct btf *btf,
+ struct prog_info *pinfo,
+ char *pattern,
+ char *reg_map[][2],
+ bool skip_first_insn)
+{
+ struct bpf_insn *buf = NULL;
+ int err = 0, prog_fd = 0;
+ FILE *prog_out = NULL;
+ char *text = NULL;
+ __u32 cnt = 0;
+
+ text = calloc(MAX_PROG_TEXT_SZ, 1);
+ if (!text) {
+ PRINT_FAIL("Can't allocate %d bytes\n", MAX_PROG_TEXT_SZ);
+ goto out;
+ }
+
+ // TODO: log level
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ opts.log_buf = text;
+ opts.log_size = MAX_PROG_TEXT_SZ;
+ opts.log_level = 1 | 2 | 4;
+ opts.expected_attach_type = pinfo->expected_attach_type;
+
+ prog_fd = bpf_prog_load(pinfo->prog_type, NULL, "GPL",
+ pinfo->prog, pinfo->prog_len, &opts);
+ if (prog_fd < 0) {
+ PRINT_FAIL("Can't load program, errno %d (%s), verifier log:\n%s\n",
+ errno, strerror(errno), text);
+ goto out;
+ }
+
+ memset(text, 0, MAX_PROG_TEXT_SZ);
+
+ err = get_xlated_program(prog_fd, &buf, &cnt);
+ if (err) {
+ PRINT_FAIL("Can't load back BPF program\n");
+ goto out;
+ }
+
+ prog_out = fmemopen(text, MAX_PROG_TEXT_SZ - 1, "w");
+ if (!prog_out) {
+ PRINT_FAIL("Can't open memory stream\n");
+ goto out;
+ }
+ if (skip_first_insn)
+ print_xlated(prog_out, buf + 1, cnt - 1);
+ else
+ print_xlated(prog_out, buf, cnt);
+ fclose(prog_out);
+ remove_insn_prefix(text, MAX_PROG_TEXT_SZ);
+
+ ASSERT_TRUE(match_pattern(btf, pattern, text, reg_map),
+ pinfo->prog_kind);
+
+out:
+ if (prog_fd)
+ close(prog_fd);
+ free(buf);
+ free(text);
+}
+
+static void run_one_testcase(struct btf *btf, struct test_case *test)
+{
+ struct prog_info pinfo = {};
+ int bpf_sz;
+
+ if (!test__start_subtest(test->name))
+ return;
+
+ switch (test->field_sz) {
+ case 8:
+ bpf_sz = BPF_DW;
+ break;
+ case 4:
+ bpf_sz = BPF_W;
+ break;
+ case 2:
+ bpf_sz = BPF_H;
+ break;
+ case 1:
+ bpf_sz = BPF_B;
+ break;
+ default:
+ PRINT_FAIL("Unexpected field size: %d, want 8,4,2 or 1\n", test->field_sz);
+ return;
+ }
+
+ pinfo.prog_type = test->prog_type;
+ pinfo.expected_attach_type = test->expected_attach_type;
+
+ if (test->read) {
+ struct bpf_insn ldx_prog[] = {
+ BPF_LDX_MEM(bpf_sz, BPF_REG_2, BPF_REG_1, test->field_offset),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ char *reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$dst", "r2" },
+ {}
+ };
+
+ pinfo.prog_kind = "LDX";
+ pinfo.prog = ldx_prog;
+ pinfo.prog_len = ARRAY_SIZE(ldx_prog);
+ match_program(btf, &pinfo, test->read, reg_map, false);
+ }
+
+ if (test->write || test->write_st || test->write_stx) {
+ struct bpf_insn stx_prog[] = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_STX_MEM(bpf_sz, BPF_REG_1, BPF_REG_2, test->field_offset),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ char *stx_reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$src", "r2" },
+ {}
+ };
+ struct bpf_insn st_prog[] = {
+ BPF_ST_MEM(bpf_sz, BPF_REG_1, test->field_offset,
+ test->st_value.use ? test->st_value.value : 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ char *st_reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$src", "42" },
+ {}
+ };
+
+ if (test->write || test->write_stx) {
+ char *pattern = test->write_stx ? test->write_stx : test->write;
+
+ pinfo.prog_kind = "STX";
+ pinfo.prog = stx_prog;
+ pinfo.prog_len = ARRAY_SIZE(stx_prog);
+ match_program(btf, &pinfo, pattern, stx_reg_map, true);
+ }
+
+ if (test->write || test->write_st) {
+ char *pattern = test->write_st ? test->write_st : test->write;
+
+ pinfo.prog_kind = "ST";
+ pinfo.prog = st_prog;
+ pinfo.prog_len = ARRAY_SIZE(st_prog);
+ match_program(btf, &pinfo, pattern, st_reg_map, false);
+ }
+ }
+
+ test__end_subtest();
+}
+
+void test_ctx_rewrite(void)
+{
+ struct btf *btf;
+ int i;
+
+ field_regex = compile_regex("^([[:alpha:]_][[:alnum:]_]+)::([[:alpha:]_][[:alnum:]_]+)");
+ ident_regex = compile_regex("^[[:alpha:]_][[:alnum:]_]+");
+ if (!field_regex || !ident_regex)
+ return;
+
+ btf = btf__load_vmlinux_btf();
+ if (!btf) {
+ PRINT_FAIL("Can't load vmlinux BTF, errno %d (%s)\n", errno, strerror(errno));
+ goto out;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); ++i)
+ run_one_testcase(btf, &test_cases[i]);
+
+out:
+ btf__free(btf);
+ free_regex(field_regex);
+ free_regex(ident_regex);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
index 2853883b7cbb..5c0ebe6ba866 100644
--- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
+++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
@@ -10,14 +10,6 @@
#include "network_helpers.h"
#include "decap_sanity.skel.h"
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto fail; \
- })
-
#define NS_TEST "decap_sanity_ns"
#define IPV6_IFACE_ADDR "face::1"
#define UDP_TEST_PORT 7777
@@ -37,9 +29,9 @@ void test_decap_sanity(void)
if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
return;
- SYS("ip netns add %s", NS_TEST);
- SYS("ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR);
- SYS("ip -net %s link set dev lo up", NS_TEST);
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
nstoken = open_netns(NS_TEST);
if (!ASSERT_OK_PTR(nstoken, "open_netns"))
@@ -80,6 +72,6 @@ fail:
bpf_tc_hook_destroy(&qdisc_hook);
close_netns(nstoken);
}
- system("ip netns del " NS_TEST " &> /dev/null");
+ SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
decap_sanity__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index b99264ec0d9c..d176c34a7d2e 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -2,20 +2,32 @@
/* Copyright (c) 2022 Facebook */
#include <test_progs.h>
+#include <network_helpers.h>
#include "dynptr_fail.skel.h"
#include "dynptr_success.skel.h"
-static const char * const success_tests[] = {
- "test_read_write",
- "test_data_slice",
- "test_ringbuf",
+enum test_setup_type {
+ SETUP_SYSCALL_SLEEP,
+ SETUP_SKB_PROG,
};
-static void verify_success(const char *prog_name)
+static struct {
+ const char *prog_name;
+ enum test_setup_type type;
+} success_tests[] = {
+ {"test_read_write", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_data", SETUP_SYSCALL_SLEEP},
+ {"test_ringbuf", SETUP_SYSCALL_SLEEP},
+ {"test_skb_readonly", SETUP_SKB_PROG},
+ {"test_dynptr_skb_data", SETUP_SKB_PROG},
+};
+
+static void verify_success(const char *prog_name, enum test_setup_type setup_type)
{
struct dynptr_success *skel;
struct bpf_program *prog;
struct bpf_link *link;
+ int err;
skel = dynptr_success__open();
if (!ASSERT_OK_PTR(skel, "dynptr_success__open"))
@@ -23,23 +35,53 @@ static void verify_success(const char *prog_name)
skel->bss->pid = getpid();
- dynptr_success__load(skel);
- if (!ASSERT_OK_PTR(skel, "dynptr_success__load"))
- goto cleanup;
-
prog = bpf_object__find_program_by_name(skel->obj, prog_name);
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
goto cleanup;
- link = bpf_program__attach(prog);
- if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ bpf_program__set_autoload(prog, true);
+
+ err = dynptr_success__load(skel);
+ if (!ASSERT_OK(err, "dynptr_success__load"))
goto cleanup;
- usleep(1);
+ switch (setup_type) {
+ case SETUP_SYSCALL_SLEEP:
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
- ASSERT_EQ(skel->bss->err, 0, "err");
+ usleep(1);
+
+ bpf_link__destroy(link);
+ break;
+ case SETUP_SKB_PROG:
+ {
+ int prog_fd;
+ char buf[64];
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
- bpf_link__destroy(link);
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
+ }
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
cleanup:
dynptr_success__destroy(skel);
@@ -50,10 +92,10 @@ void test_dynptr(void)
int i;
for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
- if (!test__start_subtest(success_tests[i]))
+ if (!test__start_subtest(success_tests[i].prog_name))
continue;
- verify_success(success_tests[i]);
+ verify_success(success_tests[i].prog_name, success_tests[i].type);
}
RUN_TESTS(dynptr_fail);
diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
index 32dd731e9070..3b77d8a422db 100644
--- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
@@ -4,11 +4,6 @@
#include <net/if.h>
#include "empty_skb.skel.h"
-#define SYS(cmd) ({ \
- if (!ASSERT_OK(system(cmd), (cmd))) \
- goto out; \
-})
-
void test_empty_skb(void)
{
LIBBPF_OPTS(bpf_test_run_opts, tattr);
@@ -93,18 +88,18 @@ void test_empty_skb(void)
},
};
- SYS("ip netns add empty_skb");
+ SYS(out, "ip netns add empty_skb");
tok = open_netns("empty_skb");
- SYS("ip link add veth0 type veth peer veth1");
- SYS("ip link set dev veth0 up");
- SYS("ip link set dev veth1 up");
- SYS("ip addr add 10.0.0.1/8 dev veth0");
- SYS("ip addr add 10.0.0.2/8 dev veth1");
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ SYS(out, "ip link set dev veth0 up");
+ SYS(out, "ip link set dev veth1 up");
+ SYS(out, "ip addr add 10.0.0.1/8 dev veth0");
+ SYS(out, "ip addr add 10.0.0.2/8 dev veth1");
veth_ifindex = if_nametoindex("veth0");
- SYS("ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2");
- SYS("ip link set ipip0 up");
- SYS("ip addr add 192.168.1.1/16 dev ipip0");
+ SYS(out, "ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2");
+ SYS(out, "ip link set ipip0 up");
+ SYS(out, "ip addr add 192.168.1.1/16 dev ipip0");
ipip_ifindex = if_nametoindex("ipip0");
bpf_obj = empty_skb__open_and_load();
@@ -142,5 +137,5 @@ out:
empty_skb__destroy(bpf_obj);
if (tok)
close_netns(tok);
- system("ip netns del empty_skb");
+ SYS_NOFAIL("ip netns del empty_skb");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index 61ccddccf485..429393caf612 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -8,14 +8,6 @@
#include "network_helpers.h"
#include "fib_lookup.skel.h"
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto fail; \
- })
-
#define NS_TEST "fib_lookup_ns"
#define IPV6_IFACE_ADDR "face::face"
#define IPV6_NUD_FAILED_ADDR "face::1"
@@ -59,16 +51,16 @@ static int setup_netns(void)
{
int err;
- SYS("ip link add veth1 type veth peer name veth2");
- SYS("ip link set dev veth1 up");
+ SYS(fail, "ip link add veth1 type veth peer name veth2");
+ SYS(fail, "ip link set dev veth1 up");
- SYS("ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR);
- SYS("ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR);
- SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC);
+ SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC);
- SYS("ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR);
- SYS("ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
- SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
+ SYS(fail, "ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1");
if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)"))
@@ -140,7 +132,7 @@ void test_fib_lookup(void)
return;
prog_fd = bpf_program__fd(skel->progs.fib_lookup);
- SYS("ip netns add %s", NS_TEST);
+ SYS(fail, "ip netns add %s", NS_TEST);
nstoken = open_netns(NS_TEST);
if (!ASSERT_OK_PTR(nstoken, "open_netns"))
@@ -182,6 +174,6 @@ void test_fib_lookup(void)
fail:
if (nstoken)
close_netns(nstoken);
- system("ip netns del " NS_TEST " &> /dev/null");
+ SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
fib_lookup__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 7acca37a3d2b..c4773173a4e4 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -346,6 +346,30 @@ struct test tests[] = {
.retval = BPF_OK,
},
{
+ .name = "ipv6-empty-flow-label",
+ .pkt.ipv6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_TCP,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph.flow_lbl = { 0x00, 0x00, 0x00 },
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct ipv6hdr),
+ .addr_proto = ETH_P_IPV6,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .sport = 80,
+ .dport = 8080,
+ },
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ .retval = BPF_OK,
+ },
+ {
.name = "ipip-encap",
.pkt.ipip = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index 9c1a18573ffd..1eab286b14fe 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -93,4 +93,6 @@ void test_l4lb_all(void)
test_l4lb("test_l4lb.bpf.o");
if (test__start_subtest("l4lb_noinline"))
test_l4lb("test_l4lb_noinline.bpf.o");
+ if (test__start_subtest("l4lb_noinline_dynptr"))
+ test_l4lb("test_l4lb_noinline_dynptr.bpf.o");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
index f4ffdcabf4e4..239e1c5753b0 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
@@ -141,7 +141,7 @@ void test_log_fixup(void)
if (test__start_subtest("bad_core_relo_trunc_partial"))
bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */);
if (test__start_subtest("bad_core_relo_trunc_full"))
- bad_core_relo(250, TRUNC_FULL /* truncate also libbpf's message patch */);
+ bad_core_relo(210, TRUNC_FULL /* truncate also libbpf's message patch */);
if (test__start_subtest("bad_core_relo_subprog"))
bad_core_relo_subprog();
if (test__start_subtest("missing_map"))
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
index 3533a4ecad01..8743df599567 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -4,70 +4,160 @@
#include "map_kptr.skel.h"
#include "map_kptr_fail.skel.h"
+#include "rcu_tasks_trace_gp.skel.h"
static void test_map_kptr_success(bool test_run)
{
+ LIBBPF_OPTS(bpf_test_run_opts, lopts);
LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.repeat = 1,
);
+ int key = 0, ret, cpu;
struct map_kptr *skel;
- int key = 0, ret;
- char buf[16];
+ char buf[16], *pbuf;
skel = map_kptr__open_and_load();
if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
return;
- ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref), &opts);
- ASSERT_OK(ret, "test_map_kptr_ref refcount");
- ASSERT_OK(opts.retval, "test_map_kptr_ref retval");
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref1), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref1 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref1 retval");
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts);
ASSERT_OK(ret, "test_map_kptr_ref2 refcount");
ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval");
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref1), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref1 refcount");
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref1 retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref2), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref2 refcount");
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref2 retval");
+
if (test_run)
goto exit;
+ cpu = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(cpu, 0, "libbpf_num_possible_cpus"))
+ goto exit;
+
+ pbuf = calloc(cpu, sizeof(buf));
+ if (!ASSERT_OK_PTR(pbuf, "calloc(pbuf)"))
+ goto exit;
+
ret = bpf_map__update_elem(skel->maps.array_map,
&key, sizeof(key), buf, sizeof(buf), 0);
ASSERT_OK(ret, "array_map update");
- ret = bpf_map__update_elem(skel->maps.array_map,
- &key, sizeof(key), buf, sizeof(buf), 0);
- ASSERT_OK(ret, "array_map update2");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__update_elem(skel->maps.pcpu_array_map,
+ &key, sizeof(key), pbuf, cpu * sizeof(buf), 0);
+ ASSERT_OK(ret, "pcpu_array_map update");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
- ret = bpf_map__update_elem(skel->maps.hash_map,
- &key, sizeof(key), buf, sizeof(buf), 0);
- ASSERT_OK(ret, "hash_map update");
ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "pcpu_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
- ret = bpf_map__update_elem(skel->maps.hash_malloc_map,
- &key, sizeof(key), buf, sizeof(buf), 0);
- ASSERT_OK(ret, "hash_malloc_map update");
ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "hash_malloc_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "pcpu_hash_malloc_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
- ret = bpf_map__update_elem(skel->maps.lru_hash_map,
- &key, sizeof(key), buf, sizeof(buf), 0);
- ASSERT_OK(ret, "lru_hash_map update");
ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "lru_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "lru_pcpu_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref_del), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref_del delete");
+ skel->data->ref--;
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref_del retval");
+
+ free(pbuf);
exit:
map_kptr__destroy(skel);
}
-void test_map_kptr(void)
+static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu)
{
- if (test__start_subtest("success")) {
+ long gp_seq = READ_ONCE(rcu->bss->gp_seq);
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ if (!ASSERT_OK(bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.do_call_rcu_tasks_trace),
+ &opts), "do_call_rcu_tasks_trace"))
+ return -EFAULT;
+ if (!ASSERT_OK(opts.retval, "opts.retval == 0"))
+ return -EFAULT;
+ while (gp_seq == READ_ONCE(rcu->bss->gp_seq))
+ sched_yield();
+ return 0;
+}
+
+void serial_test_map_kptr(void)
+{
+ struct rcu_tasks_trace_gp *skel;
+
+ RUN_TESTS(map_kptr_fail);
+
+ skel = rcu_tasks_trace_gp__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load"))
+ return;
+ if (!ASSERT_OK(rcu_tasks_trace_gp__attach(skel), "rcu_tasks_trace_gp__attach"))
+ goto end;
+
+ if (test__start_subtest("success-map")) {
+ test_map_kptr_success(true);
+
+ ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
+ ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ /* Observe refcount dropping to 1 on bpf_map_free_deferred */
test_map_kptr_success(false);
- /* Do test_run twice, so that we see refcount going back to 1
- * after we leave it in map from first iteration.
- */
+
+ ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
+ ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ /* Observe refcount dropping to 1 on synchronous delete elem */
test_map_kptr_success(true);
}
- RUN_TESTS(map_kptr_fail);
+end:
+ rcu_tasks_trace_gp__destroy(skel);
+ return;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 59f08d6d1d53..cd0c42fff7c0 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -7,6 +7,8 @@
#include "network_helpers.h"
#include "mptcp_sock.skel.h"
+#define NS_TEST "mptcp_ns"
+
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
#endif
@@ -138,12 +140,20 @@ out:
static void test_base(void)
{
+ struct nstoken *nstoken = NULL;
int server_fd, cgroup_fd;
cgroup_fd = test__join_cgroup("/mptcp");
if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
return;
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
+
+ nstoken = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto fail;
+
/* without MPTCP */
server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "start_server"))
@@ -157,13 +167,18 @@ with_mptcp:
/* with MPTCP */
server_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "start_mptcp_server"))
- goto close_cgroup_fd;
+ goto fail;
ASSERT_OK(run_test(cgroup_fd, server_fd, true), "run_test mptcp");
close(server_fd);
-close_cgroup_fd:
+fail:
+ if (nstoken)
+ close_netns(nstoken);
+
+ SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
+
close(cgroup_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c
new file mode 100644
index 000000000000..daa952711d8f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_parse_tcp_hdr_opt.skel.h"
+#include "test_parse_tcp_hdr_opt_dynptr.skel.h"
+#include "test_tcp_hdr_options.h"
+
+struct test_pkt {
+ struct ipv6_packet pk6_v6;
+ u8 options[16];
+} __packed;
+
+struct test_pkt pkt = {
+ .pk6_v6.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .pk6_v6.iph.nexthdr = IPPROTO_TCP,
+ .pk6_v6.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .pk6_v6.tcp.urg_ptr = 123,
+ .pk6_v6.tcp.doff = 9, /* 16 bytes of options */
+
+ .options = {
+ TCPOPT_MSS, 4, 0x05, 0xB4, TCPOPT_NOP, TCPOPT_NOP,
+ 0, 6, 0xBB, 0xBB, 0xBB, 0xBB, TCPOPT_EOL
+ },
+};
+
+static void test_parse_opt(void)
+{
+ struct test_parse_tcp_hdr_opt *skel;
+ struct bpf_program *prog;
+ char buf[128];
+ int err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt,
+ .data_size_in = sizeof(pkt),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 3,
+ );
+
+ skel = test_parse_tcp_hdr_opt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr;
+ prog = skel->progs.xdp_ingress_v6;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval");
+ ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id");
+
+ test_parse_tcp_hdr_opt__destroy(skel);
+}
+
+static void test_parse_opt_dynptr(void)
+{
+ struct test_parse_tcp_hdr_opt_dynptr *skel;
+ struct bpf_program *prog;
+ char buf[128];
+ int err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt,
+ .data_size_in = sizeof(pkt),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 3,
+ );
+
+ skel = test_parse_tcp_hdr_opt_dynptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr;
+ prog = skel->progs.xdp_ingress_v6;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval");
+ ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id");
+
+ test_parse_tcp_hdr_opt_dynptr__destroy(skel);
+}
+
+void test_parse_tcp_hdr_opt(void)
+{
+ if (test__start_subtest("parse_tcp_hdr_opt"))
+ test_parse_opt();
+ if (test__start_subtest("parse_tcp_hdr_opt_dynptr"))
+ test_parse_opt_dynptr();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
index 447d8560ecb6..3f1f58d3a729 100644
--- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -25,10 +25,10 @@ static void test_success(void)
bpf_program__set_autoload(skel->progs.get_cgroup_id, true);
bpf_program__set_autoload(skel->progs.task_succ, true);
- bpf_program__set_autoload(skel->progs.no_lock, true);
bpf_program__set_autoload(skel->progs.two_regions, true);
bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+ bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
err = rcu_read_lock__load(skel);
if (!ASSERT_OK(err, "skel_load"))
goto out;
@@ -69,6 +69,7 @@ out:
static const char * const inproper_region_tests[] = {
"miss_lock",
+ "no_lock",
"miss_unlock",
"non_sleepable_rcu_mismatch",
"inproper_sleepable_helper",
@@ -99,7 +100,6 @@ out:
}
static const char * const rcuptr_misuse_tests[] = {
- "task_untrusted_non_rcuptr",
"task_untrusted_rcuptr",
"cross_rcu_region",
};
@@ -128,17 +128,8 @@ out:
void test_rcu_read_lock(void)
{
- struct btf *vmlinux_btf;
int cgroup_fd;
- vmlinux_btf = btf__load_vmlinux_btf();
- if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
- return;
- if (btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG) < 0) {
- test__skip();
- goto out;
- }
-
cgroup_fd = test__join_cgroup("/rcu_read_lock");
if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock"))
goto out;
@@ -153,6 +144,5 @@ void test_rcu_read_lock(void)
if (test__start_subtest("negative_tests_rcuptr_misuse"))
test_rcuptr_misuse();
close(cgroup_fd);
-out:
- btf__free(vmlinux_btf);
+out:;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index bca5e6839ac4..6ee22c3b251a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -137,24 +137,16 @@ static int get_ifaddr(const char *name, char *ifaddr)
return 0;
}
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto fail; \
- })
-
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
struct nstoken *nstoken = NULL;
char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
- SYS("ip link add veth_src type veth peer name veth_src_fwd");
- SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+ SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd");
+ SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd");
- SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
- SYS("ip link set veth_dst address " MAC_DST);
+ SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD);
+ SYS(fail, "ip link set veth_dst address " MAC_DST);
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
goto fail;
@@ -175,27 +167,27 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
goto fail;
- SYS("ip link set veth_src netns " NS_SRC);
- SYS("ip link set veth_src_fwd netns " NS_FWD);
- SYS("ip link set veth_dst_fwd netns " NS_FWD);
- SYS("ip link set veth_dst netns " NS_DST);
+ SYS(fail, "ip link set veth_src netns " NS_SRC);
+ SYS(fail, "ip link set veth_src_fwd netns " NS_FWD);
+ SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD);
+ SYS(fail, "ip link set veth_dst netns " NS_DST);
/** setup in 'src' namespace */
nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto fail;
- SYS("ip addr add " IP4_SRC "/32 dev veth_src");
- SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
- SYS("ip link set dev veth_src up");
+ SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src");
+ SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad");
+ SYS(fail, "ip link set dev veth_src up");
- SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
- SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
- SYS("ip route add " IP6_DST "/128 dev veth_src scope global");
+ SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global");
- SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
+ SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s",
veth_src_fwd_addr);
- SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
+ SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s",
veth_src_fwd_addr);
close_netns(nstoken);
@@ -209,15 +201,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
* needs v4 one in order to start ARP probing. IP4_NET route is added
* to the endpoints so that the ARP processing will reply.
*/
- SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
- SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
- SYS("ip link set dev veth_src_fwd up");
- SYS("ip link set dev veth_dst_fwd up");
+ SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd");
+ SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
+ SYS(fail, "ip link set dev veth_src_fwd up");
+ SYS(fail, "ip link set dev veth_dst_fwd up");
- SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
- SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
- SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
- SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+ SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
+ SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
close_netns(nstoken);
@@ -226,16 +218,16 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
if (!ASSERT_OK_PTR(nstoken, "setns dst"))
goto fail;
- SYS("ip addr add " IP4_DST "/32 dev veth_dst");
- SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
- SYS("ip link set dev veth_dst up");
+ SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst");
+ SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad");
+ SYS(fail, "ip link set dev veth_dst up");
- SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
- SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
- SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
+ SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global");
- SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
- SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
close_netns(nstoken);
@@ -375,7 +367,7 @@ done:
static int test_ping(int family, const char *addr)
{
- SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
+ SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
return 0;
fail:
return -1;
@@ -953,7 +945,7 @@ static int tun_open(char *name)
if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
goto fail;
- SYS("ip link set dev %s up", name);
+ SYS(fail, "ip link set dev %s up", name);
return fd;
fail:
@@ -1076,23 +1068,23 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
/* Setup route and neigh tables */
- SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
- SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+ SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
- SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
- SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+ SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
- SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
- SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+ SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+ SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
" dev tun_src scope global");
- SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
- SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
- SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+ SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+ SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+ SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
" dev tun_src scope global");
- SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+ SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
- SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
- SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
goto fail;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index 07ad457f3370..47f1d482fe39 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -91,30 +91,15 @@
#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto fail; \
- })
-
-#define SYS_NOFAIL(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- system(cmd); \
- })
-
static int config_device(void)
{
- SYS("ip netns add at_ns0");
- SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1");
- SYS("ip link set veth0 netns at_ns0");
- SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1");
- SYS("ip link set dev veth1 up mtu 1500");
- SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0");
- SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500");
+ SYS(fail, "ip netns add at_ns0");
+ SYS(fail, "ip link add veth0 address " MAC_VETH1 " type veth peer name veth1");
+ SYS(fail, "ip link set veth0 netns at_ns0");
+ SYS(fail, "ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1");
+ SYS(fail, "ip link set dev veth1 up mtu 1500");
+ SYS(fail, "ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0");
+ SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up mtu 1500");
return 0;
fail:
@@ -132,23 +117,23 @@ static void cleanup(void)
static int add_vxlan_tunnel(void)
{
/* at_ns0 namespace */
- SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789",
+ SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789",
VXLAN_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip link set dev %s address %s up",
+ SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up",
VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip addr add dev %s %s/24",
+ SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24",
VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s",
+ SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s",
IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0",
+ SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0",
IP4_ADDR2_VETH1, MAC_VETH1);
/* root namespace */
- SYS("ip link add dev %s type vxlan external gbp dstport 4789",
+ SYS(fail, "ip link add dev %s type vxlan external gbp dstport 4789",
VXLAN_TUNL_DEV1);
- SYS("ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
- SYS("ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
- SYS("ip neigh add %s lladdr %s dev %s",
+ SYS(fail, "ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip neigh add %s lladdr %s dev %s",
IP4_ADDR_TUNL_DEV0, MAC_TUNL_DEV0, VXLAN_TUNL_DEV1);
return 0;
@@ -165,26 +150,26 @@ static void delete_vxlan_tunnel(void)
static int add_ip6vxlan_tunnel(void)
{
- SYS("ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0",
+ SYS(fail, "ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0",
IP6_ADDR_VETH0);
- SYS("ip netns exec at_ns0 ip link set dev veth0 up");
- SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1);
- SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1);
- SYS("ip link set dev veth1 up");
+ SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up");
+ SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1);
+ SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1);
+ SYS(fail, "ip link set dev veth1 up");
/* at_ns0 namespace */
- SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789",
+ SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789",
IP6VXLAN_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip addr add dev %s %s/24",
+ SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24",
IP6VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
- SYS("ip netns exec at_ns0 ip link set dev %s address %s up",
+ SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up",
IP6VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
/* root namespace */
- SYS("ip link add dev %s type vxlan external dstport 4789",
+ SYS(fail, "ip link add dev %s type vxlan external dstport 4789",
IP6VXLAN_TUNL_DEV1);
- SYS("ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
- SYS("ip link set dev %s address %s up",
+ SYS(fail, "ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s address %s up",
IP6VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
return 0;
@@ -205,7 +190,7 @@ static void delete_ip6vxlan_tunnel(void)
static int test_ping(int family, const char *addr)
{
- SYS("%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
+ SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
return 0;
fail:
return -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index 7eb049214859..290c21dbe65a 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -29,6 +29,9 @@ static int timer(struct timer *timer_skel)
/* check that timer_cb2() was executed twice */
ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data");
+ /* check that timer_cb3() was executed twice */
+ ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data");
+
/* check that there were no errors in timer execution */
ASSERT_EQ(timer_skel->bss->err, 0, "err");
diff --git a/tools/testing/selftests/bpf/prog_tests/uninit_stack.c b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c
new file mode 100644
index 000000000000..e64c71948491
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "uninit_stack.skel.h"
+
+void test_uninit_stack(void)
+{
+ RUN_TESTS(uninit_stack);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
index 3a13e102c149..e51721df14fc 100644
--- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
@@ -590,7 +590,7 @@ static void *kick_kernel_cb(void *arg)
/* Kick the kernel, causing it to drain the ring buffer and then wake
* up the test thread waiting on epoll.
*/
- syscall(__NR_getrlimit);
+ syscall(__NR_prlimit64);
return NULL;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index d4cd9f873c14..fa3cac5488f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -4,11 +4,10 @@
#define IFINDEX_LO 1
#define XDP_FLAGS_REPLACE (1U << 4)
-void serial_test_xdp_attach(void)
+static void test_xdp_attach(const char *file)
{
__u32 duration = 0, id1, id2, id0 = 0, len;
struct bpf_object *obj1, *obj2, *obj3;
- const char *file = "./test_xdp.bpf.o";
struct bpf_prog_info info = {};
int err, fd1, fd2, fd3;
LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
@@ -85,3 +84,11 @@ out_2:
out_1:
bpf_object__close(obj1);
}
+
+void serial_test_xdp_attach(void)
+{
+ if (test__start_subtest("xdp_attach"))
+ test_xdp_attach("./test_xdp.bpf.o");
+ if (test__start_subtest("xdp_attach_dynptr"))
+ test_xdp_attach("./test_xdp_dynptr.bpf.o");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index 5e3a26b15ec6..d19f79048ff6 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -141,41 +141,33 @@ static const char * const xmit_policy_names[] = {
static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
int bond_both_attach)
{
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- return -1; \
- })
-
- SYS("ip netns add ns_dst");
- SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
- SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
-
- SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s",
+ SYS(fail, "ip netns add ns_dst");
+ SYS(fail, "ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
+ SYS(fail, "ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
+
+ SYS(fail, "ip link add bond1 type bond mode %s xmit_hash_policy %s",
mode_names[mode], xmit_policy_names[xmit_policy]);
- SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
- SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
+ SYS(fail, "ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
+ SYS(fail, "ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
mode_names[mode], xmit_policy_names[xmit_policy]);
- SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
+ SYS(fail, "ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
- SYS("ip link set veth1_1 master bond1");
+ SYS(fail, "ip link set veth1_1 master bond1");
if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
- SYS("ip link set veth1_2 master bond1");
+ SYS(fail, "ip link set veth1_2 master bond1");
} else {
- SYS("ip link set veth1_2 up addrgenmode none");
+ SYS(fail, "ip link set veth1_2 up addrgenmode none");
if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
return -1;
}
- SYS("ip -netns ns_dst link set veth2_1 master bond2");
+ SYS(fail, "ip -netns ns_dst link set veth2_1 master bond2");
if (bond_both_attach == BOND_BOTH_AND_ATTACH)
- SYS("ip -netns ns_dst link set veth2_2 master bond2");
+ SYS(fail, "ip -netns ns_dst link set veth2_2 master bond2");
else
- SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none");
+ SYS(fail, "ip -netns ns_dst link set veth2_2 up addrgenmode none");
/* Load a dummy program on sending side as with veth peer needs to have a
* XDP program loaded as well.
@@ -194,8 +186,8 @@ static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
}
return 0;
-
-#undef SYS
+fail:
+ return -1;
}
static void bonding_cleanup(struct skeletons *skeletons)
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 2666c84dbd01..856cbc29e6a1 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -12,14 +12,6 @@
#include <uapi/linux/netdev.h>
#include "test_xdp_do_redirect.skel.h"
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto out; \
- })
-
struct udp_packet {
struct ethhdr eth;
struct ipv6hdr iph;
@@ -126,19 +118,19 @@ void test_xdp_do_redirect(void)
* iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
* payload.
*/
- SYS("ip netns add testns");
+ SYS(out, "ip netns add testns");
nstoken = open_netns("testns");
if (!ASSERT_OK_PTR(nstoken, "setns"))
goto out;
- SYS("ip link add veth_src type veth peer name veth_dst");
- SYS("ip link set dev veth_src address 00:11:22:33:44:55");
- SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb");
- SYS("ip link set dev veth_src up");
- SYS("ip link set dev veth_dst up");
- SYS("ip addr add dev veth_src fc00::1/64");
- SYS("ip addr add dev veth_dst fc00::2/64");
- SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
+ SYS(out, "ip link add veth_src type veth peer name veth_dst");
+ SYS(out, "ip link set dev veth_src address 00:11:22:33:44:55");
+ SYS(out, "ip link set dev veth_dst address 66:77:88:99:aa:bb");
+ SYS(out, "ip link set dev veth_src up");
+ SYS(out, "ip link set dev veth_dst up");
+ SYS(out, "ip addr add dev veth_src fc00::1/64");
+ SYS(out, "ip addr add dev veth_dst fc00::2/64");
+ SYS(out, "ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
/* We enable forwarding in the test namespace because that will cause
* the packets that go through the kernel stack (with XDP_PASS) to be
@@ -151,7 +143,7 @@ void test_xdp_do_redirect(void)
* code didn't have this, so we keep the test behaviour to make sure the
* bug doesn't resurface.
*/
- SYS("sysctl -qw net.ipv6.conf.all.forwarding=1");
+ SYS(out, "sysctl -qw net.ipv6.conf.all.forwarding=1");
ifindex_src = if_nametoindex("veth_src");
ifindex_dst = if_nametoindex("veth_dst");
@@ -225,6 +217,6 @@ out_tc:
out:
if (nstoken)
close_netns(nstoken);
- system("ip netns del testns");
+ SYS_NOFAIL("ip netns del testns");
test_xdp_do_redirect__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index aa4beae99f4f..490e851dc27d 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -34,11 +34,6 @@
#define PREFIX_LEN "8"
#define FAMILY AF_INET
-#define SYS(cmd) ({ \
- if (!ASSERT_OK(system(cmd), (cmd))) \
- goto out; \
-})
-
struct xsk {
void *umem_area;
struct xsk_umem *umem;
@@ -298,16 +293,16 @@ void test_xdp_metadata(void)
/* Setup new networking namespace, with a veth pair. */
- SYS("ip netns add xdp_metadata");
+ SYS(out, "ip netns add xdp_metadata");
tok = open_netns("xdp_metadata");
- SYS("ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
+ SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
" type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
- SYS("ip link set dev " TX_NAME " address 00:00:00:00:00:01");
- SYS("ip link set dev " RX_NAME " address 00:00:00:00:00:02");
- SYS("ip link set dev " TX_NAME " up");
- SYS("ip link set dev " RX_NAME " up");
- SYS("ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
- SYS("ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
+ SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01");
+ SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02");
+ SYS(out, "ip link set dev " TX_NAME " up");
+ SYS(out, "ip link set dev " RX_NAME " up");
+ SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
+ SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
rx_ifindex = if_nametoindex(RX_NAME);
tx_ifindex = if_nametoindex(TX_NAME);
@@ -405,5 +400,5 @@ out:
xdp_metadata__destroy(bpf_obj);
if (tok)
close_netns(tok);
- system("ip netns del xdp_metadata");
+ SYS_NOFAIL("ip netns del xdp_metadata");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
index c72083885b6d..8b50a992d233 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -8,11 +8,6 @@
#define CMD_OUT_BUF_SIZE 1023
-#define SYS(cmd) ({ \
- if (!ASSERT_OK(system(cmd), (cmd))) \
- goto out; \
-})
-
#define SYS_OUT(cmd, ...) ({ \
char buf[1024]; \
snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \
@@ -69,37 +64,37 @@ static void test_synproxy(bool xdp)
char buf[CMD_OUT_BUF_SIZE];
size_t size;
- SYS("ip netns add synproxy");
+ SYS(out, "ip netns add synproxy");
- SYS("ip link add tmp0 type veth peer name tmp1");
- SYS("ip link set tmp1 netns synproxy");
- SYS("ip link set tmp0 up");
- SYS("ip addr replace 198.18.0.1/24 dev tmp0");
+ SYS(out, "ip link add tmp0 type veth peer name tmp1");
+ SYS(out, "ip link set tmp1 netns synproxy");
+ SYS(out, "ip link set tmp0 up");
+ SYS(out, "ip addr replace 198.18.0.1/24 dev tmp0");
/* When checksum offload is enabled, the XDP program sees wrong
* checksums and drops packets.
*/
- SYS("ethtool -K tmp0 tx off");
+ SYS(out, "ethtool -K tmp0 tx off");
if (xdp)
/* Workaround required for veth. */
- SYS("ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null");
+ SYS(out, "ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null");
ns = open_netns("synproxy");
if (!ASSERT_OK_PTR(ns, "setns"))
goto out;
- SYS("ip link set lo up");
- SYS("ip link set tmp1 up");
- SYS("ip addr replace 198.18.0.2/24 dev tmp1");
- SYS("sysctl -w net.ipv4.tcp_syncookies=2");
- SYS("sysctl -w net.ipv4.tcp_timestamps=1");
- SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
- SYS("iptables-legacy -t raw -I PREROUTING \
+ SYS(out, "ip link set lo up");
+ SYS(out, "ip link set tmp1 up");
+ SYS(out, "ip addr replace 198.18.0.2/24 dev tmp1");
+ SYS(out, "sysctl -w net.ipv4.tcp_syncookies=2");
+ SYS(out, "sysctl -w net.ipv4.tcp_timestamps=1");
+ SYS(out, "sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
+ SYS(out, "iptables-legacy -t raw -I PREROUTING \
-i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");
- SYS("iptables-legacy -t filter -A INPUT \
+ SYS(out, "iptables-legacy -t filter -A INPUT \
-i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \
-j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");
- SYS("iptables-legacy -t filter -A INPUT \
+ SYS(out, "iptables-legacy -t filter -A INPUT \
-i tmp1 -m state --state INVALID -j DROP");
ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \
@@ -170,8 +165,8 @@ out:
if (ns)
close_netns(ns);
- system("ip link del tmp0");
- system("ip netns del synproxy");
+ SYS_NOFAIL("ip link del tmp0");
+ SYS_NOFAIL("ip netns del synproxy");
}
void test_xdp_synproxy(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
index 8b03c9bb4862..d37f5394e199 100644
--- a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
@@ -69,21 +69,6 @@
"proto esp aead 'rfc4106(gcm(aes))' " \
"0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel "
-#define SYS(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- if (!ASSERT_OK(system(cmd), cmd)) \
- goto fail; \
- })
-
-#define SYS_NOFAIL(fmt, ...) \
- ({ \
- char cmd[1024]; \
- snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
- system(cmd); \
- })
-
static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
{
LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1,
@@ -126,23 +111,23 @@ static void cleanup(void)
static int config_underlay(void)
{
- SYS("ip netns add " NS0);
- SYS("ip netns add " NS1);
- SYS("ip netns add " NS2);
+ SYS(fail, "ip netns add " NS0);
+ SYS(fail, "ip netns add " NS1);
+ SYS(fail, "ip netns add " NS2);
/* NS0 <-> NS1 [veth01 <-> veth10] */
- SYS("ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1);
- SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
- SYS("ip -net " NS0 " link set dev veth01 up");
- SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
- SYS("ip -net " NS1 " link set dev veth10 up");
+ SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
+ SYS(fail, "ip -net " NS0 " link set dev veth01 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
+ SYS(fail, "ip -net " NS1 " link set dev veth10 up");
/* NS0 <-> NS2 [veth02 <-> veth20] */
- SYS("ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2);
- SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
- SYS("ip -net " NS0 " link set dev veth02 up");
- SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
- SYS("ip -net " NS2 " link set dev veth20 up");
+ SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
+ SYS(fail, "ip -net " NS0 " link set dev veth02 up");
+ SYS(fail, "ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
+ SYS(fail, "ip -net " NS2 " link set dev veth20 up");
return 0;
fail:
@@ -153,20 +138,20 @@ static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local,
const char *ipv4_remote, int if_id)
{
/* State: local -> remote */
- SYS("ip -net %s xfrm state add src %s dst %s spi 1 "
+ SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 "
ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id);
/* State: local <- remote */
- SYS("ip -net %s xfrm state add src %s dst %s spi 1 "
+ SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 "
ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id);
/* Policy: local -> remote */
- SYS("ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 "
+ SYS(fail, "ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 "
"if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
if_id, ipv4_local, ipv4_remote, if_id);
/* Policy: local <- remote */
- SYS("ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 "
+ SYS(fail, "ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 "
"if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
if_id, ipv4_remote, ipv4_local, if_id);
@@ -274,16 +259,16 @@ static int config_overlay(void)
if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi"))
goto fail;
- SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0");
- SYS("ip -net " NS0 " link set dev ipsec0 up");
+ SYS(fail, "ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0");
+ SYS(fail, "ip -net " NS0 " link set dev ipsec0 up");
- SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1);
- SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0");
- SYS("ip -net " NS1 " link set dev ipsec0 up");
+ SYS(fail, "ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1);
+ SYS(fail, "ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0");
+ SYS(fail, "ip -net " NS1 " link set dev ipsec0 up");
- SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2);
- SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0");
- SYS("ip -net " NS2 " link set dev ipsec0 up");
+ SYS(fail, "ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2);
+ SYS(fail, "ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0");
+ SYS(fail, "ip -net " NS2 " link set dev ipsec0 up");
return 0;
fail:
@@ -294,7 +279,7 @@ static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id)
{
skel->bss->req_if_id = if_id;
- SYS("ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null");
+ SYS(fail, "ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null");
if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id"))
goto fail;
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index a20c5ed5e454..b04e092fac94 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -337,7 +337,7 @@ PROG(IPV6)(struct __sk_buff *skb)
keys->ip_proto = ip6h->nexthdr;
keys->flow_label = ip6_flowlabel(ip6h);
- if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
+ if (keys->flow_label && keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
return export_flow_keys(keys, BPF_OK);
return parse_ipv6_proto(skb, ip6h->nexthdr);
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 14e28f991451..f704885aa534 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -2,10 +2,33 @@
#ifndef __BPF_MISC_H__
#define __BPF_MISC_H__
+/* This set of attributes controls behavior of the
+ * test_loader.c:test_loader__run_subtests().
+ *
+ * __msg Message expected to be found in the verifier log.
+ * Multiple __msg attributes could be specified.
+ *
+ * __success Expect program load success in privileged mode.
+ *
+ * __failure Expect program load failure in privileged mode.
+ *
+ * __log_level Log level to use for the program, numeric value expected.
+ *
+ * __flag Adds one flag use for the program, the following values are valid:
+ * - BPF_F_STRICT_ALIGNMENT;
+ * - BPF_F_TEST_RND_HI32;
+ * - BPF_F_TEST_STATE_FREQ;
+ * - BPF_F_SLEEPABLE;
+ * - BPF_F_XDP_HAS_FRAGS;
+ * - A numeric value.
+ * Multiple __flag attributes could be specified, the final flags
+ * value is derived by applying binary "or" to all specified values.
+ */
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg)))
#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
+#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag)))
/* Convenience macro for use with 'asm volatile' blocks */
#define __naked __attribute__((naked))
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
index 7653df1bc787..ce96b33e38d6 100644
--- a/tools/testing/selftests/bpf/progs/cb_refs.c
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_helpers.h>
struct map_value {
- struct prog_test_ref_kfunc __kptr_ref *ptr;
+ struct prog_test_ref_kfunc __kptr *ptr;
};
struct {
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
index 7d30855bfe78..d0b7cd0d09d7 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
@@ -10,7 +10,7 @@
#include <bpf/bpf_tracing.h>
struct __cgrps_kfunc_map_value {
- struct cgroup __kptr_ref * cgrp;
+ struct cgroup __kptr * cgrp;
};
struct hash_map {
@@ -24,6 +24,7 @@ struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym;
void bpf_cgroup_release(struct cgroup *p) __ksym;
struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp)
{
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
index 4ad7fe24966d..b42291ed9586 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
@@ -205,7 +205,7 @@ int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path)
}
SEC("tp_btf/cgroup_mkdir")
-__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket")
+__failure __msg("expects refcounted")
int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path)
{
struct __cgrps_kfunc_map_value *v;
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
index 0c23ea32df9f..030aff700084 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
@@ -61,7 +61,7 @@ int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *pa
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path)
{
- struct cgroup *kptr;
+ struct cgroup *kptr, *cg;
struct __cgrps_kfunc_map_value *v;
long status;
@@ -80,6 +80,16 @@ int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path)
return 0;
}
+ kptr = v->cgrp;
+ if (!kptr) {
+ err = 4;
+ return 0;
+ }
+
+ cg = bpf_cgroup_ancestor(kptr, 1);
+ if (cg) /* verifier only check */
+ bpf_cgroup_release(cg);
+
kptr = bpf_kptr_xchg(&v->cgrp, NULL);
if (!kptr) {
err = 3;
@@ -168,3 +178,45 @@ int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path)
return 0;
}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *parent, *res;
+ u64 parent_cgid;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ /* @cgrp's ID is not visible yet, let's test with the parent */
+ parent = bpf_cgroup_ancestor(cgrp, cgrp->level - 1);
+ if (!parent) {
+ err = 1;
+ return 0;
+ }
+
+ parent_cgid = parent->kn->id;
+ bpf_cgroup_release(parent);
+
+ res = bpf_cgroup_from_id(parent_cgid);
+ if (!res) {
+ err = 2;
+ return 0;
+ }
+
+ bpf_cgroup_release(res);
+
+ if (res != parent) {
+ err = 3;
+ return 0;
+ }
+
+ res = bpf_cgroup_from_id((u64)-1);
+ if (res) {
+ bpf_cgroup_release(res);
+ err = 4;
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
index 2d11ed528b6f..7615dc23d301 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
@@ -49,7 +49,7 @@ int no_rcu_lock(void *ctx)
if (task->pid != target_pid)
return 0;
- /* ptr_to_btf_id semantics. should work. */
+ /* task->cgroups is untrusted in sleepable prog outside of RCU CS */
cgrp = task->cgroups->dfl_cgrp;
ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
@@ -71,7 +71,7 @@ int yes_rcu_lock(void *ctx)
bpf_rcu_read_lock();
cgrp = task->cgroups->dfl_cgrp;
- /* cgrp is untrusted and cannot pass to bpf_cgrp_storage_get() helper. */
+ /* cgrp is trusted under RCU CS */
ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
cgroup_id = cgrp->kn->id;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index ad34f3b602be..65e5496ca1b2 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -10,7 +10,7 @@
int err;
struct __cpumask_map_value {
- struct bpf_cpumask __kptr_ref * cpumask;
+ struct bpf_cpumask __kptr * cpumask;
};
struct array_map {
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
index 33e8e86dd090..c16f7563b84e 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -44,7 +44,7 @@ int BPF_PROG(test_alloc_double_release, struct task_struct *task, u64 clone_flag
}
SEC("tp_btf/task_newtask")
-__failure __msg("bpf_cpumask_acquire args#0 expected pointer to STRUCT bpf_cpumask")
+__failure __msg("must be referenced")
int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *cpumask;
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index aa5b69354b91..20ce920d891d 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -5,7 +5,9 @@
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
char _license[] SEC("license") = "GPL";
@@ -244,6 +246,27 @@ done:
return 0;
}
+/* A data slice can't be accessed out of bounds */
+SEC("?tc")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_skb(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ /* this should fail */
+ *(__u8*)(hdr + 1) = 1;
+
+ return SK_PASS;
+}
+
SEC("?raw_tp")
__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_map_value(void *ctx)
@@ -399,7 +422,6 @@ int invalid_helper2(void *ctx)
/* this should fail */
bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0);
-
return 0;
}
@@ -1044,6 +1066,193 @@ int dynptr_read_into_slot(void *ctx)
return 0;
}
+/* bpf_dynptr_slice()s are read-only and cannot be written to */
+SEC("?tc")
+__failure __msg("R0 cannot write into rdonly_mem")
+int skb_invalid_slice_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice1(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ val = hdr->h_proto;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ val = hdr->h_proto;
+
+ return SK_PASS;
+}
+
+/* The read-write data slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice2(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ hdr->h_proto = 123;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever bpf_dynptr_write() is called */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice3(struct __sk_buff *skb)
+{
+ char write_data[64] = "hello there, world!!";
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ val = hdr->h_proto;
+
+ bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+
+ /* this should fail */
+ val = hdr->h_proto;
+
+ return SK_PASS;
+}
+
+/* The read-write data slice is invalidated whenever bpf_dynptr_write() is called */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice4(struct __sk_buff *skb)
+{
+ char write_data[64] = "hello there, world!!";
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ hdr->h_proto = 123;
+
+ bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever a helper changes packet data */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int xdp_invalid_data_slice1(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ val = hdr->h_proto;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+ return XDP_DROP;
+
+ /* this should fail */
+ val = hdr->h_proto;
+
+ return XDP_PASS;
+}
+
+/* The read-write data slice is invalidated whenever a helper changes packet data */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int xdp_invalid_data_slice2(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ hdr->h_proto = 9;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+ return XDP_DROP;
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return XDP_PASS;
+}
+
+/* Only supported prog type can create skb-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_skb is not allowed")
+int skb_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_skb(ctx, 0, &ptr);
+
+ return 0;
+}
+
/* Reject writes to dynptr slot for uninit arg */
SEC("?raw_tp")
__failure __msg("potential write to dynptr at off=-16")
@@ -1061,6 +1270,61 @@ int uninit_write_into_slot(void *ctx)
return 0;
}
+/* Only supported prog type can create xdp-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_xdp is not allowed")
+int xdp_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_xdp(ctx, 0, &ptr);
+
+ return 0;
+}
+
+__u32 hdr_size = sizeof(struct ethhdr);
+/* Can't pass in variable-sized len to bpf_dynptr_slice */
+SEC("?tc")
+__failure __msg("unbounded memory access")
+int dynptr_slice_var_len1(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* this should fail */
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, hdr_size);
+ if (!hdr)
+ return SK_DROP;
+
+ return SK_PASS;
+}
+
+/* Can't pass in variable-sized len to bpf_dynptr_slice */
+SEC("?tc")
+__failure __msg("must be a known constant")
+int dynptr_slice_var_len2(struct __sk_buff *skb)
+{
+ char buffer[sizeof(struct ethhdr)] = {};
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ if (hdr_size <= sizeof(buffer)) {
+ /* this should fail */
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, hdr_size);
+ if (!hdr)
+ return SK_DROP;
+ hdr->h_proto = 12;
+ }
+
+ return SK_PASS;
+}
+
static int callback(__u32 index, void *data)
{
*(__u32 *)data = 123;
@@ -1092,3 +1356,24 @@ int invalid_data_slices(void *ctx)
return 0;
}
+
+/* Program types that don't allow writes to packet data should fail if
+ * bpf_dynptr_slice_rdwr is called
+ */
+SEC("cgroup_skb/ingress")
+__failure __msg("the prog does not allow writes to packet data")
+int invalid_slice_rdwr_rdonly(struct __sk_buff *skb)
+{
+ char buffer[sizeof(struct ethhdr)] = {};
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* this should fail since cgroup_skb doesn't allow
+ * changing packet data
+ */
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 35db7c6c1fc7..c8358a7c7924 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -5,6 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
#include "errno.h"
char _license[] SEC("license") = "GPL";
@@ -30,7 +31,7 @@ struct {
__type(value, __u32);
} array_map SEC(".maps");
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
int test_read_write(void *ctx)
{
char write_data[64] = "hello there, world!!";
@@ -61,8 +62,8 @@ int test_read_write(void *ctx)
return 0;
}
-SEC("tp/syscalls/sys_enter_nanosleep")
-int test_data_slice(void *ctx)
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_data(void *ctx)
{
__u32 key = 0, val = 235, *map_val;
struct bpf_dynptr ptr;
@@ -131,7 +132,7 @@ static int ringbuf_callback(__u32 index, void *data)
return 0;
}
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
int test_ringbuf(void *ctx)
{
struct bpf_dynptr ptr;
@@ -163,3 +164,49 @@ done:
bpf_ringbuf_discard_dynptr(&ptr, 0);
return 0;
}
+
+SEC("?cgroup_skb/egress")
+int test_skb_readonly(struct __sk_buff *skb)
+{
+ __u8 write_data[2] = {1, 2};
+ struct bpf_dynptr ptr;
+ __u64 *data;
+ int ret;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* since cgroup skbs are read only, writes should fail */
+ ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+ if (ret != -EINVAL) {
+ err = 2;
+ return 1;
+ }
+
+ return 1;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_data(struct __sk_buff *skb)
+{
+ __u8 write_data[2] = {1, 2};
+ struct bpf_dynptr ptr;
+ __u64 *data;
+ int ret;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This should return NULL. Must use bpf_dynptr_slice API */
+ data = bpf_dynptr_data(&ptr, 0, 1);
+ if (data) {
+ err = 2;
+ return 1;
+ }
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
index b3b326b8e2d1..47d5dedff554 100644
--- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c
+++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
@@ -13,7 +13,7 @@ static long write_vma(struct task_struct *task, struct vm_area_struct *vma,
struct callback_ctx *data)
{
/* writing to vma, which is illegal */
- vma->vm_flags |= 0x55;
+ vma->vm_start = 0xffffffffff600000;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
index 2d2e61470794..13f00ca2ed0a 100644
--- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c
+++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
-static struct prog_test_ref_kfunc __kptr_ref *v;
+static struct prog_test_ref_kfunc __kptr *v;
long total_sum = -1;
extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/lru_bug.c b/tools/testing/selftests/bpf/progs/lru_bug.c
index 687081a724b3..ad73029cb1e3 100644
--- a/tools/testing/selftests/bpf/progs/lru_bug.c
+++ b/tools/testing/selftests/bpf/progs/lru_bug.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_helpers.h>
struct map_value {
- struct task_struct __kptr *ptr;
+ struct task_struct __kptr_untrusted *ptr;
};
struct {
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index 228ec45365a8..3903d30217b8 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -4,8 +4,8 @@
#include <bpf/bpf_helpers.h>
struct map_value {
- struct prog_test_ref_kfunc __kptr *unref_ptr;
- struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
+ struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
+ struct prog_test_ref_kfunc __kptr *ref_ptr;
};
struct array_map {
@@ -15,6 +15,13 @@ struct array_map {
__uint(max_entries, 1);
} array_map SEC(".maps");
+struct pcpu_array_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} pcpu_array_map SEC(".maps");
+
struct hash_map {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
@@ -22,6 +29,13 @@ struct hash_map {
__uint(max_entries, 1);
} hash_map SEC(".maps");
+struct pcpu_hash_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} pcpu_hash_map SEC(".maps");
+
struct hash_malloc_map {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
@@ -30,6 +44,14 @@ struct hash_malloc_map {
__uint(map_flags, BPF_F_NO_PREALLOC);
} hash_malloc_map SEC(".maps");
+struct pcpu_hash_malloc_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} pcpu_hash_malloc_map SEC(".maps");
+
struct lru_hash_map {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__type(key, int);
@@ -37,6 +59,41 @@ struct lru_hash_map {
__uint(max_entries, 1);
} lru_hash_map SEC(".maps");
+struct lru_pcpu_hash_map {
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} lru_pcpu_hash_map SEC(".maps");
+
+struct cgrp_ls_map {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} cgrp_ls_map SEC(".maps");
+
+struct task_ls_map {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} task_ls_map SEC(".maps");
+
+struct inode_ls_map {
+ __uint(type, BPF_MAP_TYPE_INODE_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} inode_ls_map SEC(".maps");
+
+struct sk_ls_map {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} sk_ls_map SEC(".maps");
+
#define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \
struct { \
__uint(type, map_type); \
@@ -61,6 +118,7 @@ extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp
extern struct prog_test_ref_kfunc *
bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym;
extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym;
#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
@@ -90,12 +148,23 @@ static void test_kptr_ref(struct map_value *v)
WRITE_ONCE(v->unref_ptr, p);
if (!p)
return;
+ /*
+ * p is rcu_ptr_prog_test_ref_kfunc,
+ * because bpf prog is non-sleepable and runs in RCU CS.
+ * p can be passed to kfunc that requires KF_RCU.
+ */
+ bpf_kfunc_call_test_ref(p);
if (p->a + p->b > 100)
return;
/* store NULL */
p = bpf_kptr_xchg(&v->ref_ptr, NULL);
if (!p)
return;
+ /*
+ * p is trusted_ptr_prog_test_ref_kfunc.
+ * p can be passed to kfunc that requires KF_RCU.
+ */
+ bpf_kfunc_call_test_ref(p);
if (p->a + p->b > 100) {
bpf_kfunc_call_test_release(p);
return;
@@ -160,6 +229,58 @@ int test_map_kptr(struct __sk_buff *ctx)
return 0;
}
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_map_kptr, struct cgroup *cgrp, const char *path)
+{
+ struct map_value *v;
+
+ v = bpf_cgrp_storage_get(&cgrp_ls_map, cgrp, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(test_task_map_kptr, struct inode *inode, struct dentry *victim)
+{
+ struct task_struct *task;
+ struct map_value *v;
+
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
+ v = bpf_task_storage_get(&task_ls_map, task, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(test_inode_map_kptr, struct inode *inode, struct dentry *victim)
+{
+ struct map_value *v;
+
+ v = bpf_inode_storage_get(&inode_ls_map, inode, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("tc")
+int test_sk_map_kptr(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ struct bpf_sock *sk;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 0;
+ v = bpf_sk_storage_get(&sk_ls_map, sk, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
SEC("tc")
int test_map_in_map_kptr(struct __sk_buff *ctx)
{
@@ -189,106 +310,257 @@ int test_map_in_map_kptr(struct __sk_buff *ctx)
return 0;
}
-SEC("tc")
-int test_map_kptr_ref(struct __sk_buff *ctx)
+int ref = 1;
+
+static __always_inline
+int test_map_kptr_ref_pre(struct map_value *v)
{
struct prog_test_ref_kfunc *p, *p_st;
unsigned long arg = 0;
- struct map_value *v;
- int key = 0, ret;
+ int ret;
p = bpf_kfunc_call_test_acquire(&arg);
if (!p)
return 1;
+ ref++;
p_st = p->next;
- if (p_st->cnt.refs.counter != 2) {
+ if (p_st->cnt.refs.counter != ref) {
ret = 2;
goto end;
}
- v = bpf_map_lookup_elem(&array_map, &key);
- if (!v) {
- ret = 3;
- goto end;
- }
-
p = bpf_kptr_xchg(&v->ref_ptr, p);
if (p) {
- ret = 4;
+ ret = 3;
goto end;
}
- if (p_st->cnt.refs.counter != 2)
- return 5;
+ if (p_st->cnt.refs.counter != ref)
+ return 4;
p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0);
if (!p)
- return 6;
- if (p_st->cnt.refs.counter != 3) {
- ret = 7;
+ return 5;
+ ref++;
+ if (p_st->cnt.refs.counter != ref) {
+ ret = 6;
goto end;
}
bpf_kfunc_call_test_release(p);
- if (p_st->cnt.refs.counter != 2)
- return 8;
+ ref--;
+ if (p_st->cnt.refs.counter != ref)
+ return 7;
p = bpf_kptr_xchg(&v->ref_ptr, NULL);
if (!p)
- return 9;
+ return 8;
bpf_kfunc_call_test_release(p);
- if (p_st->cnt.refs.counter != 1)
- return 10;
+ ref--;
+ if (p_st->cnt.refs.counter != ref)
+ return 9;
p = bpf_kfunc_call_test_acquire(&arg);
if (!p)
- return 11;
+ return 10;
+ ref++;
p = bpf_kptr_xchg(&v->ref_ptr, p);
if (p) {
- ret = 12;
+ ret = 11;
goto end;
}
- if (p_st->cnt.refs.counter != 2)
- return 13;
+ if (p_st->cnt.refs.counter != ref)
+ return 12;
/* Leave in map */
return 0;
end:
+ ref--;
bpf_kfunc_call_test_release(p);
return ret;
}
-SEC("tc")
-int test_map_kptr_ref2(struct __sk_buff *ctx)
+static __always_inline
+int test_map_kptr_ref_post(struct map_value *v)
{
struct prog_test_ref_kfunc *p, *p_st;
- struct map_value *v;
- int key = 0;
-
- v = bpf_map_lookup_elem(&array_map, &key);
- if (!v)
- return 1;
p_st = v->ref_ptr;
- if (!p_st || p_st->cnt.refs.counter != 2)
- return 2;
+ if (!p_st || p_st->cnt.refs.counter != ref)
+ return 1;
p = bpf_kptr_xchg(&v->ref_ptr, NULL);
if (!p)
- return 3;
- if (p_st->cnt.refs.counter != 2) {
+ return 2;
+ if (p_st->cnt.refs.counter != ref) {
bpf_kfunc_call_test_release(p);
- return 4;
+ return 3;
}
p = bpf_kptr_xchg(&v->ref_ptr, p);
if (p) {
bpf_kfunc_call_test_release(p);
- return 5;
+ return 4;
}
- if (p_st->cnt.refs.counter != 2)
- return 6;
+ if (p_st->cnt.refs.counter != ref)
+ return 5;
+
+ return 0;
+}
+
+#define TEST(map) \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_pre(v); \
+ if (ret) \
+ return ret;
+
+#define TEST_PCPU(map) \
+ v = bpf_map_lookup_percpu_elem(&map, &key, 0); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_pre(v); \
+ if (ret) \
+ return ret;
+
+SEC("tc")
+int test_map_kptr_ref1(struct __sk_buff *ctx)
+{
+ struct map_value *v, val = {};
+ int key = 0, ret;
+
+ bpf_map_update_elem(&hash_map, &key, &val, 0);
+ bpf_map_update_elem(&hash_malloc_map, &key, &val, 0);
+ bpf_map_update_elem(&lru_hash_map, &key, &val, 0);
+
+ bpf_map_update_elem(&pcpu_hash_map, &key, &val, 0);
+ bpf_map_update_elem(&pcpu_hash_malloc_map, &key, &val, 0);
+ bpf_map_update_elem(&lru_pcpu_hash_map, &key, &val, 0);
+
+ TEST(array_map);
+ TEST(hash_map);
+ TEST(hash_malloc_map);
+ TEST(lru_hash_map);
+
+ TEST_PCPU(pcpu_array_map);
+ TEST_PCPU(pcpu_hash_map);
+ TEST_PCPU(pcpu_hash_malloc_map);
+ TEST_PCPU(lru_pcpu_hash_map);
+
+ return 0;
+}
+
+#undef TEST
+#undef TEST_PCPU
+
+#define TEST(map) \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_post(v); \
+ if (ret) \
+ return ret;
+
+#define TEST_PCPU(map) \
+ v = bpf_map_lookup_percpu_elem(&map, &key, 0); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_post(v); \
+ if (ret) \
+ return ret;
+
+SEC("tc")
+int test_map_kptr_ref2(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0, ret;
+
+ TEST(array_map);
+ TEST(hash_map);
+ TEST(hash_malloc_map);
+ TEST(lru_hash_map);
+
+ TEST_PCPU(pcpu_array_map);
+ TEST_PCPU(pcpu_hash_map);
+ TEST_PCPU(pcpu_hash_malloc_map);
+ TEST_PCPU(lru_pcpu_hash_map);
return 0;
}
+#undef TEST
+#undef TEST_PCPU
+
+SEC("tc")
+int test_map_kptr_ref3(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long sp = 0;
+
+ p = bpf_kfunc_call_test_acquire(&sp);
+ if (!p)
+ return 1;
+ ref++;
+ if (p->cnt.refs.counter != ref) {
+ bpf_kfunc_call_test_release(p);
+ return 2;
+ }
+ bpf_kfunc_call_test_release(p);
+ ref--;
+ return 0;
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref1(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+ int ret;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (v)
+ return 150;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 200;
+ return test_map_kptr_ref_pre(v);
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref2(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+ int ret;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (!v)
+ return 200;
+ return test_map_kptr_ref_post(v);
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref_del(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+ int ret;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (!v)
+ return 200;
+ if (!v->ref_ptr)
+ return 300;
+ return bpf_task_storage_delete(&task_ls_map, current);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index 760e41e1a632..08f9ec18c345 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -7,9 +7,9 @@
struct map_value {
char buf[8];
- struct prog_test_ref_kfunc __kptr *unref_ptr;
- struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
- struct prog_test_member __kptr_ref *ref_memb_ptr;
+ struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
+ struct prog_test_ref_kfunc __kptr *ref_ptr;
+ struct prog_test_member __kptr *ref_memb_ptr;
};
struct array_map {
@@ -281,7 +281,7 @@ int reject_kptr_get_bad_type_match(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_")
+__failure __msg("R1 type=rcu_ptr_or_null_ expected=percpu_ptr_")
int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx)
{
struct map_value *v;
@@ -316,7 +316,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("R2 type=untrusted_ptr_ expected=ptr_")
+__failure __msg("R2 must be referenced")
int reject_untrusted_xchg(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *p;
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
index 14aff7676436..0d1aa6bbace4 100644
--- a/tools/testing/selftests/bpf/progs/nested_trust_failure.c
+++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
@@ -17,7 +17,7 @@ char _license[] SEC("license") = "GPL";
*/
SEC("tp_btf/task_newtask")
-__failure __msg("R2 must be referenced or trusted")
+__failure __msg("R2 must be")
int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_flags)
{
bpf_cpumask_test_cpu(0, task->user_cpus_ptr);
diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c
index e5db1a4287e5..4c90aa6abddd 100644
--- a/tools/testing/selftests/bpf/progs/rbtree.c
+++ b/tools/testing/selftests/bpf/progs/rbtree.c
@@ -75,7 +75,7 @@ SEC("tc")
long rbtree_add_and_remove(void *ctx)
{
struct bpf_rb_node *res = NULL;
- struct node_data *n, *m;
+ struct node_data *n, *m = NULL;
n = bpf_obj_new(typeof(*n));
if (!n)
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index bf3cba115897..1ced900f3fce 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -232,8 +232,11 @@ long rbtree_api_first_release_unlock_escape(void *ctx)
bpf_spin_lock(&glock);
res = bpf_rbtree_first(&groot);
- if (res)
- n = container_of(res, struct node_data, node);
+ if (!res) {
+ bpf_spin_unlock(&glock);
+ return 1;
+ }
+ n = container_of(res, struct node_data, node);
bpf_spin_unlock(&glock);
bpf_spin_lock(&glock);
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 5cecbdbbb16e..7250bb76d18a 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -81,7 +81,7 @@ int no_lock(void *ctx)
{
struct task_struct *task, *real_parent;
- /* no bpf_rcu_read_lock(), old code still works */
+ /* old style ptr_to_btf_id is not allowed in sleepable */
task = bpf_get_current_task_btf();
real_parent = task->real_parent;
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
@@ -286,13 +286,13 @@ out:
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-int task_untrusted_non_rcuptr(void *ctx)
+int task_trusted_non_rcuptr(void *ctx)
{
struct task_struct *task, *group_leader;
task = bpf_get_current_task_btf();
bpf_rcu_read_lock();
- /* the pointer group_leader marked as untrusted */
+ /* the pointer group_leader is explicitly marked as trusted */
group_leader = task->real_parent->group_leader;
(void)bpf_task_storage_get(&map_a, group_leader, 0, 0);
bpf_rcu_read_unlock();
diff --git a/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
new file mode 100644
index 000000000000..df4873558634
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct task_ls_map {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} task_ls_map SEC(".maps");
+
+long gp_seq;
+
+SEC("syscall")
+int do_call_rcu_tasks_trace(void *ctx)
+{
+ struct task_struct *current;
+ int *v;
+
+ current = bpf_get_current_task_btf();
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 1;
+ /* Invoke call_rcu_tasks_trace */
+ return bpf_task_storage_delete(&task_ls_map, current);
+}
+
+SEC("kprobe/rcu_tasks_trace_postgp")
+int rcu_tasks_trace_postgp(void *ctx)
+{
+ __sync_add_and_fetch(&gp_seq, 1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
index c0ffd171743e..4c2a4b0e3a25 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
@@ -10,7 +10,7 @@
#include <bpf/bpf_tracing.h>
struct __tasks_kfunc_map_value {
- struct task_struct __kptr_ref * task;
+ struct task_struct __kptr * task;
};
struct hash_map {
diff --git a/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c
new file mode 100644
index 000000000000..f548b7446218
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+int kprobe_res = 0;
+
+/**
+ * This program will be manually made sleepable on the userspace side
+ * and should thus be unattachable.
+ */
+SEC("kprobe/" SYS_PREFIX "sys_nanosleep")
+int handle_kprobe_sleepable(struct pt_regs *ctx)
+{
+ kprobe_res = 1;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index 3b5dc34d23e9..68466a6ad18c 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -7,12 +7,8 @@
#include <bpf/bpf_core_read.h>
#include "bpf_misc.h"
-int kprobe_res = 0;
int kprobe2_res = 0;
-int kretprobe_res = 0;
int kretprobe2_res = 0;
-int uprobe_res = 0;
-int uretprobe_res = 0;
int uprobe_byname_res = 0;
int uretprobe_byname_res = 0;
int uprobe_byname2_res = 0;
@@ -23,13 +19,6 @@ int uretprobe_byname3_sleepable_res = 0;
int uretprobe_byname3_res = 0;
void *user_ptr = 0;
-SEC("kprobe")
-int handle_kprobe(struct pt_regs *ctx)
-{
- kprobe_res = 1;
- return 0;
-}
-
SEC("ksyscall/nanosleep")
int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem)
{
@@ -37,24 +26,6 @@ int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __ker
return 0;
}
-/**
- * This program will be manually made sleepable on the userspace side
- * and should thus be unattachable.
- */
-SEC("kprobe/" SYS_PREFIX "sys_nanosleep")
-int handle_kprobe_sleepable(struct pt_regs *ctx)
-{
- kprobe_res = 2;
- return 0;
-}
-
-SEC("kretprobe")
-int handle_kretprobe(struct pt_regs *ctx)
-{
- kretprobe_res = 2;
- return 0;
-}
-
SEC("kretsyscall/nanosleep")
int BPF_KRETPROBE(handle_kretprobe_auto, int ret)
{
@@ -63,16 +34,14 @@ int BPF_KRETPROBE(handle_kretprobe_auto, int ret)
}
SEC("uprobe")
-int handle_uprobe(struct pt_regs *ctx)
+int handle_uprobe_ref_ctr(struct pt_regs *ctx)
{
- uprobe_res = 3;
return 0;
}
SEC("uretprobe")
-int handle_uretprobe(struct pt_regs *ctx)
+int handle_uretprobe_ref_ctr(struct pt_regs *ctx)
{
- uretprobe_res = 4;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c
new file mode 100644
index 000000000000..7f08bce94596
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+int kprobe_res = 0;
+int kretprobe_res = 0;
+int uprobe_res = 0;
+int uretprobe_res = 0;
+int uprobe_byname_res = 0;
+void *user_ptr = 0;
+
+SEC("kprobe")
+int handle_kprobe(struct pt_regs *ctx)
+{
+ kprobe_res = 1;
+ return 0;
+}
+
+SEC("kretprobe")
+int handle_kretprobe(struct pt_regs *ctx)
+{
+ kretprobe_res = 2;
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ uprobe_res = 3;
+ return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe(struct pt_regs *ctx)
+{
+ uretprobe_res = 4;
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+ uprobe_byname_res = 5;
+ return 0;
+}
+
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
new file mode 100644
index 000000000000..f45a7095de7a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -0,0 +1,980 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2019, 2020 Cloudflare
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "test_cls_redirect.h"
+#include "bpf_kfuncs.h"
+
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
+
+#define IP_OFFSET_MASK (0x1FFF)
+#define IP_MF (0x2000)
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+
+/**
+ * Destination port and IP used for UDP encapsulation.
+ */
+volatile const __be16 ENCAPSULATION_PORT;
+volatile const __be32 ENCAPSULATION_IP;
+
+typedef struct {
+ uint64_t processed_packets_total;
+ uint64_t l3_protocol_packets_total_ipv4;
+ uint64_t l3_protocol_packets_total_ipv6;
+ uint64_t l4_protocol_packets_total_tcp;
+ uint64_t l4_protocol_packets_total_udp;
+ uint64_t accepted_packets_total_syn;
+ uint64_t accepted_packets_total_syn_cookies;
+ uint64_t accepted_packets_total_last_hop;
+ uint64_t accepted_packets_total_icmp_echo_request;
+ uint64_t accepted_packets_total_established;
+ uint64_t forwarded_packets_total_gue;
+ uint64_t forwarded_packets_total_gre;
+
+ uint64_t errors_total_unknown_l3_proto;
+ uint64_t errors_total_unknown_l4_proto;
+ uint64_t errors_total_malformed_ip;
+ uint64_t errors_total_fragmented_ip;
+ uint64_t errors_total_malformed_icmp;
+ uint64_t errors_total_unwanted_icmp;
+ uint64_t errors_total_malformed_icmp_pkt_too_big;
+ uint64_t errors_total_malformed_tcp;
+ uint64_t errors_total_malformed_udp;
+ uint64_t errors_total_icmp_echo_replies;
+ uint64_t errors_total_malformed_encapsulation;
+ uint64_t errors_total_encap_adjust_failed;
+ uint64_t errors_total_encap_buffer_too_small;
+ uint64_t errors_total_redirect_loop;
+ uint64_t errors_total_encap_mtu_violate;
+} metrics_t;
+
+typedef enum {
+ INVALID = 0,
+ UNKNOWN,
+ ECHO_REQUEST,
+ SYN,
+ SYN_COOKIE,
+ ESTABLISHED,
+} verdict_t;
+
+typedef struct {
+ uint16_t src, dst;
+} flow_ports_t;
+
+_Static_assert(
+ sizeof(flow_ports_t) !=
+ offsetofend(struct bpf_sock_tuple, ipv4.dport) -
+ offsetof(struct bpf_sock_tuple, ipv4.sport) - 1,
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+_Static_assert(
+ sizeof(flow_ports_t) !=
+ offsetofend(struct bpf_sock_tuple, ipv6.dport) -
+ offsetof(struct bpf_sock_tuple, ipv6.sport) - 1,
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+
+struct iphdr_info {
+ void *hdr;
+ __u64 len;
+};
+
+typedef int ret_t;
+
+/* This is a bit of a hack. We need a return value which allows us to
+ * indicate that the regular flow of the program should continue,
+ * while allowing functions to use XDP_PASS and XDP_DROP, etc.
+ */
+static const ret_t CONTINUE_PROCESSING = -1;
+
+/* Convenience macro to call functions which return ret_t.
+ */
+#define MAYBE_RETURN(x) \
+ do { \
+ ret_t __ret = x; \
+ if (__ret != CONTINUE_PROCESSING) \
+ return __ret; \
+ } while (0)
+
+static bool ipv4_is_fragment(const struct iphdr *ip)
+{
+ uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
+ return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
+}
+
+static int pkt_parse_ipv4(struct bpf_dynptr *dynptr, __u64 *offset, struct iphdr *iphdr)
+{
+ if (bpf_dynptr_read(iphdr, sizeof(*iphdr), dynptr, *offset, 0))
+ return -1;
+
+ *offset += sizeof(*iphdr);
+
+ if (iphdr->ihl < 5)
+ return -1;
+
+ /* skip ipv4 options */
+ *offset += (iphdr->ihl - 5) * 4;
+
+ return 0;
+}
+
+/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
+static bool pkt_parse_icmp_l4_ports(struct bpf_dynptr *dynptr, __u64 *offset, flow_ports_t *ports)
+{
+ if (bpf_dynptr_read(ports, sizeof(*ports), dynptr, *offset, 0))
+ return false;
+
+ *offset += sizeof(*ports);
+
+ /* Ports in the L4 headers are reversed, since we are parsing an ICMP
+ * payload which is going towards the eyeball.
+ */
+ uint16_t dst = ports->src;
+ ports->src = ports->dst;
+ ports->dst = dst;
+ return true;
+}
+
+static uint16_t pkt_checksum_fold(uint32_t csum)
+{
+ /* The highest reasonable value for an IPv4 header
+ * checksum requires two folds, so we just do that always.
+ */
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ return (uint16_t)~csum;
+}
+
+static void pkt_ipv4_checksum(struct iphdr *iph)
+{
+ iph->check = 0;
+
+ /* An IP header without options is 20 bytes. Two of those
+ * are the checksum, which we always set to zero. Hence,
+ * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7,
+ * which fits in 32 bit.
+ */
+ _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes");
+ uint32_t acc = 0;
+ uint16_t *ipw = (uint16_t *)iph;
+
+ for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++)
+ acc += ipw[i];
+
+ iph->check = pkt_checksum_fold(acc);
+}
+
+static bool pkt_skip_ipv6_extension_headers(struct bpf_dynptr *dynptr, __u64 *offset,
+ const struct ipv6hdr *ipv6, uint8_t *upper_proto,
+ bool *is_fragment)
+{
+ /* We understand five extension headers.
+ * https://tools.ietf.org/html/rfc8200#section-4.1 states that all
+ * headers should occur once, except Destination Options, which may
+ * occur twice. Hence we give up after 6 headers.
+ */
+ struct {
+ uint8_t next;
+ uint8_t len;
+ } exthdr = {
+ .next = ipv6->nexthdr,
+ };
+ *is_fragment = false;
+
+ for (int i = 0; i < 6; i++) {
+ switch (exthdr.next) {
+ case IPPROTO_FRAGMENT:
+ *is_fragment = true;
+ /* NB: We don't check that hdrlen == 0 as per spec. */
+ /* fallthrough; */
+
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_MH:
+ if (bpf_dynptr_read(&exthdr, sizeof(exthdr), dynptr, *offset, 0))
+ return false;
+
+ /* hdrlen is in 8-octet units, and excludes the first 8 octets. */
+ *offset += (exthdr.len + 1) * 8;
+
+ /* Decode next header */
+ break;
+
+ default:
+ /* The next header is not one of the known extension
+ * headers, treat it as the upper layer header.
+ *
+ * This handles IPPROTO_NONE.
+ *
+ * Encapsulating Security Payload (50) and Authentication
+ * Header (51) also end up here (and will trigger an
+ * unknown proto error later). They have a custom header
+ * format and seem too esoteric to care about.
+ */
+ *upper_proto = exthdr.next;
+ return true;
+ }
+ }
+
+ /* We never found an upper layer header. */
+ return false;
+}
+
+static int pkt_parse_ipv6(struct bpf_dynptr *dynptr, __u64 *offset, struct ipv6hdr *ipv6,
+ uint8_t *proto, bool *is_fragment)
+{
+ if (bpf_dynptr_read(ipv6, sizeof(*ipv6), dynptr, *offset, 0))
+ return -1;
+
+ *offset += sizeof(*ipv6);
+
+ if (!pkt_skip_ipv6_extension_headers(dynptr, offset, ipv6, proto, is_fragment))
+ return -1;
+
+ return 0;
+}
+
+/* Global metrics, per CPU
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, unsigned int);
+ __type(value, metrics_t);
+} metrics_map SEC(".maps");
+
+static metrics_t *get_global_metrics(void)
+{
+ uint64_t key = 0;
+ return bpf_map_lookup_elem(&metrics_map, &key);
+}
+
+static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+{
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ int32_t encap_overhead = payload_off - sizeof(struct ethhdr);
+
+ /* Changing the ethertype if the encapsulated packet is ipv6 */
+ if (encap->gue.proto_ctype == IPPROTO_IPV6)
+ encap->eth.h_proto = bpf_htons(ETH_P_IPV6);
+
+ if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC))
+ return TC_ACT_SHOT;
+
+ return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+static ret_t forward_with_gre(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ encap_headers_t *encap, struct in_addr *next_hop,
+ metrics_t *metrics)
+{
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ int32_t encap_overhead =
+ payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr);
+ int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead;
+ __u8 encap_buffer[sizeof(encap_gre_t)] = {};
+ uint16_t proto = ETH_P_IP;
+ uint32_t mtu_len = 0;
+ encap_gre_t *encap_gre;
+
+ metrics->forwarded_packets_total_gre++;
+
+ /* Loop protection: the inner packet's TTL is decremented as a safeguard
+ * against any forwarding loop. As the only interesting field is the TTL
+ * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes
+ * as they handle the split packets if needed (no need for the data to be
+ * in the linear section).
+ */
+ if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+ proto = ETH_P_IPV6;
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1, 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ } else {
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl,
+ 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ /* IPv4 also has a checksum to patch. While the TTL is only one byte,
+ * this function only works for 2 and 4 bytes arguments (the result is
+ * the same).
+ */
+ rc = bpf_l3_csum_replace(
+ skb, payload_off + offsetof(struct iphdr, check), ttl,
+ ttl - 1, 2);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1,
+ 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ }
+
+ if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) {
+ metrics->errors_total_encap_mtu_violate++;
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) {
+ metrics->errors_total_encap_adjust_failed++;
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ encap_gre = bpf_dynptr_slice_rdwr(dynptr, 0, encap_buffer, sizeof(encap_buffer));
+ if (!encap_gre) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ encap_gre->ip.protocol = IPPROTO_GRE;
+ encap_gre->ip.daddr = next_hop->s_addr;
+ encap_gre->ip.saddr = ENCAPSULATION_IP;
+ encap_gre->ip.tot_len =
+ bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta);
+ encap_gre->gre.flags = 0;
+ encap_gre->gre.protocol = bpf_htons(proto);
+ pkt_ipv4_checksum((void *)&encap_gre->ip);
+
+ if (encap_gre == encap_buffer)
+ bpf_dynptr_write(dynptr, 0, encap_buffer, sizeof(encap_buffer), 0);
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t forward_to_next_hop(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ encap_headers_t *encap, struct in_addr *next_hop,
+ metrics_t *metrics)
+{
+ /* swap L2 addresses */
+ /* This assumes that packets are received from a router.
+ * So just swapping the MAC addresses here will make the packet go back to
+ * the router, which will send it to the appropriate machine.
+ */
+ unsigned char temp[ETH_ALEN];
+ memcpy(temp, encap->eth.h_dest, sizeof(temp));
+ memcpy(encap->eth.h_dest, encap->eth.h_source,
+ sizeof(encap->eth.h_dest));
+ memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count - 1 &&
+ encap->unigue.last_hop_gre) {
+ return forward_with_gre(skb, dynptr, encap, next_hop, metrics);
+ }
+
+ metrics->forwarded_packets_total_gue++;
+ uint32_t old_saddr = encap->ip.saddr;
+ encap->ip.saddr = encap->ip.daddr;
+ encap->ip.daddr = next_hop->s_addr;
+ if (encap->unigue.next_hop < encap->unigue.hop_count) {
+ encap->unigue.next_hop++;
+ }
+
+ /* Remove ip->saddr, add next_hop->s_addr */
+ const uint64_t off = offsetof(typeof(*encap), ip.check);
+ int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4);
+ if (ret < 0) {
+ return TC_ACT_SHOT;
+ }
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t skip_next_hops(__u64 *offset, int n)
+{
+ __u32 res;
+ switch (n) {
+ case 1:
+ *offset += sizeof(struct in_addr);
+ case 0:
+ return CONTINUE_PROCESSING;
+
+ default:
+ return TC_ACT_SHOT;
+ }
+}
+
+/* Get the next hop from the GLB header.
+ *
+ * Sets next_hop->s_addr to 0 if there are no more hops left.
+ * pkt is positioned just after the variable length GLB header
+ * iff the call is successful.
+ */
+static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_headers_t *encap,
+ struct in_addr *next_hop)
+{
+ if (encap->unigue.next_hop > encap->unigue.hop_count)
+ return TC_ACT_SHOT;
+
+ /* Skip "used" next hops. */
+ MAYBE_RETURN(skip_next_hops(offset, encap->unigue.next_hop));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count) {
+ /* No more next hops, we are at the end of the GLB header. */
+ next_hop->s_addr = 0;
+ return CONTINUE_PROCESSING;
+ }
+
+ if (bpf_dynptr_read(next_hop, sizeof(*next_hop), dynptr, *offset, 0))
+ return TC_ACT_SHOT;
+
+ *offset += sizeof(*next_hop);
+
+ /* Skip the remainig next hops (may be zero). */
+ return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1);
+}
+
+/* Fill a bpf_sock_tuple to be used with the socket lookup functions.
+ * This is a kludge that let's us work around verifier limitations:
+ *
+ * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321)
+ *
+ * clang will substitue a costant for sizeof, which allows the verifier
+ * to track it's value. Based on this, it can figure out the constant
+ * return value, and calling code works while still being "generic" to
+ * IPv4 and IPv6.
+ */
+static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+ uint64_t iphlen, uint16_t sport, uint16_t dport)
+{
+ switch (iphlen) {
+ case sizeof(struct iphdr): {
+ struct iphdr *ipv4 = (struct iphdr *)iph;
+ tuple->ipv4.daddr = ipv4->daddr;
+ tuple->ipv4.saddr = ipv4->saddr;
+ tuple->ipv4.sport = sport;
+ tuple->ipv4.dport = dport;
+ return sizeof(tuple->ipv4);
+ }
+
+ case sizeof(struct ipv6hdr): {
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph;
+ memcpy(&tuple->ipv6.daddr, &ipv6->daddr,
+ sizeof(tuple->ipv6.daddr));
+ memcpy(&tuple->ipv6.saddr, &ipv6->saddr,
+ sizeof(tuple->ipv6.saddr));
+ tuple->ipv6.sport = sport;
+ tuple->ipv6.dport = dport;
+ return sizeof(tuple->ipv6);
+ }
+
+ default:
+ return 0;
+ }
+}
+
+static verdict_t classify_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple,
+ uint64_t tuplen, void *iph, struct tcphdr *tcp)
+{
+ struct bpf_sock *sk =
+ bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+
+ if (sk == NULL)
+ return UNKNOWN;
+
+ if (sk->state != BPF_TCP_LISTEN) {
+ bpf_sk_release(sk);
+ return ESTABLISHED;
+ }
+
+ if (iph != NULL && tcp != NULL) {
+ /* Kludge: we've run out of arguments, but need the length of the ip header. */
+ uint64_t iphlen = sizeof(struct iphdr);
+
+ if (tuplen == sizeof(tuple->ipv6))
+ iphlen = sizeof(struct ipv6hdr);
+
+ if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp,
+ sizeof(*tcp)) == 0) {
+ bpf_sk_release(sk);
+ return SYN_COOKIE;
+ }
+ }
+
+ bpf_sk_release(sk);
+ return UNKNOWN;
+}
+
+static verdict_t classify_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, uint64_t tuplen)
+{
+ struct bpf_sock *sk =
+ bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+
+ if (sk == NULL)
+ return UNKNOWN;
+
+ if (sk->state == BPF_TCP_ESTABLISHED) {
+ bpf_sk_release(sk);
+ return ESTABLISHED;
+ }
+
+ bpf_sk_release(sk);
+ return UNKNOWN;
+}
+
+static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, struct bpf_sock_tuple *tuple,
+ uint64_t tuplen, metrics_t *metrics)
+{
+ switch (proto) {
+ case IPPROTO_TCP:
+ return classify_tcp(skb, tuple, tuplen, NULL, NULL);
+
+ case IPPROTO_UDP:
+ return classify_udp(skb, tuple, tuplen);
+
+ default:
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+}
+
+static verdict_t process_icmpv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr, __u64 *offset,
+ metrics_t *metrics)
+{
+ struct icmphdr icmp;
+ struct iphdr ipv4;
+
+ if (bpf_dynptr_read(&icmp, sizeof(icmp), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ *offset += sizeof(icmp);
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp.type == ICMP_ECHOREPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp.type == ICMP_ECHO)
+ return ECHO_REQUEST;
+
+ if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ if (pkt_parse_ipv4(dynptr, offset, &ipv4)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ /* The source address in the outer IP header is from the entity that
+ * originated the ICMP message. Use the original IP header to restore
+ * the correct flow tuple.
+ */
+ struct bpf_sock_tuple tuple;
+ tuple.ipv4.saddr = ipv4.daddr;
+ tuple.ipv4.daddr = ipv4.saddr;
+
+ if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv4.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(skb, ipv4.protocol, &tuple,
+ sizeof(tuple.ipv4), metrics);
+}
+
+static verdict_t process_icmpv6(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct ipv6hdr ipv6;
+ struct icmp6hdr icmp6;
+ bool is_fragment;
+ uint8_t l4_proto;
+
+ if (bpf_dynptr_read(&icmp6, sizeof(icmp6), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) {
+ return ECHO_REQUEST;
+ }
+
+ if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ /* Swap source and dest addresses. */
+ memcpy(&tuple.ipv6.saddr, &ipv6.daddr, sizeof(tuple.ipv6.saddr));
+ memcpy(&tuple.ipv6.daddr, &ipv6.saddr, sizeof(tuple.ipv6.daddr));
+
+ if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv6.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(skb, l4_proto, &tuple, sizeof(tuple.ipv6),
+ metrics);
+}
+
+static verdict_t process_tcp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ struct iphdr_info *info, metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct tcphdr tcp;
+ uint64_t tuplen;
+
+ metrics->l4_protocol_packets_total_tcp++;
+
+ if (bpf_dynptr_read(&tcp, sizeof(tcp), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_tcp++;
+ return INVALID;
+ }
+
+ *offset += sizeof(tcp);
+
+ if (tcp.syn)
+ return SYN;
+
+ tuplen = fill_tuple(&tuple, info->hdr, info->len, tcp.source, tcp.dest);
+ return classify_tcp(skb, &tuple, tuplen, info->hdr, &tcp);
+}
+
+static verdict_t process_udp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ struct iphdr_info *info, metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct udphdr udph;
+ uint64_t tuplen;
+
+ metrics->l4_protocol_packets_total_udp++;
+
+ if (bpf_dynptr_read(&udph, sizeof(udph), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_udp++;
+ return INVALID;
+ }
+ *offset += sizeof(udph);
+
+ tuplen = fill_tuple(&tuple, info->hdr, info->len, udph.source, udph.dest);
+ return classify_udp(skb, &tuple, tuplen);
+}
+
+static verdict_t process_ipv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ __u64 *offset, metrics_t *metrics)
+{
+ struct iphdr ipv4;
+ struct iphdr_info info = {
+ .hdr = &ipv4,
+ .len = sizeof(ipv4),
+ };
+
+ metrics->l3_protocol_packets_total_ipv4++;
+
+ if (pkt_parse_ipv4(dynptr, offset, &ipv4)) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4.version != 4) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4_is_fragment(&ipv4)) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (ipv4.protocol) {
+ case IPPROTO_ICMP:
+ return process_icmpv4(skb, dynptr, offset, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(dynptr, offset, skb, &info, metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(dynptr, offset, skb, &info, metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+static verdict_t process_ipv6(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ __u64 *offset, metrics_t *metrics)
+{
+ struct ipv6hdr ipv6;
+ struct iphdr_info info = {
+ .hdr = &ipv6,
+ .len = sizeof(ipv6),
+ };
+ uint8_t l4_proto;
+ bool is_fragment;
+
+ metrics->l3_protocol_packets_total_ipv6++;
+
+ if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv6.version != 6) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (l4_proto) {
+ case IPPROTO_ICMPV6:
+ return process_icmpv6(dynptr, offset, skb, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(dynptr, offset, skb, &info, metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(dynptr, offset, skb, &info, metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+SEC("tc")
+int cls_redirect(struct __sk_buff *skb)
+{
+ __u8 encap_buffer[sizeof(encap_headers_t)] = {};
+ struct bpf_dynptr dynptr;
+ struct in_addr next_hop;
+ /* Tracks offset of the dynptr. This will be unnecessary once
+ * bpf_dynptr_advance() is available.
+ */
+ __u64 off = 0;
+ ret_t ret;
+
+ bpf_dynptr_from_skb(skb, 0, &dynptr);
+
+ metrics_t *metrics = get_global_metrics();
+ if (metrics == NULL)
+ return TC_ACT_SHOT;
+
+ metrics->processed_packets_total++;
+
+ /* Pass bogus packets as long as we're not sure they're
+ * destined for us.
+ */
+ if (skb->protocol != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ encap_headers_t *encap;
+
+ /* Make sure that all encapsulation headers are available in
+ * the linear portion of the skb. This makes it easy to manipulate them.
+ */
+ if (bpf_skb_pull_data(skb, sizeof(*encap)))
+ return TC_ACT_OK;
+
+ encap = bpf_dynptr_slice_rdwr(&dynptr, 0, encap_buffer, sizeof(encap_buffer));
+ if (!encap)
+ return TC_ACT_OK;
+
+ off += sizeof(*encap);
+
+ if (encap->ip.ihl != 5)
+ /* We never have any options. */
+ return TC_ACT_OK;
+
+ if (encap->ip.daddr != ENCAPSULATION_IP ||
+ encap->ip.protocol != IPPROTO_UDP)
+ return TC_ACT_OK;
+
+ /* TODO Check UDP length? */
+ if (encap->udp.dest != ENCAPSULATION_PORT)
+ return TC_ACT_OK;
+
+ /* We now know that the packet is destined to us, we can
+ * drop bogus ones.
+ */
+ if (ipv4_is_fragment((void *)&encap->ip)) {
+ metrics->errors_total_fragmented_ip++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.variant != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.control != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.flags != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.hlen !=
+ sizeof(encap->unigue) / 4 + encap->unigue.hop_count) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->unigue.version != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->unigue.reserved != 0)
+ return TC_ACT_SHOT;
+
+ MAYBE_RETURN(get_next_hop(&dynptr, &off, encap, &next_hop));
+
+ if (next_hop.s_addr == 0) {
+ metrics->accepted_packets_total_last_hop++;
+ return accept_locally(skb, encap);
+ }
+
+ verdict_t verdict;
+ switch (encap->gue.proto_ctype) {
+ case IPPROTO_IPIP:
+ verdict = process_ipv4(skb, &dynptr, &off, metrics);
+ break;
+
+ case IPPROTO_IPV6:
+ verdict = process_ipv6(skb, &dynptr, &off, metrics);
+ break;
+
+ default:
+ metrics->errors_total_unknown_l3_proto++;
+ return TC_ACT_SHOT;
+ }
+
+ switch (verdict) {
+ case INVALID:
+ /* metrics have already been bumped */
+ return TC_ACT_SHOT;
+
+ case UNKNOWN:
+ return forward_to_next_hop(skb, &dynptr, encap, &next_hop, metrics);
+
+ case ECHO_REQUEST:
+ metrics->accepted_packets_total_icmp_echo_request++;
+ break;
+
+ case SYN:
+ if (encap->unigue.forward_syn) {
+ return forward_to_next_hop(skb, &dynptr, encap, &next_hop,
+ metrics);
+ }
+
+ metrics->accepted_packets_total_syn++;
+ break;
+
+ case SYN_COOKIE:
+ metrics->accepted_packets_total_syn_cookies++;
+ break;
+
+ case ESTABLISHED:
+ metrics->accepted_packets_total_established++;
+ break;
+ }
+
+ ret = accept_locally(skb, encap);
+
+ if (encap == encap_buffer)
+ bpf_dynptr_write(&dynptr, 0, encap_buffer, sizeof(encap_buffer), 0);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 98327bdbbfd2..8fba3f3649e2 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -5,12 +5,12 @@
#include "bpf_misc.h"
struct Small {
- int x;
+ long x;
};
struct Big {
- int x;
- int y;
+ long x;
+ long y;
};
__noinline int foo(const struct Big *big)
@@ -22,7 +22,7 @@ __noinline int foo(const struct Big *big)
}
SEC("cgroup_skb/ingress")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid indirect access to stack")
int global_func10(struct __sk_buff *skb)
{
const struct Small small = {.x = skb->len };
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index 2fbef3cc7ad8..2dde8e3fe4c9 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -48,7 +48,7 @@ SEC("?lsm.s/bpf")
__failure __msg("arg#0 expected pointer to stack or dynptr_ptr")
int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
{
- unsigned long val;
+ unsigned long val = 0;
return bpf_verify_pkcs7_signature((struct bpf_dynptr *)val,
(struct bpf_dynptr *)val, NULL);
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c
new file mode 100644
index 000000000000..f997f5080748
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+#include "test_iptunnel_common.h"
+#include <bpf/bpf_endian.h>
+
+#include "bpf_kfuncs.h"
+
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+static __noinline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+ __u8 flags;
+};
+
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+};
+
+struct eth_hdr {
+ unsigned char eth_dest[ETH_ALEN];
+ unsigned char eth_source[ETH_ALEN];
+ unsigned short eth_proto;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, MAX_VIPS);
+ __type(key, struct vip);
+ __type(value, struct vip_meta);
+} vip_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, CH_RINGS_SIZE);
+ __type(key, __u32);
+ __type(value, __u32);
+} ch_rings SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, MAX_REALS);
+ __type(key, __u32);
+ __type(value, struct real_definition);
+} reals SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, MAX_VIPS);
+ __type(key, __u32);
+ __type(value, struct vip_stats);
+} stats SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, CTL_MAP_SIZE);
+ __type(key, __u32);
+ __type(value, struct ctl_value);
+} ctl_array SEC(".maps");
+
+static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6)
+{
+ if (ipv6)
+ return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+ pckt->ports, CH_RINGS_SIZE);
+ else
+ return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static __noinline bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
+{
+ __u32 hash = get_packet_hash(pckt, is_ipv6);
+ __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
+ __u32 *real_pos;
+
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+ return false;
+
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return false;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return false;
+ return true;
+}
+
+static __noinline int parse_icmpv6(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer[sizeof(struct ipv6hdr)] = {};
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!icmp_hdr)
+ return TC_ACT_SHOT;
+
+ if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+ return TC_ACT_OK;
+ off += sizeof(struct icmp6hdr);
+ ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!ip6h)
+ return TC_ACT_SHOT;
+ pckt->proto = ip6h->nexthdr;
+ pckt->flags |= F_ICMP;
+ memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+ memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+ return TC_ACT_UNSPEC;
+}
+
+static __noinline int parse_icmp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer_icmp[sizeof(struct iphdr)] = {};
+ __u8 buffer_ip[sizeof(struct iphdr)] = {};
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer_icmp, sizeof(buffer_icmp));
+ if (!icmp_hdr)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+ icmp_hdr->code != ICMP_FRAG_NEEDED)
+ return TC_ACT_OK;
+ off += sizeof(struct icmphdr);
+ iph = bpf_dynptr_slice(skb_ptr, off, buffer_ip, sizeof(buffer_ip));
+ if (!iph || iph->ihl != 5)
+ return TC_ACT_SHOT;
+ pckt->proto = iph->protocol;
+ pckt->flags |= F_ICMP;
+ pckt->src = iph->daddr;
+ pckt->dst = iph->saddr;
+ return TC_ACT_UNSPEC;
+}
+
+static __noinline bool parse_udp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer[sizeof(struct udphdr)] = {};
+ struct udphdr *udp;
+
+ udp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!udp)
+ return false;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = udp->source;
+ pckt->port16[1] = udp->dest;
+ } else {
+ pckt->port16[0] = udp->dest;
+ pckt->port16[1] = udp->source;
+ }
+ return true;
+}
+
+static __noinline bool parse_tcp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer[sizeof(struct tcphdr)] = {};
+ struct tcphdr *tcp;
+
+ tcp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!tcp)
+ return false;
+
+ if (tcp->syn)
+ pckt->flags |= F_SYN_SET;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = tcp->source;
+ pckt->port16[1] = tcp->dest;
+ } else {
+ pckt->port16[0] = tcp->dest;
+ pckt->port16[1] = tcp->source;
+ }
+ return true;
+}
+
+static __noinline int process_packet(struct bpf_dynptr *skb_ptr,
+ struct eth_hdr *eth, __u64 off,
+ bool is_ipv6, struct __sk_buff *skb)
+{
+ struct packet_description pckt = {};
+ struct bpf_tunnel_key tkey = {};
+ struct vip_stats *data_stats;
+ struct real_definition *dst;
+ struct vip_meta *vip_info;
+ struct ctl_value *cval;
+ __u32 v4_intf_pos = 1;
+ __u32 v6_intf_pos = 2;
+ struct ipv6hdr *ip6h;
+ struct vip vip = {};
+ struct iphdr *iph;
+ int tun_flag = 0;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u32 ifindex;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ tkey.tunnel_ttl = 64;
+ if (is_ipv6) {
+ __u8 buffer[sizeof(struct ipv6hdr)] = {};
+
+ ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!ip6h)
+ return TC_ACT_SHOT;
+
+ iph_len = sizeof(struct ipv6hdr);
+ protocol = ip6h->nexthdr;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(ip6h->payload_len);
+ off += iph_len;
+ if (protocol == IPPROTO_FRAGMENT) {
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_ICMPV6) {
+ action = parse_icmpv6(skb_ptr, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV6_PLUS_ICMP_HDR;
+ } else {
+ memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+ memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+ }
+ } else {
+ __u8 buffer[sizeof(struct iphdr)] = {};
+
+ iph = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!iph || iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ protocol = iph->protocol;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(iph->tot_len);
+ off += IPV4_HDR_LEN_NO_OPT;
+
+ if (iph->frag_off & PCKT_FRAGMENTED)
+ return TC_ACT_SHOT;
+ if (protocol == IPPROTO_ICMP) {
+ action = parse_icmp(skb_ptr, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV4_PLUS_ICMP_HDR;
+ } else {
+ pckt.src = iph->saddr;
+ pckt.dst = iph->daddr;
+ }
+ }
+ protocol = pckt.proto;
+
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(skb_ptr, off, &pckt))
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(skb_ptr, off, &pckt))
+ return TC_ACT_SHOT;
+ } else {
+ return TC_ACT_SHOT;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.daddr.v6, pckt.dstv6, 16);
+ else
+ vip.daddr.v4 = pckt.dst;
+
+ vip.dport = pckt.port16[1];
+ vip.protocol = pckt.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.dport = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return TC_ACT_SHOT;
+ pckt.port16[1] = 0;
+ }
+
+ if (vip_info->flags & F_HASH_NO_SRC_PORT)
+ pckt.port16[0] = 0;
+
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+ return TC_ACT_SHOT;
+
+ if (dst->flags & F_IPV6) {
+ cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+ tun_flag = BPF_F_TUNINFO_IPV6;
+ } else {
+ cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ tkey.remote_ipv4 = dst->dst;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return TC_ACT_SHOT;
+ data_stats->pkts++;
+ data_stats->bytes += pkt_bytes;
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4;
+ return bpf_redirect(ifindex, 0);
+}
+
+SEC("tc")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ __u8 buffer[sizeof(struct eth_hdr)] = {};
+ struct bpf_dynptr ptr;
+ struct eth_hdr *eth;
+ __u32 eth_proto;
+ __u32 nh_off;
+ int err;
+
+ nh_off = sizeof(struct eth_hdr);
+
+ bpf_dynptr_from_skb(ctx, 0, &ptr);
+ eth = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!eth)
+ return TC_ACT_SHOT;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == bpf_htons(ETH_P_IP))
+ err = process_packet(&ptr, eth, nh_off, false, ctx);
+ else if (eth_proto == bpf_htons(ETH_P_IPV6))
+ err = process_packet(&ptr, eth, nh_off, true, ctx);
+ else
+ return TC_ACT_SHOT;
+
+ if (eth == buffer)
+ bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0);
+
+ return err;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c
new file mode 100644
index 000000000000..79bab9b50e9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* This parsing logic is taken from the open source library katran, a layer 4
+ * load balancer.
+ *
+ * This code logic using dynptrs can be found in test_parse_tcp_hdr_opt_dynptr.c
+ *
+ * https://github.com/facebookincubator/katran/blob/main/katran/lib/bpf/pckt_parsing.h
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/tcp.h>
+#include <stdbool.h>
+#include <linux/ipv6.h>
+#include <linux/if_ether.h>
+#include "test_tcp_hdr_options.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Kind number used for experiments */
+const __u32 tcp_hdr_opt_kind_tpr = 0xFD;
+/* Length of the tcp header option */
+const __u32 tcp_hdr_opt_len_tpr = 6;
+/* maximum number of header options to check to lookup server_id */
+const __u32 tcp_hdr_opt_max_opt_checks = 15;
+
+__u32 server_id;
+
+struct hdr_opt_state {
+ __u32 server_id;
+ __u8 byte_offset;
+ __u8 hdr_bytes_remaining;
+};
+
+static int parse_hdr_opt(const struct xdp_md *xdp, struct hdr_opt_state *state)
+{
+ const void *data = (void *)(long)xdp->data;
+ const void *data_end = (void *)(long)xdp->data_end;
+ __u8 *tcp_opt, kind, hdr_len;
+
+ tcp_opt = (__u8 *)(data + state->byte_offset);
+ if (tcp_opt + 1 > data_end)
+ return -1;
+
+ kind = tcp_opt[0];
+
+ if (kind == TCPOPT_EOL)
+ return -1;
+
+ if (kind == TCPOPT_NOP) {
+ state->hdr_bytes_remaining--;
+ state->byte_offset++;
+ return 0;
+ }
+
+ if (state->hdr_bytes_remaining < 2 ||
+ tcp_opt + sizeof(__u8) + sizeof(__u8) > data_end)
+ return -1;
+
+ hdr_len = tcp_opt[1];
+ if (hdr_len > state->hdr_bytes_remaining)
+ return -1;
+
+ if (kind == tcp_hdr_opt_kind_tpr) {
+ if (hdr_len != tcp_hdr_opt_len_tpr)
+ return -1;
+
+ if (tcp_opt + tcp_hdr_opt_len_tpr > data_end)
+ return -1;
+
+ state->server_id = *(__u32 *)&tcp_opt[2];
+ return 1;
+ }
+
+ state->hdr_bytes_remaining -= hdr_len;
+ state->byte_offset += hdr_len;
+ return 0;
+}
+
+SEC("xdp")
+int xdp_ingress_v6(struct xdp_md *xdp)
+{
+ const void *data = (void *)(long)xdp->data;
+ const void *data_end = (void *)(long)xdp->data_end;
+ struct hdr_opt_state opt_state = {};
+ __u8 tcp_hdr_opt_len = 0;
+ struct tcphdr *tcp_hdr;
+ __u64 tcp_offset = 0;
+ __u32 off;
+ int err;
+
+ tcp_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+ tcp_hdr = (struct tcphdr *)(data + tcp_offset);
+ if (tcp_hdr + 1 > data_end)
+ return XDP_DROP;
+
+ tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr);
+ if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr)
+ return XDP_DROP;
+
+ opt_state.hdr_bytes_remaining = tcp_hdr_opt_len;
+ opt_state.byte_offset = sizeof(struct tcphdr) + tcp_offset;
+
+ /* max number of bytes of options in tcp header is 40 bytes */
+ for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) {
+ err = parse_hdr_opt(xdp, &opt_state);
+
+ if (err || !opt_state.hdr_bytes_remaining)
+ break;
+ }
+
+ if (!opt_state.server_id)
+ return XDP_DROP;
+
+ server_id = opt_state.server_id;
+
+ return XDP_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c
new file mode 100644
index 000000000000..d3b319722e30
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* This logic is lifted from a real-world use case of packet parsing, used in
+ * the open source library katran, a layer 4 load balancer.
+ *
+ * This test demonstrates how to parse packet contents using dynptrs. The
+ * original code (parsing without dynptrs) can be found in test_parse_tcp_hdr_opt.c
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/tcp.h>
+#include <stdbool.h>
+#include <linux/ipv6.h>
+#include <linux/if_ether.h>
+#include "test_tcp_hdr_options.h"
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Kind number used for experiments */
+const __u32 tcp_hdr_opt_kind_tpr = 0xFD;
+/* Length of the tcp header option */
+const __u32 tcp_hdr_opt_len_tpr = 6;
+/* maximum number of header options to check to lookup server_id */
+const __u32 tcp_hdr_opt_max_opt_checks = 15;
+
+__u32 server_id;
+
+static int parse_hdr_opt(struct bpf_dynptr *ptr, __u32 *off, __u8 *hdr_bytes_remaining,
+ __u32 *server_id)
+{
+ __u8 *tcp_opt, kind, hdr_len;
+ __u8 buffer[sizeof(kind) + sizeof(hdr_len) + sizeof(*server_id)];
+ __u8 *data;
+
+ __builtin_memset(buffer, 0, sizeof(buffer));
+
+ data = bpf_dynptr_slice(ptr, *off, buffer, sizeof(buffer));
+ if (!data)
+ return -1;
+
+ kind = data[0];
+
+ if (kind == TCPOPT_EOL)
+ return -1;
+
+ if (kind == TCPOPT_NOP) {
+ *off += 1;
+ *hdr_bytes_remaining -= 1;
+ return 0;
+ }
+
+ if (*hdr_bytes_remaining < 2)
+ return -1;
+
+ hdr_len = data[1];
+ if (hdr_len > *hdr_bytes_remaining)
+ return -1;
+
+ if (kind == tcp_hdr_opt_kind_tpr) {
+ if (hdr_len != tcp_hdr_opt_len_tpr)
+ return -1;
+
+ __builtin_memcpy(server_id, (__u32 *)(data + 2), sizeof(*server_id));
+ return 1;
+ }
+
+ *off += hdr_len;
+ *hdr_bytes_remaining -= hdr_len;
+ return 0;
+}
+
+SEC("xdp")
+int xdp_ingress_v6(struct xdp_md *xdp)
+{
+ __u8 buffer[sizeof(struct tcphdr)] = {};
+ __u8 hdr_bytes_remaining;
+ struct tcphdr *tcp_hdr;
+ __u8 tcp_hdr_opt_len;
+ int err = 0;
+ __u32 off;
+
+ struct bpf_dynptr ptr;
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+
+ off = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+
+ tcp_hdr = bpf_dynptr_slice(&ptr, off, buffer, sizeof(buffer));
+ if (!tcp_hdr)
+ return XDP_DROP;
+
+ tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr);
+ if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr)
+ return XDP_DROP;
+
+ hdr_bytes_remaining = tcp_hdr_opt_len;
+
+ off += sizeof(struct tcphdr);
+
+ /* max number of bytes of options in tcp header is 40 bytes */
+ for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) {
+ err = parse_hdr_opt(&ptr, &off, &hdr_bytes_remaining, &server_id);
+
+ if (err || !hdr_bytes_remaining)
+ break;
+ }
+
+ if (!server_id)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index b502e5c92e33..6ccf6d546074 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -23,8 +23,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
bool *ipv4)
{
struct bpf_sock_tuple *result;
+ __u64 ihl_len = 0;
__u8 proto = 0;
- __u64 ihl_len;
if (eth_proto == bpf_htons(ETH_P_IP)) {
struct iphdr *iph = (struct iphdr *)(data + nh_off);
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index 508da4a23c4f..95b4aa0928ba 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -324,11 +324,11 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
SEC("tc")
int vxlan_set_tunnel_dst(struct __sk_buff *skb)
{
- int ret;
struct bpf_tunnel_key key;
struct vxlan_metadata md;
__u32 index = 0;
__u32 *local_ip = NULL;
+ int ret = 0;
local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
if (!local_ip) {
@@ -363,11 +363,11 @@ int vxlan_set_tunnel_dst(struct __sk_buff *skb)
SEC("tc")
int vxlan_set_tunnel_src(struct __sk_buff *skb)
{
- int ret;
struct bpf_tunnel_key key;
struct vxlan_metadata md;
__u32 index = 0;
__u32 *local_ip = NULL;
+ int ret = 0;
local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
if (!local_ip) {
@@ -494,9 +494,9 @@ SEC("tc")
int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
- int ret;
__u32 index = 0;
__u32 *local_ip;
+ int ret = 0;
local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
if (!local_ip) {
@@ -525,9 +525,9 @@ SEC("tc")
int ip6vxlan_set_tunnel_src(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
- int ret;
__u32 index = 0;
__u32 *local_ip;
+ int ret = 0;
local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
if (!local_ip) {
@@ -556,9 +556,9 @@ SEC("tc")
int ip6vxlan_get_tunnel_src(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
- int ret;
__u32 index = 0;
__u32 *local_ip;
+ int ret = 0;
local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
if (!local_ip) {
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
new file mode 100644
index 000000000000..7521a805b506
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "test_iptunnel_common.h"
+#include "bpf_kfuncs.h"
+
+const size_t tcphdr_sz = sizeof(struct tcphdr);
+const size_t udphdr_sz = sizeof(struct udphdr);
+const size_t ethhdr_sz = sizeof(struct ethhdr);
+const size_t iphdr_sz = sizeof(struct iphdr);
+const size_t ipv6hdr_sz = sizeof(struct ipv6hdr);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 256);
+ __type(key, __u32);
+ __type(value, __u64);
+} rxcnt SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, MAX_IPTNL_ENTRIES);
+ __type(key, struct vip);
+ __type(value, struct iptnl_info);
+} vip2tnl SEC(".maps");
+
+static __always_inline void count_tx(__u32 protocol)
+{
+ __u64 *rxcnt_count;
+
+ rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+ if (rxcnt_count)
+ *rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, __u8 protocol)
+{
+ struct tcphdr *th;
+ struct udphdr *uh;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)trans_data;
+ return th->dest;
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)trans_data;
+ return uh->dest;
+ default:
+ return 0;
+ }
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+ const struct ethhdr *old_eth,
+ const struct iptnl_info *tnl,
+ __be16 h_proto)
+{
+ memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+ memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+ new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr)
+{
+ __u8 eth_buffer[ethhdr_sz + iphdr_sz + ethhdr_sz];
+ __u8 iph_buffer_tcp[iphdr_sz + tcphdr_sz];
+ __u8 iph_buffer_udp[iphdr_sz + udphdr_sz];
+ struct bpf_dynptr new_xdp_ptr;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ __u32 transport_hdr_sz;
+ struct iphdr *iph;
+ __u16 *next_iph;
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+ __u32 csum = 0;
+ int i;
+
+ __builtin_memset(eth_buffer, 0, sizeof(eth_buffer));
+ __builtin_memset(iph_buffer_tcp, 0, sizeof(iph_buffer_tcp));
+ __builtin_memset(iph_buffer_udp, 0, sizeof(iph_buffer_udp));
+
+ if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data)
+ iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_udp, sizeof(iph_buffer_udp));
+ else
+ iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_tcp, sizeof(iph_buffer_tcp));
+
+ if (!iph)
+ return XDP_DROP;
+
+ dport = get_dport(iph + 1, iph->protocol);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = iph->protocol;
+ vip.family = AF_INET;
+ vip.daddr.v4 = iph->daddr;
+ vip.dport = dport;
+ payload_len = bpf_ntohs(iph->tot_len);
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v4-in-v4 */
+ if (!tnl || tnl->family != AF_INET)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)iphdr_sz))
+ return XDP_DROP;
+
+ bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr);
+ new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer));
+ if (!new_eth)
+ return XDP_DROP;
+
+ iph = (struct iphdr *)(new_eth + 1);
+ old_eth = (struct ethhdr *)(iph + 1);
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
+
+ if (new_eth == eth_buffer)
+ bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0);
+
+ iph->version = 4;
+ iph->ihl = iphdr_sz >> 2;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0;
+ iph->tos = 0;
+ iph->tot_len = bpf_htons(payload_len + iphdr_sz);
+ iph->daddr = tnl->daddr.v4;
+ iph->saddr = tnl->saddr.v4;
+ iph->ttl = 8;
+
+ next_iph = (__u16 *)iph;
+ for (i = 0; i < iphdr_sz >> 1; i++)
+ csum += *next_iph++;
+
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr)
+{
+ __u8 eth_buffer[ethhdr_sz + ipv6hdr_sz + ethhdr_sz];
+ __u8 ip6h_buffer_tcp[ipv6hdr_sz + tcphdr_sz];
+ __u8 ip6h_buffer_udp[ipv6hdr_sz + udphdr_sz];
+ struct bpf_dynptr new_xdp_ptr;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ __u32 transport_hdr_sz;
+ struct ipv6hdr *ip6h;
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+
+ __builtin_memset(eth_buffer, 0, sizeof(eth_buffer));
+ __builtin_memset(ip6h_buffer_tcp, 0, sizeof(ip6h_buffer_tcp));
+ __builtin_memset(ip6h_buffer_udp, 0, sizeof(ip6h_buffer_udp));
+
+ if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data)
+ ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_udp, sizeof(ip6h_buffer_udp));
+ else
+ ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_tcp, sizeof(ip6h_buffer_tcp));
+
+ if (!ip6h)
+ return XDP_DROP;
+
+ dport = get_dport(ip6h + 1, ip6h->nexthdr);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = ip6h->nexthdr;
+ vip.family = AF_INET6;
+ memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+ vip.dport = dport;
+ payload_len = ip6h->payload_len;
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v6-in-v6 */
+ if (!tnl || tnl->family != AF_INET6)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)ipv6hdr_sz))
+ return XDP_DROP;
+
+ bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr);
+ new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer));
+ if (!new_eth)
+ return XDP_DROP;
+
+ ip6h = (struct ipv6hdr *)(new_eth + 1);
+ old_eth = (struct ethhdr *)(ip6h + 1);
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
+
+ if (new_eth == eth_buffer)
+ bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0);
+
+ ip6h->version = 6;
+ ip6h->priority = 0;
+ memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+ ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + ipv6hdr_sz);
+ ip6h->nexthdr = IPPROTO_IPV6;
+ ip6h->hop_limit = 8;
+ memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+ memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+SEC("xdp")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+ __u8 buffer[ethhdr_sz];
+ struct bpf_dynptr ptr;
+ struct ethhdr *eth;
+ __u16 h_proto;
+
+ __builtin_memset(buffer, 0, sizeof(buffer));
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ eth = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!eth)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ if (h_proto == bpf_htons(ETH_P_IP))
+ return handle_ipv4(xdp, &ptr);
+ else if (h_proto == bpf_htons(ETH_P_IPV6))
+
+ return handle_ipv6(xdp, &ptr);
+ else
+ return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index acda5c9cea93..9a16d95213e1 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -46,7 +46,15 @@ struct {
__type(value, struct elem);
} lru SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} abs_timer SEC(".maps");
+
__u64 bss_data;
+__u64 abs_data;
__u64 err;
__u64 ok;
__u64 callback_check = 52;
@@ -284,3 +292,40 @@ int BPF_PROG2(test2, int, a, int, b)
return bpf_timer_test();
}
+
+/* callback for absolute timer */
+static int timer_cb3(void *map, int *key, struct bpf_timer *timer)
+{
+ abs_data += 6;
+
+ if (abs_data < 12) {
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000,
+ BPF_F_TIMER_ABS);
+ } else {
+ /* Re-arm timer ~35 seconds in future */
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + (1ull << 35),
+ BPF_F_TIMER_ABS);
+ }
+
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG2(test3, int, a)
+{
+ int key = 0;
+ struct bpf_timer *timer;
+
+ bpf_printk("test3");
+
+ timer = bpf_map_lookup_elem(&abs_timer, &key);
+ if (timer) {
+ if (bpf_timer_init(timer, &abs_timer, CLOCK_BOOTTIME) != 0)
+ err |= 2048;
+ bpf_timer_set_callback(timer, timer_cb3);
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000,
+ BPF_F_TIMER_ABS);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c
new file mode 100644
index 000000000000..8a403470e557
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uninit_stack.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Read an uninitialized value from stack at a fixed offset */
+SEC("socket")
+__naked int read_uninit_stack_fixed_off(void *ctx)
+{
+ asm volatile (" \
+ r0 = 0; \
+ /* force stack depth to be 128 */ \
+ *(u64*)(r10 - 128) = r1; \
+ r1 = *(u8 *)(r10 - 8 ); \
+ r0 += r1; \
+ r1 = *(u8 *)(r10 - 11); \
+ r1 = *(u8 *)(r10 - 13); \
+ r1 = *(u8 *)(r10 - 15); \
+ r1 = *(u16*)(r10 - 16); \
+ r1 = *(u32*)(r10 - 32); \
+ r1 = *(u64*)(r10 - 64); \
+ /* read from a spill of a wrong size, it is a separate \
+ * branch in check_stack_read_fixed_off() \
+ */ \
+ *(u32*)(r10 - 72) = r1; \
+ r1 = *(u64*)(r10 - 72); \
+ r0 = 0; \
+ exit; \
+"
+ ::: __clobber_all);
+}
+
+/* Read an uninitialized value from stack at a variable offset */
+SEC("socket")
+__naked int read_uninit_stack_var_off(void *ctx)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ /* force stack depth to be 64 */ \
+ *(u64*)(r10 - 64) = r0; \
+ r0 = -r0; \
+ /* give r0 a range [-31, -1] */ \
+ if r0 s<= -32 goto exit_%=; \
+ if r0 s>= 0 goto exit_%=; \
+ /* access stack using r0 */ \
+ r1 = r10; \
+ r1 += r0; \
+ r2 = *(u8*)(r1 + 0); \
+exit_%=: r0 = 0; \
+ exit; \
+"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+static __noinline void dummy(void) {}
+
+/* Pass a pointer to uninitialized stack memory to a helper.
+ * Passed memory block should be marked as STACK_MISC after helper call.
+ */
+SEC("socket")
+__log_level(7) __msg("fp-104=mmmmmmmm")
+__naked int helper_uninit_to_misc(void *ctx)
+{
+ asm volatile (" \
+ /* force stack depth to be 128 */ \
+ *(u64*)(r10 - 128) = r1; \
+ r1 = r10; \
+ r1 += -128; \
+ r2 = 32; \
+ call %[bpf_trace_printk]; \
+ /* Call to dummy() forces print_verifier_state(..., true), \
+ * thus showing the stack state, matched by __msg(). \
+ */ \
+ call %[dummy]; \
+ r0 = 0; \
+ exit; \
+"
+ :
+ : __imm(bpf_trace_printk),
+ __imm(dummy)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
index b39093dd5715..0ade1110613b 100644
--- a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
@@ -202,7 +202,7 @@ do_nothing_cb(struct bpf_dynptr *dynptr, void *context)
return 0;
}
-SEC("fentry/" SYS_PREFIX "sys_getrlimit")
+SEC("fentry/" SYS_PREFIX "sys_prlimit64")
int test_user_ringbuf_epoll(void *ctx)
{
long num_samples;
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 679efb3aa785..bf41390157bf 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -13,12 +13,15 @@
#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
+#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
struct test_spec {
const char *name;
bool expect_failure;
- const char *expect_msg;
+ const char **expect_msgs;
+ size_t expect_msg_cnt;
int log_level;
+ int prog_flags;
};
static int tester_init(struct test_loader *tester)
@@ -67,7 +70,8 @@ static int parse_test_spec(struct test_loader *tester,
for (i = 1; i < btf__type_cnt(btf); i++) {
const struct btf_type *t;
- const char *s;
+ const char *s, *val;
+ char *e;
t = btf__type_by_id(btf, i);
if (!btf_is_decl_tag(t))
@@ -82,14 +86,48 @@ static int parse_test_spec(struct test_loader *tester,
} else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) {
spec->expect_failure = false;
} else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) {
- spec->expect_msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
+ void *tmp;
+ const char **msg;
+
+ tmp = realloc(spec->expect_msgs,
+ (1 + spec->expect_msg_cnt) * sizeof(void *));
+ if (!tmp) {
+ ASSERT_FAIL("failed to realloc memory for messages\n");
+ return -ENOMEM;
+ }
+ spec->expect_msgs = tmp;
+ msg = &spec->expect_msgs[spec->expect_msg_cnt++];
+ *msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
} else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) {
+ val = s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1;
errno = 0;
- spec->log_level = strtol(s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1, NULL, 0);
- if (errno) {
+ spec->log_level = strtol(val, &e, 0);
+ if (errno || e[0] != '\0') {
ASSERT_FAIL("failed to parse test log level from '%s'", s);
return -EINVAL;
}
+ } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) {
+ val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1;
+ if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) {
+ spec->prog_flags |= BPF_F_STRICT_ALIGNMENT;
+ } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) {
+ spec->prog_flags |= BPF_F_ANY_ALIGNMENT;
+ } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) {
+ spec->prog_flags |= BPF_F_TEST_RND_HI32;
+ } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) {
+ spec->prog_flags |= BPF_F_TEST_STATE_FREQ;
+ } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) {
+ spec->prog_flags |= BPF_F_SLEEPABLE;
+ } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) {
+ spec->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+ } else /* assume numeric value */ {
+ errno = 0;
+ spec->prog_flags |= strtol(val, &e, 0);
+ if (errno || e[0] != '\0') {
+ ASSERT_FAIL("failed to parse test prog flags from '%s'", s);
+ return -EINVAL;
+ }
+ }
}
}
@@ -101,7 +139,7 @@ static void prepare_case(struct test_loader *tester,
struct bpf_object *obj,
struct bpf_program *prog)
{
- int min_log_level = 0;
+ int min_log_level = 0, prog_flags;
if (env.verbosity > VERBOSE_NONE)
min_log_level = 1;
@@ -119,7 +157,11 @@ static void prepare_case(struct test_loader *tester,
else
bpf_program__set_log_level(prog, spec->log_level);
+ prog_flags = bpf_program__flags(prog);
+ bpf_program__set_flags(prog, prog_flags | spec->prog_flags);
+
tester->log_buf[0] = '\0';
+ tester->next_match_pos = 0;
}
static void emit_verifier_log(const char *log_buf, bool force)
@@ -135,17 +177,26 @@ static void validate_case(struct test_loader *tester,
struct bpf_program *prog,
int load_err)
{
- if (spec->expect_msg) {
+ int i, j;
+
+ for (i = 0; i < spec->expect_msg_cnt; i++) {
char *match;
+ const char *expect_msg;
+
+ expect_msg = spec->expect_msgs[i];
- match = strstr(tester->log_buf, spec->expect_msg);
+ match = strstr(tester->log_buf + tester->next_match_pos, expect_msg);
if (!ASSERT_OK_PTR(match, "expect_msg")) {
/* if we are in verbose mode, we've already emitted log */
if (env.verbosity == VERBOSE_NONE)
emit_verifier_log(tester->log_buf, true /*force*/);
- fprintf(stderr, "EXPECTED MSG: '%s'\n", spec->expect_msg);
+ for (j = 0; j < i; j++)
+ fprintf(stderr, "MATCHED MSG: '%s'\n", spec->expect_msgs[j]);
+ fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg);
return;
}
+
+ tester->next_match_pos = match - tester->log_buf + strlen(expect_msg);
}
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index d5d51ec97ec8..3cbf005747ed 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -376,6 +376,21 @@ int test__join_cgroup(const char *path);
___ok; \
})
+#define SYS(goto_label, fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ goto goto_label; \
+ })
+
+#define SYS_NOFAIL(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ system(cmd); \
+ })
+
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
@@ -412,6 +427,7 @@ int get_bpf_max_tramp_links(void);
struct test_loader {
char *log_buf;
size_t log_buf_sz;
+ size_t next_match_pos;
struct bpf_object *obj;
};
diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
index 6118e3ab61fc..56c9f8a3ad3d 100644
--- a/tools/testing/selftests/bpf/test_tcp_hdr_options.h
+++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
@@ -50,6 +50,7 @@ struct linum_err {
#define TCPOPT_EOL 0
#define TCPOPT_NOP 1
+#define TCPOPT_MSS 2
#define TCPOPT_WINDOW 3
#define TCPOPT_EXP 254
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 8b9949bb833d..49a70d9beb0b 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -699,13 +699,13 @@ static int create_cgroup_storage(bool percpu)
* struct bpf_timer t;
* };
* struct btf_ptr {
+ * struct prog_test_ref_kfunc __kptr_untrusted *ptr;
* struct prog_test_ref_kfunc __kptr *ptr;
- * struct prog_test_ref_kfunc __kptr_ref *ptr;
- * struct prog_test_member __kptr_ref *ptr;
+ * struct prog_test_member __kptr *ptr;
* }
*/
static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"
- "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref"
+ "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_untrusted"
"\0prog_test_member";
static __u32 btf_raw_types[] = {
/* int */
@@ -724,20 +724,20 @@ static __u32 btf_raw_types[] = {
BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */
/* struct prog_test_ref_kfunc */ /* [6] */
BTF_STRUCT_ENC(51, 0, 0),
- BTF_STRUCT_ENC(89, 0, 0), /* [7] */
+ BTF_STRUCT_ENC(95, 0, 0), /* [7] */
+ /* type tag "kptr_untrusted" */
+ BTF_TYPE_TAG_ENC(80, 6), /* [8] */
/* type tag "kptr" */
- BTF_TYPE_TAG_ENC(75, 6), /* [8] */
- /* type tag "kptr_ref" */
- BTF_TYPE_TAG_ENC(80, 6), /* [9] */
- BTF_TYPE_TAG_ENC(80, 7), /* [10] */
+ BTF_TYPE_TAG_ENC(75, 6), /* [9] */
+ BTF_TYPE_TAG_ENC(75, 7), /* [10] */
BTF_PTR_ENC(8), /* [11] */
BTF_PTR_ENC(9), /* [12] */
BTF_PTR_ENC(10), /* [13] */
/* struct btf_ptr */ /* [14] */
BTF_STRUCT_ENC(43, 3, 24),
- BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */
- BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */
- BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */
+ BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr_untrusted *ptr; */
+ BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr *ptr; */
+ BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */
};
static char bpf_vlog[UINT_MAX >> 8];
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 9d993926bf0e..5702fc9761ef 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -181,7 +181,7 @@
},
.result_unpriv = REJECT,
.result = REJECT,
- .errstr = "negative offset ptr_ ptr R1 off=-4 disallowed",
+ .errstr = "ptr R1 off=-4 disallowed",
},
{
"calls: invalid kfunc call: PTR_TO_BTF_ID with variable offset",
@@ -243,7 +243,7 @@
},
.result_unpriv = REJECT,
.result = REJECT,
- .errstr = "R1 must be referenced",
+ .errstr = "R1 must be",
},
{
"calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
@@ -2221,19 +2221,22 @@
* that fp-8 stack slot was unused in the fall-through
* branch and will accept the program incorrectly
*/
- BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 2, 2),
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_JMP_IMM(BPF_JA, 0, 0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map_hash_48b = { 6 },
- .errstr = "invalid indirect read from stack R2 off -8+0 size 8",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map_hash_48b = { 7 },
+ .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"calls: ctx read at start of subprog",
diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c
index c8eaf0536c24..2fd31612c0b8 100644
--- a/tools/testing/selftests/bpf/verifier/ctx.c
+++ b/tools/testing/selftests/bpf/verifier/ctx.c
@@ -1,15 +1,4 @@
{
- "context stores via ST",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "BPF_ST stores into R1 ctx is not allowed",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
"context stores via BPF_ATOMIC",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index a6c869a7319c..9c4885885aba 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -29,19 +29,30 @@
{
"helper access to variable memory: stack, bitwise AND, zero included",
.insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
+ /* set max stack size */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
+ /* set r3 to a random value */
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ /* use bitwise AND to limit r3 range to [0, 64] */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 64),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
+ * For unpriv this should signal an error, because memory at &fp[-64] is
+ * not initialized.
+ */
+ BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
BPF_EXIT_INSN(),
},
- .errstr = "invalid indirect read from stack R1 off -64+0 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .fixup_map_ringbuf = { 4 },
+ .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"helper access to variable memory: stack, bitwise AND + JMP, wrong max",
@@ -183,20 +194,31 @@
{
"helper access to variable memory: stack, JMP, no min check",
.insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
+ /* set max stack size */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
+ /* set r3 to a random value */
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ /* use JMP to limit r3 range to [0, 64] */
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 64, 6),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
+ * For unpriv this should signal an error, because memory at &fp[-64] is
+ * not initialized.
+ */
+ BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "invalid indirect read from stack R1 off -64+0 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .fixup_map_ringbuf = { 4 },
+ .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"helper access to variable memory: stack, JMP (signed), no min check",
@@ -564,29 +586,41 @@
{
"helper access to variable memory: 8 bytes leak",
.insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ /* set max stack size */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0),
+ /* set r3 to a random value */
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ /* Note: fp[-32] left uninitialized */
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ /* Limit r3 range to [1, 64] */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 63),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.
+ * For unpriv this should signal an error, because memory region [1, 64]
+ * at &fp[-64] is not fully initialized.
+ */
+ BPF_EMIT_CALL(BPF_FUNC_ringbuf_output),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "invalid indirect read from stack R1 off -64+32 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .fixup_map_ringbuf = { 3 },
+ .errstr_unpriv = "invalid indirect read from stack R2 off -64+32 size 64",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"helper access to variable memory: 8 bytes no leak (init memory)",
diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c
index 070893fb2900..02d9e004260b 100644
--- a/tools/testing/selftests/bpf/verifier/int_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/int_ptr.c
@@ -54,12 +54,13 @@
/* bpf_strtoul() */
BPF_EMIT_CALL(BPF_FUNC_strtoul),
- BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "invalid indirect read from stack R4 off -16+4 size 8",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "invalid indirect read from stack R4 off -16+4 size 8",
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"ARG_PTR_TO_LONG misaligned",
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
index 6914904344c0..d775ccb01989 100644
--- a/tools/testing/selftests/bpf/verifier/map_kptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -336,7 +336,7 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_map_kptr = { 1 },
.result = REJECT,
- .errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_",
+ .errstr = "R1 type=rcu_ptr_or_null_ expected=percpu_ptr_",
},
{
"map_kptr: ref: reject off != 0",
diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c
index d63fd8991b03..745d6b5842fd 100644
--- a/tools/testing/selftests/bpf/verifier/search_pruning.c
+++ b/tools/testing/selftests/bpf/verifier/search_pruning.c
@@ -128,9 +128,10 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "invalid read from stack off -16+0 size 8",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr_unpriv = "invalid read from stack off -16+0 size 8",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"precision tracking for u32 spill/fill",
@@ -258,6 +259,8 @@
BPF_EXIT_INSN(),
},
.flags = BPF_F_TEST_STATE_FREQ,
- .errstr = "invalid read from stack off -8+1 size 8",
- .result = REJECT,
+ .errstr_unpriv = "invalid read from stack off -8+1 size 8",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index d11d0b28be41..108dd3ee1edd 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -531,33 +531,6 @@
.result = ACCEPT,
},
{
- "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 14 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid indirect read from stack",
-},
-{
"bpf_map_lookup_elem(smap, &key)",
.insns = {
BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
index 9bb302dade23..d1463bf4949a 100644
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ b/tools/testing/selftests/bpf/verifier/spill_fill.c
@@ -171,9 +171,10 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .result = REJECT,
- .errstr = "invalid read from stack off -4+0 size 4",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "invalid read from stack off -4+0 size 4",
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"Spill a u32 const scalar. Refill as u16. Offset to skb->data",
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index 878ca26c3f0a..af0c0f336625 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -240,6 +240,29 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ /* Same as above, but use BPF_ST_MEM to save 42
+ * instead of BPF_STX_MEM.
+ */
+ "unpriv: spill/fill of different pointers st",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_1, offsetof(struct __sk_buff, mark), 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "same insn cannot be used with different pointers",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
"unpriv: spill/fill of different pointers stx - ctx and sock",
.insns = {
BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
index d37f512fad16..b183e26c03f1 100644
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ b/tools/testing/selftests/bpf/verifier/var_off.c
@@ -213,31 +213,6 @@
.prog_type = BPF_PROG_TYPE_LWT_IN,
},
{
- "indirect variable-offset stack access, max_off+size > max_initialized",
- .insns = {
- /* Fill only the second from top 8 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
- * which. fp-12 size 8 is partially uninitialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "invalid indirect read from stack R2 var_off",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
"indirect variable-offset stack access, min_off < min_initialized",
.insns = {
/* Fill only the top 8 bytes of the stack. */
@@ -290,33 +265,6 @@
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
- "indirect variable-offset stack access, uninitialized",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_3, 28),
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, we don't know
- * which, but either way it points to initialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_5, 8),
- /* Dereference it indirectly. */
- BPF_EMIT_CALL(BPF_FUNC_getsockopt),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect read from stack R4 var_off",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
"indirect variable-offset stack access, ok",
.insns = {
/* Fill the top 16 bytes of the stack. */