From b22bf1b9979a608827dea98c61ed9ec297bcc513 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 4 Jan 2022 18:59:29 +0100 Subject: bpftool: Refactor misc. feature probe There is currently a single miscellaneous feature probe, HAVE_LARGE_INSN_LIMIT, to check for the 1M instructions limit in the verifier. Subsequent patches will add additional miscellaneous probes, which follow the same pattern as the existing probe. This patch therefore refactors the probe to avoid code duplication in subsequent patches. The BPF program type and the checked error numbers in the HAVE_LARGE_INSN_LIMIT probe are changed to better generalize to other probes. The feature probe retains its current behavior despite those changes. Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/956c9329a932c75941194f91790d01f31dfbe01b.1641314075.git.paul@isovalent.com --- tools/bpf/bpftool/feature.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 6719b9282eca..3da97a02f455 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -642,6 +642,30 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, printf("\n"); } +static void +probe_misc_feature(struct bpf_insn *insns, size_t len, + const char *define_prefix, __u32 ifindex, + const char *feat_name, const char *plain_name, + const char *define_name) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .prog_ifindex = ifindex, + ); + bool res; + int fd; + + errno = 0; + fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", + insns, len, &opts); + res = fd >= 0 || !errno; + + if (fd >= 0) + close(fd); + + print_bool_feature(feat_name, plain_name, define_name, res, + define_prefix); +} + /* * Probe for availability of kernel commit (5.3): * @@ -649,29 +673,18 @@ 
probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, */ static void probe_large_insn_limit(const char *define_prefix, __u32 ifindex) { - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .prog_ifindex = ifindex, - ); struct bpf_insn insns[BPF_MAXINSNS + 1]; - bool res; - int i, fd; + int i; for (i = 0; i < BPF_MAXINSNS; i++) insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); - errno = 0; - fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL", - insns, ARRAY_SIZE(insns), &opts); - res = fd >= 0 || (errno != E2BIG && errno != EINVAL); - - if (fd >= 0) - close(fd); - - print_bool_feature("have_large_insn_limit", + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_large_insn_limit", "Large program size limit", - "LARGE_INSN_LIMIT", - res, define_prefix); + "LARGE_INSN_LIMIT"); } static void -- cgit From c04fb2b0bd9275969be3b0a95f9c3ef76b1bfb73 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 4 Jan 2022 18:59:57 +0100 Subject: bpftool: Probe for bounded loop support This patch introduces a new probe to check whether the verifier supports bounded loops as introduced in commit 2589726d12a1 ("bpf: introduce bounded loops"). This patch will allow BPF users such as Cilium to probe for loop support on startup and only unconditionally unroll loops on older kernels. The results are displayed as part of the miscellaneous section, as shown below. 
$ bpftool feature probe | grep loops Bounded loop support is available $ bpftool feature probe macro | grep LOOPS #define HAVE_BOUNDED_LOOPS $ bpftool feature probe -j | jq .misc { "have_large_insn_limit": true, "have_bounded_loops": true } Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/f7807c0b27d79f48e71de7b5a99c680ca4bd0151.1641314075.git.paul@isovalent.com --- tools/bpf/bpftool/feature.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 3da97a02f455..03579d113042 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -687,6 +687,27 @@ static void probe_large_insn_limit(const char *define_prefix, __u32 ifindex) "LARGE_INSN_LIMIT"); } +/* + * Probe for bounded loop support introduced in commit 2589726d12a1 + * ("bpf: introduce bounded loops"). + */ +static void +probe_bounded_loops(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[4] = { + BPF_MOV64_IMM(BPF_REG_0, 10), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, -2), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_bounded_loops", + "Bounded loop support", + "BOUNDED_LOOPS"); +} + static void section_system_config(enum probe_component target, const char *define_prefix) { @@ -801,6 +822,7 @@ static void section_misc(const char *define_prefix, __u32 ifindex) "/*** eBPF misc features ***/", define_prefix); probe_large_insn_limit(define_prefix, ifindex); + probe_bounded_loops(define_prefix, ifindex); print_end_section(); } -- cgit From 0fd800b2456cf90ed738a1260b53acaa8843b5ae Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 4 Jan 2022 19:00:13 +0100 Subject: bpftool: Probe for instruction set extensions This patch introduces new probes to check whether the kernel supports instruction set extensions 
v2 and v3. The first introduced eBPF instructions BPF_J{LT,LE,SLT,SLE} in commit 92b31a9af73b ("bpf: add BPF_J{LT,LE,SLT,SLE} instructions"). The second introduces 32-bit variants of all jump instructions in commit 092ed0968bb6 ("bpf: verifier support JMP32"). These probes are useful for userspace BPF projects that want to use newer instruction set extensions on newer kernels, to reduce the programs' sizes or their complexity. LLVM already provides an mcpu=probe option to automatically probe the kernel and select the newest-supported instruction set extension. That is however not flexible enough for all use cases. For example, in Cilium, we only want to use the v3 instruction set extension on v5.10+, even though it is supported on all kernels v5.1+. Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/3bfedcd9898c1f41ac67ca61f144fec84c6c3a92.1641314075.git.paul@isovalent.com --- tools/bpf/bpftool/feature.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'tools') diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 03579d113042..e999159fa28d 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -708,6 +708,48 @@ probe_bounded_loops(const char *define_prefix, __u32 ifindex) "BOUNDED_LOOPS"); } +/* + * Probe for the v2 instruction set extension introduced in commit 92b31a9af73b + * ("bpf: add BPF_J{LT,LE,SLT,SLE} instructions"). 
+ */ +static void +probe_v2_isa_extension(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[4] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_v2_isa_extension", + "ISA extension v2", + "V2_ISA_EXTENSION"); +} + +/* + * Probe for the v3 instruction set extension introduced in commit 092ed0968bb6 + * ("bpf: verifier support JMP32"). + */ +static void +probe_v3_isa_extension(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[4] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_v3_isa_extension", + "ISA extension v3", + "V3_ISA_EXTENSION"); +} + static void section_system_config(enum probe_component target, const char *define_prefix) { @@ -823,6 +865,8 @@ static void section_misc(const char *define_prefix, __u32 ifindex) define_prefix); probe_large_insn_limit(define_prefix, ifindex); probe_bounded_loops(define_prefix, ifindex); + probe_v2_isa_extension(define_prefix, ifindex); + probe_v3_isa_extension(define_prefix, ifindex); print_end_section(); } -- cgit From 5e22dd18626726028a93ff1350a8a71a00fd843d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 4 Jan 2022 13:10:30 +0100 Subject: bpf/selftests: Fix namespace mount setup in tc_redirect The tc_redirect umounts /sys in the new namespace, which can be mounted as shared and cause global umount. The lazy umount also takes down mounted trees under /sys like debugfs, which won't be available after sysfs mounts again and could cause fails in other tests. 
# cat /proc/self/mountinfo | grep debugfs 34 23 0:7 / /sys/kernel/debug rw,nosuid,nodev,noexec,relatime shared:14 - debugfs debugfs rw # cat /proc/self/mountinfo | grep sysfs 23 86 0:22 / /sys rw,nosuid,nodev,noexec,relatime shared:2 - sysfs sysfs rw # mount | grep debugfs debugfs on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime) # ./test_progs -t tc_redirect #164 tc_redirect:OK Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED # mount | grep debugfs # cat /proc/self/mountinfo | grep debugfs # cat /proc/self/mountinfo | grep sysfs 25 86 0:22 / /sys rw,relatime shared:2 - sysfs sysfs rw Making the sysfs private under the new namespace so the umount won't trigger the global sysfs umount. Reported-by: Hangbin Liu Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Cc: Jussi Maki Link: https://lore.kernel.org/bpf/20220104121030.138216-1-jolsa@kernel.org --- tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 4b18b73df10b..c2426df58e17 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -105,6 +105,13 @@ static int setns_by_fd(int nsfd) if (!ASSERT_OK(err, "unshare")) return err; + /* Make our /sys mount private, so the following umount won't + * trigger the global umount in case it's shared. + */ + err = mount("none", "/sys", NULL, MS_PRIVATE, NULL); + if (!ASSERT_OK(err, "remount private /sys")) + return err; + err = umount2("/sys", MNT_DETACH); if (!ASSERT_OK(err, "umount2 /sys")) return err; -- cgit From ca796fe66f7fceff17679ee6cc5fe4b4023de44d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 5 Jan 2022 11:33:34 -0800 Subject: bpf, selftests: Add verifier test for mem_or_null register with offset. 
Add a new test case with mem_or_null typed register with off > 0 to ensure it gets rejected by the verifier: # ./test_verifier 1011 #1009/u check with invalid reg offset 0 OK #1009/p check with invalid reg offset 0 OK Summary: 2 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/spill_fill.c | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 6c907144311f..1a8eb9672bd1 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -58,6 +58,34 @@ .result = ACCEPT, .result_unpriv = ACCEPT, }, +{ + "check with invalid reg offset 0", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* add invalid offset to memory or NULL */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + /* should not be able to access *(R6) = 0 */ + BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = REJECT, + .errstr = "R0 pointer arithmetic on mem_or_null prohibited", +}, { "check corrupted spill/fill", .insns = { -- cgit From 7218c28c87f57c131879a75a226b9033ac90b266 Mon Sep 17 00:00:00 2001 From: Christy Lee Date: Wed, 29 Dec 2021 12:41:56 -0800 Subject: libbpf: 
Deprecate bpf_perf_event_read_simple() API With perf_buffer__poll() and perf_buffer__consume() APIs available, there is no reason to expose bpf_perf_event_read_simple() API to users. If users need a custom perf buffer, they could re-implement the function. Mark bpf_perf_event_read_simple() as deprecated and move the logic to a new static function so it can still be called by other functions in the same file. [0] Closes: https://github.com/libbpf/libbpf/issues/310 Signed-off-by: Christy Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211229204156.13569-1-christylee@fb.com --- tools/lib/bpf/libbpf.c | 22 ++++++++++++++-------- tools/lib/bpf/libbpf.h | 1 + 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9cb99d1e2385..1d02ba7f11b4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10676,10 +10676,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) return link; } -enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data) +static enum bpf_perf_event_ret +perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data) { struct perf_event_mmap_page *header = mmap_mem; __u64 data_head = ring_buffer_read_head(header); @@ -10724,6 +10724,12 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, return libbpf_err(ret); } +__attribute__((alias("perf_event_read_simple"))) +enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data); + struct perf_buffer; struct perf_buffer_params { @@ -11132,10 +11138,10 @@ static int perf_buffer__process_records(struct 
perf_buffer *pb, { enum bpf_perf_event_ret ret; - ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size, - pb->page_size, &cpu_buf->buf, - &cpu_buf->buf_size, - perf_buffer__process_record, cpu_buf); + ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size, + pb->page_size, &cpu_buf->buf, + &cpu_buf->buf_size, + perf_buffer__process_record, cpu_buf); if (ret != LIBBPF_PERF_EVENT_CONT) return ret; return 0; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 85dfef88b3d2..ddf1cc9e7803 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1026,6 +1026,7 @@ LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_i typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, void *private_data); +LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead") LIBBPF_API enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, -- cgit From 71cff670baff5cc6a6eeb0181e2cc55579c5e1e0 Mon Sep 17 00:00:00 2001 From: Qiang Wang Date: Mon, 27 Dec 2021 21:07:12 +0800 Subject: libbpf: Use probe_name for legacy kprobe Fix a bug in commit 46ed5fc33db9, which wrongly used the func_name instead of probe_name to register legacy kprobe. 
Fixes: 46ed5fc33db9 ("libbpf: Refactor and simplify legacy kprobe code") Co-developed-by: Chengming Zhou Signed-off-by: Qiang Wang Signed-off-by: Chengming Zhou Signed-off-by: Andrii Nakryiko Tested-by: Hengqi Chen Reviewed-by: Hengqi Chen Link: https://lore.kernel.org/bpf/20211227130713.66933-1-wangqiang.wq.frank@bytedance.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1d02ba7f11b4..26e49e6aa5b1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10017,7 +10017,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), func_name, offset); - legacy_probe = strdup(func_name); + legacy_probe = strdup(probe_name); if (!legacy_probe) return libbpf_err_ptr(-ENOMEM); -- cgit From 51a33c60f1c22c0d2dafad774315ba1537765442 Mon Sep 17 00:00:00 2001 From: Qiang Wang Date: Mon, 27 Dec 2021 21:07:13 +0800 Subject: libbpf: Support repeated legacy kprobes on same function If legacy kprobes are attached repeatedly to the same function in one process, libbpf registers them using the same probe name and gets an -EBUSY error. So append an index to the probe name format to fix this problem. Co-developed-by: Chengming Zhou Signed-off-by: Qiang Wang Signed-off-by: Chengming Zhou Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211227130713.66933-2-wangqiang.wq.frank@bytedance.com --- tools/lib/bpf/libbpf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 26e49e6aa5b1..7f10dd501a52 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9916,7 +9916,10 @@ static int append_to_file(const char *file, const char *fmt, ...) 
static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, const char *kfunc_name, size_t offset) { - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset); + static int index = 0; + + snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, + __sync_fetch_and_add(&index, 1)); } static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, -- cgit From 9855c131b9c8b0327ff5182f88bb1991f212415b Mon Sep 17 00:00:00 2001 From: Christy Lee Date: Tue, 4 Jan 2022 16:06:01 -0800 Subject: libbpf 1.0: Deprecate bpf_map__is_offload_neutral() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deprecate bpf_map__is_offload_neutral(). It’s most probably broken already. PERF_EVENT_ARRAY isn’t the only map that’s not suitable for hardware offloading. Applications can directly check map type instead. [0] Closes: https://github.com/libbpf/libbpf/issues/306 Signed-off-by: Christy Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220105000601.2090044-1-christylee@fb.com --- tools/bpf/bpftool/prog.c | 2 +- tools/lib/bpf/libbpf.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f874896c4154..2a21d50516bc 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1655,7 +1655,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) j = 0; idx = 0; bpf_object__for_each_map(map, obj) { - if (!bpf_map__is_offload_neutral(map)) + if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) bpf_map__set_ifindex(map, ifindex); if (j < old_map_fds && idx == map_replace[j].idx) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index ddf1cc9e7803..88dd943ba545 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -744,6 +744,7 @@ LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int 
bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead") LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); /** -- cgit From 5f6082642814050352a3e29f8713796b55ebf788 Mon Sep 17 00:00:00 2001 From: Christy Lee Date: Tue, 4 Jan 2022 16:31:20 -0800 Subject: libbpf 1.0: Deprecate bpf_object__find_map_by_offset() API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit API created with simplistic assumptions about BPF map definitions. It hasn’t worked for a while, deprecate it in preparation for libbpf 1.0. [0] Closes: https://github.com/libbpf/libbpf/issues/302 Signed-off-by: Christy Lee Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220105003120.2222673-1-christylee@fb.com --- tools/lib/bpf/libbpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 88dd943ba545..8b9bc5e90c2b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -677,7 +677,8 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); * Get bpf_map through the offset of corresponding struct bpf_map_def * in the BPF object file. 
*/ -LIBBPF_API struct bpf_map * +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead") +struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead") -- cgit From 70bc793382a0e37ba4e35e4d1a317b280b829a44 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 6 Jan 2022 12:51:56 -0800 Subject: selftests/bpf: Don't rely on preserving volatile in PT_REGS macros in loop3 PT_REGS*() macro on some architectures force-cast struct pt_regs to other types (user_pt_regs, etc) and might drop volatile modifiers, if any. Volatile isn't really required as pt_regs value isn't supposed to change during the BPF program run, so this is correct behavior. But progs/loop3.c relies on that volatile modifier to ensure that loop is preserved. Fix loop3.c by declaring i and sum variables as volatile instead. It preserves the loop and makes the test pass on all architectures (including s390x which is currently broken). 
Fixes: 3cc31d794097 ("libbpf: Normalize PT_REGS_xxx() macro definitions") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220106205156.955373-1-andrii@kernel.org --- tools/testing/selftests/bpf/progs/loop3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c index 76e93b31c14b..717dab14322b 100644 --- a/tools/testing/selftests/bpf/progs/loop3.c +++ b/tools/testing/selftests/bpf/progs/loop3.c @@ -12,9 +12,9 @@ char _license[] SEC("license") = "GPL"; SEC("raw_tracepoint/consume_skb") -int while_true(volatile struct pt_regs* ctx) +int while_true(struct pt_regs *ctx) { - __u64 i = 0, sum = 0; + volatile __u64 i = 0, sum = 0; do { i++; sum += PT_REGS_RC(ctx); -- cgit From e59618f0f46fa6cf86d5b82380e0f453756b282b Mon Sep 17 00:00:00 2001 From: Grant Seltzer Date: Thu, 6 Jan 2022 15:13:05 -0500 Subject: libbpf: Add documentation for bpf_map batch operations This adds documentation for: - bpf_map_delete_batch() - bpf_map_lookup_batch() - bpf_map_lookup_and_delete_batch() - bpf_map_update_batch() This also updates the public API for the `keys` parameter of `bpf_map_delete_batch()`, and both the `keys` and `values` parameters of `bpf_map_update_batch()` to be constants. 
Signed-off-by: Grant Seltzer Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220106201304.112675-1-grantseltzer@gmail.com --- tools/lib/bpf/bpf.c | 8 ++-- tools/lib/bpf/bpf.h | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9b64eed2b003..550b4cbb6c99 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -691,11 +691,11 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, return libbpf_err_errno(ret); } -int bpf_map_delete_batch(int fd, void *keys, __u32 *count, +int bpf_map_delete_batch(int fd, const void *keys, __u32 *count, const struct bpf_map_batch_opts *opts) { return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL, - NULL, keys, NULL, count, opts); + NULL, (void *)keys, NULL, count, opts); } int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, @@ -715,11 +715,11 @@ int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, count, opts); } -int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, +int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count, const struct bpf_map_batch_opts *opts) { return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL, - keys, values, count, opts); + (void *)keys, (void *)values, count, opts); } int bpf_obj_pin(int fd, const char *pathname) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 00619f64a040..14e0d97ad2cf 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -254,17 +254,128 @@ struct bpf_map_batch_opts { }; #define bpf_map_batch_opts__last_field flags -LIBBPF_API int bpf_map_delete_batch(int fd, void *keys, + +/** + * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple + * elements in a BPF map. 
+ * + * @param fd BPF map file descriptor + * @param keys pointer to an array of *count* keys + * @param count input and output parameter; on input **count** represents the + * number of elements in the map to delete in batch; + * on output if a non-EFAULT error is returned, **count** represents the number of deleted + * elements if the output **count** value is not equal to the input **count** value + * If EFAULT is returned, **count** should not be trusted to be correct. + * @param opts options for configuring the way the batch deletion works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys, __u32 *count, const struct bpf_map_batch_opts *opts); + +/** + * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements. + * + * The parameter *in_batch* is the address of the first element in the batch to read. + * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent + * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate + * that the batched lookup starts from the beginning of the map. + * + * The *keys* and *values* are output parameters which must point to memory large enough to + * hold *count* items based on the key and value size of the map *fd*. The *keys* + * buffer must be of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * @param fd BPF map file descriptor + * @param in_batch address of the first element in batch to read, can pass NULL to + * indicate that the batched lookup starts from the beginning of the map. 
+ * @param out_batch output parameter that should be passed to next call as *in_batch* + * @param keys pointer to an array large enough for *count* keys + * @param values pointer to an array large enough for *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to read in batch; on output it's the number of elements that were + * successfully read. + * If a non-EFAULT error is returned, count will be set as the number of elements + * that were read before the error occurred. + * If EFAULT is returned, **count** should not be trusted to be correct. + * @param opts options for configuring the way the batch lookup works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, void *values, __u32 *count, const struct bpf_map_batch_opts *opts); + +/** + * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion + * of BPF map elements where each element is deleted after being retrieved. + * + * @param fd BPF map file descriptor + * @param in_batch address of the first element in batch to read, can pass NULL to + * get address of the first element in *out_batch* + * @param out_batch output parameter that should be passed to next call as *in_batch* + * @param keys pointer to an array of *count* keys + * @param values pointer to an array large enough for *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to read and delete in batch; on output it represents the number of + * elements that were successfully read and deleted + * If a non-**EFAULT** error code is returned and if the output **count** value + * is not equal to the input **count** value, up to **count** elements may + * have been deleted. 
+ * if **EFAULT** is returned up to *count* elements may have been deleted without + * being returned via the *keys* and *values* output parameters. + * @param opts options for configuring the way the batch lookup and delete works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, void *keys, void *values, __u32 *count, const struct bpf_map_batch_opts *opts); -LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values, + +/** + * @brief **bpf_map_update_batch()** updates multiple elements in a map + * by specifying keys and their corresponding values. + * + * The *keys* and *values* parameters must point to memory large enough + * to hold *count* items based on the key and value size of the map. + * + * The *opts* parameter can be used to control how *bpf_map_update_batch()* + * should handle keys that either do or do not already exist in the map. + * In particular the *flags* parameter of *bpf_map_batch_opts* can be + * one of the following: + * + * Note that *count* is an input and output parameter, where on output it + * represents how many elements were successfully updated. Also note that if + * **EFAULT** then *count* should not be trusted to be correct. + * + * **BPF_ANY** + * Create new elements or update existing. + * + * **BPF_NOEXIST** + * Create new elements only if they do not exist. + * + * **BPF_EXIST** + * Update existing elements. + * + * **BPF_F_LOCK** + * Update spin_lock-ed map elements. This must be + * specified if the map value contains a spinlock. 
+ * + * @param fd BPF map file descriptor + * @param keys pointer to an array of *count* keys + * @param values pointer to an array of *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to update in batch; on output if a non-EFAULT error is returned, + * **count** represents the number of updated elements if the output **count** + * value is not equal to the input **count** value. + * If EFAULT is returned, **count** should not be trusted to be correct. + * @param opts options for configuring the way the batch update works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count, const struct bpf_map_batch_opts *opts); -- cgit From 44bab87d8ca6f0544a9f8fc97bdf33aa5b3c899e Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Thu, 6 Jan 2022 12:55:25 -0800 Subject: bpf/selftests: Test bpf_d_path on rdonly_mem. The second parameter of bpf_d_path() can only accept writable memories. Rdonly_mem obtained from bpf_per_cpu_ptr() can not be passed into bpf_d_path for modification. This patch adds a selftest to verify this behavior. 
Signed-off-by: Hao Luo Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220106205525.2116218-1-haoluo@google.com --- tools/testing/selftests/bpf/prog_tests/d_path.c | 22 ++++++++++++++++- .../bpf/progs/test_d_path_check_rdonly_mem.c | 28 ++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index 0a577a248d34..32fc5b3b5cf6 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -9,6 +9,7 @@ #define MAX_FILES 7 #include "test_d_path.skel.h" +#include "test_d_path_check_rdonly_mem.skel.h" static int duration; @@ -99,7 +100,7 @@ out_close: return ret; } -void test_d_path(void) +static void test_d_path_basic(void) { struct test_d_path__bss *bss; struct test_d_path *skel; @@ -155,3 +156,22 @@ void test_d_path(void) cleanup: test_d_path__destroy(skel); } + +static void test_d_path_check_rdonly_mem(void) +{ + struct test_d_path_check_rdonly_mem *skel; + + skel = test_d_path_check_rdonly_mem__open_and_load(); + ASSERT_ERR_PTR(skel, "unexpected_load_overwriting_rdonly_mem"); + + test_d_path_check_rdonly_mem__destroy(skel); +} + +void test_d_path(void) +{ + if (test__start_subtest("basic")) + test_d_path_basic(); + + if (test__start_subtest("check_rdonly_mem")) + test_d_path_check_rdonly_mem(); +} diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c new file mode 100644 index 000000000000..27c27cff6a3a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Google */ + +#include "vmlinux.h" +#include +#include + +extern const int bpf_prog_active 
__ksym; + +SEC("fentry/security_inode_getattr") +int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat, + __u32 request_mask, unsigned int query_flags) +{ + void *active; + __u32 cpu; + + cpu = bpf_get_smp_processor_id(); + active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) { + /* FAIL here! 'active' points to readonly memory. bpf helpers + * that update its arguments can not write into it. + */ + bpf_d_path(path, active, sizeof(int)); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit From 6fd92c7f0c3846340fee20f62dacb17d0a15c0d3 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 6 Jan 2022 21:20:21 +0800 Subject: bpf: selftests: Use C99 initializers in test_sock.c Use C99 initializers for the initialization of 'tests' in test_sock.c. Signed-off-by: Menglong Dong Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220106132022.3470772-3-imagedong@tencent.com --- tools/testing/selftests/bpf/test_sock.c | 220 +++++++++++++------------------- 1 file changed, 92 insertions(+), 128 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index e8edd3dd3ec2..94f9b126f5ed 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -46,7 +46,7 @@ struct sock_test { static struct sock_test tests[] = { { - "bind4 load with invalid access: src_ip6", + .descr = "bind4 load with invalid access: src_ip6", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -54,16 +54,12 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - 0, - 0, - NULL, - 0, - LOAD_REJECT, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .result = LOAD_REJECT, }, { - "bind4 load with invalid access: mark", + .descr = "bind4 load 
with invalid access: mark", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -71,16 +67,12 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - 0, - 0, - NULL, - 0, - LOAD_REJECT, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .result = LOAD_REJECT, }, { - "bind6 load with invalid access: src_ip4", + .descr = "bind6 load with invalid access: src_ip4", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -88,16 +80,12 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET6_POST_BIND, - 0, - 0, - NULL, - 0, - LOAD_REJECT, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .result = LOAD_REJECT, }, { - "sock_create load with invalid access: src_port", + .descr = "sock_create load with invalid access: src_port", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -105,128 +93,106 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET_SOCK_CREATE, - BPF_CGROUP_INET_SOCK_CREATE, - 0, - 0, - NULL, - 0, - LOAD_REJECT, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .result = LOAD_REJECT, }, { - "sock_create load w/o expected_attach_type (compat mode)", + .descr = "sock_create load w/o expected_attach_type (compat mode)", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - 0, - BPF_CGROUP_INET_SOCK_CREATE, - AF_INET, - SOCK_STREAM, - "127.0.0.1", - 8097, - SUCCESS, + .expected_attach_type = 0, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "127.0.0.1", + .port = 8097, + .result = SUCCESS, }, { - "sock_create load 
w/ expected_attach_type", + .descr = "sock_create load w/ expected_attach_type", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET_SOCK_CREATE, - BPF_CGROUP_INET_SOCK_CREATE, - AF_INET, - SOCK_STREAM, - "127.0.0.1", - 8097, - SUCCESS, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "127.0.0.1", + .port = 8097, + .result = SUCCESS, }, { - "attach type mismatch bind4 vs bind6", + .descr = "attach type mismatch bind4 vs bind6", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET6_POST_BIND, - 0, - 0, - NULL, - 0, - ATTACH_REJECT, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .result = ATTACH_REJECT, }, { - "attach type mismatch bind6 vs bind4", + .descr = "attach type mismatch bind6 vs bind4", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - 0, - 0, - NULL, - 0, - ATTACH_REJECT, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .result = ATTACH_REJECT, }, { - "attach type mismatch default vs bind4", + .descr = "attach type mismatch default vs bind4", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - 0, - BPF_CGROUP_INET4_POST_BIND, - 0, - 0, - NULL, - 0, - ATTACH_REJECT, + .expected_attach_type = 0, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .result = ATTACH_REJECT, }, { - "attach type mismatch bind6 vs sock_create", + .descr = "attach type mismatch bind6 vs sock_create", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET_SOCK_CREATE, - 0, - 0, - NULL, - 0, - ATTACH_REJECT, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .result = ATTACH_REJECT, }, { - "bind4 reject all", + .descr = 
"bind4 reject all", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - AF_INET, - SOCK_STREAM, - "0.0.0.0", - 0, - BIND_REJECT, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "0.0.0.0", + .result = BIND_REJECT, }, { - "bind6 reject all", + .descr = "bind6 reject all", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET6_POST_BIND, - AF_INET6, - SOCK_STREAM, - "::", - 0, - BIND_REJECT, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .domain = AF_INET6, + .type = SOCK_STREAM, + .ip = "::", + .result = BIND_REJECT, }, { - "bind6 deny specific IP & port", + .descr = "bind6 deny specific IP & port", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), @@ -247,16 +213,16 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET6_POST_BIND, - AF_INET6, - SOCK_STREAM, - "::1", - 8193, - BIND_REJECT, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .domain = AF_INET6, + .type = SOCK_STREAM, + .ip = "::1", + .port = 8193, + .result = BIND_REJECT, }, { - "bind4 allow specific IP & port", + .descr = "bind4 allow specific IP & port", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), @@ -277,41 +243,39 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - AF_INET, - SOCK_STREAM, - "127.0.0.1", - 4098, - SUCCESS, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "127.0.0.1", + .port = 4098, + .result = SUCCESS, }, { - "bind4 allow all", + .descr = "bind4 allow all", .insns = { 
BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - AF_INET, - SOCK_STREAM, - "0.0.0.0", - 0, - SUCCESS, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "0.0.0.0", + .result = SUCCESS, }, { - "bind6 allow all", + .descr = "bind6 allow all", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET6_POST_BIND, - AF_INET6, - SOCK_STREAM, - "::", - 0, - SUCCESS, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .domain = AF_INET6, + .type = SOCK_STREAM, + .ip = "::", + .result = SUCCESS, }, }; -- cgit From f7342481749365d9ac5f24fb971659a64e045bb5 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 6 Jan 2022 21:20:22 +0800 Subject: bpf: selftests: Add bind retry for post_bind{4, 6} With the previous patch, the kernel is able to 'put_port' after sys_bind() fails. Add a test for that case: rebind to another port after sys_bind() fails. If the bind succeeds, it means the previous bind operation has already been undone. 
Signed-off-by: Menglong Dong Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220106132022.3470772-4-imagedong@tencent.com --- tools/testing/selftests/bpf/test_sock.c | 150 +++++++++++++++++++++++++++----- 1 file changed, 130 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 94f9b126f5ed..fe10f8134278 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -35,12 +35,15 @@ struct sock_test { /* Endpoint to bind() to */ const char *ip; unsigned short port; + unsigned short port_retry; /* Expected test result */ enum { LOAD_REJECT, ATTACH_REJECT, BIND_REJECT, SUCCESS, + RETRY_SUCCESS, + RETRY_REJECT } result; }; @@ -251,6 +254,99 @@ static struct sock_test tests[] = { .port = 4098, .result = SUCCESS, }, + { + .descr = "bind4 deny specific IP & port of TCP, and retry", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (ip == expected && port == expected) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_ip4)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, + __bpf_constant_ntohl(0x7F000001), 4), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_port)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "127.0.0.1", + .port = 4098, + .port_retry = 5000, + .result = RETRY_SUCCESS, + }, + { + .descr = "bind4 deny specific IP & port of UDP, and retry", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (ip == expected && port == expected) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_ip4)), + BPF_JMP_IMM(BPF_JNE, 
BPF_REG_7, + __bpf_constant_ntohl(0x7F000001), 4), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_port)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .domain = AF_INET, + .type = SOCK_DGRAM, + .ip = "127.0.0.1", + .port = 4098, + .port_retry = 5000, + .result = RETRY_SUCCESS, + }, + { + .descr = "bind6 deny specific IP & port, and retry", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (ip == expected && port == expected) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_ip6[3])), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, + __bpf_constant_ntohl(0x00000001), 4), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_port)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .domain = AF_INET6, + .type = SOCK_STREAM, + .ip = "::1", + .port = 8193, + .port_retry = 9000, + .result = RETRY_SUCCESS, + }, { .descr = "bind4 allow all", .insns = { @@ -315,14 +411,15 @@ static int attach_sock_prog(int cgfd, int progfd, return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE); } -static int bind_sock(int domain, int type, const char *ip, unsigned short port) +static int bind_sock(int domain, int type, const char *ip, + unsigned short port, unsigned short port_retry) { struct sockaddr_storage addr; struct sockaddr_in6 *addr6; struct sockaddr_in *addr4; int sockfd = -1; socklen_t len; - int err = 0; + int res = SUCCESS; sockfd = socket(domain, type, 0); if (sockfd < 0) @@ -348,21 
+445,44 @@ static int bind_sock(int domain, int type, const char *ip, unsigned short port) goto err; } - if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) - goto err; + if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) { + /* sys_bind() may fail for different reasons, errno has to be + * checked to confirm that BPF program rejected it. + */ + if (errno != EPERM) + goto err; + if (port_retry) + goto retry; + res = BIND_REJECT; + goto out; + } + goto out; +retry: + if (domain == AF_INET) + addr4->sin_port = htons(port_retry); + else + addr6->sin6_port = htons(port_retry); + if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) { + if (errno != EPERM) + goto err; + res = RETRY_REJECT; + } else { + res = RETRY_SUCCESS; + } goto out; err: - err = -1; + res = -1; out: close(sockfd); - return err; + return res; } static int run_test_case(int cgfd, const struct sock_test *test) { int progfd = -1; int err = 0; + int res; printf("Test case: %s .. ", test->descr); progfd = load_sock_prog(test->insns, test->expected_attach_type); @@ -380,21 +500,11 @@ static int run_test_case(int cgfd, const struct sock_test *test) goto err; } - if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) { - /* sys_bind() may fail for different reasons, errno has to be - * checked to confirm that BPF program rejected it. - */ - if (test->result == BIND_REJECT && errno == EPERM) - goto out; - else - goto err; - } - + res = bind_sock(test->domain, test->type, test->ip, test->port, + test->port_retry); + if (res > 0 && test->result == res) + goto out; - if (test->result != SUCCESS) - goto err; - - goto out; err: err = -1; out: -- cgit