diff options
| -rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/unwind_orc.c | 39 | ||||
| -rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 12 | ||||
| -rw-r--r-- | include/linux/bpf.h | 3 | ||||
| -rw-r--r-- | kernel/bpf/core.c | 16 | ||||
| -rw-r--r-- | kernel/bpf/dmabuf_iter.c | 56 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 2 | ||||
| -rw-r--r-- | net/smc/Kconfig | 4 | ||||
| -rw-r--r-- | tools/bpf/bpftool/Makefile | 2 | ||||
| -rw-r--r-- | tools/lib/bpf/libbpf.c | 7 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/Makefile | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/d_path.c | 89 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c | 47 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/test_d_path.c | 23 |
14 files changed, 256 insertions, 48 deletions
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 74dd29816f36..b6eb7a465ad2 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1004,7 +1004,7 @@ static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx) arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) return; - if (capable(CAP_SYS_ADMIN)) + if (ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) return; if (supports_clearbhb(SCOPE_SYSTEM)) { diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 977ee75e047c..f610fde2d5c4 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -2,6 +2,7 @@ #include <linux/objtool.h> #include <linux/module.h> #include <linux/sort.h> +#include <linux/bpf.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> #include <asm/unwind.h> @@ -172,6 +173,25 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip) } #endif +/* Fake frame pointer entry -- used as a fallback for generated code */ +static struct orc_entry orc_fp_entry = { + .type = ORC_TYPE_CALL, + .sp_reg = ORC_REG_BP, + .sp_offset = 16, + .bp_reg = ORC_REG_PREV_SP, + .bp_offset = -16, +}; + +static struct orc_entry *orc_bpf_find(unsigned long ip) +{ +#ifdef CONFIG_BPF_JIT + if (bpf_has_frame_pointer(ip)) + return &orc_fp_entry; +#endif + + return NULL; +} + /* * If we crash with IP==0, the last successfully executed instruction * was probably an indirect function call with a NULL function pointer, @@ -186,15 +206,6 @@ static struct orc_entry null_orc_entry = { .type = ORC_TYPE_CALL }; -/* Fake frame pointer entry -- used as a fallback for generated code */ -static struct orc_entry orc_fp_entry = { - .type = ORC_TYPE_CALL, - .sp_reg = ORC_REG_BP, - .sp_offset = 16, - .bp_reg = ORC_REG_PREV_SP, - .bp_offset = -16, -}; - static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; @@ -238,6 +249,11 @@ static struct orc_entry *orc_find(unsigned long ip) if (orc) return orc; + /* BPF lookup: */ + orc = orc_bpf_find(ip); + if (orc) + return orc; + return orc_ftrace_find(ip); } @@ -495,9 +511,8 @@ bool unwind_next_frame(struct unwind_state *state) if (!orc) { /* * As a fallback, try to assume this code uses a frame pointer. - * This is useful for generated code, like BPF, which ORC - * doesn't know about. This is just a guess, so the rest of - * the unwind is no longer considered reliable. + * This is just a guess, so the rest of the unwind is no longer + * considered reliable. */ orc = &orc_fp_entry; state->error = true; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b69dc7194e2c..b0bac2a66eff 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1678,6 +1678,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image emit_prologue(&prog, image, stack_depth, bpf_prog_was_classic(bpf_prog), tail_call_reachable, bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb); + + bpf_prog->aux->ksym.fp_start = prog - temp; + /* Exception callback will clobber callee regs for its own use, and * restore the original callee regs from main prog's stack frame. */ @@ -2736,6 +2739,8 @@ emit_jmp: pop_r12(&prog); } EMIT1(0xC9); /* leave */ + bpf_prog->aux->ksym.fp_end = prog - temp; + emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; @@ -3325,6 +3330,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im } EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + if (im) + im->ksym.fp_start = prog - (u8 *)rw_image; + if (!is_imm8(stack_size)) { /* sub rsp, stack_size */ EMIT3_off32(0x48, 0x81, 0xEC, stack_size); @@ -3462,7 +3470,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off); + EMIT1(0xC9); /* leave */ + if (im) + im->ksym.fp_end = prog - (u8 *)rw_image; + if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6498be4c44f8..e5be698256d1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1283,6 +1283,8 @@ struct bpf_ksym { struct list_head lnode; struct latch_tree_node tnode; bool prog; + u32 fp_start; + u32 fp_end; }; enum bpf_tramp_prog_type { @@ -1511,6 +1513,7 @@ void bpf_image_ksym_add(struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); +bool bpf_has_frame_pointer(unsigned long ip); int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c8ae6ab31651..1b9b18e5b03c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -760,6 +760,22 @@ struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) NULL; } +bool bpf_has_frame_pointer(unsigned long ip) +{ + struct bpf_ksym *ksym; + unsigned long offset; + + guard(rcu)(); + + ksym = bpf_ksym_find(ip); + if (!ksym || !ksym->fp_start || !ksym->fp_end) + return false; + + offset = ip - ksym->start; + + return offset >= ksym->fp_start && offset < ksym->fp_end; +} + const struct exception_table_entry *search_bpf_extables(unsigned long addr) { const struct exception_table_entry *e = NULL; diff --git a/kernel/bpf/dmabuf_iter.c b/kernel/bpf/dmabuf_iter.c index 4dd7ef7c145c..cd500248abd9 100644 --- a/kernel/bpf/dmabuf_iter.c +++ b/kernel/bpf/dmabuf_iter.c @@ -6,10 +6,33 @@ #include <linux/kernel.h> #include <linux/seq_file.h> +struct dmabuf_iter_priv { + /* + * If this pointer is non-NULL, the buffer's refcount is elevated to + * prevent destruction between stop/start. If reading is not resumed and + * start is never called again, then dmabuf_iter_seq_fini drops the + * reference when the iterator is released. + */ + struct dma_buf *dmabuf; +}; + static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos) { - if (*pos) - return NULL; + struct dmabuf_iter_priv *p = seq->private; + + if (*pos) { + struct dma_buf *dmabuf = p->dmabuf; + + if (!dmabuf) + return NULL; + + /* + * Always resume from where we stopped, regardless of the value + * of pos. + */ + p->dmabuf = NULL; + return dmabuf; + } return dma_buf_iter_begin(); } @@ -54,8 +77,11 @@ static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v) { struct dma_buf *dmabuf = v; - if (dmabuf) - dma_buf_put(dmabuf); + if (dmabuf) { + struct dmabuf_iter_priv *p = seq->private; + + p->dmabuf = dmabuf; + } } static const struct seq_operations dmabuf_iter_seq_ops = { @@ -71,11 +97,27 @@ static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux, seq_puts(seq, "dmabuf iter\n"); } +static int dmabuf_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux) +{ + struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv; + + p->dmabuf = NULL; + return 0; +} + +static void dmabuf_iter_seq_fini(void *priv) +{ + struct dmabuf_iter_priv *p = (struct dmabuf_iter_priv *)priv; + + if (p->dmabuf) + dma_buf_put(p->dmabuf); +} + static const struct bpf_iter_seq_info dmabuf_iter_seq_info = { .seq_ops = &dmabuf_iter_seq_ops, - .init_seq_private = NULL, - .fini_seq_private = NULL, - .seq_priv_size = 0, + .init_seq_private = dmabuf_iter_seq_init, + .fini_seq_private = dmabuf_iter_seq_fini, + .seq_priv_size = sizeof(struct dmabuf_iter_priv), }; static struct bpf_iter_reg bpf_dmabuf_reg_info = { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d57727abaade..fe28d86f7c35 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -965,7 +965,7 @@ static const struct bpf_func_proto bpf_d_path_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_BTF_ID, .arg1_btf_id = &bpf_d_path_btf_ids[0], - .arg2_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_PTR_TO_MEM | MEM_WRITE, .arg3_type = ARG_CONST_SIZE_OR_ZERO, .allowed = bpf_d_path_allowed, }; diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 325addf83cc6..277ef504bc26 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -22,10 +22,10 @@ config SMC_DIAG config SMC_HS_CTRL_BPF bool "Generic eBPF hook for SMC handshake flow" - depends on SMC && BPF_SYSCALL + depends on SMC && BPF_JIT && BPF_SYSCALL default y help SMC_HS_CTRL_BPF enables support to register generic eBPF hook for SMC handshake flow, which offer much greater flexibility in modifying the behavior of the SMC protocol stack compared to a complete kernel-based approach. Select - this option if you want filtring the handshake process via eBPF programs.
\ No newline at end of file + this option if you want filtring the handshake process via eBPF programs. diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 586d1b2595d1..5442073a2e42 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -224,6 +224,8 @@ endif $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP) $(QUIET_CLANG)$(CLANG) \ + -Wno-microsoft-anon-tag \ + -fms-extensions \ -I$(or $(OUTPUT),.) \ -I$(srctree)/tools/include/uapi/ \ -I$(LIBBPF_BOOTSTRAP_INCLUDE) \ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3dc8a8078815..f4dfd23148a5 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8484,7 +8484,7 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type, struct bpf_object *obj = ctx; const struct btf_type *t; struct extern_desc *ext; - char *res; + const char *res; res = strstr(sym_name, ".llvm."); if (sym_type == 'd' && res) @@ -11818,7 +11818,8 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, * * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG") */ - char sym_trim[256], *psym_trim = sym_trim, *sym_sfx; + char sym_trim[256], *psym_trim = sym_trim; + const char *sym_sfx; if (!(sym_sfx = strstr(sym_name, ".llvm."))) return 0; @@ -12401,7 +12402,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz) if (!search_paths[i]) continue; for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { - char *next_path; + const char *next_path; int seg_len; if (s[0] == ':') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index b7030a6e2e76..4aa60e83ff19 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -437,6 +437,8 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(abspath $(OUTPUT)/../usr/include) \ -std=gnu11 \ -fno-strict-aliasing \ + -Wno-microsoft-anon-tag \ + -fms-extensions \ -Wno-compare-distinct-pointer-types \ -Wno-initializer-overrides \ # diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index ccc768592e66..1a2a2f1abf03 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -38,6 +38,14 @@ static int set_pathname(int fd, pid_t pid) return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN); } +static inline long syscall_close(int fd) +{ + return syscall(__NR_close_range, + (unsigned int)fd, + (unsigned int)fd, + 0u); +} + static int trigger_fstat_events(pid_t pid) { int sockfd = -1, procfd = -1, devfd = -1; @@ -104,18 +112,34 @@ out_close: /* sys_close no longer triggers filp_close, but we can * call sys_close_range instead which still does */ -#define close(fd) syscall(__NR_close_range, fd, fd, 0) + syscall_close(pipefd[0]); + syscall_close(pipefd[1]); + syscall_close(sockfd); + syscall_close(procfd); + syscall_close(devfd); + syscall_close(localfd); + syscall_close(indicatorfd); + return ret; +} - close(pipefd[0]); - close(pipefd[1]); - close(sockfd); - close(procfd); - close(devfd); - close(localfd); - close(indicatorfd); +static void attach_and_load(struct test_d_path **skel) +{ + int err; -#undef close - return ret; + *skel = test_d_path__open_and_load(); + if (CHECK(!*skel, "setup", "d_path skeleton failed\n")) + goto cleanup; + + err = test_d_path__attach(*skel); + if (CHECK(err, "setup", "attach failed: %d\n", err)) + goto cleanup; + + (*skel)->bss->my_pid = getpid(); + return; + +cleanup: + test_d_path__destroy(*skel); + *skel = NULL; } static void test_d_path_basic(void) @@ -124,16 +148,11 @@ static void test_d_path_basic(void) struct test_d_path *skel; int err; - skel = test_d_path__open_and_load(); - if (CHECK(!skel, "setup", "d_path skeleton failed\n")) - goto cleanup; - - err = test_d_path__attach(skel); - if (CHECK(err, "setup", "attach failed: %d\n", err)) + attach_and_load(&skel); + if (!skel) goto cleanup; bss = skel->bss; - bss->my_pid = getpid(); err = trigger_fstat_events(bss->my_pid); if (err < 0) @@ -195,6 +214,39 @@ static void test_d_path_check_types(void) test_d_path_check_types__destroy(skel); } +/* Check if the verifier correctly generates code for + * accessing the memory modified by d_path helper. + */ +static void test_d_path_mem_access(void) +{ + int localfd = -1; + char path_template[] = "/dev/shm/d_path_loadgen.XXXXXX"; + struct test_d_path__bss *bss; + struct test_d_path *skel; + + attach_and_load(&skel); + if (!skel) + goto cleanup; + + bss = skel->bss; + + localfd = mkstemp(path_template); + if (CHECK(localfd < 0, "trigger", "mkstemp failed\n")) + goto cleanup; + + if (CHECK(fallocate(localfd, 0, 0, 1024) < 0, "trigger", "fallocate failed\n")) + goto cleanup; + remove(path_template); + + if (CHECK(!bss->path_match_fallocate, "check", + "failed to read fallocate path")) + goto cleanup; + +cleanup: + syscall_close(localfd); + test_d_path__destroy(skel); +} + void test_d_path(void) { if (test__start_subtest("basic")) @@ -205,4 +257,7 @@ void test_d_path(void) if (test__start_subtest("check_alloc_mem")) test_d_path_check_types(); + + if (test__start_subtest("check_mem_access")) + test_d_path_mem_access(); } diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c index 6c2b0c3dbcd8..e442be9dde7e 100644 --- a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c @@ -73,12 +73,10 @@ close_memfd: return -1; } -static int create_sys_heap_dmabuf(void) +static int create_sys_heap_dmabuf(size_t bytes) { - sysheap_test_buffer_size = 20 * getpagesize(); - struct dma_heap_allocation_data data = { - .len = sysheap_test_buffer_size, + .len = bytes, .fd = 0, .fd_flags = O_RDWR | O_CLOEXEC, .heap_flags = 0, @@ -110,7 +108,9 @@ close_sysheap_dmabuf: static int create_test_buffers(void) { udmabuf = create_udmabuf(); - sysheap_dmabuf = create_sys_heap_dmabuf(); + + sysheap_test_buffer_size = 20 * getpagesize(); + sysheap_dmabuf = create_sys_heap_dmabuf(sysheap_test_buffer_size); if (udmabuf < 0 || sysheap_dmabuf < 0) return -1; @@ -219,6 +219,26 @@ close_iter_fd: close(iter_fd); } +static void subtest_dmabuf_iter_check_lots_of_buffers(struct dmabuf_iter *skel) +{ + int iter_fd; + char buf[1024]; + size_t total_bytes_read = 0; + ssize_t bytes_read; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector)); + if (!ASSERT_OK_FD(iter_fd, "iter_create")) + return; + + while ((bytes_read = read(iter_fd, buf, sizeof(buf))) > 0) + total_bytes_read += bytes_read; + + ASSERT_GT(total_bytes_read, getpagesize(), "total_bytes_read"); + + close(iter_fd); +} + + static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd) { LIBBPF_OPTS(bpf_test_run_opts, topts); @@ -275,6 +295,23 @@ void test_dmabuf_iter(void) subtest_dmabuf_iter_check_no_infinite_reads(skel); if (test__start_subtest("default_iter")) subtest_dmabuf_iter_check_default_iter(skel); + if (test__start_subtest("lots_of_buffers")) { + size_t NUM_BUFS = 100; + int buffers[NUM_BUFS]; + int i; + + for (i = 0; i < NUM_BUFS; ++i) { + buffers[i] = create_sys_heap_dmabuf(getpagesize()); + if (!ASSERT_OK_FD(buffers[i], "dmabuf_fd")) + goto cleanup_bufs; + } + + subtest_dmabuf_iter_check_lots_of_buffers(skel); + +cleanup_bufs: + for (--i; i >= 0; --i) + close(buffers[i]); + } if (test__start_subtest("open_coded")) subtest_dmabuf_iter_check_open_coded(skel, map_fd); diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c index 84e1f883f97b..561b2f861808 100644 --- a/tools/testing/selftests/bpf/progs/test_d_path.c +++ b/tools/testing/selftests/bpf/progs/test_d_path.c @@ -17,6 +17,7 @@ int rets_close[MAX_FILES] = {}; int called_stat = 0; int called_close = 0; +int path_match_fallocate = 0; SEC("fentry/security_inode_getattr") int BPF_PROG(prog_stat, struct path *path, struct kstat *stat, @@ -62,4 +63,26 @@ int BPF_PROG(prog_close, struct file *file, void *id) return 0; } +SEC("fentry/vfs_fallocate") +int BPF_PROG(prog_fallocate, struct file *file, int mode, loff_t offset, loff_t len) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + int ret = 0; + char path_fallocate[MAX_PATH_LEN] = {}; + + if (pid != my_pid) + return 0; + + ret = bpf_d_path(&file->f_path, + path_fallocate, MAX_PATH_LEN); + if (ret < 0) + return 0; + + if (!path_fallocate[0]) + return 0; + + path_match_fallocate = 1; + return 0; +} + char _license[] SEC("license") = "GPL"; |
