diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-21 09:27:50 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-21 09:27:50 -0700 |
commit | 440b65232829fad69947b8de983c13a525cc8871 (patch) | |
tree | 3cab57fca48b43ba0e11804683b33b71743494c6 /kernel/trace | |
parent | 1ec6d097897a35dfb55c4c31fc8633cf5be46497 (diff) | |
parent | 5277d130947ba8c0d54c16eed89eb97f0b6d2e5a (diff) |
Merge tag 'bpf-next-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov:
- Introduce '__attribute__((bpf_fastcall))' for helpers and kfuncs with
corresponding support in LLVM.
It is similar to existing 'no_caller_saved_registers' attribute in
GCC/LLVM with a provision for backward compatibility. It allows
compilers generate more efficient BPF code assuming the verifier or
JITs will inline or partially inline a helper/kfunc with such
attribute. bpf_cast_to_kern_ctx, bpf_rdonly_cast,
bpf_get_smp_processor_id are the first set of such helpers.
- Harden and extend ELF build ID parsing logic.
When called from sleepable context the relevants parts of ELF file
will be read to find and fetch .note.gnu.build-id information. Also
harden the logic to avoid TOCTOU, overflow, out-of-bounds problems.
- Improvements and fixes for sched-ext:
- Allow passing BPF iterators as kfunc arguments
- Make the pointer returned from iter_next method trusted
- Fix x86 JIT convergence issue due to growing/shrinking conditional
jumps in variable length encoding
- BPF_LSM related:
- Introduce few VFS kfuncs and consolidate them in
fs/bpf_fs_kfuncs.c
- Enforce correct range of return values from certain LSM hooks
- Disallow attaching to other LSM hooks
- Prerequisite work for upcoming Qdisc in BPF:
- Allow kptrs in program provided structs
- Support for gen_epilogue in verifier_ops
- Important fixes:
- Fix uprobe multi pid filter check
- Fix bpf_strtol and bpf_strtoul helpers
- Track equal scalars history on per-instruction level
- Fix tailcall hierarchy on x86 and arm64
- Fix signed division overflow to prevent INT_MIN/-1 trap on x86
- Fix get kernel stack in BPF progs attached to tracepoint:syscall
- Selftests:
- Add uprobe bench/stress tool
- Generate file dependencies to drastically improve re-build time
- Match JIT-ed and BPF asm with __xlated/__jited keywords
- Convert older tests to test_progs framework
- Add support for RISC-V
- Few fixes when BPF programs are compiled with GCC-BPF backend
(support for GCC-BPF in BPF CI is ongoing in parallel)
- Add traffic monitor
- Enable cross compile and musl libc
* tag 'bpf-next-6.12' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (260 commits)
btf: require pahole 1.21+ for DEBUG_INFO_BTF with default DWARF version
btf: move pahole check in scripts/link-vmlinux.sh to lib/Kconfig.debug
btf: remove redundant CONFIG_BPF test in scripts/link-vmlinux.sh
bpf: Call the missed kfree() when there is no special field in btf
bpf: Call the missed btf_record_free() when map creation fails
selftests/bpf: Add a test case to write mtu result into .rodata
selftests/bpf: Add a test case to write strtol result into .rodata
selftests/bpf: Rename ARG_PTR_TO_LONG test description
selftests/bpf: Fix ARG_PTR_TO_LONG {half-,}uninitialized test
bpf: Zero former ARG_PTR_TO_{LONG,INT} args in case of error
bpf: Improve check_raw_mode_ok test for MEM_UNINIT-tagged types
bpf: Fix helper writes to read-only maps
bpf: Remove truncation test in bpf_strtol and bpf_strtoul helpers
bpf: Fix bpf_strtol and bpf_strtoul helpers for 32bit
selftests/bpf: Add tests for sdiv/smod overflow cases
bpf: Fix a sdiv overflow issue
libbpf: Add bpf_object__token_fd accessor
docs/bpf: Add missing BPF program types to docs
docs/bpf: Add constant values for linkages
bpf: Use fake pt_regs when doing bpf syscall tracepoint tracing
...
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/bpf_trace.c | 108 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 12 |
2 files changed, 18 insertions, 102 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ac0a01cc8634..a582cd25ca87 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -24,7 +24,6 @@ #include <linux/key.h> #include <linux/verification.h> #include <linux/namei.h> -#include <linux/fileattr.h> #include <net/bpf_sk_storage.h> @@ -798,29 +797,6 @@ const struct bpf_func_proto bpf_task_pt_regs_proto = { .ret_btf_id = &bpf_task_pt_regs_ids[0], }; -BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) -{ - struct bpf_array *array = container_of(map, struct bpf_array, map); - struct cgroup *cgrp; - - if (unlikely(idx >= array->map.max_entries)) - return -E2BIG; - - cgrp = READ_ONCE(array->ptrs[idx]); - if (unlikely(!cgrp)) - return -EAGAIN; - - return task_under_cgroup_hierarchy(current, cgrp); -} - -static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = { - .func = bpf_current_task_under_cgroup, - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_ANYTHING, -}; - struct send_signal_irq_work { struct irq_work irq_work; struct task_struct *task; @@ -1226,7 +1202,8 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_LONG, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg3_size = sizeof(u64), }; BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) @@ -1242,7 +1219,8 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = { .func = get_func_ret, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_LONG, + .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg2_size = sizeof(u64), }; BPF_CALL_1(get_func_arg_cnt, void *, ctx) @@ -1439,73 +1417,6 @@ static int __init bpf_key_sig_kfuncs_init(void) late_initcall(bpf_key_sig_kfuncs_init); #endif /* CONFIG_KEYS */ -/* filesystem kfuncs */ -__bpf_kfunc_start_defs(); - -/** - * bpf_get_file_xattr - get xattr of a file - * @file: file to get xattr from - * @name__str: name of the xattr - * @value_p: output buffer of the xattr value - * - * Get xattr *name__str* of *file* and store the output in *value_ptr*. - * - * For security reasons, only *name__str* with prefix "user." is allowed. - * - * Return: 0 on success, a negative value on error. - */ -__bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str, - struct bpf_dynptr *value_p) -{ - struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p; - struct dentry *dentry; - u32 value_len; - void *value; - int ret; - - if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) - return -EPERM; - - value_len = __bpf_dynptr_size(value_ptr); - value = __bpf_dynptr_data_rw(value_ptr, value_len); - if (!value) - return -EINVAL; - - dentry = file_dentry(file); - ret = inode_permission(&nop_mnt_idmap, dentry->d_inode, MAY_READ); - if (ret) - return ret; - return __vfs_getxattr(dentry, dentry->d_inode, name__str, value, value_len); -} - -__bpf_kfunc_end_defs(); - -BTF_KFUNCS_START(fs_kfunc_set_ids) -BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS) -BTF_KFUNCS_END(fs_kfunc_set_ids) - -static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id) -{ - if (!btf_id_set8_contains(&fs_kfunc_set_ids, kfunc_id)) - return 0; - - /* Only allow to attach from LSM hooks, to avoid recursion */ - return prog->type != BPF_PROG_TYPE_LSM ? -EACCES : 0; -} - -static const struct btf_kfunc_id_set bpf_fs_kfunc_set = { - .owner = THIS_MODULE, - .set = &fs_kfunc_set_ids, - .filter = bpf_get_file_xattr_filter, -}; - -static int __init bpf_fs_kfuncs_init(void) -{ - return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set); -} - -late_initcall(bpf_fs_kfuncs_init); - static const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1548,8 +1459,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_numa_node_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; - case BPF_FUNC_current_task_under_cgroup: - return &bpf_current_task_under_cgroup_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_probe_write_user: @@ -1578,6 +1487,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_cgrp_storage_get_proto; case BPF_FUNC_cgrp_storage_delete: return &bpf_cgrp_storage_delete_proto; + case BPF_FUNC_current_task_under_cgroup: + return &bpf_current_task_under_cgroup_proto; #endif case BPF_FUNC_send_signal: return &bpf_send_signal_proto; @@ -1598,7 +1509,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_jiffies64: return &bpf_jiffies64_proto; case BPF_FUNC_get_task_stack: - return &bpf_get_task_stack_proto; + return prog->sleepable ? &bpf_get_task_stack_sleepable_proto + : &bpf_get_task_stack_proto; case BPF_FUNC_copy_from_user: return &bpf_copy_from_user_proto; case BPF_FUNC_copy_from_user_task: @@ -1654,7 +1566,7 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto; case BPF_FUNC_get_stack: - return &bpf_get_stack_proto; + return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto; #ifdef CONFIG_BPF_KPROBE_OVERRIDE case BPF_FUNC_override_return: return &bpf_override_return_proto; @@ -3299,7 +3211,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe, struct bpf_run_ctx *old_run_ctx; int err = 0; - if (link->task && current->mm != link->task->mm) + if (link->task && !same_thread_group(current, link->task)) return 0; if (sleepable) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 9c581d6da843..785733245ead 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -564,6 +564,7 @@ static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *re BUILD_BUG_ON(sizeof(param.ent) < sizeof(void *)); /* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. ¶m) */ + perf_fetch_caller_regs(regs); *(struct pt_regs **)¶m = regs; param.syscall_nr = rec->nr; for (i = 0; i < sys_data->nb_args; i++) @@ -575,6 +576,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; + struct pt_regs *fake_regs; struct hlist_head *head; unsigned long args[6]; bool valid_prog_array; @@ -602,7 +604,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) size = ALIGN(size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - rec = perf_trace_buf_alloc(size, NULL, &rctx); + rec = perf_trace_buf_alloc(size, &fake_regs, &rctx); if (!rec) return; @@ -611,7 +613,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args); if ((valid_prog_array && - !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) || + !perf_call_bpf_enter(sys_data->enter_event, fake_regs, sys_data, rec)) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; @@ -666,6 +668,7 @@ static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *reg } __aligned(8) param; /* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. ¶m) */ + perf_fetch_caller_regs(regs); *(struct pt_regs **)¶m = regs; param.syscall_nr = rec->nr; param.ret = rec->ret; @@ -676,6 +679,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; + struct pt_regs *fake_regs; struct hlist_head *head; bool valid_prog_array; int syscall_nr; @@ -701,7 +705,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64)); size -= sizeof(u32); - rec = perf_trace_buf_alloc(size, NULL, &rctx); + rec = perf_trace_buf_alloc(size, &fake_regs, &rctx); if (!rec) return; @@ -709,7 +713,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) rec->ret = syscall_get_return_value(current, regs); if ((valid_prog_array && - !perf_call_bpf_exit(sys_data->exit_event, regs, rec)) || + !perf_call_bpf_exit(sys_data->exit_event, fake_regs, rec)) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; |