diff options
Diffstat (limited to 'kernel/trace/bpf_trace.c')
-rw-r--r-- | kernel/trace/bpf_trace.c | 563 |
1 files changed, 331 insertions, 232 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9dc605f08a23..adc947587eb8 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -24,7 +24,6 @@ #include <linux/key.h> #include <linux/verification.h> #include <linux/namei.h> -#include <linux/fileattr.h> #include <net/bpf_sk_storage.h> @@ -358,17 +357,6 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = { .arg3_type = ARG_CONST_SIZE, }; -static const struct bpf_func_proto *bpf_get_probe_write_proto(void) -{ - if (!capable(CAP_SYS_ADMIN)) - return NULL; - - pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!", - current->comm, task_pid_nr(current)); - - return &bpf_probe_write_user_proto; -} - #define MAX_TRACE_PRINTK_VARARGS 3 #define BPF_TRACE_PRINTK_SIZE 1024 @@ -620,7 +608,8 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = { static __always_inline u64 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, - u64 flags, struct perf_sample_data *sd) + u64 flags, struct perf_raw_record *raw, + struct perf_sample_data *sd) { struct bpf_array *array = container_of(map, struct bpf_array, map); unsigned int cpu = smp_processor_id(); @@ -645,6 +634,8 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; + perf_sample_save_raw_data(sd, event, raw); + return perf_event_output(event, sd, regs); } @@ -688,9 +679,8 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, } perf_sample_data_init(sd, 0, 0); - perf_sample_save_raw_data(sd, &raw); - err = __bpf_perf_event_output(regs, map, flags, sd); + err = __bpf_perf_event_output(regs, map, flags, &raw, sd); out: this_cpu_dec(bpf_trace_nest_level); preempt_enable(); @@ -749,9 +739,8 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, perf_fetch_caller_regs(regs); perf_sample_data_init(sd, 0, 0); - perf_sample_save_raw_data(sd, &raw); - ret = __bpf_perf_event_output(regs, map, flags, sd); + ret = __bpf_perf_event_output(regs, map, flags, &raw, sd); out: this_cpu_dec(bpf_event_output_nest_level); preempt_enable(); @@ -798,34 +787,13 @@ const struct bpf_func_proto bpf_task_pt_regs_proto = { .ret_btf_id = &bpf_task_pt_regs_ids[0], }; -BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) -{ - struct bpf_array *array = container_of(map, struct bpf_array, map); - struct cgroup *cgrp; - - if (unlikely(idx >= array->map.max_entries)) - return -E2BIG; - - cgrp = READ_ONCE(array->ptrs[idx]); - if (unlikely(!cgrp)) - return -EAGAIN; - - return task_under_cgroup_hierarchy(current, cgrp); -} - -static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = { - .func = bpf_current_task_under_cgroup, - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_ANYTHING, -}; - struct send_signal_irq_work { struct irq_work irq_work; struct task_struct *task; u32 sig; enum pid_type type; + bool has_siginfo; + struct kernel_siginfo info; }; static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work); @@ -833,30 +801,49 @@ static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work); static void do_bpf_send_signal(struct irq_work *entry) { struct send_signal_irq_work *work; + struct kernel_siginfo *siginfo; work = container_of(entry, struct send_signal_irq_work, irq_work); - group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type); + siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV; + + group_send_sig_info(work->sig, siginfo, work->task, work->type); put_task_struct(work->task); } -static int bpf_send_signal_common(u32 sig, enum pid_type type) +static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value) { struct send_signal_irq_work *work = NULL; + struct kernel_siginfo info; + struct kernel_siginfo *siginfo; + + if (!task) { + task = current; + siginfo = SEND_SIG_PRIV; + } else { + clear_siginfo(&info); + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_KERNEL; + info.si_pid = 0; + info.si_uid = 0; + info.si_value.sival_ptr = (void *)(unsigned long)value; + siginfo = &info; + } /* Similar to bpf_probe_write_user, task needs to be * in a sound condition and kernel memory access be * permitted in order to send signal to the current * task. */ - if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING))) + if (unlikely(task->flags & (PF_KTHREAD | PF_EXITING))) return -EPERM; if (unlikely(!nmi_uaccess_okay())) return -EPERM; /* Task should not be pid=1 to avoid kernel panic. */ - if (unlikely(is_global_init(current))) + if (unlikely(is_global_init(task))) return -EPERM; - if (irqs_disabled()) { + if (!preemptible()) { /* Do an early check on signal validity. Otherwise, * the error is lost in deferred irq_work. */ @@ -871,19 +858,22 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) * to the irq_work. The current task may change when queued * irq works get executed. */ - work->task = get_task_struct(current); + work->task = get_task_struct(task); + work->has_siginfo = siginfo == &info; + if (work->has_siginfo) + copy_siginfo(&work->info, &info); work->sig = sig; work->type = type; irq_work_queue(&work->irq_work); return 0; } - return group_send_sig_info(sig, SEND_SIG_PRIV, current, type); + return group_send_sig_info(sig, siginfo, task, type); } BPF_CALL_1(bpf_send_signal, u32, sig) { - return bpf_send_signal_common(sig, PIDTYPE_TGID); + return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0); } static const struct bpf_func_proto bpf_send_signal_proto = { @@ -895,7 +885,7 @@ static const struct bpf_func_proto bpf_send_signal_proto = { BPF_CALL_1(bpf_send_signal_thread, u32, sig) { - return bpf_send_signal_common(sig, PIDTYPE_PID); + return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0); } static const struct bpf_func_proto bpf_send_signal_thread_proto = { @@ -1053,9 +1043,15 @@ static unsigned long get_entry_ip(unsigned long fentry_ip) { u32 instr; - /* Being extra safe in here in case entry ip is on the page-edge. */ - if (get_kernel_nofault(instr, (u32 *) fentry_ip - 1)) - return fentry_ip; + /* We want to be extra safe in case entry ip is on the page edge, + * but otherwise we need to avoid get_kernel_nofault()'s overhead. + */ + if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) { + if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE))) + return fentry_ip; + } else { + instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE); + } if (is_endbr(instr)) fentry_ip -= ENDBR_INSN_SIZE; return fentry_ip; @@ -1182,9 +1178,6 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = { BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags) { -#ifndef CONFIG_X86 - return -ENOENT; -#else static const u32 br_entry_size = sizeof(struct perf_branch_entry); u32 entry_cnt = size / br_entry_size; @@ -1197,7 +1190,6 @@ BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags) return -ENOENT; return entry_cnt * br_entry_size; -#endif } static const struct bpf_func_proto bpf_get_branch_snapshot_proto = { @@ -1224,7 +1216,8 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = { .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_PTR_TO_LONG, + .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, + .arg3_size = sizeof(u64), }; BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value) @@ -1240,7 +1233,8 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = { .func = get_func_ret, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_PTR_TO_LONG, + .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, + .arg2_size = sizeof(u64), }; BPF_CALL_1(get_func_arg_cnt, void *, ctx) @@ -1367,8 +1361,8 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey) #ifdef CONFIG_SYSTEM_DATA_VERIFICATION /** * bpf_verify_pkcs7_signature - verify a PKCS#7 signature - * @data_ptr: data to verify - * @sig_ptr: signature of the data + * @data_p: data to verify + * @sig_p: signature of the data * @trusted_keyring: keyring with keys trusted for signature verification * * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr* @@ -1376,10 +1370,12 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey) * * Return: 0 on success, a negative value on error. */ -__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, - struct bpf_dynptr_kern *sig_ptr, +__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, + struct bpf_dynptr *sig_p, struct bpf_key *trusted_keyring) { + struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p; + struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p; const void *data, *sig; u32 data_len, sig_len; int ret; @@ -1435,75 +1431,11 @@ static int __init bpf_key_sig_kfuncs_init(void) late_initcall(bpf_key_sig_kfuncs_init); #endif /* CONFIG_KEYS */ -/* filesystem kfuncs */ -__bpf_kfunc_start_defs(); - -/** - * bpf_get_file_xattr - get xattr of a file - * @file: file to get xattr from - * @name__str: name of the xattr - * @value_ptr: output buffer of the xattr value - * - * Get xattr *name__str* of *file* and store the output in *value_ptr*. - * - * For security reasons, only *name__str* with prefix "user." is allowed. - * - * Return: 0 on success, a negative value on error. - */ -__bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str, - struct bpf_dynptr_kern *value_ptr) -{ - struct dentry *dentry; - u32 value_len; - void *value; - int ret; - - if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) - return -EPERM; - - value_len = __bpf_dynptr_size(value_ptr); - value = __bpf_dynptr_data_rw(value_ptr, value_len); - if (!value) - return -EINVAL; - - dentry = file_dentry(file); - ret = inode_permission(&nop_mnt_idmap, dentry->d_inode, MAY_READ); - if (ret) - return ret; - return __vfs_getxattr(dentry, dentry->d_inode, name__str, value, value_len); -} - -__bpf_kfunc_end_defs(); - -BTF_KFUNCS_START(fs_kfunc_set_ids) -BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS) -BTF_KFUNCS_END(fs_kfunc_set_ids) - -static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id) -{ - if (!btf_id_set8_contains(&fs_kfunc_set_ids, kfunc_id)) - return 0; - - /* Only allow to attach from LSM hooks, to avoid recursion */ - return prog->type != BPF_PROG_TYPE_LSM ? -EACCES : 0; -} - -static const struct btf_kfunc_id_set bpf_fs_kfunc_set = { - .owner = THIS_MODULE, - .set = &fs_kfunc_set_ids, - .filter = bpf_get_file_xattr_filter, -}; - -static int __init bpf_fs_kfuncs_init(void) -{ - return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set); -} - -late_initcall(bpf_fs_kfuncs_init); - static const struct bpf_func_proto * bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { + const struct bpf_func_proto *func_proto; + switch (func_id) { case BPF_FUNC_map_lookup_elem: return &bpf_map_lookup_elem_proto; @@ -1525,8 +1457,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_ktime_get_boot_ns_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; - case BPF_FUNC_get_current_pid_tgid: - return &bpf_get_current_pid_tgid_proto; case BPF_FUNC_get_current_task: return &bpf_get_current_task_proto; case BPF_FUNC_get_current_task_btf: @@ -1545,13 +1475,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_numa_node_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; - case BPF_FUNC_current_task_under_cgroup: - return &bpf_current_task_under_cgroup_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; - case BPF_FUNC_probe_write_user: - return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ? - NULL : bpf_get_probe_write_proto(); case BPF_FUNC_probe_read_user: return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: @@ -1575,6 +1500,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_cgrp_storage_get_proto; case BPF_FUNC_cgrp_storage_delete: return &bpf_cgrp_storage_delete_proto; + case BPF_FUNC_current_task_under_cgroup: + return &bpf_current_task_under_cgroup_proto; #endif case BPF_FUNC_send_signal: return &bpf_send_signal_proto; @@ -1582,8 +1509,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_send_signal_thread_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; - case BPF_FUNC_get_ns_current_pid_tgid: - return &bpf_get_ns_current_pid_tgid_proto; case BPF_FUNC_ringbuf_output: return &bpf_ringbuf_output_proto; case BPF_FUNC_ringbuf_reserve: @@ -1597,7 +1522,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_jiffies64: return &bpf_jiffies64_proto; case BPF_FUNC_get_task_stack: - return &bpf_get_task_stack_proto; + return prog->sleepable ? &bpf_get_task_stack_sleepable_proto + : &bpf_get_task_stack_proto; case BPF_FUNC_copy_from_user: return &bpf_copy_from_user_proto; case BPF_FUNC_copy_from_user_task: @@ -1629,10 +1555,47 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_trace_vprintk: return bpf_get_trace_vprintk_proto(); default: - return bpf_base_func_proto(func_id, prog); + break; + } + + func_proto = bpf_base_func_proto(func_id, prog); + if (func_proto) + return func_proto; + + if (!bpf_token_capable(prog->aux->token, CAP_SYS_ADMIN)) + return NULL; + + switch (func_id) { + case BPF_FUNC_probe_write_user: + return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ? + NULL : &bpf_probe_write_user_proto; + default: + return NULL; } } +static bool is_kprobe_multi(const struct bpf_prog *prog) +{ + return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI || + prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; +} + +static inline bool is_kprobe_session(const struct bpf_prog *prog) +{ + return prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION; +} + +static inline bool is_uprobe_multi(const struct bpf_prog *prog) +{ + return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI || + prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION; +} + +static inline bool is_uprobe_session(const struct bpf_prog *prog) +{ + return prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION; +} + static const struct bpf_func_proto * kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1642,21 +1605,21 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto; case BPF_FUNC_get_stack: - return &bpf_get_stack_proto; + return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto; #ifdef CONFIG_BPF_KPROBE_OVERRIDE case BPF_FUNC_override_return: return &bpf_override_return_proto; #endif case BPF_FUNC_get_func_ip: - if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) + if (is_kprobe_multi(prog)) return &bpf_get_func_ip_proto_kprobe_multi; - if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) + if (is_uprobe_multi(prog)) return &bpf_get_func_ip_proto_uprobe_multi; return &bpf_get_func_ip_proto_kprobe; case BPF_FUNC_get_attach_cookie: - if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) + if (is_kprobe_multi(prog)) return &bpf_get_attach_cookie_proto_kmulti; - if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) + if (is_uprobe_multi(prog)) return &bpf_get_attach_cookie_proto_umulti; return &bpf_get_attach_cookie_proto_trace; default: @@ -2008,6 +1971,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_stackid_proto_raw_tp; case BPF_FUNC_get_stack: return &bpf_get_stack_proto_raw_tp; + case BPF_FUNC_get_attach_cookie: + return &bpf_get_attach_cookie_proto_tracing; default: return bpf_tracing_func_proto(func_id, prog); } @@ -2070,6 +2035,9 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_func_arg_cnt: return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL; case BPF_FUNC_get_attach_cookie: + if (prog->type == BPF_PROG_TYPE_TRACING && + prog->expected_attach_type == BPF_TRACE_RAW_TP) + return &bpf_get_attach_cookie_proto_tracing; return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL; default: fn = raw_tp_prog_func_proto(func_id, prog); @@ -2278,6 +2246,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event) { struct bpf_prog_array *old_array; struct bpf_prog_array *new_array; + struct bpf_prog *prog = NULL; int ret; mutex_lock(&bpf_event_mutex); @@ -2286,9 +2255,10 @@ void perf_event_detach_bpf_prog(struct perf_event *event) goto unlock; old_array = bpf_event_rcu_dereference(event->tp_event->prog_array); + if (!old_array) + goto put; + ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array); - if (ret == -ENOENT) - goto unlock; if (ret < 0) { bpf_prog_array_delete_safe(old_array, event->prog); } else { @@ -2296,11 +2266,23 @@ void perf_event_detach_bpf_prog(struct perf_event *event) bpf_prog_array_free_sleepable(old_array); } - bpf_prog_put(event->prog); +put: + prog = event->prog; event->prog = NULL; unlock: mutex_unlock(&bpf_event_mutex); + + if (prog) { + /* + * It could be that the bpf_prog is not sleepable (and will be freed + * via normal RCU), but is called from a point that supports sleepable + * programs and uses tasks-trace-RCU. + */ + synchronize_rcu_tasks_trace(); + + bpf_prog_put(prog); + } } int perf_event_query_prog_array(struct perf_event *event, void __user *info) @@ -2370,16 +2352,26 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) } static __always_inline -void __bpf_trace_run(struct bpf_prog *prog, u64 *args) +void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) { + struct bpf_prog *prog = link->link.prog; + struct bpf_run_ctx *old_run_ctx; + struct bpf_trace_run_ctx run_ctx; + cant_sleep(); if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { bpf_prog_inc_misses_counter(prog); goto out; } + + run_ctx.bpf_cookie = link->cookie; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + rcu_read_lock(); (void) bpf_prog_run(prog, args); rcu_read_unlock(); + + bpf_reset_run_ctx(old_run_ctx); out: this_cpu_dec(*(prog->active)); } @@ -2408,12 +2400,12 @@ out: #define __SEQ_0_11 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 #define BPF_TRACE_DEFN_x(x) \ - void bpf_trace_run##x(struct bpf_prog *prog, \ + void bpf_trace_run##x(struct bpf_raw_tp_link *link, \ REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \ { \ u64 args[x]; \ REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \ - __bpf_trace_run(prog, args); \ + __bpf_trace_run(link, args); \ } \ EXPORT_SYMBOL_GPL(bpf_trace_run##x) BPF_TRACE_DEFN_x(1); @@ -2429,9 +2421,10 @@ BPF_TRACE_DEFN_x(10); BPF_TRACE_DEFN_x(11); BPF_TRACE_DEFN_x(12); -static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) +int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) { struct tracepoint *tp = btp->tp; + struct bpf_prog *prog = link->link.prog; /* * check that program doesn't access arguments beyond what's @@ -2443,18 +2436,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog * if (prog->aux->max_tp_access > btp->writable_size) return -EINVAL; - return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, - prog); + return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link); } -int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) +int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) { - return __bpf_probe_register(btp, prog); -} - -int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) -{ - return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); + return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link); } int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, @@ -2577,6 +2564,12 @@ static int __init bpf_event_init(void) fs_initcall(bpf_event_init); #endif /* CONFIG_MODULES */ +struct bpf_session_run_ctx { + struct bpf_run_ctx run_ctx; + bool is_return; + void *data; +}; + #ifdef CONFIG_FPROBE struct bpf_kprobe_multi_link { struct bpf_link link; @@ -2590,7 +2583,7 @@ struct bpf_kprobe_multi_link { }; struct bpf_kprobe_multi_run_ctx { - struct bpf_run_ctx run_ctx; + struct bpf_session_run_ctx session_ctx; struct bpf_kprobe_multi_link *link; unsigned long entry_ip; }; @@ -2600,6 +2593,20 @@ struct user_syms { char *buf; }; +#ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS +static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs); +#define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs) +#else +#define bpf_kprobe_multi_pt_regs_ptr() (NULL) +#endif + +static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip) +{ + unsigned long ip = ftrace_get_symaddr(fentry_ip); + + return ip ? : fentry_ip; +} + static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt) { unsigned long __user usymbol; @@ -2769,7 +2776,8 @@ static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx) if (WARN_ON_ONCE(!ctx)) return 0; - run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx); + run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, + session_ctx.run_ctx); link = run_ctx->link; if (!link->cookies) return 0; @@ -2786,30 +2794,38 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx) { struct bpf_kprobe_multi_run_ctx *run_ctx; - run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx); + run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, + session_ctx.run_ctx); return run_ctx->entry_ip; } static int kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, - unsigned long entry_ip, struct pt_regs *regs) + unsigned long entry_ip, struct ftrace_regs *fregs, + bool is_return, void *data) { struct bpf_kprobe_multi_run_ctx run_ctx = { + .session_ctx = { + .is_return = is_return, + .data = data, + }, .link = link, .entry_ip = entry_ip, }; struct bpf_run_ctx *old_run_ctx; + struct pt_regs *regs; int err; if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { bpf_prog_inc_misses_counter(link->link.prog); - err = 0; + err = 1; goto out; } migrate_disable(); rcu_read_lock(); - old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr()); + old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx); err = bpf_prog_run(link->link.prog, regs); bpf_reset_run_ctx(old_run_ctx); rcu_read_unlock(); @@ -2822,25 +2838,28 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, static int kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip, - unsigned long ret_ip, struct pt_regs *regs, + unsigned long ret_ip, struct ftrace_regs *fregs, void *data) { struct bpf_kprobe_multi_link *link; + int err; link = container_of(fp, struct bpf_kprobe_multi_link, fp); - kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs); - return 0; + err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip), + fregs, false, data); + return is_kprobe_session(link->link.prog) ? err : 0; } static void kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip, - unsigned long ret_ip, struct pt_regs *regs, + unsigned long ret_ip, struct ftrace_regs *fregs, void *data) { struct bpf_kprobe_multi_link *link; link = container_of(fp, struct bpf_kprobe_multi_link, fp); - kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs); + kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip), + fregs, true, data); } static int symbols_cmp_r(const void *a, const void *b, const void *priv) @@ -2973,7 +2992,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; - if (prog->expected_attach_type != BPF_TRACE_KPROBE_MULTI) + if (!is_kprobe_multi(prog)) return -EINVAL; flags = attr->link_create.kprobe_multi.flags; @@ -3054,10 +3073,12 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (err) goto error; - if (flags & BPF_F_KPROBE_MULTI_RETURN) - link->fp.exit_handler = kprobe_multi_link_exit_handler; - else + if (!(flags & BPF_F_KPROBE_MULTI_RETURN)) link->fp.entry_handler = kprobe_multi_link_handler; + if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog)) + link->fp.exit_handler = kprobe_multi_link_exit_handler; + if (is_kprobe_session(prog)) + link->fp.entry_data_size = sizeof(u64); link->addrs = addrs; link->cookies = cookies; @@ -3122,7 +3143,9 @@ struct bpf_uprobe { loff_t offset; unsigned long ref_ctr_offset; u64 cookie; + struct uprobe *uprobe; struct uprobe_consumer consumer; + bool session; }; struct bpf_uprobe_multi_link { @@ -3135,20 +3158,20 @@ struct bpf_uprobe_multi_link { }; struct bpf_uprobe_multi_run_ctx { - struct bpf_run_ctx run_ctx; + struct bpf_session_run_ctx session_ctx; unsigned long entry_ip; struct bpf_uprobe *uprobe; }; -static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, - u32 cnt) +static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt) { u32 i; - for (i = 0; i < cnt; i++) { - uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, - &uprobes[i].consumer); - } + for (i = 0; i < cnt; i++) + uprobe_unregister_nosync(uprobes[i].uprobe, &uprobes[i].consumer); + + if (cnt) + uprobe_unregister_sync(); } static void bpf_uprobe_multi_link_release(struct bpf_link *link) @@ -3156,7 +3179,7 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link) struct bpf_uprobe_multi_link *umulti_link; umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); - bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt); + bpf_uprobe_unregister(umulti_link->uprobes, umulti_link->cnt); if (umulti_link->task) put_task_struct(umulti_link->task); path_put(&umulti_link->path); @@ -3182,7 +3205,8 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, struct bpf_uprobe_multi_link *umulti_link; u32 ucount = info->uprobe_multi.count; int err = 0, i; - long left; + char *p, *buf; + long left = 0; if (!upath ^ !upath_size) return -EINVAL; @@ -3196,26 +3220,23 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, info->uprobe_multi.pid = umulti_link->task ? task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0; - if (upath) { - char *p, *buf; - - upath_size = min_t(u32, upath_size, PATH_MAX); - - buf = kmalloc(upath_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = d_path(&umulti_link->path, buf, upath_size); - if (IS_ERR(p)) { - kfree(buf); - return PTR_ERR(p); - } - upath_size = buf + upath_size - p; - left = copy_to_user(upath, p, upath_size); + upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX; + buf = kmalloc(upath_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + p = d_path(&umulti_link->path, buf, upath_size); + if (IS_ERR(p)) { kfree(buf); - if (left) - return -EFAULT; - info->uprobe_multi.path_size = upath_size; + return PTR_ERR(p); } + upath_size = buf + upath_size - p; + + if (upath) + left = copy_to_user(upath, p, upath_size); + kfree(buf); + if (left) + return -EFAULT; + info->uprobe_multi.path_size = upath_size; if (!uoffsets && !ucookies && !uref_ctr_offsets) return 0; @@ -3248,19 +3269,24 @@ static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { static int uprobe_prog_run(struct bpf_uprobe *uprobe, unsigned long entry_ip, - struct pt_regs *regs) + struct pt_regs *regs, + bool is_return, void *data) { struct bpf_uprobe_multi_link *link = uprobe->link; struct bpf_uprobe_multi_run_ctx run_ctx = { + .session_ctx = { + .is_return = is_return, + .data = data, + }, .entry_ip = entry_ip, .uprobe = uprobe, }; struct bpf_prog *prog = link->link.prog; bool sleepable = prog->sleepable; struct bpf_run_ctx *old_run_ctx; - int err = 0; + int err; - if (link->task && current != link->task) + if (link->task && !same_thread_group(current, link->task)) return 0; if (sleepable) @@ -3270,7 +3296,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe, migrate_disable(); - old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx); err = bpf_prog_run(link->link.prog, regs); bpf_reset_run_ctx(old_run_ctx); @@ -3284,8 +3310,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe, } static bool -uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx, - struct mm_struct *mm) +uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm) { struct bpf_uprobe *uprobe; @@ -3294,28 +3319,36 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx } static int -uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) +uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs, + __u64 *data) { struct bpf_uprobe *uprobe; + int ret; uprobe = container_of(con, struct bpf_uprobe, consumer); - return uprobe_prog_run(uprobe, instruction_pointer(regs), regs); + ret = uprobe_prog_run(uprobe, instruction_pointer(regs), regs, false, data); + if (uprobe->session) + return ret ? UPROBE_HANDLER_IGNORE : 0; + return 0; } static int -uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) +uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs, + __u64 *data) { struct bpf_uprobe *uprobe; uprobe = container_of(con, struct bpf_uprobe, consumer); - return uprobe_prog_run(uprobe, func, regs); + uprobe_prog_run(uprobe, func, regs, true, data); + return 0; } static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) { struct bpf_uprobe_multi_run_ctx *run_ctx; - run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx); + run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, + session_ctx.run_ctx); return run_ctx->entry_ip; } @@ -3323,7 +3356,8 @@ static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) { struct bpf_uprobe_multi_run_ctx *run_ctx; - run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx); + run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, + session_ctx.run_ctx); return run_ctx->uprobe->cookie; } @@ -3347,7 +3381,7 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (sizeof(u64) != sizeof(void *)) return -EOPNOTSUPP; - if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI) + if (!is_uprobe_multi(prog)) return -EINVAL; flags = attr->link_create.uprobe_multi.flags; @@ -3361,8 +3395,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); cnt = attr->link_create.uprobe_multi.cnt; + pid = attr->link_create.uprobe_multi.pid; - if (!upath || !uoffsets || !cnt) + if (!upath || !uoffsets || !cnt || pid < 0) return -EINVAL; if (cnt > MAX_UPROBE_MULTI_CNT) return -E2BIG; @@ -3386,11 +3421,8 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr goto error_path_put; } - pid = attr->link_create.uprobe_multi.pid; if (pid) { - rcu_read_lock(); - task = get_pid_task(find_vpid(pid), PIDTYPE_PID); - rcu_read_unlock(); + task = get_pid_task(find_vpid(pid), PIDTYPE_TGID); if (!task) { err = -ESRCH; goto error_path_put; @@ -3425,11 +3457,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr uprobes[i].link = link; - if (flags & BPF_F_UPROBE_MULTI_RETURN) - uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler; - else + if (!(flags & BPF_F_UPROBE_MULTI_RETURN)) uprobes[i].consumer.handler = uprobe_multi_link_handler; - + if (flags & BPF_F_UPROBE_MULTI_RETURN || is_uprobe_session(prog)) + uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler; + if (is_uprobe_session(prog)) + uprobes[i].session = true; if (pid) uprobes[i].consumer.filter = uprobe_multi_link_filter; } @@ -3444,22 +3477,26 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr &bpf_uprobe_multi_link_lops, prog); for (i = 0; i < cnt; i++) { - err = uprobe_register_refctr(d_real_inode(link->path.dentry), - uprobes[i].offset, - uprobes[i].ref_ctr_offset, - &uprobes[i].consumer); - if (err) { - bpf_uprobe_unregister(&path, uprobes, i); - goto error_free; + uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry), + uprobes[i].offset, + uprobes[i].ref_ctr_offset, + &uprobes[i].consumer); + if (IS_ERR(uprobes[i].uprobe)) { + err = PTR_ERR(uprobes[i].uprobe); + link->cnt = i; + goto error_unregister; } } err = bpf_link_prime(&link->link, &link_primer); if (err) - goto error_free; + goto error_unregister; return bpf_link_settle(&link_primer); +error_unregister: + bpf_uprobe_unregister(uprobes, link->cnt); + error_free: kvfree(uprobes); kfree(link); @@ -3483,3 +3520,65 @@ static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) return 0; } #endif /* CONFIG_UPROBES */ + +__bpf_kfunc_start_defs(); + +__bpf_kfunc bool bpf_session_is_return(void) +{ + struct bpf_session_run_ctx *session_ctx; + + session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx); + return session_ctx->is_return; +} + +__bpf_kfunc __u64 *bpf_session_cookie(void) +{ + struct bpf_session_run_ctx *session_ctx; + + session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx); + return session_ctx->data; +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(kprobe_multi_kfunc_set_ids) +BTF_ID_FLAGS(func, bpf_session_is_return) +BTF_ID_FLAGS(func, bpf_session_cookie) +BTF_KFUNCS_END(kprobe_multi_kfunc_set_ids) + +static int bpf_kprobe_multi_filter(const struct bpf_prog *prog, u32 kfunc_id) +{ + if (!btf_id_set8_contains(&kprobe_multi_kfunc_set_ids, kfunc_id)) + return 0; + + if (!is_kprobe_session(prog) && !is_uprobe_session(prog)) + return -EACCES; + + return 0; +} + +static const struct btf_kfunc_id_set bpf_kprobe_multi_kfunc_set = { + .owner = THIS_MODULE, + .set = &kprobe_multi_kfunc_set_ids, + .filter = bpf_kprobe_multi_filter, +}; + +static int __init bpf_kprobe_multi_kfuncs_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_kprobe_multi_kfunc_set); +} + +late_initcall(bpf_kprobe_multi_kfuncs_init); + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type, + u64 value) +{ + if (type != PIDTYPE_PID && type != PIDTYPE_TGID) + return -EINVAL; + + return bpf_send_signal_common(sig, type, task, value); +} + +__bpf_kfunc_end_defs(); |