summaryrefslogtreecommitdiff
path: root/kernel/trace/bpf_trace.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/bpf_trace.c')
-rw-r--r--kernel/trace/bpf_trace.c563
1 files changed, 331 insertions, 232 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 9dc605f08a23..adc947587eb8 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -24,7 +24,6 @@
#include <linux/key.h>
#include <linux/verification.h>
#include <linux/namei.h>
-#include <linux/fileattr.h>
#include <net/bpf_sk_storage.h>
@@ -358,17 +357,6 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = {
.arg3_type = ARG_CONST_SIZE,
};
-static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
-{
- if (!capable(CAP_SYS_ADMIN))
- return NULL;
-
- pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
- current->comm, task_pid_nr(current));
-
- return &bpf_probe_write_user_proto;
-}
-
#define MAX_TRACE_PRINTK_VARARGS 3
#define BPF_TRACE_PRINTK_SIZE 1024
@@ -620,7 +608,8 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
- u64 flags, struct perf_sample_data *sd)
+ u64 flags, struct perf_raw_record *raw,
+ struct perf_sample_data *sd)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
@@ -645,6 +634,8 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
if (unlikely(event->oncpu != cpu))
return -EOPNOTSUPP;
+ perf_sample_save_raw_data(sd, event, raw);
+
return perf_event_output(event, sd, regs);
}
@@ -688,9 +679,8 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
}
perf_sample_data_init(sd, 0, 0);
- perf_sample_save_raw_data(sd, &raw);
- err = __bpf_perf_event_output(regs, map, flags, sd);
+ err = __bpf_perf_event_output(regs, map, flags, &raw, sd);
out:
this_cpu_dec(bpf_trace_nest_level);
preempt_enable();
@@ -749,9 +739,8 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
perf_fetch_caller_regs(regs);
perf_sample_data_init(sd, 0, 0);
- perf_sample_save_raw_data(sd, &raw);
- ret = __bpf_perf_event_output(regs, map, flags, sd);
+ ret = __bpf_perf_event_output(regs, map, flags, &raw, sd);
out:
this_cpu_dec(bpf_event_output_nest_level);
preempt_enable();
@@ -798,34 +787,13 @@ const struct bpf_func_proto bpf_task_pt_regs_proto = {
.ret_btf_id = &bpf_task_pt_regs_ids[0],
};
-BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
-{
- struct bpf_array *array = container_of(map, struct bpf_array, map);
- struct cgroup *cgrp;
-
- if (unlikely(idx >= array->map.max_entries))
- return -E2BIG;
-
- cgrp = READ_ONCE(array->ptrs[idx]);
- if (unlikely(!cgrp))
- return -EAGAIN;
-
- return task_under_cgroup_hierarchy(current, cgrp);
-}
-
-static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
- .func = bpf_current_task_under_cgroup,
- .gpl_only = false,
- .ret_type = RET_INTEGER,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_ANYTHING,
-};
-
struct send_signal_irq_work {
struct irq_work irq_work;
struct task_struct *task;
u32 sig;
enum pid_type type;
+ bool has_siginfo;
+ struct kernel_siginfo info;
};
static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
@@ -833,30 +801,49 @@ static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
static void do_bpf_send_signal(struct irq_work *entry)
{
struct send_signal_irq_work *work;
+ struct kernel_siginfo *siginfo;
work = container_of(entry, struct send_signal_irq_work, irq_work);
- group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
+ siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV;
+
+ group_send_sig_info(work->sig, siginfo, work->task, work->type);
put_task_struct(work->task);
}
-static int bpf_send_signal_common(u32 sig, enum pid_type type)
+static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value)
{
struct send_signal_irq_work *work = NULL;
+ struct kernel_siginfo info;
+ struct kernel_siginfo *siginfo;
+
+ if (!task) {
+ task = current;
+ siginfo = SEND_SIG_PRIV;
+ } else {
+ clear_siginfo(&info);
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_KERNEL;
+ info.si_pid = 0;
+ info.si_uid = 0;
+ info.si_value.sival_ptr = (void *)(unsigned long)value;
+ siginfo = &info;
+ }
/* Similar to bpf_probe_write_user, task needs to be
* in a sound condition and kernel memory access be
* permitted in order to send signal to the current
* task.
*/
- if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
+ if (unlikely(task->flags & (PF_KTHREAD | PF_EXITING)))
return -EPERM;
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;
/* Task should not be pid=1 to avoid kernel panic. */
- if (unlikely(is_global_init(current)))
+ if (unlikely(is_global_init(task)))
return -EPERM;
- if (irqs_disabled()) {
+ if (!preemptible()) {
/* Do an early check on signal validity. Otherwise,
* the error is lost in deferred irq_work.
*/
@@ -871,19 +858,22 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
* to the irq_work. The current task may change when queued
* irq works get executed.
*/
- work->task = get_task_struct(current);
+ work->task = get_task_struct(task);
+ work->has_siginfo = siginfo == &info;
+ if (work->has_siginfo)
+ copy_siginfo(&work->info, &info);
work->sig = sig;
work->type = type;
irq_work_queue(&work->irq_work);
return 0;
}
- return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
+ return group_send_sig_info(sig, siginfo, task, type);
}
BPF_CALL_1(bpf_send_signal, u32, sig)
{
- return bpf_send_signal_common(sig, PIDTYPE_TGID);
+ return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0);
}
static const struct bpf_func_proto bpf_send_signal_proto = {
@@ -895,7 +885,7 @@ static const struct bpf_func_proto bpf_send_signal_proto = {
BPF_CALL_1(bpf_send_signal_thread, u32, sig)
{
- return bpf_send_signal_common(sig, PIDTYPE_PID);
+ return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0);
}
static const struct bpf_func_proto bpf_send_signal_thread_proto = {
@@ -1053,9 +1043,15 @@ static unsigned long get_entry_ip(unsigned long fentry_ip)
{
u32 instr;
- /* Being extra safe in here in case entry ip is on the page-edge. */
- if (get_kernel_nofault(instr, (u32 *) fentry_ip - 1))
- return fentry_ip;
+ /* We want to be extra safe in case entry ip is on the page edge,
+ * but otherwise we need to avoid get_kernel_nofault()'s overhead.
+ */
+ if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
+ if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
+ return fentry_ip;
+ } else {
+ instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
+ }
if (is_endbr(instr))
fentry_ip -= ENDBR_INSN_SIZE;
return fentry_ip;
@@ -1182,9 +1178,6 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = {
BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
{
-#ifndef CONFIG_X86
- return -ENOENT;
-#else
static const u32 br_entry_size = sizeof(struct perf_branch_entry);
u32 entry_cnt = size / br_entry_size;
@@ -1197,7 +1190,6 @@ BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
return -ENOENT;
return entry_cnt * br_entry_size;
-#endif
}
static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
@@ -1224,7 +1216,8 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_LONG,
+ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
+ .arg3_size = sizeof(u64),
};
BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
@@ -1240,7 +1233,8 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = {
.func = get_func_ret,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_LONG,
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
+ .arg2_size = sizeof(u64),
};
BPF_CALL_1(get_func_arg_cnt, void *, ctx)
@@ -1367,8 +1361,8 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
/**
* bpf_verify_pkcs7_signature - verify a PKCS#7 signature
- * @data_ptr: data to verify
- * @sig_ptr: signature of the data
+ * @data_p: data to verify
+ * @sig_p: signature of the data
* @trusted_keyring: keyring with keys trusted for signature verification
*
* Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
@@ -1376,10 +1370,12 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey)
*
* Return: 0 on success, a negative value on error.
*/
-__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
- struct bpf_dynptr_kern *sig_ptr,
+__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p,
+ struct bpf_dynptr *sig_p,
struct bpf_key *trusted_keyring)
{
+ struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p;
+ struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p;
const void *data, *sig;
u32 data_len, sig_len;
int ret;
@@ -1435,75 +1431,11 @@ static int __init bpf_key_sig_kfuncs_init(void)
late_initcall(bpf_key_sig_kfuncs_init);
#endif /* CONFIG_KEYS */
-/* filesystem kfuncs */
-__bpf_kfunc_start_defs();
-
-/**
- * bpf_get_file_xattr - get xattr of a file
- * @file: file to get xattr from
- * @name__str: name of the xattr
- * @value_ptr: output buffer of the xattr value
- *
- * Get xattr *name__str* of *file* and store the output in *value_ptr*.
- *
- * For security reasons, only *name__str* with prefix "user." is allowed.
- *
- * Return: 0 on success, a negative value on error.
- */
-__bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str,
- struct bpf_dynptr_kern *value_ptr)
-{
- struct dentry *dentry;
- u32 value_len;
- void *value;
- int ret;
-
- if (strncmp(name__str, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
- return -EPERM;
-
- value_len = __bpf_dynptr_size(value_ptr);
- value = __bpf_dynptr_data_rw(value_ptr, value_len);
- if (!value)
- return -EINVAL;
-
- dentry = file_dentry(file);
- ret = inode_permission(&nop_mnt_idmap, dentry->d_inode, MAY_READ);
- if (ret)
- return ret;
- return __vfs_getxattr(dentry, dentry->d_inode, name__str, value, value_len);
-}
-
-__bpf_kfunc_end_defs();
-
-BTF_KFUNCS_START(fs_kfunc_set_ids)
-BTF_ID_FLAGS(func, bpf_get_file_xattr, KF_SLEEPABLE | KF_TRUSTED_ARGS)
-BTF_KFUNCS_END(fs_kfunc_set_ids)
-
-static int bpf_get_file_xattr_filter(const struct bpf_prog *prog, u32 kfunc_id)
-{
- if (!btf_id_set8_contains(&fs_kfunc_set_ids, kfunc_id))
- return 0;
-
- /* Only allow to attach from LSM hooks, to avoid recursion */
- return prog->type != BPF_PROG_TYPE_LSM ? -EACCES : 0;
-}
-
-static const struct btf_kfunc_id_set bpf_fs_kfunc_set = {
- .owner = THIS_MODULE,
- .set = &fs_kfunc_set_ids,
- .filter = bpf_get_file_xattr_filter,
-};
-
-static int __init bpf_fs_kfuncs_init(void)
-{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_LSM, &bpf_fs_kfunc_set);
-}
-
-late_initcall(bpf_fs_kfuncs_init);
-
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
+ const struct bpf_func_proto *func_proto;
+
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
return &bpf_map_lookup_elem_proto;
@@ -1525,8 +1457,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_ktime_get_boot_ns_proto;
case BPF_FUNC_tail_call:
return &bpf_tail_call_proto;
- case BPF_FUNC_get_current_pid_tgid:
- return &bpf_get_current_pid_tgid_proto;
case BPF_FUNC_get_current_task:
return &bpf_get_current_task_proto;
case BPF_FUNC_get_current_task_btf:
@@ -1545,13 +1475,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_numa_node_id_proto;
case BPF_FUNC_perf_event_read:
return &bpf_perf_event_read_proto;
- case BPF_FUNC_current_task_under_cgroup:
- return &bpf_current_task_under_cgroup_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
- case BPF_FUNC_probe_write_user:
- return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
- NULL : bpf_get_probe_write_proto();
case BPF_FUNC_probe_read_user:
return &bpf_probe_read_user_proto;
case BPF_FUNC_probe_read_kernel:
@@ -1575,6 +1500,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_cgrp_storage_get_proto;
case BPF_FUNC_cgrp_storage_delete:
return &bpf_cgrp_storage_delete_proto;
+ case BPF_FUNC_current_task_under_cgroup:
+ return &bpf_current_task_under_cgroup_proto;
#endif
case BPF_FUNC_send_signal:
return &bpf_send_signal_proto;
@@ -1582,8 +1509,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_send_signal_thread_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
- case BPF_FUNC_get_ns_current_pid_tgid:
- return &bpf_get_ns_current_pid_tgid_proto;
case BPF_FUNC_ringbuf_output:
return &bpf_ringbuf_output_proto;
case BPF_FUNC_ringbuf_reserve:
@@ -1597,7 +1522,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_jiffies64:
return &bpf_jiffies64_proto;
case BPF_FUNC_get_task_stack:
- return &bpf_get_task_stack_proto;
+ return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
+ : &bpf_get_task_stack_proto;
case BPF_FUNC_copy_from_user:
return &bpf_copy_from_user_proto;
case BPF_FUNC_copy_from_user_task:
@@ -1629,10 +1555,47 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto();
default:
- return bpf_base_func_proto(func_id, prog);
+ break;
+ }
+
+ func_proto = bpf_base_func_proto(func_id, prog);
+ if (func_proto)
+ return func_proto;
+
+ if (!bpf_token_capable(prog->aux->token, CAP_SYS_ADMIN))
+ return NULL;
+
+ switch (func_id) {
+ case BPF_FUNC_probe_write_user:
+ return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
+ NULL : &bpf_probe_write_user_proto;
+ default:
+ return NULL;
}
}
+static bool is_kprobe_multi(const struct bpf_prog *prog)
+{
+ return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ||
+ prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
+}
+
+static inline bool is_kprobe_session(const struct bpf_prog *prog)
+{
+ return prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
+}
+
+static inline bool is_uprobe_multi(const struct bpf_prog *prog)
+{
+ return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI ||
+ prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
+}
+
+static inline bool is_uprobe_session(const struct bpf_prog *prog)
+{
+ return prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
+}
+
static const struct bpf_func_proto *
kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -1642,21 +1605,21 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto;
case BPF_FUNC_get_stack:
- return &bpf_get_stack_proto;
+ return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
case BPF_FUNC_override_return:
return &bpf_override_return_proto;
#endif
case BPF_FUNC_get_func_ip:
- if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI)
+ if (is_kprobe_multi(prog))
return &bpf_get_func_ip_proto_kprobe_multi;
- if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
+ if (is_uprobe_multi(prog))
return &bpf_get_func_ip_proto_uprobe_multi;
return &bpf_get_func_ip_proto_kprobe;
case BPF_FUNC_get_attach_cookie:
- if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI)
+ if (is_kprobe_multi(prog))
return &bpf_get_attach_cookie_proto_kmulti;
- if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI)
+ if (is_uprobe_multi(prog))
return &bpf_get_attach_cookie_proto_umulti;
return &bpf_get_attach_cookie_proto_trace;
default:
@@ -2008,6 +1971,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_stackid_proto_raw_tp;
case BPF_FUNC_get_stack:
return &bpf_get_stack_proto_raw_tp;
+ case BPF_FUNC_get_attach_cookie:
+ return &bpf_get_attach_cookie_proto_tracing;
default:
return bpf_tracing_func_proto(func_id, prog);
}
@@ -2070,6 +2035,9 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_func_arg_cnt:
return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL;
case BPF_FUNC_get_attach_cookie:
+ if (prog->type == BPF_PROG_TYPE_TRACING &&
+ prog->expected_attach_type == BPF_TRACE_RAW_TP)
+ return &bpf_get_attach_cookie_proto_tracing;
return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL;
default:
fn = raw_tp_prog_func_proto(func_id, prog);
@@ -2278,6 +2246,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
{
struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
+ struct bpf_prog *prog = NULL;
int ret;
mutex_lock(&bpf_event_mutex);
@@ -2286,9 +2255,10 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
goto unlock;
old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
+ if (!old_array)
+ goto put;
+
ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
- if (ret == -ENOENT)
- goto unlock;
if (ret < 0) {
bpf_prog_array_delete_safe(old_array, event->prog);
} else {
@@ -2296,11 +2266,23 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
bpf_prog_array_free_sleepable(old_array);
}
- bpf_prog_put(event->prog);
+put:
+ prog = event->prog;
event->prog = NULL;
unlock:
mutex_unlock(&bpf_event_mutex);
+
+ if (prog) {
+ /*
+ * It could be that the bpf_prog is not sleepable (and will be freed
+ * via normal RCU), but is called from a point that supports sleepable
+ * programs and uses tasks-trace-RCU.
+ */
+ synchronize_rcu_tasks_trace();
+
+ bpf_prog_put(prog);
+ }
}
int perf_event_query_prog_array(struct perf_event *event, void __user *info)
@@ -2370,16 +2352,26 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
}
static __always_inline
-void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
+void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
{
+ struct bpf_prog *prog = link->link.prog;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_trace_run_ctx run_ctx;
+
cant_sleep();
if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
bpf_prog_inc_misses_counter(prog);
goto out;
}
+
+ run_ctx.bpf_cookie = link->cookie;
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+
rcu_read_lock();
(void) bpf_prog_run(prog, args);
rcu_read_unlock();
+
+ bpf_reset_run_ctx(old_run_ctx);
out:
this_cpu_dec(*(prog->active));
}
@@ -2408,12 +2400,12 @@ out:
#define __SEQ_0_11 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
#define BPF_TRACE_DEFN_x(x) \
- void bpf_trace_run##x(struct bpf_prog *prog, \
+ void bpf_trace_run##x(struct bpf_raw_tp_link *link, \
REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \
{ \
u64 args[x]; \
REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \
- __bpf_trace_run(prog, args); \
+ __bpf_trace_run(link, args); \
} \
EXPORT_SYMBOL_GPL(bpf_trace_run##x)
BPF_TRACE_DEFN_x(1);
@@ -2429,9 +2421,10 @@ BPF_TRACE_DEFN_x(10);
BPF_TRACE_DEFN_x(11);
BPF_TRACE_DEFN_x(12);
-static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
+int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
{
struct tracepoint *tp = btp->tp;
+ struct bpf_prog *prog = link->link.prog;
/*
* check that program doesn't access arguments beyond what's
@@ -2443,18 +2436,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
if (prog->aux->max_tp_access > btp->writable_size)
return -EINVAL;
- return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
- prog);
+ return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link);
}
-int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
+int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
{
- return __bpf_probe_register(btp, prog);
-}
-
-int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
-{
- return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
+ return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link);
}
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
@@ -2577,6 +2564,12 @@ static int __init bpf_event_init(void)
fs_initcall(bpf_event_init);
#endif /* CONFIG_MODULES */
+struct bpf_session_run_ctx {
+ struct bpf_run_ctx run_ctx;
+ bool is_return;
+ void *data;
+};
+
#ifdef CONFIG_FPROBE
struct bpf_kprobe_multi_link {
struct bpf_link link;
@@ -2590,7 +2583,7 @@ struct bpf_kprobe_multi_link {
};
struct bpf_kprobe_multi_run_ctx {
- struct bpf_run_ctx run_ctx;
+ struct bpf_session_run_ctx session_ctx;
struct bpf_kprobe_multi_link *link;
unsigned long entry_ip;
};
@@ -2600,6 +2593,20 @@ struct user_syms {
char *buf;
};
+#ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS
+static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
+#define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
+#else
+#define bpf_kprobe_multi_pt_regs_ptr() (NULL)
+#endif
+
+static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip)
+{
+ unsigned long ip = ftrace_get_symaddr(fentry_ip);
+
+ return ip ? : fentry_ip;
+}
+
static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt)
{
unsigned long __user usymbol;
@@ -2769,7 +2776,8 @@ static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
if (WARN_ON_ONCE(!ctx))
return 0;
- run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
+ run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
+ session_ctx.run_ctx);
link = run_ctx->link;
if (!link->cookies)
return 0;
@@ -2786,30 +2794,38 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
struct bpf_kprobe_multi_run_ctx *run_ctx;
- run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
+ run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
+ session_ctx.run_ctx);
return run_ctx->entry_ip;
}
static int
kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
- unsigned long entry_ip, struct pt_regs *regs)
+ unsigned long entry_ip, struct ftrace_regs *fregs,
+ bool is_return, void *data)
{
struct bpf_kprobe_multi_run_ctx run_ctx = {
+ .session_ctx = {
+ .is_return = is_return,
+ .data = data,
+ },
.link = link,
.entry_ip = entry_ip,
};
struct bpf_run_ctx *old_run_ctx;
+ struct pt_regs *regs;
int err;
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
bpf_prog_inc_misses_counter(link->link.prog);
- err = 0;
+ err = 1;
goto out;
}
migrate_disable();
rcu_read_lock();
- old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
@@ -2822,25 +2838,28 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
static int
kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *data)
{
struct bpf_kprobe_multi_link *link;
+ int err;
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
- kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
- return 0;
+ err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
+ fregs, false, data);
+ return is_kprobe_session(link->link.prog) ? err : 0;
}
static void
kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *data)
{
struct bpf_kprobe_multi_link *link;
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
- kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
+ kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
+ fregs, true, data);
}
static int symbols_cmp_r(const void *a, const void *b, const void *priv)
@@ -2973,7 +2992,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (sizeof(u64) != sizeof(void *))
return -EOPNOTSUPP;
- if (prog->expected_attach_type != BPF_TRACE_KPROBE_MULTI)
+ if (!is_kprobe_multi(prog))
return -EINVAL;
flags = attr->link_create.kprobe_multi.flags;
@@ -3054,10 +3073,12 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (err)
goto error;
- if (flags & BPF_F_KPROBE_MULTI_RETURN)
- link->fp.exit_handler = kprobe_multi_link_exit_handler;
- else
+ if (!(flags & BPF_F_KPROBE_MULTI_RETURN))
link->fp.entry_handler = kprobe_multi_link_handler;
+ if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog))
+ link->fp.exit_handler = kprobe_multi_link_exit_handler;
+ if (is_kprobe_session(prog))
+ link->fp.entry_data_size = sizeof(u64);
link->addrs = addrs;
link->cookies = cookies;
@@ -3122,7 +3143,9 @@ struct bpf_uprobe {
loff_t offset;
unsigned long ref_ctr_offset;
u64 cookie;
+ struct uprobe *uprobe;
struct uprobe_consumer consumer;
+ bool session;
};
struct bpf_uprobe_multi_link {
@@ -3135,20 +3158,20 @@ struct bpf_uprobe_multi_link {
};
struct bpf_uprobe_multi_run_ctx {
- struct bpf_run_ctx run_ctx;
+ struct bpf_session_run_ctx session_ctx;
unsigned long entry_ip;
struct bpf_uprobe *uprobe;
};
-static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes,
- u32 cnt)
+static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt)
{
u32 i;
- for (i = 0; i < cnt; i++) {
- uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset,
- &uprobes[i].consumer);
- }
+ for (i = 0; i < cnt; i++)
+ uprobe_unregister_nosync(uprobes[i].uprobe, &uprobes[i].consumer);
+
+ if (cnt)
+ uprobe_unregister_sync();
}
static void bpf_uprobe_multi_link_release(struct bpf_link *link)
@@ -3156,7 +3179,7 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link)
struct bpf_uprobe_multi_link *umulti_link;
umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
- bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
+ bpf_uprobe_unregister(umulti_link->uprobes, umulti_link->cnt);
if (umulti_link->task)
put_task_struct(umulti_link->task);
path_put(&umulti_link->path);
@@ -3182,7 +3205,8 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
struct bpf_uprobe_multi_link *umulti_link;
u32 ucount = info->uprobe_multi.count;
int err = 0, i;
- long left;
+ char *p, *buf;
+ long left = 0;
if (!upath ^ !upath_size)
return -EINVAL;
@@ -3196,26 +3220,23 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
info->uprobe_multi.pid = umulti_link->task ?
task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
- if (upath) {
- char *p, *buf;
-
- upath_size = min_t(u32, upath_size, PATH_MAX);
-
- buf = kmalloc(upath_size, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- p = d_path(&umulti_link->path, buf, upath_size);
- if (IS_ERR(p)) {
- kfree(buf);
- return PTR_ERR(p);
- }
- upath_size = buf + upath_size - p;
- left = copy_to_user(upath, p, upath_size);
+ upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX;
+ buf = kmalloc(upath_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = d_path(&umulti_link->path, buf, upath_size);
+ if (IS_ERR(p)) {
kfree(buf);
- if (left)
- return -EFAULT;
- info->uprobe_multi.path_size = upath_size;
+ return PTR_ERR(p);
}
+ upath_size = buf + upath_size - p;
+
+ if (upath)
+ left = copy_to_user(upath, p, upath_size);
+ kfree(buf);
+ if (left)
+ return -EFAULT;
+ info->uprobe_multi.path_size = upath_size;
if (!uoffsets && !ucookies && !uref_ctr_offsets)
return 0;
@@ -3248,19 +3269,24 @@ static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
static int uprobe_prog_run(struct bpf_uprobe *uprobe,
unsigned long entry_ip,
- struct pt_regs *regs)
+ struct pt_regs *regs,
+ bool is_return, void *data)
{
struct bpf_uprobe_multi_link *link = uprobe->link;
struct bpf_uprobe_multi_run_ctx run_ctx = {
+ .session_ctx = {
+ .is_return = is_return,
+ .data = data,
+ },
.entry_ip = entry_ip,
.uprobe = uprobe,
};
struct bpf_prog *prog = link->link.prog;
bool sleepable = prog->sleepable;
struct bpf_run_ctx *old_run_ctx;
- int err = 0;
+ int err;
- if (link->task && current != link->task)
+ if (link->task && !same_thread_group(current, link->task))
return 0;
if (sleepable)
@@ -3270,7 +3296,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
migrate_disable();
- old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
@@ -3284,8 +3310,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
}
static bool
-uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx,
- struct mm_struct *mm)
+uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
{
struct bpf_uprobe *uprobe;
@@ -3294,28 +3319,36 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx
}
static int
-uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs)
+uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
+ __u64 *data)
{
struct bpf_uprobe *uprobe;
+ int ret;
uprobe = container_of(con, struct bpf_uprobe, consumer);
- return uprobe_prog_run(uprobe, instruction_pointer(regs), regs);
+ ret = uprobe_prog_run(uprobe, instruction_pointer(regs), regs, false, data);
+ if (uprobe->session)
+ return ret ? UPROBE_HANDLER_IGNORE : 0;
+ return 0;
}
static int
-uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs)
+uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
+ __u64 *data)
{
struct bpf_uprobe *uprobe;
uprobe = container_of(con, struct bpf_uprobe, consumer);
- return uprobe_prog_run(uprobe, func, regs);
+ uprobe_prog_run(uprobe, func, regs, true, data);
+ return 0;
}
static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
struct bpf_uprobe_multi_run_ctx *run_ctx;
- run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
+ run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
+ session_ctx.run_ctx);
return run_ctx->entry_ip;
}
@@ -3323,7 +3356,8 @@ static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
{
struct bpf_uprobe_multi_run_ctx *run_ctx;
- run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx);
+ run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
+ session_ctx.run_ctx);
return run_ctx->uprobe->cookie;
}
@@ -3347,7 +3381,7 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (sizeof(u64) != sizeof(void *))
return -EOPNOTSUPP;
- if (prog->expected_attach_type != BPF_TRACE_UPROBE_MULTI)
+ if (!is_uprobe_multi(prog))
return -EINVAL;
flags = attr->link_create.uprobe_multi.flags;
@@ -3361,8 +3395,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
cnt = attr->link_create.uprobe_multi.cnt;
+ pid = attr->link_create.uprobe_multi.pid;
- if (!upath || !uoffsets || !cnt)
+ if (!upath || !uoffsets || !cnt || pid < 0)
return -EINVAL;
if (cnt > MAX_UPROBE_MULTI_CNT)
return -E2BIG;
@@ -3386,11 +3421,8 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
goto error_path_put;
}
- pid = attr->link_create.uprobe_multi.pid;
if (pid) {
- rcu_read_lock();
- task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
- rcu_read_unlock();
+ task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
if (!task) {
err = -ESRCH;
goto error_path_put;
@@ -3425,11 +3457,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
uprobes[i].link = link;
- if (flags & BPF_F_UPROBE_MULTI_RETURN)
- uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
- else
+ if (!(flags & BPF_F_UPROBE_MULTI_RETURN))
uprobes[i].consumer.handler = uprobe_multi_link_handler;
-
+ if (flags & BPF_F_UPROBE_MULTI_RETURN || is_uprobe_session(prog))
+ uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
+ if (is_uprobe_session(prog))
+ uprobes[i].session = true;
if (pid)
uprobes[i].consumer.filter = uprobe_multi_link_filter;
}
@@ -3444,22 +3477,26 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
&bpf_uprobe_multi_link_lops, prog);
for (i = 0; i < cnt; i++) {
- err = uprobe_register_refctr(d_real_inode(link->path.dentry),
- uprobes[i].offset,
- uprobes[i].ref_ctr_offset,
- &uprobes[i].consumer);
- if (err) {
- bpf_uprobe_unregister(&path, uprobes, i);
- goto error_free;
+ uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry),
+ uprobes[i].offset,
+ uprobes[i].ref_ctr_offset,
+ &uprobes[i].consumer);
+ if (IS_ERR(uprobes[i].uprobe)) {
+ err = PTR_ERR(uprobes[i].uprobe);
+ link->cnt = i;
+ goto error_unregister;
}
}
err = bpf_link_prime(&link->link, &link_primer);
if (err)
- goto error_free;
+ goto error_unregister;
return bpf_link_settle(&link_primer);
+error_unregister:
+ bpf_uprobe_unregister(uprobes, link->cnt);
+
error_free:
kvfree(uprobes);
kfree(link);
@@ -3483,3 +3520,65 @@ static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
return 0;
}
#endif /* CONFIG_UPROBES */
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc bool bpf_session_is_return(void)
+{
+ struct bpf_session_run_ctx *session_ctx;
+
+ session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
+ return session_ctx->is_return;
+}
+
+__bpf_kfunc __u64 *bpf_session_cookie(void)
+{
+ struct bpf_session_run_ctx *session_ctx;
+
+ session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
+ return session_ctx->data;
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(kprobe_multi_kfunc_set_ids)
+BTF_ID_FLAGS(func, bpf_session_is_return)
+BTF_ID_FLAGS(func, bpf_session_cookie)
+BTF_KFUNCS_END(kprobe_multi_kfunc_set_ids)
+
+static int bpf_kprobe_multi_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+ if (!btf_id_set8_contains(&kprobe_multi_kfunc_set_ids, kfunc_id))
+ return 0;
+
+ if (!is_kprobe_session(prog) && !is_uprobe_session(prog))
+ return -EACCES;
+
+ return 0;
+}
+
+static const struct btf_kfunc_id_set bpf_kprobe_multi_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &kprobe_multi_kfunc_set_ids,
+ .filter = bpf_kprobe_multi_filter,
+};
+
+static int __init bpf_kprobe_multi_kfuncs_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_kprobe_multi_kfunc_set);
+}
+
+late_initcall(bpf_kprobe_multi_kfuncs_init);
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
+ u64 value)
+{
+ if (type != PIDTYPE_PID && type != PIDTYPE_TGID)
+ return -EINVAL;
+
+ return bpf_send_signal_common(sig, type, task, value);
+}
+
+__bpf_kfunc_end_defs();