summaryrefslogtreecommitdiff
path: root/kernel/trace/bpf_trace.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/bpf_trace.c')
-rw-r--r--kernel/trace/bpf_trace.c1696
1 files changed, 1181 insertions, 515 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f47274de012b..d57727abaade 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -22,7 +22,7 @@
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/key.h>
-#include <linux/verification.h>
+#include <linux/namei.h>
#include <net/bpf_sk_storage.h>
@@ -40,6 +40,9 @@
#define bpf_event_rcu_dereference(p) \
rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))
+#define MAX_UPROBE_MULTI_CNT (1U << 20)
+#define MAX_KPROBE_MULTI_CNT (1U << 20)
+
#ifdef CONFIG_MODULES
struct bpf_trace_module {
struct module *module;
@@ -86,6 +89,9 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
+static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx);
+static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
+
/**
* trace_call_bpf - invoke BPF program
* @call: tracepoint event
@@ -113,6 +119,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
* and don't send kprobe event into ring-buffer,
* so return zero here
*/
+ rcu_read_lock();
+ bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
+ rcu_read_unlock();
ret = 0;
goto out;
}
@@ -223,17 +232,6 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.arg3_type = ARG_ANYTHING,
};
-static __always_inline int
-bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
-{
- int ret;
-
- ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
- memset(dst, 0, size);
- return ret;
-}
-
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
@@ -358,19 +356,6 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = {
.arg3_type = ARG_CONST_SIZE,
};
-static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
-{
- if (!capable(CAP_SYS_ADMIN))
- return NULL;
-
- pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
- current->comm, task_pid_nr(current));
-
- return &bpf_probe_write_user_proto;
-}
-
-static DEFINE_RAW_SPINLOCK(trace_printk_lock);
-
#define MAX_TRACE_PRINTK_VARARGS 3
#define BPF_TRACE_PRINTK_SIZE 1024
@@ -378,23 +363,22 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64, arg2, u64, arg3)
{
u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
- u32 *bin_args;
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ .get_buf = true,
+ };
int ret;
- ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
- MAX_TRACE_PRINTK_VARARGS);
+ ret = bpf_bprintf_prepare(fmt, fmt_size, args,
+ MAX_TRACE_PRINTK_VARARGS, &data);
if (ret < 0)
return ret;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+ ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
- trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+ trace_bpf_trace_printk(data.buf);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return ret;
}
@@ -407,7 +391,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
.arg2_type = ARG_CONST_SIZE,
};
-static void __set_printk_clr_event(void)
+static void __set_printk_clr_event(struct work_struct *work)
{
/*
* This program might be calling bpf_trace_printk,
@@ -420,37 +404,37 @@ static void __set_printk_clr_event(void)
if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
pr_warn_ratelimited("could not enable bpf_trace_printk events");
}
+static DECLARE_WORK(set_printk_work, __set_printk_clr_event);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
- __set_printk_clr_event();
+ schedule_work(&set_printk_work);
return &bpf_trace_printk_proto;
}
-BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
+BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args,
u32, data_len)
{
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ .get_buf = true,
+ };
int ret, num_args;
- u32 *bin_args;
if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
- (data_len && !data))
+ (data_len && !args))
return -EINVAL;
num_args = data_len / 8;
- ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+ ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
if (ret < 0)
return ret;
- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+ ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args);
- trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+ trace_bpf_trace_printk(data.buf);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return ret;
}
@@ -467,28 +451,30 @@ static const struct bpf_func_proto bpf_trace_vprintk_proto = {
const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
{
- __set_printk_clr_event();
+ schedule_work(&set_printk_work);
return &bpf_trace_vprintk_proto;
}
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
- const void *, data, u32, data_len)
+ const void *, args, u32, data_len)
{
+ struct bpf_bprintf_data data = {
+ .get_bin_args = true,
+ };
int err, num_args;
- u32 *bin_args;
if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
- (data_len && !data))
+ (data_len && !args))
return -EINVAL;
num_args = data_len / 8;
- err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+ err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data);
if (err < 0)
return err;
- seq_bprintf(m, fmt, bin_args);
+ seq_bprintf(m, fmt, data.bin_args);
- bpf_bprintf_cleanup();
+ bpf_bprintf_cleanup(&data);
return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}
@@ -585,7 +571,7 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
return value;
}
-static const struct bpf_func_proto bpf_perf_event_read_proto = {
+const struct bpf_func_proto bpf_perf_event_read_proto = {
.func = bpf_perf_event_read,
.gpl_only = true,
.ret_type = RET_INTEGER,
@@ -620,9 +606,15 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
.arg4_type = ARG_CONST_SIZE,
};
+const struct bpf_func_proto *bpf_get_perf_event_read_value_proto(void)
+{
+ return &bpf_perf_event_read_value_proto;
+}
+
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
- u64 flags, struct perf_sample_data *sd)
+ u64 flags, struct perf_raw_record *raw,
+ struct perf_sample_data *sd)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
@@ -647,6 +639,8 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
if (unlikely(event->oncpu != cpu))
return -EOPNOTSUPP;
+ perf_sample_save_raw_data(sd, event, raw);
+
return perf_event_output(event, sd, regs);
}
@@ -663,8 +657,7 @@ static DEFINE_PER_CPU(int, bpf_trace_nest_level);
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags, void *, data, u64, size)
{
- struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
- int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
+ struct bpf_trace_sample_data *sds;
struct perf_raw_record raw = {
.frag = {
.size = size,
@@ -672,7 +665,11 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
},
};
struct perf_sample_data *sd;
- int err;
+ int nest_level, err;
+
+ preempt_disable();
+ sds = this_cpu_ptr(&bpf_trace_sds);
+ nest_level = this_cpu_inc_return(bpf_trace_nest_level);
if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
err = -EBUSY;
@@ -687,13 +684,11 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
}
perf_sample_data_init(sd, 0, 0);
- sd->raw = &raw;
- sd->sample_flags |= PERF_SAMPLE_RAW;
-
- err = __bpf_perf_event_output(regs, map, flags, sd);
+ err = __bpf_perf_event_output(regs, map, flags, &raw, sd);
out:
this_cpu_dec(bpf_trace_nest_level);
+ preempt_enable();
return err;
}
@@ -718,7 +713,6 @@ static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
- int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
struct perf_raw_frag frag = {
.copy = ctx_copy,
.size = ctx_size,
@@ -735,8 +729,12 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
};
struct perf_sample_data *sd;
struct pt_regs *regs;
+ int nest_level;
u64 ret;
+ preempt_disable();
+ nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
+
if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
ret = -EBUSY;
goto out;
@@ -746,12 +744,11 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
perf_fetch_caller_regs(regs);
perf_sample_data_init(sd, 0, 0);
- sd->raw = &raw;
- sd->sample_flags |= PERF_SAMPLE_RAW;
- ret = __bpf_perf_event_output(regs, map, flags, sd);
+ ret = __bpf_perf_event_output(regs, map, flags, &raw, sd);
out:
this_cpu_dec(bpf_event_output_nest_level);
+ preempt_enable();
return ret;
}
@@ -783,8 +780,7 @@ BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
return (unsigned long) task_pt_regs(task);
}
-BTF_ID_LIST(bpf_task_pt_regs_ids)
-BTF_ID(struct, pt_regs)
+BTF_ID_LIST_SINGLE(bpf_task_pt_regs_ids, struct, pt_regs)
const struct bpf_func_proto bpf_task_pt_regs_proto = {
.func = bpf_task_pt_regs,
@@ -795,34 +791,13 @@ const struct bpf_func_proto bpf_task_pt_regs_proto = {
.ret_btf_id = &bpf_task_pt_regs_ids[0],
};
-BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
-{
- struct bpf_array *array = container_of(map, struct bpf_array, map);
- struct cgroup *cgrp;
-
- if (unlikely(idx >= array->map.max_entries))
- return -E2BIG;
-
- cgrp = READ_ONCE(array->ptrs[idx]);
- if (unlikely(!cgrp))
- return -EAGAIN;
-
- return task_under_cgroup_hierarchy(current, cgrp);
-}
-
-static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
- .func = bpf_current_task_under_cgroup,
- .gpl_only = false,
- .ret_type = RET_INTEGER,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_ANYTHING,
-};
-
struct send_signal_irq_work {
struct irq_work irq_work;
struct task_struct *task;
u32 sig;
enum pid_type type;
+ bool has_siginfo;
+ struct kernel_siginfo info;
};
static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
@@ -830,29 +805,49 @@ static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
static void do_bpf_send_signal(struct irq_work *entry)
{
struct send_signal_irq_work *work;
+ struct kernel_siginfo *siginfo;
work = container_of(entry, struct send_signal_irq_work, irq_work);
- group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
+ siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV;
+
+ group_send_sig_info(work->sig, siginfo, work->task, work->type);
+ put_task_struct(work->task);
}
-static int bpf_send_signal_common(u32 sig, enum pid_type type)
+static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value)
{
struct send_signal_irq_work *work = NULL;
+ struct kernel_siginfo info;
+ struct kernel_siginfo *siginfo;
+
+ if (!task) {
+ task = current;
+ siginfo = SEND_SIG_PRIV;
+ } else {
+ clear_siginfo(&info);
+ info.si_signo = sig;
+ info.si_errno = 0;
+ info.si_code = SI_KERNEL;
+ info.si_pid = 0;
+ info.si_uid = 0;
+ info.si_value.sival_ptr = (void *)(unsigned long)value;
+ siginfo = &info;
+ }
/* Similar to bpf_probe_write_user, task needs to be
* in a sound condition and kernel memory access be
* permitted in order to send signal to the current
* task.
*/
- if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
+ if (unlikely(task->flags & (PF_KTHREAD | PF_EXITING)))
return -EPERM;
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;
/* Task should not be pid=1 to avoid kernel panic. */
- if (unlikely(is_global_init(current)))
+ if (unlikely(is_global_init(task)))
return -EPERM;
- if (irqs_disabled()) {
+ if (preempt_count() != 0 || irqs_disabled()) {
/* Do an early check on signal validity. Otherwise,
* the error is lost in deferred irq_work.
*/
@@ -867,22 +862,25 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
* to the irq_work. The current task may change when queued
* irq works get executed.
*/
- work->task = current;
+ work->task = get_task_struct(task);
+ work->has_siginfo = siginfo == &info;
+ if (work->has_siginfo)
+ copy_siginfo(&work->info, &info);
work->sig = sig;
work->type = type;
irq_work_queue(&work->irq_work);
return 0;
}
- return group_send_sig_info(sig, SEND_SIG_PRIV, current, type);
+ return group_send_sig_info(sig, siginfo, task, type);
}
BPF_CALL_1(bpf_send_signal, u32, sig)
{
- return bpf_send_signal_common(sig, PIDTYPE_TGID);
+ return bpf_send_signal_common(sig, PIDTYPE_TGID, NULL, 0);
}
-static const struct bpf_func_proto bpf_send_signal_proto = {
+const struct bpf_func_proto bpf_send_signal_proto = {
.func = bpf_send_signal,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -891,25 +889,35 @@ static const struct bpf_func_proto bpf_send_signal_proto = {
BPF_CALL_1(bpf_send_signal_thread, u32, sig)
{
- return bpf_send_signal_common(sig, PIDTYPE_PID);
+ return bpf_send_signal_common(sig, PIDTYPE_PID, NULL, 0);
}
-static const struct bpf_func_proto bpf_send_signal_thread_proto = {
+const struct bpf_func_proto bpf_send_signal_thread_proto = {
.func = bpf_send_signal_thread,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_d_path, struct path *, path, char *, buf, u32, sz)
+BPF_CALL_3(bpf_d_path, const struct path *, path, char *, buf, u32, sz)
{
+ struct path copy;
long len;
char *p;
if (!sz)
return 0;
- p = d_path(path, buf, sz);
+ /*
+ * The path pointer is verified as trusted and safe to use,
+ * but let's double check it's valid anyway to workaround
+ * potentially broken verifier.
+ */
+ len = copy_from_kernel_nofault(&copy, path, sizeof(*path));
+ if (len < 0)
+ return len;
+
+ p = d_path(&copy, buf, sz);
if (IS_ERR(p)) {
len = PTR_ERR(p);
} else {
@@ -1034,25 +1042,27 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
.arg1_type = ARG_PTR_TO_CTX,
};
-#ifdef CONFIG_X86_KERNEL_IBT
-static unsigned long get_entry_ip(unsigned long fentry_ip)
+static inline unsigned long get_entry_ip(unsigned long fentry_ip)
{
- u32 instr;
-
- /* Being extra safe in here in case entry ip is on the page-edge. */
- if (get_kernel_nofault(instr, (u32 *) fentry_ip - 1))
- return fentry_ip;
- if (is_endbr(instr))
+#ifdef CONFIG_X86_KERNEL_IBT
+ if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE)))
fentry_ip -= ENDBR_INSN_SIZE;
+#endif
return fentry_ip;
}
-#else
-#define get_entry_ip(fentry_ip) fentry_ip
-#endif
BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
{
- struct kprobe *kp = kprobe_running();
+ struct bpf_trace_run_ctx *run_ctx __maybe_unused;
+ struct kprobe *kp;
+
+#ifdef CONFIG_UPROBES
+ run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
+ if (run_ctx->is_uprobe)
+ return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr;
+#endif
+
+ kp = kprobe_running();
if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY))
return 0;
@@ -1091,6 +1101,30 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs)
+{
+ return bpf_uprobe_multi_entry_ip(current->bpf_ctx);
+}
+
+static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = {
+ .func = bpf_get_func_ip_uprobe_multi,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs)
+{
+ return bpf_uprobe_multi_cookie(current->bpf_ctx);
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = {
+ .func = bpf_get_attach_cookie_uprobe_multi,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
{
struct bpf_trace_run_ctx *run_ctx;
@@ -1135,9 +1169,6 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = {
BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
{
-#ifndef CONFIG_X86
- return -ENOENT;
-#else
static const u32 br_entry_size = sizeof(struct perf_branch_entry);
u32 entry_cnt = size / br_entry_size;
@@ -1150,10 +1181,9 @@ BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
return -ENOENT;
return entry_cnt * br_entry_size;
-#endif
}
-static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
+const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
.func = bpf_get_branch_snapshot,
.gpl_only = true,
.ret_type = RET_INTEGER,
@@ -1177,7 +1207,8 @@ static const struct bpf_func_proto bpf_get_func_arg_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
- .arg3_type = ARG_PTR_TO_LONG,
+ .arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
+ .arg3_size = sizeof(u64),
};
BPF_CALL_2(get_func_ret, void *, ctx, u64 *, value)
@@ -1193,7 +1224,8 @@ static const struct bpf_func_proto bpf_get_func_ret_proto = {
.func = get_func_ret,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
- .arg2_type = ARG_PTR_TO_LONG,
+ .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED,
+ .arg2_size = sizeof(u64),
};
BPF_CALL_1(get_func_arg_cnt, void *, ctx)
@@ -1208,318 +1240,64 @@ static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
-#ifdef CONFIG_KEYS
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "kfuncs which will be used in BPF programs");
-
-/**
- * bpf_lookup_user_key - lookup a key by its serial
- * @serial: key handle serial number
- * @flags: lookup-specific flags
- *
- * Search a key with a given *serial* and the provided *flags*.
- * If found, increment the reference count of the key by one, and
- * return it in the bpf_key structure.
- *
- * The bpf_key structure must be passed to bpf_key_put() when done
- * with it, so that the key reference count is decremented and the
- * bpf_key structure is freed.
- *
- * Permission checks are deferred to the time the key is used by
- * one of the available key-specific kfuncs.
- *
- * Set *flags* with KEY_LOOKUP_CREATE, to attempt creating a requested
- * special keyring (e.g. session keyring), if it doesn't yet exist.
- * Set *flags* with KEY_LOOKUP_PARTIAL, to lookup a key without waiting
- * for the key construction, and to retrieve uninstantiated keys (keys
- * without data attached to them).
- *
- * Return: a bpf_key pointer with a valid key pointer if the key is found, a
- * NULL pointer otherwise.
- */
-struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags)
+static const struct bpf_func_proto *
+bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
- key_ref_t key_ref;
- struct bpf_key *bkey;
-
- if (flags & ~KEY_LOOKUP_ALL)
- return NULL;
-
- /*
- * Permission check is deferred until the key is used, as the
- * intent of the caller is unknown here.
- */
- key_ref = lookup_user_key(serial, flags, KEY_DEFER_PERM_CHECK);
- if (IS_ERR(key_ref))
- return NULL;
+ const struct bpf_func_proto *func_proto;
- bkey = kmalloc(sizeof(*bkey), GFP_KERNEL);
- if (!bkey) {
- key_put(key_ref_to_ptr(key_ref));
- return NULL;
+ switch (func_id) {
+ case BPF_FUNC_get_smp_processor_id:
+ return &bpf_get_smp_processor_id_proto;
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ case BPF_FUNC_probe_read:
+ return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
+ NULL : &bpf_probe_read_compat_proto;
+ case BPF_FUNC_probe_read_str:
+ return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
+ NULL : &bpf_probe_read_compat_str_proto;
+#endif
+ case BPF_FUNC_get_func_ip:
+ return &bpf_get_func_ip_proto_tracing;
+ default:
+ break;
}
- bkey->key = key_ref_to_ptr(key_ref);
- bkey->has_ref = true;
-
- return bkey;
-}
+ func_proto = bpf_base_func_proto(func_id, prog);
+ if (func_proto)
+ return func_proto;
-/**
- * bpf_lookup_system_key - lookup a key by a system-defined ID
- * @id: key ID
- *
- * Obtain a bpf_key structure with a key pointer set to the passed key ID.
- * The key pointer is marked as invalid, to prevent bpf_key_put() from
- * attempting to decrement the key reference count on that pointer. The key
- * pointer set in such way is currently understood only by
- * verify_pkcs7_signature().
- *
- * Set *id* to one of the values defined in include/linux/verification.h:
- * 0 for the primary keyring (immutable keyring of system keys);
- * VERIFY_USE_SECONDARY_KEYRING for both the primary and secondary keyring
- * (where keys can be added only if they are vouched for by existing keys
- * in those keyrings); VERIFY_USE_PLATFORM_KEYRING for the platform
- * keyring (primarily used by the integrity subsystem to verify a kexec'ed
- * kerned image and, possibly, the initramfs signature).
- *
- * Return: a bpf_key pointer with an invalid key pointer set from the
- * pre-determined ID on success, a NULL pointer otherwise
- */
-struct bpf_key *bpf_lookup_system_key(u64 id)
-{
- struct bpf_key *bkey;
-
- if (system_keyring_id_check(id) < 0)
+ if (!bpf_token_capable(prog->aux->token, CAP_SYS_ADMIN))
return NULL;
- bkey = kmalloc(sizeof(*bkey), GFP_ATOMIC);
- if (!bkey)
+ switch (func_id) {
+ case BPF_FUNC_probe_write_user:
+ return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
+ NULL : &bpf_probe_write_user_proto;
+ default:
return NULL;
-
- bkey->key = (struct key *)(unsigned long)id;
- bkey->has_ref = false;
-
- return bkey;
+ }
}
-/**
- * bpf_key_put - decrement key reference count if key is valid and free bpf_key
- * @bkey: bpf_key structure
- *
- * Decrement the reference count of the key inside *bkey*, if the pointer
- * is valid, and free *bkey*.
- */
-void bpf_key_put(struct bpf_key *bkey)
+static bool is_kprobe_multi(const struct bpf_prog *prog)
{
- if (bkey->has_ref)
- key_put(bkey->key);
-
- kfree(bkey);
+ return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ||
+ prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
}
-#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
-/**
- * bpf_verify_pkcs7_signature - verify a PKCS#7 signature
- * @data_ptr: data to verify
- * @sig_ptr: signature of the data
- * @trusted_keyring: keyring with keys trusted for signature verification
- *
- * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr*
- * with keys in a keyring referenced by *trusted_keyring*.
- *
- * Return: 0 on success, a negative value on error.
- */
-int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr,
- struct bpf_dynptr_kern *sig_ptr,
- struct bpf_key *trusted_keyring)
+static inline bool is_kprobe_session(const struct bpf_prog *prog)
{
- int ret;
-
- if (trusted_keyring->has_ref) {
- /*
- * Do the permission check deferred in bpf_lookup_user_key().
- * See bpf_lookup_user_key() for more details.
- *
- * A call to key_task_permission() here would be redundant, as
- * it is already done by keyring_search() called by
- * find_asymmetric_key().
- */
- ret = key_validate(trusted_keyring->key);
- if (ret < 0)
- return ret;
- }
-
- return verify_pkcs7_signature(data_ptr->data,
- bpf_dynptr_get_size(data_ptr),
- sig_ptr->data,
- bpf_dynptr_get_size(sig_ptr),
- trusted_keyring->key,
- VERIFYING_UNSPECIFIED_SIGNATURE, NULL,
- NULL);
+ return prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
}
-#endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
-
-__diag_pop();
-BTF_SET8_START(key_sig_kfunc_set)
-BTF_ID_FLAGS(func, bpf_lookup_user_key, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE)
-BTF_ID_FLAGS(func, bpf_lookup_system_key, KF_ACQUIRE | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_key_put, KF_RELEASE)
-#ifdef CONFIG_SYSTEM_DATA_VERIFICATION
-BTF_ID_FLAGS(func, bpf_verify_pkcs7_signature, KF_SLEEPABLE)
-#endif
-BTF_SET8_END(key_sig_kfunc_set)
-
-static const struct btf_kfunc_id_set bpf_key_sig_kfunc_set = {
- .owner = THIS_MODULE,
- .set = &key_sig_kfunc_set,
-};
-
-static int __init bpf_key_sig_kfuncs_init(void)
+static inline bool is_uprobe_multi(const struct bpf_prog *prog)
{
- return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
- &bpf_key_sig_kfunc_set);
+ return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI ||
+ prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
}
-late_initcall(bpf_key_sig_kfuncs_init);
-#endif /* CONFIG_KEYS */
-
-static const struct bpf_func_proto *
-bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+static inline bool is_uprobe_session(const struct bpf_prog *prog)
{
- switch (func_id) {
- case BPF_FUNC_map_lookup_elem:
- return &bpf_map_lookup_elem_proto;
- case BPF_FUNC_map_update_elem:
- return &bpf_map_update_elem_proto;
- case BPF_FUNC_map_delete_elem:
- return &bpf_map_delete_elem_proto;
- case BPF_FUNC_map_push_elem:
- return &bpf_map_push_elem_proto;
- case BPF_FUNC_map_pop_elem:
- return &bpf_map_pop_elem_proto;
- case BPF_FUNC_map_peek_elem:
- return &bpf_map_peek_elem_proto;
- case BPF_FUNC_map_lookup_percpu_elem:
- return &bpf_map_lookup_percpu_elem_proto;
- case BPF_FUNC_ktime_get_ns:
- return &bpf_ktime_get_ns_proto;
- case BPF_FUNC_ktime_get_boot_ns:
- return &bpf_ktime_get_boot_ns_proto;
- case BPF_FUNC_tail_call:
- return &bpf_tail_call_proto;
- case BPF_FUNC_get_current_pid_tgid:
- return &bpf_get_current_pid_tgid_proto;
- case BPF_FUNC_get_current_task:
- return &bpf_get_current_task_proto;
- case BPF_FUNC_get_current_task_btf:
- return &bpf_get_current_task_btf_proto;
- case BPF_FUNC_task_pt_regs:
- return &bpf_task_pt_regs_proto;
- case BPF_FUNC_get_current_uid_gid:
- return &bpf_get_current_uid_gid_proto;
- case BPF_FUNC_get_current_comm:
- return &bpf_get_current_comm_proto;
- case BPF_FUNC_trace_printk:
- return bpf_get_trace_printk_proto();
- case BPF_FUNC_get_smp_processor_id:
- return &bpf_get_smp_processor_id_proto;
- case BPF_FUNC_get_numa_node_id:
- return &bpf_get_numa_node_id_proto;
- case BPF_FUNC_perf_event_read:
- return &bpf_perf_event_read_proto;
- case BPF_FUNC_current_task_under_cgroup:
- return &bpf_current_task_under_cgroup_proto;
- case BPF_FUNC_get_prandom_u32:
- return &bpf_get_prandom_u32_proto;
- case BPF_FUNC_probe_write_user:
- return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
- NULL : bpf_get_probe_write_proto();
- case BPF_FUNC_probe_read_user:
- return &bpf_probe_read_user_proto;
- case BPF_FUNC_probe_read_kernel:
- return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
- NULL : &bpf_probe_read_kernel_proto;
- case BPF_FUNC_probe_read_user_str:
- return &bpf_probe_read_user_str_proto;
- case BPF_FUNC_probe_read_kernel_str:
- return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
- NULL : &bpf_probe_read_kernel_str_proto;
-#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
- case BPF_FUNC_probe_read:
- return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
- NULL : &bpf_probe_read_compat_proto;
- case BPF_FUNC_probe_read_str:
- return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
- NULL : &bpf_probe_read_compat_str_proto;
-#endif
-#ifdef CONFIG_CGROUPS
- case BPF_FUNC_get_current_cgroup_id:
- return &bpf_get_current_cgroup_id_proto;
- case BPF_FUNC_get_current_ancestor_cgroup_id:
- return &bpf_get_current_ancestor_cgroup_id_proto;
- case BPF_FUNC_cgrp_storage_get:
- return &bpf_cgrp_storage_get_proto;
- case BPF_FUNC_cgrp_storage_delete:
- return &bpf_cgrp_storage_delete_proto;
-#endif
- case BPF_FUNC_send_signal:
- return &bpf_send_signal_proto;
- case BPF_FUNC_send_signal_thread:
- return &bpf_send_signal_thread_proto;
- case BPF_FUNC_perf_event_read_value:
- return &bpf_perf_event_read_value_proto;
- case BPF_FUNC_get_ns_current_pid_tgid:
- return &bpf_get_ns_current_pid_tgid_proto;
- case BPF_FUNC_ringbuf_output:
- return &bpf_ringbuf_output_proto;
- case BPF_FUNC_ringbuf_reserve:
- return &bpf_ringbuf_reserve_proto;
- case BPF_FUNC_ringbuf_submit:
- return &bpf_ringbuf_submit_proto;
- case BPF_FUNC_ringbuf_discard:
- return &bpf_ringbuf_discard_proto;
- case BPF_FUNC_ringbuf_query:
- return &bpf_ringbuf_query_proto;
- case BPF_FUNC_jiffies64:
- return &bpf_jiffies64_proto;
- case BPF_FUNC_get_task_stack:
- return &bpf_get_task_stack_proto;
- case BPF_FUNC_copy_from_user:
- return &bpf_copy_from_user_proto;
- case BPF_FUNC_copy_from_user_task:
- return &bpf_copy_from_user_task_proto;
- case BPF_FUNC_snprintf_btf:
- return &bpf_snprintf_btf_proto;
- case BPF_FUNC_per_cpu_ptr:
- return &bpf_per_cpu_ptr_proto;
- case BPF_FUNC_this_cpu_ptr:
- return &bpf_this_cpu_ptr_proto;
- case BPF_FUNC_task_storage_get:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_get_recur_proto;
- return &bpf_task_storage_get_proto;
- case BPF_FUNC_task_storage_delete:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_delete_recur_proto;
- return &bpf_task_storage_delete_proto;
- case BPF_FUNC_for_each_map_elem:
- return &bpf_for_each_map_elem_proto;
- case BPF_FUNC_snprintf:
- return &bpf_snprintf_proto;
- case BPF_FUNC_get_func_ip:
- return &bpf_get_func_ip_proto_tracing;
- case BPF_FUNC_get_branch_snapshot:
- return &bpf_get_branch_snapshot_proto;
- case BPF_FUNC_find_vma:
- return &bpf_find_vma_proto;
- case BPF_FUNC_trace_vprintk:
- return bpf_get_trace_vprintk_proto();
- default:
- return bpf_base_func_proto(func_id);
- }
+ return prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
}
static const struct bpf_func_proto *
@@ -1531,19 +1309,23 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto;
case BPF_FUNC_get_stack:
- return &bpf_get_stack_proto;
+ return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
case BPF_FUNC_override_return:
return &bpf_override_return_proto;
#endif
case BPF_FUNC_get_func_ip:
- return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
- &bpf_get_func_ip_proto_kprobe_multi :
- &bpf_get_func_ip_proto_kprobe;
+ if (is_kprobe_multi(prog))
+ return &bpf_get_func_ip_proto_kprobe_multi;
+ if (is_uprobe_multi(prog))
+ return &bpf_get_func_ip_proto_uprobe_multi;
+ return &bpf_get_func_ip_proto_kprobe;
case BPF_FUNC_get_attach_cookie:
- return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ?
- &bpf_get_attach_cookie_proto_kmulti :
- &bpf_get_attach_cookie_proto_trace;
+ if (is_kprobe_multi(prog))
+ return &bpf_get_attach_cookie_proto_kmulti;
+ if (is_uprobe_multi(prog))
+ return &bpf_get_attach_cookie_proto_umulti;
+ return &bpf_get_attach_cookie_proto_trace;
default:
return bpf_tracing_func_proto(func_id, prog);
}
@@ -1556,8 +1338,6 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
{
if (off < 0 || off >= sizeof(struct pt_regs))
return false;
- if (type != BPF_READ)
- return false;
if (off % size != 0)
return false;
/*
@@ -1567,6 +1347,9 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
if (off + size > sizeof(struct pt_regs))
return false;
+ if (type == BPF_WRITE)
+ prog->aux->kprobe_write_ctx = true;
+
return true;
}
@@ -1787,7 +1570,7 @@ static struct pt_regs *get_bpf_raw_tp_regs(void)
struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
- if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) {
+ if (nest_level > ARRAY_SIZE(tp_regs->regs)) {
this_cpu_dec(bpf_raw_tp_nest_level);
return ERR_PTR(-EBUSY);
}
@@ -1893,6 +1676,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_stackid_proto_raw_tp;
case BPF_FUNC_get_stack:
return &bpf_get_stack_proto_raw_tp;
+ case BPF_FUNC_get_attach_cookie:
+ return &bpf_get_attach_cookie_proto_tracing;
default:
return bpf_tracing_func_proto(func_id, prog);
}
@@ -1955,6 +1740,9 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_func_arg_cnt:
return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL;
case BPF_FUNC_get_attach_cookie:
+ if (prog->type == BPF_PROG_TYPE_TRACING &&
+ prog->expected_attach_type == BPF_TRACE_RAW_TP)
+ return &bpf_get_attach_cookie_proto_tracing;
return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL;
default:
fn = raw_tp_prog_func_proto(func_id, prog);
@@ -2163,6 +1951,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
{
struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
+ struct bpf_prog *prog = NULL;
int ret;
mutex_lock(&bpf_event_mutex);
@@ -2171,9 +1960,10 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
goto unlock;
old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
+ if (!old_array)
+ goto put;
+
ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
- if (ret == -ENOENT)
- goto unlock;
if (ret < 0) {
bpf_prog_array_delete_safe(old_array, event->prog);
} else {
@@ -2181,11 +1971,23 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
bpf_prog_array_free_sleepable(old_array);
}
- bpf_prog_put(event->prog);
+put:
+ prog = event->prog;
event->prog = NULL;
unlock:
mutex_unlock(&bpf_event_mutex);
+
+ if (prog) {
+ /*
+ * It could be that the bpf_prog is not sleepable (and will be freed
+ * via normal RCU), but is called from a point that supports sleepable
+ * programs and uses tasks-trace-RCU.
+ */
+ synchronize_rcu_tasks_trace();
+
+ bpf_prog_put(prog);
+ }
}
int perf_event_query_prog_array(struct perf_event *event, void __user *info)
@@ -2248,23 +2050,32 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
{
struct module *mod;
- preempt_disable();
+ guard(rcu)();
mod = __module_address((unsigned long)btp);
module_put(mod);
- preempt_enable();
}
static __always_inline
-void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
+void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args)
{
+ struct bpf_prog *prog = link->link.prog;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_trace_run_ctx run_ctx;
+
cant_sleep();
if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
bpf_prog_inc_misses_counter(prog);
goto out;
}
+
+ run_ctx.bpf_cookie = link->cookie;
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+
rcu_read_lock();
(void) bpf_prog_run(prog, args);
rcu_read_unlock();
+
+ bpf_reset_run_ctx(old_run_ctx);
out:
this_cpu_dec(*(prog->active));
}
@@ -2293,12 +2104,12 @@ out:
#define __SEQ_0_11 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
#define BPF_TRACE_DEFN_x(x) \
- void bpf_trace_run##x(struct bpf_prog *prog, \
+ void bpf_trace_run##x(struct bpf_raw_tp_link *link, \
REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \
{ \
u64 args[x]; \
REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \
- __bpf_trace_run(prog, args); \
+ __bpf_trace_run(link, args); \
} \
EXPORT_SYMBOL_GPL(bpf_trace_run##x)
BPF_TRACE_DEFN_x(1);
@@ -2314,9 +2125,10 @@ BPF_TRACE_DEFN_x(10);
BPF_TRACE_DEFN_x(11);
BPF_TRACE_DEFN_x(12);
-static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
+int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
{
struct tracepoint *tp = btp->tp;
+ struct bpf_prog *prog = link->link.prog;
/*
* check that program doesn't access arguments beyond what's
@@ -2328,23 +2140,18 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
if (prog->aux->max_tp_access > btp->writable_size)
return -EINVAL;
- return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
- prog);
-}
-
-int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
-{
- return __bpf_probe_register(btp, prog);
+ return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link);
}
-int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
+int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link)
{
- return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
+ return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link);
}
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
- u64 *probe_offset, u64 *probe_addr)
+ u64 *probe_offset, u64 *probe_addr,
+ unsigned long *missed)
{
bool is_tracepoint, is_syscall_tp;
struct bpf_prog *prog;
@@ -2366,22 +2173,26 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
if (is_tracepoint || is_syscall_tp) {
*buf = is_tracepoint ? event->tp_event->tp->name
: event->tp_event->name;
- *fd_type = BPF_FD_TYPE_TRACEPOINT;
- *probe_offset = 0x0;
- *probe_addr = 0x0;
+ /* We allow NULL pointer for tracepoint */
+ if (fd_type)
+ *fd_type = BPF_FD_TYPE_TRACEPOINT;
+ if (probe_offset)
+ *probe_offset = 0x0;
+ if (probe_addr)
+ *probe_addr = 0x0;
} else {
/* kprobe/uprobe */
err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
if (flags & TRACE_EVENT_FL_KPROBE)
err = bpf_get_kprobe_info(event, fd_type, buf,
- probe_offset, probe_addr,
+ probe_offset, probe_addr, missed,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
if (flags & TRACE_EVENT_FL_UPROBE)
err = bpf_get_uprobe_info(event, fd_type, buf,
- probe_offset,
+ probe_offset, probe_addr,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
}
@@ -2457,6 +2268,12 @@ static int __init bpf_event_init(void)
fs_initcall(bpf_event_init);
#endif /* CONFIG_MODULES */
+struct bpf_session_run_ctx {
+ struct bpf_run_ctx run_ctx;
+ bool is_return;
+ void *data;
+};
+
#ifdef CONFIG_FPROBE
struct bpf_kprobe_multi_link {
struct bpf_link link;
@@ -2469,7 +2286,7 @@ struct bpf_kprobe_multi_link {
};
struct bpf_kprobe_multi_run_ctx {
- struct bpf_run_ctx run_ctx;
+ struct bpf_session_run_ctx session_ctx;
struct bpf_kprobe_multi_link *link;
unsigned long entry_ip;
};
@@ -2479,6 +2296,20 @@ struct user_syms {
char *buf;
};
+#ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS
+static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
+#define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
+#else
+#define bpf_kprobe_multi_pt_regs_ptr() (NULL)
+#endif
+
+static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip)
+{
+ unsigned long ip = ftrace_get_symaddr(fentry_ip);
+
+ return ip ? : fentry_ip;
+}
+
static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt)
{
unsigned long __user usymbol;
@@ -2555,9 +2386,87 @@ static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
kfree(kmulti_link);
}
+static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies);
+ u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
+ struct bpf_kprobe_multi_link *kmulti_link;
+ u32 ucount = info->kprobe_multi.count;
+ int err = 0, i;
+
+ if (!uaddrs ^ !ucount)
+ return -EINVAL;
+ if (ucookies && !ucount)
+ return -EINVAL;
+
+ kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
+ info->kprobe_multi.count = kmulti_link->cnt;
+ info->kprobe_multi.flags = kmulti_link->link.flags;
+ info->kprobe_multi.missed = kmulti_link->fp.nmissed;
+
+ if (!uaddrs)
+ return 0;
+ if (ucount < kmulti_link->cnt)
+ err = -ENOSPC;
+ else
+ ucount = kmulti_link->cnt;
+
+ if (ucookies) {
+ if (kmulti_link->cookies) {
+ if (copy_to_user(ucookies, kmulti_link->cookies, ucount * sizeof(u64)))
+ return -EFAULT;
+ } else {
+ for (i = 0; i < ucount; i++) {
+ if (put_user(0, ucookies + i))
+ return -EFAULT;
+ }
+ }
+ }
+
+ if (kallsyms_show_value(current_cred())) {
+ if (copy_to_user(uaddrs, kmulti_link->addrs, ucount * sizeof(u64)))
+ return -EFAULT;
+ } else {
+ for (i = 0; i < ucount; i++) {
+ if (put_user(0, uaddrs + i))
+ return -EFAULT;
+ }
+ }
+ return err;
+}
+
+#ifdef CONFIG_PROC_FS
+static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ struct bpf_kprobe_multi_link *kmulti_link;
+
+ kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
+
+ seq_printf(seq,
+ "kprobe_cnt:\t%u\n"
+ "missed:\t%lu\n",
+ kmulti_link->cnt,
+ kmulti_link->fp.nmissed);
+
+ seq_printf(seq, "%s\t %s\n", "cookie", "func");
+ for (int i = 0; i < kmulti_link->cnt; i++) {
+ seq_printf(seq,
+ "%llu\t %pS\n",
+ kmulti_link->cookies[i],
+ (void *)kmulti_link->addrs[i]);
+ }
+}
+#endif
+
static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
.release = bpf_kprobe_multi_link_release,
- .dealloc = bpf_kprobe_multi_link_dealloc,
+ .dealloc_deferred = bpf_kprobe_multi_link_dealloc,
+ .fill_link_info = bpf_kprobe_multi_link_fill_link_info,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = bpf_kprobe_multi_show_fdinfo,
+#endif
};
static void bpf_kprobe_multi_cookie_swap(void *a, void *b, int size, const void *priv)
@@ -2597,7 +2506,8 @@ static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
if (WARN_ON_ONCE(!ctx))
return 0;
- run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
+ run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
+ session_ctx.run_ctx);
link = run_ctx->link;
if (!link->cookies)
return 0;
@@ -2614,47 +2524,77 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
{
struct bpf_kprobe_multi_run_ctx *run_ctx;
- run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx, run_ctx);
+ run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
+ session_ctx.run_ctx);
return run_ctx->entry_ip;
}
-static int
+static __always_inline int
kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
- unsigned long entry_ip, struct pt_regs *regs)
+ unsigned long entry_ip, struct ftrace_regs *fregs,
+ bool is_return, void *data)
{
struct bpf_kprobe_multi_run_ctx run_ctx = {
+ .session_ctx = {
+ .is_return = is_return,
+ .data = data,
+ },
.link = link,
.entry_ip = entry_ip,
};
struct bpf_run_ctx *old_run_ctx;
+ struct pt_regs *regs;
int err;
+ /*
+ * graph tracer framework ensures we won't migrate, so there is no need
+ * to use migrate_disable for bpf_prog_run again. The check here just for
+ * __this_cpu_inc_return.
+ */
+ cant_sleep();
+
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
- err = 0;
+ bpf_prog_inc_misses_counter(link->link.prog);
+ err = 1;
goto out;
}
- migrate_disable();
rcu_read_lock();
- old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
rcu_read_unlock();
- migrate_enable();
out:
__this_cpu_dec(bpf_prog_active);
return err;
}
-static void
+static int
kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
- struct pt_regs *regs)
+ unsigned long ret_ip, struct ftrace_regs *fregs,
+ void *data)
{
struct bpf_kprobe_multi_link *link;
+ int err;
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
- kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
+ err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
+ fregs, false, data);
+ return is_kprobe_session(link->link.prog) ? err : 0;
+}
+
+static void
+kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
+ void *data)
+{
+ struct bpf_kprobe_multi_link *link;
+
+ link = container_of(fp, struct bpf_kprobe_multi_link, fp);
+ kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
+ fregs, true, data);
}
static int symbols_cmp_r(const void *a, const void *b, const void *priv)
@@ -2687,69 +2627,91 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv)
}
}
-struct module_addr_args {
- unsigned long *addrs;
- u32 addrs_cnt;
+struct modules_array {
struct module **mods;
int mods_cnt;
int mods_cap;
};
-static int module_callback(void *data, const char *name,
- struct module *mod, unsigned long addr)
+static int add_module(struct modules_array *arr, struct module *mod)
{
- struct module_addr_args *args = data;
struct module **mods;
- /* We iterate all modules symbols and for each we:
- * - search for it in provided addresses array
- * - if found we check if we already have the module pointer stored
- * (we iterate modules sequentially, so we can check just the last
- * module pointer)
- * - take module reference and store it
- */
- if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(addr),
- bpf_kprobe_multi_addrs_cmp))
- return 0;
-
- if (args->mods && args->mods[args->mods_cnt - 1] == mod)
- return 0;
-
- if (args->mods_cnt == args->mods_cap) {
- args->mods_cap = max(16, args->mods_cap * 3 / 2);
- mods = krealloc_array(args->mods, args->mods_cap, sizeof(*mods), GFP_KERNEL);
+ if (arr->mods_cnt == arr->mods_cap) {
+ arr->mods_cap = max(16, arr->mods_cap * 3 / 2);
+ mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL);
if (!mods)
return -ENOMEM;
- args->mods = mods;
+ arr->mods = mods;
}
- if (!try_module_get(mod))
- return -EINVAL;
-
- args->mods[args->mods_cnt] = mod;
- args->mods_cnt++;
+ arr->mods[arr->mods_cnt] = mod;
+ arr->mods_cnt++;
return 0;
}
+static bool has_module(struct modules_array *arr, struct module *mod)
+{
+ int i;
+
+ for (i = arr->mods_cnt - 1; i >= 0; i--) {
+ if (arr->mods[i] == mod)
+ return true;
+ }
+ return false;
+}
+
static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt)
{
- struct module_addr_args args = {
- .addrs = addrs,
- .addrs_cnt = addrs_cnt,
- };
- int err;
+ struct modules_array arr = {};
+ u32 i, err = 0;
+
+ for (i = 0; i < addrs_cnt; i++) {
+ bool skip_add = false;
+ struct module *mod;
+
+ scoped_guard(rcu) {
+ mod = __module_address(addrs[i]);
+ /* Either no module or it's already stored */
+ if (!mod || has_module(&arr, mod)) {
+ skip_add = true;
+ break; /* scoped_guard */
+ }
+ if (!try_module_get(mod))
+ err = -EINVAL;
+ }
+ if (skip_add)
+ continue;
+ if (err)
+ break;
+ err = add_module(&arr, mod);
+ if (err) {
+ module_put(mod);
+ break;
+ }
+ }
/* We return either err < 0 in case of error, ... */
- err = module_kallsyms_on_each_symbol(module_callback, &args);
if (err) {
- kprobe_multi_put_modules(args.mods, args.mods_cnt);
- kfree(args.mods);
+ kprobe_multi_put_modules(arr.mods, arr.mods_cnt);
+ kfree(arr.mods);
return err;
}
/* or number of modules found if everything is ok. */
- *mods = args.mods;
- return args.mods_cnt;
+ *mods = arr.mods;
+ return arr.mods_cnt;
+}
+
+static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt)
+{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ if (!within_error_injection_list(addrs[i]))
+ return -EINVAL;
+ }
+ return 0;
}
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
@@ -2768,7 +2730,14 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
if (sizeof(u64) != sizeof(void *))
return -EOPNOTSUPP;
- if (prog->expected_attach_type != BPF_TRACE_KPROBE_MULTI)
+ if (attr->link_create.flags)
+ return -EINVAL;
+
+ if (!is_kprobe_multi(prog))
+ return -EINVAL;
+
+ /* Writing to context is not allowed for kprobes. */
+ if (prog->aux->kprobe_write_ctx)
return -EINVAL;
flags = attr->link_create.kprobe_multi.flags;
@@ -2783,6 +2752,8 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
cnt = attr->link_create.kprobe_multi.cnt;
if (!cnt)
return -EINVAL;
+ if (cnt > MAX_KPROBE_MULTI_CNT)
+ return -E2BIG;
size = cnt * sizeof(*addrs);
addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
@@ -2829,6 +2800,11 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
goto error;
}
+ if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) {
+ err = -EINVAL;
+ goto error;
+ }
+
link = kzalloc(sizeof(*link), GFP_KERNEL);
if (!link) {
err = -ENOMEM;
@@ -2836,20 +2812,23 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
}
bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI,
- &bpf_kprobe_multi_link_lops, prog);
+ &bpf_kprobe_multi_link_lops, prog, attr->link_create.attach_type);
err = bpf_link_prime(&link->link, &link_primer);
if (err)
goto error;
- if (flags & BPF_F_KPROBE_MULTI_RETURN)
- link->fp.exit_handler = kprobe_multi_link_handler;
- else
+ if (!(flags & BPF_F_KPROBE_MULTI_RETURN))
link->fp.entry_handler = kprobe_multi_link_handler;
+ if ((flags & BPF_F_KPROBE_MULTI_RETURN) || is_kprobe_session(prog))
+ link->fp.exit_handler = kprobe_multi_link_exit_handler;
+ if (is_kprobe_session(prog))
+ link->fp.entry_data_size = sizeof(u64);
link->addrs = addrs;
link->cookies = cookies;
link->cnt = cnt;
+ link->link.flags = flags;
if (cookies) {
/*
@@ -2862,13 +2841,6 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
bpf_kprobe_multi_cookie_cmp,
bpf_kprobe_multi_cookie_swap,
link);
- } else {
- /*
- * We need to sort addrs array even if there are no cookies
- * provided, to allow bsearch in get_modules_for_addrs.
- */
- sort(addrs, cnt, sizeof(*addrs),
- bpf_kprobe_multi_addrs_cmp, NULL);
}
err = get_modules_for_addrs(&link->mods, addrs, cnt);
@@ -2907,3 +2879,697 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
return 0;
}
#endif
+
+#ifdef CONFIG_UPROBES
+struct bpf_uprobe_multi_link;
+
+struct bpf_uprobe {
+ struct bpf_uprobe_multi_link *link;
+ loff_t offset;
+ unsigned long ref_ctr_offset;
+ u64 cookie;
+ struct uprobe *uprobe;
+ struct uprobe_consumer consumer;
+ bool session;
+};
+
+struct bpf_uprobe_multi_link {
+ struct path path;
+ struct bpf_link link;
+ u32 cnt;
+ struct bpf_uprobe *uprobes;
+ struct task_struct *task;
+};
+
+struct bpf_uprobe_multi_run_ctx {
+ struct bpf_session_run_ctx session_ctx;
+ unsigned long entry_ip;
+ struct bpf_uprobe *uprobe;
+};
+
+static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt)
+{
+ u32 i;
+
+ for (i = 0; i < cnt; i++)
+ uprobe_unregister_nosync(uprobes[i].uprobe, &uprobes[i].consumer);
+
+ if (cnt)
+ uprobe_unregister_sync();
+}
+
+static void bpf_uprobe_multi_link_release(struct bpf_link *link)
+{
+ struct bpf_uprobe_multi_link *umulti_link;
+
+ umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
+ bpf_uprobe_unregister(umulti_link->uprobes, umulti_link->cnt);
+ if (umulti_link->task)
+ put_task_struct(umulti_link->task);
+ path_put(&umulti_link->path);
+}
+
+static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_uprobe_multi_link *umulti_link;
+
+ umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
+ kvfree(umulti_link->uprobes);
+ kfree(umulti_link);
+}
+
+static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ u64 __user *uref_ctr_offsets = u64_to_user_ptr(info->uprobe_multi.ref_ctr_offsets);
+ u64 __user *ucookies = u64_to_user_ptr(info->uprobe_multi.cookies);
+ u64 __user *uoffsets = u64_to_user_ptr(info->uprobe_multi.offsets);
+ u64 __user *upath = u64_to_user_ptr(info->uprobe_multi.path);
+ u32 upath_size = info->uprobe_multi.path_size;
+ struct bpf_uprobe_multi_link *umulti_link;
+ u32 ucount = info->uprobe_multi.count;
+ int err = 0, i;
+ char *p, *buf;
+ long left = 0;
+
+ if (!upath ^ !upath_size)
+ return -EINVAL;
+
+ if ((uoffsets || uref_ctr_offsets || ucookies) && !ucount)
+ return -EINVAL;
+
+ umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
+ info->uprobe_multi.count = umulti_link->cnt;
+ info->uprobe_multi.flags = umulti_link->link.flags;
+ info->uprobe_multi.pid = umulti_link->task ?
+ task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
+
+ upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX;
+ buf = kmalloc(upath_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ p = d_path(&umulti_link->path, buf, upath_size);
+ if (IS_ERR(p)) {
+ kfree(buf);
+ return PTR_ERR(p);
+ }
+ upath_size = buf + upath_size - p;
+
+ if (upath)
+ left = copy_to_user(upath, p, upath_size);
+ kfree(buf);
+ if (left)
+ return -EFAULT;
+ info->uprobe_multi.path_size = upath_size;
+
+ if (!uoffsets && !ucookies && !uref_ctr_offsets)
+ return 0;
+
+ if (ucount < umulti_link->cnt)
+ err = -ENOSPC;
+ else
+ ucount = umulti_link->cnt;
+
+ for (i = 0; i < ucount; i++) {
+ if (uoffsets &&
+ put_user(umulti_link->uprobes[i].offset, uoffsets + i))
+ return -EFAULT;
+ if (uref_ctr_offsets &&
+ put_user(umulti_link->uprobes[i].ref_ctr_offset, uref_ctr_offsets + i))
+ return -EFAULT;
+ if (ucookies &&
+ put_user(umulti_link->uprobes[i].cookie, ucookies + i))
+ return -EFAULT;
+ }
+
+ return err;
+}
+
+#ifdef CONFIG_PROC_FS
+static void bpf_uprobe_multi_show_fdinfo(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ struct bpf_uprobe_multi_link *umulti_link;
+ char *p, *buf;
+ pid_t pid;
+
+ umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
+
+ buf = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!buf)
+ return;
+
+ p = d_path(&umulti_link->path, buf, PATH_MAX);
+ if (IS_ERR(p)) {
+ kfree(buf);
+ return;
+ }
+
+ pid = umulti_link->task ?
+ task_pid_nr_ns(umulti_link->task, task_active_pid_ns(current)) : 0;
+ seq_printf(seq,
+ "uprobe_cnt:\t%u\n"
+ "pid:\t%u\n"
+ "path:\t%s\n",
+ umulti_link->cnt, pid, p);
+
+ seq_printf(seq, "%s\t %s\t %s\n", "cookie", "offset", "ref_ctr_offset");
+ for (int i = 0; i < umulti_link->cnt; i++) {
+ seq_printf(seq,
+ "%llu\t %#llx\t %#lx\n",
+ umulti_link->uprobes[i].cookie,
+ umulti_link->uprobes[i].offset,
+ umulti_link->uprobes[i].ref_ctr_offset);
+ }
+
+ kfree(buf);
+}
+#endif
+
+static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
+ .release = bpf_uprobe_multi_link_release,
+ .dealloc_deferred = bpf_uprobe_multi_link_dealloc,
+ .fill_link_info = bpf_uprobe_multi_link_fill_link_info,
+#ifdef CONFIG_PROC_FS
+ .show_fdinfo = bpf_uprobe_multi_show_fdinfo,
+#endif
+};
+
+static int uprobe_prog_run(struct bpf_uprobe *uprobe,
+ unsigned long entry_ip,
+ struct pt_regs *regs,
+ bool is_return, void *data)
+{
+ struct bpf_uprobe_multi_link *link = uprobe->link;
+ struct bpf_uprobe_multi_run_ctx run_ctx = {
+ .session_ctx = {
+ .is_return = is_return,
+ .data = data,
+ },
+ .entry_ip = entry_ip,
+ .uprobe = uprobe,
+ };
+ struct bpf_prog *prog = link->link.prog;
+ bool sleepable = prog->sleepable;
+ struct bpf_run_ctx *old_run_ctx;
+ int err;
+
+ if (link->task && !same_thread_group(current, link->task))
+ return 0;
+
+ if (sleepable)
+ rcu_read_lock_trace();
+ else
+ rcu_read_lock();
+
+ migrate_disable();
+
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
+ err = bpf_prog_run(link->link.prog, regs);
+ bpf_reset_run_ctx(old_run_ctx);
+
+ migrate_enable();
+
+ if (sleepable)
+ rcu_read_unlock_trace();
+ else
+ rcu_read_unlock();
+ return err;
+}
+
+static bool
+uprobe_multi_link_filter(struct uprobe_consumer *con, struct mm_struct *mm)
+{
+ struct bpf_uprobe *uprobe;
+
+ uprobe = container_of(con, struct bpf_uprobe, consumer);
+ return uprobe->link->task->mm == mm;
+}
+
+static int
+uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs,
+ __u64 *data)
+{
+ struct bpf_uprobe *uprobe;
+ int ret;
+
+ uprobe = container_of(con, struct bpf_uprobe, consumer);
+ ret = uprobe_prog_run(uprobe, instruction_pointer(regs), regs, false, data);
+ if (uprobe->session)
+ return ret ? UPROBE_HANDLER_IGNORE : 0;
+ return 0;
+}
+
+static int
+uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs,
+ __u64 *data)
+{
+ struct bpf_uprobe *uprobe;
+
+ uprobe = container_of(con, struct bpf_uprobe, consumer);
+ uprobe_prog_run(uprobe, func, regs, true, data);
+ return 0;
+}
+
+static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
+{
+ struct bpf_uprobe_multi_run_ctx *run_ctx;
+
+ run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
+ session_ctx.run_ctx);
+ return run_ctx->entry_ip;
+}
+
+static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
+{
+ struct bpf_uprobe_multi_run_ctx *run_ctx;
+
+ run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
+ session_ctx.run_ctx);
+ return run_ctx->uprobe->cookie;
+}
+
+int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct bpf_uprobe_multi_link *link = NULL;
+ unsigned long __user *uref_ctr_offsets;
+ struct bpf_link_primer link_primer;
+ struct bpf_uprobe *uprobes = NULL;
+ struct task_struct *task = NULL;
+ unsigned long __user *uoffsets;
+ u64 __user *ucookies;
+ void __user *upath;
+ u32 flags, cnt, i;
+ struct path path;
+ char *name;
+ pid_t pid;
+ int err;
+
+ /* no support for 32bit archs yet */
+ if (sizeof(u64) != sizeof(void *))
+ return -EOPNOTSUPP;
+
+ if (attr->link_create.flags)
+ return -EINVAL;
+
+ if (!is_uprobe_multi(prog))
+ return -EINVAL;
+
+ flags = attr->link_create.uprobe_multi.flags;
+ if (flags & ~BPF_F_UPROBE_MULTI_RETURN)
+ return -EINVAL;
+
+ /*
+ * path, offsets and cnt are mandatory,
+ * ref_ctr_offsets and cookies are optional
+ */
+ upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
+ uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
+ cnt = attr->link_create.uprobe_multi.cnt;
+ pid = attr->link_create.uprobe_multi.pid;
+
+ if (!upath || !uoffsets || !cnt || pid < 0)
+ return -EINVAL;
+ if (cnt > MAX_UPROBE_MULTI_CNT)
+ return -E2BIG;
+
+ uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets);
+ ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies);
+
+ name = strndup_user(upath, PATH_MAX);
+ if (IS_ERR(name)) {
+ err = PTR_ERR(name);
+ return err;
+ }
+
+ err = kern_path(name, LOOKUP_FOLLOW, &path);
+ kfree(name);
+ if (err)
+ return err;
+
+ if (!d_is_reg(path.dentry)) {
+ err = -EBADF;
+ goto error_path_put;
+ }
+
+ if (pid) {
+ rcu_read_lock();
+ task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
+ rcu_read_unlock();
+ if (!task) {
+ err = -ESRCH;
+ goto error_path_put;
+ }
+ }
+
+ err = -ENOMEM;
+
+ link = kzalloc(sizeof(*link), GFP_KERNEL);
+ uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL);
+
+ if (!uprobes || !link)
+ goto error_free;
+
+ for (i = 0; i < cnt; i++) {
+ if (__get_user(uprobes[i].offset, uoffsets + i)) {
+ err = -EFAULT;
+ goto error_free;
+ }
+ if (uprobes[i].offset < 0) {
+ err = -EINVAL;
+ goto error_free;
+ }
+ if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) {
+ err = -EFAULT;
+ goto error_free;
+ }
+ if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) {
+ err = -EFAULT;
+ goto error_free;
+ }
+
+ uprobes[i].link = link;
+
+ if (!(flags & BPF_F_UPROBE_MULTI_RETURN))
+ uprobes[i].consumer.handler = uprobe_multi_link_handler;
+ if (flags & BPF_F_UPROBE_MULTI_RETURN || is_uprobe_session(prog))
+ uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
+ if (is_uprobe_session(prog))
+ uprobes[i].session = true;
+ if (pid)
+ uprobes[i].consumer.filter = uprobe_multi_link_filter;
+ }
+
+ link->cnt = cnt;
+ link->uprobes = uprobes;
+ link->path = path;
+ link->task = task;
+ link->link.flags = flags;
+
+ bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI,
+ &bpf_uprobe_multi_link_lops, prog, attr->link_create.attach_type);
+
+ for (i = 0; i < cnt; i++) {
+ uprobes[i].uprobe = uprobe_register(d_real_inode(link->path.dentry),
+ uprobes[i].offset,
+ uprobes[i].ref_ctr_offset,
+ &uprobes[i].consumer);
+ if (IS_ERR(uprobes[i].uprobe)) {
+ err = PTR_ERR(uprobes[i].uprobe);
+ link->cnt = i;
+ goto error_unregister;
+ }
+ }
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err)
+ goto error_unregister;
+
+ return bpf_link_settle(&link_primer);
+
+error_unregister:
+ bpf_uprobe_unregister(uprobes, link->cnt);
+
+error_free:
+ kvfree(uprobes);
+ kfree(link);
+ if (task)
+ put_task_struct(task);
+error_path_put:
+ path_put(&path);
+ return err;
+}
+#else /* !CONFIG_UPROBES */
+int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ return -EOPNOTSUPP;
+}
+static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
+{
+ return 0;
+}
+static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
+{
+ return 0;
+}
+#endif /* CONFIG_UPROBES */
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc bool bpf_session_is_return(void)
+{
+ struct bpf_session_run_ctx *session_ctx;
+
+ session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
+ return session_ctx->is_return;
+}
+
+__bpf_kfunc __u64 *bpf_session_cookie(void)
+{
+ struct bpf_session_run_ctx *session_ctx;
+
+ session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
+ return session_ctx->data;
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(kprobe_multi_kfunc_set_ids)
+BTF_ID_FLAGS(func, bpf_session_is_return)
+BTF_ID_FLAGS(func, bpf_session_cookie)
+BTF_KFUNCS_END(kprobe_multi_kfunc_set_ids)
+
+static int bpf_kprobe_multi_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+ if (!btf_id_set8_contains(&kprobe_multi_kfunc_set_ids, kfunc_id))
+ return 0;
+
+ if (!is_kprobe_session(prog) && !is_uprobe_session(prog))
+ return -EACCES;
+
+ return 0;
+}
+
+static const struct btf_kfunc_id_set bpf_kprobe_multi_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &kprobe_multi_kfunc_set_ids,
+ .filter = bpf_kprobe_multi_filter,
+};
+
+static int __init bpf_kprobe_multi_kfuncs_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_KPROBE, &bpf_kprobe_multi_kfunc_set);
+}
+
+late_initcall(bpf_kprobe_multi_kfuncs_init);
+
+typedef int (*copy_fn_t)(void *dst, const void *src, u32 size, struct task_struct *tsk);
+
+/*
+ * The __always_inline is to make sure the compiler doesn't
+ * generate indirect calls into callbacks, which is expensive,
+ * on some kernel configurations. This allows compiler to put
+ * direct calls into all the specific callback implementations
+ * (copy_user_data_sleepable, copy_user_data_nofault, and so on)
+ */
+static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u64 doff, u64 size,
+ const void *unsafe_src,
+ copy_fn_t str_copy_fn,
+ struct task_struct *tsk)
+{
+ struct bpf_dynptr_kern *dst;
+ u64 chunk_sz, off;
+ void *dst_slice;
+ int cnt, err;
+ char buf[256];
+
+ dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
+ if (likely(dst_slice))
+ return str_copy_fn(dst_slice, unsafe_src, size, tsk);
+
+ dst = (struct bpf_dynptr_kern *)dptr;
+ if (bpf_dynptr_check_off_len(dst, doff, size))
+ return -E2BIG;
+
+ for (off = 0; off < size; off += chunk_sz - 1) {
+ chunk_sz = min_t(u64, sizeof(buf), size - off);
+ /* Expect str_copy_fn to return count of copied bytes, including
+ * zero terminator. Next iteration increment off by chunk_sz - 1 to
+ * overwrite NUL.
+ */
+ cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
+ if (cnt < 0)
+ return cnt;
+ err = __bpf_dynptr_write(dst, doff + off, buf, cnt, 0);
+ if (err)
+ return err;
+ if (cnt < chunk_sz || chunk_sz == 1) /* we are done */
+ return off + cnt;
+ }
+ return off;
+}
+
+static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff,
+ u64 size, const void *unsafe_src,
+ copy_fn_t copy_fn, struct task_struct *tsk)
+{
+ struct bpf_dynptr_kern *dst;
+ void *dst_slice;
+ char buf[256];
+ u64 off, chunk_sz;
+ int err;
+
+ dst_slice = bpf_dynptr_slice_rdwr(dptr, doff, NULL, size);
+ if (likely(dst_slice))
+ return copy_fn(dst_slice, unsafe_src, size, tsk);
+
+ dst = (struct bpf_dynptr_kern *)dptr;
+ if (bpf_dynptr_check_off_len(dst, doff, size))
+ return -E2BIG;
+
+ for (off = 0; off < size; off += chunk_sz) {
+ chunk_sz = min_t(u64, sizeof(buf), size - off);
+ err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
+ if (err)
+ return err;
+ err = __bpf_dynptr_write(dst, doff + off, buf, chunk_sz, 0);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static __always_inline int copy_user_data_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return copy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
+}
+
+static __always_inline int copy_user_data_sleepable(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ int ret;
+
+ if (!tsk) { /* Read from the current task */
+ ret = copy_from_user(dst, (const void __user *)unsafe_src, size);
+ if (ret)
+ return -EFAULT;
+ return 0;
+ }
+
+ ret = access_process_vm(tsk, (unsigned long)unsafe_src, dst, size, 0);
+ if (ret != size)
+ return -EFAULT;
+ return 0;
+}
+
+static __always_inline int copy_kernel_data_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return copy_from_kernel_nofault(dst, unsafe_src, size);
+}
+
+static __always_inline int copy_user_str_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return strncpy_from_user_nofault(dst, (const void __user *)unsafe_src, size);
+}
+
+static __always_inline int copy_user_str_sleepable(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ int ret;
+
+ if (unlikely(size == 0))
+ return 0;
+
+ if (tsk) {
+ ret = copy_remote_vm_str(tsk, (unsigned long)unsafe_src, dst, size, 0);
+ } else {
+ ret = strncpy_from_user(dst, (const void __user *)unsafe_src, size - 1);
+ /* strncpy_from_user does not guarantee NUL termination */
+ if (ret >= 0)
+ ((char *)dst)[ret] = '\0';
+ }
+
+ if (ret < 0)
+ return ret;
+ return ret + 1;
+}
+
+static __always_inline int copy_kernel_str_nofault(void *dst, const void *unsafe_src,
+ u32 size, struct task_struct *tsk)
+{
+ return strncpy_from_kernel_nofault(dst, unsafe_src, size);
+}
+
+__bpf_kfunc_start_defs();
+
+__bpf_kfunc int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type,
+ u64 value)
+{
+ if (type != PIDTYPE_PID && type != PIDTYPE_TGID)
+ return -EINVAL;
+
+ return bpf_send_signal_common(sig, type, task, value);
+}
+
+__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, unsafe_ptr__ign,
+ copy_kernel_data_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, unsafe_ptr__ign,
+ copy_kernel_str_nofault, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_sleepable, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_sleepable, NULL);
+}
+
+__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign,
+ struct task_struct *tsk)
+{
+ return __bpf_dynptr_copy(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_data_sleepable, tsk);
+}
+
+__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void __user *unsafe_ptr__ign,
+ struct task_struct *tsk)
+{
+ return __bpf_dynptr_copy_str(dptr, off, size, (const void *)unsafe_ptr__ign,
+ copy_user_str_sleepable, tsk);
+}
+
+__bpf_kfunc_end_defs();