summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/arraymap.c2
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/bpf/helpers.c52
-rw-r--r--kernel/bpf/stackmap.c5
-rw-r--r--kernel/bpf/verifier.c151
-rw-r--r--kernel/events/core.c89
-rw-r--r--kernel/trace/bpf_trace.c160
7 files changed, 345 insertions, 116 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633a650d7aeb..a2ac051c342f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
}
late_initcall(register_perf_event_array_map);
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file /* not used */,
int fd)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 03fd23d4d587..7b7baaed9ed4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1018,7 +1018,7 @@ void bpf_user_rnd_init_once(void)
prandom_init_once(&bpf_user_rnd_state);
}
-u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_user_rnd_u32)
{
/* Should someone ever have the rather unwise idea to use some
* of the registers passed into this function, then note that
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1ea3afba1a4f..a5b8bf8cfcfd 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/uidgid.h>
+#include <linux/filter.h>
/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -26,24 +27,10 @@
* if program is allowed to access maps, so check rcu_read_lock_held in
* all three functions.
*/
-static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
- /* verifier checked that R1 contains a valid pointer to bpf_map
- * and R2 points to a program stack and map->key_size bytes were
- * initialized
- */
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
- void *key = (void *) (unsigned long) r2;
- void *value;
-
WARN_ON_ONCE(!rcu_read_lock_held());
-
- value = map->ops->map_lookup_elem(map, key);
-
- /* lookup() returns either pointer to element value or NULL
- * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type
- */
- return (unsigned long) value;
+ return (unsigned long) map->ops->map_lookup_elem(map, key);
}
const struct bpf_func_proto bpf_map_lookup_elem_proto = {
@@ -54,15 +41,11 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = {
.arg2_type = ARG_PTR_TO_MAP_KEY,
};
-static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
+ void *, value, u64, flags)
{
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
- void *key = (void *) (unsigned long) r2;
- void *value = (void *) (unsigned long) r3;
-
WARN_ON_ONCE(!rcu_read_lock_held());
-
- return map->ops->map_update_elem(map, key, value, r4);
+ return map->ops->map_update_elem(map, key, value, flags);
}
const struct bpf_func_proto bpf_map_update_elem_proto = {
@@ -75,13 +58,9 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
.arg4_type = ARG_ANYTHING,
};
-static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
- void *key = (void *) (unsigned long) r2;
-
WARN_ON_ONCE(!rcu_read_lock_held());
-
return map->ops->map_delete_elem(map, key);
}
@@ -99,7 +78,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_smp_processor_id)
{
return smp_processor_id();
}
@@ -110,7 +89,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_ktime_get_ns)
{
/* NMI safe access to clock monotonic */
return ktime_get_mono_fast_ns();
@@ -122,11 +101,11 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_pid_tgid)
{
struct task_struct *task = current;
- if (!task)
+ if (unlikely(!task))
return -EINVAL;
return (u64) task->tgid << 32 | task->pid;
@@ -138,18 +117,18 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_uid_gid)
{
struct task_struct *task = current;
kuid_t uid;
kgid_t gid;
- if (!task)
+ if (unlikely(!task))
return -EINVAL;
current_uid_gid(&uid, &gid);
return (u64) from_kgid(&init_user_ns, gid) << 32 |
- from_kuid(&init_user_ns, uid);
+ from_kuid(&init_user_ns, uid);
}
const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
@@ -158,10 +137,9 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
struct task_struct *task = current;
- char *buf = (char *) (long) r1;
if (unlikely(!task))
goto err_clear;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index bf4495fcd25d..732ae16d12b7 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -116,10 +116,9 @@ free_smap:
return ERR_PTR(err);
}
-u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+ u64, flags)
{
- struct pt_regs *regs = (struct pt_regs *) (long) r1;
- struct bpf_map *map = (struct bpf_map *) (long) r2;
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct perf_callchain_entry *trace;
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index daea765d72e6..90493a66dddd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -930,14 +930,14 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
enum bpf_arg_type arg_type,
struct bpf_call_arg_meta *meta)
{
- struct reg_state *reg = env->cur_state.regs + regno;
- enum bpf_reg_type expected_type;
+ struct reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+ enum bpf_reg_type expected_type, type = reg->type;
int err = 0;
if (arg_type == ARG_DONTCARE)
return 0;
- if (reg->type == NOT_INIT) {
+ if (type == NOT_INIT) {
verbose("R%d !read_ok\n", regno);
return -EACCES;
}
@@ -950,16 +950,29 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
return 0;
}
+ if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) {
+ verbose("helper access to the packet is not allowed for clsact\n");
+ return -EACCES;
+ }
+
if (arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE) {
expected_type = PTR_TO_STACK;
+ if (type != PTR_TO_PACKET && type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_CONST_STACK_SIZE ||
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
expected_type = CONST_IMM;
+ if (type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_CONST_MAP_PTR) {
expected_type = CONST_PTR_TO_MAP;
+ if (type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_PTR_TO_CTX) {
expected_type = PTR_TO_CTX;
+ if (type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_PTR_TO_STACK ||
arg_type == ARG_PTR_TO_RAW_STACK) {
expected_type = PTR_TO_STACK;
@@ -967,20 +980,16 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
* passed in as argument, it's a CONST_IMM type. Final test
* happens during stack boundary checking.
*/
- if (reg->type == CONST_IMM && reg->imm == 0)
- expected_type = CONST_IMM;
+ if (type == CONST_IMM && reg->imm == 0)
+ /* final test in check_stack_boundary() */;
+ else if (type != PTR_TO_PACKET && type != expected_type)
+ goto err_type;
meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK;
} else {
verbose("unsupported arg_type %d\n", arg_type);
return -EFAULT;
}
- if (reg->type != expected_type) {
- verbose("R%d type=%s expected=%s\n", regno,
- reg_type_str[reg->type], reg_type_str[expected_type]);
- return -EACCES;
- }
-
if (arg_type == ARG_CONST_MAP_PTR) {
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
meta->map_ptr = reg->map_ptr;
@@ -998,8 +1007,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
verbose("invalid map_ptr to access map->key\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno, meta->map_ptr->key_size,
- false, NULL);
+ if (type == PTR_TO_PACKET)
+ err = check_packet_access(env, regno, 0,
+ meta->map_ptr->key_size);
+ else
+ err = check_stack_boundary(env, regno,
+ meta->map_ptr->key_size,
+ false, NULL);
} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
/* bpf_map_xxx(..., map_ptr, ..., value) call:
* check [value, value + map->value_size) validity
@@ -1009,9 +1023,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
verbose("invalid map_ptr to access map->value\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno,
- meta->map_ptr->value_size,
- false, NULL);
+ if (type == PTR_TO_PACKET)
+ err = check_packet_access(env, regno, 0,
+ meta->map_ptr->value_size);
+ else
+ err = check_stack_boundary(env, regno,
+ meta->map_ptr->value_size,
+ false, NULL);
} else if (arg_type == ARG_CONST_STACK_SIZE ||
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
@@ -1025,11 +1043,18 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno - 1, reg->imm,
- zero_size_allowed, meta);
+ if (regs[regno - 1].type == PTR_TO_PACKET)
+ err = check_packet_access(env, regno - 1, 0, reg->imm);
+ else
+ err = check_stack_boundary(env, regno - 1, reg->imm,
+ zero_size_allowed, meta);
}
return err;
+err_type:
+ verbose("R%d type=%s expected=%s\n", regno,
+ reg_type_str[type], reg_type_str[expected_type]);
+ return -EACCES;
}
static int check_map_func_compatibility(struct bpf_map *map, int func_id)
@@ -1053,7 +1078,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
goto error;
break;
case BPF_MAP_TYPE_CGROUP_ARRAY:
- if (func_id != BPF_FUNC_skb_under_cgroup)
+ if (func_id != BPF_FUNC_skb_under_cgroup &&
+ func_id != BPF_FUNC_current_task_under_cgroup)
goto error;
break;
default:
@@ -1075,6 +1101,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
goto error;
break;
+ case BPF_FUNC_current_task_under_cgroup:
case BPF_FUNC_skb_under_cgroup:
if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
goto error;
@@ -1610,21 +1637,42 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
-static void find_good_pkt_pointers(struct verifier_env *env,
- struct reg_state *dst_reg)
+static void find_good_pkt_pointers(struct verifier_state *state,
+ const struct reg_state *dst_reg)
{
- struct verifier_state *state = &env->cur_state;
struct reg_state *regs = state->regs, *reg;
int i;
- /* r2 = r3;
- * r2 += 8
- * if (r2 > pkt_end) goto somewhere
- * r2 == dst_reg, pkt_end == src_reg,
- * r2=pkt(id=n,off=8,r=0)
- * r3=pkt(id=n,off=0,r=0)
- * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
- * so that range of bytes [r3, r3 + 8) is safe to access
+
+ /* LLVM can generate two kind of checks:
+ *
+ * Type 1:
+ *
+ * r2 = r3;
+ * r2 += 8;
+ * if (r2 > pkt_end) goto <handle exception>
+ * <access okay>
+ *
+ * Where:
+ * r2 == dst_reg, pkt_end == src_reg
+ * r2=pkt(id=n,off=8,r=0)
+ * r3=pkt(id=n,off=0,r=0)
+ *
+ * Type 2:
+ *
+ * r2 = r3;
+ * r2 += 8;
+ * if (pkt_end >= r2) goto <access okay>
+ * <handle exception>
+ *
+ * Where:
+ * pkt_end == dst_reg, r2 == src_reg
+ * r2=pkt(id=n,off=8,r=0)
+ * r3=pkt(id=n,off=0,r=0)
+ *
+ * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
+ * so that range of bytes [r3, r3 + 8) is safe to access.
*/
+
for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
regs[i].range = dst_reg->off;
@@ -1641,8 +1689,8 @@ static void find_good_pkt_pointers(struct verifier_env *env,
static int check_cond_jmp_op(struct verifier_env *env,
struct bpf_insn *insn, int *insn_idx)
{
- struct reg_state *regs = env->cur_state.regs, *dst_reg;
- struct verifier_state *other_branch;
+ struct verifier_state *other_branch, *this_branch = &env->cur_state;
+ struct reg_state *regs = this_branch->regs, *dst_reg;
u8 opcode = BPF_OP(insn->code);
int err;
@@ -1723,13 +1771,17 @@ static int check_cond_jmp_op(struct verifier_env *env,
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
- find_good_pkt_pointers(env, dst_reg);
+ find_good_pkt_pointers(this_branch, dst_reg);
+ } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
+ dst_reg->type == PTR_TO_PACKET_END &&
+ regs[insn->src_reg].type == PTR_TO_PACKET) {
+ find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
} else if (is_pointer_value(env, insn->dst_reg)) {
verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
return -EACCES;
}
if (log_level)
- print_verifier_state(&env->cur_state);
+ print_verifier_state(this_branch);
return 0;
}
@@ -2306,7 +2358,8 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
- if (BPF_SIZE(insn->code) != BPF_W) {
+ if (BPF_SIZE(insn->code) != BPF_W &&
+ BPF_SIZE(insn->code) != BPF_DW) {
insn_idx++;
continue;
}
@@ -2483,6 +2536,20 @@ process_bpf_exit:
return 0;
}
+static int check_map_prog_compatibility(struct bpf_map *map,
+ struct bpf_prog *prog)
+
+{
+ if (prog->type == BPF_PROG_TYPE_PERF_EVENT &&
+ (map->map_type == BPF_MAP_TYPE_HASH ||
+ map->map_type == BPF_MAP_TYPE_PERCPU_HASH) &&
+ (map->map_flags & BPF_F_NO_PREALLOC)) {
+ verbose("perf_event programs can only use preallocated hash map\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
/* look for pseudo eBPF instructions that access map FDs and
* replace them with actual map pointers
*/
@@ -2490,7 +2557,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
- int i, j;
+ int i, j, err;
for (i = 0; i < insn_cnt; i++, insn++) {
if (BPF_CLASS(insn->code) == BPF_LDX &&
@@ -2534,6 +2601,12 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
return PTR_ERR(map);
}
+ err = check_map_prog_compatibility(map, env->prog);
+ if (err) {
+ fdput(f);
+ return err;
+ }
+
/* store map pointer inside BPF_LD_IMM64 instruction */
insn[0].imm = (u32) (unsigned long) map;
insn[1].imm = ((u64) (unsigned long) map) >> 32;
@@ -2615,9 +2688,11 @@ static int convert_ctx_accesses(struct verifier_env *env)
for (i = 0; i < insn_cnt; i++, insn++) {
u32 insn_delta, cnt;
- if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
+ if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
type = BPF_READ;
- else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
+ else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_DW))
type = BPF_WRITE;
else
continue;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cfabdf7b942..a7b8c1c75fa7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7022,7 +7022,7 @@ static int __perf_event_overflow(struct perf_event *event,
irq_work_queue(&event->pending);
}
- event->overflow_handler(event, data, regs);
+ READ_ONCE(event->overflow_handler)(event, data, regs);
if (*perf_event_fasync(event) && event->pending_kill) {
event->pending_wakeup = 1;
@@ -7637,11 +7637,83 @@ static void perf_event_free_filter(struct perf_event *event)
ftrace_profile_free_filter(event);
}
+#ifdef CONFIG_BPF_SYSCALL
+static void bpf_overflow_handler(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct bpf_perf_event_data_kern ctx = {
+ .data = data,
+ .regs = regs,
+ };
+ int ret = 0;
+
+ preempt_disable();
+ if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
+ goto out;
+ rcu_read_lock();
+ ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
+ rcu_read_unlock();
+out:
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
+ if (!ret)
+ return;
+
+ event->orig_overflow_handler(event, data, regs);
+}
+
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+ struct bpf_prog *prog;
+
+ if (event->overflow_handler_context)
+ /* hw breakpoint or kernel counter */
+ return -EINVAL;
+
+ if (event->prog)
+ return -EEXIST;
+
+ prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ event->prog = prog;
+ event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
+ WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
+ return 0;
+}
+
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+ struct bpf_prog *prog = event->prog;
+
+ if (!prog)
+ return;
+
+ WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
+ event->prog = NULL;
+ bpf_prog_put(prog);
+}
+#else
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+ return -EOPNOTSUPP;
+}
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+}
+#endif
+
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
bool is_kprobe, is_tracepoint;
struct bpf_prog *prog;
+ if (event->attr.type == PERF_TYPE_HARDWARE ||
+ event->attr.type == PERF_TYPE_SOFTWARE)
+ return perf_event_set_bpf_handler(event, prog_fd);
+
if (event->attr.type != PERF_TYPE_TRACEPOINT)
return -EINVAL;
@@ -7682,6 +7754,8 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
{
struct bpf_prog *prog;
+ perf_event_free_bpf_handler(event);
+
if (!event->tp_event)
return;
@@ -8998,6 +9072,19 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (!overflow_handler && parent_event) {
overflow_handler = parent_event->overflow_handler;
context = parent_event->overflow_handler_context;
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
+ if (overflow_handler == bpf_overflow_handler) {
+ struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
+
+ if (IS_ERR(prog)) {
+ err = PTR_ERR(prog);
+ goto err_ns;
+ }
+ event->prog = prog;
+ event->orig_overflow_handler =
+ parent_event->orig_overflow_handler;
+ }
+#endif
}
if (overflow_handler) {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b20438fdb029..5dcb99281259 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -8,6 +9,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
+#include <linux/bpf_perf_event.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
@@ -59,11 +61,9 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
}
EXPORT_SYMBOL_GPL(trace_call_bpf);
-static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
- void *dst = (void *) (long) r1;
- int ret, size = (int) r2;
- void *unsafe_ptr = (void *) (long) r3;
+ int ret;
ret = probe_kernel_read(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
@@ -81,12 +81,9 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
.arg3_type = ARG_ANYTHING,
};
-static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
+ u32, size)
{
- void *unsafe_ptr = (void *) (long) r1;
- void *src = (void *) (long) r2;
- int size = (int) r3;
-
/*
* Ensure we're in user context which is safe for the helper to
* run. This helper has no business in a kthread.
@@ -128,9 +125,9 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
* limited trace_printk()
* only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
*/
-static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
+ u64, arg2, u64, arg3)
{
- char *fmt = (char *) (long) r1;
bool str_seen = false;
int mod[3] = {};
int fmt_cnt = 0;
@@ -176,16 +173,16 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
switch (fmt_cnt) {
case 1:
- unsafe_addr = r3;
- r3 = (long) buf;
+ unsafe_addr = arg1;
+ arg1 = (long) buf;
break;
case 2:
- unsafe_addr = r4;
- r4 = (long) buf;
+ unsafe_addr = arg2;
+ arg2 = (long) buf;
break;
case 3:
- unsafe_addr = r5;
- r5 = (long) buf;
+ unsafe_addr = arg3;
+ arg3 = (long) buf;
break;
}
buf[0] = 0;
@@ -207,9 +204,9 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
}
return __trace_printk(1/* fake ip will not be printed */, fmt,
- mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
- mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
- mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
+ mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1,
+ mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2,
+ mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3);
}
static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -231,9 +228,8 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
return &bpf_trace_printk_proto;
}
-static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
@@ -310,11 +306,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
return 0;
}
-static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
+ u64, flags, void *, data, u64, size)
{
- struct pt_regs *regs = (struct pt_regs *)(long) r1;
- struct bpf_map *map = (struct bpf_map *)(long) r2;
- void *data = (void *)(long) r4;
struct perf_raw_record raw = {
.frag = {
.size = size,
@@ -365,7 +359,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
return __bpf_perf_event_output(regs, map, flags, &raw);
}
-static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_task)
{
return (long) current;
}
@@ -376,6 +370,31 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
.ret_type = RET_INTEGER,
};
+BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct cgroup *cgrp;
+
+ if (unlikely(in_interrupt()))
+ return -EINVAL;
+ if (unlikely(idx >= array->map.max_entries))
+ return -E2BIG;
+
+ cgrp = READ_ONCE(array->ptrs[idx]);
+ if (unlikely(!cgrp))
+ return -EAGAIN;
+
+ return task_under_cgroup_hierarchy(current, cgrp);
+}
+
+static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+ .func = bpf_current_task_under_cgroup,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -407,6 +426,10 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
return &bpf_perf_event_read_proto;
case BPF_FUNC_probe_write_user:
return bpf_get_probe_write_proto();
+ case BPF_FUNC_current_task_under_cgroup:
+ return &bpf_current_task_under_cgroup_proto;
+ case BPF_FUNC_get_prandom_u32:
+ return &bpf_get_prandom_u32_proto;
default:
return NULL;
}
@@ -447,16 +470,17 @@ static struct bpf_prog_type_list kprobe_tl = {
.type = BPF_PROG_TYPE_KPROBE,
};
-static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
+ u64, flags, void *, data, u64, size)
{
+ struct pt_regs *regs = *(struct pt_regs **)tp_buff;
+
/*
* r1 points to perf tracepoint buffer where first 8 bytes are hidden
* from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
- * from there and call the same bpf_perf_event_output() helper
+ * from there and call the same bpf_perf_event_output() helper inline.
*/
- u64 ctx = *(long *)(uintptr_t)r1;
-
- return bpf_perf_event_output(ctx, r2, index, r4, size);
+ return ____bpf_perf_event_output(regs, map, flags, data, size);
}
static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
@@ -470,11 +494,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
.arg5_type = ARG_CONST_STACK_SIZE,
};
-static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
+ u64, flags)
{
- u64 ctx = *(long *)(uintptr_t)r1;
+ struct pt_regs *regs = *(struct pt_regs **)tp_buff;
- return bpf_get_stackid(ctx, r2, r3, r4, r5);
+ /*
+ * Same comment as in bpf_perf_event_output_tp(), only that this time
+ * the other helper's function body cannot be inlined due to being
+ * external, thus we need to call raw helper function.
+ */
+ return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+ flags, 0, 0);
}
static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
@@ -520,10 +551,69 @@ static struct bpf_prog_type_list tracepoint_tl = {
.type = BPF_PROG_TYPE_TRACEPOINT,
};
+static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
+{
+ if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
+ return false;
+ if (type != BPF_READ)
+ return false;
+ if (off % size != 0)
+ return false;
+ if (off == offsetof(struct bpf_perf_event_data, sample_period)) {
+ if (size != sizeof(u64))
+ return false;
+ } else {
+ if (size != sizeof(long))
+ return false;
+ }
+ return true;
+}
+
+static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct bpf_perf_event_data, sample_period):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+ data), dst_reg, src_reg,
+ offsetof(struct bpf_perf_event_data_kern, data));
+ *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
+ offsetof(struct perf_sample_data, period));
+ break;
+ default:
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+ regs), dst_reg, src_reg,
+ offsetof(struct bpf_perf_event_data_kern, regs));
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off);
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
+static const struct bpf_verifier_ops perf_event_prog_ops = {
+ .get_func_proto = tp_prog_func_proto,
+ .is_valid_access = pe_prog_is_valid_access,
+ .convert_ctx_access = pe_prog_convert_ctx_access,
+};
+
+static struct bpf_prog_type_list perf_event_tl = {
+ .ops = &perf_event_prog_ops,
+ .type = BPF_PROG_TYPE_PERF_EVENT,
+};
+
static int __init register_kprobe_prog_ops(void)
{
bpf_register_prog_type(&kprobe_tl);
bpf_register_prog_type(&tracepoint_tl);
+ bpf_register_prog_type(&perf_event_tl);
return 0;
}
late_initcall(register_kprobe_prog_ops);