diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/audit_watch.c | 8 | ||||
-rw-r--r-- | kernel/bpf/core.c | 2 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 55 | ||||
-rw-r--r-- | kernel/bpf/stackmap.c | 5 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 497 | ||||
-rw-r--r-- | kernel/cgroup.c | 6 | ||||
-rw-r--r-- | kernel/configs/tiny.config | 8 | ||||
-rw-r--r-- | kernel/cpuset.c | 15 | ||||
-rw-r--r-- | kernel/events/core.c | 47 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 15 | ||||
-rw-r--r-- | kernel/exit.c | 7 | ||||
-rw-r--r-- | kernel/fork.c | 37 | ||||
-rw-r--r-- | kernel/kexec_file.c | 3 | ||||
-rw-r--r-- | kernel/memremap.c | 9 | ||||
-rw-r--r-- | kernel/power/qos.c | 11 | ||||
-rw-r--r-- | kernel/printk/nmi.c | 38 | ||||
-rw-r--r-- | kernel/sched/core.c | 22 | ||||
-rw-r--r-- | kernel/seccomp.c | 12 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 3 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 87 |
20 files changed, 540 insertions, 347 deletions
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index d6709eb70970..0d302a87f21b 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -19,6 +19,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/file.h> #include <linux/kernel.h> #include <linux/audit.h> #include <linux/kthread.h> @@ -544,10 +545,11 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark) unsigned long ino; dev_t dev; - rcu_read_lock(); - exe_file = rcu_dereference(tsk->mm->exe_file); + exe_file = get_task_exe_file(tsk); + if (!exe_file) + return 0; ino = exe_file->f_inode->i_ino; dev = exe_file->f_inode->i_sb->s_dev; - rcu_read_unlock(); + fput(exe_file); return audit_mark_compare(mark, ino, dev); } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 03fd23d4d587..7b7baaed9ed4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1018,7 +1018,7 @@ void bpf_user_rnd_init_once(void) prandom_init_once(&bpf_user_rnd_state); } -u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_user_rnd_u32) { /* Should someone ever have the rather unwise idea to use some * of the registers passed into this function, then note that diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1ea3afba1a4f..39918402e6e9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -16,6 +16,7 @@ #include <linux/ktime.h> #include <linux/sched.h> #include <linux/uidgid.h> +#include <linux/filter.h> /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -26,48 +27,32 @@ * if program is allowed to access maps, so check rcu_read_lock_held in * all three functions. */ -static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) { - /* verifier checked that R1 contains a valid pointer to bpf_map - * and R2 points to a program stack and map->key_size bytes were - * initialized - */ - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - void *value; - WARN_ON_ONCE(!rcu_read_lock_held()); - - value = map->ops->map_lookup_elem(map, key); - - /* lookup() returns either pointer to element value or NULL - * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type - */ - return (unsigned long) value; + return (unsigned long) map->ops->map_lookup_elem(map, key); } const struct bpf_func_proto bpf_map_lookup_elem_proto = { .func = bpf_map_lookup_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, }; -static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, + void *, value, u64, flags) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - void *value = (void *) (unsigned long) r3; - WARN_ON_ONCE(!rcu_read_lock_held()); - - return map->ops->map_update_elem(map, key, value, r4); + return map->ops->map_update_elem(map, key, value, flags); } const struct bpf_func_proto bpf_map_update_elem_proto = { .func = bpf_map_update_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, @@ -75,19 +60,16 @@ const struct bpf_func_proto bpf_map_update_elem_proto = { .arg4_type = ARG_ANYTHING, }; -static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - WARN_ON_ONCE(!rcu_read_lock_held()); - return map->ops->map_delete_elem(map, key); } const struct bpf_func_proto bpf_map_delete_elem_proto = { .func = bpf_map_delete_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, @@ -99,7 +81,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_smp_processor_id) { return smp_processor_id(); } @@ -110,7 +92,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_ktime_get_ns) { /* NMI safe access to clock monotonic */ return ktime_get_mono_fast_ns(); @@ -122,11 +104,11 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_pid_tgid) { struct task_struct *task = current; - if (!task) + if (unlikely(!task)) return -EINVAL; return (u64) task->tgid << 32 | task->pid; @@ -138,18 +120,18 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_uid_gid) { struct task_struct *task = current; kuid_t uid; kgid_t gid; - if (!task) + if (unlikely(!task)) return -EINVAL; current_uid_gid(&uid, &gid); return (u64) from_kgid(&init_user_ns, gid) << 32 | - from_kuid(&init_user_ns, uid); + from_kuid(&init_user_ns, uid); } const struct bpf_func_proto bpf_get_current_uid_gid_proto = { @@ -158,10 +140,9 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) { struct task_struct *task = current; - char *buf = (char *) (long) r1; if (unlikely(!task)) goto err_clear; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index bf4495fcd25d..732ae16d12b7 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -116,10 +116,9 @@ free_smap: return ERR_PTR(err); } -u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, + u64, flags) { - struct pt_regs *regs = (struct pt_regs *) (long) r1; - struct bpf_map *map = (struct bpf_map *) (long) r2; struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct perf_callchain_entry *trace; struct stack_map_bucket *bucket, *new_bucket, *old_bucket; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 48c2705db22c..8c3f794c7028 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14,6 +14,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/bpf.h> +#include <linux/bpf_verifier.h> #include <linux/filter.h> #include <net/netlink.h> #include <linux/file.h> @@ -126,76 +127,16 @@ * are set to NOT_INIT to indicate that they are no longer readable. */ -struct reg_state { - enum bpf_reg_type type; - union { - /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ - s64 imm; - - /* valid when type == PTR_TO_PACKET* */ - struct { - u32 id; - u16 off; - u16 range; - }; - - /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | - * PTR_TO_MAP_VALUE_OR_NULL - */ - struct bpf_map *map_ptr; - }; -}; - -enum bpf_stack_slot_type { - STACK_INVALID, /* nothing was stored in this stack slot */ - STACK_SPILL, /* register spilled into stack */ - STACK_MISC /* BPF program wrote some data into this slot */ -}; - -#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ - -/* state of the program: - * type of all registers and stack info - */ -struct verifier_state { - struct reg_state regs[MAX_BPF_REG]; - u8 stack_slot_type[MAX_BPF_STACK]; - struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; -}; - -/* linked list of verifier states used to prune search */ -struct verifier_state_list { - struct verifier_state state; - struct verifier_state_list *next; -}; - /* verifier_state + insn_idx are pushed to stack when branch is encountered */ -struct verifier_stack_elem { +struct bpf_verifier_stack_elem { /* verifer state is 'st' * before processing instruction 'insn_idx' * and after processing instruction 'prev_insn_idx' */ - struct verifier_state st; + struct bpf_verifier_state st; int insn_idx; int prev_insn_idx; - struct verifier_stack_elem *next; -}; - -#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ - -/* single container for all structs - * one verifier_env per bpf_check() call - */ -struct verifier_env { - struct bpf_prog *prog; /* eBPF program being verified */ - struct verifier_stack_elem *head; /* stack of verifier states to be processed */ - int stack_size; /* number of states to be processed */ - struct verifier_state cur_state; /* current verifier state */ - struct verifier_state_list **explored_states; /* search pruning optimization */ - struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ - u32 used_map_cnt; /* number of used maps */ - u32 id_gen; /* used to generate unique reg IDs */ - bool allow_ptr_leaks; + struct bpf_verifier_stack_elem *next; }; #define BPF_COMPLEXITY_LIMIT_INSNS 65536 @@ -204,6 +145,7 @@ struct verifier_env { struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; + bool pkt_access; int regno; int access_size; }; @@ -247,9 +189,9 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET_END] = "pkt_end", }; -static void print_verifier_state(struct verifier_state *state) +static void print_verifier_state(struct bpf_verifier_state *state) { - struct reg_state *reg; + struct bpf_reg_state *reg; enum bpf_reg_type t; int i; @@ -425,9 +367,9 @@ static void print_bpf_insn(struct bpf_insn *insn) } } -static int pop_stack(struct verifier_env *env, int *prev_insn_idx) +static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx) { - struct verifier_stack_elem *elem; + struct bpf_verifier_stack_elem *elem; int insn_idx; if (env->head == NULL) @@ -444,12 +386,12 @@ static int pop_stack(struct verifier_env *env, int *prev_insn_idx) return insn_idx; } -static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx, - int prev_insn_idx) +static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) { - struct verifier_stack_elem *elem; + struct bpf_verifier_stack_elem *elem; - elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL); + elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); if (!elem) goto err; @@ -475,7 +417,7 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; -static void init_reg_state(struct reg_state *regs) +static void init_reg_state(struct bpf_reg_state *regs) { int i; @@ -491,7 +433,7 @@ static void init_reg_state(struct reg_state *regs) regs[BPF_REG_1].type = PTR_TO_CTX; } -static void mark_reg_unknown_value(struct reg_state *regs, u32 regno) +static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) { BUG_ON(regno >= MAX_BPF_REG); regs[regno].type = UNKNOWN_VALUE; @@ -504,7 +446,7 @@ enum reg_arg_type { DST_OP_NO_MARK /* same as above, check only, don't mark */ }; -static int check_reg_arg(struct reg_state *regs, u32 regno, +static int check_reg_arg(struct bpf_reg_state *regs, u32 regno, enum reg_arg_type t) { if (regno >= MAX_BPF_REG) { @@ -564,8 +506,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct verifier_state *state, int off, int size, - int value_regno) +static int check_stack_write(struct bpf_verifier_state *state, int off, + int size, int value_regno) { int i; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@ -590,7 +532,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, } else { /* regular write of data into stack */ state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = - (struct reg_state) {}; + (struct bpf_reg_state) {}; for (i = 0; i < size; i++) state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; @@ -598,7 +540,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, return 0; } -static int check_stack_read(struct verifier_state *state, int off, int size, +static int check_stack_read(struct bpf_verifier_state *state, int off, int size, int value_regno) { u8 *slot_type; @@ -639,7 +581,7 @@ static int check_stack_read(struct verifier_state *state, int off, int size, } /* check read/write into map element returned by bpf_map_lookup_elem() */ -static int check_map_access(struct verifier_env *env, u32 regno, int off, +static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { struct bpf_map *map = env->cur_state.regs[regno].map_ptr; @@ -654,24 +596,31 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off, #define MAX_PACKET_OFF 0xffff -static bool may_write_pkt_data(enum bpf_prog_type type) +static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, + const struct bpf_call_arg_meta *meta) { - switch (type) { + switch (env->prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: case BPF_PROG_TYPE_XDP: + if (meta) + return meta->pkt_access; + + env->seen_direct_write = true; return true; default: return false; } } -static int check_packet_access(struct verifier_env *env, u32 regno, int off, +static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *reg = ®s[regno]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *reg = ®s[regno]; off += reg->off; - if (off < 0 || off + size > reg->range) { + if (off < 0 || size <= 0 || off + size > reg->range) { verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno, reg->id, reg->off, reg->range); return -EACCES; @@ -680,9 +629,13 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, } /* check access to 'struct bpf_context' fields */ -static int check_ctx_access(struct verifier_env *env, int off, int size, +static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { + /* for analyzer ctx accesses are already validated and converted */ + if (env->analyzer_ops) + return 0; + if (env->prog->aux->ops->is_valid_access && env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) { /* remember the offset of last byte accessed in ctx */ @@ -695,7 +648,7 @@ static int check_ctx_access(struct verifier_env *env, int off, int size, return -EACCES; } -static bool is_pointer_value(struct verifier_env *env, int regno) +static bool is_pointer_value(struct bpf_verifier_env *env, int regno) { if (env->allow_ptr_leaks) return false; @@ -709,12 +662,13 @@ static bool is_pointer_value(struct verifier_env *env, int regno) } } -static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, - int off, int size) +static int check_ptr_alignment(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int off, int size) { if (reg->type != PTR_TO_PACKET) { if (off % size != 0) { - verbose("misaligned access off %d size %d\n", off, size); + verbose("misaligned access off %d size %d\n", + off, size); return -EACCES; } else { return 0; @@ -755,12 +709,12 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int check_mem_access(struct verifier_env *env, u32 regno, int off, +static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, int bpf_size, enum bpf_access_type t, int value_regno) { - struct verifier_state *state = &env->cur_state; - struct reg_state *reg = &state->regs[regno]; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *reg = &state->regs[regno]; int size, err = 0; if (reg->type == PTR_TO_STACK) @@ -817,7 +771,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, err = check_stack_read(state, off, size, value_regno); } } else if (state->regs[regno].type == PTR_TO_PACKET) { - if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) { + if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) { verbose("cannot write into packet\n"); return -EACCES; } @@ -846,9 +800,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, return err; } -static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) +static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; int err; if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || @@ -882,12 +836,12 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) * bytes from that pointer, make sure that it's within stack boundary * and all elements of stack are initialized */ -static int check_stack_boundary(struct verifier_env *env, int regno, +static int check_stack_boundary(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *regs = state->regs; int off, i; if (regs[regno].type != PTR_TO_STACK) { @@ -926,11 +880,11 @@ static int check_stack_boundary(struct verifier_env *env, int regno, return 0; } -static int check_func_arg(struct verifier_env *env, u32 regno, +static int check_func_arg(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) { - struct reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; + struct bpf_reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; enum bpf_reg_type expected_type, type = reg->type; int err = 0; @@ -950,8 +904,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return 0; } - if (type == PTR_TO_PACKET && !may_write_pkt_data(env->prog->type)) { - verbose("helper access to the packet is not allowed for clsact\n"); + if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) { + verbose("helper access to the packet is not allowed\n"); return -EACCES; } @@ -1135,10 +1089,10 @@ static int check_raw_mode(const struct bpf_func_proto *fn) return count > 1 ? -EINVAL : 0; } -static void clear_all_pkt_pointers(struct verifier_env *env) +static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs, *reg; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *regs = state->regs, *reg; int i; for (i = 0; i < MAX_BPF_REG; i++) @@ -1158,12 +1112,12 @@ static void clear_all_pkt_pointers(struct verifier_env *env) } } -static int check_call(struct verifier_env *env, int func_id) +static int check_call(struct bpf_verifier_env *env, int func_id) { - struct verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; - struct reg_state *regs = state->regs; - struct reg_state *reg; + struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *reg; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1191,6 +1145,7 @@ static int check_call(struct verifier_env *env, int func_id) changes_data = bpf_helper_changes_skb_data(fn->func); memset(&meta, 0, sizeof(meta)); + meta.pkt_access = fn->pkt_access; /* We only support one arg being in raw mode at the moment, which * is sufficient for the helper functions we have right now. @@ -1265,12 +1220,13 @@ static int check_call(struct verifier_env *env, int func_id) return 0; } -static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn) +static int check_packet_ptr_add(struct bpf_verifier_env *env, + struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = ®s[insn->dst_reg]; - struct reg_state *src_reg = ®s[insn->src_reg]; - struct reg_state tmp_reg; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; + struct bpf_reg_state tmp_reg; s32 imm; if (BPF_SRC(insn->code) == BPF_K) { @@ -1338,10 +1294,10 @@ add_imm: return 0; } -static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) +static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; u8 opcode = BPF_OP(insn->code); s64 imm_log2; @@ -1351,7 +1307,7 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) */ if (BPF_SRC(insn->code) == BPF_X) { - struct reg_state *src_reg = ®s[insn->src_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 && dst_reg->imm && opcode == BPF_ADD) { @@ -1440,11 +1396,12 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) return 0; } -static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) +static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = ®s[insn->dst_reg]; - struct reg_state *src_reg = ®s[insn->src_reg]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; u8 opcode = BPF_OP(insn->code); /* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn. @@ -1461,9 +1418,9 @@ static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) } /* check validity of 32-bit and 64-bit arithmetic operations */ -static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) +static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs, *dst_reg; + struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1637,21 +1594,42 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return 0; } -static void find_good_pkt_pointers(struct verifier_env *env, - struct reg_state *dst_reg) +static void find_good_pkt_pointers(struct bpf_verifier_state *state, + struct bpf_reg_state *dst_reg) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs, *reg; + struct bpf_reg_state *regs = state->regs, *reg; int i; - /* r2 = r3; - * r2 += 8 - * if (r2 > pkt_end) goto somewhere - * r2 == dst_reg, pkt_end == src_reg, - * r2=pkt(id=n,off=8,r=0) - * r3=pkt(id=n,off=0,r=0) - * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) - * so that range of bytes [r3, r3 + 8) is safe to access + + /* LLVM can generate two kind of checks: + * + * Type 1: + * + * r2 = r3; + * r2 += 8; + * if (r2 > pkt_end) goto <handle exception> + * <access okay> + * + * Where: + * r2 == dst_reg, pkt_end == src_reg + * r2=pkt(id=n,off=8,r=0) + * r3=pkt(id=n,off=0,r=0) + * + * Type 2: + * + * r2 = r3; + * r2 += 8; + * if (pkt_end >= r2) goto <access okay> + * <handle exception> + * + * Where: + * pkt_end == dst_reg, r2 == src_reg + * r2=pkt(id=n,off=8,r=0) + * r3=pkt(id=n,off=0,r=0) + * + * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) + * so that range of bytes [r3, r3 + 8) is safe to access. */ + for (i = 0; i < MAX_BPF_REG; i++) if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) regs[i].range = dst_reg->off; @@ -1665,11 +1643,11 @@ static void find_good_pkt_pointers(struct verifier_env *env, } } -static int check_cond_jmp_op(struct verifier_env *env, +static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { - struct reg_state *regs = env->cur_state.regs, *dst_reg; - struct verifier_state *other_branch; + struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state; + struct bpf_reg_state *regs = this_branch->regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1731,7 +1709,7 @@ static int check_cond_jmp_op(struct verifier_env *env, if (!other_branch) return -EFAULT; - /* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */ + /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */ if (BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { @@ -1750,13 +1728,17 @@ static int check_cond_jmp_op(struct verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(env, dst_reg); + find_good_pkt_pointers(this_branch, dst_reg); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && + dst_reg->type == PTR_TO_PACKET_END && + regs[insn->src_reg].type == PTR_TO_PACKET) { + find_good_pkt_pointers(other_branch, ®s[insn->src_reg]); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; } if (log_level) - print_verifier_state(&env->cur_state); + print_verifier_state(this_branch); return 0; } @@ -1769,9 +1751,9 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) } /* verify BPF_LD_IMM64 instruction */ -static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) +static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; int err; if (BPF_SIZE(insn->code) != BPF_DW) { @@ -1787,9 +1769,19 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) if (err) return err; - if (insn->src_reg == 0) - /* generic move 64-bit immediate into a register */ + if (insn->src_reg == 0) { + /* generic move 64-bit immediate into a register, + * only analyzer needs to collect the ld_imm value. + */ + u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; + + if (!env->analyzer_ops) + return 0; + + regs[insn->dst_reg].type = CONST_IMM; + regs[insn->dst_reg].imm = imm; return 0; + } /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */ BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD); @@ -1826,11 +1818,11 @@ static bool may_access_skb(enum bpf_prog_type type) * Output: * R0 - 8/16/32-bit skb data converted to cpu endianness */ -static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) +static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; u8 mode = BPF_MODE(insn->code); - struct reg_state *reg; + struct bpf_reg_state *reg; int i, err; if (!may_access_skb(env->prog->type)) { @@ -1916,7 +1908,7 @@ enum { BRANCH = 2, }; -#define STATE_LIST_MARK ((struct verifier_state_list *) -1L) +#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L) static int *insn_stack; /* stack of insns to process */ static int cur_stack; /* current stack index */ @@ -1927,7 +1919,7 @@ static int *insn_state; * w - next instruction * e - edge */ -static int push_insn(int t, int w, int e, struct verifier_env *env) +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) { if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) return 0; @@ -1968,7 +1960,7 @@ static int push_insn(int t, int w, int e, struct verifier_env *env) /* non-recursive depth-first-search to detect loops in BPF program * loop == back-edge in directed graph */ -static int check_cfg(struct verifier_env *env) +static int check_cfg(struct bpf_verifier_env *env) { struct bpf_insn *insns = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2077,7 +2069,8 @@ err_free: /* the following conditions reduce the number of explored insns * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet */ -static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) +static bool compare_ptrs_to_packet(struct bpf_reg_state *old, + struct bpf_reg_state *cur) { if (old->id != cur->id) return false; @@ -2152,9 +2145,10 @@ static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ -static bool states_equal(struct verifier_state *old, struct verifier_state *cur) +static bool states_equal(struct bpf_verifier_state *old, + struct bpf_verifier_state *cur) { - struct reg_state *rold, *rcur; + struct bpf_reg_state *rold, *rcur; int i; for (i = 0; i < MAX_BPF_REG; i++) { @@ -2194,9 +2188,9 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) * the same, check that stored pointers types * are the same as well. * Ex: explored safe path could have stored - * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8} + * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8} * but current path has stored: - * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16} + * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16} * such verifier states are not equivalent. * return false to continue verification of this path */ @@ -2207,10 +2201,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) return true; } -static int is_state_visited(struct verifier_env *env, int insn_idx) +static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { - struct verifier_state_list *new_sl; - struct verifier_state_list *sl; + struct bpf_verifier_state_list *new_sl; + struct bpf_verifier_state_list *sl; sl = env->explored_states[insn_idx]; if (!sl) @@ -2234,7 +2228,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx) * it will be rejected. Since there are no loops, we won't be * seeing this 'insn_idx' instruction again on the way to bpf_exit */ - new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER); + new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER); if (!new_sl) return -ENOMEM; @@ -2245,11 +2239,20 @@ static int is_state_visited(struct verifier_env *env, int insn_idx) return 0; } -static int do_check(struct verifier_env *env) +static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) { - struct verifier_state *state = &env->cur_state; + if (!env->analyzer_ops || !env->analyzer_ops->insn_hook) + return 0; + + return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx); +} + +static int do_check(struct bpf_verifier_env *env) +{ + struct bpf_verifier_state *state = &env->cur_state; struct bpf_insn *insns = env->prog->insnsi; - struct reg_state *regs = state->regs; + struct bpf_reg_state *regs = state->regs; int insn_cnt = env->prog->len; int insn_idx, prev_insn_idx = 0; int insn_processed = 0; @@ -2303,13 +2306,17 @@ static int do_check(struct verifier_env *env) print_bpf_insn(insn); } + err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); + if (err) + return err; + if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) return err; } else if (class == BPF_LDX) { - enum bpf_reg_type src_reg_type; + enum bpf_reg_type *prev_src_type, src_reg_type; /* check for reserved fields is already done */ @@ -2339,16 +2346,18 @@ static int do_check(struct verifier_env *env) continue; } - if (insn->imm == 0) { + prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; + + if (*prev_src_type == NOT_INIT) { /* saw a valid insn * dst_reg = *(u32 *)(src_reg + off) - * use reserved 'imm' field to mark this insn + * save type to validate intersecting paths */ - insn->imm = src_reg_type; + *prev_src_type = src_reg_type; - } else if (src_reg_type != insn->imm && + } else if (src_reg_type != *prev_src_type && (src_reg_type == PTR_TO_CTX || - insn->imm == PTR_TO_CTX)) { + *prev_src_type == PTR_TO_CTX)) { /* ABuser program is trying to use the same insn * dst_reg = *(u32*) (src_reg + off) * with different pointer types: @@ -2361,7 +2370,7 @@ static int do_check(struct verifier_env *env) } } else if (class == BPF_STX) { - enum bpf_reg_type dst_reg_type; + enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { err = check_xadd(env, insn); @@ -2389,11 +2398,13 @@ static int do_check(struct verifier_env *env) if (err) return err; - if (insn->imm == 0) { - insn->imm = dst_reg_type; - } else if (dst_reg_type != insn->imm && + prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; + + if (*prev_dst_type == NOT_INIT) { + *prev_dst_type = dst_reg_type; + } else if (dst_reg_type != *prev_dst_type && (dst_reg_type == PTR_TO_CTX || - insn->imm == PTR_TO_CTX)) { + *prev_dst_type == PTR_TO_CTX)) { verbose("same insn cannot be used with different pointers\n"); return -EINVAL; } @@ -2528,7 +2539,7 @@ static int check_map_prog_compatibility(struct bpf_map *map, /* look for pseudo eBPF instructions that access map FDs and * replace them with actual map pointers */ -static int replace_map_fd_with_map_ptr(struct verifier_env *env) +static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2625,7 +2636,7 @@ next_insn: } /* drop refcnt of maps used by the rejected program */ -static void release_maps(struct verifier_env *env) +static void release_maps(struct bpf_verifier_env *env) { int i; @@ -2634,7 +2645,7 @@ static void release_maps(struct verifier_env *env) } /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ -static void convert_pseudo_ld_imm64(struct verifier_env *env) +static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2648,21 +2659,37 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ -static int convert_ctx_accesses(struct verifier_env *env) +static int convert_ctx_accesses(struct bpf_verifier_env *env) { - struct bpf_insn *insn = env->prog->insnsi; - int insn_cnt = env->prog->len; - struct bpf_insn insn_buf[16]; + const struct bpf_verifier_ops *ops = env->prog->aux->ops; + const int insn_cnt = env->prog->len; + struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i; + int i, cnt, delta = 0; - if (!env->prog->aux->ops->convert_ctx_access) + if (ops->gen_prologue) { + cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, + env->prog); + if (cnt >= ARRAY_SIZE(insn_buf)) { + verbose("bpf verifier is misconfigured\n"); + return -EINVAL; + } else if (cnt) { + new_prog = bpf_patch_insn_single(env->prog, 0, + insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + env->prog = new_prog; + delta += cnt - 1; + } + } + + if (!ops->convert_ctx_access) return 0; - for (i = 0; i < insn_cnt; i++, insn++) { - u32 insn_delta, cnt; + insn = env->prog->insnsi + delta; + for (i = 0; i < insn_cnt; i++, insn++) { if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) type = BPF_READ; @@ -2672,40 +2699,34 @@ static int convert_ctx_accesses(struct verifier_env *env) else continue; - if (insn->imm != PTR_TO_CTX) { - /* clear internal mark */ - insn->imm = 0; + if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX) continue; - } - cnt = env->prog->aux->ops-> - convert_ctx_access(type, insn->dst_reg, insn->src_reg, - insn->off, insn_buf, env->prog); + cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg, + insn->off, insn_buf, env->prog); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; } - new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt); + new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf, + cnt); if (!new_prog) return -ENOMEM; - insn_delta = cnt - 1; + delta += cnt - 1; /* keep walking new program and skip insns we just inserted */ env->prog = new_prog; - insn = new_prog->insnsi + i + insn_delta; - - insn_cnt += insn_delta; - i += insn_delta; + insn = new_prog->insnsi + i + delta; } return 0; } -static void free_states(struct verifier_env *env) +static void free_states(struct bpf_verifier_env *env) { - struct verifier_state_list *sl, *sln; + struct bpf_verifier_state_list *sl, *sln; int i; if (!env->explored_states) @@ -2728,19 +2749,24 @@ static void free_states(struct verifier_env *env) int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) { char __user *log_ubuf = NULL; - struct verifier_env *env; + struct bpf_verifier_env *env; int ret = -EINVAL; if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS) return -E2BIG; - /* 'struct verifier_env' can be global, but since it's not small, + /* 'struct bpf_verifier_env' can be global, but since it's not small, * allocate/free it every time bpf_check() is called */ - env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL); + env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); if (!env) return -ENOMEM; + env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * + (*prog)->len); + ret = -ENOMEM; + if (!env->insn_aux_data) + goto err_free_env; env->prog = *prog; /* grab the mutex to protect few globals used by verifier */ @@ -2759,12 +2785,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) /* log_* values have to be sane */ if (log_size < 128 || log_size > UINT_MAX >> 8 || log_level == 0 || log_ubuf == NULL) - goto free_env; + goto err_unlock; ret = -ENOMEM; log_buf = vmalloc(log_size); if (!log_buf) - goto free_env; + goto err_unlock; } else { log_level = 0; } @@ -2774,7 +2800,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) goto skip_full_check; env->explored_states = kcalloc(env->prog->len, - sizeof(struct verifier_state_list *), + sizeof(struct bpf_verifier_state_list *), GFP_USER); ret = -ENOMEM; if (!env->explored_states) @@ -2833,14 +2859,67 @@ skip_full_check: free_log_buf: if (log_level) vfree(log_buf); -free_env: if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release * them now. Otherwise free_bpf_prog_info() will release them. */ release_maps(env); *prog = env->prog; +err_unlock: + mutex_unlock(&bpf_verifier_lock); + vfree(env->insn_aux_data); +err_free_env: kfree(env); + return ret; +} + +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv) +{ + struct bpf_verifier_env *env; + int ret; + + env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); + if (!env) + return -ENOMEM; + + env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * + prog->len); + ret = -ENOMEM; + if (!env->insn_aux_data) + goto err_free_env; + env->prog = prog; + env->analyzer_ops = ops; + env->analyzer_priv = priv; + + /* grab the mutex to protect few globals used by verifier */ + mutex_lock(&bpf_verifier_lock); + + log_level = 0; + + env->explored_states = kcalloc(env->prog->len, + sizeof(struct bpf_verifier_state_list *), + GFP_KERNEL); + ret = -ENOMEM; + if (!env->explored_states) + goto skip_full_check; + + ret = check_cfg(env); + if (ret < 0) + goto skip_full_check; + + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + + ret = do_check(env); + +skip_full_check: + while (pop_stack(env, NULL) >= 0); + free_states(env); + mutex_unlock(&bpf_verifier_lock); + vfree(env->insn_aux_data); +err_free_env: + kfree(env); return ret; } +EXPORT_SYMBOL_GPL(bpf_analyzer); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d1c51b7f5221..5e8dab5bf9ad 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -6270,6 +6270,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) if (cgroup_sk_alloc_disabled) return; + /* Socket clone path */ + if (skcd->val) { + cgroup_get(sock_cgroup_ptr(skcd)); + return; + } + rcu_read_lock(); while (true) { diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config index c2de56ab0fce..7fa0c4ae6394 100644 --- a/kernel/configs/tiny.config +++ b/kernel/configs/tiny.config @@ -1,4 +1,12 @@ +# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_KERNEL_GZIP is not set +# CONFIG_KERNEL_BZIP2 is not set +# CONFIG_KERNEL_LZMA is not set CONFIG_KERNEL_XZ=y +# CONFIG_KERNEL_LZO is not set +# CONFIG_KERNEL_LZ4 is not set CONFIG_OPTIMIZE_INLINING=y +# CONFIG_SLAB is not set +# CONFIG_SLUB is not set CONFIG_SLOB=y diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c7fd2778ed50..c27e53326bef 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2069,6 +2069,20 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) mutex_unlock(&cpuset_mutex); } +/* + * Make sure the new task conform to the current state of its parent, + * which could have been changed by cpuset just after it inherits the + * state from the parent and before it sits on the cgroup's task list. + */ +void cpuset_fork(struct task_struct *task) +{ + if (task_css_is_root(task, cpuset_cgrp_id)) + return; + + set_cpus_allowed_ptr(task, ¤t->cpus_allowed); + task->mems_allowed = current->mems_allowed; +} + struct cgroup_subsys cpuset_cgrp_subsys = { .css_alloc = cpuset_css_alloc, .css_online = cpuset_css_online, @@ -2079,6 +2093,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .attach = cpuset_attach, .post_attach = cpuset_post_attach, .bind = cpuset_bind, + .fork = cpuset_fork, .legacy_cftypes = files, .early_init = true, }; diff --git a/kernel/events/core.c b/kernel/events/core.c index a7b8c1c75fa7..9fc3be0c7d01 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2496,11 +2496,11 @@ static int __perf_event_stop(void *info) return 0; } -static int perf_event_restart(struct perf_event *event) +static int perf_event_stop(struct perf_event *event, int restart) { struct stop_event_data sd = { .event = event, - .restart = 1, + .restart = restart, }; int ret = 0; @@ -3549,10 +3549,18 @@ static int perf_event_read(struct perf_event *event, bool group) .group = group, .ret = 0, }; - ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); - /* The event must have been read from an online CPU: */ - WARN_ON_ONCE(ret); - ret = ret ? : data.ret; + /* + * Purposely ignore the smp_call_function_single() return + * value. + * + * If event->oncpu isn't a valid CPU it means the event got + * scheduled out and that will have updated the event count. + * + * Therefore, either way, we'll have an up-to-date event count + * after this. + */ + (void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); + ret = data.ret; } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; unsigned long flags; @@ -4837,6 +4845,19 @@ static void ring_buffer_attach(struct perf_event *event, spin_unlock_irqrestore(&rb->event_lock, flags); } + /* + * Avoid racing with perf_mmap_close(AUX): stop the event + * before swizzling the event::rb pointer; if it's getting + * unmapped, its aux_mmap_count will be 0 and it won't + * restart. See the comment in __perf_pmu_output_stop(). + * + * Data will inevitably be lost when set_output is done in + * mid-air, but then again, whoever does it like this is + * not in for the data anyway. + */ + if (has_aux(event)) + perf_event_stop(event, 0); + rcu_assign_pointer(event->rb, rb); if (old_rb) { @@ -6112,7 +6133,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data) raw_spin_unlock_irqrestore(&ifh->lock, flags); if (restart) - perf_event_restart(event); + perf_event_stop(event, 1); } void perf_event_exec(void) @@ -6156,7 +6177,13 @@ static void __perf_event_output_stop(struct perf_event *event, void *data) /* * In case of inheritance, it will be the parent that links to the - * ring-buffer, but it will be the child that's actually using it: + * ring-buffer, but it will be the child that's actually using it. + * + * We are using event::rb to determine if the event should be stopped, + * however this may race with ring_buffer_attach() (through set_output), + * which will make us skip the event that actually needs to be stopped. + * So ring_buffer_attach() has to stop an aux event before re-assigning + * its rb pointer. */ if (rcu_dereference(parent->rb) == rb) ro->err = __perf_event_stop(&sd); @@ -6670,7 +6697,7 @@ static void __perf_addr_filters_adjust(struct perf_event *event, void *data) raw_spin_unlock_irqrestore(&ifh->lock, flags); if (restart) - perf_event_restart(event); + perf_event_stop(event, 1); } /* @@ -7933,7 +7960,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event) mmput(mm); restart: - perf_event_restart(event); + perf_event_stop(event, 1); } /* diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ae9b90dc9a5a..257fa460b846 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -330,15 +330,22 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, if (!rb) return NULL; - if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount)) + if (!rb_has_aux(rb)) goto err; /* - * If rb::aux_mmap_count is zero (and rb_has_aux() above went through), - * the aux buffer is in perf_mmap_close(), about to get freed. + * If aux_mmap_count is zero, the aux buffer is in perf_mmap_close(), + * about to get freed, so we leave immediately. + * + * Checking rb::aux_mmap_count and rb::refcount has to be done in + * the same order, see perf_mmap_close. Otherwise we end up freeing + * aux pages in this path, which is a bug, because in_atomic(). */ if (!atomic_read(&rb->aux_mmap_count)) - goto err_put; + goto err; + + if (!atomic_inc_not_zero(&rb->aux_refcount)) + goto err; /* * Nesting is not supported for AUX area, make sure nested diff --git a/kernel/exit.c b/kernel/exit.c index 2f974ae042a6..091a78be3b09 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -848,12 +848,7 @@ void do_exit(long code) TASKS_RCU(preempt_enable()); exit_notify(tsk, group_dead); proc_exit_connector(tsk); -#ifdef CONFIG_NUMA - task_lock(tsk); - mpol_put(tsk->mempolicy); - tsk->mempolicy = NULL; - task_unlock(tsk); -#endif + mpol_put_task_policy(tsk); #ifdef CONFIG_FUTEX if (unlikely(current->pi_state_cache)) kfree(current->pi_state_cache); diff --git a/kernel/fork.c b/kernel/fork.c index 52e725d4a866..beb31725f7e2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -799,6 +799,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm) EXPORT_SYMBOL(get_mm_exe_file); /** + * get_task_exe_file - acquire a reference to the task's executable file + * + * Returns %NULL if task's mm (if any) has no associated executable file or + * this is a kernel thread with borrowed mm (see the comment above get_task_mm). + * User must release file via fput(). + */ +struct file *get_task_exe_file(struct task_struct *task) +{ + struct file *exe_file = NULL; + struct mm_struct *mm; + + task_lock(task); + mm = task->mm; + if (mm) { + if (!(task->flags & PF_KTHREAD)) + exe_file = get_mm_exe_file(mm); + } + task_unlock(task); + return exe_file; +} +EXPORT_SYMBOL(get_task_exe_file); + +/** * get_task_mm - acquire a reference to the task's mm * * Returns %NULL if the task has no mm. Checks PF_KTHREAD (meaning @@ -913,14 +936,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) deactivate_mm(tsk, mm); /* - * If we're exiting normally, clear a user-space tid field if - * requested. We leave this alone when dying by signal, to leave - * the value intact in a core dump, and to save the unnecessary - * trouble, say, a killed vfork parent shouldn't touch this mm. - * Userland only wants this done for a sys_exit. + * Signal userspace if we're not exiting with a core dump + * because we want to leave the value intact for debugging + * purposes. */ if (tsk->clear_child_tid) { - if (!(tsk->flags & PF_SIGNALED) && + if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) && atomic_read(&mm->mm_users) > 1) { /* * We don't check the error code - if userspace has @@ -1404,7 +1425,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_start_time = ktime_get_boot_ns(); p->io_context = NULL; p->audit_context = NULL; - threadgroup_change_begin(current); cgroup_fork(p); #ifdef CONFIG_NUMA p->mempolicy = mpol_dup(p->mempolicy); @@ -1556,6 +1576,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; + threadgroup_change_begin(current); /* * Ensure that the cgroup subsystem policies allow the new process to be * forked. It should be noted the the new process's css_set can be changed @@ -1656,6 +1677,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, bad_fork_cancel_cgroup: cgroup_cancel_fork(p); bad_fork_free_pid: + threadgroup_change_end(current); if (pid != &init_struct_pid) free_pid(pid); bad_fork_cleanup_thread: @@ -1688,7 +1710,6 @@ bad_fork_cleanup_policy: mpol_put(p->mempolicy); bad_fork_cleanup_threadgroup_lock: #endif - threadgroup_change_end(current); delayacct_tsk_free(p); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 503bc2d348e5..037c321c5618 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -887,7 +887,10 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min, return 0; out: vfree(pi->sechdrs); + pi->sechdrs = NULL; + vfree(pi->purgatory_buf); + pi->purgatory_buf = NULL; return ret; } diff --git a/kernel/memremap.c b/kernel/memremap.c index 251d16b4cb41..b501e390bb34 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -247,6 +247,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data) align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); arch_remove_memory(align_start, align_size); + untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, "%s: failed to free all reserved pages\n", __func__); @@ -282,6 +283,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap) { resource_size_t key, align_start, align_size, align_end; + pgprot_t pgprot = PAGE_KERNEL; struct dev_pagemap *pgmap; struct page_map *page_map; int error, nid, is_ram; @@ -351,6 +353,11 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (nid < 0) nid = numa_mem_id(); + error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0, + align_size); + if (error) + goto err_pfn_remap; + error = arch_add_memory(nid, align_start, align_size, true); if (error) goto err_add_memory; @@ -371,6 +378,8 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, return __va(res->start); err_add_memory: + untrack_pfn(NULL, PHYS_PFN(align_start), align_size); + err_pfn_remap: err_radix: pgmap_radix_release(res); devres_free(page_map); diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 97b0df71303e..168ff442ebde 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -482,7 +482,16 @@ void pm_qos_update_request(struct pm_qos_request *req, return; } - cancel_delayed_work_sync(&req->work); + /* + * This function may be called very early during boot, for example, + * from of_clk_init(), where irq needs to stay disabled. + * cancel_delayed_work_sync() assumes that irq is enabled on + * invocation and re-enables it on return. Avoid calling it until + * workqueue is initialized. + */ + if (keventd_up()) + cancel_delayed_work_sync(&req->work); + __pm_qos_update_request(req, new_value); } EXPORT_SYMBOL_GPL(pm_qos_update_request); diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c index b69eb8a2876f..16bab471c7e2 100644 --- a/kernel/printk/nmi.c +++ b/kernel/printk/nmi.c @@ -99,27 +99,33 @@ again: return add; } -/* - * printk one line from the temporary buffer from @start index until - * and including the @end index. - */ -static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end) +static void printk_nmi_flush_line(const char *text, int len) { - const char *buf = s->buffer + start; - /* * The buffers are flushed in NMI only on panic. The messages must * go only into the ring buffer at this stage. Consoles will get * explicitly called later when a crashdump is not generated. */ if (in_nmi()) - printk_deferred("%.*s", (end - start) + 1, buf); + printk_deferred("%.*s", len, text); else - printk("%.*s", (end - start) + 1, buf); + printk("%.*s", len, text); } /* + * printk one line from the temporary buffer from @start index until + * and including the @end index. + */ +static void printk_nmi_flush_seq_line(struct nmi_seq_buf *s, + int start, int end) +{ + const char *buf = s->buffer + start; + + printk_nmi_flush_line(buf, (end - start) + 1); +} + +/* * Flush data from the associated per_CPU buffer. The function * can be called either via IRQ work or independently. */ @@ -150,9 +156,11 @@ more: * the buffer an unexpected way. If we printed something then * @len must only increase. */ - if (i && i >= len) - pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n", - i, len); + if (i && i >= len) { + const char *msg = "printk_nmi_flush: internal error\n"; + + printk_nmi_flush_line(msg, strlen(msg)); + } if (!len) goto out; /* Someone else has already flushed the buffer. */ @@ -166,14 +174,14 @@ more: /* Print line by line. */ for (; i < size; i++) { if (s->buffer[i] == '\n') { - print_nmi_seq_line(s, last_i, i); + printk_nmi_flush_seq_line(s, last_i, i); last_i = i + 1; } } /* Check if there was a partial line. */ if (last_i < size) { - print_nmi_seq_line(s, last_i, size - 1); - pr_cont("\n"); + printk_nmi_flush_seq_line(s, last_i, size - 1); + printk_nmi_flush_line("\n", strlen("\n")); } /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2a906f20fba7..44817c640e99 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2016,6 +2016,28 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) success = 1; /* we're going to change ->state */ cpu = task_cpu(p); + /* + * Ensure we load p->on_rq _after_ p->state, otherwise it would + * be possible to, falsely, observe p->on_rq == 0 and get stuck + * in smp_cond_load_acquire() below. + * + * sched_ttwu_pending() try_to_wake_up() + * [S] p->on_rq = 1; [L] P->state + * UNLOCK rq->lock -----. + * \ + * +--- RMB + * schedule() / + * LOCK rq->lock -----' + * UNLOCK rq->lock + * + * [task p] + * [S] p->state = UNINTERRUPTIBLE [L] p->on_rq + * + * Pairs with the UNLOCK+LOCK on rq->lock from the + * last wakeup of our task and the schedule that got our task + * current. + */ + smp_rmb(); if (p->on_rq && ttwu_remote(p, wake_flags)) goto stat; diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ef6c6c3f9d8a..0db7c8a2afe2 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, ptrace_event(PTRACE_EVENT_SECCOMP, data); /* * The delivery of a fatal signal during event - * notification may silently skip tracer notification. - * Terminating the task now avoids executing a system - * call that may not be intended. + * notification may silently skip tracer notification, + * which could leave us with a potentially unmodified + * syscall that the tracer would have liked to have + * changed. Since the process is about to die, we just + * force the syscall to be skipped and let the signal + * kill the process and correctly handle any tracer exit + * notifications. */ if (fatal_signal_pending(current)) - do_exit(SIGSYS); + goto skip; /* Check if the tracer forced the syscall to be skipped. */ this_syscall = syscall_get_nr(current, task_pt_regs(current)); if (this_syscall < 0) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 204fdc86863d..2ec7c00228f3 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -908,10 +908,11 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts) ktime_t now, expires; int cpu = smp_processor_id(); + now = tick_nohz_start_idle(ts); + if (can_stop_idle_tick(cpu, ts)) { int was_stopped = ts->tick_stopped; - now = tick_nohz_start_idle(ts); ts->idle_calls++; expires = tick_nohz_stop_sched_tick(ts, now, cpu); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d3869b03d9fe..5dcb99281259 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -61,11 +61,9 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); -static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { - void *dst = (void *) (long) r1; - int ret, size = (int) r2; - void *unsafe_ptr = (void *) (long) r3; + int ret; ret = probe_kernel_read(dst, unsafe_ptr, size); if (unlikely(ret < 0)) @@ -83,12 +81,9 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; -static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, + u32, size) { - void *unsafe_ptr = (void *) (long) r1; - void *src = (void *) (long) r2; - int size = (int) r3; - /* * Ensure we're in user context which is safe for the helper to * run. This helper has no business in a kthread. @@ -130,9 +125,9 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) * limited trace_printk() * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed */ -static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) +BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, + u64, arg2, u64, arg3) { - char *fmt = (char *) (long) r1; bool str_seen = false; int mod[3] = {}; int fmt_cnt = 0; @@ -178,16 +173,16 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) switch (fmt_cnt) { case 1: - unsafe_addr = r3; - r3 = (long) buf; + unsafe_addr = arg1; + arg1 = (long) buf; break; case 2: - unsafe_addr = r4; - r4 = (long) buf; + unsafe_addr = arg2; + arg2 = (long) buf; break; case 3: - unsafe_addr = r5; - r5 = (long) buf; + unsafe_addr = arg3; + arg3 = (long) buf; break; } buf[0] = 0; @@ -209,9 +204,9 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) } return __trace_printk(1/* fake ip will not be printed */, fmt, - mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3, - mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4, - mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5); + mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1, + mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2, + mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3); } static const struct bpf_func_proto bpf_trace_printk_proto = { @@ -233,9 +228,8 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; struct bpf_array *array = container_of(map, struct bpf_array, map); unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; @@ -312,11 +306,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, return 0; } -static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) +BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, + u64, flags, void *, data, u64, size) { - struct pt_regs *regs = (struct pt_regs *)(long) r1; - struct bpf_map *map = (struct bpf_map *)(long) r2; - void *data = (void *)(long) r4; struct perf_raw_record raw = { .frag = { .size = size, @@ -367,7 +359,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, return __bpf_perf_event_output(regs, map, flags, &raw); } -static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_task) { return (long) current; } @@ -378,16 +370,13 @@ static const struct bpf_func_proto bpf_get_current_task_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) { - struct bpf_map *map = (struct bpf_map *)(long)r1; struct bpf_array *array = container_of(map, struct bpf_array, map); struct cgroup *cgrp; - u32 idx = (u32)r2; if (unlikely(in_interrupt())) return -EINVAL; - if (unlikely(idx >= array->map.max_entries)) return -E2BIG; @@ -481,16 +470,17 @@ static struct bpf_prog_type_list kprobe_tl = { .type = BPF_PROG_TYPE_KPROBE, }; -static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map, + u64, flags, void *, data, u64, size) { + struct pt_regs *regs = *(struct pt_regs **)tp_buff; + /* * r1 points to perf tracepoint buffer where first 8 bytes are hidden * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it - * from there and call the same bpf_perf_event_output() helper + * from there and call the same bpf_perf_event_output() helper inline. */ - u64 ctx = *(long *)(uintptr_t)r1; - - return bpf_perf_event_output(ctx, r2, index, r4, size); + return ____bpf_perf_event_output(regs, map, flags, data, size); } static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { @@ -504,11 +494,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { .arg5_type = ARG_CONST_STACK_SIZE, }; -static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map, + u64, flags) { - u64 ctx = *(long *)(uintptr_t)r1; + struct pt_regs *regs = *(struct pt_regs **)tp_buff; - return bpf_get_stackid(ctx, r2, r3, r4, r5); + /* + * Same comment as in bpf_perf_event_output_tp(), only that this time + * the other helper's function body cannot be inlined due to being + * external, thus we need to call raw helper function. + */ + return bpf_get_stackid((unsigned long) regs, (unsigned long) map, + flags, 0, 0); } static const struct bpf_func_proto bpf_get_stackid_proto_tp = { @@ -583,18 +580,18 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg, switch (ctx_off) { case offsetof(struct bpf_perf_event_data, sample_period): BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64)); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, data)), - dst_reg, src_reg, + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, + data), dst_reg, src_reg, offsetof(struct bpf_perf_event_data_kern, data)); *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg, offsetof(struct perf_sample_data, period)); break; default: - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct bpf_perf_event_data_kern, regs)), - dst_reg, src_reg, + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, + regs), dst_reg, src_reg, offsetof(struct bpf_perf_event_data_kern, regs)); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(long)), - dst_reg, dst_reg, ctx_off); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off); break; } |