diff options
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r-- | kernel/bpf/verifier.c | 4862 |
1 files changed, 3588 insertions, 1274 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ddea9567f755..60611df77957 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -28,6 +28,8 @@ #include <linux/cpumask.h> #include <linux/bpf_mem_alloc.h> #include <net/xdp.h> +#include <linux/trace_events.h> +#include <linux/kallsyms.h> #include "disasm.h" @@ -172,7 +174,7 @@ static bool bpf_global_percpu_ma_set; /* verifier_state + insn_idx are pushed to stack when branch is encountered */ struct bpf_verifier_stack_elem { - /* verifer state is 'st' + /* verifier state is 'st' * before processing instruction 'insn_idx' * and after processing instruction 'prev_insn_idx' */ @@ -190,14 +192,12 @@ struct bpf_verifier_stack_elem { #define BPF_MAP_KEY_POISON (1ULL << 63) #define BPF_MAP_KEY_SEEN (1ULL << 62) -#define BPF_MAP_PTR_UNPRIV 1UL -#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \ - POISON_POINTER_DELTA)) -#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) - #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE 512 -static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx); +#define BPF_PRIV_STACK_MIN_SIZE 64 + +static int acquire_reference(struct bpf_verifier_env *env, int insn_idx); +static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id); static int release_reference(struct bpf_verifier_env *env, int ref_obj_id); static void invalidate_non_owning_refs(struct bpf_verifier_env *env); static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); @@ -209,21 +209,22 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg); static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { - return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON; + return aux->map_ptr_state.poison; } static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux) { - return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV; + return aux->map_ptr_state.unpriv; } static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, - const struct bpf_map *map, bool unpriv) + struct bpf_map *map, + bool unpriv, bool poison) { - BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV); unpriv |= bpf_map_ptr_unpriv(aux); - aux->map_ptr_state = (unsigned long)map | - (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL); + aux->map_ptr_state.unpriv = unpriv; + aux->map_ptr_state.poison = poison; + aux->map_ptr_state.map_ptr = map; } static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux) @@ -286,6 +287,7 @@ struct bpf_call_arg_meta { u32 ret_btf_id; u32 subprogno; struct btf_field *kptr_field; + s64 const_map_key; }; struct bpf_kfunc_call_arg_meta { @@ -336,6 +338,10 @@ struct bpf_kfunc_call_arg_meta { u8 spi; u8 frameno; } iter; + struct { + struct bpf_map *ptr; + int uid; + } map; u64 mem_size; }; @@ -383,11 +389,6 @@ static void verbose_invalid_scalar(struct bpf_verifier_env *env, verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval); } -static bool type_may_be_null(u32 type) -{ - return type & PTR_MAYBE_NULL; -} - static bool reg_not_null(const struct bpf_reg_state *reg) { enum bpf_reg_type type; @@ -501,8 +502,12 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id) } static bool is_sync_callback_calling_kfunc(u32 btf_id); +static bool is_async_callback_calling_kfunc(u32 btf_id); +static bool is_callback_calling_kfunc(u32 btf_id); static bool is_bpf_throw_kfunc(struct bpf_insn *insn); +static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id); + static bool is_sync_callback_calling_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_for_each_map_elem || @@ -528,6 +533,22 @@ static bool is_sync_callback_calling_insn(struct bpf_insn *insn) (bpf_pseudo_kfunc_call(insn) && is_sync_callback_calling_kfunc(insn->imm)); } +static bool is_async_callback_calling_insn(struct bpf_insn *insn) +{ + return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) || + (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm)); +} + +static bool is_may_goto_insn(struct bpf_insn *insn) +{ + return insn->code == (BPF_JMP | BPF_JCOND) && insn->src_reg == BPF_MAY_GOTO; +} + +static bool is_may_goto_insn_at(struct bpf_verifier_env *env, int insn_idx) +{ + return is_may_goto_insn(&env->prog->insnsi[insn_idx]); +} + static bool is_storage_get_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_storage_get || @@ -622,6 +643,11 @@ static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, return stack_slot_obj_get_spi(env, reg, "iter", nr_slots); } +static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + return stack_slot_obj_get_spi(env, reg, "irq_flag", 1); +} + static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) { switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { @@ -733,7 +759,7 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_ if (clone_ref_obj_id) id = clone_ref_obj_id; else - id = acquire_reference_state(env, insn_idx); + id = acquire_reference(env, insn_idx); if (id < 0) return id; @@ -995,7 +1021,7 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env, if (spi < 0) return spi; - id = acquire_reference_state(env, insn_idx); + id = acquire_reference(env, insn_idx); if (id < 0) return id; @@ -1117,10 +1143,136 @@ static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_s return 0; } +static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx); +static int release_irq_state(struct bpf_verifier_state *state, int id); + +static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env, + struct bpf_kfunc_call_arg_meta *meta, + struct bpf_reg_state *reg, int insn_idx) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + struct bpf_reg_state *st; + int spi, i, id; + + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return spi; + + id = acquire_irq_state(env, insn_idx); + if (id < 0) + return id; + + slot = &state->stack[spi]; + st = &slot->spilled_ptr; + + __mark_reg_known_zero(st); + st->type = PTR_TO_STACK; /* we don't have dedicated reg type */ + st->live |= REG_LIVE_WRITTEN; + st->ref_obj_id = id; + + for (i = 0; i < BPF_REG_SIZE; i++) + slot->slot_type[i] = STACK_IRQ_FLAG; + + mark_stack_slot_scratched(env, spi); + return 0; +} + +static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + struct bpf_reg_state *st; + int spi, i, err; + + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return spi; + + slot = &state->stack[spi]; + st = &slot->spilled_ptr; + + err = release_irq_state(env->cur_state, st->ref_obj_id); + WARN_ON_ONCE(err && err != -EACCES); + if (err) { + int insn_idx = 0; + + for (int i = 0; i < env->cur_state->acquired_refs; i++) { + if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) { + insn_idx = env->cur_state->refs[i].insn_idx; + break; + } + } + + verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n", + env->cur_state->active_irq_id, insn_idx); + return err; + } + + __mark_reg_not_init(env, st); + + /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */ + st->live |= REG_LIVE_WRITTEN; + + for (i = 0; i < BPF_REG_SIZE; i++) + slot->slot_type[i] = STACK_INVALID; + + mark_stack_slot_scratched(env, spi); + return 0; +} + +static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + int spi, i; + + /* For -ERANGE (i.e. spi not falling into allocated stack slots), we + * will do check_mem_access to check and update stack bounds later, so + * return true for that case. + */ + spi = irq_flag_get_spi(env, reg); + if (spi == -ERANGE) + return true; + if (spi < 0) + return false; + + slot = &state->stack[spi]; + + for (i = 0; i < BPF_REG_SIZE; i++) + if (slot->slot_type[i] == STACK_IRQ_FLAG) + return false; + return true; +} + +static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + struct bpf_reg_state *st; + int spi, i; + + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return -EINVAL; + + slot = &state->stack[spi]; + st = &slot->spilled_ptr; + + if (!st->ref_obj_id) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) + if (slot->slot_type[i] != STACK_IRQ_FLAG) + return -EINVAL; + return 0; +} + /* Check if given stack slot is "special": * - spilled register state (STACK_SPILL); * - dynptr state (STACK_DYNPTR); * - iter state (STACK_ITER). + * - irq flag state (STACK_IRQ_FLAG) */ static bool is_stack_slot_special(const struct bpf_stack_state *stack) { @@ -1130,6 +1282,7 @@ static bool is_stack_slot_special(const struct bpf_stack_state *stack) case STACK_SPILL: case STACK_DYNPTR: case STACK_ITER: + case STACK_IRQ_FLAG: return true; case STACK_INVALID: case STACK_MISC: @@ -1155,17 +1308,26 @@ static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack) stack->spilled_ptr.type == SCALAR_VALUE; } +static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack) +{ + return stack->slot_type[0] == STACK_SPILL && + stack->spilled_ptr.type == SCALAR_VALUE; +} + /* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which * case they are equivalent, or it's STACK_ZERO, in which case we preserve * more precise STACK_ZERO. - * Note, in uprivileged mode leaving STACK_INVALID is wrong, so we take - * env->allow_ptr_leaks into account and force STACK_MISC, if necessary. + * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged + * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is + * unnecessary as both are considered equivalent when loading data and pruning, + * in case of unprivileged mode it will be incorrect to allow reads of invalid + * slots. */ static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype) { if (*stype == STACK_ZERO) return; - if (env->allow_ptr_leaks && *stype == STACK_INVALID) + if (*stype == STACK_INVALID) return; *stype = STACK_MISC; } @@ -1235,7 +1397,7 @@ out: return arr ? arr : ZERO_SIZE_PTR; } -static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src) +static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src) { dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs, sizeof(struct bpf_reference_state), GFP_KERNEL); @@ -1243,6 +1405,10 @@ static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_fun return -ENOMEM; dst->acquired_refs = src->acquired_refs; + dst->active_locks = src->active_locks; + dst->active_preempt_locks = src->active_preempt_locks; + dst->active_rcu_lock = src->active_rcu_lock; + dst->active_irq_id = src->active_irq_id; return 0; } @@ -1259,7 +1425,7 @@ static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_st return 0; } -static int resize_reference_state(struct bpf_func_state *state, size_t n) +static int resize_reference_state(struct bpf_verifier_state *state, size_t n) { state->refs = realloc_array(state->refs, state->acquired_refs, n, sizeof(struct bpf_reference_state)); @@ -1302,61 +1468,146 @@ static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state * On success, returns a valid pointer id to associate with the register * On failure, returns a negative errno. */ -static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) +static struct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) { - struct bpf_func_state *state = cur_func(env); + struct bpf_verifier_state *state = env->cur_state; int new_ofs = state->acquired_refs; - int id, err; + int err; err = resize_reference_state(state, state->acquired_refs + 1); if (err) - return err; - id = ++env->id_gen; - state->refs[new_ofs].id = id; + return NULL; state->refs[new_ofs].insn_idx = insn_idx; - state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0; - return id; + return &state->refs[new_ofs]; +} + +static int acquire_reference(struct bpf_verifier_env *env, int insn_idx) +{ + struct bpf_reference_state *s; + + s = acquire_reference_state(env, insn_idx); + if (!s) + return -ENOMEM; + s->type = REF_TYPE_PTR; + s->id = ++env->id_gen; + return s->id; +} + +static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type, + int id, void *ptr) +{ + struct bpf_verifier_state *state = env->cur_state; + struct bpf_reference_state *s; + + s = acquire_reference_state(env, insn_idx); + if (!s) + return -ENOMEM; + s->type = type; + s->id = id; + s->ptr = ptr; + + state->active_locks++; + return 0; +} + +static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx) +{ + struct bpf_verifier_state *state = env->cur_state; + struct bpf_reference_state *s; + + s = acquire_reference_state(env, insn_idx); + if (!s) + return -ENOMEM; + s->type = REF_TYPE_IRQ; + s->id = ++env->id_gen; + + state->active_irq_id = s->id; + return s->id; } -/* release function corresponding to acquire_reference_state(). Idempotent. */ -static int release_reference_state(struct bpf_func_state *state, int ptr_id) +static void release_reference_state(struct bpf_verifier_state *state, int idx) { - int i, last_idx; + int last_idx; + size_t rem; + /* IRQ state requires the relative ordering of elements remaining the + * same, since it relies on the refs array to behave as a stack, so that + * it can detect out-of-order IRQ restore. Hence use memmove to shift + * the array instead of swapping the final element into the deleted idx. + */ last_idx = state->acquired_refs - 1; + rem = state->acquired_refs - idx - 1; + if (last_idx && idx != last_idx) + memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem); + memset(&state->refs[last_idx], 0, sizeof(*state->refs)); + state->acquired_refs--; + return; +} + +static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr) +{ + int i; + for (i = 0; i < state->acquired_refs; i++) { - if (state->refs[i].id == ptr_id) { - /* Cannot release caller references in callbacks */ - if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno) - return -EINVAL; - if (last_idx && i != last_idx) - memcpy(&state->refs[i], &state->refs[last_idx], - sizeof(*state->refs)); - memset(&state->refs[last_idx], 0, sizeof(*state->refs)); - state->acquired_refs--; + if (state->refs[i].type != type) + continue; + if (state->refs[i].id == id && state->refs[i].ptr == ptr) { + release_reference_state(state, i); + state->active_locks--; + return 0; + } + } + return -EINVAL; +} + +static int release_irq_state(struct bpf_verifier_state *state, int id) +{ + u32 prev_id = 0; + int i; + + if (id != state->active_irq_id) + return -EACCES; + + for (i = 0; i < state->acquired_refs; i++) { + if (state->refs[i].type != REF_TYPE_IRQ) + continue; + if (state->refs[i].id == id) { + release_reference_state(state, i); + state->active_irq_id = prev_id; return 0; + } else { + prev_id = state->refs[i].id; } } return -EINVAL; } +static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type, + int id, void *ptr) +{ + int i; + + for (i = 0; i < state->acquired_refs; i++) { + struct bpf_reference_state *s = &state->refs[i]; + + if (s->type != type) + continue; + + if (s->id == id && s->ptr == ptr) + return s; + } + return NULL; +} + static void free_func_state(struct bpf_func_state *state) { if (!state) return; - kfree(state->refs); kfree(state->stack); kfree(state); } -static void clear_jmp_history(struct bpf_verifier_state *state) -{ - kfree(state->jmp_history); - state->jmp_history = NULL; - state->jmp_history_cnt = 0; -} - static void free_verifier_state(struct bpf_verifier_state *state, bool free_self) { @@ -1366,7 +1617,7 @@ static void free_verifier_state(struct bpf_verifier_state *state, free_func_state(state->frame[i]); state->frame[i] = NULL; } - clear_jmp_history(state); + kfree(state->refs); if (free_self) kfree(state); } @@ -1377,12 +1628,7 @@ static void free_verifier_state(struct bpf_verifier_state *state, static int copy_func_state(struct bpf_func_state *dst, const struct bpf_func_state *src) { - int err; - - memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); - err = copy_reference_state(dst, src); - if (err) - return err; + memcpy(dst, src, offsetof(struct bpf_func_state, stack)); return copy_stack_state(dst, src); } @@ -1392,13 +1638,6 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, struct bpf_func_state *dst; int i, err; - dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, - src->jmp_history_cnt, sizeof(*dst_state->jmp_history), - GFP_USER); - if (!dst_state->jmp_history) - return -ENOMEM; - dst_state->jmp_history_cnt = src->jmp_history_cnt; - /* if dst has more stack frames then src frame, free them, this is also * necessary in case of exceptional exits using bpf_throw. */ @@ -1406,18 +1645,22 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, free_func_state(dst_state->frame[i]); dst_state->frame[i] = NULL; } + err = copy_reference_state(dst_state, src); + if (err) + return err; dst_state->speculative = src->speculative; - dst_state->active_rcu_lock = src->active_rcu_lock; + dst_state->in_sleepable = src->in_sleepable; dst_state->curframe = src->curframe; - dst_state->active_lock.ptr = src->active_lock.ptr; - dst_state->active_lock.id = src->active_lock.id; dst_state->branches = src->branches; dst_state->parent = src->parent; dst_state->first_insn_idx = src->first_insn_idx; dst_state->last_insn_idx = src->last_insn_idx; + dst_state->insn_hist_start = src->insn_hist_start; + dst_state->insn_hist_end = src->insn_hist_end; dst_state->dfs_depth = src->dfs_depth; dst_state->callback_unroll_depth = src->callback_unroll_depth; dst_state->used_as_loop_entry = src->used_as_loop_entry; + dst_state->may_goto_depth = src->may_goto_depth; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -1820,6 +2063,8 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) */ if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER)) reg->map_uid = reg->id; + if (btf_record_has_field(map->inner_map_meta->record, BPF_WORKQUEUE)) + reg->map_uid = reg->id; } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { reg->type = PTR_TO_XDP_SOCK; } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || @@ -2113,7 +2358,7 @@ static void __reg64_deduce_bounds(struct bpf_reg_state *reg) static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg) { /* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit - * values on both sides of 64-bit range in hope to have tigher range. + * values on both sides of 64-bit range in hope to have tighter range. * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff]. * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound @@ -2121,7 +2366,7 @@ static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg) * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff]. * We just need to make sure that derived bounds we are intersecting - * with are well-formed ranges in respecitve s64 or u64 domain, just + * with are well-formed ranges in respective s64 or u64 domain, just * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments. */ __u64 new_umin, new_umax; @@ -2151,6 +2396,44 @@ static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg) reg->smin_value = max_t(s64, reg->smin_value, new_smin); reg->smax_value = min_t(s64, reg->smax_value, new_smax); } + + /* Here we would like to handle a special case after sign extending load, + * when upper bits for a 64-bit range are all 1s or all 0s. + * + * Upper bits are all 1s when register is in a range: + * [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff] + * Upper bits are all 0s when register is in a range: + * [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff] + * Together this forms are continuous range: + * [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff] + * + * Now, suppose that register range is in fact tighter: + * [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R) + * Also suppose that it's 32-bit range is positive, + * meaning that lower 32-bits of the full 64-bit register + * are in the range: + * [0x0000_0000, 0x7fff_ffff] (W) + * + * If this happens, then any value in a range: + * [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff] + * is smaller than a lowest bound of the range (R): + * 0xffff_ffff_8000_0000 + * which means that upper bits of the full 64-bit register + * can't be all 1s, when lower bits are in range (W). + * + * Note that: + * - 0xffff_ffff_8000_0000 == (s64)S32_MIN + * - 0x0000_0000_7fff_ffff == (s64)S32_MAX + * These relations are used in the conditions below. + */ + if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) { + reg->smin_value = reg->s32_min_value; + reg->smax_value = reg->s32_max_value; + reg->umin_value = reg->s32_min_value; + reg->umax_value = reg->s32_max_value; + reg->var_off = tnum_intersect(reg->var_off, + tnum_range(reg->smin_value, reg->smax_value)); + } } static void __reg_deduce_bounds(struct bpf_reg_state *reg) @@ -2264,8 +2547,7 @@ static void __reg_assign_32_into_64(struct bpf_reg_state *reg) } /* Mark a register as having a completely unknown (scalar) value. */ -static void __mark_reg_unknown(const struct bpf_verifier_env *env, - struct bpf_reg_state *reg) +static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg) { /* * Clear type, off, and union(map_ptr, range) and @@ -2277,10 +2559,20 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env, reg->ref_obj_id = 0; reg->var_off = tnum_unknown; reg->frameno = 0; - reg->precise = !env->bpf_capable; + reg->precise = false; __mark_reg_unbounded(reg); } +/* Mark a register as having a completely unknown (scalar) value, + * initialize .precise as true when not bpf capable. + */ +static void __mark_reg_unknown(const struct bpf_verifier_env *env, + struct bpf_reg_state *reg) +{ + __mark_reg_unknown_imprecise(reg); + reg->precise = !env->bpf_capable; +} + static void mark_reg_unknown(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno) { @@ -2294,6 +2586,25 @@ static void mark_reg_unknown(struct bpf_verifier_env *env, __mark_reg_unknown(env, regs + regno); } +static int __mark_reg_s32_range(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, + u32 regno, + s32 s32_min, + s32 s32_max) +{ + struct bpf_reg_state *reg = regs + regno; + + reg->s32_min_value = max_t(s32, reg->s32_min_value, s32_min); + reg->s32_max_value = min_t(s32, reg->s32_max_value, s32_max); + + reg->smin_value = max_t(s64, reg->smin_value, s32_min); + reg->smax_value = min_t(s64, reg->smax_value, s32_max); + + reg_bounds_sync(reg); + + return reg_bounds_sanity_check(env, reg, "s32_range"); +} + static void __mark_reg_not_init(const struct bpf_verifier_env *env, struct bpf_reg_state *reg) { @@ -2328,6 +2639,8 @@ static void mark_btf_ld_reg(struct bpf_verifier_env *env, regs[regno].type = PTR_TO_BTF_ID | flag; regs[regno].btf = btf; regs[regno].btf_id = btf_id; + if (type_may_be_null(flag)) + regs[regno].id = ++env->id_gen; } #define DEF_NOT_SUBREG (0) @@ -2371,7 +2684,7 @@ static void init_func_state(struct bpf_verifier_env *env, /* Similar to push_stack(), but for async callbacks */ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx, - int subprog) + int subprog, bool is_sleepable) { struct bpf_verifier_stack_elem *elem; struct bpf_func_state *frame; @@ -2396,8 +2709,14 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env, * The caller state doesn't matter. * This is async callback. It starts in a fresh stack. * Initialize it similar to do_check_common(). + * But we do need to make sure to not clobber insn_hist, so we keep + * chaining insn_hist_start/insn_hist_end indices as for a normal + * child state. */ elem->st.branches = 1; + elem->st.in_sleepable = is_sleepable; + elem->st.insn_hist_start = env->cur_state->insn_hist_end; + elem->st.insn_hist_end = elem->st.insn_hist_start; frame = kzalloc(sizeof(*frame), GFP_KERNEL); if (!frame) goto err; @@ -2428,16 +2747,36 @@ static int cmp_subprogs(const void *a, const void *b) ((struct bpf_subprog_info *)b)->start; } +/* Find subprogram that contains instruction at 'off' */ +static struct bpf_subprog_info *find_containing_subprog(struct bpf_verifier_env *env, int off) +{ + struct bpf_subprog_info *vals = env->subprog_info; + int l, r, m; + + if (off >= env->prog->len || off < 0 || env->subprog_cnt == 0) + return NULL; + + l = 0; + r = env->subprog_cnt - 1; + while (l < r) { + m = l + (r - l + 1) / 2; + if (vals[m].start <= off) + l = m; + else + r = m - 1; + } + return &vals[l]; +} + +/* Find subprogram that starts exactly at 'off' */ static int find_subprog(struct bpf_verifier_env *env, int off) { struct bpf_subprog_info *p; - p = bsearch(&off, env->subprog_info, env->subprog_cnt, - sizeof(env->subprog_info[0]), cmp_subprogs); - if (!p) + p = find_containing_subprog(env, off); + if (!p || p->start != off) return -ENOENT; return p - env->subprog_info; - } static int add_subprog(struct bpf_verifier_env *env, int off) @@ -2653,10 +2992,16 @@ static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, b->module = mod; b->offset = offset; + /* sort() reorders entries by value, so b may no longer point + * to the right entry after this + */ sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), kfunc_btf_cmp_by_off, NULL); + } else { + btf = b->btf; } - return b->btf; + + return btf; } void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) @@ -2939,8 +3284,10 @@ static int check_subprogs(struct bpf_verifier_env *env) if (code == (BPF_JMP | BPF_CALL) && insn[i].src_reg == 0 && - insn[i].imm == BPF_FUNC_tail_call) + insn[i].imm == BPF_FUNC_tail_call) { subprog[cur_subprog].has_tail_call = true; + subprog[cur_subprog].tail_call_reachable = true; + } if (BPF_CLASS(code) == BPF_LD && (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) subprog[cur_subprog].has_ld_abs = true; @@ -3028,10 +3375,27 @@ static int mark_reg_read(struct bpf_verifier_env *env, return 0; } -static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +static int mark_stack_slot_obj_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + int spi, int nr_slots) { struct bpf_func_state *state = func(env, reg); - int spi, ret; + int err, i; + + for (i = 0; i < nr_slots; i++) { + struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr; + + err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64); + if (err) + return err; + + mark_stack_slot_scratched(env, spi - i); + } + return 0; +} + +static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + int spi; /* For CONST_PTR_TO_DYNPTR, it must have already been done by * check_reg_arg in check_helper_call and mark_btf_func_reg_size in @@ -3046,31 +3410,23 @@ static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state * * bounds and spi is the first dynptr slot. Simply mark stack slot as * read. */ - ret = mark_reg_read(env, &state->stack[spi].spilled_ptr, - state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64); - if (ret) - return ret; - return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr, - state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64); + return mark_stack_slot_obj_read(env, reg, spi, BPF_DYNPTR_NR_SLOTS); } static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi, int nr_slots) { - struct bpf_func_state *state = func(env, reg); - int err, i; - - for (i = 0; i < nr_slots; i++) { - struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr; - - err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64); - if (err) - return err; + return mark_stack_slot_obj_read(env, reg, spi, nr_slots); +} - mark_stack_slot_scratched(env, spi - i); - } +static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + int spi; - return 0; + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return spi; + return mark_stack_slot_obj_read(env, reg, spi, 1); } /* This function is supposed to be used by the following 32-bit optimization @@ -3172,7 +3528,8 @@ static int insn_def_regno(const struct bpf_insn *insn) case BPF_ST: return -1; case BPF_STX: - if (BPF_MODE(insn->code) == BPF_ATOMIC && + if ((BPF_MODE(insn->code) == BPF_ATOMIC || + BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) && (insn->imm & BPF_FETCH)) { if (insn->imm == BPF_CMPXCHG) return BPF_REG_0; @@ -3289,12 +3646,89 @@ static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx) return env->insn_aux_data[insn_idx].jmp_point; } +#define LR_FRAMENO_BITS 3 +#define LR_SPI_BITS 6 +#define LR_ENTRY_BITS (LR_SPI_BITS + LR_FRAMENO_BITS + 1) +#define LR_SIZE_BITS 4 +#define LR_FRAMENO_MASK ((1ull << LR_FRAMENO_BITS) - 1) +#define LR_SPI_MASK ((1ull << LR_SPI_BITS) - 1) +#define LR_SIZE_MASK ((1ull << LR_SIZE_BITS) - 1) +#define LR_SPI_OFF LR_FRAMENO_BITS +#define LR_IS_REG_OFF (LR_SPI_BITS + LR_FRAMENO_BITS) +#define LINKED_REGS_MAX 6 + +struct linked_reg { + u8 frameno; + union { + u8 spi; + u8 regno; + }; + bool is_reg; +}; + +struct linked_regs { + int cnt; + struct linked_reg entries[LINKED_REGS_MAX]; +}; + +static struct linked_reg *linked_regs_push(struct linked_regs *s) +{ + if (s->cnt < LINKED_REGS_MAX) + return &s->entries[s->cnt++]; + + return NULL; +} + +/* Use u64 as a vector of 6 10-bit values, use first 4-bits to track + * number of elements currently in stack. + * Pack one history entry for linked registers as 10 bits in the following format: + * - 3-bits frameno + * - 6-bits spi_or_reg + * - 1-bit is_reg + */ +static u64 linked_regs_pack(struct linked_regs *s) +{ + u64 val = 0; + int i; + + for (i = 0; i < s->cnt; ++i) { + struct linked_reg *e = &s->entries[i]; + u64 tmp = 0; + + tmp |= e->frameno; + tmp |= e->spi << LR_SPI_OFF; + tmp |= (e->is_reg ? 1 : 0) << LR_IS_REG_OFF; + + val <<= LR_ENTRY_BITS; + val |= tmp; + } + val <<= LR_SIZE_BITS; + val |= s->cnt; + return val; +} + +static void linked_regs_unpack(u64 val, struct linked_regs *s) +{ + int i; + + s->cnt = val & LR_SIZE_MASK; + val >>= LR_SIZE_BITS; + + for (i = 0; i < s->cnt; ++i) { + struct linked_reg *e = &s->entries[i]; + + e->frameno = val & LR_FRAMENO_MASK; + e->spi = (val >> LR_SPI_OFF) & LR_SPI_MASK; + e->is_reg = (val >> LR_IS_REG_OFF) & 0x1; + val >>= LR_ENTRY_BITS; + } +} + /* for any branch, call, exit record the history of jmps in the given state */ -static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur, - int insn_flags) +static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur, + int insn_flags, u64 linked_regs) { - u32 cnt = cur->jmp_history_cnt; - struct bpf_jmp_history_entry *p; + struct bpf_insn_hist_entry *p; size_t alloc_size; /* combine instruction flags if we already recorded this instruction */ @@ -3307,31 +3741,39 @@ static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_st "verifier insn history bug: insn_idx %d cur flags %x new flags %x\n", env->insn_idx, env->cur_hist_ent->flags, insn_flags); env->cur_hist_ent->flags |= insn_flags; + WARN_ONCE(env->cur_hist_ent->linked_regs != 0, + "verifier insn history bug: insn_idx %d linked_regs != 0: %#llx\n", + env->insn_idx, env->cur_hist_ent->linked_regs); + env->cur_hist_ent->linked_regs = linked_regs; return 0; } - cnt++; - alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p))); - p = krealloc(cur->jmp_history, alloc_size, GFP_USER); - if (!p) - return -ENOMEM; - cur->jmp_history = p; + if (cur->insn_hist_end + 1 > env->insn_hist_cap) { + alloc_size = size_mul(cur->insn_hist_end + 1, sizeof(*p)); + p = kvrealloc(env->insn_hist, alloc_size, GFP_USER); + if (!p) + return -ENOMEM; + env->insn_hist = p; + env->insn_hist_cap = alloc_size / sizeof(*p); + } - p = &cur->jmp_history[cnt - 1]; + p = &env->insn_hist[cur->insn_hist_end]; p->idx = env->insn_idx; p->prev_idx = env->prev_insn_idx; p->flags = insn_flags; - cur->jmp_history_cnt = cnt; + p->linked_regs = linked_regs; + + cur->insn_hist_end++; env->cur_hist_ent = p; return 0; } -static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st, - u32 hist_end, int insn_idx) +static struct bpf_insn_hist_entry *get_insn_hist_entry(struct bpf_verifier_env *env, + u32 hist_start, u32 hist_end, int insn_idx) { - if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx) - return &st->jmp_history[hist_end - 1]; + if (hist_end > hist_start && env->insn_hist[hist_end - 1].idx == insn_idx) + return &env->insn_hist[hist_end - 1]; return NULL; } @@ -3348,25 +3790,26 @@ static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_stat * history entry recording a jump from last instruction of parent state and * first instruction of given state. */ -static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, - u32 *history) +static int get_prev_insn_idx(const struct bpf_verifier_env *env, + struct bpf_verifier_state *st, + int insn_idx, u32 hist_start, u32 *hist_endp) { - u32 cnt = *history; + u32 hist_end = *hist_endp; + u32 cnt = hist_end - hist_start; - if (i == st->first_insn_idx) { + if (insn_idx == st->first_insn_idx) { if (cnt == 0) return -ENOENT; - if (cnt == 1 && st->jmp_history[0].idx == i) + if (cnt == 1 && env->insn_hist[hist_start].idx == insn_idx) return -ENOENT; } - if (cnt && st->jmp_history[cnt - 1].idx == i) { - i = st->jmp_history[cnt - 1].prev_idx; - (*history)--; + if (cnt && env->insn_hist[hist_end - 1].idx == insn_idx) { + (*hist_endp)--; + return env->insn_hist[hist_end - 1].prev_idx; } else { - i--; + return insn_idx - 1; } - return i; } static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) @@ -3486,6 +3929,11 @@ static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg) return bt->reg_masks[bt->frame] & (1 << reg); } +static inline bool bt_is_frame_reg_set(struct backtrack_state *bt, u32 frame, u32 reg) +{ + return bt->reg_masks[frame] & (1 << reg); +} + static inline bool bt_is_frame_slot_set(struct backtrack_state *bt, u32 frame, u32 slot) { return bt->stack_masks[frame] & (1ull << slot); @@ -3530,6 +3978,42 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask) } } +/* If any register R in hist->linked_regs is marked as precise in bt, + * do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs. + */ +static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_insn_hist_entry *hist) +{ + struct linked_regs linked_regs; + bool some_precise = false; + int i; + + if (!hist || hist->linked_regs == 0) + return; + + linked_regs_unpack(hist->linked_regs, &linked_regs); + for (i = 0; i < linked_regs.cnt; ++i) { + struct linked_reg *e = &linked_regs.entries[i]; + + if ((e->is_reg && bt_is_frame_reg_set(bt, e->frameno, e->regno)) || + (!e->is_reg && bt_is_frame_slot_set(bt, e->frameno, e->spi))) { + some_precise = true; + break; + } + } + + if (!some_precise) + return; + + for (i = 0; i < linked_regs.cnt; ++i) { + struct linked_reg *e = &linked_regs.entries[i]; + + if (e->is_reg) + bt_set_frame_reg(bt, e->frameno, e->regno); + else + bt_set_frame_slot(bt, e->frameno, e->spi); + } +} + static bool calls_callback(struct bpf_verifier_env *env, int insn_idx); /* For given verifier state backtrack_insn() is called from the last insn to @@ -3542,7 +4026,7 @@ static bool calls_callback(struct bpf_verifier_env *env, int insn_idx); * - *was* processed previously during backtracking. */ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, - struct bpf_jmp_history_entry *hist, struct backtrack_state *bt) + struct bpf_insn_hist_entry *hist, struct backtrack_state *bt) { const struct bpf_insn_cbs cbs = { .cb_call = disasm_kfunc_name, @@ -3569,6 +4053,12 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); } + /* If there is a history record that some registers gained range at this insn, + * propagate precision marks to those registers, so that bt_is_reg_set() + * accounts for these registers. + */ + bt_sync_linked_regs(bt, hist); + if (class == BPF_ALU || class == BPF_ALU64) { if (!bt_is_reg_set(bt, dreg)) return 0; @@ -3584,7 +4074,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * sreg needs precision before this insn */ bt_clear_reg(bt, dreg); - bt_set_reg(bt, sreg); + if (sreg != BPF_REG_FP) + bt_set_reg(bt, sreg); } else { /* dreg = K * dreg needs precision after this insn. @@ -3600,7 +4091,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, * both dreg and sreg need precision * before this insn */ - bt_set_reg(bt, sreg); + if (sreg != BPF_REG_FP) + bt_set_reg(bt, sreg); } /* else dreg += K * dreg still needs precision before this insn */ @@ -3796,7 +4288,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, */ bt_set_reg(bt, dreg); bt_set_reg(bt, sreg); - /* else dreg <cond> K + } else if (BPF_SRC(insn->code) == BPF_K) { + /* dreg <cond> K * Only dreg still needs precision before * this insn, so for the K-based conditional * there is nothing new to be marked. @@ -3814,6 +4307,10 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, /* to be analyzed */ return -ENOTSUPP; } + /* Propagate precision marks to linked registers, to account for + * registers marked as precise in this function. + */ + bt_sync_linked_regs(bt, hist); return 0; } @@ -3941,96 +4438,6 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_ } } -static bool idset_contains(struct bpf_idset *s, u32 id) -{ - u32 i; - - for (i = 0; i < s->count; ++i) - if (s->ids[i] == id) - return true; - - return false; -} - -static int idset_push(struct bpf_idset *s, u32 id) -{ - if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids))) - return -EFAULT; - s->ids[s->count++] = id; - return 0; -} - -static void idset_reset(struct bpf_idset *s) -{ - s->count = 0; -} - -/* Collect a set of IDs for all registers currently marked as precise in env->bt. - * Mark all registers with these IDs as precise. - */ -static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st) -{ - struct bpf_idset *precise_ids = &env->idset_scratch; - struct backtrack_state *bt = &env->bt; - struct bpf_func_state *func; - struct bpf_reg_state *reg; - DECLARE_BITMAP(mask, 64); - int i, fr; - - idset_reset(precise_ids); - - for (fr = bt->frame; fr >= 0; fr--) { - func = st->frame[fr]; - - bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr)); - for_each_set_bit(i, mask, 32) { - reg = &func->regs[i]; - if (!reg->id || reg->type != SCALAR_VALUE) - continue; - if (idset_push(precise_ids, reg->id)) - return -EFAULT; - } - - bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); - for_each_set_bit(i, mask, 64) { - if (i >= func->allocated_stack / BPF_REG_SIZE) - break; - if (!is_spilled_scalar_reg(&func->stack[i])) - continue; - reg = &func->stack[i].spilled_ptr; - if (!reg->id) - continue; - if (idset_push(precise_ids, reg->id)) - return -EFAULT; - } - } - - for (fr = 0; fr <= st->curframe; ++fr) { - func = st->frame[fr]; - - for (i = BPF_REG_0; i < BPF_REG_10; ++i) { - reg = &func->regs[i]; - if (!reg->id) - continue; - if (!idset_contains(precise_ids, reg->id)) - continue; - bt_set_frame_reg(bt, fr, i); - } - for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) { - if (!is_spilled_scalar_reg(&func->stack[i])) - continue; - reg = &func->stack[i].spilled_ptr; - if (!reg->id) - continue; - if (!idset_contains(precise_ids, reg->id)) - continue; - bt_set_frame_slot(bt, fr, i); - } - } - - return 0; -} - /* * __mark_chain_precision() backtracks BPF program instruction sequence and * chain of verifier states making sure that register *regno* (if regno >= 0) @@ -4038,7 +4445,7 @@ static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_veri * SCALARS, as well as any other registers and slots that contribute to * a tracked state of given registers/stack slots, depending on specific BPF * assembly instructions (see backtrack_insns() for exact instruction handling - * logic). This backtracking relies on recorded jmp_history and is able to + * logic). This backtracking relies on recorded insn_hist and is able to * traverse entire chain of parent states. This process ends only when all the * necessary registers/slots and their transitive dependencies are marked as * precise. @@ -4155,39 +4562,15 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) for (;;) { DECLARE_BITMAP(mask, 64); - u32 history = st->jmp_history_cnt; - struct bpf_jmp_history_entry *hist; + u32 hist_start = st->insn_hist_start; + u32 hist_end = st->insn_hist_end; + struct bpf_insn_hist_entry *hist; if (env->log.level & BPF_LOG_LEVEL2) { verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n", bt->frame, last_idx, first_idx, subseq_idx); } - /* If some register with scalar ID is marked as precise, - * make sure that all registers sharing this ID are also precise. - * This is needed to estimate effect of find_equal_scalars(). - * Do this at the last instruction of each state, - * bpf_reg_state::id fields are valid for these instructions. - * - * Allows to track precision in situation like below: - * - * r2 = unknown value - * ... - * --- state #0 --- - * ... - * r1 = r2 // r1 and r2 now share the same ID - * ... - * --- state #1 {r1.id = A, r2.id = A} --- - * ... - * if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1 - * ... - * --- state #2 {r1.id = A, r2.id = A} --- - * r3 = r10 - * r3 += r1 // need to mark both r1 and r2 - */ - if (mark_precise_scalar_ids(env, st)) - return -EFAULT; - if (last_idx < 0) { /* we are at the entry into subprog, which * is expected for global funcs, but only if @@ -4220,7 +4603,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) err = 0; skip_first = false; } else { - hist = get_jmp_hist_entry(st, history, i); + hist = get_insn_hist_entry(env, hist_start, hist_end, i); err = backtrack_insn(env, i, subseq_idx, hist, bt); } if (err == -ENOTSUPP) { @@ -4237,7 +4620,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) */ return 0; subseq_idx = i; - i = get_prev_insn_idx(st, i, &history); + i = get_prev_insn_idx(env, st, i, hist_start, &hist_end); if (i == -ENOENT) break; if (i >= env->prog->len) { @@ -4298,7 +4681,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_frame_stack_mask(bt, fr)); verbose(env, "stack=%s: ", env->tmp_str_buf); - print_verifier_state(env, func, true); + print_verifier_state(env, st, fr, true); } } @@ -4355,6 +4738,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_MEM: case PTR_TO_FUNC: case PTR_TO_MAP_KEY: + case PTR_TO_ARENA: return true; default: return false; @@ -4380,20 +4764,6 @@ static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32) return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value; } -static bool __is_scalar_unbounded(struct bpf_reg_state *reg) -{ - return tnum_is_unknown(reg->var_off) && - reg->smin_value == S64_MIN && reg->smax_value == S64_MAX && - reg->umin_value == 0 && reg->umax_value == U64_MAX && - reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX && - reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX; -} - -static bool register_is_bounded(struct bpf_reg_state *reg) -{ - return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg); -} - static bool __is_pointer_value(bool allow_ptr_leaks, const struct bpf_reg_state *reg) { @@ -4403,6 +4773,30 @@ static bool __is_pointer_value(bool allow_ptr_leaks, return reg->type != SCALAR_VALUE; } +static void assign_scalar_id_before_mov(struct bpf_verifier_env *env, + struct bpf_reg_state *src_reg) +{ + if (src_reg->type != SCALAR_VALUE) + return; + + if (src_reg->id & BPF_ADD_CONST) { + /* + * The verifier is processing rX = rY insn and + * rY->id has special linked register already. + * Cleared it, since multiple rX += const are not supported. + */ + src_reg->id = 0; + src_reg->off = 0; + } + + if (!src_reg->id && !tnum_is_const(src_reg->var_off)) + /* Ensure that src_reg has a valid ID that will be copied to + * dst_reg and then will be used by sync_linked_regs() to + * propagate min/max range. + */ + src_reg->id = ++env->id_gen; +} + /* Copy src state preserving dst->parent and dst->live fields */ static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src) { @@ -4438,6 +4832,36 @@ static bool is_bpf_st_mem(struct bpf_insn *insn) return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM; } +static int get_reg_width(struct bpf_reg_state *reg) +{ + return fls64(reg->umax_value); +} + +/* See comment for mark_fastcall_pattern_for_call() */ +static void check_fastcall_stack_contract(struct bpf_verifier_env *env, + struct bpf_func_state *state, int insn_idx, int off) +{ + struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno]; + struct bpf_insn_aux_data *aux = env->insn_aux_data; + int i; + + if (subprog->fastcall_stack_off <= off || aux[insn_idx].fastcall_pattern) + return; + /* access to the region [max_stack_depth .. fastcall_stack_off) + * from something that is not a part of the fastcall pattern, + * disable fastcall rewrites for current subprogram by setting + * fastcall_stack_off to a value smaller than any possible offset. + */ + subprog->fastcall_stack_off = S16_MIN; + /* reset fastcall aux flags within subprogram, + * happens at most once per subprogram + */ + for (i = subprog->start; i < (subprog + 1)->start; ++i) { + aux[i].fastcall_spills_num = 0; + aux[i].fastcall_pattern = 0; + } +} + /* check_stack_{read,write}_fixed_off functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ @@ -4458,6 +4882,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, */ if (!env->allow_ptr_leaks && is_spilled_reg(&state->stack[spi]) && + !is_spilled_scalar_reg(&state->stack[spi]) && size != BPF_REG_SIZE) { verbose(env, "attempt to corrupt spilled pointer on stack\n"); return -EACCES; @@ -4486,19 +4911,27 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, if (err) return err; + check_fastcall_stack_contract(env, state, insn_idx, off); mark_stack_slot_scratched(env, spi); - if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && env->bpf_capable) { + if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) { + bool reg_value_fits; + + reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size; + /* Make sure that reg had an ID to build a relation on spill. */ + if (reg_value_fits) + assign_scalar_id_before_mov(env, reg); save_register_state(env, state, spi, reg, size); /* Break the relation on a narrowing spill. */ - if (fls64(reg->umax_value) > BITS_PER_BYTE * size) + if (!reg_value_fits) state->stack[spi].spilled_ptr.id = 0; } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) && - insn->imm != 0 && env->bpf_capable) { - struct bpf_reg_state fake_reg = {}; + env->bpf_capable) { + struct bpf_reg_state *tmp_reg = &env->fake_reg[0]; - __mark_reg_known(&fake_reg, insn->imm); - fake_reg.type = SCALAR_VALUE; - save_register_state(env, state, spi, &fake_reg, size); + memset(tmp_reg, 0, sizeof(*tmp_reg)); + __mark_reg_known(tmp_reg, insn->imm); + tmp_reg->type = SCALAR_VALUE; + save_register_state(env, state, spi, tmp_reg, size); } else if (reg && is_spillable_regtype(reg->type)) { /* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { @@ -4554,7 +4987,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, } if (insn_flags) - return push_jmp_history(env, env->cur_state, insn_flags); + return push_insn_history(env, env->cur_state, insn_flags, 0); return 0; } @@ -4613,6 +5046,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, return err; } + check_fastcall_stack_contract(env, state, insn_idx, min_off); /* Variable offset writes destroy any spilled pointers in range. */ for (i = min_off; i < max_off; i++) { u8 new_type, *stype; @@ -4640,7 +5074,20 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, return -EINVAL; } - /* Erase all spilled pointers. */ + /* If writing_zero and the spi slot contains a spill of value 0, + * maintain the spill type. + */ + if (writing_zero && *stype == STACK_SPILL && + is_spilled_scalar_reg(&state->stack[spi])) { + struct bpf_reg_state *spill_reg = &state->stack[spi].spilled_ptr; + + if (tnum_is_const(spill_reg->var_off) && spill_reg->var_off.value == 0) { + zero_used = true; + continue; + } + } + + /* Erase all other spilled pointers. */ state->stack[spi].spilled_ptr.type = NOT_INIT; /* Update the slot type. */ @@ -4738,6 +5185,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, reg = ®_state->stack[spi].spilled_ptr; mark_stack_slot_scratched(env, spi); + check_fastcall_stack_contract(env, state, env->insn_idx, off); if (is_spilled_reg(®_state->stack[spi])) { u8 spill_size = 1; @@ -4756,7 +5204,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, if (dst_regno < 0) return 0; - if (!(off % BPF_REG_SIZE) && size == spill_size) { + if (size <= spill_size && + bpf_stack_narrow_access_ok(off, size, spill_size)) { /* The earlier check_reg_arg() has decided the * subreg_def for this insn. Save it first. */ @@ -4764,6 +5213,12 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, copy_register_state(&state->regs[dst_regno], reg); state->regs[dst_regno].subreg_def = subreg_def; + + /* Break the relation on a narrowing fill. + * coerce_reg_to_size will adjust the boundaries. + */ + if (get_reg_width(reg) > size * BITS_PER_BYTE) + state->regs[dst_regno].id = 0; } else { int spill_cnt = 0, zero_cnt = 0; @@ -4839,7 +5294,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, insn_flags = 0; /* we are not restoring spilled register */ } if (insn_flags) - return push_jmp_history(env, env->cur_state, insn_flags); + return push_insn_history(env, env->cur_state, insn_flags, 0); return 0; } @@ -4851,7 +5306,7 @@ enum bpf_access_src { static int check_stack_range_initialized(struct bpf_verifier_env *env, int regno, int off, int access_size, bool zero_size_allowed, - enum bpf_access_src type, + enum bpf_access_type type, struct bpf_call_arg_meta *meta); static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) @@ -4884,13 +5339,14 @@ static int check_stack_read_var_off(struct bpf_verifier_env *env, /* Note that we pass a NULL meta, so raw access will not be permitted. */ err = check_stack_range_initialized(env, ptr_regno, off, size, - false, ACCESS_DIRECT, NULL); + false, BPF_READ, NULL); if (err) return err; min_off = reg->smin_value + off; max_off = reg->smax_value + off; mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno); + check_fastcall_stack_contract(env, ptr_state, env->insn_idx, min_off); return 0; } @@ -5211,19 +5667,27 @@ bad_type: return -EINVAL; } +static bool in_sleepable(struct bpf_verifier_env *env) +{ + return env->prog->sleepable || + (env->cur_state && env->cur_state->in_sleepable); +} + /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock() * can dereference RCU protected pointers and result is PTR_TRUSTED. */ static bool in_rcu_cs(struct bpf_verifier_env *env) { return env->cur_state->active_rcu_lock || - env->cur_state->active_lock.ptr || - !env->prog->aux->sleepable; + env->cur_state->active_locks || + !in_sleepable(env); } /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */ BTF_SET_START(rcu_protected_types) +#ifdef CONFIG_NET BTF_ID(struct, prog_test_ref_kfunc) +#endif #ifdef CONFIG_CGROUPS BTF_ID(struct, cgroup) #endif @@ -5231,6 +5695,9 @@ BTF_ID(struct, cgroup) BTF_ID(struct, bpf_cpumask) #endif BTF_ID(struct, task_struct) +#ifdef CONFIG_CRYPTO +BTF_ID(struct, bpf_crypto_ctx) +#endif BTF_SET_END(rcu_protected_types) static bool rcu_protected_object(const struct btf *btf, u32 btf_id) @@ -5284,6 +5751,22 @@ static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr return ret; } +static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno, + struct btf_field *field) +{ + struct bpf_reg_state *reg; + const struct btf_type *t; + + t = btf_type_by_id(field->kptr.btf, field->kptr.btf_id); + mark_reg_known_zero(env, cur_regs(env), regno); + reg = reg_state(env, regno); + reg->type = PTR_TO_MEM | PTR_MAYBE_NULL; + reg->mem_size = t->size; + reg->id = ++env->id_gen; + + return 0; +} + static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, int value_regno, int insn_idx, struct btf_field *kptr_field) @@ -5312,16 +5795,20 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, verbose(env, "store to referenced kptr disallowed\n"); return -EACCES; } + if (class != BPF_LDX && kptr_field->type == BPF_UPTR) { + verbose(env, "store to uptr disallowed\n"); + return -EACCES; + } if (class == BPF_LDX) { - val_reg = reg_state(env, value_regno); + if (kptr_field->type == BPF_UPTR) + return mark_uptr_ld_reg(env, value_regno, kptr_field); + /* We can simply mark the value_regno receiving the pointer * value from map as PTR_TO_BTF_ID, with the correct type. */ mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf, kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field)); - /* For mark_ptr_or_null_reg */ - val_reg->id = ++env->id_gen; } else if (class == BPF_STX) { val_reg = reg_state(env, value_regno); if (!register_is_null(val_reg) && @@ -5368,27 +5855,32 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, * this program. To check that [x1, x2) overlaps with [y1, y2), * it is sufficient to check x1 < y2 && y1 < x2. */ - if (reg->smin_value + off < p + btf_field_type_size(field->type) && + if (reg->smin_value + off < p + field->size && p < reg->umax_value + off + size) { switch (field->type) { case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: + case BPF_UPTR: if (src != ACCESS_DIRECT) { - verbose(env, "kptr cannot be accessed indirectly by helper\n"); + verbose(env, "%s cannot be accessed indirectly by helper\n", + btf_field_type_name(field->type)); return -EACCES; } if (!tnum_is_const(reg->var_off)) { - verbose(env, "kptr access cannot have variable offset\n"); + verbose(env, "%s access cannot have variable offset\n", + btf_field_type_name(field->type)); return -EACCES; } if (p != off + reg->var_off.value) { - verbose(env, "kptr access misaligned expected=%u off=%llu\n", + verbose(env, "%s access misaligned expected=%u off=%llu\n", + btf_field_type_name(field->type), p, off + reg->var_off.value); return -EACCES; } if (size != bpf_size_to_bytes(BPF_DW)) { - verbose(env, "kptr access size must be BPF_DW\n"); + verbose(env, "%s access size must be BPF_DW\n", + btf_field_type_name(field->type)); return -EACCES; } break; @@ -5491,11 +5983,13 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, /* check access to 'struct bpf_context' fields. Supports fixed offsets only */ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type, - struct btf **btf, u32 *btf_id) + struct btf **btf, u32 *btf_id, bool *is_retval, bool is_ldsx) { struct bpf_insn_access_aux info = { .reg_type = *reg_type, .log = &env->log, + .is_retval = false, + .is_ldsx = is_ldsx, }; if (env->ops->is_valid_access && @@ -5508,6 +6002,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, * type of narrower access. */ *reg_type = info.reg_type; + *is_retval = info.is_retval; if (base_type(*reg_type) == PTR_TO_BTF_ID) { *btf = info.btf; @@ -5616,6 +6111,13 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno) return reg->type == PTR_TO_FLOW_KEYS; } +static bool is_arena_reg(struct bpf_verifier_env *env, int regno) +{ + const struct bpf_reg_state *reg = reg_state(env, regno); + + return reg->type == PTR_TO_ARENA; +} + static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = { #ifdef CONFIG_NET [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK], @@ -5632,7 +6134,8 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg) return true; /* Types listed in the reg2btf_ids are always trusted */ - if (reg2btf_ids[base_type(reg->type)]) + if (reg2btf_ids[base_type(reg->type)] && + !bpf_type_has_unsafe_modifiers(reg->type)) return true; /* If a register is not referenced, it is trusted if it has the @@ -5763,6 +6266,8 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, case PTR_TO_XDP_SOCK: pointer_desc = "xdp_sock "; break; + case PTR_TO_ARENA: + return 0; default: break; } @@ -5770,23 +6275,65 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, strict); } +static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog) +{ + if (!bpf_jit_supports_private_stack()) + return NO_PRIV_STACK; + + /* bpf_prog_check_recur() checks all prog types that use bpf trampoline + * while kprobe/tp/perf_event/raw_tp don't use trampoline hence checked + * explicitly. + */ + switch (prog->type) { + case BPF_PROG_TYPE_KPROBE: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + return PRIV_STACK_ADAPTIVE; + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_LSM: + case BPF_PROG_TYPE_STRUCT_OPS: + if (prog->aux->priv_stack_requested || bpf_prog_check_recur(prog)) + return PRIV_STACK_ADAPTIVE; + fallthrough; + default: + break; + } + + return NO_PRIV_STACK; +} + +static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth) +{ + if (env->prog->jit_requested) + return round_up(stack_depth, 16); + + /* round up to 32-bytes, since this is granularity + * of interpreter stack size + */ + return round_up(max_t(u32, stack_depth, 1), 32); +} + /* starting from main bpf function walk all instructions of the function * and recursively walk all callees that given function can call. * Ignore jump and exit insns. * Since recursion is prevented by check_cfg() this algorithm * only needs a local stack of MAX_CALL_FRAMES to remember callsites */ -static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx) +static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, + bool priv_stack_supported) { struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn = env->prog->insnsi; - int depth = 0, frame = 0, i, subprog_end; + int depth = 0, frame = 0, i, subprog_end, subprog_depth; bool tail_call_reachable = false; int ret_insn[MAX_CALL_FRAMES]; int ret_prog[MAX_CALL_FRAMES]; int j; i = subprog[idx].start; + if (!priv_stack_supported) + subprog[idx].priv_stack_mode = NO_PRIV_STACK; process_func: /* protect against potential stack overflow that might happen when * bpf2bpf calls get combined with tailcalls. Limit the caller's stack @@ -5813,14 +6360,31 @@ process_func: depth); return -EACCES; } - /* round up to 32-bytes, since this is granularity - * of interpreter stack size - */ - depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); - if (depth > MAX_BPF_STACK) { - verbose(env, "combined stack size of %d calls is %d. Too large\n", - frame + 1, depth); - return -EACCES; + + subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth); + if (priv_stack_supported) { + /* Request private stack support only if the subprog stack + * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to + * avoid jit penalty if the stack usage is small. + */ + if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN && + subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) + subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE; + } + + if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) { + if (subprog_depth > MAX_BPF_STACK) { + verbose(env, "stack size of subprog %d is %d. Too large\n", + idx, subprog_depth); + return -EACCES; + } + } else { + depth += subprog_depth; + if (depth > MAX_BPF_STACK) { + verbose(env, "combined stack size of %d calls is %d. Too large\n", + frame + 1, depth); + return -EACCES; + } } continue_func: subprog_end = subprog[idx + 1].start; @@ -5877,6 +6441,8 @@ continue_func: } i = next_insn; idx = sidx; + if (!priv_stack_supported) + subprog[idx].priv_stack_mode = NO_PRIV_STACK; if (subprog[idx].has_tail_call) tail_call_reachable = true; @@ -5910,7 +6476,8 @@ continue_func: */ if (frame == 0) return 0; - depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32); + if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE) + depth -= round_up_stack_depth(env, subprog[idx].stack_depth); frame--; i = ret_insn[frame]; idx = ret_prog[frame]; @@ -5919,17 +6486,45 @@ continue_func: static int check_max_stack_depth(struct bpf_verifier_env *env) { + enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN; struct bpf_subprog_info *si = env->subprog_info; + bool priv_stack_supported; int ret; for (int i = 0; i < env->subprog_cnt; i++) { + if (si[i].has_tail_call) { + priv_stack_mode = NO_PRIV_STACK; + break; + } + } + + if (priv_stack_mode == PRIV_STACK_UNKNOWN) + priv_stack_mode = bpf_enable_priv_stack(env->prog); + + /* All async_cb subprogs use normal kernel stack. If a particular + * subprog appears in both main prog and async_cb subtree, that + * subprog will use normal kernel stack to avoid potential nesting. + * The reverse subprog traversal ensures when main prog subtree is + * checked, the subprogs appearing in async_cb subtrees are already + * marked as using normal kernel stack, so stack size checking can + * be done properly. + */ + for (int i = env->subprog_cnt - 1; i >= 0; i--) { if (!i || si[i].is_async_cb) { - ret = check_max_stack_depth_subprog(env, i); + priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE; + ret = check_max_stack_depth_subprog(env, i, priv_stack_supported); if (ret < 0) return ret; } - continue; } + + for (int i = 0; i < env->subprog_cnt; i++) { + if (si[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) { + env->prog->aux->jits_use_priv_stack = true; + break; + } + } + return 0; } @@ -6041,10 +6636,10 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) * values are also truncated so we push 64-bit bounds into * 32-bit bounds. Above were truncated < 32-bits already. */ - if (size < 4) { + if (size < 4) __mark_reg32_unbounded(reg); - reg_bounds_sync(reg); - } + + reg_bounds_sync(reg); } static void set_sext64_default_val(struct bpf_reg_state *reg, int size) @@ -6113,10 +6708,10 @@ static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size) /* both of s64_max/s64_min positive or negative */ if ((s64_max >= 0) == (s64_min >= 0)) { - reg->smin_value = reg->s32_min_value = s64_min; - reg->smax_value = reg->s32_max_value = s64_max; - reg->umin_value = reg->u32_min_value = s64_min; - reg->umax_value = reg->u32_max_value = s64_max; + reg->s32_min_value = reg->smin_value = s64_min; + reg->s32_max_value = reg->smax_value = s64_max; + reg->u32_min_value = reg->umin_value = s64_min; + reg->u32_max_value = reg->umax_value = s64_max; reg->var_off = tnum_range(s64_min, s64_max); return; } @@ -6137,6 +6732,7 @@ static void set_sext32_default_val(struct bpf_reg_state *reg, int size) } reg->u32_min_value = 0; reg->u32_max_value = U32_MAX; + reg->var_off = tnum_subreg(tnum_unknown); } static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size) @@ -6181,6 +6777,7 @@ static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size) reg->s32_max_value = s32_max; reg->u32_min_value = (u32)s32_min; reg->u32_max_value = (u32)s32_max; + reg->var_off = tnum_subreg(tnum_range(s32_min, s32_max)); return; } @@ -6242,6 +6839,7 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val, #define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu) #define BTF_TYPE_SAFE_RCU_OR_NULL(__type) __PASTE(__type, __safe_rcu_or_null) #define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted) +#define BTF_TYPE_SAFE_TRUSTED_OR_NULL(__type) __PASTE(__type, __safe_trusted_or_null) /* * Allow list few fields as RCU trusted or full trusted. @@ -6305,7 +6903,7 @@ BTF_TYPE_SAFE_TRUSTED(struct dentry) { struct inode *d_inode; }; -BTF_TYPE_SAFE_TRUSTED(struct socket) { +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket) { struct sock *sk; }; @@ -6340,11 +6938,20 @@ static bool type_is_trusted(struct bpf_verifier_env *env, BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry)); - BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted"); } +static bool type_is_trusted_or_null(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + const char *field_name, u32 btf_id) +{ + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket)); + + return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, + "__safe_trusted_or_null"); +} + static int check_ptr_to_btf_access(struct bpf_verifier_env *env, struct bpf_reg_state *regs, int regno, int off, int size, @@ -6453,6 +7060,8 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, */ if (type_is_trusted(env, reg, field_name, btf_id)) { flag |= PTR_TRUSTED; + } else if (type_is_trusted_or_null(env, reg, field_name, btf_id)) { + flag |= PTR_TRUSTED | PTR_MAYBE_NULL; } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) { if (type_is_rcu(env, reg, field_name, btf_id)) { /* ignore __rcu tag and mark it MEM_RCU */ @@ -6584,7 +7193,7 @@ static int check_stack_slot_within_bounds(struct bpf_verifier_env *env, static int check_stack_access_within_bounds( struct bpf_verifier_env *env, int regno, int off, int access_size, - enum bpf_access_src src, enum bpf_access_type type) + enum bpf_access_type type) { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = regs + regno; @@ -6593,10 +7202,7 @@ static int check_stack_access_within_bounds( int err; char *err_extra; - if (src == ACCESS_HELPER) - /* We don't know if helpers are reading or writing (or both). */ - err_extra = " indirect access to"; - else if (type == BPF_READ) + if (type == BPF_READ) err_extra = " read from"; else err_extra = " write to"; @@ -6618,6 +7224,11 @@ static int check_stack_access_within_bounds( err = check_stack_slot_within_bounds(env, min_off, state, type); if (!err && max_off > 0) err = -EINVAL; /* out of stack access into non-negative offsets */ + if (!err && access_size < 0) + /* access_size should not be negative (or overflow an int); others checks + * along the way should have prevented such an access. + */ + err = -EFAULT; /* invalid negative access size; integer overflow? */ if (err) { if (tnum_is_const(reg->var_off)) { @@ -6639,6 +7250,17 @@ static int check_stack_access_within_bounds( return grow_stack_state(env, state, -min_off /* size */); } +static bool get_func_retval_range(struct bpf_prog *prog, + struct bpf_retval_range *range) +{ + if (prog->type == BPF_PROG_TYPE_LSM && + prog->expected_attach_type == BPF_LSM_MAC && + !bpf_lsm_get_retval_range(prog, range)) { + return true; + } + return false; +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -6693,7 +7315,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return err; if (tnum_is_const(reg->var_off)) kptr_field = btf_record_find(reg->map_ptr->record, - off + reg->var_off.value, BPF_KPTR); + off + reg->var_off.value, BPF_KPTR | BPF_UPTR); if (kptr_field) { err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field); } else if (t == BPF_READ && value_regno >= 0) { @@ -6743,6 +7365,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) mark_reg_unknown(env, regs, value_regno); } else if (reg->type == PTR_TO_CTX) { + bool is_retval = false; + struct bpf_retval_range range; enum bpf_reg_type reg_type = SCALAR_VALUE; struct btf *btf = NULL; u32 btf_id = 0; @@ -6758,7 +7382,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return err; err = check_ctx_access(env, insn_idx, off, size, t, ®_type, &btf, - &btf_id); + &btf_id, &is_retval, is_ldsx); if (err) verbose_linfo(env, insn_idx, "; "); if (!err && t == BPF_READ && value_regno >= 0) { @@ -6767,7 +7391,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn * case, we know the offset is zero. */ if (reg_type == SCALAR_VALUE) { - mark_reg_unknown(env, regs, value_regno); + if (is_retval && get_func_retval_range(env->prog, &range)) { + err = __mark_reg_s32_range(env, regs, value_regno, + range.minval, range.maxval); + if (err) + return err; + } else { + mark_reg_unknown(env, regs, value_regno); + } } else { mark_reg_known_zero(env, regs, value_regno); @@ -6789,7 +7420,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else if (reg->type == PTR_TO_STACK) { /* Basic bounds checks. */ - err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t); + err = check_stack_access_within_bounds(env, regno, off, size, t); if (err) return err; @@ -6864,6 +7495,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_ARENA) { + if (t == BPF_READ && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str(env, reg->type)); @@ -6881,6 +7515,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return err; } +static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type, + bool allow_trust_mismatch); + static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) { int load_reg; @@ -6940,7 +7577,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg) || is_flow_key_reg(env, insn->dst_reg) || - is_sk_reg(env, insn->dst_reg)) { + is_sk_reg(env, insn->dst_reg) || + (is_arena_reg(env, insn->dst_reg) && !bpf_jit_supports_insn(insn, true))) { verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, reg_type_str(env, reg_state(env, insn->dst_reg)->type)); @@ -6976,6 +7614,11 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (err) return err; + if (is_arena_reg(env, insn->dst_reg)) { + err = save_aux_ptr_type(env, PTR_TO_ARENA, false); + if (err) + return err; + } /* Check whether we can write into the same memory. */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true, false); @@ -6997,13 +7640,11 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i static int check_stack_range_initialized( struct bpf_verifier_env *env, int regno, int off, int access_size, bool zero_size_allowed, - enum bpf_access_src type, struct bpf_call_arg_meta *meta) + enum bpf_access_type type, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *reg = reg_state(env, regno); struct bpf_func_state *state = func(env, reg); int err, min_off, max_off, i, j, slot, spi; - char *err_extra = type == ACCESS_HELPER ? " indirect" : ""; - enum bpf_access_type bounds_check_type; /* Some accesses can write anything into the stack, others are * read-only. */ @@ -7014,18 +7655,10 @@ static int check_stack_range_initialized( return -EACCES; } - if (type == ACCESS_HELPER) { - /* The bounds checks for writes are more permissive than for - * reads. However, if raw_mode is not set, we'll do extra - * checks below. - */ - bounds_check_type = BPF_WRITE; + if (type == BPF_WRITE) clobber = true; - } else { - bounds_check_type = BPF_READ; - } - err = check_stack_access_within_bounds(env, regno, off, access_size, - type, bounds_check_type); + + err = check_stack_access_within_bounds(env, regno, off, access_size, type); if (err) return err; @@ -7042,8 +7675,8 @@ static int check_stack_range_initialized( char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n", - regno, err_extra, tn_buf); + verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n", + regno, tn_buf); return -EACCES; } /* Only initialized buffer on stack is allowed to be accessed @@ -7096,7 +7729,7 @@ static int check_stack_range_initialized( slot = -i - 1; spi = slot / BPF_REG_SIZE; if (state->allocated_stack <= slot) { - verbose(env, "verifier bug: allocated_stack too small"); + verbose(env, "verifier bug: allocated_stack too small\n"); return -EFAULT; } @@ -7124,14 +7757,14 @@ static int check_stack_range_initialized( } if (tnum_is_const(reg->var_off)) { - verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n", - err_extra, regno, min_off, i - min_off, access_size); + verbose(env, "invalid read from stack R%d off %d+%d size %d\n", + regno, min_off, i - min_off, access_size); } else { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n", - err_extra, regno, tn_buf, i - min_off, access_size); + verbose(env, "invalid read from stack R%d var_off %s+%d size %d\n", + regno, tn_buf, i - min_off, access_size); } return -EACCES; mark: @@ -7151,7 +7784,8 @@ mark: } static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, - int access_size, bool zero_size_allowed, + int access_size, enum bpf_access_type access_type, + bool zero_size_allowed, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; @@ -7163,7 +7797,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); case PTR_TO_MAP_KEY: - if (meta && meta->raw_mode) { + if (access_type == BPF_WRITE) { verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type)); return -EACCES; @@ -7171,15 +7805,13 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_mem_region_access(env, regno, reg->off, access_size, reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: - if (check_map_access_type(env, regno, reg->off, access_size, - meta && meta->raw_mode ? BPF_WRITE : - BPF_READ)) + if (check_map_access_type(env, regno, reg->off, access_size, access_type)) return -EACCES; return check_map_access(env, regno, reg->off, access_size, zero_size_allowed, ACCESS_HELPER); case PTR_TO_MEM: if (type_is_rdonly_mem(reg->type)) { - if (meta && meta->raw_mode) { + if (access_type == BPF_WRITE) { verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type)); return -EACCES; @@ -7190,7 +7822,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, zero_size_allowed); case PTR_TO_BUF: if (type_is_rdonly_mem(reg->type)) { - if (meta && meta->raw_mode) { + if (access_type == BPF_WRITE) { verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type)); return -EACCES; @@ -7207,7 +7839,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_stack_range_initialized( env, regno, reg->off, access_size, - zero_size_allowed, ACCESS_HELPER, meta); + zero_size_allowed, access_type, meta); case PTR_TO_BTF_ID: return check_ptr_to_btf_access(env, regs, regno, reg->off, access_size, BPF_READ, -1); @@ -7218,7 +7850,6 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, * Dynamically check it now. */ if (!env->ops->convert_ctx_access) { - enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ; int offset = access_size - 1; /* Allow zero-byte read from PTR_TO_CTX */ @@ -7226,7 +7857,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return zero_size_allowed ? 0 : -EACCES; return check_mem_access(env, env->insn_idx, regno, offset, BPF_B, - atype, -1, false, false); + access_type, -1, false, false); } fallthrough; @@ -7251,6 +7882,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, */ static int check_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, u32 regno, + enum bpf_access_type access_type, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { @@ -7266,15 +7898,12 @@ static int check_mem_size_reg(struct bpf_verifier_env *env, */ meta->msize_max_value = reg->umax_value; - /* The register is SCALAR_VALUE; the access check - * happens using its boundaries. + /* The register is SCALAR_VALUE; the access check happens using + * its boundaries. For unprivileged variable accesses, disable + * raw mode so that the program is required to initialize all + * the memory that the helper could just partially fill up. */ if (!tnum_is_const(reg->var_off)) - /* For unprivileged variable accesses, disable raw - * mode so that the program is required to - * initialize all the memory that the helper could - * just partially fill up. - */ meta = NULL; if (reg->smin_value < 0) { @@ -7294,9 +7923,8 @@ static int check_mem_size_reg(struct bpf_verifier_env *env, regno); return -EACCES; } - err = check_helper_mem_access(env, regno - 1, - reg->umax_value, - zero_size_allowed, meta); + err = check_helper_mem_access(env, regno - 1, reg->umax_value, + access_type, zero_size_allowed, meta); if (!err) err = mark_chain_precision(env, regno); return err; @@ -7307,13 +7935,11 @@ static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg { bool may_be_null = type_may_be_null(reg->type); struct bpf_reg_state saved_reg; - struct bpf_call_arg_meta meta; int err; if (register_is_null(reg)) return 0; - memset(&meta, 0, sizeof(meta)); /* Assuming that the register contains a value check if the memory * access is safe. Temporarily save and restore the register's state as * the conversion shouldn't be visible to a caller. @@ -7323,10 +7949,8 @@ static int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg mark_ptr_not_null_reg(reg); } - err = check_helper_mem_access(env, regno, mem_size, true, &meta); - /* Check access for BPF_WRITE */ - meta.raw_mode = true; - err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta); + err = check_helper_mem_access(env, regno, mem_size, BPF_READ, true, NULL); + err = err ?: check_helper_mem_access(env, regno, mem_size, BPF_WRITE, true, NULL); if (may_be_null) *reg = saved_reg; @@ -7352,13 +7976,12 @@ static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg mark_ptr_not_null_reg(mem_reg); } - err = check_mem_size_reg(env, reg, regno, true, &meta); - /* Check access for BPF_WRITE */ - meta.raw_mode = true; - err = err ?: check_mem_size_reg(env, reg, regno, true, &meta); + err = check_mem_size_reg(env, reg, regno, BPF_READ, true, &meta); + err = err ?: check_mem_size_reg(env, reg, regno, BPF_WRITE, true, &meta); if (may_be_null) *mem_reg = saved_reg; + return err; } @@ -7381,7 +8004,7 @@ static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg * Since only one bpf_spin_lock is allowed the checks are simpler than * reg_is_refcounted() logic. The verifier needs to remember only * one spin_lock instead of array of acquired_refs. - * cur_state->active_lock remembers which map value element or allocated + * env->cur_state->active_locks remembers which map value element or allocated * object got locked and clears it after bpf_spin_unlock. */ static int process_spin_lock(struct bpf_verifier_env *env, int regno, @@ -7394,6 +8017,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, struct bpf_map *map = NULL; struct btf *btf = NULL; struct btf_record *rec; + int err; if (!is_const) { verbose(env, @@ -7425,16 +8049,23 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, return -EINVAL; } if (is_lock) { - if (cur->active_lock.ptr) { + void *ptr; + + if (map) + ptr = map; + else + ptr = btf; + + if (cur->active_locks) { verbose(env, "Locking two bpf_spin_locks are not allowed\n"); return -EINVAL; } - if (map) - cur->active_lock.ptr = map; - else - cur->active_lock.ptr = btf; - cur->active_lock.id = reg->id; + err = acquire_lock_state(env, env->insn_idx, REF_TYPE_LOCK, reg->id, ptr); + if (err < 0) { + verbose(env, "Failed to acquire lock state\n"); + return err; + } } else { void *ptr; @@ -7443,20 +8074,17 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, else ptr = btf; - if (!cur->active_lock.ptr) { + if (!cur->active_locks) { verbose(env, "bpf_spin_unlock without taking a lock\n"); return -EINVAL; } - if (cur->active_lock.ptr != ptr || - cur->active_lock.id != reg->id) { + + if (release_lock_state(env->cur_state, REF_TYPE_LOCK, reg->id, ptr)) { verbose(env, "bpf_spin_unlock of different lock\n"); return -EINVAL; } invalidate_non_owning_refs(env); - - cur->active_lock.ptr = NULL; - cur->active_lock.id = 0; } return 0; } @@ -7498,33 +8126,59 @@ static int process_timer_func(struct bpf_verifier_env *env, int regno, return 0; } +static int process_wq_func(struct bpf_verifier_env *env, int regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_map *map = reg->map_ptr; + u64 val = reg->var_off.value; + + if (map->record->wq_off != val + reg->off) { + verbose(env, "off %lld doesn't point to 'struct bpf_wq' that is at %d\n", + val + reg->off, map->record->wq_off); + return -EINVAL; + } + meta->map.uid = reg->map_uid; + meta->map.ptr = map; + return 0; +} + static int process_kptr_func(struct bpf_verifier_env *env, int regno, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; - struct bpf_map *map_ptr = reg->map_ptr; struct btf_field *kptr_field; + struct bpf_map *map_ptr; + struct btf_record *rec; u32 kptr_off; + if (type_is_ptr_alloc_obj(reg->type)) { + rec = reg_btf_record(reg); + } else { /* PTR_TO_MAP_VALUE */ + map_ptr = reg->map_ptr; + if (!map_ptr->btf) { + verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", + map_ptr->name); + return -EINVAL; + } + rec = map_ptr->record; + meta->map_ptr = map_ptr; + } + if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d doesn't have constant offset. kptr has to be at the constant offset\n", regno); return -EINVAL; } - if (!map_ptr->btf) { - verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n", - map_ptr->name); - return -EINVAL; - } - if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) { - verbose(env, "map '%s' has no valid kptr\n", map_ptr->name); + + if (!btf_record_has_field(rec, BPF_KPTR)) { + verbose(env, "R%d has no valid kptr\n", regno); return -EINVAL; } - meta->map_ptr = map_ptr; kptr_off = reg->off + reg->var_off.value; - kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR); + kptr_field = btf_record_find(rec, kptr_off, BPF_KPTR); if (!kptr_field) { verbose(env, "off=%d doesn't point to kptr\n", kptr_off); return -EACCES; @@ -7568,6 +8222,13 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; int err; + if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) { + verbose(env, + "arg#%d expected pointer to stack or const struct bpf_dynptr\n", + regno - 1); + return -EINVAL; + } + /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*): */ @@ -7618,7 +8279,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn if (!is_dynptr_reg_valid_init(env, reg)) { verbose(env, "Expected an initialized dynptr as arg #%d\n", - regno); + regno - 1); return -EINVAL; } @@ -7626,7 +8287,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) { verbose(env, "Expected a dynptr of type %s as arg #%d\n", - dynptr_type_str(arg_to_dynptr_type(arg_type)), regno); + dynptr_type_str(arg_to_dynptr_type(arg_type)), regno - 1); return -EINVAL; } @@ -7662,12 +8323,17 @@ static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta) return meta->kfunc_flags & KF_ITER_DESTROY; } -static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg) +static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg_idx, + const struct btf_param *arg) { /* btf_check_iter_kfuncs() guarantees that first argument of any iter * kfunc is iter state pointer */ - return arg == 0 && is_iter_kfunc(meta); + if (is_iter_kfunc(meta)) + return arg_idx == 0; + + /* iter passed as an argument to a generic kfunc */ + return btf_param_match_suffix(meta->btf, arg, "__iter"); } static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx, @@ -7675,21 +8341,32 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; const struct btf_type *t; - const struct btf_param *arg; - int spi, err, i, nr_slots; - u32 btf_id; + int spi, err, i, nr_slots, btf_id; - /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */ - arg = &btf_params(meta->func_proto)[0]; - t = btf_type_skip_modifiers(meta->btf, arg->type, NULL); /* PTR */ - t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id); /* STRUCT */ + if (reg->type != PTR_TO_STACK) { + verbose(env, "arg#%d expected pointer to an iterator on stack\n", regno - 1); + return -EINVAL; + } + + /* For iter_{new,next,destroy} functions, btf_check_iter_kfuncs() + * ensures struct convention, so we wouldn't need to do any BTF + * validation here. But given iter state can be passed as a parameter + * to any kfunc, if arg has "__iter" suffix, we need to be a bit more + * conservative here. + */ + btf_id = btf_check_iter_arg(meta->btf, meta->func_proto, regno - 1); + if (btf_id < 0) { + verbose(env, "expected valid iter pointer as arg #%d\n", regno - 1); + return -EINVAL; + } + t = btf_type_by_id(meta->btf, btf_id); nr_slots = t->size / BPF_REG_SIZE; if (is_iter_new_kfunc(meta)) { /* bpf_iter_<type>_new() expects pointer to uninit iter state */ if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) { verbose(env, "expected uninitialized iter_%s as arg #%d\n", - iter_type_str(meta->btf, btf_id), regno); + iter_type_str(meta->btf, btf_id), regno - 1); return -EINVAL; } @@ -7704,14 +8381,16 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id if (err) return err; } else { - /* iter_next() or iter_destroy() expect initialized iter state*/ + /* iter_next() or iter_destroy(), as well as any kfunc + * accepting iter argument, expect initialized iter state + */ err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots); switch (err) { case 0: break; case -EINVAL: verbose(env, "expected an initialized iter_%s as arg #%d\n", - iter_type_str(meta->btf, btf_id), regno); + iter_type_str(meta->btf, btf_id), regno - 1); return err; case -EPROTO: verbose(env, "expected an RCU CS when using %s\n", meta->func_name); @@ -7818,6 +8497,15 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env, return 0; } +static struct bpf_reg_state *get_iter_from_state(struct bpf_verifier_state *cur_st, + struct bpf_kfunc_call_arg_meta *meta) +{ + int iter_frameno = meta->iter.frameno; + int iter_spi = meta->iter.spi; + + return &cur_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; +} + /* process_iter_next_call() is called when verifier gets to iterator's next * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer * to it as just "iter_next()" in comments below. @@ -7902,12 +8590,10 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr; struct bpf_reg_state *cur_iter, *queued_iter; - int iter_frameno = meta->iter.frameno; - int iter_spi = meta->iter.spi; BTF_TYPE_EMIT(struct bpf_iter); - cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + cur_iter = get_iter_from_state(cur_st, meta); if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE && cur_iter->iter.state != BPF_ITER_STATE_DRAINED) { @@ -7935,7 +8621,7 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx, if (!queued_st) return -ENOMEM; - queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr; + queued_iter = get_iter_from_state(queued_st, meta); queued_iter->iter.state = BPF_ITER_STATE_ACTIVE; queued_iter->iter.depth++; if (prev_st) @@ -7959,6 +8645,12 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type) type == ARG_CONST_SIZE_OR_ZERO; } +static bool arg_type_is_raw_mem(enum bpf_arg_type type) +{ + return base_type(type) == ARG_PTR_TO_MEM && + type & MEM_UNINIT; +} + static bool arg_type_is_release(enum bpf_arg_type type) { return type & OBJ_RELEASE; @@ -7969,16 +8661,6 @@ static bool arg_type_is_dynptr(enum bpf_arg_type type) return base_type(type) == ARG_PTR_TO_DYNPTR; } -static int int_ptr_type_to_size(enum bpf_arg_type type) -{ - if (type == ARG_PTR_TO_INT) - return sizeof(u32); - else if (type == ARG_PTR_TO_LONG) - return sizeof(u64); - - return -EINVAL; -} - static int resolve_map_arg_type(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta, enum bpf_arg_type *arg_type) @@ -8051,16 +8733,6 @@ static const struct bpf_reg_types mem_types = { }, }; -static const struct bpf_reg_types int_ptr_types = { - .types = { - PTR_TO_STACK, - PTR_TO_PACKET, - PTR_TO_PACKET_META, - PTR_TO_MAP_KEY, - PTR_TO_MAP_VALUE, - }, -}; - static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE, @@ -8091,7 +8763,12 @@ static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } }; -static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } }; +static const struct bpf_reg_types kptr_xchg_dest_types = { + .types = { + PTR_TO_MAP_VALUE, + PTR_TO_BTF_ID | MEM_ALLOC + } +}; static const struct bpf_reg_types dynptr_types = { .types = { PTR_TO_STACK, @@ -8116,14 +8793,12 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types, - [ARG_PTR_TO_INT] = &int_ptr_types, - [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, [ARG_PTR_TO_FUNC] = &func_ptr_types, [ARG_PTR_TO_STACK] = &stack_ptr_types, [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, - [ARG_PTR_TO_KPTR] = &kptr_types, + [ARG_KPTR_XCHG_DEST] = &kptr_xchg_dest_types, [ARG_PTR_TO_DYNPTR] = &dynptr_types, }; @@ -8162,7 +8837,8 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, if (base_type(arg_type) == ARG_PTR_TO_MEM) type &= ~DYNPTR_TYPE_FLAG_MASK; - if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) { + /* Local kptr types are allowed as the source argument of bpf_kptr_xchg */ + if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type) && regno == BPF_REG_2) { type &= ~MEM_ALLOC; type &= ~MEM_PERCPU; } @@ -8200,6 +8876,7 @@ found: switch ((int)reg->type) { case PTR_TO_BTF_ID: case PTR_TO_BTF_ID | PTR_TRUSTED: + case PTR_TO_BTF_ID | PTR_TRUSTED | PTR_MAYBE_NULL: case PTR_TO_BTF_ID | MEM_RCU: case PTR_TO_BTF_ID | PTR_MAYBE_NULL: case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU: @@ -8254,7 +8931,8 @@ found: verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); return -EFAULT; } - if (meta->func_id == BPF_FUNC_kptr_xchg) { + /* Check if local kptr in src arg matches kptr in dst arg */ + if (meta->func_id == BPF_FUNC_kptr_xchg && regno == BPF_REG_2) { if (map_kptr_match_type(env, meta->kptr_field, reg, regno)) return -EACCES; } @@ -8334,6 +9012,7 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_MEM | MEM_RINGBUF: case PTR_TO_BUF: case PTR_TO_BUF | MEM_RDONLY: + case PTR_TO_ARENA: case SCALAR_VALUE: return 0; /* All the rest must be rejected, except PTR_TO_BTF_ID which allows @@ -8472,6 +9151,69 @@ static int check_reg_const_str(struct bpf_verifier_env *env, return 0; } +/* Returns constant key value in `value` if possible, else negative error */ +static int get_constant_map_key(struct bpf_verifier_env *env, + struct bpf_reg_state *key, + u32 key_size, + s64 *value) +{ + struct bpf_func_state *state = func(env, key); + struct bpf_reg_state *reg; + int slot, spi, off; + int spill_size = 0; + int zero_size = 0; + int stack_off; + int i, err; + u8 *stype; + + if (!env->bpf_capable) + return -EOPNOTSUPP; + if (key->type != PTR_TO_STACK) + return -EOPNOTSUPP; + if (!tnum_is_const(key->var_off)) + return -EOPNOTSUPP; + + stack_off = key->off + key->var_off.value; + slot = -stack_off - 1; + spi = slot / BPF_REG_SIZE; + off = slot % BPF_REG_SIZE; + stype = state->stack[spi].slot_type; + + /* First handle precisely tracked STACK_ZERO */ + for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--) + zero_size++; + if (zero_size >= key_size) { + *value = 0; + return 0; + } + + /* Check that stack contains a scalar spill of expected size */ + if (!is_spilled_scalar_reg(&state->stack[spi])) + return -EOPNOTSUPP; + for (i = off; i >= 0 && stype[i] == STACK_SPILL; i--) + spill_size++; + if (spill_size != key_size) + return -EOPNOTSUPP; + + reg = &state->stack[spi].spilled_ptr; + if (!tnum_is_const(reg->var_off)) + /* Stack value not statically known */ + return -EOPNOTSUPP; + + /* We are relying on a constant value. So mark as precise + * to prevent pruning on it. + */ + bt_set_frame_slot(&env->bt, key->frameno, spi); + err = mark_chain_precision_batch(env); + if (err < 0) + return err; + + *value = reg->var_off.value; + return 0; +} + +static bool can_elide_value_nullness(enum bpf_map_type type); + static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn, @@ -8482,6 +9224,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, enum bpf_arg_type arg_type = fn->arg_type[arg]; enum bpf_reg_type type = reg->type; u32 *arg_btf_id = NULL; + u32 key_size; int err = 0; if (arg_type == ARG_DONTCARE) @@ -8564,7 +9307,7 @@ skip_type_check: meta->release_regno = regno; } - if (reg->ref_obj_id) { + if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) { if (meta->ref_obj_id) { verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", regno, reg->ref_obj_id, @@ -8615,9 +9358,20 @@ skip_type_check: verbose(env, "invalid map_ptr to access map->key\n"); return -EACCES; } - err = check_helper_mem_access(env, regno, - meta->map_ptr->key_size, false, - NULL); + key_size = meta->map_ptr->key_size; + err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL); + if (err) + return err; + if (can_elide_value_nullness(meta->map_ptr->map_type)) { + err = get_constant_map_key(env, reg, key_size, &meta->const_map_key); + if (err < 0) { + meta->const_map_key = -1; + if (err == -EOPNOTSUPP) + err = 0; + else + return err; + } + } break; case ARG_PTR_TO_MAP_VALUE: if (type_may_be_null(arg_type) && register_is_null(reg)) @@ -8632,9 +9386,9 @@ skip_type_check: return -EACCES; } meta->raw_mode = arg_type & MEM_UNINIT; - err = check_helper_mem_access(env, regno, - meta->map_ptr->value_size, false, - meta); + err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, + arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ, + false, meta); break; case ARG_PTR_TO_PERCPU_BTF_ID: if (!reg->btf_id) { @@ -8676,16 +9430,26 @@ skip_type_check: */ meta->raw_mode = arg_type & MEM_UNINIT; if (arg_type & MEM_FIXED_SIZE) { - err = check_helper_mem_access(env, regno, - fn->arg_size[arg], false, - meta); + err = check_helper_mem_access(env, regno, fn->arg_size[arg], + arg_type & MEM_WRITE ? BPF_WRITE : BPF_READ, + false, meta); + if (err) + return err; + if (arg_type & MEM_ALIGNED) + err = check_ptr_alignment(env, reg, 0, fn->arg_size[arg], true); } break; case ARG_CONST_SIZE: - err = check_mem_size_reg(env, reg, regno, false, meta); + err = check_mem_size_reg(env, reg, regno, + fn->arg_type[arg - 1] & MEM_WRITE ? + BPF_WRITE : BPF_READ, + false, meta); break; case ARG_CONST_SIZE_OR_ZERO: - err = check_mem_size_reg(env, reg, regno, true, meta); + err = check_mem_size_reg(env, reg, regno, + fn->arg_type[arg - 1] & MEM_WRITE ? + BPF_WRITE : BPF_READ, + true, meta); break; case ARG_PTR_TO_DYNPTR: err = process_dynptr_func(env, regno, insn_idx, arg_type, 0); @@ -8703,17 +9467,6 @@ skip_type_check: if (err) return err; break; - case ARG_PTR_TO_INT: - case ARG_PTR_TO_LONG: - { - int size = int_ptr_type_to_size(arg_type); - - err = check_helper_mem_access(env, regno, size, false, meta); - if (err) - return err; - err = check_ptr_alignment(env, reg, 0, size, true); - break; - } case ARG_PTR_TO_CONST_STR: { err = check_reg_const_str(env, reg, regno); @@ -8721,7 +9474,7 @@ skip_type_check: return err; break; } - case ARG_PTR_TO_KPTR: + case ARG_KPTR_XCHG_DEST: err = process_kptr_func(env, regno, meta); if (err) return err; @@ -8736,7 +9489,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) enum bpf_attach_type eatype = env->prog->expected_attach_type; enum bpf_prog_type type = resolve_prog_type(env->prog); - if (func_id != BPF_FUNC_map_update_elem) + if (func_id != BPF_FUNC_map_update_elem && + func_id != BPF_FUNC_map_delete_elem) return false; /* It's not possible to get access to a locked struct sock in these @@ -8747,6 +9501,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) if (eatype == BPF_TRACE_ITER) return true; break; + case BPF_PROG_TYPE_SOCK_OPS: + /* map_update allowed only via dedicated helpers with event type checks */ + if (func_id == BPF_FUNC_map_delete_elem) + return true; + break; case BPF_PROG_TYPE_SOCKET_FILTER: case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: @@ -8842,7 +9601,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_SOCKMAP: if (func_id != BPF_FUNC_sk_redirect_map && func_id != BPF_FUNC_sock_map_update && - func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_map && func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem && @@ -8852,7 +9610,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_SOCKHASH: if (func_id != BPF_FUNC_sk_redirect_hash && func_id != BPF_FUNC_sock_hash_update && - func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_hash && func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem && @@ -9026,15 +9783,15 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn) { int count = 0; - if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg1_type)) count++; - if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg2_type)) count++; - if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg3_type)) count++; - if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg4_type)) count++; - if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) + if (arg_type_is_raw_mem(fn->arg5_type)) count++; /* We only support one arg being in raw mode at the moment, @@ -9146,21 +9903,38 @@ static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range reg->range = AT_PKT_END; } +static int release_reference_nomark(struct bpf_verifier_state *state, int ref_obj_id) +{ + int i; + + for (i = 0; i < state->acquired_refs; i++) { + if (state->refs[i].type != REF_TYPE_PTR) + continue; + if (state->refs[i].id == ref_obj_id) { + release_reference_state(state, i); + return 0; + } + } + return -EINVAL; +} + /* The pointer with the specified id has released its reference to kernel * resources. Identify all copies of the same pointer and clear the reference. + * + * This is the release function corresponding to acquire_reference(). Idempotent. */ -static int release_reference(struct bpf_verifier_env *env, - int ref_obj_id) +static int release_reference(struct bpf_verifier_env *env, int ref_obj_id) { + struct bpf_verifier_state *vstate = env->cur_state; struct bpf_func_state *state; struct bpf_reg_state *reg; int err; - err = release_reference_state(cur_func(env), ref_obj_id); + err = release_reference_nomark(vstate, ref_obj_id); if (err) return err; - bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ if (reg->ref_obj_id == ref_obj_id) mark_reg_invalid(env, reg); })); @@ -9234,9 +10008,7 @@ static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int calls callsite, state->curframe + 1 /* frameno within this callchain */, subprog /* subprog number within this prog */); - /* Transfer references to the callee */ - err = copy_reference_state(callee, caller); - err = err ?: set_callee_state_cb(env, caller, callee, callsite); + err = set_callee_state_cb(env, caller, callee, callsite); if (err) goto err_out; @@ -9298,10 +10070,38 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, bpf_log(log, "arg#%d is expected to be non-NULL\n", i); return -EINVAL; } + } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) { + /* + * Can pass any value and the kernel won't crash, but + * only PTR_TO_ARENA or SCALAR make sense. Everything + * else is a bug in the bpf program. Point it out to + * the user at the verification time instead of + * run-time debug nightmare. + */ + if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) { + bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno); + return -EINVAL; + } } else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) { + ret = check_func_arg_reg_off(env, reg, regno, ARG_PTR_TO_DYNPTR); + if (ret) + return ret; + ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0); if (ret) return ret; + } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) { + struct bpf_call_arg_meta meta; + int err; + + if (register_is_null(reg) && type_may_be_null(arg->arg_type)) + continue; + + memset(&meta, 0, sizeof(meta)); /* leave func_id as zero */ + err = check_reg_type(env, regno, arg->arg_type, &arg->btf_id, &meta); + err = err ?: check_func_arg_reg_off(env, reg, regno, arg->arg_type); + if (err) + return err; } else { bpf_log(log, "verifier bug: unrecognized arg#%d type %d\n", i, arg->arg_type); @@ -9366,7 +10166,7 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins */ env->subprog_info[subprog].is_cb = true; if (bpf_pseudo_kfunc_call(insn) && - !is_sync_callback_calling_kfunc(insn->imm)) { + !is_callback_calling_kfunc(insn->imm)) { verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n", func_id_name(insn->imm), insn->imm); return -EFAULT; @@ -9377,15 +10177,14 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins return -EFAULT; } - if (insn->code == (BPF_JMP | BPF_CALL) && - insn->src_reg == 0 && - insn->imm == BPF_FUNC_timer_set_callback) { + if (is_async_callback_calling_insn(insn)) { struct bpf_verifier_state *async_cb; - /* there is no real recursion here. timer callbacks are async */ + /* there is no real recursion here. timer and workqueue callbacks are async */ env->subprog_info[subprog].is_async_cb = true; async_cb = push_async_cb(env, env->subprog_info[subprog].start, - insn_idx, subprog); + insn_idx, subprog, + is_bpf_wq_set_callback_impl_kfunc(insn->imm)); if (!async_cb) return -EFAULT; callee = async_cb->frame[0]; @@ -9438,6 +10237,26 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (subprog_is_global(env, subprog)) { const char *sub_name = subprog_name(env, subprog); + /* Only global subprogs cannot be called with a lock held. */ + if (env->cur_state->active_locks) { + verbose(env, "global function calls are not allowed while holding a lock,\n" + "use static function instead\n"); + return -EINVAL; + } + + /* Only global subprogs cannot be called with preemption disabled. */ + if (env->cur_state->active_preempt_locks) { + verbose(env, "global function calls are not allowed with preemption disabled,\n" + "use static function instead\n"); + return -EINVAL; + } + + if (env->cur_state->active_irq_id) { + verbose(env, "global function calls are not allowed with IRQs disabled,\n" + "use static function instead\n"); + return -EINVAL; + } + if (err) { verbose(env, "Caller passes invalid args into func#%d ('%s')\n", subprog, sub_name); @@ -9446,6 +10265,8 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, verbose(env, "Func#%d ('%s') is global and assumed valid.\n", subprog, sub_name); + if (env->subprog_info[subprog].changes_pkt_data) + clear_all_pkt_pointers(env); /* mark global subprog for verifying after main prog */ subprog_aux(env, subprog)->called = true; clear_caller_saved_regs(env, caller->regs); @@ -9472,9 +10293,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); - print_verifier_state(env, caller, true); + print_verifier_state(env, state, caller->frameno, true); verbose(env, "callee:\n"); - print_verifier_state(env, state->frame[state->curframe], true); + print_verifier_state(env, state, state->curframe, true); } return 0; @@ -9530,12 +10351,8 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env, struct bpf_map *map; int err; - if (bpf_map_ptr_poisoned(insn_aux)) { - verbose(env, "tail_call abusing map_ptr\n"); - return -EINVAL; - } - - map = BPF_MAP_PTR(insn_aux->map_ptr_state); + /* valid map_ptr and poison value does not matter */ + map = insn_aux->map_ptr_state.map_ptr; if (!map->ops->map_set_for_each_callback_args || !map->ops->map_for_each_callback) { verbose(env, "callback function not allowed for map\n"); @@ -9558,7 +10375,7 @@ static int set_loop_callback_state(struct bpf_verifier_env *env, { /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, * u64 flags); - * callback_fn(u32 index, void *callback_ctx); + * callback_fn(u64 index, void *callback_ctx); */ callee->regs[BPF_REG_1].type = SCALAR_VALUE; callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; @@ -9711,9 +10528,13 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env) return is_rbtree_lock_required_kfunc(kfunc_btf_id); } -static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg) +static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg, + bool return_32bit) { - return range.minval <= reg->smin_value && reg->smax_value <= range.maxval; + if (return_32bit) + return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval; + else + return range.minval <= reg->smin_value && reg->smax_value <= range.maxval; } static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) @@ -9750,8 +10571,8 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) if (err) return err; - /* enforce R0 return value range */ - if (!retval_range_within(callee->callback_ret_range, r0)) { + /* enforce R0 return value range, and bpf_callback_t returns 64bit */ + if (!retval_range_within(callee->callback_ret_range, r0, false)) { verbose_invalid_scalar(env, r0, callee->callback_ret_range, "At callback return", "R0"); return -EINVAL; @@ -9766,18 +10587,6 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) caller->regs[BPF_REG_0] = *r0; } - /* callback_fn frame should have released its own additions to parent's - * reference state at this point, or check_reference_leak would - * complain, hence it must be the same as the caller. There is no need - * to copy it back. - */ - if (!callee->in_callback_fn) { - /* Transfer references to the caller */ - err = copy_reference_state(caller, callee); - if (err) - return err; - } - /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite, * there function call logic would reschedule callback visit. If iteration * converges is_state_visited() would prune that visit eventually. @@ -9790,9 +10599,9 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "returning from callee:\n"); - print_verifier_state(env, callee, true); + print_verifier_state(env, state, callee->frameno, true); verbose(env, "to caller at %d:\n", *insn_idx); - print_verifier_state(env, caller, true); + print_verifier_state(env, state, caller->frameno, true); } /* clear everything in the callee. In case of exceptional exits using * bpf_throw, this will be done by copy_verifier_state for extra frames. */ @@ -9894,12 +10703,12 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, return -EACCES; } - if (!BPF_MAP_PTR(aux->map_ptr_state)) + if (!aux->map_ptr_state.map_ptr) + bpf_map_ptr_store(aux, meta->map_ptr, + !meta->map_ptr->bypass_spec_v1, false); + else if (aux->map_ptr_state.map_ptr != meta->map_ptr) bpf_map_ptr_store(aux, meta->map_ptr, - !meta->map_ptr->bypass_spec_v1); - else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr) - bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, - !meta->map_ptr->bypass_spec_v1); + !meta->map_ptr->bypass_spec_v1, true); return 0; } @@ -9942,15 +10751,15 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit) { - struct bpf_func_state *state = cur_func(env); + struct bpf_verifier_state *state = env->cur_state; bool refs_lingering = false; int i; - if (!exception_exit && state->frameno && !state->in_callback_fn) + if (!exception_exit && cur_func(env)->frameno) return 0; for (i = 0; i < state->acquired_refs; i++) { - if (!exception_exit && state->in_callback_fn && state->refs[i].callback_ref != state->frameno) + if (state->refs[i].type != REF_TYPE_PTR) continue; verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", state->refs[i].id, state->refs[i].insn_idx); @@ -9959,6 +10768,39 @@ static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exi return refs_lingering ? -EINVAL : 0; } +static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit, bool check_lock, const char *prefix) +{ + int err; + + if (check_lock && env->cur_state->active_locks) { + verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix); + return -EINVAL; + } + + err = check_reference_leak(env, exception_exit); + if (err) { + verbose(env, "%s would lead to reference leak\n", prefix); + return err; + } + + if (check_lock && env->cur_state->active_irq_id) { + verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix); + return -EINVAL; + } + + if (check_lock && env->cur_state->active_rcu_lock) { + verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix); + return -EINVAL; + } + + if (check_lock && env->cur_state->active_preempt_locks) { + verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix); + return -EINVAL; + } + + return 0; +} + static int check_bpf_snprintf_call(struct bpf_verifier_env *env, struct bpf_reg_state *regs) { @@ -10053,6 +10895,34 @@ static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno state->callback_subprogno == subprogno); } +/* Returns whether or not the given map type can potentially elide + * lookup return value nullness check. This is possible if the key + * is statically known. + */ +static bool can_elide_value_nullness(enum bpf_map_type type) +{ + switch (type) { + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PERCPU_ARRAY: + return true; + default: + return false; + } +} + +static int get_helper_proto(struct bpf_verifier_env *env, int func_id, + const struct bpf_func_proto **ptr) +{ + if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) + return -ERANGE; + + if (!env->ops->get_func_proto) + return -EINVAL; + + *ptr = env->ops->get_func_proto(func_id, env->prog); + return *ptr ? 0 : -EINVAL; +} + static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -10069,18 +10939,16 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn /* find function prototype */ func_id = insn->imm; - if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { - verbose(env, "invalid func %s#%d\n", func_id_name(func_id), - func_id); + err = get_helper_proto(env, insn->imm, &fn); + if (err == -ERANGE) { + verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id); return -EINVAL; } - if (env->ops->get_func_proto) - fn = env->ops->get_func_proto(func_id, env->prog); - if (!fn) { - verbose(env, "unknown func %s#%d\n", func_id_name(func_id), - func_id); - return -EINVAL; + if (err) { + verbose(env, "program of this type cannot use helper %s#%d\n", + func_id_name(func_id), func_id); + return err; } /* eBPF programs must be GPL compatible to use GPL-ed functions */ @@ -10094,13 +10962,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } - if (!env->prog->aux->sleepable && fn->might_sleep) { + if (!in_sleepable(env) && fn->might_sleep) { verbose(env, "helper call might sleep in a non-sleepable prog\n"); return -EINVAL; } /* With LD_ABS/IND some JITs save/restore skb from r1. */ - changes_data = bpf_helper_changes_pkt_data(fn->func); + changes_data = bpf_helper_changes_pkt_data(func_id); if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", func_id_name(func_id), func_id); @@ -10124,7 +10992,29 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } - if (env->prog->aux->sleepable && is_storage_get_function(func_id)) + if (in_sleepable(env) && is_storage_get_function(func_id)) + env->insn_aux_data[insn_idx].storage_get_func_atomic = true; + } + + if (env->cur_state->active_preempt_locks) { + if (fn->might_sleep) { + verbose(env, "sleepable helper %s#%d in non-preemptible region\n", + func_id_name(func_id), func_id); + return -EINVAL; + } + + if (in_sleepable(env) && is_storage_get_function(func_id)) + env->insn_aux_data[insn_idx].storage_get_func_atomic = true; + } + + if (env->cur_state->active_irq_id) { + if (fn->might_sleep) { + verbose(env, "sleepable helper %s#%d in IRQ-disabled region\n", + func_id_name(func_id), func_id); + return -EINVAL; + } + + if (in_sleepable(env) && is_storage_get_function(func_id)) env->insn_aux_data[insn_idx].storage_get_func_atomic = true; } @@ -10174,7 +11064,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn struct bpf_func_state *state; struct bpf_reg_state *reg; - err = release_reference_state(cur_func(env), ref_obj_id); + err = release_reference_nomark(env->cur_state, ref_obj_id); if (!err) { bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ if (reg->ref_obj_id == ref_obj_id) { @@ -10205,11 +11095,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn switch (func_id) { case BPF_FUNC_tail_call: - err = check_reference_leak(env, false); - if (err) { - verbose(env, "tail_call would lead to reference leak\n"); + err = check_resource_leak(env, false, true, "tail_call"); + if (err) return err; - } break; case BPF_FUNC_get_local_storage: /* check that flags argument in get_local_storage(map, flags) is 0, @@ -10388,10 +11276,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn "kernel subsystem misconfigured verifier\n"); return -EINVAL; } + + if (func_id == BPF_FUNC_map_lookup_elem && + can_elide_value_nullness(meta.map_ptr->map_type) && + meta.const_map_key >= 0 && + meta.const_map_key < meta.map_ptr->max_entries) + ret_flag &= ~PTR_MAYBE_NULL; + regs[BPF_REG_0].map_ptr = meta.map_ptr; regs[BPF_REG_0].map_uid = meta.map_uid; regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag; - if (!type_may_be_null(ret_type) && + if (!type_may_be_null(ret_flag) && btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) { regs[BPF_REG_0].id = ++env->id_gen; } @@ -10509,7 +11404,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; } else if (is_acquire_function(func_id, meta.map_ptr)) { - int id = acquire_reference_state(env, insn_idx); + int id = acquire_reference(env, insn_idx); if (id < 0) return id; @@ -10620,24 +11515,6 @@ static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta) return meta->kfunc_flags & KF_RCU_PROTECTED; } -static bool __kfunc_param_match_suffix(const struct btf *btf, - const struct btf_param *arg, - const char *suffix) -{ - int suffix_len = strlen(suffix), len; - const char *param_name; - - /* In the future, this can be ported to use BTF tagging */ - param_name = btf_name_by_offset(btf, arg->name_off); - if (str_is_empty(param_name)) - return false; - len = strlen(param_name); - if (len < suffix_len) - return false; - param_name += len - suffix_len; - return !strncmp(param_name, suffix, suffix_len); -} - static bool is_kfunc_arg_mem_size(const struct btf *btf, const struct btf_param *arg, const struct bpf_reg_state *reg) @@ -10648,7 +11525,7 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf, if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) return false; - return __kfunc_param_match_suffix(btf, arg, "__sz"); + return btf_param_match_suffix(btf, arg, "__sz"); } static bool is_kfunc_arg_const_mem_size(const struct btf *btf, @@ -10661,47 +11538,57 @@ static bool is_kfunc_arg_const_mem_size(const struct btf *btf, if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) return false; - return __kfunc_param_match_suffix(btf, arg, "__szk"); + return btf_param_match_suffix(btf, arg, "__szk"); } static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__opt"); + return btf_param_match_suffix(btf, arg, "__opt"); } static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__k"); + return btf_param_match_suffix(btf, arg, "__k"); } static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__ign"); + return btf_param_match_suffix(btf, arg, "__ign"); +} + +static bool is_kfunc_arg_map(const struct btf *btf, const struct btf_param *arg) +{ + return btf_param_match_suffix(btf, arg, "__map"); } static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__alloc"); + return btf_param_match_suffix(btf, arg, "__alloc"); } static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__uninit"); + return btf_param_match_suffix(btf, arg, "__uninit"); } static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__refcounted_kptr"); + return btf_param_match_suffix(btf, arg, "__refcounted_kptr"); } static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__nullable"); + return btf_param_match_suffix(btf, arg, "__nullable"); } static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg) { - return __kfunc_param_match_suffix(btf, arg, "__str"); + return btf_param_match_suffix(btf, arg, "__str"); +} + +static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg) +{ + return btf_param_match_suffix(btf, arg, "__irq_flag"); } static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, @@ -10729,14 +11616,16 @@ enum { KF_ARG_LIST_NODE_ID, KF_ARG_RB_ROOT_ID, KF_ARG_RB_NODE_ID, + KF_ARG_WORKQUEUE_ID, }; BTF_ID_LIST(kf_arg_btf_ids) -BTF_ID(struct, bpf_dynptr_kern) +BTF_ID(struct, bpf_dynptr) BTF_ID(struct, bpf_list_head) BTF_ID(struct, bpf_list_node) BTF_ID(struct, bpf_rb_root) BTF_ID(struct, bpf_rb_node) +BTF_ID(struct, bpf_wq) static bool __is_kfunc_ptr_arg_type(const struct btf *btf, const struct btf_param *arg, int type) @@ -10780,6 +11669,11 @@ static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_par return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID); } +static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID); +} + static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf, const struct btf_param *arg) { @@ -10848,6 +11742,9 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_RB_NODE, KF_ARG_PTR_TO_NULL, KF_ARG_PTR_TO_CONST_STR, + KF_ARG_PTR_TO_MAP, + KF_ARG_PTR_TO_WORKQUEUE, + KF_ARG_PTR_TO_IRQ_FLAG, }; enum special_kfunc_type { @@ -10873,7 +11770,17 @@ enum special_kfunc_type { KF_bpf_percpu_obj_new_impl, KF_bpf_percpu_obj_drop_impl, KF_bpf_throw, + KF_bpf_wq_set_callback_impl, + KF_bpf_preempt_disable, + KF_bpf_preempt_enable, KF_bpf_iter_css_task_new, + KF_bpf_session_cookie, + KF_bpf_get_kmem_cache, + KF_bpf_local_irq_save, + KF_bpf_local_irq_restore, + KF_bpf_iter_num_new, + KF_bpf_iter_num_next, + KF_bpf_iter_num_destroy, }; BTF_SET_START(special_kfunc_set) @@ -10889,14 +11796,17 @@ BTF_ID(func, bpf_rdonly_cast) BTF_ID(func, bpf_rbtree_remove) BTF_ID(func, bpf_rbtree_add_impl) BTF_ID(func, bpf_rbtree_first) +#ifdef CONFIG_NET BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) +#endif BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_percpu_obj_new_impl) BTF_ID(func, bpf_percpu_obj_drop_impl) BTF_ID(func, bpf_throw) +BTF_ID(func, bpf_wq_set_callback_impl) #ifdef CONFIG_CGROUPS BTF_ID(func, bpf_iter_css_task_new) #endif @@ -10917,19 +11827,38 @@ BTF_ID(func, bpf_rcu_read_unlock) BTF_ID(func, bpf_rbtree_remove) BTF_ID(func, bpf_rbtree_add_impl) BTF_ID(func, bpf_rbtree_first) +#ifdef CONFIG_NET BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) +#else +BTF_ID_UNUSED +BTF_ID_UNUSED +#endif BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_percpu_obj_new_impl) BTF_ID(func, bpf_percpu_obj_drop_impl) BTF_ID(func, bpf_throw) +BTF_ID(func, bpf_wq_set_callback_impl) +BTF_ID(func, bpf_preempt_disable) +BTF_ID(func, bpf_preempt_enable) #ifdef CONFIG_CGROUPS BTF_ID(func, bpf_iter_css_task_new) #else BTF_ID_UNUSED #endif +#ifdef CONFIG_BPF_EVENTS +BTF_ID(func, bpf_session_cookie) +#else +BTF_ID_UNUSED +#endif +BTF_ID(func, bpf_get_kmem_cache) +BTF_ID(func, bpf_local_irq_save) +BTF_ID(func, bpf_local_irq_restore) +BTF_ID(func, bpf_iter_num_new) +BTF_ID(func, bpf_iter_num_next) +BTF_ID(func, bpf_iter_num_destroy) static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { @@ -10951,6 +11880,16 @@ static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta) return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock]; } +static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->func_id == special_kfunc_list[KF_bpf_preempt_disable]; +} + +static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->func_id == special_kfunc_list[KF_bpf_preempt_enable]; +} + static enum kfunc_ptr_arg_type get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta, @@ -10971,9 +11910,12 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, * type to our caller. When a set of conditions hold in the BTF type of * arguments, we resolve it to a known kfunc_ptr_arg_type. */ - if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno)) + if (btf_is_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno)) return KF_ARG_PTR_TO_CTX; + if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg)) + return KF_ARG_PTR_TO_NULL; + if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno])) return KF_ARG_PTR_TO_ALLOC_BTF_ID; @@ -10983,7 +11925,7 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_dynptr(meta->btf, &args[argno])) return KF_ARG_PTR_TO_DYNPTR; - if (is_kfunc_arg_iter(meta, argno)) + if (is_kfunc_arg_iter(meta, argno, &args[argno])) return KF_ARG_PTR_TO_ITER; if (is_kfunc_arg_list_head(meta->btf, &args[argno])) @@ -11001,6 +11943,15 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_const_str(meta->btf, &args[argno])) return KF_ARG_PTR_TO_CONST_STR; + if (is_kfunc_arg_map(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_MAP; + + if (is_kfunc_arg_wq(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_WORKQUEUE; + + if (is_kfunc_arg_irq_flag(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_IRQ_FLAG; + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { if (!btf_type_is_struct(ref_t)) { verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", @@ -11013,9 +11964,6 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_callback(env, meta->btf, &args[argno])) return KF_ARG_PTR_TO_CALLBACK; - if (is_kfunc_arg_nullable(meta->btf, &args[argno]) && register_is_null(reg)) - return KF_ARG_PTR_TO_NULL; - if (argno + 1 < nargs && (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1]) || is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1]))) @@ -11046,6 +11994,8 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, bool strict_type_match = false; const struct btf *reg_btf; const char *reg_ref_tname; + bool taking_projection; + bool struct_same; u32 reg_ref_id; if (base_type(reg->type) == PTR_TO_BTF_ID) { @@ -11080,16 +12030,23 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, * btf_struct_ids_match() to walk the struct at the 0th offset, and * resolve types. */ - if (is_kfunc_acquire(meta) || - (is_kfunc_release(meta) && reg->ref_obj_id) || + if ((is_kfunc_release(meta) && reg->ref_obj_id) || btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id)) strict_type_match = true; - WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off); + WARN_ON_ONCE(is_kfunc_release(meta) && + (reg->off || !tnum_is_const(reg->var_off) || + reg->var_off.value)); reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, ®_ref_id); reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); - if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) { + struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match); + /* If kfunc is accepting a projection type (ie. __sk_buff), it cannot + * actually use it -- it must cast to the underlying type. So we allow + * caller to pass in the underlying type. + */ + taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname); + if (!taking_projection && !struct_same) { verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1, btf_type_str(reg_ref_t), reg_ref_tname); @@ -11098,12 +12055,59 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, return 0; } +static int process_irq_flag(struct bpf_verifier_env *env, int regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + bool irq_save; + int err; + + if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save]) { + irq_save = true; + } else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore]) { + irq_save = false; + } else { + verbose(env, "verifier internal error: unknown irq flags kfunc\n"); + return -EFAULT; + } + + if (irq_save) { + if (!is_irq_flag_reg_valid_uninit(env, reg)) { + verbose(env, "expected uninitialized irq flag as arg#%d\n", regno - 1); + return -EINVAL; + } + + err = check_mem_access(env, env->insn_idx, regno, 0, BPF_DW, BPF_WRITE, -1, false, false); + if (err) + return err; + + err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx); + if (err) + return err; + } else { + err = is_irq_flag_reg_valid_init(env, reg); + if (err) { + verbose(env, "expected an initialized irq flag as arg#%d\n", regno - 1); + return err; + } + + err = mark_irq_flag_read(env, reg); + if (err) + return err; + + err = unmark_stack_slot_irq_flag(env, reg); + if (err) + return err; + } + return 0; +} + + static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { - struct bpf_verifier_state *state = env->cur_state; struct btf_record *rec = reg_btf_record(reg); - if (!state->active_lock.ptr) { + if (!env->cur_state->active_locks) { verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n"); return -EFAULT; } @@ -11122,12 +12126,11 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id) { - struct bpf_func_state *state, *unused; + struct bpf_verifier_state *state = env->cur_state; + struct bpf_func_state *unused; struct bpf_reg_state *reg; int i; - state = cur_func(env); - if (!ref_obj_id) { verbose(env, "verifier internal error: ref_obj_id is zero for " "owning -> non-owning conversion\n"); @@ -11200,6 +12203,7 @@ static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_o */ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { + struct bpf_reference_state *s; void *ptr; u32 id; @@ -11216,10 +12220,10 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_ } id = reg->id; - if (!env->cur_state->active_lock.ptr) + if (!env->cur_state->active_locks) return -EINVAL; - if (env->cur_state->active_lock.ptr != ptr || - env->cur_state->active_lock.id != id) { + s = find_lock_state(env->cur_state, REF_TYPE_LOCK, id, ptr); + if (!s) { verbose(env, "held lock and object are not in the same allocation\n"); return -EINVAL; } @@ -11241,23 +12245,51 @@ static bool is_bpf_rbtree_api_kfunc(u32 btf_id) btf_id == special_kfunc_list[KF_bpf_rbtree_first]; } +static bool is_bpf_iter_num_api_kfunc(u32 btf_id) +{ + return btf_id == special_kfunc_list[KF_bpf_iter_num_new] || + btf_id == special_kfunc_list[KF_bpf_iter_num_next] || + btf_id == special_kfunc_list[KF_bpf_iter_num_destroy]; +} + static bool is_bpf_graph_api_kfunc(u32 btf_id) { return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) || btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]; } +static bool kfunc_spin_allowed(u32 btf_id) +{ + return is_bpf_graph_api_kfunc(btf_id) || is_bpf_iter_num_api_kfunc(btf_id); +} + static bool is_sync_callback_calling_kfunc(u32 btf_id) { return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]; } +static bool is_async_callback_calling_kfunc(u32 btf_id) +{ + return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl]; +} + static bool is_bpf_throw_kfunc(struct bpf_insn *insn) { return bpf_pseudo_kfunc_call(insn) && insn->off == 0 && insn->imm == special_kfunc_list[KF_bpf_throw]; } +static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id) +{ + return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl]; +} + +static bool is_callback_calling_kfunc(u32 btf_id) +{ + return is_sync_callback_calling_kfunc(btf_id) || + is_async_callback_calling_kfunc(btf_id); +} + static bool is_rbtree_lock_required_kfunc(u32 btf_id) { return is_bpf_rbtree_api_kfunc(btf_id); @@ -11413,7 +12445,7 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env, node_off = reg->off + reg->var_off.value; field = reg_find_field_offset(reg, node_off, node_field_type); - if (!field || field->offset != node_off) { + if (!field) { verbose(env, "%s not found at offset=%u\n", node_type_name, node_off); return -EINVAL; } @@ -11483,7 +12515,7 @@ static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) return true; fallthrough; default: - return env->prog->aux->sleepable; + return in_sleepable(env); } } @@ -11601,6 +12633,35 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ switch (kf_arg_type) { case KF_ARG_PTR_TO_NULL: continue; + case KF_ARG_PTR_TO_MAP: + if (!reg->map_ptr) { + verbose(env, "pointer in R%d isn't map pointer\n", regno); + return -EINVAL; + } + if (meta->map.ptr && reg->map_ptr->record->wq_off >= 0) { + /* Use map_uid (which is unique id of inner map) to reject: + * inner_map1 = bpf_map_lookup_elem(outer_map, key1) + * inner_map2 = bpf_map_lookup_elem(outer_map, key2) + * if (inner_map1 && inner_map2) { + * wq = bpf_map_lookup_elem(inner_map1); + * if (wq) + * // mismatch would have been allowed + * bpf_wq_init(wq, inner_map2); + * } + * + * Comparing map_ptr is enough to distinguish normal and outer maps. + */ + if (meta->map.ptr != reg->map_ptr || + meta->map.uid != reg->map_uid) { + verbose(env, + "workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n", + meta->map.uid, reg->map_uid); + return -EINVAL; + } + } + meta->map.ptr = reg->map_ptr; + meta->map.uid = reg->map_uid; + fallthrough; case KF_ARG_PTR_TO_ALLOC_BTF_ID: case KF_ARG_PTR_TO_BTF_ID: if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta)) @@ -11616,12 +12677,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ return -EINVAL; } } - fallthrough; case KF_ARG_PTR_TO_CTX: - /* Trusted arguments have the same offset checks as release arguments */ - arg_type |= OBJ_RELEASE; - break; case KF_ARG_PTR_TO_DYNPTR: case KF_ARG_PTR_TO_ITER: case KF_ARG_PTR_TO_LIST_HEAD: @@ -11633,7 +12690,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_CALLBACK: case KF_ARG_PTR_TO_REFCOUNTED_KPTR: case KF_ARG_PTR_TO_CONST_STR: - /* Trusted by default */ + case KF_ARG_PTR_TO_WORKQUEUE: + case KF_ARG_PTR_TO_IRQ_FLAG: break; default: WARN_ON_ONCE(1); @@ -11649,7 +12707,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ switch (kf_arg_type) { case KF_ARG_PTR_TO_CTX: if (reg->type != PTR_TO_CTX) { - verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t)); + verbose(env, "arg#%d expected pointer to ctx, but got %s\n", + i, reg_type_str(env, reg->type)); return -EINVAL; } @@ -11689,12 +12748,6 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR; int clone_ref_obj_id = 0; - if (reg->type != PTR_TO_STACK && - reg->type != CONST_PTR_TO_DYNPTR) { - verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i); - return -EINVAL; - } - if (reg->type == CONST_PTR_TO_DYNPTR) dynptr_arg_type |= MEM_RDONLY; @@ -11817,6 +12870,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (ret < 0) return ret; break; + case KF_ARG_PTR_TO_MAP: + /* If argument has '__map' suffix expect 'struct bpf_map *' */ + ref_id = *reg2btf_ids[CONST_PTR_TO_MAP]; + ref_t = btf_type_by_id(btf_vmlinux, ref_id); + ref_tname = btf_name_by_offset(btf, ref_t->name_off); + fallthrough; case KF_ARG_PTR_TO_BTF_ID: /* Only base_type is checked, further checks are done here */ if ((base_type(reg->type) != PTR_TO_BTF_ID || @@ -11913,6 +12972,24 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (ret) return ret; break; + case KF_ARG_PTR_TO_WORKQUEUE: + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "arg#%d doesn't point to a map value\n", i); + return -EINVAL; + } + ret = process_wq_func(env, regno, meta); + if (ret < 0) + return ret; + break; + case KF_ARG_PTR_TO_IRQ_FLAG: + if (reg->type != PTR_TO_STACK) { + verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i); + return -EINVAL; + } + ret = process_irq_flag(env, regno, meta); + if (ret < 0) + return ret; + break; } } @@ -11972,11 +13049,11 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { - const struct btf_type *t, *ptr_type; + bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable; u32 i, nargs, ptr_type_id, release_ref_obj_id; struct bpf_reg_state *regs = cur_regs(env); const char *func_name, *ptr_type_name; - bool sleepable, rcu_lock, rcu_unlock; + const struct btf_type *t, *ptr_type; struct bpf_kfunc_call_arg_meta meta; struct bpf_insn_aux_data *insn_aux; int err, insn_idx = *insn_idx_p; @@ -12004,7 +13081,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } sleepable = is_kfunc_sleepable(&meta); - if (sleepable && !env->prog->aux->sleepable) { + if (sleepable && !in_sleepable(env)) { verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name); return -EACCES; } @@ -12024,9 +13101,27 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } + if (meta.func_id == special_kfunc_list[KF_bpf_session_cookie]) { + meta.r0_size = sizeof(u64); + meta.r0_rdonly = false; + } + + if (is_bpf_wq_set_callback_impl_kfunc(meta.func_id)) { + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_timer_callback_state); + if (err) { + verbose(env, "kfunc %s#%d failed callback verification\n", + func_name, meta.func_id); + return err; + } + } + rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); + preempt_disable = is_kfunc_bpf_preempt_disable(&meta); + preempt_enable = is_kfunc_bpf_preempt_enable(&meta); + if (env->cur_state->active_rcu_lock) { struct bpf_func_state *state; struct bpf_reg_state *reg; @@ -12059,6 +13154,27 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EINVAL; } + if (env->cur_state->active_preempt_locks) { + if (preempt_disable) { + env->cur_state->active_preempt_locks++; + } else if (preempt_enable) { + env->cur_state->active_preempt_locks--; + } else if (sleepable) { + verbose(env, "kernel func %s is sleepable within non-preemptible region\n", func_name); + return -EACCES; + } + } else if (preempt_disable) { + env->cur_state->active_preempt_locks++; + } else if (preempt_enable) { + verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name); + return -EINVAL; + } + + if (env->cur_state->active_irq_id && sleepable) { + verbose(env, "kernel func %s is sleepable within IRQ-disabled region\n", func_name); + return -EACCES; + } + /* In case of release function, we get register number of refcounted * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ @@ -12291,6 +13407,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, meta.func_name); return -EFAULT; } + } else if (btf_type_is_void(ptr_type)) { + /* kfunc returning 'void *' is equivalent to returning scalar */ + mark_reg_unknown(env, regs, BPF_REG_0); } else if (!__btf_type_is_struct(ptr_type)) { if (!meta.r0_size) { __u32 sz; @@ -12326,6 +13445,20 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; + + if (meta.func_id == special_kfunc_list[KF_bpf_get_kmem_cache]) + regs[BPF_REG_0].type |= PTR_UNTRUSTED; + + if (is_iter_next_kfunc(&meta)) { + struct bpf_reg_state *cur_iter; + + cur_iter = get_iter_from_state(env->cur_state, &meta); + + if (cur_iter->type & MEM_RCU) /* KF_RCU_PROTECTED */ + regs[BPF_REG_0].type |= MEM_RCU; + else + regs[BPF_REG_0].type |= PTR_TRUSTED; + } } if (is_kfunc_ret_null(&meta)) { @@ -12335,7 +13468,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); if (is_kfunc_acquire(&meta)) { - int id = acquire_reference_state(env, insn_idx); + int id = acquire_reference(env, insn_idx); if (id < 0) return id; @@ -12381,46 +13514,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return 0; } -static bool signed_add_overflows(s64 a, s64 b) -{ - /* Do the add in u64, where overflow is well-defined */ - s64 res = (s64)((u64)a + (u64)b); - - if (b < 0) - return res > a; - return res < a; -} - -static bool signed_add32_overflows(s32 a, s32 b) -{ - /* Do the add in u32, where overflow is well-defined */ - s32 res = (s32)((u32)a + (u32)b); - - if (b < 0) - return res > a; - return res < a; -} - -static bool signed_sub_overflows(s64 a, s64 b) -{ - /* Do the sub in u64, where overflow is well-defined */ - s64 res = (s64)((u64)a - (u64)b); - - if (b < 0) - return res < a; - return res > a; -} - -static bool signed_sub32_overflows(s32 a, s32 b) -{ - /* Do the sub in u32, where overflow is well-defined */ - s32 res = (s32)((u32)a - (u32)b); - - if (b < 0) - return res < a; - return res > a; -} - static bool check_reg_sane_offset(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, enum bpf_reg_type type) @@ -12828,6 +13921,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } switch (base_type(ptr_reg->type)) { + case PTR_TO_CTX: + case PTR_TO_MAP_VALUE: + case PTR_TO_MAP_KEY: + case PTR_TO_STACK: + case PTR_TO_PACKET_META: + case PTR_TO_PACKET: + case PTR_TO_TP_BUFFER: + case PTR_TO_BTF_ID: + case PTR_TO_MEM: + case PTR_TO_BUF: + case PTR_TO_FUNC: + case CONST_PTR_TO_DYNPTR: + break; case PTR_TO_FLOW_KEYS: if (known) break; @@ -12837,16 +13943,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (known && smin_val == 0 && opcode == BPF_ADD) break; fallthrough; - case PTR_TO_PACKET_END: - case PTR_TO_SOCKET: - case PTR_TO_SOCK_COMMON: - case PTR_TO_TCP_SOCK: - case PTR_TO_XDP_SOCK: + default: verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str(env, ptr_reg->type)); return -EACCES; - default: - break; } /* In case of 'scalar += pointer', dst_reg inherits pointer type and id. @@ -12895,21 +13995,15 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, * added into the variable offset, and we copy the fixed offset * from ptr_reg. */ - if (signed_add_overflows(smin_ptr, smin_val) || - signed_add_overflows(smax_ptr, smax_val)) { + if (check_add_overflow(smin_ptr, smin_val, &dst_reg->smin_value) || + check_add_overflow(smax_ptr, smax_val, &dst_reg->smax_value)) { dst_reg->smin_value = S64_MIN; dst_reg->smax_value = S64_MAX; - } else { - dst_reg->smin_value = smin_ptr + smin_val; - dst_reg->smax_value = smax_ptr + smax_val; } - if (umin_ptr + umin_val < umin_ptr || - umax_ptr + umax_val < umax_ptr) { + if (check_add_overflow(umin_ptr, umin_val, &dst_reg->umin_value) || + check_add_overflow(umax_ptr, umax_val, &dst_reg->umax_value)) { dst_reg->umin_value = 0; dst_reg->umax_value = U64_MAX; - } else { - dst_reg->umin_value = umin_ptr + umin_val; - dst_reg->umax_value = umax_ptr + umax_val; } dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); dst_reg->off = ptr_reg->off; @@ -12952,14 +14046,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, /* A new variable offset is created. If the subtrahend is known * nonnegative, then any reg->range we had before is still good. */ - if (signed_sub_overflows(smin_ptr, smax_val) || - signed_sub_overflows(smax_ptr, smin_val)) { + if (check_sub_overflow(smin_ptr, smax_val, &dst_reg->smin_value) || + check_sub_overflow(smax_ptr, smin_val, &dst_reg->smax_value)) { /* Overflow possible, we know nothing */ dst_reg->smin_value = S64_MIN; dst_reg->smax_value = S64_MAX; - } else { - dst_reg->smin_value = smin_ptr - smax_val; - dst_reg->smax_value = smax_ptr - smin_val; } if (umin_ptr < umax_val) { /* Overflow possible, we know nothing */ @@ -13012,71 +14103,56 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg) { - s32 smin_val = src_reg->s32_min_value; - s32 smax_val = src_reg->s32_max_value; - u32 umin_val = src_reg->u32_min_value; - u32 umax_val = src_reg->u32_max_value; + s32 *dst_smin = &dst_reg->s32_min_value; + s32 *dst_smax = &dst_reg->s32_max_value; + u32 *dst_umin = &dst_reg->u32_min_value; + u32 *dst_umax = &dst_reg->u32_max_value; - if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) || - signed_add32_overflows(dst_reg->s32_max_value, smax_val)) { - dst_reg->s32_min_value = S32_MIN; - dst_reg->s32_max_value = S32_MAX; - } else { - dst_reg->s32_min_value += smin_val; - dst_reg->s32_max_value += smax_val; + if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) || + check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) { + *dst_smin = S32_MIN; + *dst_smax = S32_MAX; } - if (dst_reg->u32_min_value + umin_val < umin_val || - dst_reg->u32_max_value + umax_val < umax_val) { - dst_reg->u32_min_value = 0; - dst_reg->u32_max_value = U32_MAX; - } else { - dst_reg->u32_min_value += umin_val; - dst_reg->u32_max_value += umax_val; + if (check_add_overflow(*dst_umin, src_reg->u32_min_value, dst_umin) || + check_add_overflow(*dst_umax, src_reg->u32_max_value, dst_umax)) { + *dst_umin = 0; + *dst_umax = U32_MAX; } } static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg) { - s64 smin_val = src_reg->smin_value; - s64 smax_val = src_reg->smax_value; - u64 umin_val = src_reg->umin_value; - u64 umax_val = src_reg->umax_value; + s64 *dst_smin = &dst_reg->smin_value; + s64 *dst_smax = &dst_reg->smax_value; + u64 *dst_umin = &dst_reg->umin_value; + u64 *dst_umax = &dst_reg->umax_value; - if (signed_add_overflows(dst_reg->smin_value, smin_val) || - signed_add_overflows(dst_reg->smax_value, smax_val)) { - dst_reg->smin_value = S64_MIN; - dst_reg->smax_value = S64_MAX; - } else { - dst_reg->smin_value += smin_val; - dst_reg->smax_value += smax_val; + if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) || + check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) { + *dst_smin = S64_MIN; + *dst_smax = S64_MAX; } - if (dst_reg->umin_value + umin_val < umin_val || - dst_reg->umax_value + umax_val < umax_val) { - dst_reg->umin_value = 0; - dst_reg->umax_value = U64_MAX; - } else { - dst_reg->umin_value += umin_val; - dst_reg->umax_value += umax_val; + if (check_add_overflow(*dst_umin, src_reg->umin_value, dst_umin) || + check_add_overflow(*dst_umax, src_reg->umax_value, dst_umax)) { + *dst_umin = 0; + *dst_umax = U64_MAX; } } static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg) { - s32 smin_val = src_reg->s32_min_value; - s32 smax_val = src_reg->s32_max_value; + s32 *dst_smin = &dst_reg->s32_min_value; + s32 *dst_smax = &dst_reg->s32_max_value; u32 umin_val = src_reg->u32_min_value; u32 umax_val = src_reg->u32_max_value; - if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) || - signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) { + if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) || + check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) { /* Overflow possible, we know nothing */ - dst_reg->s32_min_value = S32_MIN; - dst_reg->s32_max_value = S32_MAX; - } else { - dst_reg->s32_min_value -= smax_val; - dst_reg->s32_max_value -= smin_val; + *dst_smin = S32_MIN; + *dst_smax = S32_MAX; } if (dst_reg->u32_min_value < umax_val) { /* Overflow possible, we know nothing */ @@ -13092,19 +14168,16 @@ static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg) { - s64 smin_val = src_reg->smin_value; - s64 smax_val = src_reg->smax_value; + s64 *dst_smin = &dst_reg->smin_value; + s64 *dst_smax = &dst_reg->smax_value; u64 umin_val = src_reg->umin_value; u64 umax_val = src_reg->umax_value; - if (signed_sub_overflows(dst_reg->smin_value, smax_val) || - signed_sub_overflows(dst_reg->smax_value, smin_val)) { + if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) || + check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) { /* Overflow possible, we know nothing */ - dst_reg->smin_value = S64_MIN; - dst_reg->smax_value = S64_MAX; - } else { - dst_reg->smin_value -= smax_val; - dst_reg->smax_value -= smin_val; + *dst_smin = S64_MIN; + *dst_smax = S64_MAX; } if (dst_reg->umin_value < umax_val) { /* Overflow possible, we know nothing */ @@ -13120,64 +14193,56 @@ static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg) { - s32 smin_val = src_reg->s32_min_value; - u32 umin_val = src_reg->u32_min_value; - u32 umax_val = src_reg->u32_max_value; + s32 *dst_smin = &dst_reg->s32_min_value; + s32 *dst_smax = &dst_reg->s32_max_value; + u32 *dst_umin = &dst_reg->u32_min_value; + u32 *dst_umax = &dst_reg->u32_max_value; + s32 tmp_prod[4]; - if (smin_val < 0 || dst_reg->s32_min_value < 0) { - /* Ain't nobody got time to multiply that sign */ - __mark_reg32_unbounded(dst_reg); - return; - } - /* Both values are positive, so we can work with unsigned and - * copy the result to signed (unless it exceeds S32_MAX). - */ - if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) { - /* Potential overflow, we know nothing */ - __mark_reg32_unbounded(dst_reg); - return; + if (check_mul_overflow(*dst_umax, src_reg->u32_max_value, dst_umax) || + check_mul_overflow(*dst_umin, src_reg->u32_min_value, dst_umin)) { + /* Overflow possible, we know nothing */ + *dst_umin = 0; + *dst_umax = U32_MAX; } - dst_reg->u32_min_value *= umin_val; - dst_reg->u32_max_value *= umax_val; - if (dst_reg->u32_max_value > S32_MAX) { + if (check_mul_overflow(*dst_smin, src_reg->s32_min_value, &tmp_prod[0]) || + check_mul_overflow(*dst_smin, src_reg->s32_max_value, &tmp_prod[1]) || + check_mul_overflow(*dst_smax, src_reg->s32_min_value, &tmp_prod[2]) || + check_mul_overflow(*dst_smax, src_reg->s32_max_value, &tmp_prod[3])) { /* Overflow possible, we know nothing */ - dst_reg->s32_min_value = S32_MIN; - dst_reg->s32_max_value = S32_MAX; + *dst_smin = S32_MIN; + *dst_smax = S32_MAX; } else { - dst_reg->s32_min_value = dst_reg->u32_min_value; - dst_reg->s32_max_value = dst_reg->u32_max_value; + *dst_smin = min_array(tmp_prod, 4); + *dst_smax = max_array(tmp_prod, 4); } } static void scalar_min_max_mul(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg) { - s64 smin_val = src_reg->smin_value; - u64 umin_val = src_reg->umin_value; - u64 umax_val = src_reg->umax_value; + s64 *dst_smin = &dst_reg->smin_value; + s64 *dst_smax = &dst_reg->smax_value; + u64 *dst_umin = &dst_reg->umin_value; + u64 *dst_umax = &dst_reg->umax_value; + s64 tmp_prod[4]; - if (smin_val < 0 || dst_reg->smin_value < 0) { - /* Ain't nobody got time to multiply that sign */ - __mark_reg64_unbounded(dst_reg); - return; - } - /* Both values are positive, so we can work with unsigned and - * copy the result to signed (unless it exceeds S64_MAX). - */ - if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) { - /* Potential overflow, we know nothing */ - __mark_reg64_unbounded(dst_reg); - return; + if (check_mul_overflow(*dst_umax, src_reg->umax_value, dst_umax) || + check_mul_overflow(*dst_umin, src_reg->umin_value, dst_umin)) { + /* Overflow possible, we know nothing */ + *dst_umin = 0; + *dst_umax = U64_MAX; } - dst_reg->umin_value *= umin_val; - dst_reg->umax_value *= umax_val; - if (dst_reg->umax_value > S64_MAX) { + if (check_mul_overflow(*dst_smin, src_reg->smin_value, &tmp_prod[0]) || + check_mul_overflow(*dst_smin, src_reg->smax_value, &tmp_prod[1]) || + check_mul_overflow(*dst_smax, src_reg->smin_value, &tmp_prod[2]) || + check_mul_overflow(*dst_smax, src_reg->smax_value, &tmp_prod[3])) { /* Overflow possible, we know nothing */ - dst_reg->smin_value = S64_MIN; - dst_reg->smax_value = S64_MAX; + *dst_smin = S64_MIN; + *dst_smax = S64_MAX; } else { - dst_reg->smin_value = dst_reg->umin_value; - dst_reg->smax_value = dst_reg->umax_value; + *dst_smin = min_array(tmp_prod, 4); + *dst_smax = max_array(tmp_prod, 4); } } @@ -13187,7 +14252,6 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, bool src_known = tnum_subreg_is_const(src_reg->var_off); bool dst_known = tnum_subreg_is_const(dst_reg->var_off); struct tnum var32_off = tnum_subreg(dst_reg->var_off); - s32 smin_val = src_reg->s32_min_value; u32 umax_val = src_reg->u32_max_value; if (src_known && dst_known) { @@ -13200,18 +14264,16 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, */ dst_reg->u32_min_value = var32_off.value; dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val); - if (dst_reg->s32_min_value < 0 || smin_val < 0) { - /* Lose signed bounds when ANDing negative numbers, - * ain't nobody got time for that. - */ - dst_reg->s32_min_value = S32_MIN; - dst_reg->s32_max_value = S32_MAX; - } else { - /* ANDing two positives gives a positive, so safe to - * cast result into s64. - */ + + /* Safe to set s32 bounds by casting u32 result into s32 when u32 + * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded. + */ + if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) { dst_reg->s32_min_value = dst_reg->u32_min_value; dst_reg->s32_max_value = dst_reg->u32_max_value; + } else { + dst_reg->s32_min_value = S32_MIN; + dst_reg->s32_max_value = S32_MAX; } } @@ -13220,7 +14282,6 @@ static void scalar_min_max_and(struct bpf_reg_state *dst_reg, { bool src_known = tnum_is_const(src_reg->var_off); bool dst_known = tnum_is_const(dst_reg->var_off); - s64 smin_val = src_reg->smin_value; u64 umax_val = src_reg->umax_value; if (src_known && dst_known) { @@ -13233,18 +14294,16 @@ static void scalar_min_max_and(struct bpf_reg_state *dst_reg, */ dst_reg->umin_value = dst_reg->var_off.value; dst_reg->umax_value = min(dst_reg->umax_value, umax_val); - if (dst_reg->smin_value < 0 || smin_val < 0) { - /* Lose signed bounds when ANDing negative numbers, - * ain't nobody got time for that. - */ - dst_reg->smin_value = S64_MIN; - dst_reg->smax_value = S64_MAX; - } else { - /* ANDing two positives gives a positive, so safe to - * cast result into s64. - */ + + /* Safe to set s64 bounds by casting u64 result into s64 when u64 + * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded. + */ + if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) { dst_reg->smin_value = dst_reg->umin_value; dst_reg->smax_value = dst_reg->umax_value; + } else { + dst_reg->smin_value = S64_MIN; + dst_reg->smax_value = S64_MAX; } /* We may learn something more from the var_off */ __update_reg_bounds(dst_reg); @@ -13256,7 +14315,6 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, bool src_known = tnum_subreg_is_const(src_reg->var_off); bool dst_known = tnum_subreg_is_const(dst_reg->var_off); struct tnum var32_off = tnum_subreg(dst_reg->var_off); - s32 smin_val = src_reg->s32_min_value; u32 umin_val = src_reg->u32_min_value; if (src_known && dst_known) { @@ -13269,18 +14327,16 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, */ dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val); dst_reg->u32_max_value = var32_off.value | var32_off.mask; - if (dst_reg->s32_min_value < 0 || smin_val < 0) { - /* Lose signed bounds when ORing negative numbers, - * ain't nobody got time for that. - */ - dst_reg->s32_min_value = S32_MIN; - dst_reg->s32_max_value = S32_MAX; - } else { - /* ORing two positives gives a positive, so safe to - * cast result into s64. - */ + + /* Safe to set s32 bounds by casting u32 result into s32 when u32 + * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded. + */ + if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) { dst_reg->s32_min_value = dst_reg->u32_min_value; dst_reg->s32_max_value = dst_reg->u32_max_value; + } else { + dst_reg->s32_min_value = S32_MIN; + dst_reg->s32_max_value = S32_MAX; } } @@ -13289,7 +14345,6 @@ static void scalar_min_max_or(struct bpf_reg_state *dst_reg, { bool src_known = tnum_is_const(src_reg->var_off); bool dst_known = tnum_is_const(dst_reg->var_off); - s64 smin_val = src_reg->smin_value; u64 umin_val = src_reg->umin_value; if (src_known && dst_known) { @@ -13302,18 +14357,16 @@ static void scalar_min_max_or(struct bpf_reg_state *dst_reg, */ dst_reg->umin_value = max(dst_reg->umin_value, umin_val); dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask; - if (dst_reg->smin_value < 0 || smin_val < 0) { - /* Lose signed bounds when ORing negative numbers, - * ain't nobody got time for that. - */ - dst_reg->smin_value = S64_MIN; - dst_reg->smax_value = S64_MAX; - } else { - /* ORing two positives gives a positive, so safe to - * cast result into s64. - */ + + /* Safe to set s64 bounds by casting u64 result into s64 when u64 + * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded. + */ + if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) { dst_reg->smin_value = dst_reg->umin_value; dst_reg->smax_value = dst_reg->umax_value; + } else { + dst_reg->smin_value = S64_MIN; + dst_reg->smax_value = S64_MAX; } /* We may learn something more from the var_off */ __update_reg_bounds(dst_reg); @@ -13325,7 +14378,6 @@ static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, bool src_known = tnum_subreg_is_const(src_reg->var_off); bool dst_known = tnum_subreg_is_const(dst_reg->var_off); struct tnum var32_off = tnum_subreg(dst_reg->var_off); - s32 smin_val = src_reg->s32_min_value; if (src_known && dst_known) { __mark_reg32_known(dst_reg, var32_off.value); @@ -13336,10 +14388,10 @@ static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, dst_reg->u32_min_value = var32_off.value; dst_reg->u32_max_value = var32_off.value | var32_off.mask; - if (dst_reg->s32_min_value >= 0 && smin_val >= 0) { - /* XORing two positive sign numbers gives a positive, - * so safe to cast u32 result into s32. - */ + /* Safe to set s32 bounds by casting u32 result into s32 when u32 + * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded. + */ + if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) { dst_reg->s32_min_value = dst_reg->u32_min_value; dst_reg->s32_max_value = dst_reg->u32_max_value; } else { @@ -13353,7 +14405,6 @@ static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, { bool src_known = tnum_is_const(src_reg->var_off); bool dst_known = tnum_is_const(dst_reg->var_off); - s64 smin_val = src_reg->smin_value; if (src_known && dst_known) { /* dst_reg->var_off.value has been updated earlier */ @@ -13365,10 +14416,10 @@ static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, dst_reg->umin_value = dst_reg->var_off.value; dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask; - if (dst_reg->smin_value >= 0 && smin_val >= 0) { - /* XORing two positive sign numbers gives a positive, - * so safe to cast u64 result into s64. - */ + /* Safe to set s64 bounds by casting u64 result into s64 when u64 + * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded. + */ + if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) { dst_reg->smin_value = dst_reg->umin_value; dst_reg->smax_value = dst_reg->umax_value; } else { @@ -13576,6 +14627,46 @@ static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg, __update_reg_bounds(dst_reg); } +static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn, + const struct bpf_reg_state *src_reg) +{ + bool src_is_const = false; + u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; + + if (insn_bitness == 32) { + if (tnum_subreg_is_const(src_reg->var_off) + && src_reg->s32_min_value == src_reg->s32_max_value + && src_reg->u32_min_value == src_reg->u32_max_value) + src_is_const = true; + } else { + if (tnum_is_const(src_reg->var_off) + && src_reg->smin_value == src_reg->smax_value + && src_reg->umin_value == src_reg->umax_value) + src_is_const = true; + } + + switch (BPF_OP(insn->code)) { + case BPF_ADD: + case BPF_SUB: + case BPF_AND: + case BPF_XOR: + case BPF_OR: + case BPF_MUL: + return true; + + /* Shift operators range is only computable if shift dimension operand + * is a constant. Shifts greater than 31 or 63 are undefined. This + * includes shifts by a negative number. + */ + case BPF_LSH: + case BPF_RSH: + case BPF_ARSH: + return (src_is_const && src_reg->umax_value < insn_bitness); + default: + return false; + } +} + /* WARNING: This function does calculations on 64-bit values, but the actual * execution may occur on 32-bit values. Therefore, things like bitshifts * need extra checks in the 32-bit case. @@ -13585,53 +14676,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, struct bpf_reg_state *dst_reg, struct bpf_reg_state src_reg) { - struct bpf_reg_state *regs = cur_regs(env); u8 opcode = BPF_OP(insn->code); - bool src_known; - s64 smin_val, smax_val; - u64 umin_val, umax_val; - s32 s32_min_val, s32_max_val; - u32 u32_min_val, u32_max_val; - u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); int ret; - smin_val = src_reg.smin_value; - smax_val = src_reg.smax_value; - umin_val = src_reg.umin_value; - umax_val = src_reg.umax_value; - - s32_min_val = src_reg.s32_min_value; - s32_max_val = src_reg.s32_max_value; - u32_min_val = src_reg.u32_min_value; - u32_max_val = src_reg.u32_max_value; - - if (alu32) { - src_known = tnum_subreg_is_const(src_reg.var_off); - if ((src_known && - (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) || - s32_min_val > s32_max_val || u32_min_val > u32_max_val) { - /* Taint dst register if offset had invalid bounds - * derived from e.g. dead branches. - */ - __mark_reg_unknown(env, dst_reg); - return 0; - } - } else { - src_known = tnum_is_const(src_reg.var_off); - if ((src_known && - (smin_val != smax_val || umin_val != umax_val)) || - smin_val > smax_val || umin_val > umax_val) { - /* Taint dst register if offset had invalid bounds - * derived from e.g. dead branches. - */ - __mark_reg_unknown(env, dst_reg); - return 0; - } - } - - if (!src_known && - opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) { + if (!is_safe_to_compute_dst_reg_range(insn, &src_reg)) { __mark_reg_unknown(env, dst_reg); return 0; } @@ -13688,46 +14737,24 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, scalar_min_max_xor(dst_reg, &src_reg); break; case BPF_LSH: - if (umax_val >= insn_bitness) { - /* Shifts greater than 31 or 63 are undefined. - * This includes shifts by a negative number. - */ - mark_reg_unknown(env, regs, insn->dst_reg); - break; - } if (alu32) scalar32_min_max_lsh(dst_reg, &src_reg); else scalar_min_max_lsh(dst_reg, &src_reg); break; case BPF_RSH: - if (umax_val >= insn_bitness) { - /* Shifts greater than 31 or 63 are undefined. - * This includes shifts by a negative number. - */ - mark_reg_unknown(env, regs, insn->dst_reg); - break; - } if (alu32) scalar32_min_max_rsh(dst_reg, &src_reg); else scalar_min_max_rsh(dst_reg, &src_reg); break; case BPF_ARSH: - if (umax_val >= insn_bitness) { - /* Shifts greater than 31 or 63 are undefined. - * This includes shifts by a negative number. - */ - mark_reg_unknown(env, regs, insn->dst_reg); - break; - } if (alu32) scalar32_min_max_arsh(dst_reg, &src_reg); else scalar_min_max_arsh(dst_reg, &src_reg); break; default: - mark_reg_unknown(env, regs, insn->dst_reg); break; } @@ -13748,18 +14775,30 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; + bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); u8 opcode = BPF_OP(insn->code); int err; dst_reg = ®s[insn->dst_reg]; src_reg = NULL; + + if (dst_reg->type == PTR_TO_ARENA) { + struct bpf_insn_aux_data *aux = cur_aux(env); + + if (BPF_CLASS(insn->code) == BPF_ALU64) + /* + * 32-bit operations zero upper bits automatically. + * 64-bit operations need to be converted to 32. + */ + aux->needs_zext = true; + + /* Any arithmetic operations are allowed on arena pointers */ + return 0; + } + if (dst_reg->type != SCALAR_VALUE) ptr_reg = dst_reg; - else - /* Make sure ID is cleared otherwise dst_reg min/max could be - * incorrectly propagated into other registers by find_equal_scalars() - */ - dst_reg->id = 0; + if (BPF_SRC(insn->code) == BPF_X) { src_reg = ®s[insn->src_reg]; if (src_reg->type != SCALAR_VALUE) { @@ -13814,16 +14853,53 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* Got here implies adding two SCALAR_VALUEs */ if (WARN_ON_ONCE(ptr_reg)) { - print_verifier_state(env, state, true); + print_verifier_state(env, vstate, vstate->curframe, true); verbose(env, "verifier internal error: unexpected ptr_reg\n"); return -EINVAL; } if (WARN_ON(!src_reg)) { - print_verifier_state(env, state, true); + print_verifier_state(env, vstate, vstate->curframe, true); verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } - return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); + err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); + if (err) + return err; + /* + * Compilers can generate the code + * r1 = r2 + * r1 += 0x1 + * if r2 < 1000 goto ... + * use r1 in memory access + * So for 64-bit alu remember constant delta between r2 and r1 and + * update r1 after 'if' condition. + */ + if (env->bpf_capable && + BPF_OP(insn->code) == BPF_ADD && !alu32 && + dst_reg->id && is_reg_const(src_reg, false)) { + u64 val = reg_const_value(src_reg, false); + + if ((dst_reg->id & BPF_ADD_CONST) || + /* prevent overflow in sync_linked_regs() later */ + val > (u32)S32_MAX) { + /* + * If the register already went through rX += val + * we cannot accumulate another val into rx->off. + */ + dst_reg->off = 0; + dst_reg->id = 0; + } else { + dst_reg->id |= BPF_ADD_CONST; + dst_reg->off = val; + } + } else { + /* + * Make sure ID is cleared otherwise dst_reg min/max could be + * incorrectly propagated into other registers by sync_linked_regs() + */ + dst_reg->id = 0; + } + return 0; } /* check validity of 32-bit and 64-bit arithmetic operations */ @@ -13870,19 +14946,24 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } else if (opcode == BPF_MOV) { if (BPF_SRC(insn->code) == BPF_X) { - if (insn->imm != 0) { - verbose(env, "BPF_MOV uses reserved fields\n"); - return -EINVAL; - } - if (BPF_CLASS(insn->code) == BPF_ALU) { - if (insn->off != 0 && insn->off != 8 && insn->off != 16) { + if ((insn->off != 0 && insn->off != 8 && insn->off != 16) || + insn->imm) { verbose(env, "BPF_MOV uses reserved fields\n"); return -EINVAL; } + } else if (insn->off == BPF_ADDR_SPACE_CAST) { + if (insn->imm != 1 && insn->imm != 1u << 16) { + verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n"); + return -EINVAL; + } + if (!env->prog->aux->arena) { + verbose(env, "addr_space_cast insn can only be used in a program that has an associated arena\n"); + return -EINVAL; + } } else { - if (insn->off != 0 && insn->off != 8 && insn->off != 16 && - insn->off != 32) { + if ((insn->off != 0 && insn->off != 8 && insn->off != 16 && + insn->off != 32) || insn->imm) { verbose(env, "BPF_MOV uses reserved fields\n"); return -EINVAL; } @@ -13907,20 +14988,21 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (BPF_SRC(insn->code) == BPF_X) { struct bpf_reg_state *src_reg = regs + insn->src_reg; struct bpf_reg_state *dst_reg = regs + insn->dst_reg; - bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id && - !tnum_is_const(src_reg->var_off); if (BPF_CLASS(insn->code) == BPF_ALU64) { - if (insn->off == 0) { + if (insn->imm) { + /* off == BPF_ADDR_SPACE_CAST */ + mark_reg_unknown(env, regs, insn->dst_reg); + if (insn->imm == 1) { /* cast from as(1) to as(0) */ + dst_reg->type = PTR_TO_ARENA; + /* PTR_TO_ARENA is 32-bit */ + dst_reg->subreg_def = env->insn_idx + 1; + } + } else if (insn->off == 0) { /* case: R1 = R2 * copy register state to dest reg */ - if (need_id) - /* Assign src and dst registers the same ID - * that will be used by find_equal_scalars() - * to propagate min/max range. - */ - src_reg->id = ++env->id_gen; + assign_scalar_id_before_mov(env, src_reg); copy_register_state(dst_reg, src_reg); dst_reg->live |= REG_LIVE_WRITTEN; dst_reg->subreg_def = DEF_NOT_SUBREG; @@ -13935,8 +15017,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) bool no_sext; no_sext = src_reg->umax_value < (1ULL << (insn->off - 1)); - if (no_sext && need_id) - src_reg->id = ++env->id_gen; + if (no_sext) + assign_scalar_id_before_mov(env, src_reg); copy_register_state(dst_reg, src_reg); if (!no_sext) dst_reg->id = 0; @@ -13956,14 +15038,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EACCES; } else if (src_reg->type == SCALAR_VALUE) { if (insn->off == 0) { - bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX; + bool is_src_reg_u32 = get_reg_width(src_reg) <= 32; - if (is_src_reg_u32 && need_id) - src_reg->id = ++env->id_gen; + if (is_src_reg_u32) + assign_scalar_id_before_mov(env, src_reg); copy_register_state(dst_reg, src_reg); /* Make sure ID is cleared if src_reg is not in u32 * range otherwise dst_reg min/max could be incorrectly - * propagated into src_reg by find_equal_scalars() + * propagated into src_reg by sync_linked_regs() */ if (!is_src_reg_u32) dst_reg->id = 0; @@ -13973,8 +15055,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) /* case: W1 = (s8, s16)W2 */ bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1)); - if (no_sext && need_id) - src_reg->id = ++env->id_gen; + if (no_sext) + assign_scalar_id_before_mov(env, src_reg); copy_register_state(dst_reg, src_reg); if (!no_sext) dst_reg->id = 0; @@ -14412,7 +15494,19 @@ static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state struct tnum t; u64 val; -again: + /* In case of GE/GT/SGE/JST, reuse LE/LT/SLE/SLT logic from below */ + switch (opcode) { + case BPF_JGE: + case BPF_JGT: + case BPF_JSGE: + case BPF_JSGT: + opcode = flip_opcode(opcode); + swap(reg1, reg2); + break; + default: + break; + } + switch (opcode) { case BPF_JEQ: if (is_jmp32) { @@ -14555,14 +15649,6 @@ again: reg2->smin_value = max(reg1->smin_value + 1, reg2->smin_value); } break; - case BPF_JGE: - case BPF_JGT: - case BPF_JSGE: - case BPF_JSGT: - /* just reuse LE/LT logic above */ - opcode = flip_opcode(opcode); - swap(reg1, reg2); - goto again; default: return; } @@ -14570,7 +15656,7 @@ again: /* Adjusts the register min/max values in the case that the dst_reg and * src_reg are both SCALAR_VALUE registers (or we are simply doing a BPF_K - * check, in which case we havea fake SCALAR_VALUE representing insn->imm). + * check, in which case we have a fake SCALAR_VALUE representing insn->imm). * Technically we can do similar adjustments for pointers to the same object, * but we don't support that right now. */ @@ -14670,7 +15756,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, * No one could have freed the reference state before * doing the NULL check. */ - WARN_ON_ONCE(release_reference_state(state, id)); + WARN_ON_ONCE(release_reference_nomark(vstate, id)); bpf_for_each_reg_in_vstate(vstate, state, reg, ({ mark_ptr_or_null_reg(state, reg, id, is_null); @@ -14782,16 +15868,97 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, return true; } -static void find_equal_scalars(struct bpf_verifier_state *vstate, - struct bpf_reg_state *known_reg) +static void __collect_linked_regs(struct linked_regs *reg_set, struct bpf_reg_state *reg, + u32 id, u32 frameno, u32 spi_or_reg, bool is_reg) { - struct bpf_func_state *state; + struct linked_reg *e; + + if (reg->type != SCALAR_VALUE || (reg->id & ~BPF_ADD_CONST) != id) + return; + + e = linked_regs_push(reg_set); + if (e) { + e->frameno = frameno; + e->is_reg = is_reg; + e->regno = spi_or_reg; + } else { + reg->id = 0; + } +} + +/* For all R being scalar registers or spilled scalar registers + * in verifier state, save R in linked_regs if R->id == id. + * If there are too many Rs sharing same id, reset id for leftover Rs. + */ +static void collect_linked_regs(struct bpf_verifier_state *vstate, u32 id, + struct linked_regs *linked_regs) +{ + struct bpf_func_state *func; struct bpf_reg_state *reg; + int i, j; + + id = id & ~BPF_ADD_CONST; + for (i = vstate->curframe; i >= 0; i--) { + func = vstate->frame[i]; + for (j = 0; j < BPF_REG_FP; j++) { + reg = &func->regs[j]; + __collect_linked_regs(linked_regs, reg, id, i, j, true); + } + for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { + if (!is_spilled_reg(&func->stack[j])) + continue; + reg = &func->stack[j].spilled_ptr; + __collect_linked_regs(linked_regs, reg, id, i, j, false); + } + } +} + +/* For all R in linked_regs, copy known_reg range into R + * if R->id == known_reg->id. + */ +static void sync_linked_regs(struct bpf_verifier_state *vstate, struct bpf_reg_state *known_reg, + struct linked_regs *linked_regs) +{ + struct bpf_reg_state fake_reg; + struct bpf_reg_state *reg; + struct linked_reg *e; + int i; + + for (i = 0; i < linked_regs->cnt; ++i) { + e = &linked_regs->entries[i]; + reg = e->is_reg ? &vstate->frame[e->frameno]->regs[e->regno] + : &vstate->frame[e->frameno]->stack[e->spi].spilled_ptr; + if (reg->type != SCALAR_VALUE || reg == known_reg) + continue; + if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST)) + continue; + if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) || + reg->off == known_reg->off) { + s32 saved_subreg_def = reg->subreg_def; - bpf_for_each_reg_in_vstate(vstate, state, reg, ({ - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) copy_register_state(reg, known_reg); - })); + reg->subreg_def = saved_subreg_def; + } else { + s32 saved_subreg_def = reg->subreg_def; + s32 saved_off = reg->off; + + fake_reg.type = SCALAR_VALUE; + __mark_reg_known(&fake_reg, (s32)reg->off - (s32)known_reg->off); + + /* reg = known_reg; reg += delta */ + copy_register_state(reg, known_reg); + /* + * Must preserve off, id and add_const flag, + * otherwise another sync_linked_regs() will be incorrect. + */ + reg->off = saved_off; + reg->subreg_def = saved_subreg_def; + + scalar32_min_max_add(reg, &fake_reg); + scalar_min_max_add(reg, &fake_reg); + reg->var_off = tnum_add(reg->var_off, fake_reg.var_off); + } + } } static int check_cond_jmp_op(struct bpf_verifier_env *env, @@ -14802,18 +15969,42 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; struct bpf_reg_state *eq_branch_regs; - struct bpf_reg_state fake_reg = {}; + struct linked_regs linked_regs = {}; u8 opcode = BPF_OP(insn->code); bool is_jmp32; int pred = -1; int err; /* Only conditional jumps are expected to reach here. */ - if (opcode == BPF_JA || opcode > BPF_JSLE) { + if (opcode == BPF_JA || opcode > BPF_JCOND) { verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode); return -EINVAL; } + if (opcode == BPF_JCOND) { + struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st; + int idx = *insn_idx; + + if (insn->code != (BPF_JMP | BPF_JCOND) || + insn->src_reg != BPF_MAY_GOTO || + insn->dst_reg || insn->imm) { + verbose(env, "invalid may_goto imm %d\n", insn->imm); + return -EINVAL; + } + prev_st = find_prev_entry(env, cur_st->parent, idx); + + /* branch out 'fallthrough' insn as a new state to explore */ + queued_st = push_stack(env, idx + 1, idx, false); + if (!queued_st) + return -ENOMEM; + + queued_st->may_goto_depth++; + if (prev_st) + widen_imprecise_scalars(env, prev_st, queued_st); + *insn_idx += insn->off; + return 0; + } + /* check src2 operand */ err = check_reg_arg(env, insn->dst_reg, SRC_OP); if (err) @@ -14843,7 +16034,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); return -EINVAL; } - src_reg = &fake_reg; + src_reg = &env->fake_reg[0]; + memset(src_reg, 0, sizeof(*src_reg)); src_reg->type = SCALAR_VALUE; __mark_reg_known(src_reg, insn->imm); } @@ -14873,7 +16065,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, *insn_idx)) return -EFAULT; if (env->log.level & BPF_LOG_LEVEL) - print_insn_state(env, this_branch->frame[this_branch->curframe]); + print_insn_state(env, this_branch, this_branch->curframe); *insn_idx += insn->off; return 0; } else if (pred == 0) { @@ -14887,10 +16079,25 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, *insn_idx)) return -EFAULT; if (env->log.level & BPF_LOG_LEVEL) - print_insn_state(env, this_branch->frame[this_branch->curframe]); + print_insn_state(env, this_branch, this_branch->curframe); return 0; } + /* Push scalar registers sharing same ID to jump history, + * do this before creating 'other_branch', so that both + * 'this_branch' and 'other_branch' share this history + * if parent state is created. + */ + if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id) + collect_linked_regs(this_branch, src_reg->id, &linked_regs); + if (dst_reg->type == SCALAR_VALUE && dst_reg->id) + collect_linked_regs(this_branch, dst_reg->id, &linked_regs); + if (linked_regs.cnt > 1) { + err = push_insn_history(env, this_branch, 0, linked_regs_pack(&linked_regs)); + if (err) + return err; + } + other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false); if (!other_branch) @@ -14903,10 +16110,16 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, &other_branch_regs[insn->src_reg], dst_reg, src_reg, opcode, is_jmp32); } else /* BPF_SRC(insn->code) == BPF_K */ { + /* reg_set_min_max() can mangle the fake_reg. Make a copy + * so that these are two different memory locations. The + * src_reg is not used beyond here in context of K. + */ + memcpy(&env->fake_reg[1], &env->fake_reg[0], + sizeof(env->fake_reg[0])); err = reg_set_min_max(env, &other_branch_regs[insn->dst_reg], - src_reg /* fake one */, - dst_reg, src_reg /* same fake one */, + &env->fake_reg[0], + dst_reg, &env->fake_reg[1], opcode, is_jmp32); } if (err) @@ -14915,13 +16128,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, if (BPF_SRC(insn->code) == BPF_X && src_reg->type == SCALAR_VALUE && src_reg->id && !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { - find_equal_scalars(this_branch, src_reg); - find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]); + sync_linked_regs(this_branch, src_reg, &linked_regs); + sync_linked_regs(other_branch, &other_branch_regs[insn->src_reg], &linked_regs); } if (dst_reg->type == SCALAR_VALUE && dst_reg->id && !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) { - find_equal_scalars(this_branch, dst_reg); - find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]); + sync_linked_regs(this_branch, dst_reg, &linked_regs); + sync_linked_regs(other_branch, &other_branch_regs[insn->dst_reg], &linked_regs); } /* if one pointer register is compared to another pointer @@ -14983,7 +16196,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return -EACCES; } if (env->log.level & BPF_LOG_LEVEL) - print_insn_state(env, this_branch->frame[this_branch->curframe]); + print_insn_state(env, this_branch, this_branch->curframe); return 0; } @@ -15065,6 +16278,10 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) { + if (map->map_type == BPF_MAP_TYPE_ARENA) { + __mark_reg_unknown(env, dst_reg); + return 0; + } dst_reg->type = PTR_TO_MAP_VALUE; dst_reg->off = aux->map_off; WARN_ON_ONCE(map->max_entries != 1); @@ -15140,21 +16357,9 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) * gen_ld_abs() may terminate the program at runtime, leading to * reference leak. */ - err = check_reference_leak(env, false); - if (err) { - verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n"); + err = check_resource_leak(env, false, true, "BPF_LD_[ABS|IND]"); + if (err) return err; - } - - if (env->cur_state->active_lock.ptr) { - verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n"); - return -EINVAL; - } - - if (env->cur_state->active_rcu_lock) { - verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n"); - return -EINVAL; - } if (regs[ctx_reg].type != PTR_TO_CTX) { verbose(env, @@ -15200,6 +16405,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char int err; struct bpf_func_state *frame = env->cur_state->frame[0]; const bool is_subprog = frame->subprogno; + bool return_32bit = false; /* LSM and struct_ops func-ptr's return type could be "void" */ if (!is_subprog || frame->in_exception_callback_fn) { @@ -15299,18 +16505,30 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char return -ENOTSUPP; } break; + case BPF_PROG_TYPE_KPROBE: + switch (env->prog->expected_attach_type) { + case BPF_TRACE_KPROBE_SESSION: + case BPF_TRACE_UPROBE_SESSION: + range = retval_range(0, 1); + break; + default: + return 0; + } + break; case BPF_PROG_TYPE_SK_LOOKUP: range = retval_range(SK_DROP, SK_PASS); break; case BPF_PROG_TYPE_LSM: if (env->prog->expected_attach_type != BPF_LSM_CGROUP) { - /* Regular BPF_PROG_TYPE_LSM programs can return - * any value. - */ - return 0; - } - if (!env->prog->aux->attach_func_proto->type) { + /* no range found, any return value is allowed */ + if (!get_func_retval_range(env->prog, &range)) + return 0; + /* no restricted range, any return value is allowed */ + if (range.minval == S32_MIN && range.maxval == S32_MAX) + return 0; + return_32bit = true; + } else if (!env->prog->aux->attach_func_proto->type) { /* Make sure programs that attach to void * hooks don't try to modify return value. */ @@ -15340,7 +16558,7 @@ enforce_retval: if (err) return err; - if (!retval_range_within(range, reg)) { + if (!retval_range_within(range, reg, return_32bit)) { verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name); if (!is_subprog && prog->expected_attach_type == BPF_LSM_CGROUP && @@ -15356,6 +16574,29 @@ enforce_retval: return 0; } +static void mark_subprog_changes_pkt_data(struct bpf_verifier_env *env, int off) +{ + struct bpf_subprog_info *subprog; + + subprog = find_containing_subprog(env, off); + subprog->changes_pkt_data = true; +} + +/* 't' is an index of a call-site. + * 'w' is a callee entry point. + * Eventually this function would be called when env->cfg.insn_state[w] == EXPLORED. + * Rely on DFS traversal order and absence of recursive calls to guarantee that + * callee's change_pkt_data marks would be correct at that moment. + */ +static void merge_callee_effects(struct bpf_verifier_env *env, int t, int w) +{ + struct bpf_subprog_info *caller, *callee; + + caller = find_containing_subprog(env, t); + callee = find_containing_subprog(env, w); + caller->changes_pkt_data |= callee->changes_pkt_data; +} + /* non-recursive DFS pseudo code * 1 procedure DFS-iterative(G,v): * 2 label v as discovered @@ -15489,6 +16730,7 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, bool visit_callee) { int ret, insn_sz; + int w; insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1; ret = push_insn(t, t + insn_sz, FALLTHROUGH, env); @@ -15500,12 +16742,279 @@ static int visit_func_call_insn(int t, struct bpf_insn *insns, mark_jmp_point(env, t + insn_sz); if (visit_callee) { + w = t + insns[t].imm + 1; mark_prune_point(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); + merge_callee_effects(env, t, w); + ret = push_insn(t, w, BRANCH, env); } return ret; } +/* Bitmask with 1s for all caller saved registers */ +#define ALL_CALLER_SAVED_REGS ((1u << CALLER_SAVED_REGS) - 1) + +/* Return a bitmask specifying which caller saved registers are + * clobbered by a call to a helper *as if* this helper follows + * bpf_fastcall contract: + * - includes R0 if function is non-void; + * - includes R1-R5 if corresponding parameter has is described + * in the function prototype. + */ +static u32 helper_fastcall_clobber_mask(const struct bpf_func_proto *fn) +{ + u32 mask; + int i; + + mask = 0; + if (fn->ret_type != RET_VOID) + mask |= BIT(BPF_REG_0); + for (i = 0; i < ARRAY_SIZE(fn->arg_type); ++i) + if (fn->arg_type[i] != ARG_DONTCARE) + mask |= BIT(BPF_REG_1 + i); + return mask; +} + +/* True if do_misc_fixups() replaces calls to helper number 'imm', + * replacement patch is presumed to follow bpf_fastcall contract + * (see mark_fastcall_pattern_for_call() below). + */ +static bool verifier_inlines_helper_call(struct bpf_verifier_env *env, s32 imm) +{ + switch (imm) { +#ifdef CONFIG_X86_64 + case BPF_FUNC_get_smp_processor_id: + return env->prog->jit_requested && bpf_jit_supports_percpu_insn(); +#endif + default: + return false; + } +} + +/* Same as helper_fastcall_clobber_mask() but for kfuncs, see comment above */ +static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta) +{ + u32 vlen, i, mask; + + vlen = btf_type_vlen(meta->func_proto); + mask = 0; + if (!btf_type_is_void(btf_type_by_id(meta->btf, meta->func_proto->type))) + mask |= BIT(BPF_REG_0); + for (i = 0; i < vlen; ++i) + mask |= BIT(BPF_REG_1 + i); + return mask; +} + +/* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */ +static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta) +{ + return meta->kfunc_flags & KF_FASTCALL; +} + +/* LLVM define a bpf_fastcall function attribute. + * This attribute means that function scratches only some of + * the caller saved registers defined by ABI. + * For BPF the set of such registers could be defined as follows: + * - R0 is scratched only if function is non-void; + * - R1-R5 are scratched only if corresponding parameter type is defined + * in the function prototype. + * + * The contract between kernel and clang allows to simultaneously use + * such functions and maintain backwards compatibility with old + * kernels that don't understand bpf_fastcall calls: + * + * - for bpf_fastcall calls clang allocates registers as-if relevant r0-r5 + * registers are not scratched by the call; + * + * - as a post-processing step, clang visits each bpf_fastcall call and adds + * spill/fill for every live r0-r5; + * + * - stack offsets used for the spill/fill are allocated as lowest + * stack offsets in whole function and are not used for any other + * purposes; + * + * - when kernel loads a program, it looks for such patterns + * (bpf_fastcall function surrounded by spills/fills) and checks if + * spill/fill stack offsets are used exclusively in fastcall patterns; + * + * - if so, and if verifier or current JIT inlines the call to the + * bpf_fastcall function (e.g. a helper call), kernel removes unnecessary + * spill/fill pairs; + * + * - when old kernel loads a program, presence of spill/fill pairs + * keeps BPF program valid, albeit slightly less efficient. + * + * For example: + * + * r1 = 1; + * r2 = 2; + * *(u64 *)(r10 - 8) = r1; r1 = 1; + * *(u64 *)(r10 - 16) = r2; r2 = 2; + * call %[to_be_inlined] --> call %[to_be_inlined] + * r2 = *(u64 *)(r10 - 16); r0 = r1; + * r1 = *(u64 *)(r10 - 8); r0 += r2; + * r0 = r1; exit; + * r0 += r2; + * exit; + * + * The purpose of mark_fastcall_pattern_for_call is to: + * - look for such patterns; + * - mark spill and fill instructions in env->insn_aux_data[*].fastcall_pattern; + * - mark set env->insn_aux_data[*].fastcall_spills_num for call instruction; + * - update env->subprog_info[*]->fastcall_stack_off to find an offset + * at which bpf_fastcall spill/fill stack slots start; + * - update env->subprog_info[*]->keep_fastcall_stack. + * + * The .fastcall_pattern and .fastcall_stack_off are used by + * check_fastcall_stack_contract() to check if every stack access to + * fastcall spill/fill stack slot originates from spill/fill + * instructions, members of fastcall patterns. + * + * If such condition holds true for a subprogram, fastcall patterns could + * be rewritten by remove_fastcall_spills_fills(). + * Otherwise bpf_fastcall patterns are not changed in the subprogram + * (code, presumably, generated by an older clang version). + * + * For example, it is *not* safe to remove spill/fill below: + * + * r1 = 1; + * *(u64 *)(r10 - 8) = r1; r1 = 1; + * call %[to_be_inlined] --> call %[to_be_inlined] + * r1 = *(u64 *)(r10 - 8); r0 = *(u64 *)(r10 - 8); <---- wrong !!! + * r0 = *(u64 *)(r10 - 8); r0 += r1; + * r0 += r1; exit; + * exit; + */ +static void mark_fastcall_pattern_for_call(struct bpf_verifier_env *env, + struct bpf_subprog_info *subprog, + int insn_idx, s16 lowest_off) +{ + struct bpf_insn *insns = env->prog->insnsi, *stx, *ldx; + struct bpf_insn *call = &env->prog->insnsi[insn_idx]; + const struct bpf_func_proto *fn; + u32 clobbered_regs_mask = ALL_CALLER_SAVED_REGS; + u32 expected_regs_mask; + bool can_be_inlined = false; + s16 off; + int i; + + if (bpf_helper_call(call)) { + if (get_helper_proto(env, call->imm, &fn) < 0) + /* error would be reported later */ + return; + clobbered_regs_mask = helper_fastcall_clobber_mask(fn); + can_be_inlined = fn->allow_fastcall && + (verifier_inlines_helper_call(env, call->imm) || + bpf_jit_inlines_helper_call(call->imm)); + } + + if (bpf_pseudo_kfunc_call(call)) { + struct bpf_kfunc_call_arg_meta meta; + int err; + + err = fetch_kfunc_meta(env, call, &meta, NULL); + if (err < 0) + /* error would be reported later */ + return; + + clobbered_regs_mask = kfunc_fastcall_clobber_mask(&meta); + can_be_inlined = is_fastcall_kfunc_call(&meta); + } + + if (clobbered_regs_mask == ALL_CALLER_SAVED_REGS) + return; + + /* e.g. if helper call clobbers r{0,1}, expect r{2,3,4,5} in the pattern */ + expected_regs_mask = ~clobbered_regs_mask & ALL_CALLER_SAVED_REGS; + + /* match pairs of form: + * + * *(u64 *)(r10 - Y) = rX (where Y % 8 == 0) + * ... + * call %[to_be_inlined] + * ... + * rX = *(u64 *)(r10 - Y) + */ + for (i = 1, off = lowest_off; i <= ARRAY_SIZE(caller_saved); ++i, off += BPF_REG_SIZE) { + if (insn_idx - i < 0 || insn_idx + i >= env->prog->len) + break; + stx = &insns[insn_idx - i]; + ldx = &insns[insn_idx + i]; + /* must be a stack spill/fill pair */ + if (stx->code != (BPF_STX | BPF_MEM | BPF_DW) || + ldx->code != (BPF_LDX | BPF_MEM | BPF_DW) || + stx->dst_reg != BPF_REG_10 || + ldx->src_reg != BPF_REG_10) + break; + /* must be a spill/fill for the same reg */ + if (stx->src_reg != ldx->dst_reg) + break; + /* must be one of the previously unseen registers */ + if ((BIT(stx->src_reg) & expected_regs_mask) == 0) + break; + /* must be a spill/fill for the same expected offset, + * no need to check offset alignment, BPF_DW stack access + * is always 8-byte aligned. + */ + if (stx->off != off || ldx->off != off) + break; + expected_regs_mask &= ~BIT(stx->src_reg); + env->insn_aux_data[insn_idx - i].fastcall_pattern = 1; + env->insn_aux_data[insn_idx + i].fastcall_pattern = 1; + } + if (i == 1) + return; + + /* Conditionally set 'fastcall_spills_num' to allow forward + * compatibility when more helper functions are marked as + * bpf_fastcall at compile time than current kernel supports, e.g: + * + * 1: *(u64 *)(r10 - 8) = r1 + * 2: call A ;; assume A is bpf_fastcall for current kernel + * 3: r1 = *(u64 *)(r10 - 8) + * 4: *(u64 *)(r10 - 8) = r1 + * 5: call B ;; assume B is not bpf_fastcall for current kernel + * 6: r1 = *(u64 *)(r10 - 8) + * + * There is no need to block bpf_fastcall rewrite for such program. + * Set 'fastcall_pattern' for both calls to keep check_fastcall_stack_contract() happy, + * don't set 'fastcall_spills_num' for call B so that remove_fastcall_spills_fills() + * does not remove spill/fill pair {4,6}. + */ + if (can_be_inlined) + env->insn_aux_data[insn_idx].fastcall_spills_num = i - 1; + else + subprog->keep_fastcall_stack = 1; + subprog->fastcall_stack_off = min(subprog->fastcall_stack_off, off); +} + +static int mark_fastcall_patterns(struct bpf_verifier_env *env) +{ + struct bpf_subprog_info *subprog = env->subprog_info; + struct bpf_insn *insn; + s16 lowest_off; + int s, i; + + for (s = 0; s < env->subprog_cnt; ++s, ++subprog) { + /* find lowest stack spill offset used in this subprog */ + lowest_off = 0; + for (i = subprog->start; i < (subprog + 1)->start; ++i) { + insn = env->prog->insnsi + i; + if (insn->code != (BPF_STX | BPF_MEM | BPF_DW) || + insn->dst_reg != BPF_REG_10) + continue; + lowest_off = min(lowest_off, insn->off); + } + /* use this offset to find fastcall patterns */ + for (i = subprog->start; i < (subprog + 1)->start; ++i) { + insn = env->prog->insnsi + i; + if (insn->code != (BPF_JMP | BPF_CALL)) + continue; + mark_fastcall_pattern_for_call(env, subprog, i, lowest_off); + } + } + return 0; +} + /* Visits the instruction at index t and returns one of the following: * < 0 - an error occurred * DONE_EXPLORING - the instruction was fully explored @@ -15531,7 +17040,7 @@ static int visit_insn(int t, struct bpf_verifier_env *env) return DONE_EXPLORING; case BPF_CALL: - if (insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback) + if (is_async_callback_calling_insn(insn)) /* Mark this call insn as a prune point to trigger * is_state_visited() check before call itself is * processed by __check_func_call(). Otherwise new @@ -15553,6 +17062,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env) mark_prune_point(env, t); mark_jmp_point(env, t); } + if (bpf_helper_call(insn) && bpf_helper_changes_pkt_data(insn->imm)) + mark_subprog_changes_pkt_data(env, t); if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { struct bpf_kfunc_call_arg_meta meta; @@ -15597,6 +17108,8 @@ static int visit_insn(int t, struct bpf_verifier_env *env) default: /* conditional jump with two edges */ mark_prune_point(env, t); + if (is_may_goto_insn(insn)) + mark_force_checkpoint(env, t); ret = push_insn(t, t + 1, FALLTHROUGH, env); if (ret) @@ -15685,6 +17198,7 @@ walk_cfg: } } ret = 0; /* cfg looks good */ + env->prog->aux->changes_pkt_data = env->subprog_info[0].changes_pkt_data; err_free: kvfree(insn_state); @@ -16160,8 +17674,8 @@ static int check_btf_info(struct bpf_verifier_env *env, } /* check %cur's range satisfies %old's */ -static bool range_within(struct bpf_reg_state *old, - struct bpf_reg_state *cur) +static bool range_within(const struct bpf_reg_state *old, + const struct bpf_reg_state *cur) { return old->umin_value <= cur->umin_value && old->umax_value >= cur->umax_value && @@ -16325,21 +17839,28 @@ static bool regs_exact(const struct bpf_reg_state *rold, check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap); } +enum exact_level { + NOT_EXACT, + EXACT, + RANGE_WITHIN +}; + /* Returns true if (rold safe implies rcur safe) */ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, - struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact) + struct bpf_reg_state *rcur, struct bpf_idmap *idmap, + enum exact_level exact) { - if (exact) + if (exact == EXACT) return regs_exact(rold, rcur, idmap); - if (!(rold->live & REG_LIVE_READ)) + if (!(rold->live & REG_LIVE_READ) && exact == NOT_EXACT) /* explored state didn't use this */ return true; - if (rold->type == NOT_INIT) - /* explored state can't have used this */ - return true; - if (rcur->type == NOT_INIT) - return false; + if (rold->type == NOT_INIT) { + if (exact == NOT_EXACT || rcur->type == NOT_INIT) + /* explored state can't have used this */ + return true; + } /* Enforce that register types have to match exactly, including their * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general @@ -16374,8 +17895,12 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && check_scalar_ids(rold->id, rcur->id, idmap); } - if (!rold->precise) + if (!rold->precise && exact == NOT_EXACT) return true; + if ((rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST)) + return false; + if ((rold->id & BPF_ADD_CONST) && (rold->off != rcur->off)) + return false; /* Why check_ids() for scalar registers? * * Consider the following BPF code: @@ -16388,7 +17913,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, * * First verification path is [1-6]: * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7; - * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark + * - at (5) r6 would be marked <= X, sync_linked_regs() would also mark * r7 <= X, because r6 and r7 share same id. * Next verification path is [1-4, 6]. * @@ -16442,13 +17967,53 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, * the same stack frame, since fp-8 in foo != fp-8 in bar */ return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno; + case PTR_TO_ARENA: + return true; default: return regs_exact(rold, rcur, idmap); } } +static struct bpf_reg_state unbound_reg; + +static __init int unbound_reg_init(void) +{ + __mark_reg_unknown_imprecise(&unbound_reg); + unbound_reg.live |= REG_LIVE_READ; + return 0; +} +late_initcall(unbound_reg_init); + +static bool is_stack_all_misc(struct bpf_verifier_env *env, + struct bpf_stack_state *stack) +{ + u32 i; + + for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i) { + if ((stack->slot_type[i] == STACK_MISC) || + (stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack)) + continue; + return false; + } + + return true; +} + +static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env, + struct bpf_stack_state *stack) +{ + if (is_spilled_scalar_reg64(stack)) + return &stack->spilled_ptr; + + if (is_stack_all_misc(env, stack)) + return &unbound_reg; + + return NULL; +} + static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, - struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact) + struct bpf_func_state *cur, struct bpf_idmap *idmap, + enum exact_level exact) { int i, spi; @@ -16461,12 +18026,14 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, spi = i / BPF_REG_SIZE; - if (exact && - old->stack[spi].slot_type[i % BPF_REG_SIZE] != - cur->stack[spi].slot_type[i % BPF_REG_SIZE]) + if (exact != NOT_EXACT && + (i >= cur->allocated_stack || + old->stack[spi].slot_type[i % BPF_REG_SIZE] != + cur->stack[spi].slot_type[i % BPF_REG_SIZE])) return false; - if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) { + if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) + && exact == NOT_EXACT) { i += BPF_REG_SIZE - 1; /* explored state didn't use this */ continue; @@ -16485,6 +18052,20 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, if (i >= cur->allocated_stack) return false; + /* 64-bit scalar spill vs all slots MISC and vice versa. + * Load from all slots MISC produces unbound scalar. + * Construct a fake register for such stack and call + * regsafe() to ensure scalar ids are compared. + */ + old_reg = scalar_reg_for_stack(env, &old->stack[spi]); + cur_reg = scalar_reg_for_stack(env, &cur->stack[spi]); + if (old_reg && cur_reg) { + if (!regsafe(env, old_reg, cur_reg, idmap, exact)) + return false; + i += BPF_REG_SIZE - 1; + continue; + } + /* if old state was safe with misc data in the stack * it will be safe with zero-initialized stack. * The opposite is not true @@ -16543,6 +18124,12 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) return false; break; + case STACK_IRQ_FLAG: + old_reg = &old->stack[spi].spilled_ptr; + cur_reg = &cur->stack[spi].spilled_ptr; + if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) + return false; + break; case STACK_MISC: case STACK_ZERO: case STACK_INVALID: @@ -16555,7 +18142,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, return true; } -static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, +static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur, struct bpf_idmap *idmap) { int i; @@ -16563,9 +18150,34 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, if (old->acquired_refs != cur->acquired_refs) return false; + if (old->active_locks != cur->active_locks) + return false; + + if (old->active_preempt_locks != cur->active_preempt_locks) + return false; + + if (old->active_rcu_lock != cur->active_rcu_lock) + return false; + + if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap)) + return false; + for (i = 0; i < old->acquired_refs; i++) { - if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap)) + if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) || + old->refs[i].type != cur->refs[i].type) + return false; + switch (old->refs[i].type) { + case REF_TYPE_PTR: + case REF_TYPE_IRQ: + break; + case REF_TYPE_LOCK: + if (old->refs[i].ptr != cur->refs[i].ptr) + return false; + break; + default: + WARN_ONCE(1, "Unhandled enum type for reference state: %d\n", old->refs[i].type); return false; + } } return true; @@ -16598,7 +18210,7 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, * the current state will reach 'bpf_exit' instruction safely */ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, - struct bpf_func_state *cur, bool exact) + struct bpf_func_state *cur, enum exact_level exact) { int i; @@ -16613,9 +18225,6 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat if (!stacksafe(env, old, cur, &env->idmap_scratch, exact)) return false; - if (!refsafe(old, cur, &env->idmap_scratch)) - return false; - return true; } @@ -16628,7 +18237,7 @@ static void reset_idmap_scratch(struct bpf_verifier_env *env) static bool states_equal(struct bpf_verifier_env *env, struct bpf_verifier_state *old, struct bpf_verifier_state *cur, - bool exact) + enum exact_level exact) { int i; @@ -16643,20 +18252,10 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->speculative && !cur->speculative) return false; - if (old->active_lock.ptr != cur->active_lock.ptr) - return false; - - /* Old and cur active_lock's have to be either both present - * or both absent. - */ - if (!!old->active_lock.id != !!cur->active_lock.id) + if (old->in_sleepable != cur->in_sleepable) return false; - if (old->active_lock.id && - !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch)) - return false; - - if (old->active_rcu_lock != cur->active_rcu_lock) + if (!refsafe(old, cur, &env->idmap_scratch)) return false; /* for states to be equal callsites have to be the same @@ -16921,9 +18520,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state_list *sl, **pprev; struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry; int i, j, n, err, states_cnt = 0; - bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx); - bool add_new_state = force_new_state; - bool force_exact; + bool force_new_state, add_new_state, force_exact; + + force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) || + /* Avoid accumulating infinitely long jmp history */ + cur->insn_hist_end - cur->insn_hist_start > 40; /* bpf progs typically have pruning point every 4 instructions * http://vger.kernel.org/bpfconf2019.html#session-1 @@ -16933,6 +18534,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * In tests that amounts to up to 50% reduction into total verifier * memory consumption and 20% verifier time speedup. */ + add_new_state = force_new_state; if (env->jmps_processed - env->prev_jmps_processed >= 2 && env->insn_processed - env->prev_insn_processed >= 8) add_new_state = true; @@ -17002,7 +18604,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * => unsafe memory access at 11 would not be caught. */ if (is_iter_next_insn(env, insn_idx)) { - if (states_equal(env, &sl->state, cur, true)) { + if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) { struct bpf_func_state *cur_frame; struct bpf_reg_state *iter_state, *iter_reg; int spi; @@ -17025,22 +18627,30 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) } goto skip_inf_loop_check; } + if (is_may_goto_insn_at(env, insn_idx)) { + if (sl->state.may_goto_depth != cur->may_goto_depth && + states_equal(env, &sl->state, cur, RANGE_WITHIN)) { + update_loop_entry(cur, &sl->state); + goto hit; + } + } if (calls_callback(env, insn_idx)) { - if (states_equal(env, &sl->state, cur, true)) + if (states_equal(env, &sl->state, cur, RANGE_WITHIN)) goto hit; goto skip_inf_loop_check; } /* attempt to detect infinite loop to avoid unnecessary doomed work */ if (states_maybe_looping(&sl->state, cur) && - states_equal(env, &sl->state, cur, false) && + states_equal(env, &sl->state, cur, EXACT) && !iter_active_depths_differ(&sl->state, cur) && + sl->state.may_goto_depth == cur->may_goto_depth && sl->state.callback_unroll_depth == cur->callback_unroll_depth) { verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); verbose(env, "cur state:"); - print_verifier_state(env, cur->frame[cur->curframe], true); + print_verifier_state(env, cur, cur->curframe, true); verbose(env, "old state:"); - print_verifier_state(env, sl->state.frame[cur->curframe], true); + print_verifier_state(env, &sl->state, cur->curframe, true); return -EINVAL; } /* if the verifier is processing a loop, avoid adding new state @@ -17089,7 +18699,7 @@ skip_inf_loop_check: */ loop_entry = get_loop_entry(&sl->state); force_exact = loop_entry && loop_entry->branches > 0; - if (states_equal(env, &sl->state, cur, force_exact)) { + if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) { if (force_exact) update_loop_entry(cur, loop_entry); hit: @@ -17107,12 +18717,12 @@ hit: err = propagate_liveness(env, &sl->state, cur); /* if previous state reached the exit with precision and - * current state is equivalent to it (except precsion marks) + * current state is equivalent to it (except precision marks) * the precision needs to be propagated back in * the current state. */ if (is_jmp_point(env, env->insn_idx)) - err = err ? : push_jmp_history(env, cur, 0); + err = err ? : push_insn_history(env, cur, 0, 0); err = err ? : propagate_precision(env, &sl->state); if (err) return err; @@ -17211,8 +18821,8 @@ next: cur->parent = new; cur->first_insn_idx = insn_idx; + cur->insn_hist_start = cur->insn_hist_end; cur->dfs_depth = new->dfs_depth + 1; - clear_jmp_history(cur); new_sl->next = *explored_state(env, insn_idx); *explored_state(env, insn_idx) = new_sl; /* connect new state to parentage chain. Current frame needs all @@ -17259,6 +18869,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type) case PTR_TO_TCP_SOCK: case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: + case PTR_TO_ARENA: return false; default: return true; @@ -17284,7 +18895,7 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) } static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type, - bool allow_trust_missmatch) + bool allow_trust_mismatch) { enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type; @@ -17302,7 +18913,7 @@ static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type typ * src_reg == stack|map in some other branch. * Reject it. */ - if (allow_trust_missmatch && + if (allow_trust_mismatch && base_type(type) == PTR_TO_BTF_ID && base_type(*prev_type) == PTR_TO_BTF_ID) { /* @@ -17379,7 +18990,7 @@ static int do_check(struct bpf_verifier_env *env) } if (is_jmp_point(env, env->insn_idx)) { - err = push_jmp_history(env, state, 0); + err = push_insn_history(env, state, 0, 0); if (err) return err; } @@ -17395,7 +19006,7 @@ static int do_check(struct bpf_verifier_env *env) env->prev_insn_idx, env->insn_idx, env->cur_state->speculative ? " (speculative execution)" : ""); - print_verifier_state(env, state->frame[state->curframe], true); + print_verifier_state(env, state, state->curframe, true); do_print_state = false; } @@ -17407,7 +19018,7 @@ static int do_check(struct bpf_verifier_env *env) }; if (verifier_state_scratched(env)) - print_insn_state(env, state->frame[state->curframe]); + print_insn_state(env, state, state->curframe); verbose_linfo(env, env->insn_idx, "; "); env->prev_log_pos = env->log.end_pos; @@ -17539,11 +19150,10 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - if (env->cur_state->active_lock.ptr) { + if (env->cur_state->active_locks) { if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || - (insn->src_reg == BPF_PSEUDO_CALL) || (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && - (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) { + (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) { verbose(env, "function calls are not allowed while holding a lock\n"); return -EINVAL; } @@ -17589,25 +19199,14 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } process_bpf_exit_full: - if (env->cur_state->active_lock.ptr && - !in_rbtree_lock_required_cb(env)) { - verbose(env, "bpf_spin_unlock is missing\n"); - return -EINVAL; - } - - if (env->cur_state->active_rcu_lock && - !in_rbtree_lock_required_cb(env)) { - verbose(env, "bpf_rcu_read_unlock is missing\n"); - return -EINVAL; - } - /* We must do check_reference_leak here before * prepare_func_exit to handle the case when * state->curframe > 0, it may be a callback * function, for which reference_state must * match caller reference state when it exits. */ - err = check_reference_leak(env, exception_exit); + err = check_resource_leak(env, exception_exit, !env->cur_state->curframe, + "BPF_EXIT instruction in main prog"); if (err) return err; @@ -17714,50 +19313,68 @@ static int find_btf_percpu_datasec(struct btf *btf) return -ENOENT; } +/* + * Add btf to the used_btfs array and return the index. (If the btf was + * already added, then just return the index.) Upon successful insertion + * increase btf refcnt, and, if present, also refcount the corresponding + * kernel module. + */ +static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf) +{ + struct btf_mod_pair *btf_mod; + int i; + + /* check whether we recorded this BTF (and maybe module) already */ + for (i = 0; i < env->used_btf_cnt; i++) + if (env->used_btfs[i].btf == btf) + return i; + + if (env->used_btf_cnt >= MAX_USED_BTFS) + return -E2BIG; + + btf_get(btf); + + btf_mod = &env->used_btfs[env->used_btf_cnt]; + btf_mod->btf = btf; + btf_mod->module = NULL; + + /* if we reference variables from kernel module, bump its refcount */ + if (btf_is_module(btf)) { + btf_mod->module = btf_try_get_module(btf); + if (!btf_mod->module) { + btf_put(btf); + return -ENXIO; + } + } + + return env->used_btf_cnt++; +} + /* replace pseudo btf_id with kernel symbol address */ -static int check_pseudo_btf_id(struct bpf_verifier_env *env, - struct bpf_insn *insn, - struct bpf_insn_aux_data *aux) +static int __check_pseudo_btf_id(struct bpf_verifier_env *env, + struct bpf_insn *insn, + struct bpf_insn_aux_data *aux, + struct btf *btf) { const struct btf_var_secinfo *vsi; const struct btf_type *datasec; - struct btf_mod_pair *btf_mod; const struct btf_type *t; const char *sym_name; bool percpu = false; u32 type, id = insn->imm; - struct btf *btf; s32 datasec_id; u64 addr; - int i, btf_fd, err; - - btf_fd = insn[1].imm; - if (btf_fd) { - btf = btf_get_by_fd(btf_fd); - if (IS_ERR(btf)) { - verbose(env, "invalid module BTF object FD specified.\n"); - return -EINVAL; - } - } else { - if (!btf_vmlinux) { - verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); - return -EINVAL; - } - btf = btf_vmlinux; - btf_get(btf); - } + int i; t = btf_type_by_id(btf, id); if (!t) { verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id); - err = -ENOENT; - goto err_put; + return -ENOENT; } if (!btf_type_is_var(t) && !btf_type_is_func(t)) { verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id); - err = -EINVAL; - goto err_put; + return -EINVAL; } sym_name = btf_name_by_offset(btf, t->name_off); @@ -17765,8 +19382,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, if (!addr) { verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n", sym_name); - err = -ENOENT; - goto err_put; + return -ENOENT; } insn[0].imm = (u32)addr; insn[1].imm = addr >> 32; @@ -17774,7 +19390,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, if (btf_type_is_func(t)) { aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; aux->btf_var.mem_size = 0; - goto check_btf; + return 0; } datasec_id = find_btf_percpu_datasec(btf); @@ -17805,8 +19421,7 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, tname = btf_name_by_offset(btf, t->name_off); verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n", tname, PTR_ERR(ret)); - err = -EINVAL; - goto err_put; + return -EINVAL; } aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; aux->btf_var.mem_size = tsize; @@ -17815,39 +19430,43 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, aux->btf_var.btf = btf; aux->btf_var.btf_id = type; } -check_btf: - /* check whether we recorded this BTF (and maybe module) already */ - for (i = 0; i < env->used_btf_cnt; i++) { - if (env->used_btfs[i].btf == btf) { - btf_put(btf); - return 0; - } - } - if (env->used_btf_cnt >= MAX_USED_BTFS) { - err = -E2BIG; - goto err_put; - } + return 0; +} - btf_mod = &env->used_btfs[env->used_btf_cnt]; - btf_mod->btf = btf; - btf_mod->module = NULL; +static int check_pseudo_btf_id(struct bpf_verifier_env *env, + struct bpf_insn *insn, + struct bpf_insn_aux_data *aux) +{ + struct btf *btf; + int btf_fd; + int err; - /* if we reference variables from kernel module, bump its refcount */ - if (btf_is_module(btf)) { - btf_mod->module = btf_try_get_module(btf); - if (!btf_mod->module) { - err = -ENXIO; - goto err_put; + btf_fd = insn[1].imm; + if (btf_fd) { + CLASS(fd, f)(btf_fd); + + btf = __btf_get_by_fd(f); + if (IS_ERR(btf)) { + verbose(env, "invalid module BTF object FD specified.\n"); + return -EINVAL; } + } else { + if (!btf_vmlinux) { + verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); + return -EINVAL; + } + btf = btf_vmlinux; } - env->used_btf_cnt++; + err = __check_pseudo_btf_id(env, insn, aux, btf); + if (err) + return err; + err = __add_used_btf(env, btf); + if (err < 0) + return err; return 0; -err_put: - btf_put(btf); - return err; } static bool is_tracing_prog_type(enum bpf_prog_type type) @@ -17864,6 +19483,12 @@ static bool is_tracing_prog_type(enum bpf_prog_type type) } } +static bool bpf_map_is_cgroup_storage(struct bpf_map *map) +{ + return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || + map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); +} + static int check_map_prog_compatibility(struct bpf_verifier_env *env, struct bpf_map *map, struct bpf_prog *prog) @@ -17898,6 +19523,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } } + if (btf_record_has_field(map->record, BPF_WORKQUEUE)) { + if (is_tracing_prog_type(prog_type)) { + verbose(env, "tracing progs cannot use bpf_wq yet\n"); + return -EINVAL; + } + } + if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) && !bpf_offload_prog_map_match(prog, map)) { verbose(env, "offload device mismatch between prog and map\n"); @@ -17909,7 +19541,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, return -EINVAL; } - if (prog->aux->sleepable) + if (prog->sleepable) switch (map->map_type) { case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_LRU_HASH: @@ -17925,6 +19557,9 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_SK_STORAGE: case BPF_MAP_TYPE_TASK_STORAGE: case BPF_MAP_TYPE_CGRP_STORAGE: + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_ARENA: break; default: verbose(env, @@ -17932,13 +19567,89 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, return -EINVAL; } + if (bpf_map_is_cgroup_storage(map) && + bpf_cgroup_storage_assign(env->prog->aux, map)) { + verbose(env, "only one cgroup storage of each type is allowed\n"); + return -EBUSY; + } + + if (map->map_type == BPF_MAP_TYPE_ARENA) { + if (env->prog->aux->arena) { + verbose(env, "Only one arena per program\n"); + return -EBUSY; + } + if (!env->allow_ptr_leaks || !env->bpf_capable) { + verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n"); + return -EPERM; + } + if (!env->prog->jit_requested) { + verbose(env, "JIT is required to use arena\n"); + return -EOPNOTSUPP; + } + if (!bpf_jit_supports_arena()) { + verbose(env, "JIT doesn't support arena\n"); + return -EOPNOTSUPP; + } + env->prog->aux->arena = (void *)map; + if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) { + verbose(env, "arena's user address must be set via map_extra or mmap()\n"); + return -EINVAL; + } + } + return 0; } -static bool bpf_map_is_cgroup_storage(struct bpf_map *map) +static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map) { - return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || - map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); + int i, err; + + /* check whether we recorded this map already */ + for (i = 0; i < env->used_map_cnt; i++) + if (env->used_maps[i] == map) + return i; + + if (env->used_map_cnt >= MAX_USED_MAPS) { + verbose(env, "The total number of maps per program has reached the limit of %u\n", + MAX_USED_MAPS); + return -E2BIG; + } + + err = check_map_prog_compatibility(env, map, env->prog); + if (err) + return err; + + if (env->prog->sleepable) + atomic64_inc(&map->sleepable_refcnt); + + /* hold the map. If the program is rejected by verifier, + * the map will be released by release_maps() or it + * will be used by the valid program until it's unloaded + * and all maps are released in bpf_free_used_maps() + */ + bpf_map_inc(map); + + env->used_maps[env->used_map_cnt++] = map; + + return env->used_map_cnt - 1; +} + +/* Add map behind fd to used maps list, if it's not already there, and return + * its index. + * Returns <0 on error, or >= 0 index, on success. + */ +static int add_used_map(struct bpf_verifier_env *env, int fd) +{ + struct bpf_map *map; + CLASS(fd, f)(fd); + + map = __bpf_map_get(f); + if (IS_ERR(map)) { + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); + return PTR_ERR(map); + } + + return __add_used_map(env, map); } /* find and rewrite pseudo imm in ld_imm64 instructions: @@ -17952,7 +19663,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; - int i, j, err; + int i, err; err = bpf_prog_calc_tag(env->prog); if (err) @@ -17969,7 +19680,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { struct bpf_insn_aux_data *aux; struct bpf_map *map; - struct fd f; + int map_idx; u64 addr; u32 fd; @@ -18032,21 +19743,14 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) break; } - f = fdget(fd); - map = __bpf_map_get(f); - if (IS_ERR(map)) { - verbose(env, "fd %d is not pointing to valid bpf_map\n", - insn[0].imm); - return PTR_ERR(map); - } - - err = check_map_prog_compatibility(env, map, env->prog); - if (err) { - fdput(f); - return err; - } + map_idx = add_used_map(env, fd); + if (map_idx < 0) + return map_idx; + map = env->used_maps[map_idx]; aux = &env->insn_aux_data[i]; + aux->map_index = map_idx; + if (insn[0].src_reg == BPF_PSEUDO_MAP_FD || insn[0].src_reg == BPF_PSEUDO_MAP_IDX) { addr = (unsigned long)map; @@ -18055,13 +19759,11 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) if (off >= BPF_MAX_VAR_OFF) { verbose(env, "direct value offset of %u is not allowed\n", off); - fdput(f); return -EINVAL; } if (!map->ops->map_direct_value_addr) { verbose(env, "no direct value access support for this map type\n"); - fdput(f); return -EINVAL; } @@ -18069,7 +19771,6 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) if (err) { verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", map->value_size, off); - fdput(f); return err; } @@ -18080,40 +19781,6 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) insn[0].imm = (u32)addr; insn[1].imm = addr >> 32; - /* check whether we recorded this map already */ - for (j = 0; j < env->used_map_cnt; j++) { - if (env->used_maps[j] == map) { - aux->map_index = j; - fdput(f); - goto next_insn; - } - } - - if (env->used_map_cnt >= MAX_USED_MAPS) { - fdput(f); - return -E2BIG; - } - - if (env->prog->aux->sleepable) - atomic64_inc(&map->sleepable_refcnt); - /* hold the map. If the program is rejected by verifier, - * the map will be released by release_maps() or it - * will be used by the valid program until it's unloaded - * and all maps are released in bpf_free_used_maps() - */ - bpf_map_inc(map); - - aux->map_index = env->used_map_cnt; - env->used_maps[env->used_map_cnt++] = map; - - if (bpf_map_is_cgroup_storage(map) && - bpf_cgroup_storage_assign(env->prog->aux, map)) { - verbose(env, "only one cgroup storage of each type is allowed\n"); - fdput(f); - return -EBUSY; - } - - fdput(f); next_insn: insn++; i++; @@ -18144,8 +19811,7 @@ static void release_maps(struct bpf_verifier_env *env) /* drop refcnt of maps used by the rejected program */ static void release_btfs(struct bpf_verifier_env *env) { - __bpf_free_used_btfs(env->prog->aux, env->used_btfs, - env->used_btf_cnt); + __bpf_free_used_btfs(env->used_btfs, env->used_btf_cnt); } /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ @@ -18256,6 +19922,44 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of return new_prog; } +/* + * For all jmp insns in a given 'prog' that point to 'tgt_idx' insn adjust the + * jump offset by 'delta'. + */ +static int adjust_jmp_off(struct bpf_prog *prog, u32 tgt_idx, u32 delta) +{ + struct bpf_insn *insn = prog->insnsi; + u32 insn_cnt = prog->len, i; + s32 imm; + s16 off; + + for (i = 0; i < insn_cnt; i++, insn++) { + u8 code = insn->code; + + if (tgt_idx <= i && i < tgt_idx + delta) + continue; + + if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) || + BPF_OP(code) == BPF_CALL || BPF_OP(code) == BPF_EXIT) + continue; + + if (insn->code == (BPF_JMP32 | BPF_JA)) { + if (i + 1 + insn->imm != tgt_idx) + continue; + if (check_add_overflow(insn->imm, delta, &imm)) + return -ERANGE; + insn->imm = imm; + } else { + if (i + 1 + insn->off != tgt_idx) + continue; + if (check_add_overflow(insn->off, delta, &off)) + return -ERANGE; + insn->off = off; + } + } + return 0; +} + static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, u32 off, u32 cnt) { @@ -18494,22 +20198,29 @@ static int opt_remove_dead_code(struct bpf_verifier_env *env) return 0; } +static const struct bpf_insn NOP = BPF_JMP_IMM(BPF_JA, 0, 0, 0); +static const struct bpf_insn MAY_GOTO_0 = BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0, 0); + static int opt_remove_nops(struct bpf_verifier_env *env) { - const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0); struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; + bool is_may_goto_0, is_ja; int i, err; for (i = 0; i < insn_cnt; i++) { - if (memcmp(&insn[i], &ja, sizeof(ja))) + is_may_goto_0 = !memcmp(&insn[i], &MAY_GOTO_0, sizeof(MAY_GOTO_0)); + is_ja = !memcmp(&insn[i], &NOP, sizeof(NOP)); + + if (!is_may_goto_0 && !is_ja) continue; err = verifier_remove_insns(env, i, 1); if (err) return err; insn_cnt--; - i--; + /* Go back one insn to catch may_goto +1; may_goto +0 sequence */ + i -= (is_may_goto_0 && i > 0) ? 2 : 1; } return 0; @@ -18621,14 +20332,39 @@ apply_patch_buffer: */ static int convert_ctx_accesses(struct bpf_verifier_env *env) { + struct bpf_subprog_info *subprogs = env->subprog_info; const struct bpf_verifier_ops *ops = env->ops; - int i, cnt, size, ctx_field_size, delta = 0; + int i, cnt, size, ctx_field_size, delta = 0, epilogue_cnt = 0; const int insn_cnt = env->prog->len; - struct bpf_insn insn_buf[16], *insn; + struct bpf_insn *epilogue_buf = env->epilogue_buf; + struct bpf_insn *insn_buf = env->insn_buf; + struct bpf_insn *insn; u32 target_size, size_default, off; struct bpf_prog *new_prog; enum bpf_access_type type; bool is_narrower_load; + int epilogue_idx = 0; + + if (ops->gen_epilogue) { + epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog, + -(subprogs[0].stack_depth + 8)); + if (epilogue_cnt >= INSN_BUF_SIZE) { + verbose(env, "bpf verifier is misconfigured\n"); + return -EINVAL; + } else if (epilogue_cnt) { + /* Save the ARG_PTR_TO_CTX for the epilogue to use */ + cnt = 0; + subprogs[0].stack_depth += 8; + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_1, + -subprogs[0].stack_depth); + insn_buf[cnt++] = env->prog->insnsi[0]; + new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + env->prog = new_prog; + delta += cnt - 1; + } + } if (ops->gen_prologue || env->seen_direct_write) { if (!ops->gen_prologue) { @@ -18637,7 +20373,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog); - if (cnt >= ARRAY_SIZE(insn_buf)) { + if (cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } else if (cnt) { @@ -18650,6 +20386,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) } } + if (delta) + WARN_ON(adjust_jmp_off(env->prog, 0, delta)); + if (bpf_prog_is_offloaded(env->prog->aux)) return 0; @@ -18676,6 +20415,31 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn->code == (BPF_ST | BPF_MEM | BPF_W) || insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { type = BPF_WRITE; + } else if ((insn->code == (BPF_STX | BPF_ATOMIC | BPF_W) || + insn->code == (BPF_STX | BPF_ATOMIC | BPF_DW)) && + env->insn_aux_data[i + delta].ptr_type == PTR_TO_ARENA) { + insn->code = BPF_STX | BPF_PROBE_ATOMIC | BPF_SIZE(insn->code); + env->prog->aux->num_exentries++; + continue; + } else if (insn->code == (BPF_JMP | BPF_EXIT) && + epilogue_cnt && + i + delta < subprogs[1].start) { + /* Generate epilogue for the main prog */ + if (epilogue_idx) { + /* jump back to the earlier generated epilogue */ + insn_buf[0] = BPF_JMP32_A(epilogue_idx - i - delta - 1); + cnt = 1; + } else { + memcpy(insn_buf, epilogue_buf, + epilogue_cnt * sizeof(*epilogue_buf)); + cnt = epilogue_cnt; + /* epilogue_idx cannot be 0. It must have at + * least one ctx ptr saving insn before the + * epilogue. + */ + epilogue_idx = i + delta; + } + goto patch_insn_buf; } else { continue; } @@ -18733,6 +20497,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) env->prog->aux->num_exentries++; } continue; + case PTR_TO_ARENA: + if (BPF_MODE(insn->code) == BPF_MEMSX) { + verbose(env, "sign extending loads from arena are not supported yet\n"); + return -EOPNOTSUPP; + } + insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code); + env->prog->aux->num_exentries++; + continue; default: continue; } @@ -18770,7 +20542,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) target_size = 0; cnt = convert_ctx_access(type, insn, insn_buf, env->prog, &target_size); - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || + if (cnt == 0 || cnt >= INSN_BUF_SIZE || (ctx_field_size && !target_size)) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; @@ -18779,7 +20551,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) if (is_narrower_load && size < target_size) { u8 shift = bpf_ctx_narrow_access_offset( off, size, size_default) * 8; - if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) { + if (shift && cnt + 1 >= INSN_BUF_SIZE) { verbose(env, "bpf verifier narrow ctx load misconfigured\n"); return -EINVAL; } @@ -18804,6 +20576,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn->dst_reg, insn->dst_reg, size * 8, 0); +patch_insn_buf: new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) return -ENOMEM; @@ -18854,12 +20627,19 @@ static int jit_subprogs(struct bpf_verifier_env *env) env->insn_aux_data[i].call_imm = insn->imm; /* point imm to __bpf_call_base+1 from JITs point of view */ insn->imm = 1; - if (bpf_pseudo_func(insn)) + if (bpf_pseudo_func(insn)) { +#if defined(MODULES_VADDR) + u64 addr = MODULES_VADDR; +#else + u64 addr = VMALLOC_START; +#endif /* jit (e.g. x86_64) may emit fewer instructions * if it learns a u32 imm is the same as a u64 imm. - * Force a non zero here. + * Set close enough to possible prog address. */ - insn[1].imm = 1; + insn[0].imm = (u32)addr; + insn[1].imm = addr >> 32; + } } err = bpf_prog_alloc_jited_linfo(prog); @@ -18891,6 +20671,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) if (bpf_prog_calc_tag(func[i])) goto out_free; func[i]->is_func = 1; + func[i]->sleepable = prog->sleepable; func[i]->aux->func_idx = i; /* Below members will be freed only at prog->aux */ func[i]->aux->btf = prog->aux->btf; @@ -18910,6 +20691,9 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; + if (env->subprog_info[i].priv_stack_mode == PRIV_STACK_ADAPTIVE) + func[i]->aux->jits_use_priv_stack = true; + func[i]->jit_requested = 1; func[i]->blinding_requested = prog->blinding_requested; func[i]->aux->kfunc_tab = prog->aux->kfunc_tab; @@ -18918,17 +20702,27 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->nr_linfo = prog->aux->nr_linfo; func[i]->aux->jited_linfo = prog->aux->jited_linfo; func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx; + func[i]->aux->arena = prog->aux->arena; num_exentries = 0; insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { if (BPF_CLASS(insn->code) == BPF_LDX && (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEM32 || BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) num_exentries++; + if ((BPF_CLASS(insn->code) == BPF_STX || + BPF_CLASS(insn->code) == BPF_ST) && + BPF_MODE(insn->code) == BPF_PROBE_MEM32) + num_exentries++; + if (BPF_CLASS(insn->code) == BPF_STX && + BPF_MODE(insn->code) == BPF_PROBE_ATOMIC) + num_exentries++; } func[i]->aux->num_exentries = num_exentries; func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable; func[i]->aux->exception_cb = env->subprog_info[i].is_exception_cb; + func[i]->aux->changes_pkt_data = env->subprog_info[i].changes_pkt_data; if (!i) func[i]->aux->exception_boundary = env->seen_exception; func[i] = bpf_int_jit_compile(func[i]); @@ -18989,10 +20783,14 @@ static int jit_subprogs(struct bpf_verifier_env *env) * bpf_prog_load will add the kallsyms for the main program. */ for (i = 1; i < env->subprog_cnt; i++) { - bpf_prog_lock_ro(func[i]); - bpf_prog_kallsyms_add(func[i]); + err = bpf_prog_lock_ro(func[i]); + if (err) + goto out_free; } + for (i = 1; i < env->subprog_cnt; i++) + bpf_prog_kallsyms_add(func[i]); + /* Last step: make now unused interpreter insns from main * prog consistent for later dump requests, so they can * later look the same as if they were interpreted only. @@ -19252,6 +21050,13 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) { insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1); *cnt = 1; + } else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) { + struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) }; + + insn_buf[0] = ld_addrs[0]; + insn_buf[1] = ld_addrs[1]; + insn_buf[2] = *insn; + *cnt = 3; } return 0; } @@ -19296,10 +21101,13 @@ static int do_misc_fixups(struct bpf_verifier_env *env) const int insn_cnt = prog->len; const struct bpf_map_ops *ops; struct bpf_insn_aux_data *aux; - struct bpf_insn insn_buf[16]; + struct bpf_insn *insn_buf = env->insn_buf; struct bpf_prog *new_prog; struct bpf_map *map_ptr; - int i, ret, cnt, delta = 0; + int i, ret, cnt, delta = 0, cur_subprog = 0; + struct bpf_subprog_info *subprogs = env->subprog_info; + u16 stack_depth = subprogs[cur_subprog].stack_depth; + u16 stack_depth_extra = 0; if (env->seen_exception && !env->exception_callback_subprog) { struct bpf_insn patch[] = { @@ -19319,14 +21127,63 @@ static int do_misc_fixups(struct bpf_verifier_env *env) mark_subprog_exc_cb(env, env->exception_callback_subprog); } - for (i = 0; i < insn_cnt; i++, insn++) { - /* Make divide-by-zero exceptions impossible. */ + for (i = 0; i < insn_cnt;) { + if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) { + if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) || + (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) { + /* convert to 32-bit mov that clears upper 32-bit */ + insn->code = BPF_ALU | BPF_MOV | BPF_X; + /* clear off and imm, so it's a normal 'wX = wY' from JIT pov */ + insn->off = 0; + insn->imm = 0; + } /* cast from as(0) to as(1) should be handled by JIT */ + goto next_insn; + } + + if (env->insn_aux_data[i + delta].needs_zext) + /* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */ + insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code); + + /* Make sdiv/smod divide-by-minus-one exceptions impossible. */ + if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) || + insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) || + insn->code == (BPF_ALU | BPF_MOD | BPF_K) || + insn->code == (BPF_ALU | BPF_DIV | BPF_K)) && + insn->off == 1 && insn->imm == -1) { + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; + bool isdiv = BPF_OP(insn->code) == BPF_DIV; + struct bpf_insn *patchlet; + struct bpf_insn chk_and_sdiv[] = { + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_NEG | BPF_K, insn->dst_reg, + 0, 0, 0), + }; + struct bpf_insn chk_and_smod[] = { + BPF_MOV32_IMM(insn->dst_reg, 0), + }; + + patchlet = isdiv ? chk_and_sdiv : chk_and_smod; + cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod); + + new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } + + /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || insn->code == (BPF_ALU | BPF_MOD | BPF_X) || insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; bool isdiv = BPF_OP(insn->code) == BPF_DIV; + bool is_sdiv = isdiv && insn->off == 1; + bool is_smod = !isdiv && insn->off == 1; struct bpf_insn *patchlet; struct bpf_insn chk_and_div[] = { /* [R,W]x div 0 -> 0 */ @@ -19346,10 +21203,62 @@ static int do_misc_fixups(struct bpf_verifier_env *env) BPF_JMP_IMM(BPF_JA, 0, 0, 1), BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), }; + struct bpf_insn chk_and_sdiv[] = { + /* [R,W]x sdiv 0 -> 0 + * LLONG_MIN sdiv -1 -> LLONG_MIN + * INT_MIN sdiv -1 -> INT_MIN + */ + BPF_MOV64_REG(BPF_REG_AX, insn->src_reg), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_ADD | BPF_K, BPF_REG_AX, + 0, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JGT | BPF_K, BPF_REG_AX, + 0, 4, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, BPF_REG_AX, + 0, 1, 0), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_MOV | BPF_K, insn->dst_reg, + 0, 0, 0), + /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */ + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_NEG | BPF_K, insn->dst_reg, + 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + *insn, + }; + struct bpf_insn chk_and_smod[] = { + /* [R,W]x mod 0 -> [R,W]x */ + /* [R,W]x mod -1 -> 0 */ + BPF_MOV64_REG(BPF_REG_AX, insn->src_reg), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_ADD | BPF_K, BPF_REG_AX, + 0, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JGT | BPF_K, BPF_REG_AX, + 0, 3, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, BPF_REG_AX, + 0, 3 + (is64 ? 0 : 1), 1), + BPF_MOV32_IMM(insn->dst_reg, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + *insn, + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), + }; - patchlet = isdiv ? chk_and_div : chk_and_mod; - cnt = isdiv ? ARRAY_SIZE(chk_and_div) : - ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); + if (is_sdiv) { + patchlet = chk_and_sdiv; + cnt = ARRAY_SIZE(chk_and_sdiv); + } else if (is_smod) { + patchlet = chk_and_smod; + cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0); + } else { + patchlet = isdiv ? chk_and_div : chk_and_mod; + cnt = isdiv ? ARRAY_SIZE(chk_and_div) : + ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); + } new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); if (!new_prog) @@ -19358,7 +21267,37 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; + } + + /* Make it impossible to de-reference a userspace address */ + if (BPF_CLASS(insn->code) == BPF_LDX && + (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) { + struct bpf_insn *patch = &insn_buf[0]; + u64 uaddress_limit = bpf_arch_uaddress_limit(); + + if (!uaddress_limit) + goto next_insn; + + *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); + if (insn->off) + *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off); + *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32); + *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2); + *patch++ = *insn; + *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); + *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0); + + cnt = patch - insn_buf; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; } /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */ @@ -19366,7 +21305,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) (BPF_MODE(insn->code) == BPF_ABS || BPF_MODE(insn->code) == BPF_IND)) { cnt = env->ops->gen_ld_abs(insn, insn_buf); - if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { + if (cnt == 0 || cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } @@ -19378,7 +21317,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } /* Rewrite pointer arithmetic to mitigate speculation attacks. */ @@ -19393,7 +21332,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) aux = &env->insn_aux_data[i + delta]; if (!aux->alu_state || aux->alu_state == BPF_ALU_NON_POINTER) - continue; + goto next_insn; isneg = aux->alu_state & BPF_ALU_NEG_VALUE; issrc = (aux->alu_state & BPF_ALU_SANITIZE) == @@ -19431,19 +21370,42 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; + } + + if (is_may_goto_insn(insn)) { + int stack_off = -stack_depth - 8; + + stack_depth_extra = 8; + insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_AX, BPF_REG_10, stack_off); + if (insn->off >= 0) + insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off + 2); + else + insn_buf[1] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_AX, 0, insn->off - 1); + insn_buf[2] = BPF_ALU64_IMM(BPF_SUB, BPF_REG_AX, 1); + insn_buf[3] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_AX, stack_off); + cnt = 4; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; } if (insn->code != (BPF_JMP | BPF_CALL)) - continue; + goto next_insn; if (insn->src_reg == BPF_PSEUDO_CALL) - continue; + goto next_insn; if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt); if (ret) return ret; if (cnt == 0) - continue; + goto next_insn; new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); if (!new_prog) @@ -19452,9 +21414,13 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } + /* Skip inlining the helper call if the JIT does it. */ + if (bpf_jit_inlines_helper_call(insn->imm)) + goto next_insn; + if (insn->imm == BPF_FUNC_get_route_realm) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) @@ -19488,7 +21454,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) !bpf_map_ptr_unpriv(aux)) { struct bpf_jit_poke_descriptor desc = { .reason = BPF_POKE_REASON_TAIL_CALL, - .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state), + .tail_call.map = aux->map_ptr_state.map_ptr, .tail_call.key = bpf_map_key_immediate(aux), .insn_idx = i + delta, }; @@ -19500,11 +21466,11 @@ static int do_misc_fixups(struct bpf_verifier_env *env) } insn->imm = ret + 1; - continue; + goto next_insn; } if (!bpf_map_ptr_unpriv(aux)) - continue; + goto next_insn; /* instead of changing every JIT dealing with tail_call * emit two extra insns: @@ -19517,7 +21483,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) return -EINVAL; } - map_ptr = BPF_MAP_PTR(aux->map_ptr_state); + map_ptr = aux->map_ptr_state.map_ptr; insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, map_ptr->max_entries, 2); insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, @@ -19533,7 +21499,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } if (insn->imm == BPF_FUNC_timer_set_callback) { @@ -19570,7 +21536,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) } if (is_storage_get_function(insn->imm)) { - if (!env->prog->aux->sleepable || + if (!in_sleepable(env) || env->insn_aux_data[i + delta].storage_get_func_atomic) insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC); else @@ -19625,14 +21591,14 @@ static int do_misc_fixups(struct bpf_verifier_env *env) if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; - map_ptr = BPF_MAP_PTR(aux->map_ptr_state); + map_ptr = aux->map_ptr_state.map_ptr; ops = map_ptr->ops; if (insn->imm == BPF_FUNC_map_lookup_elem && ops->map_gen_lookup) { cnt = ops->map_gen_lookup(map_ptr, insn_buf); if (cnt == -EOPNOTSUPP) goto patch_map_ops_generic; - if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) { + if (cnt <= 0 || cnt >= INSN_BUF_SIZE) { verbose(env, "bpf verifier is misconfigured\n"); return -EINVAL; } @@ -19645,7 +21611,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, @@ -19676,31 +21642,31 @@ patch_map_ops_generic: switch (insn->imm) { case BPF_FUNC_map_lookup_elem: insn->imm = BPF_CALL_IMM(ops->map_lookup_elem); - continue; + goto next_insn; case BPF_FUNC_map_update_elem: insn->imm = BPF_CALL_IMM(ops->map_update_elem); - continue; + goto next_insn; case BPF_FUNC_map_delete_elem: insn->imm = BPF_CALL_IMM(ops->map_delete_elem); - continue; + goto next_insn; case BPF_FUNC_map_push_elem: insn->imm = BPF_CALL_IMM(ops->map_push_elem); - continue; + goto next_insn; case BPF_FUNC_map_pop_elem: insn->imm = BPF_CALL_IMM(ops->map_pop_elem); - continue; + goto next_insn; case BPF_FUNC_map_peek_elem: insn->imm = BPF_CALL_IMM(ops->map_peek_elem); - continue; + goto next_insn; case BPF_FUNC_redirect_map: insn->imm = BPF_CALL_IMM(ops->map_redirect); - continue; + goto next_insn; case BPF_FUNC_for_each_map_elem: insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); - continue; + goto next_insn; case BPF_FUNC_map_lookup_percpu_elem: insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem); - continue; + goto next_insn; } goto patch_call_imm; @@ -19728,9 +21694,37 @@ patch_map_ops_generic: delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } +#if defined(CONFIG_X86_64) && !defined(CONFIG_UML) + /* Implement bpf_get_smp_processor_id() inline. */ + if (insn->imm == BPF_FUNC_get_smp_processor_id && + verifier_inlines_helper_call(env, insn->imm)) { + /* BPF_FUNC_get_smp_processor_id inlining is an + * optimization, so if pcpu_hot.cpu_number is ever + * changed in some incompatible and hard to support + * way, it's fine to back out this inlining logic + */ +#ifdef CONFIG_SMP + insn_buf[0] = BPF_MOV32_IMM(BPF_REG_0, (u32)(unsigned long)&pcpu_hot.cpu_number); + insn_buf[1] = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); + insn_buf[2] = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0); + cnt = 3; +#else + insn_buf[0] = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0); + cnt = 1; +#endif + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } +#endif /* Implement bpf_get_func_arg inline. */ if (prog_type == BPF_PROG_TYPE_TRACING && insn->imm == BPF_FUNC_get_func_arg) { @@ -19753,7 +21747,7 @@ patch_map_ops_generic: delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } /* Implement bpf_get_func_ret inline. */ @@ -19781,7 +21775,7 @@ patch_map_ops_generic: delta += cnt - 1; env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } /* Implement get_func_arg_cnt inline. */ @@ -19796,7 +21790,7 @@ patch_map_ops_generic: env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } /* Implement bpf_get_func_ip inline. */ @@ -19811,9 +21805,82 @@ patch_map_ops_generic: env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; - continue; + goto next_insn; } + /* Implement bpf_get_branch_snapshot inline. */ + if (IS_ENABLED(CONFIG_PERF_EVENTS) && + prog->jit_requested && BITS_PER_LONG == 64 && + insn->imm == BPF_FUNC_get_branch_snapshot) { + /* We are dealing with the following func protos: + * u64 bpf_get_branch_snapshot(void *buf, u32 size, u64 flags); + * int perf_snapshot_branch_stack(struct perf_branch_entry *entries, u32 cnt); + */ + const u32 br_entry_size = sizeof(struct perf_branch_entry); + + /* struct perf_branch_entry is part of UAPI and is + * used as an array element, so extremely unlikely to + * ever grow or shrink + */ + BUILD_BUG_ON(br_entry_size != 24); + + /* if (unlikely(flags)) return -EINVAL */ + insn_buf[0] = BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 0, 7); + + /* Transform size (bytes) into number of entries (cnt = size / 24). + * But to avoid expensive division instruction, we implement + * divide-by-3 through multiplication, followed by further + * division by 8 through 3-bit right shift. + * Refer to book "Hacker's Delight, 2nd ed." by Henry S. Warren, Jr., + * p. 227, chapter "Unsigned Division by 3" for details and proofs. + * + * N / 3 <=> M * N / 2^33, where M = (2^33 + 1) / 3 = 0xaaaaaaab. + */ + insn_buf[1] = BPF_MOV32_IMM(BPF_REG_0, 0xaaaaaaab); + insn_buf[2] = BPF_ALU64_REG(BPF_MUL, BPF_REG_2, BPF_REG_0); + insn_buf[3] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36); + + /* call perf_snapshot_branch_stack implementation */ + insn_buf[4] = BPF_EMIT_CALL(static_call_query(perf_snapshot_branch_stack)); + /* if (entry_cnt == 0) return -ENOENT */ + insn_buf[5] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4); + /* return entry_cnt * sizeof(struct perf_branch_entry) */ + insn_buf[6] = BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, br_entry_size); + insn_buf[7] = BPF_JMP_A(3); + /* return -EINVAL; */ + insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL); + insn_buf[9] = BPF_JMP_A(1); + /* return -ENOENT; */ + insn_buf[10] = BPF_MOV64_IMM(BPF_REG_0, -ENOENT); + cnt = 11; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } + + /* Implement bpf_kptr_xchg inline */ + if (prog->jit_requested && BITS_PER_LONG == 64 && + insn->imm == BPF_FUNC_kptr_xchg && + bpf_jit_supports_ptr_xchg()) { + insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2); + insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0); + cnt = 2; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } patch_call_imm: fn = env->ops->get_func_proto(insn->imm, env->prog); /* all functions that have prototype and verifier allowed @@ -19826,6 +21893,47 @@ patch_call_imm: return -EFAULT; } insn->imm = fn->func - __bpf_call_base; +next_insn: + if (subprogs[cur_subprog + 1].start == i + delta + 1) { + subprogs[cur_subprog].stack_depth += stack_depth_extra; + subprogs[cur_subprog].stack_extra = stack_depth_extra; + cur_subprog++; + stack_depth = subprogs[cur_subprog].stack_depth; + stack_depth_extra = 0; + } + i++; + insn++; + } + + env->prog->aux->stack_depth = subprogs[0].stack_depth; + for (i = 0; i < env->subprog_cnt; i++) { + int subprog_start = subprogs[i].start; + int stack_slots = subprogs[i].stack_extra / 8; + + if (!stack_slots) + continue; + if (stack_slots > 1) { + verbose(env, "verifier bug: stack_slots supports may_goto only\n"); + return -EFAULT; + } + + /* Add ST insn to subprog prologue to init extra stack */ + insn_buf[0] = BPF_ST_MEM(BPF_DW, BPF_REG_FP, + -subprogs[i].stack_depth, BPF_MAX_LOOPS); + /* Copy first actual insn to preserve it */ + insn_buf[1] = env->prog->insnsi[subprog_start]; + + new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, 2); + if (!new_prog) + return -ENOMEM; + env->prog = prog = new_prog; + /* + * If may_goto is a first insn of a prog there could be a jmp + * insn that points to it, hence adjust all such jmps to point + * to insn after BPF_ST that inits may_goto count. + * Adjustment will succeed because bpf_patch_insn_data() didn't fail. + */ + WARN_ON(adjust_jmp_off(env->prog, subprog_start, 1)); } /* Since poke tab is now finalized, publish aux to tracker. */ @@ -19854,7 +21962,7 @@ static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env, int position, s32 stack_base, u32 callback_subprogno, - u32 *cnt) + u32 *total_cnt) { s32 r6_offset = stack_base + 0 * BPF_REG_SIZE; s32 r7_offset = stack_base + 1 * BPF_REG_SIZE; @@ -19863,55 +21971,56 @@ static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env, int reg_loop_cnt = BPF_REG_7; int reg_loop_ctx = BPF_REG_8; + struct bpf_insn *insn_buf = env->insn_buf; struct bpf_prog *new_prog; u32 callback_start; u32 call_insn_offset; s32 callback_offset; + u32 cnt = 0; /* This represents an inlined version of bpf_iter.c:bpf_loop, * be careful to modify this code in sync. */ - struct bpf_insn insn_buf[] = { - /* Return error and jump to the end of the patch if - * expected number of iterations is too big. - */ - BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2), - BPF_MOV32_IMM(BPF_REG_0, -E2BIG), - BPF_JMP_IMM(BPF_JA, 0, 0, 16), - /* spill R6, R7, R8 to use these as loop vars */ - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset), - /* initialize loop vars */ - BPF_MOV64_REG(reg_loop_max, BPF_REG_1), - BPF_MOV32_IMM(reg_loop_cnt, 0), - BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3), - /* loop header, - * if reg_loop_cnt >= reg_loop_max skip the loop body - */ - BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5), - /* callback call, - * correct callback offset would be set after patching - */ - BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt), - BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx), - BPF_CALL_REL(0), - /* increment loop counter */ - BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1), - /* jump to loop header if callback returned 0 */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6), - /* return value of bpf_loop, - * set R0 to the number of iterations - */ - BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt), - /* restore original values of R6, R7, R8 */ - BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset), - BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset), - BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset), - }; - *cnt = ARRAY_SIZE(insn_buf); - new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt); + /* Return error and jump to the end of the patch if + * expected number of iterations is too big. + */ + insn_buf[cnt++] = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2); + insn_buf[cnt++] = BPF_MOV32_IMM(BPF_REG_0, -E2BIG); + insn_buf[cnt++] = BPF_JMP_IMM(BPF_JA, 0, 0, 16); + /* spill R6, R7, R8 to use these as loop vars */ + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset); + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset); + insn_buf[cnt++] = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset); + /* initialize loop vars */ + insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_max, BPF_REG_1); + insn_buf[cnt++] = BPF_MOV32_IMM(reg_loop_cnt, 0); + insn_buf[cnt++] = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3); + /* loop header, + * if reg_loop_cnt >= reg_loop_max skip the loop body + */ + insn_buf[cnt++] = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5); + /* callback call, + * correct callback offset would be set after patching + */ + insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt); + insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx); + insn_buf[cnt++] = BPF_CALL_REL(0); + /* increment loop counter */ + insn_buf[cnt++] = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1); + /* jump to loop header if callback returned 0 */ + insn_buf[cnt++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6); + /* return value of bpf_loop, + * set R0 to the number of iterations + */ + insn_buf[cnt++] = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt); + /* restore original values of R6, R7, R8 */ + insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset); + insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset); + insn_buf[cnt++] = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset); + + *total_cnt = cnt; + new_prog = bpf_patch_insn_data(env, position, insn_buf, cnt); if (!new_prog) return new_prog; @@ -19986,6 +22095,40 @@ static int optimize_bpf_loop(struct bpf_verifier_env *env) return 0; } +/* Remove unnecessary spill/fill pairs, members of fastcall pattern, + * adjust subprograms stack depth when possible. + */ +static int remove_fastcall_spills_fills(struct bpf_verifier_env *env) +{ + struct bpf_subprog_info *subprog = env->subprog_info; + struct bpf_insn_aux_data *aux = env->insn_aux_data; + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + u32 spills_num; + bool modified = false; + int i, j; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (aux[i].fastcall_spills_num > 0) { + spills_num = aux[i].fastcall_spills_num; + /* NOPs would be removed by opt_remove_nops() */ + for (j = 1; j <= spills_num; ++j) { + *(insn - j) = NOP; + *(insn + j) = NOP; + } + modified = true; + } + if ((subprog + 1)->start == i + 1) { + if (modified && !subprog->keep_fastcall_stack) + subprog->stack_depth = -subprog->fastcall_stack_off; + subprog++; + modified = false; + } + } + + return 0; +} + static void free_states(struct bpf_verifier_env *env) { struct bpf_verifier_state_list *sl, *sln; @@ -20046,7 +22189,6 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) state->first_insn_idx = env->subprog_info[subprog].start; state->last_insn_idx = -1; - regs = state->frame[state->curframe]->regs; if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) { const char *sub_name = subprog_name(env, subprog); @@ -20090,6 +22232,21 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog) mark_reg_known_zero(env, regs, i); reg->mem_size = arg->mem_size; reg->id = ++env->id_gen; + } else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) { + reg->type = PTR_TO_BTF_ID; + if (arg->arg_type & PTR_MAYBE_NULL) + reg->type |= PTR_MAYBE_NULL; + if (arg->arg_type & PTR_UNTRUSTED) + reg->type |= PTR_UNTRUSTED; + if (arg->arg_type & PTR_TRUSTED) + reg->type |= PTR_TRUSTED; + mark_reg_known_zero(env, regs, i); + reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */ + reg->btf_id = arg->btf_id; + reg->id = ++env->id_gen; + } else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) { + /* caller can pass either PTR_TO_ARENA or SCALAR */ + mark_reg_unknown(env, regs, i); } else { WARN_ONCE(1, "BUG: unhandled arg#%d type %d\n", i - BPF_REG_1, arg->arg_type); @@ -20238,26 +22395,44 @@ static void print_verification_stats(struct bpf_verifier_env *env) static int check_struct_ops_btf_id(struct bpf_verifier_env *env) { const struct btf_type *t, *func_proto; + const struct bpf_struct_ops_desc *st_ops_desc; const struct bpf_struct_ops *st_ops; const struct btf_member *member; struct bpf_prog *prog = env->prog; u32 btf_id, member_idx; + struct btf *btf; const char *mname; + int err; if (!prog->gpl_compatible) { verbose(env, "struct ops programs must have a GPL compatible license\n"); return -EINVAL; } + if (!prog->aux->attach_btf_id) + return -ENOTSUPP; + + btf = prog->aux->attach_btf; + if (btf_is_module(btf)) { + /* Make sure st_ops is valid through the lifetime of env */ + env->attach_btf_mod = btf_try_get_module(btf); + if (!env->attach_btf_mod) { + verbose(env, "struct_ops module %s is not found\n", + btf_get_name(btf)); + return -ENOTSUPP; + } + } + btf_id = prog->aux->attach_btf_id; - st_ops = bpf_struct_ops_find(btf_id); - if (!st_ops) { + st_ops_desc = bpf_struct_ops_find(btf, btf_id); + if (!st_ops_desc) { verbose(env, "attach_btf_id %u is not a supported struct\n", btf_id); return -ENOTSUPP; } + st_ops = st_ops_desc->st_ops; - t = st_ops->type; + t = st_ops_desc->type; member_idx = prog->expected_attach_type; if (member_idx >= btf_type_vlen(t)) { verbose(env, "attach to invalid member idx %u of struct %s\n", @@ -20266,8 +22441,8 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) } member = &btf_type_member(t)[member_idx]; - mname = btf_name_by_offset(btf_vmlinux, member->name_off); - func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type, + mname = btf_name_by_offset(btf, member->name_off); + func_proto = btf_type_resolve_func_ptr(btf, member->type, NULL); if (!func_proto) { verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", @@ -20275,8 +22450,15 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) return -EINVAL; } + err = bpf_struct_ops_supported(st_ops, __btf_member_bit_offset(t, member) / 8); + if (err) { + verbose(env, "attach to unsupported member %s of struct %s\n", + mname, st_ops->name); + return err; + } + if (st_ops->check_member) { - int err = st_ops->check_member(t, member, prog); + err = st_ops->check_member(t, member, prog); if (err) { verbose(env, "attach to unsupported member %s of struct %s\n", @@ -20285,6 +22467,17 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) } } + if (prog->aux->priv_stack_requested && !bpf_jit_supports_private_stack()) { + verbose(env, "Private stack not supported by jit\n"); + return -EACCES; + } + + /* btf_ctx_access() used this to provide argument type info */ + prog->aux->ctx_arg_info = + st_ops_desc->arg_info[member_idx].info; + prog->aux->ctx_arg_info_size = + st_ops_desc->arg_info[member_idx].cnt; + prog->aux->attach_func_proto = func_proto; prog->aux->attach_func_name = mname; env->ops = st_ops->verifier_ops; @@ -20310,8 +22503,12 @@ BTF_SET_START(btf_non_sleepable_error_inject) * Assume non-sleepable from bpf safety point of view. */ BTF_ID(func, __filemap_add_folio) +#ifdef CONFIG_FAIL_PAGE_ALLOC BTF_ID(func, should_fail_alloc_page) +#endif +#ifdef CONFIG_FAILSLAB BTF_ID(func, should_failslab) +#endif BTF_SET_END(btf_non_sleepable_error_inject) static int check_non_sleepable_error_inject(u32 btf_id) @@ -20327,11 +22524,13 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, { bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING; + char trace_symbol[KSYM_SYMBOL_LEN]; const char prefix[] = "btf_trace_"; + struct bpf_raw_event_map *btp; int ret = 0, subprog = -1, i; const struct btf_type *t; bool conservative = true; - const char *tname; + const char *tname, *fname; struct btf *btf; long addr = 0; struct module *mod = NULL; @@ -20358,6 +22557,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, } if (tgt_prog) { struct bpf_prog_aux *aux = tgt_prog->aux; + bool tgt_changes_pkt_data; if (bpf_prog_is_dev_bound(prog->aux) && !bpf_prog_dev_bound_match(prog, tgt_prog)) { @@ -20392,6 +22592,14 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, "Extension programs should be JITed\n"); return -EINVAL; } + tgt_changes_pkt_data = aux->func + ? aux->func[subprog]->aux->changes_pkt_data + : aux->changes_pkt_data; + if (prog->aux->changes_pkt_data && !tgt_changes_pkt_data) { + bpf_log(log, + "Extension program changes packet data, while original does not\n"); + return -EINVAL; + } } if (!tgt_prog->jited) { bpf_log(log, "Can attach to only JITed progs\n"); @@ -20462,10 +22670,34 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, return -EINVAL; } tname += sizeof(prefix) - 1; - t = btf_type_by_id(btf, t->type); - if (!btf_type_is_ptr(t)) - /* should never happen in valid vmlinux build */ + + /* The func_proto of "btf_trace_##tname" is generated from typedef without argument + * names. Thus using bpf_raw_event_map to get argument names. + */ + btp = bpf_get_raw_tracepoint(tname); + if (!btp) return -EINVAL; + fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL, + trace_symbol); + bpf_put_raw_tracepoint(btp); + + if (fname) + ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC); + + if (!fname || ret < 0) { + bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n", + prefix, tname); + t = btf_type_by_id(btf, t->type); + if (!btf_type_is_ptr(t)) + /* should never happen in valid vmlinux build */ + return -EINVAL; + } else { + t = btf_type_by_id(btf, ret); + if (!btf_type_is_func(t)) + /* should never happen in valid vmlinux build */ + return -EINVAL; + } + t = btf_type_by_id(btf, t->type); if (!btf_type_is_func_proto(t)) /* should never happen in valid vmlinux build */ @@ -20542,7 +22774,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, } } - if (prog->aux->sleepable) { + if (prog->sleepable) { ret = -EINVAL; switch (prog->type) { case BPF_PROG_TYPE_TRACING: @@ -20653,14 +22885,14 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) u64 key; if (prog->type == BPF_PROG_TYPE_SYSCALL) { - if (prog->aux->sleepable) + if (prog->sleepable) /* attach_btf_id checked to be zero already */ return 0; verbose(env, "Syscall programs can only be sleepable\n"); return -EINVAL; } - if (prog->aux->sleepable && !can_be_sleepable(prog)) { + if (prog->sleepable && !can_be_sleepable(prog)) { verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n"); return -EINVAL; } @@ -20737,6 +22969,73 @@ struct btf *bpf_get_btf_vmlinux(void) return btf_vmlinux; } +/* + * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In + * this case expect that every file descriptor in the array is either a map or + * a BTF. Everything else is considered to be trash. + */ +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd) +{ + struct bpf_map *map; + struct btf *btf; + CLASS(fd, f)(fd); + int err; + + map = __bpf_map_get(f); + if (!IS_ERR(map)) { + err = __add_used_map(env, map); + if (err < 0) + return err; + return 0; + } + + btf = __btf_get_by_fd(f); + if (!IS_ERR(btf)) { + err = __add_used_btf(env, btf); + if (err < 0) + return err; + return 0; + } + + verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd); + return PTR_ERR(map); +} + +static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr) +{ + size_t size = sizeof(int); + int ret; + int fd; + u32 i; + + env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); + + /* + * The only difference between old (no fd_array_cnt is given) and new + * APIs is that in the latter case the fd_array is expected to be + * continuous and is scanned for map fds right away + */ + if (!attr->fd_array_cnt) + return 0; + + /* Check for integer overflow */ + if (attr->fd_array_cnt >= (U32_MAX / size)) { + verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt); + return -EINVAL; + } + + for (i = 0; i < attr->fd_array_cnt; i++) { + if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size)) + return -EFAULT; + + ret = add_fd_from_fd_array(env, fd); + if (ret) + return ret; + } + + return 0; +} + int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { u64 start_time = ktime_get_ns(); @@ -20752,7 +23051,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 /* 'struct bpf_verifier_env' can be global, but since it's not small, * allocate/free it every time bpf_check() is called */ - env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); + env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); if (!env) return -ENOMEM; @@ -20768,8 +23067,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; - env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); - is_priv = bpf_capable(); + + env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token); + env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token); + env->bypass_spec_v1 = bpf_bypass_spec_v1(env->prog->aux->token); + env->bypass_spec_v4 = bpf_bypass_spec_v4(env->prog->aux->token); + env->bpf_capable = is_priv = bpf_token_capable(env->prog->aux->token, CAP_BPF); bpf_get_btf_vmlinux(); @@ -20786,6 +23089,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret) goto err_unlock; + ret = process_fd_array(env, attr, uattr); + if (ret) + goto skip_full_check; + mark_verifier_state_clean(env); if (IS_ERR(btf_vmlinux)) { @@ -20801,12 +23108,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) env->strict_alignment = false; - env->allow_ptr_leaks = bpf_allow_ptr_leaks(); - env->allow_uninit_stack = bpf_allow_uninit_stack(); - env->bypass_spec_v1 = bpf_bypass_spec_v1(); - env->bypass_spec_v4 = bpf_bypass_spec_v4(); - env->bpf_capable = bpf_capable(); - if (is_priv) env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ; env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; @@ -20834,10 +23135,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret < 0) goto skip_full_check; - ret = check_attach_btf_id(env); - if (ret) - goto skip_full_check; - ret = resolve_pseudo_ldimm64(env); if (ret < 0) goto skip_full_check; @@ -20852,6 +23149,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret < 0) goto skip_full_check; + ret = check_attach_btf_id(env); + if (ret) + goto skip_full_check; + + ret = mark_fastcall_patterns(env); + if (ret < 0) + goto skip_full_check; + ret = do_check_main(env); ret = ret ?: do_check_subprogs(env); @@ -20861,6 +23166,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 skip_full_check: kvfree(env->explored_states); + /* might decrease stack depth, keep it before passes that + * allocate additional slots. + */ + if (ret == 0) + ret = remove_fastcall_spills_fills(env); + if (ret == 0) ret = check_max_stack_depth(env); @@ -20972,11 +23283,14 @@ err_release_maps: env->prog->expected_attach_type = 0; *prog = env->prog; + + module_put(env->attach_btf_mod); err_unlock: if (!is_priv) mutex_unlock(&bpf_verifier_lock); vfree(env->insn_aux_data); + kvfree(env->insn_hist); err_free_env: - kfree(env); + kvfree(env); return ret; } |