Diffstat (limited to 'kernel/bpf/verifier.c')
 -rw-r--r--  kernel/bpf/verifier.c  697
 1 file changed, 541 insertions, 156 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7ee218827259..388245e8826e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -255,6 +255,7 @@ struct bpf_call_arg_meta {
     int mem_size;
     u64 msize_max_value;
     int ref_obj_id;
+    int dynptr_id;
     int map_uid;
     int func_id;
     struct btf *btf;
@@ -638,31 +639,57 @@ static void print_liveness(struct bpf_verifier_env *env,
         verbose(env, "D");
 }
 
-static int get_spi(s32 off)
+static int __get_spi(s32 off)
 {
     return (-off - 1) / BPF_REG_SIZE;
 }
 
+static struct bpf_func_state *func(struct bpf_verifier_env *env,
+                                   const struct bpf_reg_state *reg)
+{
+    struct bpf_verifier_state *cur = env->cur_state;
+
+    return cur->frame[reg->frameno];
+}
+
 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
 {
-    int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
+    int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
 
-    /* We need to check that slots between [spi - nr_slots + 1, spi] are
-     * within [0, allocated_stack).
-     *
-     * Please note that the spi grows downwards. For example, a dynptr
-     * takes the size of two stack slots; the first slot will be at
-     * spi and the second slot will be at spi - 1.
-     */
-    return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
+    /* We need to check that slots between [spi - nr_slots + 1, spi] are
+     * within [0, allocated_stack).
+     *
+     * Please note that the spi grows downwards. For example, a dynptr
+     * takes the size of two stack slots; the first slot will be at
+     * spi and the second slot will be at spi - 1.
+     */
+    return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
 }
 
-static struct bpf_func_state *func(struct bpf_verifier_env *env,
-                                   const struct bpf_reg_state *reg)
+static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
-    struct bpf_verifier_state *cur = env->cur_state;
+    int off, spi;
 
-    return cur->frame[reg->frameno];
+    if (!tnum_is_const(reg->var_off)) {
+        verbose(env, "dynptr has to be at a constant offset\n");
+        return -EINVAL;
+    }
+
+    off = reg->off + reg->var_off.value;
+    if (off % BPF_REG_SIZE) {
+        verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
+        return -EINVAL;
+    }
+
+    spi = __get_spi(off);
+    if (spi < 1) {
+        verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
+        return -EINVAL;
+    }
+
+    if (!is_spi_bounds_valid(func(env, reg), spi, BPF_DYNPTR_NR_SLOTS))
+        return -ERANGE;
+    return spi;
 }
 
 static const char *kernel_type_name(const struct btf* btf, u32 id)
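For illustration, a minimal userspace mirror of the slot arithmetic above (not part of the patch; assumes BPF_REG_SIZE == 8, as in the kernel):

    #include <stdio.h>

    #define BPF_REG_SIZE 8

    /* mirrors __get_spi(): map a negative frame-pointer offset to a slot index */
    static int get_spi(int off)
    {
        return (-off - 1) / BPF_REG_SIZE;
    }

    int main(void)
    {
        /* a dynptr at fp-16 occupies spi 1 (fp-16..fp-9) and spi 0 (fp-8..fp-1) */
        printf("off=-16 -> spi %d\n", get_spi(-16)); /* 1 */
        /* off=-8 yields spi 0: dynptr_get_spi() rejects it (spi < 1), since a
         * dynptr needs the slot pair spi, spi - 1 */
        printf("off=-8  -> spi %d\n", get_spi(-8));  /* 0 */
        /* off=-12 is caught even earlier: -12 % BPF_REG_SIZE != 0 */
        printf("off=-12 -> spi %d\n", get_spi(-12)); /* 1 */
        return 0;
    }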
@@ -727,37 +754,58 @@ static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
 
 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
-                              bool first_slot);
+                              bool first_slot, int dynptr_id);
 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
                                 struct bpf_reg_state *reg);
 
-static void mark_dynptr_stack_regs(struct bpf_reg_state *sreg1,
+static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
+                                   struct bpf_reg_state *sreg1,
                                    struct bpf_reg_state *sreg2,
                                    enum bpf_dynptr_type type)
 {
-    __mark_dynptr_reg(sreg1, type, true);
-    __mark_dynptr_reg(sreg2, type, false);
+    int id = ++env->id_gen;
+
+    __mark_dynptr_reg(sreg1, type, true, id);
+    __mark_dynptr_reg(sreg2, type, false, id);
 }
 
-static void mark_dynptr_cb_reg(struct bpf_reg_state *reg,
+static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
+                               struct bpf_reg_state *reg,
                                enum bpf_dynptr_type type)
 {
-    __mark_dynptr_reg(reg, type, true);
+    __mark_dynptr_reg(reg, type, true, ++env->id_gen);
 }
 
+static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
+                                        struct bpf_func_state *state, int spi);
+
 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
                                    enum bpf_arg_type arg_type, int insn_idx)
 {
     struct bpf_func_state *state = func(env, reg);
     enum bpf_dynptr_type type;
-    int spi, i, id;
-
-    spi = get_spi(reg->off);
-
-    if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
-        return -EINVAL;
+    int spi, i, id, err;
+
+    spi = dynptr_get_spi(env, reg);
+    if (spi < 0)
+        return spi;
+
+    /* We cannot assume both spi and spi - 1 belong to the same dynptr,
+     * hence we need to call destroy_if_dynptr_stack_slot twice for both,
+     * to ensure that for the following example:
+     * [d1][d1][d2][d2]
+     * spi    3   2   1   0
+     * So marking spi = 2 should lead to destruction of both d1 and d2. In
+     * case they do belong to same dynptr, second call won't see slot_type
+     * as STACK_DYNPTR and will simply skip destruction.
+     */
+    err = destroy_if_dynptr_stack_slot(env, state, spi);
+    if (err)
+        return err;
+    err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
+    if (err)
+        return err;
 
     for (i = 0; i < BPF_REG_SIZE; i++) {
         state->stack[spi].slot_type[i] = STACK_DYNPTR;
@@ -768,7 +816,7 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
     if (type == BPF_DYNPTR_TYPE_INVALID)
         return -EINVAL;
 
-    mark_dynptr_stack_regs(&state->stack[spi].spilled_ptr,
+    mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
                            &state->stack[spi - 1].spilled_ptr, type);
 
     if (dynptr_type_refcounted(type)) {
@@ -781,6 +829,9 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
         state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
     }
 
+    state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+    state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+
     return 0;
 }
 
@@ -789,10 +840,9 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
     struct bpf_func_state *state = func(env, reg);
     int spi, i;
 
-    spi = get_spi(reg->off);
-
-    if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
-        return -EINVAL;
+    spi = dynptr_get_spi(env, reg);
+    if (spi < 0)
+        return spi;
 
     for (i = 0; i < BPF_REG_SIZE; i++) {
         state->stack[spi].slot_type[i] = STACK_INVALID;
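The practical effect for BPF authors: reinitializing stack slots that already hold an unreferenced dynptr is now accepted. A sketch of the newly legal pattern (illustrative, assuming the usual vmlinux.h/libbpf setup):

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    char _license[] SEC("license") = "GPL";

    SEC("raw_tp")
    int reuse_dynptr_slots(void *ctx)
    {
        char buf[16];
        struct bpf_dynptr ptr;

        /* first init marks spi and spi - 1 as STACK_DYNPTR */
        bpf_dynptr_from_mem(buf, sizeof(buf), 0, &ptr);
        /* second init of the same slots: the old, unreferenced dynptr is now
         * destroyed first rather than the program being rejected. A
         * *referenced* dynptr (e.g. from bpf_ringbuf_reserve_dynptr()) would
         * instead fail with "cannot overwrite referenced dynptr". */
        bpf_dynptr_from_mem(buf, sizeof(buf), 0, &ptr);
        return 0;
    }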
@@ -805,43 +855,133 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
 
     __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
     __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
+
+    /* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
+     *
+     * While we don't allow reading STACK_INVALID, it is still possible to
+     * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
+     * helpers or insns can do partial read of that part without failing,
+     * but check_stack_range_initialized, check_stack_read_var_off, and
+     * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
+     * the slot conservatively. Hence we need to prevent those liveness
+     * marking walks.
+     *
+     * This was not a problem before because STACK_INVALID is only set by
+     * default (where the default reg state has its reg->parent as NULL), or
+     * in clean_live_states after REG_LIVE_DONE (at which point
+     * mark_reg_read won't walk reg->parent chain), but not randomly during
+     * verifier state exploration (like we did above). Hence, for our case
+     * parentage chain will still be live (i.e. reg->parent may be
+     * non-NULL), while earlier reg->parent was NULL, so we need
+     * REG_LIVE_WRITTEN to screen off read marker propagation when it is
+     * done later on reads or by mark_dynptr_read as well to unnecessary
+     * mark registers in verifier state.
+     */
+    state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+    state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+
     return 0;
 }
 
-static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static void __mark_reg_unknown(const struct bpf_verifier_env *env,
+                               struct bpf_reg_state *reg);
+
+static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
+                                        struct bpf_func_state *state, int spi)
 {
-    struct bpf_func_state *state = func(env, reg);
-    int spi, i;
+    struct bpf_func_state *fstate;
+    struct bpf_reg_state *dreg;
+    int i, dynptr_id;
 
-    if (reg->type == CONST_PTR_TO_DYNPTR)
-        return false;
+    /* We always ensure that STACK_DYNPTR is never set partially,
+     * hence just checking for slot_type[0] is enough. This is
+     * different for STACK_SPILL, where it may be only set for
+     * 1 byte, so code has to use is_spilled_reg.
+     */
+    if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
+        return 0;
 
-    spi = get_spi(reg->off);
-    if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
-        return true;
+    /* Reposition spi to first slot */
+    if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
+        spi = spi + 1;
+
+    if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
+        verbose(env, "cannot overwrite referenced dynptr\n");
+        return -EINVAL;
+    }
+
+    mark_stack_slot_scratched(env, spi);
+    mark_stack_slot_scratched(env, spi - 1);
 
+    /* Writing partially to one dynptr stack slot destroys both. */
     for (i = 0; i < BPF_REG_SIZE; i++) {
-        if (state->stack[spi].slot_type[i] == STACK_DYNPTR ||
-            state->stack[spi - 1].slot_type[i] == STACK_DYNPTR)
-            return false;
+        state->stack[spi].slot_type[i] = STACK_INVALID;
+        state->stack[spi - 1].slot_type[i] = STACK_INVALID;
     }
 
+    dynptr_id = state->stack[spi].spilled_ptr.id;
+    /* Invalidate any slices associated with this dynptr */
+    bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
+        /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
+        if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
+            continue;
+        if (dreg->dynptr_id == dynptr_id) {
+            if (!env->allow_ptr_leaks)
+                __mark_reg_not_init(env, dreg);
+            else
+                __mark_reg_unknown(env, dreg);
+        }
+    }));
+
+    /* Do not release reference state, we are destroying dynptr on stack,
+     * not using some helper to release it. Just reset register.
+     */
+    __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
+    __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
+
+    /* Same reason as unmark_stack_slots_dynptr above */
+    state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+    state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
+
+    return 0;
+}
+
+static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+                                       int spi)
+{
+    if (reg->type == CONST_PTR_TO_DYNPTR)
+        return false;
+
+    /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
+     * will do check_mem_access to check and update stack bounds later, so
+     * return true for that case.
+     */
+    if (spi < 0)
+        return spi == -ERANGE;
+    /* We allow overwriting existing unreferenced STACK_DYNPTR slots, see
+     * mark_stack_slots_dynptr which calls destroy_if_dynptr_stack_slot to
+     * ensure dynptr objects at the slots we are touching are completely
+     * destructed before we reinitialize them for a new one. For referenced
+     * ones, destroy_if_dynptr_stack_slot returns an error early instead of
+     * delaying it until the end where the user will get "Unreleased
+     * reference" error.
+     */
     return true;
 }
 
-static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+                                     int spi)
 {
     struct bpf_func_state *state = func(env, reg);
-    int spi;
     int i;
 
     /* This already represents first slot of initialized bpf_dynptr */
     if (reg->type == CONST_PTR_TO_DYNPTR)
         return true;
 
-    spi = get_spi(reg->off);
-    if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
-        !state->stack[spi].spilled_ptr.dynptr.first_slot)
+    if (spi < 0)
+        return false;
+    if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
         return false;
 
     for (i = 0; i < BPF_REG_SIZE; i++) {
@@ -868,7 +1008,9 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg
     if (reg->type == CONST_PTR_TO_DYNPTR) {
         return reg->dynptr.type == dynptr_type;
     } else {
-        spi = get_spi(reg->off);
+        spi = dynptr_get_spi(env, reg);
+        if (spi < 0)
+            return false;
         return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
     }
 }
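destroy_if_dynptr_stack_slot() also invalidates outstanding slices through the dynptr's id, so clobbering the stack slots retroactively kills pointers obtained from bpf_dynptr_data(). An illustrative shape the verifier now rejects (sketch, same assumed setup as above):

    SEC("raw_tp")
    int overwrite_invalidates_slice(void *ctx)
    {
        char buf[16];
        struct bpf_dynptr ptr;
        void *slice;

        bpf_dynptr_from_mem(buf, sizeof(buf), 0, &ptr);
        slice = bpf_dynptr_data(&ptr, 0, 8);
        if (!slice)
            return 0;

        /* a direct store into the dynptr's stack area destroys it ... */
        *(long *)&ptr = 0;
        /* ... so this load is rejected: the slice register was invalidated */
        return *(char *)slice;
    }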
@@ -1404,9 +1546,11 @@ static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
  */
 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
 {
-    /* Clear id, off, and union(map_ptr, range) */
+    /* Clear off and union(map_ptr, range) */
     memset(((u8 *)reg) + sizeof(reg->type), 0,
            offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
+    reg->id = 0;
+    reg->ref_obj_id = 0;
     ___mark_reg_known(reg, imm);
 }
 
@@ -1447,7 +1591,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env,
 }
 
 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
-                              bool first_slot)
+                              bool first_slot, int dynptr_id)
 {
     /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
      * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
@@ -1455,6 +1599,8 @@ static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type ty
      */
     __mark_reg_known_zero(reg);
     reg->type = CONST_PTR_TO_DYNPTR;
+    /* Give each dynptr a unique id to uniquely associate slices to it. */
+    reg->id = dynptr_id;
     reg->dynptr.type = type;
     reg->dynptr.first_slot = first_slot;
 }
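__mark_reg_known() (and __mark_reg_unknown() below) stops assuming that `id` lives inside the memset() window and clears the identity fields explicitly. The clearing pattern itself, as a standalone sketch with a toy layout (field names are stand-ins, not the real bpf_reg_state):

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    struct toy_reg {
        int type;
        int off;
        long map_ptr;        /* stands in for union(map_ptr, range) */
        unsigned long var_off;
        int id;
        int ref_obj_id;
    };

    int main(void)
    {
        struct toy_reg reg = { .type = 1, .off = 8, .map_ptr = 42,
                               .id = 3, .ref_obj_id = 9 };

        /* clear everything between 'type' and 'var_off' in one go ... */
        memset((char *)&reg + sizeof(reg.type), 0,
               offsetof(struct toy_reg, var_off) - sizeof(reg.type));
        /* ... and reset identity fields explicitly, as the patch now does */
        reg.id = 0;
        reg.ref_obj_id = 0;

        printf("off=%d map_ptr=%ld id=%d ref=%d\n",
               reg.off, reg.map_ptr, reg.id, reg.ref_obj_id);
        return 0;
    }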
@@ -1752,11 +1898,13 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env,
                                struct bpf_reg_state *reg)
 {
     /*
-     * Clear type, id, off, and union(map_ptr, range) and
+     * Clear type, off, and union(map_ptr, range) and
      * padding between 'type' and union
      */
     memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
     reg->type = SCALAR_VALUE;
+    reg->id = 0;
+    reg->ref_obj_id = 0;
     reg->var_off = tnum_unknown;
     reg->frameno = 0;
     reg->precise = !env->bpf_capable;
@@ -2185,6 +2333,12 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
         return -EINVAL;
     }
 
+    if (bpf_dev_bound_kfunc_id(func_id)) {
+        err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
+        if (err)
+            return err;
+    }
+
     desc = &tab->descs[tab->nr_descs++];
     desc->func_id = func_id;
     desc->imm = call_imm;
@@ -2386,6 +2540,32 @@ static int mark_reg_read(struct bpf_verifier_env *env,
     return 0;
 }
 
+static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+    struct bpf_func_state *state = func(env, reg);
+    int spi, ret;
+
+    /* For CONST_PTR_TO_DYNPTR, it must have already been done by
+     * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
+     * check_kfunc_call.
+     */
+    if (reg->type == CONST_PTR_TO_DYNPTR)
+        return 0;
+    spi = dynptr_get_spi(env, reg);
+    if (spi < 0)
+        return spi;
+    /* Caller ensures dynptr is valid and initialized, which means spi is in
+     * bounds and spi is the first dynptr slot. Simply mark stack slot as
+     * read.
+     */
+    ret = mark_reg_read(env, &state->stack[spi].spilled_ptr,
+                        state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
+    if (ret)
+        return ret;
+    return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr,
+                         state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64);
+}
+
 /* This function is supposed to be used by the following 32-bit optimization
  * code only. It returns TRUE if the source or destination register operates
  * on 64-bit, otherwise return FALSE.
@@ -3318,6 +3498,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
         env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
     }
 
+    err = destroy_if_dynptr_stack_slot(env, state, spi);
+    if (err)
+        return err;
+
     mark_stack_slot_scratched(env, spi);
     if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
         !register_is_null(reg) && env->bpf_capable) {
@@ -3431,6 +3615,14 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
     if (err)
         return err;
 
+    for (i = min_off; i < max_off; i++) {
+        int spi;
+
+        spi = __get_spi(i);
+        err = destroy_if_dynptr_stack_slot(env, state, spi);
+        if (err)
+            return err;
+    }
 
     /* Variable offset writes destroy any spilled pointers in range. */
     for (i = min_off; i < max_off; i++) {
@@ -4770,6 +4962,25 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
     return 0;
 }
 
+#define BTF_TYPE_SAFE_NESTED(__type)  __PASTE(__type, __safe_fields)
+
+BTF_TYPE_SAFE_NESTED(struct task_struct) {
+    const cpumask_t *cpus_ptr;
+};
+
+static bool nested_ptr_is_trusted(struct bpf_verifier_env *env,
+                                  struct bpf_reg_state *reg,
+                                  int off)
+{
+    /* If its parent is not trusted, it can't regain its trusted status. */
+    if (!is_trusted_reg(reg))
+        return false;
+
+    BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct task_struct));
+
+    return btf_nested_type_is_trusted(&env->log, reg, off);
+}
+
 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
                                    struct bpf_reg_state *regs,
                                    int regno, int off, int size,
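With task_struct::cpus_ptr in the safe-fields list, a pointer walk through it keeps PTR_TRUSTED. An illustrative consumer (sketch; assumes a kfunc such as bpf_cpumask_test_cpu() from the cpumask series of the same development cycle):

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    /* kfunc prototype assumed for the sketch; it demands a trusted argument */
    bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;

    char _license[] SEC("license") = "GPL";

    SEC("tp_btf/task_newtask")
    int BPF_PROG(nested_trust, struct task_struct *task, u64 clone_flags)
    {
        /* task is trusted here; task->cpus_ptr now inherits PTR_TRUSTED
         * instead of being demoted by the pointer walk */
        bpf_cpumask_test_cpu(0, task->cpus_ptr);
        return 0;
    }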
@@ -4858,10 +5069,17 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
         if (type_flag(reg->type) & PTR_UNTRUSTED)
             flag |= PTR_UNTRUSTED;
 
-        /* By default any pointer obtained from walking a trusted pointer is
-         * no longer trusted except the rcu case below.
+        /* By default any pointer obtained from walking a trusted pointer is no
+         * longer trusted, unless the field being accessed has explicitly been
+         * marked as inheriting its parent's state of trust.
+         *
+         * An RCU-protected pointer can also be deemed trusted if we are in an
+         * RCU read region. This case is handled below.
          */
-        flag &= ~PTR_TRUSTED;
+        if (nested_ptr_is_trusted(env, reg, off))
+            flag |= PTR_TRUSTED;
+        else
+            flag &= ~PTR_TRUSTED;
 
         if (flag & MEM_RCU) {
             /* Mark value register as MEM_RCU only if it is protected by
@@ -5458,6 +5676,31 @@ static int check_stack_range_initialized(
     }
 
     if (meta && meta->raw_mode) {
+        /* Ensure we won't be overwriting dynptrs when simulating byte
+         * by byte access in check_helper_call using meta.access_size.
+         * This would be a problem if we have a helper in the future
+         * which takes:
+         *
+         *     helper(uninit_mem, len, dynptr)
+         *
+         * Now, uninint_mem may overlap with dynptr pointer. Hence, it
+         * may end up writing to dynptr itself when touching memory from
+         * arg 1. This can be relaxed on a case by case basis for known
+         * safe cases, but reject due to the possibilitiy of aliasing by
+         * default.
+         */
+        for (i = min_off; i < max_off + access_size; i++) {
+            int stack_off = -i - 1;
+
+            spi = __get_spi(i);
+            /* raw_mode may write past allocated_stack */
+            if (state->allocated_stack <= stack_off)
+                continue;
+            if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
+                verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
+                return -EACCES;
+            }
+        }
         meta->access_size = access_size;
         meta->regno = regno;
         return 0;
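The loop above closes the aliasing hole described in the comment. bpf_probe_read_kernel()'s destination is exactly such a raw_mode uninitialized-memory argument, so a shape like the following is now refused (illustrative sketch):

    SEC("raw_tp")
    int clobber_dynptr_via_helper(void *ctx)
    {
        char buf[16];
        struct bpf_dynptr ptr;

        bpf_dynptr_from_mem(buf, sizeof(buf), 0, &ptr);
        /* dst overlaps the dynptr's stack slots; previously the simulated
         * byte-by-byte write could silently corrupt the dynptr, now the
         * program fails with "potential write to dynptr at off=... disallowed" */
        bpf_probe_read_kernel(&ptr, sizeof(ptr), ctx);
        return 0;
    }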
@@ -5945,6 +6188,7 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
                         enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta)
 {
     struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+    int spi = 0;
 
     /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
      * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
@@ -5955,12 +6199,14 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
     }
     /* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
      * check_func_arg_reg_off's logic. We only need to check offset
-     * alignment for PTR_TO_STACK.
+     * and its alignment for PTR_TO_STACK.
      */
-    if (reg->type == PTR_TO_STACK && (reg->off % BPF_REG_SIZE)) {
-        verbose(env, "cannot pass in dynptr at an offset=%d\n", reg->off);
-        return -EINVAL;
+    if (reg->type == PTR_TO_STACK) {
+        spi = dynptr_get_spi(env, reg);
+        if (spi < 0 && spi != -ERANGE)
+            return spi;
     }
+
     /* MEM_UNINIT - Points to memory that is an appropriate candidate for
      *              constructing a mutable bpf_dynptr object.
@@ -5977,7 +6223,7 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
      *              to.
      */
     if (arg_type & MEM_UNINIT) {
-        if (!is_dynptr_reg_valid_uninit(env, reg)) {
+        if (!is_dynptr_reg_valid_uninit(env, reg, spi)) {
             verbose(env, "Dynptr has to be an uninitialized dynptr\n");
             return -EINVAL;
         }
@@ -5992,13 +6238,15 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
 
         meta->uninit_dynptr_regno = regno;
     } else /* MEM_RDONLY and None case from above */ {
+        int err;
+
         /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
         if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
             verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
             return -EINVAL;
         }
 
-        if (!is_dynptr_reg_valid_init(env, reg)) {
+        if (!is_dynptr_reg_valid_init(env, reg, spi)) {
             verbose(env,
                     "Expected an initialized dynptr as arg #%d\n",
                     regno);
@@ -6025,6 +6273,10 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
                     err_extra, regno);
             return -EINVAL;
         }
+
+        err = mark_dynptr_read(env, reg);
+        if (err)
+            return err;
     }
     return 0;
 }
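Since process_dynptr_func() now routes PTR_TO_STACK dynptrs through dynptr_get_spi(), unaligned (and variable) dynptr offsets are diagnosed in one place. A sketch of a rejected shape (illustrative; the cast merely manufactures a misaligned stack offset, and loading is expected to fail):

    SEC("raw_tp")
    int misaligned_dynptr(void *ctx)
    {
        char buf[16];
        struct bpf_dynptr ptr;

        /* (char *)&ptr + 1 is not 8-byte aligned on the stack:
         * "cannot pass in dynptr at an offset=..." */
        bpf_dynptr_from_mem(buf, sizeof(buf), 0,
                            (struct bpf_dynptr *)((char *)&ptr + 1));
        return 0;
    }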
@@ -6362,15 +6614,29 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
     }
 }
 
-static u32 dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 {
     struct bpf_func_state *state = func(env, reg);
     int spi;
 
     if (reg->type == CONST_PTR_TO_DYNPTR)
-        return reg->ref_obj_id;
+        return reg->id;
+    spi = dynptr_get_spi(env, reg);
+    if (spi < 0)
+        return spi;
+    return state->stack[spi].spilled_ptr.id;
+}
+
+static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+    struct bpf_func_state *state = func(env, reg);
+    int spi;
 
-    spi = get_spi(reg->off);
+    if (reg->type == CONST_PTR_TO_DYNPTR)
+        return reg->ref_obj_id;
+    spi = dynptr_get_spi(env, reg);
+    if (spi < 0)
+        return spi;
     return state->stack[spi].spilled_ptr.ref_obj_id;
 }
 
@@ -6444,9 +6710,8 @@ skip_type_check:
          * PTR_TO_STACK.
          */
         if (reg->type == PTR_TO_STACK) {
-            spi = get_spi(reg->off);
-            if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
-                !state->stack[spi].spilled_ptr.ref_obj_id) {
+            spi = dynptr_get_spi(env, reg);
+            if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
                 verbose(env, "arg %d is an unacquired reference\n", regno);
                 return -EINVAL;
             }
@@ -7428,7 +7693,7 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
      * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
      */
     __mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
-    mark_dynptr_cb_reg(&callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
+    mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
     callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
 
     /* unused */
@@ -7633,6 +7898,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
     struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
     struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
     struct bpf_map *fmt_map = fmt_reg->map_ptr;
+    struct bpf_bprintf_data data = {};
     int err, fmt_map_off, num_args;
     u64 fmt_addr;
     char *fmt;
@@ -7657,7 +7923,7 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
     /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, we
      * can focus on validating the format specifiers.
      */
-    err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
+    err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
     if (err < 0)
         verbose(env, "Invalid format string\n");
 
@@ -7933,13 +8199,32 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
     for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
         if (arg_type_is_dynptr(fn->arg_type[i])) {
             struct bpf_reg_state *reg = &regs[BPF_REG_1 + i];
+            int id, ref_obj_id;
+
+            if (meta.dynptr_id) {
+                verbose(env, "verifier internal error: meta.dynptr_id already set\n");
+                return -EFAULT;
+            }
 
             if (meta.ref_obj_id) {
                 verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
                 return -EFAULT;
             }
 
-            meta.ref_obj_id = dynptr_ref_obj_id(env, reg);
+            id = dynptr_id(env, reg);
+            if (id < 0) {
+                verbose(env, "verifier internal error: failed to obtain dynptr id\n");
+                return id;
+            }
+
+            ref_obj_id = dynptr_ref_obj_id(env, reg);
+            if (ref_obj_id < 0) {
+                verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
+                return ref_obj_id;
+            }
+
+            meta.dynptr_id = id;
+            meta.ref_obj_id = ref_obj_id;
             break;
         }
     }
@@ -8095,6 +8380,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
         return -EFAULT;
     }
 
+    if (is_dynptr_ref_function(func_id))
+        regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
+
     if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
         /* For release_reference() */
         regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
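Because both slots of a dynptr share one freshly generated id and every slice records its dynptr_id, invalidation is selective: destroying one dynptr kills only its own slices. Sketch (illustrative):

    SEC("raw_tp")
    int selective_invalidation(void *ctx)
    {
        char buf_a[16], buf_b[16];
        struct bpf_dynptr pa, pb;
        void *sa, *sb;

        bpf_dynptr_from_mem(buf_a, sizeof(buf_a), 0, &pa);
        bpf_dynptr_from_mem(buf_b, sizeof(buf_b), 0, &pb);
        sa = bpf_dynptr_data(&pa, 0, 8);
        sb = bpf_dynptr_data(&pb, 0, 8);
        if (!sa || !sb)
            return 0;

        *(long *)&pa = 0;   /* destroys pa: only sa's dynptr_id matches */
        return *(char *)sb; /* still fine; *(char *)sa would be rejected */
    }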
@@ -8551,9 +8839,37 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
         reg_ref_id = *reg2btf_ids[base_type(reg->type)];
     }
 
-    if (is_kfunc_trusted_args(meta) || (is_kfunc_release(meta) && reg->ref_obj_id))
+    /* Enforce strict type matching for calls to kfuncs that are acquiring
+     * or releasing a reference, or are no-cast aliases. We do _not_
+     * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
+     * as we want to enable BPF programs to pass types that are bitwise
+     * equivalent without forcing them to explicitly cast with something
+     * like bpf_cast_to_kern_ctx().
+     *
+     * For example, say we had a type like the following:
+     *
+     * struct bpf_cpumask {
+     *     cpumask_t cpumask;
+     *     refcount_t usage;
+     * };
+     *
+     * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
+     * to a struct cpumask, so it would be safe to pass a struct
+     * bpf_cpumask * to a kfunc expecting a struct cpumask *.
+     *
+     * The philosophy here is similar to how we allow scalars of different
+     * types to be passed to kfuncs as long as the size is the same. The
+     * only difference here is that we're simply allowing
+     * btf_struct_ids_match() to walk the struct at the 0th offset, and
+     * resolve types.
+     */
+    if (is_kfunc_acquire(meta) ||
+        (is_kfunc_release(meta) && reg->ref_obj_id) ||
+        btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
         strict_type_match = true;
 
+    WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off);
+
     reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
     reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
     if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
@@ -8792,21 +9108,22 @@ static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
 
     field = meta->arg_list_head.field;
 
-    et = btf_type_by_id(field->list_head.btf, field->list_head.value_btf_id);
+    et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
     t = btf_type_by_id(reg->btf, reg->btf_id);
-    if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->list_head.btf,
-                              field->list_head.value_btf_id, true)) {
+    if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
+                              field->graph_root.value_btf_id, true)) {
         verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d "
                 "in struct %s, but arg is at offset=%d in struct %s\n",
-                field->list_head.node_offset, btf_name_by_offset(field->list_head.btf, et->name_off),
+                field->graph_root.node_offset,
+                btf_name_by_offset(field->graph_root.btf, et->name_off),
                 list_node_off, btf_name_by_offset(reg->btf, t->name_off));
         return -EINVAL;
     }
 
-    if (list_node_off != field->list_head.node_offset) {
+    if (list_node_off != field->graph_root.node_offset) {
         verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n",
-                list_node_off, field->list_head.node_offset,
-                btf_name_by_offset(field->list_head.btf, et->name_off));
+                list_node_off, field->graph_root.node_offset,
+                btf_name_by_offset(field->graph_root.btf, et->name_off));
         return -EINVAL;
     }
     /* Set arg#1 for expiration after unlock */
@@ -8896,6 +9213,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
             return -EINVAL;
         }
 
+        if (is_kfunc_trusted_args(meta) &&
+            (register_is_null(reg) || type_may_be_null(reg->type))) {
+            verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
+            return -EACCES;
+        }
+
         if (reg->ref_obj_id) {
             if (is_kfunc_release(meta) && meta->ref_obj_id) {
                 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
@@ -9248,9 +9571,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
                 mark_reg_known_zero(env, regs, BPF_REG_0);
                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
-                regs[BPF_REG_0].btf = field->list_head.btf;
-                regs[BPF_REG_0].btf_id = field->list_head.value_btf_id;
-                regs[BPF_REG_0].off = field->list_head.node_offset;
+                regs[BPF_REG_0].btf = field->graph_root.btf;
+                regs[BPF_REG_0].btf_id = field->graph_root.value_btf_id;
+                regs[BPF_REG_0].off = field->graph_root.node_offset;
             } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
                 mark_reg_known_zero(env, regs, BPF_REG_0);
                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
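The bpf_cpumask example from the comment, in program form (sketch; assumes the bpf_cpumask kfuncs, which were added in the same cycle):

    struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
    void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
    bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;

    SEC("tp_btf/task_newtask")
    int BPF_PROG(cpumask_walk, struct task_struct *task, u64 clone_flags)
    {
        struct bpf_cpumask *mask = bpf_cpumask_create();

        if (!mask)
            return 0;
        /* struct bpf_cpumask embeds cpumask_t at offset 0, so loose matching
         * lets it stand in for const struct cpumask * ... */
        bpf_cpumask_test_cpu(0, (const struct cpumask *)mask);
        /* ... while acquire/release kfuncs match strictly: only the wrapper
         * type (carrying the ref_obj_id) is accepted here */
        bpf_cpumask_release(mask);
        return 0;
    }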
@@ -12969,6 +13292,13 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
 {
     unsigned int i;
 
+    /* either both IDs should be set or both should be zero */
+    if (!!old_id != !!cur_id)
+        return false;
+
+    if (old_id == 0) /* cur_id == 0 as well */
+        return true;
+
     for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
         if (!idmap[i].old) {
             /* Reached an empty slot; haven't seen this id before */
@@ -13080,79 +13410,74 @@ next:
     }
 }
 
+static bool regs_exact(const struct bpf_reg_state *rold,
+                       const struct bpf_reg_state *rcur,
+                       struct bpf_id_pair *idmap)
+{
+    return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+           check_ids(rold->id, rcur->id, idmap) &&
+           check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
+}
+
 /* Returns true if (rold safe implies rcur safe) */
 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
                     struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
 {
-    bool equal;
-
     if (!(rold->live & REG_LIVE_READ))
         /* explored state didn't use this */
         return true;
-
-    equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
-
     if (rold->type == NOT_INIT)
         /* explored state can't have used this */
         return true;
     if (rcur->type == NOT_INIT)
         return false;
+
+    /* Enforce that register types have to match exactly, including their
+     * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
+     * rule.
+     *
+     * One can make a point that using a pointer register as unbounded
+     * SCALAR would be technically acceptable, but this could lead to
+     * pointer leaks because scalars are allowed to leak while pointers
+     * are not. We could make this safe in special cases if root is
+     * calling us, but it's probably not worth the hassle.
+     *
+     * Also, register types that are *not* MAYBE_NULL could technically be
+     * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
+     * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
+     * to the same map).
+     * However, if the old MAYBE_NULL register then got NULL checked,
+     * doing so could have affected others with the same id, and we can't
+     * check for that because we lost the id when we converted to
+     * a non-MAYBE_NULL variant.
+     * So, as a general rule we don't allow mixing MAYBE_NULL and
+     * non-MAYBE_NULL registers as well.
+     */
+    if (rold->type != rcur->type)
+        return false;
+
     switch (base_type(rold->type)) {
     case SCALAR_VALUE:
-        if (equal)
+        if (regs_exact(rold, rcur, idmap))
             return true;
         if (env->explore_alu_limits)
             return false;
-        if (rcur->type == SCALAR_VALUE) {
-            if (!rold->precise)
-                return true;
-            /* new val must satisfy old val knowledge */
-            return range_within(rold, rcur) &&
-                   tnum_in(rold->var_off, rcur->var_off);
-        } else {
-            /* We're trying to use a pointer in place of a scalar.
-             * Even if the scalar was unbounded, this could lead to
-             * pointer leaks because scalars are allowed to leak
-             * while pointers are not. We could make this safe in
-             * special cases if root is calling us, but it's
-             * probably not worth the hassle.
-             */
-            return false;
-        }
+        if (!rold->precise)
+            return true;
+        /* new val must satisfy old val knowledge */
+        return range_within(rold, rcur) &&
+               tnum_in(rold->var_off, rcur->var_off);
     case PTR_TO_MAP_KEY:
     case PTR_TO_MAP_VALUE:
-        /* a PTR_TO_MAP_VALUE could be safe to use as a
-         * PTR_TO_MAP_VALUE_OR_NULL into the same map.
-         * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
-         * checked, doing so could have affected others with the same
-         * id, and we can't check for that because we lost the id when
-         * we converted to a PTR_TO_MAP_VALUE.
-         */
-        if (type_may_be_null(rold->type)) {
-            if (!type_may_be_null(rcur->type))
-                return false;
-            if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
-                return false;
-            /* Check our ids match any regs they're supposed to */
-            return check_ids(rold->id, rcur->id, idmap);
-        }
-
         /* If the new min/max/var_off satisfy the old ones and
          * everything else matches, we are OK.
-         * 'id' is not compared, since it's only used for maps with
-         * bpf_spin_lock inside map element and in such cases if
-         * the rest of the prog is valid for one map element then
-         * it's valid for all map elements regardless of the key
-         * used in bpf_map_lookup()
          */
-        return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+        return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
                range_within(rold, rcur) &&
               tnum_in(rold->var_off, rcur->var_off) &&
               check_ids(rold->id, rcur->id, idmap);
     case PTR_TO_PACKET_META:
     case PTR_TO_PACKET:
-        if (rcur->type != rold->type)
-            return false;
         /* We must have at least as much range as the old ptr
          * did, so that any accesses which were safe before are
          * still safe. This is true even if old range < old off,
@@ -13167,7 +13492,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
         if (rold->off != rcur->off)
             return false;
         /* id relations must be preserved */
-        if (rold->id && !check_ids(rold->id, rcur->id, idmap))
+        if (!check_ids(rold->id, rcur->id, idmap))
             return false;
         /* new val must satisfy old val knowledge */
         return range_within(rold, rcur) &&
@@ -13176,15 +13501,10 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
         /* two stack pointers are equal only if they're pointing to
          * the same stack frame, since fp-8 in foo != fp-8 in bar
         */
-        return equal && rold->frameno == rcur->frameno;
+        return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
     default:
-        /* Only valid matches are exact, which memcmp() */
-        return equal;
+        return regs_exact(rold, rcur, idmap);
     }
-
-    /* Shouldn't get here; if we do, say it's not safe */
-    WARN_ON_ONCE(1);
-    return false;
 }
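A standalone toy of the new matching rule in check_ids() (the BPF_ID_MAP_SIZE value here is illustrative, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    #define BPF_ID_MAP_SIZE 64

    struct bpf_id_pair { unsigned int old, cur; };

    /* mirrors check_ids() after this change: ids must be both zero or both
     * non-zero, and non-zero ids must map 1:1 across the two states */
    static bool check_ids(unsigned int old_id, unsigned int cur_id,
                          struct bpf_id_pair *idmap)
    {
        unsigned int i;

        if (!!old_id != !!cur_id)
            return false;
        if (old_id == 0) /* cur_id == 0 as well */
            return true;
        for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
            if (!idmap[i].old) {
                /* empty slot: first sighting of this id */
                idmap[i].old = old_id;
                idmap[i].cur = cur_id;
                return true;
            }
            if (idmap[i].old == old_id)
                return idmap[i].cur == cur_id;
        }
        return false; /* map full: conservatively not equal */
    }

    int main(void)
    {
        struct bpf_id_pair idmap[BPF_ID_MAP_SIZE] = {0};

        printf("%d\n", check_ids(1, 2, idmap)); /* 1: records 1 -> 2 */
        printf("%d\n", check_ids(1, 3, idmap)); /* 0: 1 already maps to 2 */
        printf("%d\n", check_ids(0, 0, idmap)); /* 1: both unset */
        printf("%d\n", check_ids(0, 5, idmap)); /* 0: one set, one unset */
        return 0;
    }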
@@ -13231,10 +13551,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
             return false;
         if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
             continue;
-        if (!is_spilled_reg(&old->stack[spi]))
-            continue;
-        if (!regsafe(env, &old->stack[spi].spilled_ptr,
-                     &cur->stack[spi].spilled_ptr, idmap))
+        /* Both old and cur are having same slot_type */
+        switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
+        case STACK_SPILL:
             /* when explored and current stack slot are both storing
              * spilled registers, check that stored pointers types
              * are the same as well.
@@ -13245,17 +13564,48 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
              * such verifier states are not equivalent.
              * return false to continue verification of this path
              */
+            if (!regsafe(env, &old->stack[spi].spilled_ptr,
+                         &cur->stack[spi].spilled_ptr, idmap))
+                return false;
+            break;
+        case STACK_DYNPTR:
+        {
+            const struct bpf_reg_state *old_reg, *cur_reg;
+
+            old_reg = &old->stack[spi].spilled_ptr;
+            cur_reg = &cur->stack[spi].spilled_ptr;
+            if (old_reg->dynptr.type != cur_reg->dynptr.type ||
+                old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
+                !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
+                return false;
+            break;
+        }
+        case STACK_MISC:
+        case STACK_ZERO:
+        case STACK_INVALID:
+            continue;
+        /* Ensure that new unhandled slot types return false by default */
+        default:
             return false;
+        }
     }
     return true;
 }
 
-static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
+static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
+                    struct bpf_id_pair *idmap)
 {
+    int i;
+
     if (old->acquired_refs != cur->acquired_refs)
         return false;
-    return !memcmp(old->refs, cur->refs,
-                   sizeof(*old->refs) * old->acquired_refs);
+
+    for (i = 0; i < old->acquired_refs; i++) {
+        if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
+            return false;
+    }
+
+    return true;
 }
 
 /* compare two verifier states
@@ -13297,7 +13647,7 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
     if (!stacksafe(env, old, cur, env->idmap_scratch))
         return false;
 
-    if (!refsafe(old, cur))
+    if (!refsafe(old, cur, env->idmap_scratch))
         return false;
 
     return true;
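The STACK_DYNPTR case in stacksafe() is the pruning fix proper: old and current states must agree on dynptr type and first_slot before they can be treated as equivalent. The shape it guards against (illustrative sketch; this program is meant to be rejected, with each path now verified on its own rather than the second being pruned away):

    struct {
        __uint(type, BPF_MAP_TYPE_RINGBUF);
        __uint(max_entries, 4096);
    } ringbuf SEC(".maps");

    SEC("raw_tp")
    int dynptr_type_confusion(void *ctx)
    {
        char buf[16];
        struct bpf_dynptr ptr;

        if (bpf_get_prandom_u32() & 1)
            bpf_dynptr_from_mem(buf, sizeof(buf), 0, &ptr);   /* local */
        else
            bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr); /* ringbuf, referenced */

        /* with the old memcmp()-based stacksafe(), the second path could be
         * pruned against the first and its referenced dynptr never fully
         * checked; now the paths are distinguished by dynptr.type/ref state */
        bpf_ringbuf_discard_dynptr(&ptr, 0);
        return 0;
    }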
@@ -13834,7 +14184,7 @@ static int do_check(struct bpf_verifier_env *env)
             env->prev_log_len = env->log.len_used;
         }
 
-        if (bpf_prog_is_dev_bound(env->prog->aux)) {
+        if (bpf_prog_is_offloaded(env->prog->aux)) {
             err = bpf_prog_offload_verify_insn(env, env->insn_idx,
                                                env->prev_insn_idx);
             if (err)
@@ -14314,7 +14664,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
         }
     }
 
-    if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
+    if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
         !bpf_offload_prog_map_match(prog, map)) {
         verbose(env, "offload device mismatch between prog and map\n");
         return -EINVAL;
@@ -14795,7 +15145,7 @@ static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
     unsigned int orig_prog_len = env->prog->len;
     int err;
 
-    if (bpf_prog_is_dev_bound(env->prog->aux))
+    if (bpf_prog_is_offloaded(env->prog->aux))
         bpf_prog_offload_remove_insns(env, off, cnt);
 
     err = bpf_remove_insns(env->prog, off, cnt);
@@ -14876,7 +15226,7 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
         else
             continue;
 
-        if (bpf_prog_is_dev_bound(env->prog->aux))
+        if (bpf_prog_is_offloaded(env->prog->aux))
             bpf_prog_offload_replace_insn(env, i, &ja);
 
         memcpy(insn, &ja, sizeof(ja));
@@ -15063,7 +15413,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
         }
     }
 
-    if (bpf_prog_is_dev_bound(env->prog->aux))
+    if (bpf_prog_is_offloaded(env->prog->aux))
         return 0;
 
     insn = env->prog->insnsi + delta;
@@ -15463,7 +15813,7 @@ static int fixup_call_args(struct bpf_verifier_env *env)
     int err = 0;
 
     if (env->prog->jit_requested &&
-        !bpf_prog_is_dev_bound(env->prog->aux)) {
+        !bpf_prog_is_offloaded(env->prog->aux)) {
         err = jit_subprogs(env);
         if (err == 0)
             return 0;
@@ -15507,12 +15857,25 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                             struct bpf_insn *insn_buf, int insn_idx, int *cnt)
 {
     const struct bpf_kfunc_desc *desc;
+    void *xdp_kfunc;
 
     if (!insn->imm) {
         verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
         return -EINVAL;
     }
 
+    *cnt = 0;
+
+    if (bpf_dev_bound_kfunc_id(insn->imm)) {
+        xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
+        if (xdp_kfunc) {
+            insn->imm = BPF_CALL_IMM(xdp_kfunc);
+            return 0;
+        }
+
+        /* fallback to default kfunc when not supported by netdev */
+    }
+
     /* insn->imm has the btf func_id. Replace it with
      * an address (relative to __bpf_call_base).
      */
@@ -15523,7 +15886,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
         return -EFAULT;
     }
 
-    *cnt = 0;
     insn->imm = desc->imm;
     if (insn->off)
         return 0;
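fixup_kfunc_call() now gives device-bound programs a chance to bind a kfunc call to the netdev's own implementation, falling back to the generic kernel one. The consumer side, sketched with the XDP metadata kfuncs from the same series (availability and exact prototype assumed):

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    char _license[] SEC("license") = "GPL";

    extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
                                             __u64 *timestamp) __ksym;

    SEC("xdp")
    int rx_ts(struct xdp_md *ctx)
    {
        __u64 ts = 0;

        /* for a program loaded dev-bound (BPF_F_XDP_DEV_BOUND_ONLY),
         * insn->imm is patched to the driver's implementation when
         * bpf_dev_bound_resolve_kfunc() finds one */
        bpf_xdp_metadata_rx_timestamp(ctx, &ts);
        return XDP_PASS;
    }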
@@ -16449,7 +16811,7 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
     }
 
     if (st_ops->check_member) {
-        int err = st_ops->check_member(t, member);
+        int err = st_ops->check_member(t, member, prog);
 
         if (err) {
             verbose(env, "attach to unsupported member %s of struct %s\n",
@@ -16530,6 +16892,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
     if (tgt_prog) {
         struct bpf_prog_aux *aux = tgt_prog->aux;
 
+        if (bpf_prog_is_dev_bound(prog->aux) &&
+            !bpf_prog_dev_bound_match(prog, tgt_prog)) {
+            bpf_log(log, "Target program bound device mismatch");
+            return -EINVAL;
+        }
+
         for (i = 0; i < aux->func_info_cnt; i++)
             if (aux->func_info[i].type_id == btf_id) {
                 subprog = i;
@@ -16751,6 +17119,24 @@ BTF_ID(func, rcu_read_unlock_strict)
 #endif
 BTF_SET_END(btf_id_deny)
 
+static bool can_be_sleepable(struct bpf_prog *prog)
+{
+    if (prog->type == BPF_PROG_TYPE_TRACING) {
+        switch (prog->expected_attach_type) {
+        case BPF_TRACE_FENTRY:
+        case BPF_TRACE_FEXIT:
+        case BPF_MODIFY_RETURN:
+        case BPF_TRACE_ITER:
+            return true;
+        default:
+            return false;
+        }
+    }
+    return prog->type == BPF_PROG_TYPE_LSM ||
+           prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
+           prog->type == BPF_PROG_TYPE_STRUCT_OPS;
+}
+
 static int check_attach_btf_id(struct bpf_verifier_env *env)
 {
     struct bpf_prog *prog = env->prog;
@@ -16769,9 +17155,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
         return -EINVAL;
     }
 
-    if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
-        prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) {
-        verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n");
+    if (prog->aux->sleepable && !can_be_sleepable(prog)) {
+        verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
         return -EINVAL;
     }
 
@@ -16950,7 +17335,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
     if (ret < 0)
         goto skip_full_check;
 
-    if (bpf_prog_is_dev_bound(env->prog->aux)) {
+    if (bpf_prog_is_offloaded(env->prog->aux)) {
         ret = bpf_prog_offload_verifier_prep(env->prog);
         if (ret)
             goto skip_full_check;
@@ -16963,7 +17348,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
     ret = do_check_subprogs(env);
     ret = ret ?: do_check_main(env);
 
-    if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
+    if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
         ret = bpf_prog_offload_finalize(env);
 
 skip_full_check:
@@ -16998,7 +17383,7 @@ skip_full_check:
     /* do 32-bit optimization after insn patching has done so those patched
      * insns could be handled correctly.
      */
-    if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
+    if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
         ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
         env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret : false;
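can_be_sleepable() is what the new sleepable program types hang off: BPF_TRACE_ITER joins fentry/fexit/fmod_ret, and KPROBE (uprobes only) and STRUCT_OPS become eligible. A minimal sleepable iterator (sketch):

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    char _license[] SEC("license") = "GPL";

    /* the ".s" suffix asks libbpf to load this iterator as sleepable, which
     * check_attach_btf_id() now permits via BPF_TRACE_ITER */
    SEC("iter.s/task")
    int dump_task(struct bpf_iter__task *ctx)
    {
        struct task_struct *task = ctx->task;

        if (!task)
            return 0;
        /* sleepable context: helpers like bpf_copy_from_user() are usable here */
        BPF_SEQ_PRINTF(ctx->meta->seq, "%d\n", task->pid);
        return 0;
    }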