summaryrefslogtreecommitdiff
path: root/kernel/bpf/verifier.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r--kernel/bpf/verifier.c2355
1 files changed, 1495 insertions, 860 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a7d6e0c5928b..399f03e62508 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -44,6 +44,12 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
#undef BPF_LINK_TYPE
};
+enum bpf_features {
+ BPF_FEAT_RDONLY_CAST_TO_VOID = 0,
+ BPF_FEAT_STREAMS = 1,
+ __MAX_BPF_FEAT,
+};
+
struct bpf_mem_alloc bpf_global_percpu_ma;
static bool bpf_global_percpu_ma_set;
@@ -405,7 +411,8 @@ static bool reg_not_null(const struct bpf_reg_state *reg)
type == PTR_TO_MAP_KEY ||
type == PTR_TO_SOCK_COMMON ||
(type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
- type == PTR_TO_MEM;
+ (type == PTR_TO_MEM && !(reg->type & PTR_UNTRUSTED)) ||
+ type == CONST_PTR_TO_MAP;
}
static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
@@ -855,7 +862,7 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
* dynptr
*/
if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
- verbose(env, "verifier internal error: misconfigured ref_obj_id\n");
+ verifier_bug(env, "misconfigured ref_obj_id");
return -EFAULT;
}
if (state->stack[i].spilled_ptr.dynptr.first_slot)
@@ -1403,7 +1410,7 @@ static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
goto out;
alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
- new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
+ new_arr = krealloc(arr, alloc_size, GFP_KERNEL_ACCOUNT);
if (!new_arr) {
kfree(arr);
return NULL;
@@ -1420,7 +1427,7 @@ out:
static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src)
{
dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
- sizeof(struct bpf_reference_state), GFP_KERNEL);
+ sizeof(struct bpf_reference_state), GFP_KERNEL_ACCOUNT);
if (!dst->refs)
return -ENOMEM;
@@ -1439,7 +1446,7 @@ static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_st
size_t n = src->allocated_stack / BPF_REG_SIZE;
dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!dst->stack)
return -ENOMEM;
@@ -1647,7 +1654,7 @@ static void update_peak_states(struct bpf_verifier_env *env)
{
u32 cur_states;
- cur_states = env->explored_states_size + env->free_list_size;
+ cur_states = env->explored_states_size + env->free_list_size + env->num_backedges;
env->peak_states = max(env->peak_states, cur_states);
}
@@ -1659,6 +1666,13 @@ static void free_func_state(struct bpf_func_state *state)
kfree(state);
}
+static void clear_jmp_history(struct bpf_verifier_state *state)
+{
+ kfree(state->jmp_history);
+ state->jmp_history = NULL;
+ state->jmp_history_cnt = 0;
+}
+
static void free_verifier_state(struct bpf_verifier_state *state,
bool free_self)
{
@@ -1669,11 +1683,12 @@ static void free_verifier_state(struct bpf_verifier_state *state,
state->frame[i] = NULL;
}
kfree(state->refs);
+ clear_jmp_history(state);
if (free_self)
kfree(state);
}
-/* struct bpf_verifier_state->{parent,loop_entry} refer to states
+/* struct bpf_verifier_state->parent refers to states
* that are in either of env->{expored_states,free_list}.
* In both cases the state is contained in struct bpf_verifier_state_list.
*/
@@ -1684,37 +1699,24 @@ static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_
return NULL;
}
-static struct bpf_verifier_state_list *state_loop_entry_as_list(struct bpf_verifier_state *st)
-{
- if (st->loop_entry)
- return container_of(st->loop_entry, struct bpf_verifier_state_list, state);
- return NULL;
-}
+static bool incomplete_read_marks(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st);
/* A state can be freed if it is no longer referenced:
* - is in the env->free_list;
* - has no children states;
- * - is not used as loop_entry.
- *
- * Freeing a state can make it's loop_entry free-able.
*/
static void maybe_free_verifier_state(struct bpf_verifier_env *env,
struct bpf_verifier_state_list *sl)
{
- struct bpf_verifier_state_list *loop_entry_sl;
-
- while (sl && sl->in_free_list &&
- sl->state.branches == 0 &&
- sl->state.used_as_loop_entry == 0) {
- loop_entry_sl = state_loop_entry_as_list(&sl->state);
- if (loop_entry_sl)
- loop_entry_sl->state.used_as_loop_entry--;
- list_del(&sl->node);
- free_verifier_state(&sl->state, false);
- kfree(sl);
- env->free_list_size--;
- sl = loop_entry_sl;
- }
+ if (!sl->in_free_list
+ || sl->state.branches != 0
+ || incomplete_read_marks(env, &sl->state))
+ return;
+ list_del(&sl->node);
+ free_verifier_state(&sl->state, false);
+ kfree(sl);
+ env->free_list_size--;
}
/* copy verifier state from src to dst growing dst stack space
@@ -1733,6 +1735,13 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
struct bpf_func_state *dst;
int i, err;
+ dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
+ src->jmp_history_cnt, sizeof(*dst_state->jmp_history),
+ GFP_KERNEL_ACCOUNT);
+ if (!dst_state->jmp_history)
+ return -ENOMEM;
+ dst_state->jmp_history_cnt = src->jmp_history_cnt;
+
/* if dst has more stack frames then src frame, free them, this is also
* necessary in case of exceptional exits using bpf_throw.
*/
@@ -1750,17 +1759,14 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
dst_state->parent = src->parent;
dst_state->first_insn_idx = src->first_insn_idx;
dst_state->last_insn_idx = src->last_insn_idx;
- dst_state->insn_hist_start = src->insn_hist_start;
- dst_state->insn_hist_end = src->insn_hist_end;
dst_state->dfs_depth = src->dfs_depth;
dst_state->callback_unroll_depth = src->callback_unroll_depth;
- dst_state->used_as_loop_entry = src->used_as_loop_entry;
dst_state->may_goto_depth = src->may_goto_depth;
- dst_state->loop_entry = src->loop_entry;
+ dst_state->equal_state = src->equal_state;
for (i = 0; i <= src->curframe; i++) {
dst = dst_state->frame[i];
if (!dst) {
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL_ACCOUNT);
if (!dst)
return -ENOMEM;
dst_state->frame[i] = dst;
@@ -1799,181 +1805,241 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
return true;
}
-/* Open coded iterators allow back-edges in the state graph in order to
- * check unbounded loops that iterators.
- *
- * In is_state_visited() it is necessary to know if explored states are
- * part of some loops in order to decide whether non-exact states
- * comparison could be used:
- * - non-exact states comparison establishes sub-state relation and uses
- * read and precision marks to do so, these marks are propagated from
- * children states and thus are not guaranteed to be final in a loop;
- * - exact states comparison just checks if current and explored states
- * are identical (and thus form a back-edge).
- *
- * Paper "A New Algorithm for Identifying Loops in Decompilation"
- * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient
- * algorithm for loop structure detection and gives an overview of
- * relevant terminology. It also has helpful illustrations.
- *
- * [1] https://api.semanticscholar.org/CorpusID:15784067
- *
- * We use a similar algorithm but because loop nested structure is
- * irrelevant for verifier ours is significantly simpler and resembles
- * strongly connected components algorithm from Sedgewick's textbook.
- *
- * Define topmost loop entry as a first node of the loop traversed in a
- * depth first search starting from initial state. The goal of the loop
- * tracking algorithm is to associate topmost loop entries with states
- * derived from these entries.
- *
- * For each step in the DFS states traversal algorithm needs to identify
- * the following situations:
- *
- * initial initial initial
- * | | |
- * V V V
- * ... ... .---------> hdr
- * | | | |
- * V V | V
- * cur .-> succ | .------...
- * | | | | | |
- * V | V | V V
- * succ '-- cur | ... ...
- * | | |
- * | V V
- * | succ <- cur
- * | |
- * | V
- * | ...
- * | |
- * '----'
- *
- * (A) successor state of cur (B) successor state of cur or it's entry
- * not yet traversed are in current DFS path, thus cur and succ
- * are members of the same outermost loop
- *
- * initial initial
- * | |
- * V V
- * ... ...
- * | |
- * V V
- * .------... .------...
- * | | | |
- * V V V V
- * .-> hdr ... ... ...
- * | | | | |
- * | V V V V
- * | succ <- cur succ <- cur
- * | | |
- * | V V
- * | ... ...
- * | | |
- * '----' exit
- *
- * (C) successor state of cur is a part of some loop but this loop
- * does not include cur or successor state is not in a loop at all.
- *
- * Algorithm could be described as the following python code:
- *
- * traversed = set() # Set of traversed nodes
- * entries = {} # Mapping from node to loop entry
- * depths = {} # Depth level assigned to graph node
- * path = set() # Current DFS path
- *
- * # Find outermost loop entry known for n
- * def get_loop_entry(n):
- * h = entries.get(n, None)
- * while h in entries:
- * h = entries[h]
- * return h
- *
- * # Update n's loop entry if h comes before n in current DFS path.
- * def update_loop_entry(n, h):
- * if h in path and depths[entries.get(n, n)] < depths[n]:
- * entries[n] = h1
+/* Return IP for a given frame in a call stack */
+static u32 frame_insn_idx(struct bpf_verifier_state *st, u32 frame)
+{
+ return frame == st->curframe
+ ? st->insn_idx
+ : st->frame[frame + 1]->callsite;
+}
+
+/* For state @st look for a topmost frame with frame_insn_idx() in some SCC,
+ * if such frame exists form a corresponding @callchain as an array of
+ * call sites leading to this frame and SCC id.
+ * E.g.:
*
- * def dfs(n, depth):
- * traversed.add(n)
- * path.add(n)
- * depths[n] = depth
- * for succ in G.successors(n):
- * if succ not in traversed:
- * # Case A: explore succ and update cur's loop entry
- * # only if succ's entry is in current DFS path.
- * dfs(succ, depth + 1)
- * h = entries.get(succ, None)
- * update_loop_entry(n, h)
- * else:
- * # Case B or C depending on `h1 in path` check in update_loop_entry().
- * update_loop_entry(n, succ)
- * path.remove(n)
+ * void foo() { A: loop {... SCC#1 ...}; }
+ * void bar() { B: loop { C: foo(); ... SCC#2 ... }
+ * D: loop { E: foo(); ... SCC#3 ... } }
+ * void main() { F: bar(); }
*
- * To adapt this algorithm for use with verifier:
- * - use st->branch == 0 as a signal that DFS of succ had been finished
- * and cur's loop entry has to be updated (case A), handle this in
- * update_branch_counts();
- * - use st->branch > 0 as a signal that st is in the current DFS path;
- * - handle cases B and C in is_state_visited().
+ * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending
+ * on @st frame call sites being (F,C,A) or (F,E,A).
*/
-static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
- struct bpf_verifier_state *st)
+static bool compute_scc_callchain(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st,
+ struct bpf_scc_callchain *callchain)
{
- struct bpf_verifier_state *topmost = st->loop_entry;
- u32 steps = 0;
+ u32 i, scc, insn_idx;
- while (topmost && topmost->loop_entry) {
- if (verifier_bug_if(steps++ > st->dfs_depth, env, "infinite loop"))
- return ERR_PTR(-EFAULT);
- topmost = topmost->loop_entry;
+ memset(callchain, 0, sizeof(*callchain));
+ for (i = 0; i <= st->curframe; i++) {
+ insn_idx = frame_insn_idx(st, i);
+ scc = env->insn_aux_data[insn_idx].scc;
+ if (scc) {
+ callchain->scc = scc;
+ break;
+ } else if (i < st->curframe) {
+ callchain->callsites[i] = insn_idx;
+ } else {
+ return false;
+ }
}
- return topmost;
+ return true;
}
-static void update_loop_entry(struct bpf_verifier_env *env,
- struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
+/* Check if bpf_scc_visit instance for @callchain exists. */
+static struct bpf_scc_visit *scc_visit_lookup(struct bpf_verifier_env *env,
+ struct bpf_scc_callchain *callchain)
{
- /* The hdr->branches check decides between cases B and C in
- * comment for get_loop_entry(). If hdr->branches == 0 then
- * head's topmost loop entry is not in current DFS path,
- * hence 'cur' and 'hdr' are not in the same loop and there is
- * no need to update cur->loop_entry.
- */
- if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) {
- if (cur->loop_entry) {
- cur->loop_entry->used_as_loop_entry--;
- maybe_free_verifier_state(env, state_loop_entry_as_list(cur));
- }
- cur->loop_entry = hdr;
- hdr->used_as_loop_entry++;
+ struct bpf_scc_info *info = env->scc_info[callchain->scc];
+ struct bpf_scc_visit *visits = info->visits;
+ u32 i;
+
+ if (!info)
+ return NULL;
+ for (i = 0; i < info->num_visits; i++)
+ if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0)
+ return &visits[i];
+ return NULL;
+}
+
+/* Allocate a new bpf_scc_visit instance corresponding to @callchain.
+ * Allocated instances are alive for a duration of the do_check_common()
+ * call and are freed by free_states().
+ */
+static struct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env,
+ struct bpf_scc_callchain *callchain)
+{
+ struct bpf_scc_visit *visit;
+ struct bpf_scc_info *info;
+ u32 scc, num_visits;
+ u64 new_sz;
+
+ scc = callchain->scc;
+ info = env->scc_info[scc];
+ num_visits = info ? info->num_visits : 0;
+ new_sz = sizeof(*info) + sizeof(struct bpf_scc_visit) * (num_visits + 1);
+ info = kvrealloc(env->scc_info[scc], new_sz, GFP_KERNEL_ACCOUNT);
+ if (!info)
+ return NULL;
+ env->scc_info[scc] = info;
+ info->num_visits = num_visits + 1;
+ visit = &info->visits[num_visits];
+ memset(visit, 0, sizeof(*visit));
+ memcpy(&visit->callchain, callchain, sizeof(*callchain));
+ return visit;
+}
+
+/* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */
+static char *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
+{
+ char *buf = env->tmp_str_buf;
+ int i, delta = 0;
+
+ delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "(");
+ for (i = 0; i < ARRAY_SIZE(callchain->callsites); i++) {
+ if (!callchain->callsites[i])
+ break;
+ delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u,",
+ callchain->callsites[i]);
+ }
+ delta += snprintf(buf + delta, TMP_STR_BUF_LEN - delta, "%u)", callchain->scc);
+ return env->tmp_str_buf;
+}
+
+/* If callchain for @st exists (@st is in some SCC), ensure that
+ * bpf_scc_visit instance for this callchain exists.
+ * If instance does not exist or is empty, assign visit->entry_state to @st.
+ */
+static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ struct bpf_scc_callchain *callchain = &env->callchain_buf;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, callchain))
+ return 0;
+ visit = scc_visit_lookup(env, callchain);
+ visit = visit ?: scc_visit_alloc(env, callchain);
+ if (!visit)
+ return -ENOMEM;
+ if (!visit->entry_state) {
+ visit->entry_state = st;
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "SCC enter %s\n", format_callchain(env, callchain));
+ }
+ return 0;
+}
+
+static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit);
+
+/* If callchain for @st exists (@st is in some SCC), make it empty:
+ * - set visit->entry_state to NULL;
+ * - flush accumulated backedges.
+ */
+static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ struct bpf_scc_callchain *callchain = &env->callchain_buf;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, callchain))
+ return 0;
+ visit = scc_visit_lookup(env, callchain);
+ if (!visit) {
+ verifier_bug(env, "scc exit: no visit info for call chain %s",
+ format_callchain(env, callchain));
+ return -EFAULT;
+ }
+ if (visit->entry_state != st)
+ return 0;
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "SCC exit %s\n", format_callchain(env, callchain));
+ visit->entry_state = NULL;
+ env->num_backedges -= visit->num_backedges;
+ visit->num_backedges = 0;
+ update_peak_states(env);
+ return propagate_backedges(env, visit);
+}
+
+/* Lookup an bpf_scc_visit instance corresponding to @st callchain
+ * and add @backedge to visit->backedges. @st callchain must exist.
+ */
+static int add_scc_backedge(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st,
+ struct bpf_scc_backedge *backedge)
+{
+ struct bpf_scc_callchain *callchain = &env->callchain_buf;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, callchain)) {
+ verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
+ st->insn_idx);
+ return -EFAULT;
+ }
+ visit = scc_visit_lookup(env, callchain);
+ if (!visit) {
+ verifier_bug(env, "add backedge: no visit info for call chain %s",
+ format_callchain(env, callchain));
+ return -EFAULT;
}
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "SCC backedge %s\n", format_callchain(env, callchain));
+ backedge->next = visit->backedges;
+ visit->backedges = backedge;
+ visit->num_backedges++;
+ env->num_backedges++;
+ update_peak_states(env);
+ return 0;
}
-static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+/* bpf_reg_state->live marks for registers in a state @st are incomplete,
+ * if state @st is in some SCC and not all execution paths starting at this
+ * SCC are fully explored.
+ */
+static bool incomplete_read_marks(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *st)
+{
+ struct bpf_scc_callchain *callchain = &env->callchain_buf;
+ struct bpf_scc_visit *visit;
+
+ if (!compute_scc_callchain(env, st, callchain))
+ return false;
+ visit = scc_visit_lookup(env, callchain);
+ if (!visit)
+ return false;
+ return !!visit->backedges;
+}
+
+static void free_backedges(struct bpf_scc_visit *visit)
+{
+ struct bpf_scc_backedge *backedge, *next;
+
+ for (backedge = visit->backedges; backedge; backedge = next) {
+ free_verifier_state(&backedge->state, false);
+ next = backedge->next;
+ kvfree(backedge);
+ }
+ visit->backedges = NULL;
+}
+
+static int update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
struct bpf_verifier_state_list *sl = NULL, *parent_sl;
struct bpf_verifier_state *parent;
+ int err;
while (st) {
u32 br = --st->branches;
- /* br == 0 signals that DFS exploration for 'st' is finished,
- * thus it is necessary to update parent's loop entry if it
- * turned out that st is a part of some loop.
- * This is a part of 'case A' in get_loop_entry() comment.
- */
- if (br == 0 && st->parent && st->loop_entry)
- update_loop_entry(env, st->parent, st->loop_entry);
-
- /* WARN_ON(br > 1) technically makes sense here,
+ /* verifier_bug_if(br > 1, ...) technically makes sense here,
* but see comment in push_stack(), hence:
*/
- WARN_ONCE((int)br < 0,
- "BUG update_branch_counts:branches_to_explore=%d\n",
- br);
+ verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br);
if (br)
break;
+ err = maybe_exit_scc(env, st);
+ if (err)
+ return err;
parent = st->parent;
parent_sl = state_parent_as_list(st);
if (sl)
@@ -1981,6 +2047,7 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
st = parent;
sl = parent_sl;
}
+ return 0;
}
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
@@ -2012,6 +2079,18 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
return 0;
}
+static bool error_recoverable_with_nospec(int err)
+{
+ /* Should only return true for non-fatal errors that are allowed to
+ * occur during speculative verification. For these we can insert a
+ * nospec and the program might still be accepted. Do not include
+ * something like ENOMEM because it is likely to re-occur for the next
+ * architectural path once it has been recovered-from in all speculative
+ * paths.
+ */
+ return err == -EPERM || err == -EACCES || err == -EINVAL;
+}
+
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
int insn_idx, int prev_insn_idx,
bool speculative)
@@ -2020,9 +2099,9 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
struct bpf_verifier_stack_elem *elem;
int err;
- elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
+ elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL_ACCOUNT);
if (!elem)
- goto err;
+ return NULL;
elem->insn_idx = insn_idx;
elem->prev_insn_idx = prev_insn_idx;
@@ -2032,12 +2111,12 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
env->stack_size++;
err = copy_verifier_state(&elem->st, cur);
if (err)
- goto err;
+ return NULL;
elem->st.speculative |= speculative;
if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
verbose(env, "The sequence of %d jumps is too complex.\n",
env->stack_size);
- goto err;
+ return NULL;
}
if (elem->st.parent) {
++elem->st.parent->branches;
@@ -2052,12 +2131,6 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
*/
}
return &elem->st;
-err:
- free_verifier_state(env->cur_state, true);
- env->cur_state = NULL;
- /* pop all elements and return */
- while (!pop_stack(env, NULL, NULL, false));
- return NULL;
}
#define CALLER_SAVED_REGS 6
@@ -2450,6 +2523,58 @@ static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
if ((u64)reg->smin_value <= (u64)reg->smax_value) {
reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
+ } else {
+ /* If the s64 range crosses the sign boundary, then it's split
+ * between the beginning and end of the U64 domain. In that
+ * case, we can derive new bounds if the u64 range overlaps
+ * with only one end of the s64 range.
+ *
+ * In the following example, the u64 range overlaps only with
+ * positive portion of the s64 range.
+ *
+ * 0 U64_MAX
+ * | [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx] |
+ * |----------------------------|----------------------------|
+ * |xxxxx s64 range xxxxxxxxx] [xxxxxxx|
+ * 0 S64_MAX S64_MIN -1
+ *
+ * We can thus derive the following new s64 and u64 ranges.
+ *
+ * 0 U64_MAX
+ * | [xxxxxx u64 range xxxxx] |
+ * |----------------------------|----------------------------|
+ * | [xxxxxx s64 range xxxxx] |
+ * 0 S64_MAX S64_MIN -1
+ *
+ * If they overlap in two places, we can't derive anything
+ * because reg_state can't represent two ranges per numeric
+ * domain.
+ *
+ * 0 U64_MAX
+ * | [xxxxxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxxxxx] |
+ * |----------------------------|----------------------------|
+ * |xxxxx s64 range xxxxxxxxx] [xxxxxxxxxx|
+ * 0 S64_MAX S64_MIN -1
+ *
+ * The first condition below corresponds to the first diagram
+ * above.
+ */
+ if (reg->umax_value < (u64)reg->smin_value) {
+ reg->smin_value = (s64)reg->umin_value;
+ reg->umax_value = min_t(u64, reg->umax_value, reg->smax_value);
+ } else if ((u64)reg->smax_value < reg->umin_value) {
+ /* This second condition considers the case where the u64 range
+ * overlaps with the negative portion of the s64 range:
+ *
+ * 0 U64_MAX
+ * | [xxxxxxxxxxxxxx u64 range xxxxxxxxxxxxxx] |
+ * |----------------------------|----------------------------|
+ * |xxxxxxxxx] [xxxxxxxxxxxx s64 range |
+ * 0 S64_MAX S64_MIN -1
+ */
+ reg->smax_value = (s64)reg->umax_value;
+ reg->umin_value = max_t(u64, reg->umin_value, reg->smin_value);
+ }
}
}
@@ -2481,20 +2606,6 @@ static void __reg_deduce_mixed_bounds(struct bpf_reg_state *reg)
reg->smin_value = max_t(s64, reg->smin_value, new_smin);
reg->smax_value = min_t(s64, reg->smax_value, new_smax);
- /* if s32 can be treated as valid u32 range, we can use it as well */
- if ((u32)reg->s32_min_value <= (u32)reg->s32_max_value) {
- /* s32 -> u64 tightening */
- new_umin = (reg->umin_value & ~0xffffffffULL) | (u32)reg->s32_min_value;
- new_umax = (reg->umax_value & ~0xffffffffULL) | (u32)reg->s32_max_value;
- reg->umin_value = max_t(u64, reg->umin_value, new_umin);
- reg->umax_value = min_t(u64, reg->umax_value, new_umax);
- /* s32 -> s64 tightening */
- new_smin = (reg->smin_value & ~0xffffffffULL) | (u32)reg->s32_min_value;
- new_smax = (reg->smax_value & ~0xffffffffULL) | (u32)reg->s32_max_value;
- reg->smin_value = max_t(s64, reg->smin_value, new_smin);
- reg->smax_value = min_t(s64, reg->smax_value, new_smax);
- }
-
/* Here we would like to handle a special case after sign extending load,
* when upper bits for a 64-bit range are all 1s or all 0s.
*
@@ -2561,6 +2672,7 @@ static void reg_bounds_sync(struct bpf_reg_state *reg)
/* We might have learned something about the sign bit. */
__reg_deduce_bounds(reg);
__reg_deduce_bounds(reg);
+ __reg_deduce_bounds(reg);
/* We might have learned some bits from the bounds. */
__reg_bound_offset(reg);
/* Intersecting with the old var_off might have improved our bounds
@@ -2607,13 +2719,13 @@ static int reg_bounds_sanity_check(struct bpf_verifier_env *env,
return 0;
out:
- verbose(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] "
- "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)\n",
- ctx, msg, reg->umin_value, reg->umax_value,
- reg->smin_value, reg->smax_value,
- reg->u32_min_value, reg->u32_max_value,
- reg->s32_min_value, reg->s32_max_value,
- reg->var_off.value, reg->var_off.mask);
+ verifier_bug(env, "REG INVARIANTS VIOLATION (%s): %s u64=[%#llx, %#llx] "
+ "s64=[%#llx, %#llx] u32=[%#x, %#x] s32=[%#x, %#x] var_off=(%#llx, %#llx)",
+ ctx, msg, reg->umin_value, reg->umax_value,
+ reg->smin_value, reg->smax_value,
+ reg->u32_min_value, reg->u32_max_value,
+ reg->s32_min_value, reg->s32_max_value,
+ reg->var_off.value, reg->var_off.mask);
if (env->test_reg_invariants)
return -EFAULT;
__mark_reg_unbounded(reg);
@@ -2723,22 +2835,33 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
__mark_reg_not_init(env, regs + regno);
}
-static void mark_btf_ld_reg(struct bpf_verifier_env *env,
- struct bpf_reg_state *regs, u32 regno,
- enum bpf_reg_type reg_type,
- struct btf *btf, u32 btf_id,
- enum bpf_type_flag flag)
+static int mark_btf_ld_reg(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs, u32 regno,
+ enum bpf_reg_type reg_type,
+ struct btf *btf, u32 btf_id,
+ enum bpf_type_flag flag)
{
- if (reg_type == SCALAR_VALUE) {
+ switch (reg_type) {
+ case SCALAR_VALUE:
mark_reg_unknown(env, regs, regno);
- return;
+ return 0;
+ case PTR_TO_BTF_ID:
+ mark_reg_known_zero(env, regs, regno);
+ regs[regno].type = PTR_TO_BTF_ID | flag;
+ regs[regno].btf = btf;
+ regs[regno].btf_id = btf_id;
+ if (type_may_be_null(flag))
+ regs[regno].id = ++env->id_gen;
+ return 0;
+ case PTR_TO_MEM:
+ mark_reg_known_zero(env, regs, regno);
+ regs[regno].type = PTR_TO_MEM | flag;
+ regs[regno].mem_size = 0;
+ return 0;
+ default:
+ verifier_bug(env, "unexpected reg_type %d in %s\n", reg_type, __func__);
+ return -EFAULT;
}
- mark_reg_known_zero(env, regs, regno);
- regs[regno].type = PTR_TO_BTF_ID | flag;
- regs[regno].btf = btf;
- regs[regno].btf_id = btf_id;
- if (type_may_be_null(flag))
- regs[regno].id = ++env->id_gen;
}
#define DEF_NOT_SUBREG (0)
@@ -2787,9 +2910,9 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
struct bpf_verifier_stack_elem *elem;
struct bpf_func_state *frame;
- elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
+ elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL_ACCOUNT);
if (!elem)
- goto err;
+ return NULL;
elem->insn_idx = insn_idx;
elem->prev_insn_idx = prev_insn_idx;
@@ -2801,35 +2924,24 @@ static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
verbose(env,
"The sequence of %d jumps is too complex for async cb.\n",
env->stack_size);
- goto err;
+ return NULL;
}
/* Unlike push_stack() do not copy_verifier_state().
* The caller state doesn't matter.
* This is async callback. It starts in a fresh stack.
* Initialize it similar to do_check_common().
- * But we do need to make sure to not clobber insn_hist, so we keep
- * chaining insn_hist_start/insn_hist_end indices as for a normal
- * child state.
*/
elem->st.branches = 1;
elem->st.in_sleepable = is_sleepable;
- elem->st.insn_hist_start = env->cur_state->insn_hist_end;
- elem->st.insn_hist_end = elem->st.insn_hist_start;
- frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+ frame = kzalloc(sizeof(*frame), GFP_KERNEL_ACCOUNT);
if (!frame)
- goto err;
+ return NULL;
init_func_state(env, frame,
BPF_MAIN_FUNC /* callsite */,
0 /* frameno within this callchain */,
subprog /* subprog number within this prog */);
elem->st.frame[0] = frame;
return &elem->st;
-err:
- free_verifier_state(env->cur_state, true);
- env->cur_state = NULL;
- /* pop all elements and return */
- while (!pop_stack(env, NULL, NULL, false));
- return NULL;
}
@@ -3167,7 +3279,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return -EINVAL;
}
- tab = kzalloc(sizeof(*tab), GFP_KERNEL);
+ tab = kzalloc(sizeof(*tab), GFP_KERNEL_ACCOUNT);
if (!tab)
return -ENOMEM;
prog_aux->kfunc_tab = tab;
@@ -3183,7 +3295,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
return 0;
if (!btf_tab && offset) {
- btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
+ btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL_ACCOUNT);
if (!btf_tab)
return -ENOMEM;
prog_aux->kfunc_btf_tab = btf_tab;
@@ -3843,10 +3955,11 @@ static void linked_regs_unpack(u64 val, struct linked_regs *s)
}
/* for any branch, call, exit record the history of jmps in the given state */
-static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
- int insn_flags, u64 linked_regs)
+static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur,
+ int insn_flags, u64 linked_regs)
{
- struct bpf_insn_hist_entry *p;
+ u32 cnt = cur->jmp_history_cnt;
+ struct bpf_jmp_history_entry *p;
size_t alloc_size;
/* combine instruction flags if we already recorded this instruction */
@@ -3866,32 +3979,29 @@ static int push_insn_history(struct bpf_verifier_env *env, struct bpf_verifier_s
return 0;
}
- if (cur->insn_hist_end + 1 > env->insn_hist_cap) {
- alloc_size = size_mul(cur->insn_hist_end + 1, sizeof(*p));
- p = kvrealloc(env->insn_hist, alloc_size, GFP_USER);
- if (!p)
- return -ENOMEM;
- env->insn_hist = p;
- env->insn_hist_cap = alloc_size / sizeof(*p);
- }
+ cnt++;
+ alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
+ p = krealloc(cur->jmp_history, alloc_size, GFP_KERNEL_ACCOUNT);
+ if (!p)
+ return -ENOMEM;
+ cur->jmp_history = p;
- p = &env->insn_hist[cur->insn_hist_end];
+ p = &cur->jmp_history[cnt - 1];
p->idx = env->insn_idx;
p->prev_idx = env->prev_insn_idx;
p->flags = insn_flags;
p->linked_regs = linked_regs;
-
- cur->insn_hist_end++;
+ cur->jmp_history_cnt = cnt;
env->cur_hist_ent = p;
return 0;
}
-static struct bpf_insn_hist_entry *get_insn_hist_entry(struct bpf_verifier_env *env,
- u32 hist_start, u32 hist_end, int insn_idx)
+static struct bpf_jmp_history_entry *get_jmp_hist_entry(struct bpf_verifier_state *st,
+ u32 hist_end, int insn_idx)
{
- if (hist_end > hist_start && env->insn_hist[hist_end - 1].idx == insn_idx)
- return &env->insn_hist[hist_end - 1];
+ if (hist_end > 0 && st->jmp_history[hist_end - 1].idx == insn_idx)
+ return &st->jmp_history[hist_end - 1];
return NULL;
}
@@ -3908,26 +4018,25 @@ static struct bpf_insn_hist_entry *get_insn_hist_entry(struct bpf_verifier_env *
* history entry recording a jump from last instruction of parent state and
* first instruction of given state.
*/
-static int get_prev_insn_idx(const struct bpf_verifier_env *env,
- struct bpf_verifier_state *st,
- int insn_idx, u32 hist_start, u32 *hist_endp)
+static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
+ u32 *history)
{
- u32 hist_end = *hist_endp;
- u32 cnt = hist_end - hist_start;
+ u32 cnt = *history;
- if (insn_idx == st->first_insn_idx) {
+ if (i == st->first_insn_idx) {
if (cnt == 0)
return -ENOENT;
- if (cnt == 1 && env->insn_hist[hist_start].idx == insn_idx)
+ if (cnt == 1 && st->jmp_history[0].idx == i)
return -ENOENT;
}
- if (cnt && env->insn_hist[hist_end - 1].idx == insn_idx) {
- (*hist_endp)--;
- return env->insn_hist[hist_end - 1].prev_idx;
+ if (cnt && st->jmp_history[cnt - 1].idx == i) {
+ i = st->jmp_history[cnt - 1].prev_idx;
+ (*history)--;
} else {
- return insn_idx - 1;
+ i--;
}
+ return i;
}
static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
@@ -4108,7 +4217,7 @@ static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
/* If any register R in hist->linked_regs is marked as precise in bt,
* do bt_set_frame_{reg,slot}(bt, R) for all registers in hist->linked_regs.
*/
-static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_insn_hist_entry *hist)
+static void bt_sync_linked_regs(struct backtrack_state *bt, struct bpf_jmp_history_entry *hist)
{
struct linked_regs linked_regs;
bool some_precise = false;
@@ -4153,7 +4262,7 @@ static bool calls_callback(struct bpf_verifier_env *env, int insn_idx);
* - *was* processed previously during backtracking.
*/
static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
- struct bpf_insn_hist_entry *hist, struct backtrack_state *bt)
+ struct bpf_jmp_history_entry *hist, struct backtrack_state *bt)
{
struct bpf_insn *insn = env->prog->insnsi + idx;
u8 class = BPF_CLASS(insn->code);
@@ -4448,7 +4557,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
* . if (scalar cond K|scalar)
* . helper_call(.., scalar, ...) where ARG_CONST is expected
* backtrack through the verifier states and mark all registers and
- * stack slots with spilled constants that these scalar regisers
+ * stack slots with spilled constants that these scalar registers
* should be precise.
* . during state pruning two registers (or spilled stack slots)
* are equivalent if both are not precise.
@@ -4571,7 +4680,7 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_
* SCALARS, as well as any other registers and slots that contribute to
* a tracked state of given registers/stack slots, depending on specific BPF
* assembly instructions (see backtrack_insns() for exact instruction handling
- * logic). This backtracking relies on recorded insn_hist and is able to
+ * logic). This backtracking relies on recorded jmp_history and is able to
* traverse entire chain of parent states. This process ends only when all the
* necessary registers/slots and their transitive dependencies are marked as
* precise.
@@ -4651,23 +4760,27 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_
* mark_all_scalars_imprecise() to hopefully get more permissive and generic
* finalized states which help in short circuiting more future states.
*/
-static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
+static int __mark_chain_precision(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *starting_state,
+ int regno,
+ bool *changed)
{
+ struct bpf_verifier_state *st = starting_state;
struct backtrack_state *bt = &env->bt;
- struct bpf_verifier_state *st = env->cur_state;
int first_idx = st->first_insn_idx;
- int last_idx = env->insn_idx;
+ int last_idx = starting_state->insn_idx;
int subseq_idx = -1;
struct bpf_func_state *func;
+ bool tmp, skip_first = true;
struct bpf_reg_state *reg;
- bool skip_first = true;
int i, fr, err;
if (!env->bpf_capable)
return 0;
+ changed = changed ?: &tmp;
/* set frame number from which we are starting to backtrack */
- bt_init(bt, env->cur_state->curframe);
+ bt_init(bt, starting_state->curframe);
/* Do sanity checks against current state of register and/or stack
* slot, but don't set precise flag in current state, as precision
@@ -4677,7 +4790,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
if (regno >= 0) {
reg = &func->regs[regno];
if (reg->type != SCALAR_VALUE) {
- WARN_ONCE(1, "backtracing misuse");
+ verifier_bug(env, "backtracking misuse");
return -EFAULT;
}
bt_set_reg(bt, regno);
@@ -4688,9 +4801,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
for (;;) {
DECLARE_BITMAP(mask, 64);
- u32 hist_start = st->insn_hist_start;
- u32 hist_end = st->insn_hist_end;
- struct bpf_insn_hist_entry *hist;
+ u32 history = st->jmp_history_cnt;
+ struct bpf_jmp_history_entry *hist;
if (env->log.level & BPF_LOG_LEVEL2) {
verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
@@ -4712,8 +4824,10 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
for_each_set_bit(i, mask, 32) {
reg = &st->frame[0]->regs[i];
bt_clear_reg(bt, i);
- if (reg->type == SCALAR_VALUE)
+ if (reg->type == SCALAR_VALUE) {
reg->precise = true;
+ *changed = true;
+ }
}
return 0;
}
@@ -4728,11 +4842,11 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
err = 0;
skip_first = false;
} else {
- hist = get_insn_hist_entry(env, hist_start, hist_end, i);
+ hist = get_jmp_hist_entry(st, history, i);
err = backtrack_insn(env, i, subseq_idx, hist, bt);
}
if (err == -ENOTSUPP) {
- mark_all_scalars_precise(env, env->cur_state);
+ mark_all_scalars_precise(env, starting_state);
bt_reset(bt);
return 0;
} else if (err) {
@@ -4745,7 +4859,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
*/
return 0;
subseq_idx = i;
- i = get_prev_insn_idx(env, st, i, hist_start, &hist_end);
+ i = get_prev_insn_idx(st, i, &history);
if (i == -ENOENT)
break;
if (i >= env->prog->len) {
@@ -4772,10 +4886,12 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
bt_clear_frame_reg(bt, fr, i);
continue;
}
- if (reg->precise)
+ if (reg->precise) {
bt_clear_frame_reg(bt, fr, i);
- else
+ } else {
reg->precise = true;
+ *changed = true;
+ }
}
bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
@@ -4790,10 +4906,12 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
continue;
}
reg = &func->stack[i].spilled_ptr;
- if (reg->precise)
+ if (reg->precise) {
bt_clear_frame_slot(bt, fr, i);
- else
+ } else {
reg->precise = true;
+ *changed = true;
+ }
}
if (env->log.level & BPF_LOG_LEVEL2) {
fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
@@ -4820,7 +4938,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
* fallback to marking all precise
*/
if (!bt_empty(bt)) {
- mark_all_scalars_precise(env, env->cur_state);
+ mark_all_scalars_precise(env, starting_state);
bt_reset(bt);
}
@@ -4829,15 +4947,16 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
int mark_chain_precision(struct bpf_verifier_env *env, int regno)
{
- return __mark_chain_precision(env, regno);
+ return __mark_chain_precision(env, env->cur_state, regno, NULL);
}
/* mark_chain_precision_batch() assumes that env->bt is set in the caller to
* desired reg and stack masks across all relevant frames
*/
-static int mark_chain_precision_batch(struct bpf_verifier_env *env)
+static int mark_chain_precision_batch(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *starting_state)
{
- return __mark_chain_precision(env, -1);
+ return __mark_chain_precision(env, starting_state, -1, NULL);
}
static bool is_spillable_regtype(enum bpf_reg_type type)
@@ -5026,7 +5145,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
}
if (sanitize)
- env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
+ env->insn_aux_data[insn_idx].nospec_result = true;
}
err = destroy_if_dynptr_stack_slot(env, state, spi);
@@ -5109,7 +5228,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
}
if (insn_flags)
- return push_insn_history(env, env->cur_state, insn_flags, 0);
+ return push_jmp_history(env, env->cur_state, insn_flags, 0);
return 0;
}
@@ -5416,7 +5535,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
insn_flags = 0; /* we are not restoring spilled register */
}
if (insn_flags)
- return push_insn_history(env, env->cur_state, insn_flags, 0);
+ return push_jmp_history(env, env->cur_state, insn_flags, 0);
return 0;
}
@@ -5896,6 +6015,7 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
int class = BPF_CLASS(insn->code);
struct bpf_reg_state *val_reg;
+ int ret;
/* Things we already checked for in check_map_access and caller:
* - Reject cases where variable offset may touch kptr
@@ -5929,8 +6049,11 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
/* We can simply mark the value_regno receiving the pointer
* value from map as PTR_TO_BTF_ID, with the correct type.
*/
- mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
- kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field));
+ ret = mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID,
+ kptr_field->kptr.btf, kptr_field->kptr.btf_id,
+ btf_ld_kptr_type(env, kptr_field));
+ if (ret < 0)
+ return ret;
} else if (class == BPF_STX) {
val_reg = reg_state(env, value_regno);
if (!register_is_null(val_reg) &&
@@ -6993,6 +7116,10 @@ BTF_TYPE_SAFE_RCU(struct css_set) {
struct cgroup *dfl_cgrp;
};
+BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state) {
+ struct cgroup *cgroup;
+};
+
/* RCU trusted: these fields are trusted in RCU CS and can be NULL */
BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
struct file __rcu *exe_file;
@@ -7027,8 +7154,7 @@ BTF_TYPE_SAFE_TRUSTED(struct file) {
struct inode *f_inode;
};
-BTF_TYPE_SAFE_TRUSTED(struct dentry) {
- /* no negative dentry-s in places where bpf can see it */
+BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry) {
struct inode *d_inode;
};
@@ -7043,6 +7169,7 @@ static bool type_is_rcu(struct bpf_verifier_env *env,
BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup_subsys_state));
return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
}
@@ -7066,7 +7193,6 @@ static bool type_is_trusted(struct bpf_verifier_env *env,
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
- BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
}
@@ -7076,6 +7202,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env,
const char *field_name, u32 btf_id)
{
BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket));
+ BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry));
return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id,
"__safe_trusted_or_null");
@@ -7139,7 +7266,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
if (!btf_is_kernel(reg->btf)) {
- verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
+ verifier_bug(env, "reg->btf must be kernel btf");
return -EFAULT;
}
ret = env->ops->btf_struct_access(&env->log, reg, off, size);
@@ -7155,7 +7282,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
!(reg->type & MEM_RCU) && !reg->ref_obj_id) {
- verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
+ verifier_bug(env, "ref_obj_id for allocated object must be non-zero");
return -EFAULT;
}
@@ -7225,8 +7352,11 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
clear_trusted_flags(&flag);
}
- if (atype == BPF_READ && value_regno >= 0)
- mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
+ if (atype == BPF_READ && value_regno >= 0) {
+ ret = mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
+ if (ret < 0)
+ return ret;
+ }
return 0;
}
@@ -7280,13 +7410,19 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
/* Simulate access to a PTR_TO_BTF_ID */
memset(&map_reg, 0, sizeof(map_reg));
- mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
+ ret = mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID,
+ btf_vmlinux, *map->ops->map_btf_id, 0);
+ if (ret < 0)
+ return ret;
ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
if (ret < 0)
return ret;
- if (value_regno >= 0)
- mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
+ if (value_regno >= 0) {
+ ret = mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
+ if (ret < 0)
+ return ret;
+ }
return 0;
}
@@ -7470,6 +7606,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
}
} else if (base_type(reg->type) == PTR_TO_MEM) {
bool rdonly_mem = type_is_rdonly_mem(reg->type);
+ bool rdonly_untrusted = rdonly_mem && (reg->type & PTR_UNTRUSTED);
if (type_may_be_null(reg->type)) {
verbose(env, "R%d invalid mem access '%s'\n", regno,
@@ -7489,8 +7626,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return -EACCES;
}
- err = check_mem_region_access(env, regno, off, size,
- reg->mem_size, false);
+ /*
+ * Accesses to untrusted PTR_TO_MEM are done through probe
+ * instructions, hence no need to check bounds in that case.
+ */
+ if (!rdonly_untrusted)
+ err = check_mem_region_access(env, regno, off, size,
+ reg->mem_size, false);
if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
@@ -8517,7 +8659,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn
* ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
*/
if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
- verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
+ verifier_bug(env, "misconfigured dynptr helper type flags");
return -EFAULT;
}
@@ -8883,8 +9025,8 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
- verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n",
- cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
+ verifier_bug(env, "unexpected iterator state %d (%s)",
+ cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
return -EFAULT;
}
@@ -8894,7 +9036,7 @@ static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
*/
if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
!same_callsites(cur_st->parent, cur_st)) {
- verbose(env, "bug: bad parent state for iter next call");
+ verifier_bug(env, "bad parent state for iter next call");
return -EFAULT;
}
/* Note cur_st->parent in the call below, it is necessary to skip
@@ -8953,8 +9095,8 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
{
if (!meta->map_ptr) {
/* kernel subsystem misconfigured verifier */
- verbose(env, "invalid map_ptr to access map->type\n");
- return -EACCES;
+ verifier_bug(env, "invalid map_ptr to access map->type");
+ return -EFAULT;
}
switch (meta->map_ptr->map_type) {
@@ -9100,7 +9242,7 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
compatible = compatible_reg_types[base_type(arg_type)];
if (!compatible) {
- verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
+ verifier_bug(env, "unsupported arg type %d", arg_type);
return -EFAULT;
}
@@ -9182,7 +9324,7 @@ found:
if (!arg_btf_id) {
if (!compatible->btf_id) {
- verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
+ verifier_bug(env, "missing arg compatible BTF ID");
return -EFAULT;
}
arg_btf_id = compatible->btf_id;
@@ -9214,7 +9356,7 @@ found:
case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
meta->func_id != BPF_FUNC_kptr_xchg) {
- verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
+ verifier_bug(env, "unimplemented handling of MEM_ALLOC");
return -EFAULT;
}
/* Check if local kptr in src arg matches kptr in dst arg */
@@ -9229,7 +9371,7 @@ found:
/* Handled by helper specific checks */
break;
default:
- verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
+ verifier_bug(env, "invalid PTR_TO_BTF_ID register for type match");
return -EFAULT;
}
return 0;
@@ -9490,7 +9632,7 @@ static int get_constant_map_key(struct bpf_verifier_env *env,
* to prevent pruning on it.
*/
bt_set_frame_slot(&env->bt, key->frameno, spi);
- err = mark_chain_precision_batch(env);
+ err = mark_chain_precision_batch(env, env->cur_state);
if (err < 0)
return err;
@@ -9587,7 +9729,7 @@ skip_type_check:
return -EINVAL;
}
if (meta->release_regno) {
- verbose(env, "verifier internal error: more than one release argument\n");
+ verifier_bug(env, "more than one release argument");
return -EFAULT;
}
meta->release_regno = regno;
@@ -9595,10 +9737,10 @@ skip_type_check:
if (reg->ref_obj_id && base_type(arg_type) != ARG_KPTR_XCHG_DEST) {
if (meta->ref_obj_id) {
- verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+ verbose(env, "more than one arg with ref_obj_id R%d %u %u",
regno, reg->ref_obj_id,
meta->ref_obj_id);
- return -EFAULT;
+ return -EACCES;
}
meta->ref_obj_id = reg->ref_obj_id;
}
@@ -9641,8 +9783,8 @@ skip_type_check:
* we have to check map_key here. Otherwise it means
* that kernel subsystem misconfigured verifier
*/
- verbose(env, "invalid map_ptr to access map->key\n");
- return -EACCES;
+ verifier_bug(env, "invalid map_ptr to access map->key");
+ return -EFAULT;
}
key_size = meta->map_ptr->key_size;
err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL);
@@ -9668,8 +9810,8 @@ skip_type_check:
*/
if (!meta->map_ptr) {
/* kernel subsystem misconfigured verifier */
- verbose(env, "invalid map_ptr to access map->value\n");
- return -EACCES;
+ verifier_bug(env, "invalid map_ptr to access map->value");
+ return -EFAULT;
}
meta->raw_mode = arg_type & MEM_UNINIT;
err = check_helper_mem_access(env, regno, meta->map_ptr->value_size,
@@ -9698,7 +9840,7 @@ skip_type_check:
if (err)
return err;
} else {
- verbose(env, "verifier internal error\n");
+ verifier_bug(env, "spin lock arg on unexpected helper");
return -EFAULT;
}
break;
@@ -10279,7 +10421,7 @@ static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int calls
}
caller = state->frame[state->curframe];
- callee = kzalloc(sizeof(*callee), GFP_KERNEL);
+ callee = kzalloc(sizeof(*callee), GFP_KERNEL_ACCOUNT);
if (!callee)
return -ENOMEM;
state->frame[state->curframe + 1] = callee;
@@ -10334,6 +10476,12 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
bpf_log(log, "R%d is not a scalar\n", regno);
return -EINVAL;
}
+ } else if (arg->arg_type & PTR_UNTRUSTED) {
+ /*
+ * Anything is allowed for untrusted arguments, as these are
+ * read-only and probe read instructions would protect against
+ * invalid memory access.
+ */
} else if (arg->arg_type == ARG_PTR_TO_CTX) {
ret = check_func_arg_reg_off(env, reg, regno, ARG_DONTCARE);
if (ret < 0)
@@ -10856,8 +11004,8 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
return -EINVAL;
}
if (!calls_callback(env, callee->callsite)) {
- verbose(env, "BUG: in callback at %d, callsite %d !calls_callback\n",
- *insn_idx, callee->callsite);
+ verifier_bug(env, "in callback at %d, callsite %d !calls_callback",
+ *insn_idx, callee->callsite);
return -EFAULT;
}
} else {
@@ -10964,8 +11112,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return 0;
if (map == NULL) {
- verbose(env, "kernel subsystem misconfigured verifier\n");
- return -EINVAL;
+ verifier_bug(env, "expected map for helper call");
+ return -EFAULT;
}
/* In case of read-only, some additional restrictions
@@ -11003,7 +11151,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
if (func_id != BPF_FUNC_tail_call)
return 0;
if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
- verbose(env, "kernel subsystem misconfigured verifier\n");
+ verbose(env, "expected prog array map for tail call");
return -EINVAL;
}
@@ -11146,7 +11294,7 @@ static int check_get_func_ip(struct bpf_verifier_env *env)
return -ENOTSUPP;
}
-static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
+static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env)
{
return &env->insn_aux_data[env->insn_idx];
}
@@ -11256,9 +11404,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
/* With LD_ABS/IND some JITs save/restore skb from r1. */
changes_data = bpf_helper_changes_pkt_data(func_id);
if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
- verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
- func_id_name(func_id), func_id);
- return -EINVAL;
+ verifier_bug(env, "func %s#%d: r1 != ctx", func_id_name(func_id), func_id);
+ return -EFAULT;
}
memset(&meta, 0, sizeof(meta));
@@ -11266,8 +11413,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
err = check_func_proto(fn, func_id);
if (err) {
- verbose(env, "kernel subsystem misconfigured func %s#%d\n",
- func_id_name(func_id), func_id);
+ verifier_bug(env, "incorrect func proto %s#%d", func_id_name(func_id), func_id);
return err;
}
@@ -11340,7 +11486,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
*/
if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
- verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
+ verifier_bug(env, "CONST_PTR_TO_DYNPTR cannot be released");
return -EFAULT;
}
err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
@@ -11457,23 +11603,23 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (meta.dynptr_id) {
- verbose(env, "verifier internal error: meta.dynptr_id already set\n");
+ verifier_bug(env, "meta.dynptr_id already set");
return -EFAULT;
}
if (meta.ref_obj_id) {
- verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
+ verifier_bug(env, "meta.ref_obj_id already set");
return -EFAULT;
}
id = dynptr_id(env, reg);
if (id < 0) {
- verbose(env, "verifier internal error: failed to obtain dynptr id\n");
+ verifier_bug(env, "failed to obtain dynptr id");
return id;
}
ref_obj_id = dynptr_ref_obj_id(env, reg);
if (ref_obj_id < 0) {
- verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
+ verifier_bug(env, "failed to obtain dynptr ref_obj_id");
return ref_obj_id;
}
@@ -11558,9 +11704,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
* to map element returned from bpf_map_lookup_elem()
*/
if (meta.map_ptr == NULL) {
- verbose(env,
- "kernel subsystem misconfigured verifier\n");
- return -EINVAL;
+ verifier_bug(env, "unexpected null map_ptr");
+ return -EFAULT;
}
if (func_id == BPF_FUNC_map_lookup_elem &&
@@ -11650,10 +11795,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
} else {
if (fn->ret_btf_id == BPF_PTR_POISON) {
- verbose(env, "verifier internal error:");
- verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
- func_id_name(func_id));
- return -EINVAL;
+ verifier_bug(env, "func %s has non-overwritten BPF_PTR_POISON return type",
+ func_id_name(func_id));
+ return -EFAULT;
}
ret_btf = btf_vmlinux;
ret_btf_id = *fn->ret_btf_id;
@@ -11678,8 +11822,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs[BPF_REG_0].id = ++env->id_gen;
if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
- verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
- func_id_name(func_id), func_id);
+ verifier_bug(env, "func %s#%d sets ref_obj_id more than once",
+ func_id_name(func_id), func_id);
return -EFAULT;
}
@@ -12391,7 +12535,7 @@ static int process_irq_flag(struct bpf_verifier_env *env, int regno,
if (meta->func_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore])
kfunc_class = IRQ_LOCK_KFUNC;
} else {
- verbose(env, "verifier internal error: unknown irq flags kfunc\n");
+ verifier_bug(env, "unknown irq flags kfunc");
return -EFAULT;
}
@@ -12432,12 +12576,12 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state
struct btf_record *rec = reg_btf_record(reg);
if (!env->cur_state->active_locks) {
- verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
+ verifier_bug(env, "%s w/o active lock", __func__);
return -EFAULT;
}
if (type_flag(reg->type) & NON_OWN_REF) {
- verbose(env, "verifier internal error: NON_OWN_REF already set\n");
+ verifier_bug(env, "NON_OWN_REF already set");
return -EFAULT;
}
@@ -12456,8 +12600,7 @@ static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_o
int i;
if (!ref_obj_id) {
- verbose(env, "verifier internal error: ref_obj_id is zero for "
- "owning -> non-owning conversion\n");
+ verifier_bug(env, "ref_obj_id is zero for owning -> non-owning conversion");
return -EFAULT;
}
@@ -12477,7 +12620,7 @@ static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_o
return 0;
}
- verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
+ verifier_bug(env, "ref state missing for ref_obj_id");
return -EFAULT;
}
@@ -12539,7 +12682,7 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_
ptr = reg->btf;
break;
default:
- verbose(env, "verifier internal error: unknown reg type for lock check\n");
+ verifier_bug(env, "unknown reg type for lock check");
return -EFAULT;
}
id = reg->id;
@@ -12700,7 +12843,7 @@ __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
u32 head_off;
if (meta->btf != btf_vmlinux) {
- verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
+ verifier_bug(env, "unexpected btf mismatch in kfunc call");
return -EFAULT;
}
@@ -12731,7 +12874,7 @@ __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
}
if (*head_field) {
- verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
+ verifier_bug(env, "repeating %s arg", head_type_name);
return -EFAULT;
}
*head_field = field;
@@ -12768,7 +12911,7 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
u32 node_off;
if (meta->btf != btf_vmlinux) {
- verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
+ verifier_bug(env, "unexpected btf mismatch in kfunc call");
return -EFAULT;
}
@@ -12896,7 +13039,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (is_kfunc_arg_prog(btf, &args[i])) {
/* Used to reject repeated use of __prog. */
if (meta->arg_prog) {
- verbose(env, "Only 1 prog->aux argument supported per-kfunc\n");
+ verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc");
return -EFAULT;
}
meta->arg_prog = true;
@@ -12912,7 +13055,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (is_kfunc_arg_constant(meta->btf, &args[i])) {
if (meta->arg_constant.found) {
- verbose(env, "verifier internal error: only one constant argument permitted\n");
+ verifier_bug(env, "only one constant argument permitted");
return -EFAULT;
}
if (!tnum_is_const(reg->var_off)) {
@@ -12964,9 +13107,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (reg->ref_obj_id) {
if (is_kfunc_release(meta) && meta->ref_obj_id) {
- verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
- regno, reg->ref_obj_id,
- meta->ref_obj_id);
+ verifier_bug(env, "more than one arg with ref_obj_id R%d %u %u",
+ regno, reg->ref_obj_id,
+ meta->ref_obj_id);
return -EFAULT;
}
meta->ref_obj_id = reg->ref_obj_id;
@@ -13046,7 +13189,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_RES_SPIN_LOCK:
break;
default:
- WARN_ON_ONCE(1);
+ verifier_bug(env, "unknown kfunc arg type %d", kf_arg_type);
return -EFAULT;
}
@@ -13115,14 +13258,14 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
- verbose(env, "verifier internal error: no dynptr type for parent of clone\n");
+ verifier_bug(env, "no dynptr type for parent of clone");
return -EFAULT;
}
dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) {
- verbose(env, "verifier internal error: missing ref obj id for parent of clone\n");
+ verifier_bug(env, "missing ref obj id for parent of clone");
return -EFAULT;
}
}
@@ -13135,7 +13278,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
int id = dynptr_id(env, reg);
if (id < 0) {
- verbose(env, "verifier internal error: failed to obtain dynptr id\n");
+ verifier_bug(env, "failed to obtain dynptr id");
return id;
}
meta->initialized_dynptr.id = id;
@@ -13271,7 +13414,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
if (meta->arg_constant.found) {
- verbose(env, "verifier internal error: only one constant argument permitted\n");
+ verifier_bug(env, "only one constant argument permitted");
return -EFAULT;
}
if (!tnum_is_const(size_reg->var_off)) {
@@ -13303,7 +13446,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
rec = reg_btf_record(reg);
if (!rec) {
- verbose(env, "verifier internal error: Couldn't find btf_record\n");
+ verifier_bug(env, "Couldn't find btf_record");
return -EFAULT;
}
@@ -13537,16 +13680,24 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
regs[BPF_REG_0].btf_id = meta->ret_btf_id;
} else if (meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
ret_t = btf_type_by_id(desc_btf, meta->arg_constant.value);
- if (!ret_t || !btf_type_is_struct(ret_t)) {
+ if (!ret_t) {
+ verbose(env, "Unknown type ID %lld passed to kfunc bpf_rdonly_cast\n",
+ meta->arg_constant.value);
+ return -EINVAL;
+ } else if (btf_type_is_struct(ret_t)) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
+ regs[BPF_REG_0].btf = desc_btf;
+ regs[BPF_REG_0].btf_id = meta->arg_constant.value;
+ } else if (btf_type_is_void(ret_t)) {
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ regs[BPF_REG_0].type = PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED;
+ regs[BPF_REG_0].mem_size = 0;
+ } else {
verbose(env,
- "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
+ "kfunc bpf_rdonly_cast type ID argument must be of a struct or void\n");
return -EINVAL;
}
-
- mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
- regs[BPF_REG_0].btf = desc_btf;
- regs[BPF_REG_0].btf_id = meta->arg_constant.value;
} else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
enum bpf_type_flag type_flag = get_dynptr_type_flag(meta->initialized_dynptr.type);
@@ -13554,7 +13705,7 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
mark_reg_known_zero(env, regs, BPF_REG_0);
if (!meta->arg_constant.found) {
- verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
+ verifier_bug(env, "bpf_dynptr_slice(_rdwr) no constant size");
return -EFAULT;
}
@@ -13574,7 +13725,7 @@ static int check_special_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_ca
}
if (!meta->initialized_dynptr.id) {
- verbose(env, "verifier internal error: no dynptr id\n");
+ verifier_bug(env, "no dynptr id");
return -EFAULT;
}
regs[BPF_REG_0].dynptr_id = meta->initialized_dynptr.id;
@@ -14014,7 +14165,9 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
const struct bpf_insn *insn)
{
- return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
+ return env->bypass_spec_v1 ||
+ BPF_SRC(insn->code) == BPF_K ||
+ cur_aux(env)->nospec;
}
static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
@@ -14214,10 +14367,9 @@ static int sanitize_err(struct bpf_verifier_env *env,
case REASON_STACK:
verbose(env, "R%d could not be pushed for speculative verification, %s\n",
dst, err);
- break;
+ return -ENOMEM;
default:
- verbose(env, "verifier internal error: unknown reason (%d)\n",
- reason);
+ verifier_bug(env, "unknown reason (%d)", reason);
break;
}
@@ -14284,7 +14436,7 @@ static int sanitize_check_bounds(struct bpf_verifier_env *env,
}
break;
default:
- break;
+ return -EOPNOTSUPP;
}
return 0;
@@ -14311,7 +14463,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
struct bpf_sanitize_info info = {};
u8 opcode = BPF_OP(insn->code);
u32 dst = insn->dst_reg;
- int ret;
+ int ret, bounds_ret;
dst_reg = &regs[dst];
@@ -14343,6 +14495,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
return -EACCES;
}
+ /*
+ * Accesses to untrusted PTR_TO_MEM are done through probe
+ * instructions, hence no need to track offsets.
+ */
+ if (base_type(ptr_reg->type) == PTR_TO_MEM && (ptr_reg->type & PTR_UNTRUSTED))
+ return 0;
+
switch (base_type(ptr_reg->type)) {
case PTR_TO_CTX:
case PTR_TO_MAP_VALUE:
@@ -14511,11 +14670,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
return -EINVAL;
reg_bounds_sync(dst_reg);
- if (sanitize_check_bounds(env, insn, dst_reg) < 0)
- return -EACCES;
+ bounds_ret = sanitize_check_bounds(env, insn, dst_reg);
+ if (bounds_ret == -EACCES)
+ return bounds_ret;
if (sanitize_needed(opcode)) {
ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
&info, true);
+ if (verifier_bug_if(!can_skip_alu_sanitation(env, insn)
+ && !env->cur_state->speculative
+ && bounds_ret
+ && !ret,
+ env, "Pointer type unsupported by sanitize_check_bounds() not rejected by retrieve_ptr_limit() as required")) {
+ return -EFAULT;
+ }
if (ret < 0)
return sanitize_err(env, insn, ret, off_reg, dst_reg);
}
@@ -14530,14 +14697,25 @@ static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
s32 *dst_smax = &dst_reg->s32_max_value;
u32 *dst_umin = &dst_reg->u32_min_value;
u32 *dst_umax = &dst_reg->u32_max_value;
+ u32 umin_val = src_reg->u32_min_value;
+ u32 umax_val = src_reg->u32_max_value;
+ bool min_overflow, max_overflow;
if (check_add_overflow(*dst_smin, src_reg->s32_min_value, dst_smin) ||
check_add_overflow(*dst_smax, src_reg->s32_max_value, dst_smax)) {
*dst_smin = S32_MIN;
*dst_smax = S32_MAX;
}
- if (check_add_overflow(*dst_umin, src_reg->u32_min_value, dst_umin) ||
- check_add_overflow(*dst_umax, src_reg->u32_max_value, dst_umax)) {
+
+ /* If either all additions overflow or no additions overflow, then
+ * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax =
+ * dst_umax + src_umax. Otherwise (some additions overflow), set
+ * the output bounds to unbounded.
+ */
+ min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin);
+ max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax);
+
+ if (!min_overflow && max_overflow) {
*dst_umin = 0;
*dst_umax = U32_MAX;
}
@@ -14550,14 +14728,25 @@ static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
s64 *dst_smax = &dst_reg->smax_value;
u64 *dst_umin = &dst_reg->umin_value;
u64 *dst_umax = &dst_reg->umax_value;
+ u64 umin_val = src_reg->umin_value;
+ u64 umax_val = src_reg->umax_value;
+ bool min_overflow, max_overflow;
if (check_add_overflow(*dst_smin, src_reg->smin_value, dst_smin) ||
check_add_overflow(*dst_smax, src_reg->smax_value, dst_smax)) {
*dst_smin = S64_MIN;
*dst_smax = S64_MAX;
}
- if (check_add_overflow(*dst_umin, src_reg->umin_value, dst_umin) ||
- check_add_overflow(*dst_umax, src_reg->umax_value, dst_umax)) {
+
+ /* If either all additions overflow or no additions overflow, then
+ * it is okay to set: dst_umin = dst_umin + src_umin, dst_umax =
+ * dst_umax + src_umax. Otherwise (some additions overflow), set
+ * the output bounds to unbounded.
+ */
+ min_overflow = check_add_overflow(*dst_umin, umin_val, dst_umin);
+ max_overflow = check_add_overflow(*dst_umax, umax_val, dst_umax);
+
+ if (!min_overflow && max_overflow) {
*dst_umin = 0;
*dst_umax = U64_MAX;
}
@@ -14568,8 +14757,11 @@ static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
{
s32 *dst_smin = &dst_reg->s32_min_value;
s32 *dst_smax = &dst_reg->s32_max_value;
+ u32 *dst_umin = &dst_reg->u32_min_value;
+ u32 *dst_umax = &dst_reg->u32_max_value;
u32 umin_val = src_reg->u32_min_value;
u32 umax_val = src_reg->u32_max_value;
+ bool min_underflow, max_underflow;
if (check_sub_overflow(*dst_smin, src_reg->s32_max_value, dst_smin) ||
check_sub_overflow(*dst_smax, src_reg->s32_min_value, dst_smax)) {
@@ -14577,14 +14769,18 @@ static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
*dst_smin = S32_MIN;
*dst_smax = S32_MAX;
}
- if (dst_reg->u32_min_value < umax_val) {
- /* Overflow possible, we know nothing */
- dst_reg->u32_min_value = 0;
- dst_reg->u32_max_value = U32_MAX;
- } else {
- /* Cannot overflow (as long as bounds are consistent) */
- dst_reg->u32_min_value -= umax_val;
- dst_reg->u32_max_value -= umin_val;
+
+ /* If either all subtractions underflow or no subtractions
+ * underflow, it is okay to set: dst_umin = dst_umin - src_umax,
+ * dst_umax = dst_umax - src_umin. Otherwise (some subtractions
+ * underflow), set the output bounds to unbounded.
+ */
+ min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin);
+ max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax);
+
+ if (min_underflow && !max_underflow) {
+ *dst_umin = 0;
+ *dst_umax = U32_MAX;
}
}
@@ -14593,8 +14789,11 @@ static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
{
s64 *dst_smin = &dst_reg->smin_value;
s64 *dst_smax = &dst_reg->smax_value;
+ u64 *dst_umin = &dst_reg->umin_value;
+ u64 *dst_umax = &dst_reg->umax_value;
u64 umin_val = src_reg->umin_value;
u64 umax_val = src_reg->umax_value;
+ bool min_underflow, max_underflow;
if (check_sub_overflow(*dst_smin, src_reg->smax_value, dst_smin) ||
check_sub_overflow(*dst_smax, src_reg->smin_value, dst_smax)) {
@@ -14602,14 +14801,18 @@ static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
*dst_smin = S64_MIN;
*dst_smax = S64_MAX;
}
- if (dst_reg->umin_value < umax_val) {
- /* Overflow possible, we know nothing */
- dst_reg->umin_value = 0;
- dst_reg->umax_value = U64_MAX;
- } else {
- /* Cannot overflow (as long as bounds are consistent) */
- dst_reg->umin_value -= umax_val;
- dst_reg->umax_value -= umin_val;
+
+ /* If either all subtractions underflow or no subtractions
+ * underflow, it is okay to set: dst_umin = dst_umin - src_umax,
+ * dst_umax = dst_umax - src_umin. Otherwise (some subtractions
+ * underflow), set the output bounds to unbounded.
+ */
+ min_underflow = check_sub_overflow(*dst_umin, umax_val, dst_umin);
+ max_underflow = check_sub_overflow(*dst_umax, umin_val, dst_umax);
+
+ if (min_underflow && !max_underflow) {
+ *dst_umin = 0;
+ *dst_umax = U64_MAX;
}
}
@@ -15071,6 +15274,7 @@ static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
switch (BPF_OP(insn->code)) {
case BPF_ADD:
case BPF_SUB:
+ case BPF_NEG:
case BPF_AND:
case BPF_XOR:
case BPF_OR:
@@ -15139,6 +15343,13 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
scalar_min_max_sub(dst_reg, &src_reg);
dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
break;
+ case BPF_NEG:
+ env->fake_reg[0] = *dst_reg;
+ __mark_reg_known(dst_reg, 0);
+ scalar32_min_max_sub(dst_reg, &env->fake_reg[0]);
+ scalar_min_max_sub(dst_reg, &env->fake_reg[0]);
+ dst_reg->var_off = tnum_neg(env->fake_reg[0].var_off);
+ break;
case BPF_MUL:
dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
scalar32_min_max_mul(dst_reg, &src_reg);
@@ -15278,12 +15489,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
if (WARN_ON_ONCE(ptr_reg)) {
print_verifier_state(env, vstate, vstate->curframe, true);
verbose(env, "verifier internal error: unexpected ptr_reg\n");
- return -EINVAL;
+ return -EFAULT;
}
if (WARN_ON(!src_reg)) {
print_verifier_state(env, vstate, vstate->curframe, true);
verbose(env, "verifier internal error: no src_reg\n");
- return -EINVAL;
+ return -EFAULT;
}
err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
if (err)
@@ -15362,7 +15573,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check dest operand */
- err = check_reg_arg(env, insn->dst_reg, DST_OP);
+ if (opcode == BPF_NEG) {
+ err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
+ err = err ?: adjust_scalar_min_max_vals(env, insn,
+ &regs[insn->dst_reg],
+ regs[insn->dst_reg]);
+ } else {
+ err = check_reg_arg(env, insn->dst_reg, DST_OP);
+ }
if (err)
return err;
@@ -16029,6 +16247,10 @@ static void regs_refine_cond_op(struct bpf_reg_state *reg1, struct bpf_reg_state
if (!is_reg_const(reg2, is_jmp32))
break;
val = reg_const_value(reg2, is_jmp32);
+ /* Forget the ranges before narrowing tnums, to avoid invariant
+ * violations if we're on a dead branch.
+ */
+ __mark_reg_unbounded(reg1);
if (is_jmp32) {
t = tnum_and(tnum_subreg(reg1->var_off), tnum_const(~val));
reg1->var_off = tnum_with_subreg(reg1->var_off, t);
@@ -16473,7 +16695,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
}
if (insn_flags) {
- err = push_insn_history(env, this_branch, insn_flags, 0);
+ err = push_jmp_history(env, this_branch, insn_flags, 0);
if (err)
return err;
}
@@ -16531,7 +16753,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
if (dst_reg->type == SCALAR_VALUE && dst_reg->id)
collect_linked_regs(this_branch, dst_reg->id, &linked_regs);
if (linked_regs.cnt > 1) {
- err = push_insn_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
+ err = push_jmp_history(env, this_branch, 0, linked_regs_pack(&linked_regs));
if (err)
return err;
}
@@ -16686,7 +16908,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
dst_reg->btf_id = aux->btf_var.btf_id;
break;
default:
- verbose(env, "bpf verifier is misconfigured\n");
+ verifier_bug(env, "pseudo btf id: unexpected dst reg type");
return -EFAULT;
}
return 0;
@@ -16728,8 +16950,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
insn->src_reg == BPF_PSEUDO_MAP_IDX) {
dst_reg->type = CONST_PTR_TO_MAP;
} else {
- verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ verifier_bug(env, "unexpected src reg value for ldimm64");
+ return -EFAULT;
}
return 0;
@@ -16775,8 +16997,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
if (!env->ops->gen_ld_abs) {
- verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ verifier_bug(env, "gen_ld_abs is null");
+ return -EFAULT;
}
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
@@ -17186,7 +17408,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
/* forward- or cross-edge */
insn_state[t] = DISCOVERED | e;
} else {
- verbose(env, "insn state internal bug\n");
+ verifier_bug(env, "insn state internal bug");
return -EFAULT;
}
return DONE_EXPLORING;
@@ -17606,17 +17828,18 @@ static int check_cfg(struct bpf_verifier_env *env)
int *insn_stack, *insn_state, *insn_postorder;
int ex_insn_beg, i, ret = 0;
- insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
if (!insn_state)
return -ENOMEM;
- insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
if (!insn_stack) {
kvfree(insn_state);
return -ENOMEM;
}
- insn_postorder = env->cfg.insn_postorder = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ insn_postorder = env->cfg.insn_postorder =
+ kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
if (!insn_postorder) {
kvfree(insn_state);
kvfree(insn_stack);
@@ -17646,7 +17869,7 @@ walk_cfg:
break;
default:
if (ret > 0) {
- verbose(env, "visit_insn internal bug\n");
+ verifier_bug(env, "visit_insn internal bug");
ret = -EFAULT;
}
goto err_free;
@@ -17654,7 +17877,7 @@ walk_cfg:
}
if (env->cfg.cur_stack < 0) {
- verbose(env, "pop stack internal bug\n");
+ verifier_bug(env, "pop stack internal bug");
ret = -EFAULT;
goto err_free;
}
@@ -17750,7 +17973,7 @@ static int check_btf_func_early(struct bpf_verifier_env *env,
urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
min_size = min_t(u32, krec_size, urec_size);
- krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
+ krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!krecord)
return -ENOMEM;
@@ -17850,7 +18073,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
krecord = prog->aux->func_info;
- info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
+ info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!info_aux)
return -ENOMEM;
@@ -17936,7 +18159,7 @@ static int check_btf_line(struct bpf_verifier_env *env,
* pass in a smaller bpf_line_info object.
*/
linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
- GFP_KERNEL | __GFP_NOWARN);
+ GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!linfo)
return -ENOMEM;
@@ -18259,10 +18482,6 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
{
int i;
- if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
- /* all regs in this state in all frames were already marked */
- return;
-
for (i = 0; i <= st->curframe; i++)
clean_func_state(env, st->frame[i]);
}
@@ -18270,7 +18489,7 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
/* the parentage chains form a tree.
* the verifier states are added to state lists at given insn and
* pushed into state stack for future exploration.
- * when the verifier reaches bpf_exit insn some of the verifer states
+ * when the verifier reaches bpf_exit insn some of the verifier states
* stored in the state lists have their final liveness state already,
* but a lot of states will get revised from liveness point of view when
* the verifier explores other branches.
@@ -18302,7 +18521,6 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
static void clean_live_states(struct bpf_verifier_env *env, int insn,
struct bpf_verifier_state *cur)
{
- struct bpf_verifier_state *loop_entry;
struct bpf_verifier_state_list *sl;
struct list_head *pos, *head;
@@ -18311,12 +18529,14 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
sl = container_of(pos, struct bpf_verifier_state_list, node);
if (sl->state.branches)
continue;
- loop_entry = get_loop_entry(env, &sl->state);
- if (!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches)
- continue;
if (sl->state.insn_idx != insn ||
!same_callsites(&sl->state, cur))
continue;
+ if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE)
+ /* all regs in this state in all frames were already marked */
+ continue;
+ if (incomplete_read_marks(env, &sl->state))
+ continue;
clean_verifier_state(env, &sl->state);
}
}
@@ -18763,9 +18983,7 @@ static bool states_equal(struct bpf_verifier_env *env,
* and all frame states need to be equivalent
*/
for (i = 0; i <= old->curframe; i++) {
- insn_idx = i == old->curframe
- ? env->insn_idx
- : old->frame[i + 1]->callsite;
+ insn_idx = frame_insn_idx(old, i);
if (old->frame[i]->callsite != cur->frame[i]->callsite)
return false;
if (!func_states_equal(env, old->frame[i], cur->frame[i], insn_idx, exact))
@@ -18812,12 +19030,15 @@ static int propagate_liveness_reg(struct bpf_verifier_env *env,
*/
static int propagate_liveness(struct bpf_verifier_env *env,
const struct bpf_verifier_state *vstate,
- struct bpf_verifier_state *vparent)
+ struct bpf_verifier_state *vparent,
+ bool *changed)
{
struct bpf_reg_state *state_reg, *parent_reg;
struct bpf_func_state *state, *parent;
int i, frame, err = 0;
+ bool tmp = false;
+ changed = changed ?: &tmp;
if (vparent->curframe != vstate->curframe) {
WARN(1, "propagate_live: parent frame %d current frame %d\n",
vparent->curframe, vstate->curframe);
@@ -18836,6 +19057,7 @@ static int propagate_liveness(struct bpf_verifier_env *env,
&parent_reg[i]);
if (err < 0)
return err;
+ *changed |= err > 0;
if (err == REG_LIVE_READ64)
mark_insn_zext(env, &parent_reg[i]);
}
@@ -18847,6 +19069,7 @@ static int propagate_liveness(struct bpf_verifier_env *env,
state_reg = &state->stack[i].spilled_ptr;
err = propagate_liveness_reg(env, state_reg,
parent_reg);
+ *changed |= err > 0;
if (err < 0)
return err;
}
@@ -18858,7 +19081,9 @@ static int propagate_liveness(struct bpf_verifier_env *env,
* propagate them into the current state
*/
static int propagate_precision(struct bpf_verifier_env *env,
- const struct bpf_verifier_state *old)
+ const struct bpf_verifier_state *old,
+ struct bpf_verifier_state *cur,
+ bool *changed)
{
struct bpf_reg_state *state_reg;
struct bpf_func_state *state;
@@ -18906,13 +19131,53 @@ static int propagate_precision(struct bpf_verifier_env *env,
verbose(env, "\n");
}
- err = mark_chain_precision_batch(env);
+ err = __mark_chain_precision(env, cur, -1, changed);
if (err < 0)
return err;
return 0;
}
+#define MAX_BACKEDGE_ITERS 64
+
+/* Propagate read and precision marks from visit->backedges[*].state->equal_state
+ * to corresponding parent states of visit->backedges[*].state until fixed point is reached,
+ * then free visit->backedges.
+ * After execution of this function incomplete_read_marks() will return false
+ * for all states corresponding to @visit->callchain.
+ */
+static int propagate_backedges(struct bpf_verifier_env *env, struct bpf_scc_visit *visit)
+{
+ struct bpf_scc_backedge *backedge;
+ struct bpf_verifier_state *st;
+ bool changed;
+ int i, err;
+
+ i = 0;
+ do {
+ if (i++ > MAX_BACKEDGE_ITERS) {
+ if (env->log.level & BPF_LOG_LEVEL2)
+ verbose(env, "%s: too many iterations\n", __func__);
+ for (backedge = visit->backedges; backedge; backedge = backedge->next)
+ mark_all_scalars_precise(env, &backedge->state);
+ break;
+ }
+ changed = false;
+ for (backedge = visit->backedges; backedge; backedge = backedge->next) {
+ st = &backedge->state;
+ err = propagate_liveness(env, st->equal_state, st, &changed);
+ if (err)
+ return err;
+ err = propagate_precision(env, st->equal_state, st, &changed);
+ if (err)
+ return err;
+ }
+ } while (changed);
+
+ free_backedges(visit);
+ return 0;
+}
+
static bool states_maybe_looping(struct bpf_verifier_state *old,
struct bpf_verifier_state *cur)
{
@@ -18940,7 +19205,7 @@ static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
* terminology) calls specially: as opposed to bounded BPF loops, it *expects*
* states to match, which otherwise would look like an infinite loop. So while
* iter_next() calls are taken care of, we still need to be careful and
- * prevent erroneous and too eager declaration of "ininite loop", when
+ * prevent erroneous and too eager declaration of "infinite loop", when
* iterators are involved.
*
* Here's a situation in pseudo-BPF assembly form:
@@ -18982,7 +19247,7 @@ static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
*
* This approach allows to keep infinite loop heuristic even in the face of
* active iterator. E.g., C snippet below is and will be detected as
- * inifintely looping:
+ * infinitely looping:
*
* struct bpf_iter_num it;
* int *p, x;
@@ -19022,14 +19287,14 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
struct bpf_verifier_state_list *new_sl;
struct bpf_verifier_state_list *sl;
- struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
+ struct bpf_verifier_state *cur = env->cur_state, *new;
+ bool force_new_state, add_new_state, loop;
int i, j, n, err, states_cnt = 0;
- bool force_new_state, add_new_state, force_exact;
struct list_head *pos, *tmp, *head;
force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
/* Avoid accumulating infinitely long jmp history */
- cur->insn_hist_end - cur->insn_hist_start > 40;
+ cur->jmp_history_cnt > 40;
/* bpf progs typically have pruning point every 4 instructions
* http://vger.kernel.org/bpfconf2019.html#session-1
@@ -19046,6 +19311,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
clean_live_states(env, insn_idx, cur);
+ loop = false;
head = explored_state(env, insn_idx);
list_for_each_safe(pos, tmp, head) {
sl = container_of(pos, struct bpf_verifier_state_list, node);
@@ -19125,7 +19391,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
- update_loop_entry(env, cur, &sl->state);
+ loop = true;
goto hit;
}
}
@@ -19134,7 +19400,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
if (is_may_goto_insn_at(env, insn_idx)) {
if (sl->state.may_goto_depth != cur->may_goto_depth &&
states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
- update_loop_entry(env, cur, &sl->state);
+ loop = true;
goto hit;
}
}
@@ -19176,38 +19442,9 @@ skip_inf_loop_check:
add_new_state = false;
goto miss;
}
- /* If sl->state is a part of a loop and this loop's entry is a part of
- * current verification path then states have to be compared exactly.
- * 'force_exact' is needed to catch the following case:
- *
- * initial Here state 'succ' was processed first,
- * | it was eventually tracked to produce a
- * V state identical to 'hdr'.
- * .---------> hdr All branches from 'succ' had been explored
- * | | and thus 'succ' has its .branches == 0.
- * | V
- * | .------... Suppose states 'cur' and 'succ' correspond
- * | | | to the same instruction + callsites.
- * | V V In such case it is necessary to check
- * | ... ... if 'succ' and 'cur' are states_equal().
- * | | | If 'succ' and 'cur' are a part of the
- * | V V same loop exact flag has to be set.
- * | succ <- cur To check if that is the case, verify
- * | | if loop entry of 'succ' is in current
- * | V DFS path.
- * | ...
- * | |
- * '----'
- *
- * Additional details are in the comment before get_loop_entry().
- */
- loop_entry = get_loop_entry(env, &sl->state);
- if (IS_ERR(loop_entry))
- return PTR_ERR(loop_entry);
- force_exact = loop_entry && loop_entry->branches > 0;
- if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) {
- if (force_exact)
- update_loop_entry(env, cur, loop_entry);
+ /* See comments for mark_all_regs_read_and_precise() */
+ loop = incomplete_read_marks(env, &sl->state);
+ if (states_equal(env, &sl->state, cur, loop ? RANGE_WITHIN : NOT_EXACT)) {
hit:
sl->hit_cnt++;
/* reached equivalent register/stack state,
@@ -19220,7 +19457,7 @@ hit:
* they'll be immediately forgotten as we're pruning
* this state and will pop a new one.
*/
- err = propagate_liveness(env, &sl->state, cur);
+ err = propagate_liveness(env, &sl->state, cur, NULL);
/* if previous state reached the exit with precision and
* current state is equivalent to it (except precision marks)
@@ -19228,10 +19465,98 @@ hit:
* the current state.
*/
if (is_jmp_point(env, env->insn_idx))
- err = err ? : push_insn_history(env, cur, 0, 0);
- err = err ? : propagate_precision(env, &sl->state);
+ err = err ? : push_jmp_history(env, cur, 0, 0);
+ err = err ? : propagate_precision(env, &sl->state, cur, NULL);
if (err)
return err;
+ /* When processing iterator based loops above propagate_liveness and
+ * propagate_precision calls are not sufficient to transfer all relevant
+ * read and precision marks. E.g. consider the following case:
+ *
+ * .-> A --. Assume the states are visited in the order A, B, C.
+ * | | | Assume that state B reaches a state equivalent to state A.
+ * | v v At this point, state C is not processed yet, so state A
+ * '-- B C has not received any read or precision marks from C.
+ * Thus, marks propagated from A to B are incomplete.
+ *
+ * The verifier mitigates this by performing the following steps:
+ *
+ * - Prior to the main verification pass, strongly connected components
+ * (SCCs) are computed over the program's control flow graph,
+ * intraprocedurally.
+ *
+ * - During the main verification pass, `maybe_enter_scc()` checks
+ * whether the current verifier state is entering an SCC. If so, an
+ * instance of a `bpf_scc_visit` object is created, and the state
+ * entering the SCC is recorded as the entry state.
+ *
+ * - This instance is associated not with the SCC itself, but with a
+ * `bpf_scc_callchain`: a tuple consisting of the call sites leading to
+ * the SCC and the SCC id. See `compute_scc_callchain()`.
+ *
+ * - When a verification path encounters a `states_equal(...,
+ * RANGE_WITHIN)` condition, there exists a call chain describing the
+ * current state and a corresponding `bpf_scc_visit` instance. A copy
+ * of the current state is created and added to
+ * `bpf_scc_visit->backedges`.
+ *
+ * - When a verification path terminates, `maybe_exit_scc()` is called
+ * from `update_branch_counts()`. For states with `branches == 0`, it
+ * checks whether the state is the entry state of any `bpf_scc_visit`
+ * instance. If it is, this indicates that all paths originating from
+ * this SCC visit have been explored. `propagate_backedges()` is then
+ * called, which propagates read and precision marks through the
+ * backedges until a fixed point is reached.
+ * (In the earlier example, this would propagate marks from A to B,
+ * from C to A, and then again from A to B.)
+ *
+ * A note on callchains
+ * --------------------
+ *
+ * Consider the following example:
+ *
+ * void foo() { loop { ... SCC#1 ... } }
+ * void main() {
+ * A: foo();
+ * B: ...
+ * C: foo();
+ * }
+ *
+ * Here, there are two distinct callchains leading to SCC#1:
+ * - (A, SCC#1)
+ * - (C, SCC#1)
+ *
+ * Each callchain identifies a separate `bpf_scc_visit` instance that
+ * accumulates backedge states. The `propagate_{liveness,precision}()`
+ * functions traverse the parent state of each backedge state, which
+ * means these parent states must remain valid (i.e., not freed) while
+ * the corresponding `bpf_scc_visit` instance exists.
+ *
+ * Associating `bpf_scc_visit` instances directly with SCCs instead of
+ * callchains would break this invariant:
+ * - States explored during `C: foo()` would contribute backedges to
+ * SCC#1, but SCC#1 would only be exited once the exploration of
+ * `A: foo()` completes.
+ * - By that time, the states explored between `A: foo()` and `C: foo()`
+ * (i.e., `B: ...`) may have already been freed, causing the parent
+ * links for states from `C: foo()` to become invalid.
+ */
+ if (loop) {
+ struct bpf_scc_backedge *backedge;
+
+ backedge = kzalloc(sizeof(*backedge), GFP_KERNEL_ACCOUNT);
+ if (!backedge)
+ return -ENOMEM;
+ err = copy_verifier_state(&backedge->state, cur);
+ backedge->state.equal_state = &sl->state;
+ backedge->state.insn_idx = insn_idx;
+ err = err ?: add_scc_backedge(env, &sl->state, backedge);
+ if (err) {
+ free_verifier_state(&backedge->state, false);
+ kvfree(backedge);
+ return err;
+ }
+ }
return 1;
}
miss:
@@ -19283,7 +19608,7 @@ miss:
* When looping the sl->state.branches will be > 0 and this state
* will not be considered for equivalence until branches == 0.
*/
- new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
+ new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL_ACCOUNT);
if (!new_sl)
return -ENOMEM;
env->total_states++;
@@ -19305,13 +19630,20 @@ miss:
return err;
}
new->insn_idx = insn_idx;
- WARN_ONCE(new->branches != 1,
- "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
+ verifier_bug_if(new->branches != 1, env,
+ "%s:branches_to_explore=%d insn %d",
+ __func__, new->branches, insn_idx);
+ err = maybe_enter_scc(env, new);
+ if (err) {
+ free_verifier_state(new, false);
+ kvfree(new_sl);
+ return err;
+ }
cur->parent = new;
cur->first_insn_idx = insn_idx;
- cur->insn_hist_start = cur->insn_hist_end;
cur->dfs_depth = new->dfs_depth + 1;
+ clear_jmp_history(cur);
list_add(&new_sl->node, head);
/* connect new state to parentage chain. Current frame needs all
@@ -19383,10 +19715,27 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
!reg_type_mismatch_ok(prev));
}
+static bool is_ptr_to_mem_or_btf_id(enum bpf_reg_type type)
+{
+ switch (base_type(type)) {
+ case PTR_TO_MEM:
+ case PTR_TO_BTF_ID:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_ptr_to_mem(enum bpf_reg_type type)
+{
+ return base_type(type) == PTR_TO_MEM;
+}
+
static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
bool allow_trust_mismatch)
{
enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
+ enum bpf_reg_type merged_type;
if (*prev_type == NOT_INIT) {
/* Saw a valid insn
@@ -19403,15 +19752,24 @@ static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type typ
* Reject it.
*/
if (allow_trust_mismatch &&
- base_type(type) == PTR_TO_BTF_ID &&
- base_type(*prev_type) == PTR_TO_BTF_ID) {
+ is_ptr_to_mem_or_btf_id(type) &&
+ is_ptr_to_mem_or_btf_id(*prev_type)) {
/*
* Have to support a use case when one path through
* the program yields TRUSTED pointer while another
* is UNTRUSTED. Fallback to UNTRUSTED to generate
* BPF_PROBE_MEM/BPF_PROBE_MEMSX.
+ * Same behavior of MEM_RDONLY flag.
*/
- *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
+ if (is_ptr_to_mem(type) || is_ptr_to_mem(*prev_type))
+ merged_type = PTR_TO_MEM;
+ else
+ merged_type = PTR_TO_BTF_ID;
+ if ((type & PTR_UNTRUSTED) || (*prev_type & PTR_UNTRUSTED))
+ merged_type |= PTR_UNTRUSTED;
+ if ((type & MEM_RDONLY) || (*prev_type & MEM_RDONLY))
+ merged_type |= MEM_RDONLY;
+ *prev_type = merged_type;
} else {
verbose(env, "same insn cannot be used with different pointers\n");
return -EINVAL;
@@ -19421,20 +19779,224 @@ static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type typ
return 0;
}
+enum {
+ PROCESS_BPF_EXIT = 1
+};
+
+static int process_bpf_exit_full(struct bpf_verifier_env *env,
+ bool *do_print_state,
+ bool exception_exit)
+{
+ /* We must do check_reference_leak here before
+ * prepare_func_exit to handle the case when
+ * state->curframe > 0, it may be a callback function,
+ * for which reference_state must match caller reference
+ * state when it exits.
+ */
+ int err = check_resource_leak(env, exception_exit,
+ !env->cur_state->curframe,
+ "BPF_EXIT instruction in main prog");
+ if (err)
+ return err;
+
+ /* The side effect of the prepare_func_exit which is
+ * being skipped is that it frees bpf_func_state.
+ * Typically, process_bpf_exit will only be hit with
+ * outermost exit. copy_verifier_state in pop_stack will
+ * handle freeing of any extra bpf_func_state left over
+ * from not processing all nested function exits. We
+ * also skip return code checks as they are not needed
+ * for exceptional exits.
+ */
+ if (exception_exit)
+ return PROCESS_BPF_EXIT;
+
+ if (env->cur_state->curframe) {
+ /* exit from nested function */
+ err = prepare_func_exit(env, &env->insn_idx);
+ if (err)
+ return err;
+ *do_print_state = true;
+ return 0;
+ }
+
+ err = check_return_code(env, BPF_REG_0, "R0");
+ if (err)
+ return err;
+ return PROCESS_BPF_EXIT;
+}
+
+static int do_check_insn(struct bpf_verifier_env *env, bool *do_print_state)
+{
+ int err;
+ struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx];
+ u8 class = BPF_CLASS(insn->code);
+
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ err = check_alu_op(env, insn);
+ if (err)
+ return err;
+
+ } else if (class == BPF_LDX) {
+ bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX;
+
+ /* Check for reserved fields is already done in
+ * resolve_pseudo_ldimm64().
+ */
+ err = check_load_mem(env, insn, false, is_ldsx, true, "ldx");
+ if (err)
+ return err;
+ } else if (class == BPF_STX) {
+ if (BPF_MODE(insn->code) == BPF_ATOMIC) {
+ err = check_atomic(env, insn);
+ if (err)
+ return err;
+ env->insn_idx++;
+ return 0;
+ }
+
+ if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
+ verbose(env, "BPF_STX uses reserved fields\n");
+ return -EINVAL;
+ }
+
+ err = check_store_reg(env, insn, false);
+ if (err)
+ return err;
+ } else if (class == BPF_ST) {
+ enum bpf_reg_type dst_reg_type;
+
+ if (BPF_MODE(insn->code) != BPF_MEM ||
+ insn->src_reg != BPF_REG_0) {
+ verbose(env, "BPF_ST uses reserved fields\n");
+ return -EINVAL;
+ }
+ /* check src operand */
+ err = check_reg_arg(env, insn->dst_reg, SRC_OP);
+ if (err)
+ return err;
+
+ dst_reg_type = cur_regs(env)[insn->dst_reg].type;
+
+ /* check that memory (dst_reg + off) is writeable */
+ err = check_mem_access(env, env->insn_idx, insn->dst_reg,
+ insn->off, BPF_SIZE(insn->code),
+ BPF_WRITE, -1, false, false);
+ if (err)
+ return err;
+
+ err = save_aux_ptr_type(env, dst_reg_type, false);
+ if (err)
+ return err;
+ } else if (class == BPF_JMP || class == BPF_JMP32) {
+ u8 opcode = BPF_OP(insn->code);
+
+ env->jmps_processed++;
+ if (opcode == BPF_CALL) {
+ if (BPF_SRC(insn->code) != BPF_K ||
+ (insn->src_reg != BPF_PSEUDO_KFUNC_CALL &&
+ insn->off != 0) ||
+ (insn->src_reg != BPF_REG_0 &&
+ insn->src_reg != BPF_PSEUDO_CALL &&
+ insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
+ insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
+ verbose(env, "BPF_CALL uses reserved fields\n");
+ return -EINVAL;
+ }
+
+ if (env->cur_state->active_locks) {
+ if ((insn->src_reg == BPF_REG_0 &&
+ insn->imm != BPF_FUNC_spin_unlock) ||
+ (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
+ (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
+ verbose(env,
+ "function calls are not allowed while holding a lock\n");
+ return -EINVAL;
+ }
+ }
+ if (insn->src_reg == BPF_PSEUDO_CALL) {
+ err = check_func_call(env, insn, &env->insn_idx);
+ } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ err = check_kfunc_call(env, insn, &env->insn_idx);
+ if (!err && is_bpf_throw_kfunc(insn))
+ return process_bpf_exit_full(env, do_print_state, true);
+ } else {
+ err = check_helper_call(env, insn, &env->insn_idx);
+ }
+ if (err)
+ return err;
+
+ mark_reg_scratched(env, BPF_REG_0);
+ } else if (opcode == BPF_JA) {
+ if (BPF_SRC(insn->code) != BPF_K ||
+ insn->src_reg != BPF_REG_0 ||
+ insn->dst_reg != BPF_REG_0 ||
+ (class == BPF_JMP && insn->imm != 0) ||
+ (class == BPF_JMP32 && insn->off != 0)) {
+ verbose(env, "BPF_JA uses reserved fields\n");
+ return -EINVAL;
+ }
+
+ if (class == BPF_JMP)
+ env->insn_idx += insn->off + 1;
+ else
+ env->insn_idx += insn->imm + 1;
+ return 0;
+ } else if (opcode == BPF_EXIT) {
+ if (BPF_SRC(insn->code) != BPF_K ||
+ insn->imm != 0 ||
+ insn->src_reg != BPF_REG_0 ||
+ insn->dst_reg != BPF_REG_0 ||
+ class == BPF_JMP32) {
+ verbose(env, "BPF_EXIT uses reserved fields\n");
+ return -EINVAL;
+ }
+ return process_bpf_exit_full(env, do_print_state, false);
+ } else {
+ err = check_cond_jmp_op(env, insn, &env->insn_idx);
+ if (err)
+ return err;
+ }
+ } else if (class == BPF_LD) {
+ u8 mode = BPF_MODE(insn->code);
+
+ if (mode == BPF_ABS || mode == BPF_IND) {
+ err = check_ld_abs(env, insn);
+ if (err)
+ return err;
+
+ } else if (mode == BPF_IMM) {
+ err = check_ld_imm(env, insn);
+ if (err)
+ return err;
+
+ env->insn_idx++;
+ sanitize_mark_insn_seen(env);
+ } else {
+ verbose(env, "invalid BPF_LD mode\n");
+ return -EINVAL;
+ }
+ } else {
+ verbose(env, "unknown insn class %d\n", class);
+ return -EINVAL;
+ }
+
+ env->insn_idx++;
+ return 0;
+}
+
static int do_check(struct bpf_verifier_env *env)
{
bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
struct bpf_verifier_state *state = env->cur_state;
struct bpf_insn *insns = env->prog->insnsi;
- struct bpf_reg_state *regs;
int insn_cnt = env->prog->len;
bool do_print_state = false;
int prev_insn_idx = -1;
for (;;) {
- bool exception_exit = false;
struct bpf_insn *insn;
- u8 class;
+ struct bpf_insn_aux_data *insn_aux;
int err;
/* reset current history entry on each new instruction */
@@ -19448,7 +20010,7 @@ static int do_check(struct bpf_verifier_env *env)
}
insn = &insns[env->insn_idx];
- class = BPF_CLASS(insn->code);
+ insn_aux = &env->insn_aux_data[env->insn_idx];
if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
verbose(env,
@@ -19458,6 +20020,7 @@ static int do_check(struct bpf_verifier_env *env)
}
state->last_insn_idx = env->prev_insn_idx;
+ state->insn_idx = env->insn_idx;
if (is_prune_point(env, env->insn_idx)) {
err = is_state_visited(env, env->insn_idx);
@@ -19479,7 +20042,7 @@ static int do_check(struct bpf_verifier_env *env)
}
if (is_jmp_point(env, env->insn_idx)) {
- err = push_insn_history(env, state, 0, 0);
+ err = push_jmp_history(env, state, 0, 0);
if (err)
return err;
}
@@ -19518,215 +20081,67 @@ static int do_check(struct bpf_verifier_env *env)
return err;
}
- regs = cur_regs(env);
sanitize_mark_insn_seen(env);
prev_insn_idx = env->insn_idx;
- if (class == BPF_ALU || class == BPF_ALU64) {
- err = check_alu_op(env, insn);
- if (err)
- return err;
-
- } else if (class == BPF_LDX) {
- bool is_ldsx = BPF_MODE(insn->code) == BPF_MEMSX;
+ /* Reduce verification complexity by stopping speculative path
+ * verification when a nospec is encountered.
+ */
+ if (state->speculative && insn_aux->nospec)
+ goto process_bpf_exit;
- /* Check for reserved fields is already done in
- * resolve_pseudo_ldimm64().
+ err = do_check_insn(env, &do_print_state);
+ if (error_recoverable_with_nospec(err) && state->speculative) {
+ /* Prevent this speculative path from ever reaching the
+ * insn that would have been unsafe to execute.
*/
- err = check_load_mem(env, insn, false, is_ldsx, true,
- "ldx");
- if (err)
- return err;
- } else if (class == BPF_STX) {
- if (BPF_MODE(insn->code) == BPF_ATOMIC) {
- err = check_atomic(env, insn);
- if (err)
- return err;
- env->insn_idx++;
- continue;
- }
-
- if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
- verbose(env, "BPF_STX uses reserved fields\n");
- return -EINVAL;
- }
-
- err = check_store_reg(env, insn, false);
- if (err)
- return err;
- } else if (class == BPF_ST) {
- enum bpf_reg_type dst_reg_type;
-
- if (BPF_MODE(insn->code) != BPF_MEM ||
- insn->src_reg != BPF_REG_0) {
- verbose(env, "BPF_ST uses reserved fields\n");
- return -EINVAL;
- }
- /* check src operand */
- err = check_reg_arg(env, insn->dst_reg, SRC_OP);
- if (err)
- return err;
-
- dst_reg_type = regs[insn->dst_reg].type;
-
- /* check that memory (dst_reg + off) is writeable */
- err = check_mem_access(env, env->insn_idx, insn->dst_reg,
- insn->off, BPF_SIZE(insn->code),
- BPF_WRITE, -1, false, false);
- if (err)
- return err;
-
- err = save_aux_ptr_type(env, dst_reg_type, false);
+ insn_aux->nospec = true;
+ /* If it was an ADD/SUB insn, potentially remove any
+ * markings for alu sanitization.
+ */
+ insn_aux->alu_state = 0;
+ goto process_bpf_exit;
+ } else if (err < 0) {
+ return err;
+ } else if (err == PROCESS_BPF_EXIT) {
+ goto process_bpf_exit;
+ }
+ WARN_ON_ONCE(err);
+
+ if (state->speculative && insn_aux->nospec_result) {
+ /* If we are on a path that performed a jump-op, this
+ * may skip a nospec patched-in after the jump. This can
+ * currently never happen because nospec_result is only
+ * used for the write-ops
+ * `*(size*)(dst_reg+off)=src_reg|imm32` which must
+ * never skip the following insn. Still, add a warning
+ * to document this in case nospec_result is used
+ * elsewhere in the future.
+ *
+ * All non-branch instructions have a single
+ * fall-through edge. For these, nospec_result should
+ * already work.
+ */
+ if (verifier_bug_if(BPF_CLASS(insn->code) == BPF_JMP ||
+ BPF_CLASS(insn->code) == BPF_JMP32, env,
+ "speculation barrier after jump instruction may not have the desired effect"))
+ return -EFAULT;
+process_bpf_exit:
+ mark_verifier_state_scratched(env);
+ err = update_branch_counts(env, env->cur_state);
if (err)
return err;
- } else if (class == BPF_JMP || class == BPF_JMP32) {
- u8 opcode = BPF_OP(insn->code);
-
- env->jmps_processed++;
- if (opcode == BPF_CALL) {
- if (BPF_SRC(insn->code) != BPF_K ||
- (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
- && insn->off != 0) ||
- (insn->src_reg != BPF_REG_0 &&
- insn->src_reg != BPF_PSEUDO_CALL &&
- insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
- insn->dst_reg != BPF_REG_0 ||
- class == BPF_JMP32) {
- verbose(env, "BPF_CALL uses reserved fields\n");
- return -EINVAL;
- }
-
- if (env->cur_state->active_locks) {
- if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
- (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
- (insn->off != 0 || !kfunc_spin_allowed(insn->imm)))) {
- verbose(env, "function calls are not allowed while holding a lock\n");
- return -EINVAL;
- }
- }
- if (insn->src_reg == BPF_PSEUDO_CALL) {
- err = check_func_call(env, insn, &env->insn_idx);
- } else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
- err = check_kfunc_call(env, insn, &env->insn_idx);
- if (!err && is_bpf_throw_kfunc(insn)) {
- exception_exit = true;
- goto process_bpf_exit_full;
- }
- } else {
- err = check_helper_call(env, insn, &env->insn_idx);
- }
- if (err)
- return err;
-
- mark_reg_scratched(env, BPF_REG_0);
- } else if (opcode == BPF_JA) {
- if (BPF_SRC(insn->code) != BPF_K ||
- insn->src_reg != BPF_REG_0 ||
- insn->dst_reg != BPF_REG_0 ||
- (class == BPF_JMP && insn->imm != 0) ||
- (class == BPF_JMP32 && insn->off != 0)) {
- verbose(env, "BPF_JA uses reserved fields\n");
- return -EINVAL;
- }
-
- if (class == BPF_JMP)
- env->insn_idx += insn->off + 1;
- else
- env->insn_idx += insn->imm + 1;
- continue;
-
- } else if (opcode == BPF_EXIT) {
- if (BPF_SRC(insn->code) != BPF_K ||
- insn->imm != 0 ||
- insn->src_reg != BPF_REG_0 ||
- insn->dst_reg != BPF_REG_0 ||
- class == BPF_JMP32) {
- verbose(env, "BPF_EXIT uses reserved fields\n");
- return -EINVAL;
- }
-process_bpf_exit_full:
- /* We must do check_reference_leak here before
- * prepare_func_exit to handle the case when
- * state->curframe > 0, it may be a callback
- * function, for which reference_state must
- * match caller reference state when it exits.
- */
- err = check_resource_leak(env, exception_exit, !env->cur_state->curframe,
- "BPF_EXIT instruction in main prog");
- if (err)
- return err;
-
- /* The side effect of the prepare_func_exit
- * which is being skipped is that it frees
- * bpf_func_state. Typically, process_bpf_exit
- * will only be hit with outermost exit.
- * copy_verifier_state in pop_stack will handle
- * freeing of any extra bpf_func_state left over
- * from not processing all nested function
- * exits. We also skip return code checks as
- * they are not needed for exceptional exits.
- */
- if (exception_exit)
- goto process_bpf_exit;
-
- if (state->curframe) {
- /* exit from nested function */
- err = prepare_func_exit(env, &env->insn_idx);
- if (err)
- return err;
- do_print_state = true;
- continue;
- }
-
- err = check_return_code(env, BPF_REG_0, "R0");
- if (err)
- return err;
-process_bpf_exit:
- mark_verifier_state_scratched(env);
- update_branch_counts(env, env->cur_state);
- err = pop_stack(env, &prev_insn_idx,
- &env->insn_idx, pop_log);
- if (err < 0) {
- if (err != -ENOENT)
- return err;
- break;
- } else {
- if (verifier_bug_if(env->cur_state->loop_entry, env,
- "broken loop detection"))
- return -EFAULT;
- do_print_state = true;
- continue;
- }
- } else {
- err = check_cond_jmp_op(env, insn, &env->insn_idx);
- if (err)
- return err;
- }
- } else if (class == BPF_LD) {
- u8 mode = BPF_MODE(insn->code);
-
- if (mode == BPF_ABS || mode == BPF_IND) {
- err = check_ld_abs(env, insn);
- if (err)
- return err;
-
- } else if (mode == BPF_IMM) {
- err = check_ld_imm(env, insn);
- if (err)
+ err = pop_stack(env, &prev_insn_idx, &env->insn_idx,
+ pop_log);
+ if (err < 0) {
+ if (err != -ENOENT)
return err;
-
- env->insn_idx++;
- sanitize_mark_insn_seen(env);
+ break;
} else {
- verbose(env, "invalid BPF_LD mode\n");
- return -EINVAL;
+ do_print_state = true;
+ continue;
}
- } else {
- verbose(env, "unknown insn class %d\n", class);
- return -EINVAL;
}
-
- env->insn_idx++;
}
return 0;
@@ -20678,7 +21093,10 @@ static int opt_remove_nops(struct bpf_verifier_env *env)
static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
const union bpf_attr *attr)
{
- struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
+ struct bpf_insn *patch;
+ /* use env->insn_buf as two independent buffers */
+ struct bpf_insn *zext_patch = env->insn_buf;
+ struct bpf_insn *rnd_hi32_patch = &env->insn_buf[2];
struct bpf_insn_aux_data *aux = env->insn_aux_data;
int i, patch_len, delta = 0, len = env->prog->len;
struct bpf_insn *insns = env->prog->insnsi;
@@ -20797,8 +21215,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
-(subprogs[0].stack_depth + 8));
if (epilogue_cnt >= INSN_BUF_SIZE) {
- verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ verifier_bug(env, "epilogue is too long");
+ return -EFAULT;
} else if (epilogue_cnt) {
/* Save the ARG_PTR_TO_CTX for the epilogue to use */
cnt = 0;
@@ -20820,14 +21238,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
if (ops->gen_prologue || env->seen_direct_write) {
if (!ops->gen_prologue) {
- verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ verifier_bug(env, "gen_prologue is null");
+ return -EFAULT;
}
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
env->prog);
if (cnt >= INSN_BUF_SIZE) {
- verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ verifier_bug(env, "prologue is too long");
+ return -EFAULT;
} else if (cnt) {
new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
if (!new_prog)
@@ -20854,6 +21272,28 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
bpf_convert_ctx_access_t convert_ctx_access;
u8 mode;
+ if (env->insn_aux_data[i + delta].nospec) {
+ WARN_ON_ONCE(env->insn_aux_data[i + delta].alu_state);
+ struct bpf_insn *patch = insn_buf;
+
+ *patch++ = BPF_ST_NOSPEC();
+ *patch++ = *insn;
+ cnt = patch - insn_buf;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ /* This can not be easily merged with the
+ * nospec_result-case, because an insn may require a
+ * nospec before and after itself. Therefore also do not
+ * 'continue' here but potentially apply further
+ * patching to insn. *insn should equal patch[1] now.
+ */
+ }
+
if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
@@ -20903,14 +21343,16 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
}
if (type == BPF_WRITE &&
- env->insn_aux_data[i + delta].sanitize_stack_spill) {
- struct bpf_insn patch[] = {
- *insn,
- BPF_ST_NOSPEC(),
- };
+ env->insn_aux_data[i + delta].nospec_result) {
+ /* nospec_result is only used to mitigate Spectre v4 and
+ * to limit verification-time for Spectre v1.
+ */
+ struct bpf_insn *patch = insn_buf;
- cnt = ARRAY_SIZE(patch);
- new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
+ *patch++ = *insn;
+ *patch++ = BPF_ST_NOSPEC();
+ cnt = patch - insn_buf;
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
@@ -20945,6 +21387,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
* for this case.
*/
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
+ case PTR_TO_MEM | MEM_RDONLY | PTR_UNTRUSTED:
if (type == BPF_READ) {
if (BPF_MODE(insn->code) == BPF_MEM)
insn->code = BPF_LDX | BPF_PROBE_MEM |
@@ -20983,8 +21426,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
u8 size_code;
if (type == BPF_WRITE) {
- verbose(env, "bpf verifier narrow ctx access misconfigured\n");
- return -EINVAL;
+ verifier_bug(env, "narrow ctx access misconfigured");
+ return -EFAULT;
}
size_code = BPF_H;
@@ -21002,16 +21445,16 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
&target_size);
if (cnt == 0 || cnt >= INSN_BUF_SIZE ||
(ctx_field_size && !target_size)) {
- verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ verifier_bug(env, "error during ctx access conversion");
+ return -EFAULT;
}
if (is_narrower_load && size < target_size) {
u8 shift = bpf_ctx_narrow_access_offset(
off, size, size_default) * 8;
if (shift && cnt + 1 >= INSN_BUF_SIZE) {
- verbose(env, "bpf verifier narrow ctx load misconfigured\n");
- return -EINVAL;
+ verifier_bug(env, "narrow ctx load misconfigured");
+ return -EFAULT;
}
if (ctx_field_size <= 4) {
if (shift)
@@ -21441,8 +21884,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
*/
desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
if (!desc) {
- verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
- insn->imm);
+ verifier_bug(env, "kernel function descriptor not found for func_id %u",
+ insn->imm);
return -EFAULT;
}
@@ -21457,8 +21900,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
- verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
- insn_idx);
+ verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
+ insn_idx);
return -EFAULT;
}
@@ -21474,15 +21917,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
- verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
- insn_idx);
+ verifier_bug(env, "NULL kptr_struct_meta expected at insn_idx %d",
+ insn_idx);
return -EFAULT;
}
if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
!kptr_struct_meta) {
- verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
- insn_idx);
+ verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
+ insn_idx);
return -EFAULT;
}
@@ -21504,8 +21947,8 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
if (!kptr_struct_meta) {
- verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
- insn_idx);
+ verifier_bug(env, "kptr_struct_meta expected at insn_idx %d",
+ insn_idx);
return -EFAULT;
}
@@ -21539,7 +21982,7 @@ static int add_hidden_subprog(struct bpf_verifier_env *env, struct bpf_insn *pat
/* We only reserve one slot for hidden subprogs in subprog_info. */
if (env->hidden_subprog_cnt) {
- verbose(env, "verifier internal error: only one hidden subprog supported\n");
+ verifier_bug(env, "only one hidden subprog supported");
return -EFAULT;
}
/* We're not patching any existing instruction, just appending the new
@@ -21579,13 +22022,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
u16 stack_depth_extra = 0;
if (env->seen_exception && !env->exception_callback_subprog) {
- struct bpf_insn patch[] = {
- env->prog->insnsi[insn_cnt - 1],
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- };
+ struct bpf_insn *patch = insn_buf;
- ret = add_hidden_subprog(env, patch, ARRAY_SIZE(patch));
+ *patch++ = env->prog->insnsi[insn_cnt - 1];
+ *patch++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
+ *patch++ = BPF_EXIT_INSN();
+ ret = add_hidden_subprog(env, insn_buf, patch - insn_buf);
if (ret < 0)
return ret;
prog = env->prog;
@@ -21621,20 +22063,18 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
insn->off == 1 && insn->imm == -1) {
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
bool isdiv = BPF_OP(insn->code) == BPF_DIV;
- struct bpf_insn *patchlet;
- struct bpf_insn chk_and_sdiv[] = {
- BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
- BPF_NEG | BPF_K, insn->dst_reg,
- 0, 0, 0),
- };
- struct bpf_insn chk_and_smod[] = {
- BPF_MOV32_IMM(insn->dst_reg, 0),
- };
+ struct bpf_insn *patch = insn_buf;
- patchlet = isdiv ? chk_and_sdiv : chk_and_smod;
- cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod);
+ if (isdiv)
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_NEG | BPF_K, insn->dst_reg,
+ 0, 0, 0);
+ else
+ *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
- new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
+ cnt = patch - insn_buf;
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
@@ -21653,83 +22093,79 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
bool isdiv = BPF_OP(insn->code) == BPF_DIV;
bool is_sdiv = isdiv && insn->off == 1;
bool is_smod = !isdiv && insn->off == 1;
- struct bpf_insn *patchlet;
- struct bpf_insn chk_and_div[] = {
- /* [R,W]x div 0 -> 0 */
- BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JNE | BPF_K, insn->src_reg,
- 0, 2, 0),
- BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- *insn,
- };
- struct bpf_insn chk_and_mod[] = {
- /* [R,W]x mod 0 -> [R,W]x */
- BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JEQ | BPF_K, insn->src_reg,
- 0, 1 + (is64 ? 0 : 1), 0),
- *insn,
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
- };
- struct bpf_insn chk_and_sdiv[] = {
+ struct bpf_insn *patch = insn_buf;
+
+ if (is_sdiv) {
/* [R,W]x sdiv 0 -> 0
* LLONG_MIN sdiv -1 -> LLONG_MIN
* INT_MIN sdiv -1 -> INT_MIN
*/
- BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
- BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
- BPF_ADD | BPF_K, BPF_REG_AX,
- 0, 0, 1),
- BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JGT | BPF_K, BPF_REG_AX,
- 0, 4, 1),
- BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JEQ | BPF_K, BPF_REG_AX,
- 0, 1, 0),
- BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
- BPF_MOV | BPF_K, insn->dst_reg,
- 0, 0, 0),
+ *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_ADD | BPF_K, BPF_REG_AX,
+ 0, 0, 1);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JGT | BPF_K, BPF_REG_AX,
+ 0, 4, 1);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, BPF_REG_AX,
+ 0, 1, 0);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_MOV | BPF_K, insn->dst_reg,
+ 0, 0, 0);
/* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */
- BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
- BPF_NEG | BPF_K, insn->dst_reg,
- 0, 0, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- *insn,
- };
- struct bpf_insn chk_and_smod[] = {
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_NEG | BPF_K, insn->dst_reg,
+ 0, 0, 0);
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = *insn;
+ cnt = patch - insn_buf;
+ } else if (is_smod) {
/* [R,W]x mod 0 -> [R,W]x */
/* [R,W]x mod -1 -> 0 */
- BPF_MOV64_REG(BPF_REG_AX, insn->src_reg),
- BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
- BPF_ADD | BPF_K, BPF_REG_AX,
- 0, 0, 1),
- BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JGT | BPF_K, BPF_REG_AX,
- 0, 3, 1),
- BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
- BPF_JEQ | BPF_K, BPF_REG_AX,
- 0, 3 + (is64 ? 0 : 1), 1),
- BPF_MOV32_IMM(insn->dst_reg, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- *insn,
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
- };
-
- if (is_sdiv) {
- patchlet = chk_and_sdiv;
- cnt = ARRAY_SIZE(chk_and_sdiv);
- } else if (is_smod) {
- patchlet = chk_and_smod;
- cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0);
+ *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) |
+ BPF_ADD | BPF_K, BPF_REG_AX,
+ 0, 0, 1);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JGT | BPF_K, BPF_REG_AX,
+ 0, 3, 1);
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, BPF_REG_AX,
+ 0, 3 + (is64 ? 0 : 1), 1);
+ *patch++ = BPF_MOV32_IMM(insn->dst_reg, 0);
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = *insn;
+
+ if (!is64) {
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
+ }
+ cnt = patch - insn_buf;
+ } else if (isdiv) {
+ /* [R,W]x div 0 -> 0 */
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JNE | BPF_K, insn->src_reg,
+ 0, 2, 0);
+ *patch++ = BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg);
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = *insn;
+ cnt = patch - insn_buf;
} else {
- patchlet = isdiv ? chk_and_div : chk_and_mod;
- cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
- ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
+ /* [R,W]x mod 0 -> [R,W]x */
+ *patch++ = BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
+ BPF_JEQ | BPF_K, insn->src_reg,
+ 0, 1 + (is64 ? 0 : 1), 0);
+ *patch++ = *insn;
+
+ if (!is64) {
+ *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *patch++ = BPF_MOV32_REG(insn->dst_reg, insn->dst_reg);
+ }
+ cnt = patch - insn_buf;
}
- new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
@@ -21743,7 +22179,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
if (BPF_CLASS(insn->code) == BPF_LDX &&
(BPF_MODE(insn->code) == BPF_PROBE_MEM ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
- struct bpf_insn *patch = &insn_buf[0];
+ struct bpf_insn *patch = insn_buf;
u64 uaddress_limit = bpf_arch_uaddress_limit();
if (!uaddress_limit)
@@ -21775,8 +22211,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
BPF_MODE(insn->code) == BPF_IND)) {
cnt = env->ops->gen_ld_abs(insn, insn_buf);
if (cnt == 0 || cnt >= INSN_BUF_SIZE) {
- verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ verifier_bug(env, "%d insns generated for ld_abs", cnt);
+ return -EFAULT;
}
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
@@ -21794,7 +22230,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
- struct bpf_insn *patch = &insn_buf[0];
+ struct bpf_insn *patch = insn_buf;
bool issrc, isneg, isimm;
u32 off_reg;
@@ -22111,8 +22547,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
if (cnt == -EOPNOTSUPP)
goto patch_map_ops_generic;
if (cnt <= 0 || cnt >= INSN_BUF_SIZE) {
- verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ verifier_bug(env, "%d insns generated for map lookup", cnt);
+ return -EFAULT;
}
new_prog = bpf_patch_insn_data(env, i + delta,
@@ -22399,9 +22835,9 @@ patch_call_imm:
* programs to call them, must be real in-kernel functions
*/
if (!fn->func) {
- verbose(env,
- "kernel subsystem misconfigured func %s#%d\n",
- func_id_name(insn->imm), insn->imm);
+ verifier_bug(env,
+ "not inlined functions %s#%d is missing func",
+ func_id_name(insn->imm), insn->imm);
return -EFAULT;
}
insn->imm = fn->func - __bpf_call_base;
@@ -22471,8 +22907,8 @@ next_insn:
if (!map_ptr->ops->map_poke_track ||
!map_ptr->ops->map_poke_untrack ||
!map_ptr->ops->map_poke_run) {
- verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
+ verifier_bug(env, "poke tab is misconfigured");
+ return -EFAULT;
}
ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
@@ -22662,7 +23098,12 @@ static void free_states(struct bpf_verifier_env *env)
{
struct bpf_verifier_state_list *sl;
struct list_head *head, *pos, *tmp;
- int i;
+ struct bpf_scc_info *info;
+ int i, j;
+
+ free_verifier_state(env->cur_state, true);
+ env->cur_state = NULL;
+ while (!pop_stack(env, NULL, NULL, false));
list_for_each_safe(pos, tmp, &env->free_list) {
sl = container_of(pos, struct bpf_verifier_state_list, node);
@@ -22671,6 +23112,14 @@ static void free_states(struct bpf_verifier_env *env)
}
INIT_LIST_HEAD(&env->free_list);
+ for (i = 0; i < env->scc_cnt; ++i) {
+ info = env->scc_info[i];
+ for (j = 0; j < info->num_visits; j++)
+ free_backedges(&info->visits[j]);
+ kvfree(info);
+ env->scc_info[i] = NULL;
+ }
+
if (!env->explored_states)
return;
@@ -22698,13 +23147,13 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
env->prev_linfo = NULL;
env->pass_cnt++;
- state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
+ state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL_ACCOUNT);
if (!state)
return -ENOMEM;
state->curframe = 0;
state->speculative = false;
state->branches = 1;
- state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
+ state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL_ACCOUNT);
if (!state->frame[0]) {
kfree(state);
return -ENOMEM;
@@ -22755,11 +23204,12 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
__mark_dynptr_reg(reg, BPF_DYNPTR_TYPE_LOCAL, true, ++env->id_gen);
} else if (base_type(arg->arg_type) == ARG_PTR_TO_MEM) {
reg->type = PTR_TO_MEM;
- if (arg->arg_type & PTR_MAYBE_NULL)
- reg->type |= PTR_MAYBE_NULL;
+ reg->type |= arg->arg_type &
+ (PTR_MAYBE_NULL | PTR_UNTRUSTED | MEM_RDONLY);
mark_reg_known_zero(env, regs, i);
reg->mem_size = arg->mem_size;
- reg->id = ++env->id_gen;
+ if (arg->arg_type & PTR_MAYBE_NULL)
+ reg->id = ++env->id_gen;
} else if (base_type(arg->arg_type) == ARG_PTR_TO_BTF_ID) {
reg->type = PTR_TO_BTF_ID;
if (arg->arg_type & PTR_MAYBE_NULL)
@@ -22776,8 +23226,8 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
/* caller can pass either PTR_TO_ARENA or SCALAR */
mark_reg_unknown(env, regs, i);
} else {
- WARN_ONCE(1, "BUG: unhandled arg#%d type %d\n",
- i - BPF_REG_1, arg->arg_type);
+ verifier_bug(env, "unhandled arg#%d type %d",
+ i - BPF_REG_1, arg->arg_type);
ret = -EFAULT;
goto out;
}
@@ -22807,14 +23257,6 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
ret = do_check(env);
out:
- /* check for NULL is necessary, since cur_state can be freed inside
- * do_check() under memory pressure.
- */
- if (env->cur_state) {
- free_verifier_state(env->cur_state, true);
- env->cur_state = NULL;
- }
- while (!pop_stack(env, NULL, NULL, false));
if (!ret && pop_log)
bpf_vlog_reset(&env->log, 0);
free_states(env);
@@ -22930,7 +23372,7 @@ static void print_verification_stats(struct bpf_verifier_env *env)
int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog,
const struct bpf_ctx_arg_aux *info, u32 cnt)
{
- prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL);
+ prog->aux->ctx_arg_info = kmemdup_array(info, cnt, sizeof(*info), GFP_KERNEL_ACCOUNT);
prog->aux->ctx_arg_info_size = cnt;
return prog->aux->ctx_arg_info ? 0 : -ENOMEM;
@@ -23539,11 +23981,14 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
return ret;
} else if (prog->type == BPF_PROG_TYPE_TRACING &&
btf_id_set_contains(&btf_id_deny, btf_id)) {
+ verbose(env, "Attaching tracing programs to function '%s' is rejected.\n",
+ tgt_info.tgt_name);
return -EINVAL;
} else if ((prog->expected_attach_type == BPF_TRACE_FEXIT ||
prog->expected_attach_type == BPF_MODIFY_RETURN) &&
btf_id_set_contains(&noreturn_deny, btf_id)) {
- verbose(env, "Attaching fexit/fmod_ret to __noreturn functions is rejected.\n");
+ verbose(env, "Attaching fexit/fmod_ret to __noreturn function '%s' is rejected.\n",
+ tgt_info.tgt_name);
return -EINVAL;
}
@@ -23672,6 +24117,7 @@ static bool can_jump(struct bpf_insn *insn)
case BPF_JSLT:
case BPF_JSLE:
case BPF_JCOND:
+ case BPF_JSET:
return true;
}
@@ -23874,7 +24320,7 @@ static int compute_live_registers(struct bpf_verifier_env *env)
* - repeat the computation while {in,out} fields changes for
* any instruction.
*/
- state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL);
+ state = kvcalloc(insn_cnt, sizeof(*state), GFP_KERNEL_ACCOUNT);
if (!state) {
err = -ENOMEM;
goto out;
@@ -23912,6 +24358,10 @@ static int compute_live_registers(struct bpf_verifier_env *env)
if (env->log.level & BPF_LOG_LEVEL2) {
verbose(env, "Live regs before insn:\n");
for (i = 0; i < insn_cnt; ++i) {
+ if (env->insn_aux_data[i].scc)
+ verbose(env, "%3d ", env->insn_aux_data[i].scc);
+ else
+ verbose(env, " ");
verbose(env, "%3d: ", i);
for (j = BPF_REG_0; j < BPF_REG_10; ++j)
if (insn_aux[i].live_regs_before & BIT(j))
@@ -23933,6 +24383,185 @@ out:
return err;
}
+/*
+ * Compute strongly connected components (SCCs) on the CFG.
+ * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc.
+ * If instruction is a sole member of its SCC and there are no self edges,
+ * assign it SCC number of zero.
+ * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation.
+ */
+static int compute_scc(struct bpf_verifier_env *env)
+{
+ const u32 NOT_ON_STACK = U32_MAX;
+
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
+ const u32 insn_cnt = env->prog->len;
+ int stack_sz, dfs_sz, err = 0;
+ u32 *stack, *pre, *low, *dfs;
+ u32 succ_cnt, i, j, t, w;
+ u32 next_preorder_num;
+ u32 next_scc_id;
+ bool assign_scc;
+ u32 succ[2];
+
+ next_preorder_num = 1;
+ next_scc_id = 1;
+ /*
+ * - 'stack' accumulates vertices in DFS order, see invariant comment below;
+ * - 'pre[t] == p' => preorder number of vertex 't' is 'p';
+ * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n';
+ * - 'dfs' DFS traversal stack, used to emulate explicit recursion.
+ */
+ stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+ pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+ low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL_ACCOUNT);
+ dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL_ACCOUNT);
+ if (!stack || !pre || !low || !dfs) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ /*
+ * References:
+ * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms"
+ * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components"
+ *
+ * The algorithm maintains the following invariant:
+ * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]';
+ * - then, vertex 'u' remains on stack while vertex 'v' is on stack.
+ *
+ * Consequently:
+ * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
+ * such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
+ * and thus there is an SCC (loop) containing both 'u' and 'v'.
+ * - If 'low[v] == pre[v]', loops containing 'v' have been explored,
+ * and 'v' can be considered the root of some SCC.
+ *
+ * Here is a pseudo-code for an explicitly recursive version of the algorithm:
+ *
+ * NOT_ON_STACK = insn_cnt + 1
+ * pre = [0] * insn_cnt
+ * low = [0] * insn_cnt
+ * scc = [0] * insn_cnt
+ * stack = []
+ *
+ * next_preorder_num = 1
+ * next_scc_id = 1
+ *
+ * def recur(w):
+ * nonlocal next_preorder_num
+ * nonlocal next_scc_id
+ *
+ * pre[w] = next_preorder_num
+ * low[w] = next_preorder_num
+ * next_preorder_num += 1
+ * stack.append(w)
+ * for s in successors(w):
+ * # Note: for classic algorithm the block below should look as:
+ * #
+ * # if pre[s] == 0:
+ * # recur(s)
+ * # low[w] = min(low[w], low[s])
+ * # elif low[s] != NOT_ON_STACK:
+ * # low[w] = min(low[w], pre[s])
+ * #
+ * # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
+ * # does not break the invariant and makes itartive version of the algorithm
+ * # simpler. See 'Algorithm #3' from [2].
+ *
+ * # 's' not yet visited
+ * if pre[s] == 0:
+ * recur(s)
+ * # if 's' is on stack, pick lowest reachable preorder number from it;
+ * # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
+ * # so 'min' would be a noop.
+ * low[w] = min(low[w], low[s])
+ *
+ * if low[w] == pre[w]:
+ * # 'w' is the root of an SCC, pop all vertices
+ * # below 'w' on stack and assign same SCC to them.
+ * while True:
+ * t = stack.pop()
+ * low[t] = NOT_ON_STACK
+ * scc[t] = next_scc_id
+ * if t == w:
+ * break
+ * next_scc_id += 1
+ *
+ * for i in range(0, insn_cnt):
+ * if pre[i] == 0:
+ * recur(i)
+ *
+ * Below implementation replaces explicit recursion with array 'dfs'.
+ */
+ for (i = 0; i < insn_cnt; i++) {
+ if (pre[i])
+ continue;
+ stack_sz = 0;
+ dfs_sz = 1;
+ dfs[0] = i;
+dfs_continue:
+ while (dfs_sz) {
+ w = dfs[dfs_sz - 1];
+ if (pre[w] == 0) {
+ low[w] = next_preorder_num;
+ pre[w] = next_preorder_num;
+ next_preorder_num++;
+ stack[stack_sz++] = w;
+ }
+ /* Visit 'w' successors */
+ succ_cnt = insn_successors(env->prog, w, succ);
+ for (j = 0; j < succ_cnt; ++j) {
+ if (pre[succ[j]]) {
+ low[w] = min(low[w], low[succ[j]]);
+ } else {
+ dfs[dfs_sz++] = succ[j];
+ goto dfs_continue;
+ }
+ }
+ /*
+ * Preserve the invariant: if some vertex above in the stack
+ * is reachable from 'w', keep 'w' on the stack.
+ */
+ if (low[w] < pre[w]) {
+ dfs_sz--;
+ goto dfs_continue;
+ }
+ /*
+ * Assign SCC number only if component has two or more elements,
+ * or if component has a self reference.
+ */
+ assign_scc = stack[stack_sz - 1] != w;
+ for (j = 0; j < succ_cnt; ++j) {
+ if (succ[j] == w) {
+ assign_scc = true;
+ break;
+ }
+ }
+ /* Pop component elements from stack */
+ do {
+ t = stack[--stack_sz];
+ low[t] = NOT_ON_STACK;
+ if (assign_scc)
+ aux[t].scc = next_scc_id;
+ } while (t != w);
+ if (assign_scc)
+ next_scc_id++;
+ dfs_sz--;
+ }
+ }
+ env->scc_info = kvcalloc(next_scc_id, sizeof(*env->scc_info), GFP_KERNEL_ACCOUNT);
+ if (!env->scc_info) {
+ err = -ENOMEM;
+ goto exit;
+ }
+exit:
+ kvfree(stack);
+ kvfree(pre);
+ kvfree(low);
+ kvfree(dfs);
+ return err;
+}
+
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
u64 start_time = ktime_get_ns();
@@ -23941,6 +24570,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
u32 log_true_size;
bool is_priv;
+ BTF_TYPE_EMIT(enum bpf_features);
+
/* no program is valid */
if (ARRAY_SIZE(bpf_verifier_ops) == 0)
return -EINVAL;
@@ -23948,7 +24579,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
/* 'struct bpf_verifier_env' can be global, but since it's not small,
* allocate/free it every time bpf_check() is called
*/
- env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
+ env = kvzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL_ACCOUNT);
if (!env)
return -ENOMEM;
@@ -24011,7 +24642,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
env->explored_states = kvcalloc(state_htab_size(env),
sizeof(struct list_head),
- GFP_USER);
+ GFP_KERNEL_ACCOUNT);
ret = -ENOMEM;
if (!env->explored_states)
goto skip_full_check;
@@ -24054,6 +24685,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (ret)
goto skip_full_check;
+ ret = compute_scc(env);
+ if (ret < 0)
+ goto skip_full_check;
+
ret = compute_live_registers(env);
if (ret < 0)
goto skip_full_check;
@@ -24138,7 +24773,7 @@ skip_full_check:
/* if program passed verifier, update used_maps in bpf_prog_info */
env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
sizeof(env->used_maps[0]),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!env->prog->aux->used_maps) {
ret = -ENOMEM;
@@ -24153,7 +24788,7 @@ skip_full_check:
/* if program passed verifier, update used_btfs in bpf_prog_aux */
env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
sizeof(env->used_btfs[0]),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!env->prog->aux->used_btfs) {
ret = -ENOMEM;
goto err_release_maps;
@@ -24194,9 +24829,9 @@ err_unlock:
if (!is_priv)
mutex_unlock(&bpf_verifier_lock);
vfree(env->insn_aux_data);
- kvfree(env->insn_hist);
err_free_env:
kvfree(env->cfg.insn_postorder);
+ kvfree(env->scc_info);
kvfree(env);
return ret;
}