summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/bloom_filter.c3
-rw-r--r--kernel/bpf/bpf_local_storage.c3
-rw-r--r--kernel/bpf/bpf_struct_ops.c3
-rw-r--r--kernel/bpf/btf.c19
-rw-r--r--kernel/bpf/core.c8
-rw-r--r--kernel/bpf/cpumap.c4
-rw-r--r--kernel/bpf/cpumask.c38
-rw-r--r--kernel/bpf/devmap.c3
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/helpers.c12
-rw-r--r--kernel/bpf/lpm_trie.c3
-rw-r--r--kernel/bpf/memalloc.c31
-rw-r--r--kernel/bpf/preload/bpf_preload_kern.c4
-rw-r--r--kernel/bpf/queue_stack_maps.c4
-rw-r--r--kernel/bpf/reuseport_array.c3
-rw-r--r--kernel/bpf/stackmap.c3
-rw-r--r--kernel/bpf/syscall.c184
-rw-r--r--kernel/bpf/verifier.c248
18 files changed, 395 insertions, 184 deletions
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
index 540331b610a9..addf3dd57b59 100644
--- a/kernel/bpf/bloom_filter.c
+++ b/kernel/bpf/bloom_filter.c
@@ -86,9 +86,6 @@ static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_bloom_filter *bloom;
- if (!bpf_capable())
- return ERR_PTR(-EPERM);
-
if (attr->key_size != 0 || attr->value_size == 0 ||
attr->max_entries == 0 ||
attr->map_flags & ~BLOOM_CREATE_FLAG_MASK ||
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 47d9948d768f..b5149cfce7d4 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -723,9 +723,6 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
!attr->btf_key_type_id || !attr->btf_value_type_id)
return -EINVAL;
- if (!bpf_capable())
- return -EPERM;
-
if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
return -E2BIG;
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index d3f0a4825fa6..116a0ce378ec 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -655,9 +655,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
const struct btf_type *t, *vt;
struct bpf_map *map;
- if (!bpf_capable())
- return ERR_PTR(-EPERM);
-
st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
if (!st_ops)
return ERR_PTR(-ENOTSUPP);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index bbcae434fda5..29fe21099298 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -492,25 +492,26 @@ static bool btf_type_is_fwd(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
}
-static bool btf_type_nosize(const struct btf_type *t)
+static bool btf_type_is_datasec(const struct btf_type *t)
{
- return btf_type_is_void(t) || btf_type_is_fwd(t) ||
- btf_type_is_func(t) || btf_type_is_func_proto(t);
+ return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
}
-static bool btf_type_nosize_or_null(const struct btf_type *t)
+static bool btf_type_is_decl_tag(const struct btf_type *t)
{
- return !t || btf_type_nosize(t);
+ return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
}
-static bool btf_type_is_datasec(const struct btf_type *t)
+static bool btf_type_nosize(const struct btf_type *t)
{
- return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
+ return btf_type_is_void(t) || btf_type_is_fwd(t) ||
+ btf_type_is_func(t) || btf_type_is_func_proto(t) ||
+ btf_type_is_decl_tag(t);
}
-static bool btf_type_is_decl_tag(const struct btf_type *t)
+static bool btf_type_nosize_or_null(const struct btf_type *t)
{
- return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
+ return !t || btf_type_nosize(t);
}
static bool btf_type_is_decl_tag_target(const struct btf_type *t)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7421487422d4..dc85240a0134 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2064,14 +2064,16 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};
#undef PROG_NAME_LIST
#define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size),
-static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
- const struct bpf_insn *insn) = {
+static __maybe_unused
+u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
+ const struct bpf_insn *insn) = {
EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};
#undef PROG_NAME_LIST
+#ifdef CONFIG_BPF_SYSCALL
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
{
stack_depth = max_t(u32, stack_depth, 1);
@@ -2080,7 +2082,7 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
__bpf_call_base_args;
insn->code = BPF_JMP | BPF_CALL_ARGS;
}
-
+#endif
#else
static unsigned int __bpf_prog_ret0_warn(const void *ctx,
const struct bpf_insn *insn)
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8ec18faa74ac..8a33e8747a0e 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -28,7 +28,6 @@
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
-#include <linux/capability.h>
#include <trace/events/xdp.h>
#include <linux/btf_ids.h>
@@ -89,9 +88,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
u32 value_size = attr->value_size;
struct bpf_cpu_map *cmap;
- if (!bpf_capable())
- return ERR_PTR(-EPERM);
-
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
(value_size != offsetofend(struct bpf_cpumap_val, qsize) &&
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index 7efdf5d770ca..938a60ff4295 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -132,6 +132,21 @@ __bpf_kfunc u32 bpf_cpumask_first_zero(const struct cpumask *cpumask)
}
/**
+ * bpf_cpumask_first_and() - Return the index of the first nonzero bit from the
+ * AND of two cpumasks.
+ * @src1: The first cpumask.
+ * @src2: The second cpumask.
+ *
+ * Find the index of the first nonzero bit of the AND of two cpumasks.
+ * struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
+ */
+__bpf_kfunc u32 bpf_cpumask_first_and(const struct cpumask *src1,
+ const struct cpumask *src2)
+{
+ return cpumask_first_and(src1, src2);
+}
+
+/**
* bpf_cpumask_set_cpu() - Set a bit for a CPU in a BPF cpumask.
* @cpu: The CPU to be set in the cpumask.
* @cpumask: The BPF cpumask in which a bit is being set.
@@ -367,7 +382,7 @@ __bpf_kfunc void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask
}
/**
- * bpf_cpumask_any() - Return a random set CPU from a cpumask.
+ * bpf_cpumask_any_distribute() - Return a random set CPU from a cpumask.
* @cpumask: The cpumask being queried.
*
* Return:
@@ -376,26 +391,28 @@ __bpf_kfunc void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask
*
* A struct bpf_cpumask pointer may be safely passed to @src.
*/
-__bpf_kfunc u32 bpf_cpumask_any(const struct cpumask *cpumask)
+__bpf_kfunc u32 bpf_cpumask_any_distribute(const struct cpumask *cpumask)
{
- return cpumask_any(cpumask);
+ return cpumask_any_distribute(cpumask);
}
/**
- * bpf_cpumask_any_and() - Return a random set CPU from the AND of two
- * cpumasks.
+ * bpf_cpumask_any_and_distribute() - Return a random set CPU from the AND of
+ * two cpumasks.
* @src1: The first cpumask.
* @src2: The second cpumask.
*
* Return:
- * * A random set bit within [0, num_cpus) if at least one bit is set.
+ * * A random set bit within [0, num_cpus) from the AND of two cpumasks, if at
+ * least one bit is set.
* * >= num_cpus if no bit is set.
*
* struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
*/
-__bpf_kfunc u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2)
+__bpf_kfunc u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
+ const struct cpumask *src2)
{
- return cpumask_any_and(src1, src2);
+ return cpumask_any_and_distribute(src1, src2);
}
__diag_pop();
@@ -406,6 +423,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_first_and, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_RCU)
@@ -422,8 +440,8 @@ BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU)
BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU)
-BTF_ID_FLAGS(func, bpf_cpumask_any, KF_RCU)
-BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU)
+BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU)
BTF_SET8_END(cpumask_kfunc_btf_ids)
static const struct btf_kfunc_id_set cpumask_kfunc_set = {
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 802692fa3905..49cc0b5671c6 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -160,9 +160,6 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
struct bpf_dtab *dtab;
int err;
- if (!capable(CAP_NET_ADMIN))
- return ERR_PTR(-EPERM);
-
dtab = bpf_map_area_alloc(sizeof(*dtab), NUMA_NO_NODE);
if (!dtab)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 9901efee4339..56d3da7d0bc6 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -422,12 +422,6 @@ static int htab_map_alloc_check(union bpf_attr *attr)
BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
offsetof(struct htab_elem, hash_node.pprev));
- if (lru && !bpf_capable())
- /* LRU implementation is much complicated than other
- * maps. Hence, limit to CAP_BPF.
- */
- return -EPERM;
-
if (zero_seed && !capable(CAP_SYS_ADMIN))
/* Guard against local DoS, and discourage production use. */
return -EPERM;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4ef4c4f8a355..9e80efa59a5d 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1933,8 +1933,12 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta
* bpf_refcount type so that it is emitted in vmlinux BTF
*/
ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off);
+ if (!refcount_inc_not_zero((refcount_t *)ref))
+ return NULL;
- refcount_inc((refcount_t *)ref);
+ /* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null
+ * in verifier.c
+ */
return (void *)p__refcounted_kptr;
}
@@ -1950,7 +1954,7 @@ static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head
INIT_LIST_HEAD(h);
if (!list_empty(n)) {
/* Only called from BPF prog, no need to migrate_disable */
- __bpf_obj_drop_impl(n - off, rec);
+ __bpf_obj_drop_impl((void *)n - off, rec);
return -EINVAL;
}
@@ -2032,7 +2036,7 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node,
if (!RB_EMPTY_NODE(n)) {
/* Only called from BPF prog, no need to migrate_disable */
- __bpf_obj_drop_impl(n - off, rec);
+ __bpf_obj_drop_impl((void *)n - off, rec);
return -EINVAL;
}
@@ -2406,7 +2410,7 @@ BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index e0d3ddf2037a..17c7e7782a1f 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -544,9 +544,6 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
{
struct lpm_trie *trie;
- if (!bpf_capable())
- return ERR_PTR(-EPERM);
-
/* check sanity of attributes */
if (attr->max_entries == 0 ||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 410637c225fb..0668bcd7c926 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -211,9 +211,9 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node)
mem_cgroup_put(memcg);
}
-static void free_one(struct bpf_mem_cache *c, void *obj)
+static void free_one(void *obj, bool percpu)
{
- if (c->percpu_size) {
+ if (percpu) {
free_percpu(((void **)obj)[1]);
kfree(obj);
return;
@@ -222,14 +222,19 @@ static void free_one(struct bpf_mem_cache *c, void *obj)
kfree(obj);
}
-static void __free_rcu(struct rcu_head *head)
+static void free_all(struct llist_node *llnode, bool percpu)
{
- struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu);
- struct llist_node *llnode = llist_del_all(&c->waiting_for_gp);
struct llist_node *pos, *t;
llist_for_each_safe(pos, t, llnode)
- free_one(c, pos);
+ free_one(pos, percpu);
+}
+
+static void __free_rcu(struct rcu_head *head)
+{
+ struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu);
+
+ free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size);
atomic_set(&c->call_rcu_in_progress, 0);
}
@@ -432,7 +437,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
static void drain_mem_cache(struct bpf_mem_cache *c)
{
- struct llist_node *llnode, *t;
+ bool percpu = !!c->percpu_size;
/* No progs are using this bpf_mem_cache, but htab_map_free() called
* bpf_mem_cache_free() for all remaining elements and they can be in
@@ -441,14 +446,10 @@ static void drain_mem_cache(struct bpf_mem_cache *c)
* Except for waiting_for_gp list, there are no concurrent operations
* on these lists, so it is safe to use __llist_del_all().
*/
- llist_for_each_safe(llnode, t, __llist_del_all(&c->free_by_rcu))
- free_one(c, llnode);
- llist_for_each_safe(llnode, t, llist_del_all(&c->waiting_for_gp))
- free_one(c, llnode);
- llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist))
- free_one(c, llnode);
- llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist_extra))
- free_one(c, llnode);
+ free_all(__llist_del_all(&c->free_by_rcu), percpu);
+ free_all(llist_del_all(&c->waiting_for_gp), percpu);
+ free_all(__llist_del_all(&c->free_llist), percpu);
+ free_all(__llist_del_all(&c->free_llist_extra), percpu);
}
static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma)
diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c
index b56f9f3314fd..0c63bc2cd895 100644
--- a/kernel/bpf/preload/bpf_preload_kern.c
+++ b/kernel/bpf/preload/bpf_preload_kern.c
@@ -23,9 +23,9 @@ static void free_links_and_skel(void)
static int preload(struct bpf_preload_info *obj)
{
- strlcpy(obj[0].link_name, "maps.debug", sizeof(obj[0].link_name));
+ strscpy(obj[0].link_name, "maps.debug", sizeof(obj[0].link_name));
obj[0].link = maps_link;
- strlcpy(obj[1].link_name, "progs.debug", sizeof(obj[1].link_name));
+ strscpy(obj[1].link_name, "progs.debug", sizeof(obj[1].link_name));
obj[1].link = progs_link;
return 0;
}
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 601609164ef3..8d2ddcb7566b 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -7,7 +7,6 @@
#include <linux/bpf.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <linux/capability.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"
@@ -46,9 +45,6 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
/* Called from syscall */
static int queue_stack_map_alloc_check(union bpf_attr *attr)
{
- if (!bpf_capable())
- return -EPERM;
-
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 0 ||
attr->value_size == 0 ||
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index cbf2d8d784b8..4b4f9670f1a9 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -151,9 +151,6 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
int numa_node = bpf_map_attr_numa_node(attr);
struct reuseport_array *array;
- if (!bpf_capable())
- return ERR_PTR(-EPERM);
-
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(struct_size(array, ptrs, attr->max_entries), numa_node);
if (!array)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b25fce425b2c..458bb80b14d5 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -74,9 +74,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
u64 cost, n_buckets;
int err;
- if (!bpf_capable())
- return ERR_PTR(-EPERM);
-
if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4497b193dd20..a2aef900519c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -109,37 +109,6 @@ const struct bpf_map_ops bpf_map_offload_ops = {
.map_mem_usage = bpf_map_offload_map_mem_usage,
};
-static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
-{
- const struct bpf_map_ops *ops;
- u32 type = attr->map_type;
- struct bpf_map *map;
- int err;
-
- if (type >= ARRAY_SIZE(bpf_map_types))
- return ERR_PTR(-EINVAL);
- type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
- ops = bpf_map_types[type];
- if (!ops)
- return ERR_PTR(-EINVAL);
-
- if (ops->map_alloc_check) {
- err = ops->map_alloc_check(attr);
- if (err)
- return ERR_PTR(err);
- }
- if (attr->map_ifindex)
- ops = &bpf_map_offload_ops;
- if (!ops->map_mem_usage)
- return ERR_PTR(-EINVAL);
- map = ops->map_alloc(attr);
- if (IS_ERR(map))
- return map;
- map->ops = ops;
- map->map_type = type;
- return map;
-}
-
static void bpf_map_write_active_inc(struct bpf_map *map)
{
atomic64_inc(&map->writecnt);
@@ -1127,7 +1096,9 @@ free_map_tab:
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
+ const struct bpf_map_ops *ops;
int numa_node = bpf_map_attr_numa_node(attr);
+ u32 map_type = attr->map_type;
struct bpf_map *map;
int f_flags;
int err;
@@ -1158,9 +1129,85 @@ static int map_create(union bpf_attr *attr)
return -EINVAL;
/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
- map = find_and_alloc_map(attr);
+ map_type = attr->map_type;
+ if (map_type >= ARRAY_SIZE(bpf_map_types))
+ return -EINVAL;
+ map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types));
+ ops = bpf_map_types[map_type];
+ if (!ops)
+ return -EINVAL;
+
+ if (ops->map_alloc_check) {
+ err = ops->map_alloc_check(attr);
+ if (err)
+ return err;
+ }
+ if (attr->map_ifindex)
+ ops = &bpf_map_offload_ops;
+ if (!ops->map_mem_usage)
+ return -EINVAL;
+
+ /* Intent here is for unprivileged_bpf_disabled to block BPF map
+ * creation for unprivileged users; other actions depend
+ * on fd availability and access to bpffs, so are dependent on
+ * object creation success. Even with unprivileged BPF disabled,
+ * capability checks are still carried out.
+ */
+ if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
+ return -EPERM;
+
+ /* check privileged map type permissions */
+ switch (map_type) {
+ case BPF_MAP_TYPE_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_PROG_ARRAY:
+ case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+ case BPF_MAP_TYPE_CGROUP_ARRAY:
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_HASH:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ case BPF_MAP_TYPE_RINGBUF:
+ case BPF_MAP_TYPE_USER_RINGBUF:
+ case BPF_MAP_TYPE_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+ /* unprivileged */
+ break;
+ case BPF_MAP_TYPE_SK_STORAGE:
+ case BPF_MAP_TYPE_INODE_STORAGE:
+ case BPF_MAP_TYPE_TASK_STORAGE:
+ case BPF_MAP_TYPE_CGRP_STORAGE:
+ case BPF_MAP_TYPE_BLOOM_FILTER:
+ case BPF_MAP_TYPE_LPM_TRIE:
+ case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+ case BPF_MAP_TYPE_STACK_TRACE:
+ case BPF_MAP_TYPE_QUEUE:
+ case BPF_MAP_TYPE_STACK:
+ case BPF_MAP_TYPE_LRU_HASH:
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+ case BPF_MAP_TYPE_STRUCT_OPS:
+ case BPF_MAP_TYPE_CPUMAP:
+ if (!bpf_capable())
+ return -EPERM;
+ break;
+ case BPF_MAP_TYPE_SOCKMAP:
+ case BPF_MAP_TYPE_SOCKHASH:
+ case BPF_MAP_TYPE_DEVMAP:
+ case BPF_MAP_TYPE_DEVMAP_HASH:
+ case BPF_MAP_TYPE_XSKMAP:
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ break;
+ default:
+ WARN(1, "unsupported map type %d", map_type);
+ return -EPERM;
+ }
+
+ map = ops->map_alloc(attr);
if (IS_ERR(map))
return PTR_ERR(map);
+ map->ops = ops;
+ map->map_type = map_type;
err = bpf_obj_name_cpy(map->name, attr->map_name,
sizeof(attr->map_name));
@@ -2507,7 +2554,6 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
struct btf *attach_btf = NULL;
int err;
char license[128];
- bool is_gpl;
if (CHECK_ATTR(BPF_PROG_LOAD))
return -EINVAL;
@@ -2526,15 +2572,15 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
!bpf_capable())
return -EPERM;
- /* copy eBPF program license from user space */
- if (strncpy_from_bpfptr(license,
- make_bpfptr(attr->license, uattr.is_kernel),
- sizeof(license) - 1) < 0)
- return -EFAULT;
- license[sizeof(license) - 1] = 0;
-
- /* eBPF programs must be GPL compatible to use GPL-ed functions */
- is_gpl = license_is_gpl_compatible(license);
+ /* Intent here is for unprivileged_bpf_disabled to block BPF program
+ * creation for unprivileged users; other actions depend
+ * on fd availability and access to bpffs, so are dependent on
+ * object creation success. Even with unprivileged BPF disabled,
+ * capability checks are still carried out for these
+ * and other operations.
+ */
+ if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
+ return -EPERM;
if (attr->insn_cnt == 0 ||
attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
@@ -2618,12 +2664,20 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
make_bpfptr(attr->insns, uattr.is_kernel),
bpf_prog_insn_size(prog)) != 0)
goto free_prog_sec;
+ /* copy eBPF program license from user space */
+ if (strncpy_from_bpfptr(license,
+ make_bpfptr(attr->license, uattr.is_kernel),
+ sizeof(license) - 1) < 0)
+ goto free_prog_sec;
+ license[sizeof(license) - 1] = 0;
+
+ /* eBPF programs must be GPL compatible to use GPL-ed functions */
+ prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0;
prog->orig_prog = NULL;
prog->jited = 0;
atomic64_set(&prog->aux->refcnt, 1);
- prog->gpl_compatible = is_gpl ? 1 : 0;
if (bpf_prog_is_dev_bound(prog->aux)) {
err = bpf_prog_dev_bound_init(prog, attr);
@@ -2797,28 +2851,31 @@ static void bpf_link_put_deferred(struct work_struct *work)
bpf_link_free(link);
}
-/* bpf_link_put can be called from atomic context, but ensures that resources
- * are freed from process context
+/* bpf_link_put might be called from atomic context. It needs to be called
+ * from sleepable context in order to acquire sleeping locks during the process.
*/
void bpf_link_put(struct bpf_link *link)
{
if (!atomic64_dec_and_test(&link->refcnt))
return;
- if (in_atomic()) {
- INIT_WORK(&link->work, bpf_link_put_deferred);
- schedule_work(&link->work);
- } else {
- bpf_link_free(link);
- }
+ INIT_WORK(&link->work, bpf_link_put_deferred);
+ schedule_work(&link->work);
}
EXPORT_SYMBOL(bpf_link_put);
+static void bpf_link_put_direct(struct bpf_link *link)
+{
+ if (!atomic64_dec_and_test(&link->refcnt))
+ return;
+ bpf_link_free(link);
+}
+
static int bpf_link_release(struct inode *inode, struct file *filp)
{
struct bpf_link *link = filp->private_data;
- bpf_link_put(link);
+ bpf_link_put_direct(link);
return 0;
}
@@ -4801,7 +4858,7 @@ out_put_progs:
if (ret)
bpf_prog_put(new_prog);
out_put_link:
- bpf_link_put(link);
+ bpf_link_put_direct(link);
return ret;
}
@@ -4824,7 +4881,7 @@ static int link_detach(union bpf_attr *attr)
else
ret = -EOPNOTSUPP;
- bpf_link_put(link);
+ bpf_link_put_direct(link);
return ret;
}
@@ -4894,7 +4951,7 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
fd = bpf_link_new_fd(link);
if (fd < 0)
- bpf_link_put(link);
+ bpf_link_put_direct(link);
return fd;
}
@@ -4971,7 +5028,7 @@ static int bpf_iter_create(union bpf_attr *attr)
return PTR_ERR(link);
err = bpf_iter_new_fd(link);
- bpf_link_put(link);
+ bpf_link_put_direct(link);
return err;
}
@@ -5041,23 +5098,8 @@ out_prog_put:
static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
- bool capable;
int err;
- capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;
-
- /* Intent here is for unprivileged_bpf_disabled to block key object
- * creation commands for unprivileged users; other actions depend
- * of fd availability and access to bpffs, so are dependent on
- * object creation success. Capabilities are later verified for
- * operations such as load and map create, so even with unprivileged
- * BPF disabled, capability checks are still carried out for these
- * and other operations.
- */
- if (!capable &&
- (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
- return -EPERM;
-
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err)
return err;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b54193de762b..11e54dd8b6dd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -197,6 +197,7 @@ static int ref_set_non_owning(struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
static void specialize_kfunc(struct bpf_verifier_env *env,
u32 func_id, u16 offset, unsigned long *addr);
+static bool is_trusted_reg(const struct bpf_reg_state *reg);
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
@@ -298,16 +299,19 @@ struct bpf_kfunc_call_arg_meta {
bool found;
} arg_constant;
- /* arg_btf and arg_btf_id are used by kfunc-specific handling,
+ /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
* generally to pass info about user-defined local kptr types to later
* verification logic
* bpf_obj_drop
* Record the local kptr type to be drop'd
* bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
- * Record the local kptr type to be refcount_incr'd
+ * Record the local kptr type to be refcount_incr'd and use
+ * arg_owning_ref to determine whether refcount_acquire should be
+ * fallible
*/
struct btf *arg_btf;
u32 arg_btf_id;
+ bool arg_owning_ref;
struct {
struct btf_field *field;
@@ -439,8 +443,11 @@ static bool type_may_be_null(u32 type)
return type & PTR_MAYBE_NULL;
}
-static bool reg_type_not_null(enum bpf_reg_type type)
+static bool reg_not_null(const struct bpf_reg_state *reg)
{
+ enum bpf_reg_type type;
+
+ type = reg->type;
if (type_may_be_null(type))
return false;
@@ -450,6 +457,7 @@ static bool reg_type_not_null(enum bpf_reg_type type)
type == PTR_TO_MAP_VALUE ||
type == PTR_TO_MAP_KEY ||
type == PTR_TO_SOCK_COMMON ||
+ (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
type == PTR_TO_MEM;
}
@@ -3771,6 +3779,96 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_
}
}
+static bool idset_contains(struct bpf_idset *s, u32 id)
+{
+ u32 i;
+
+ for (i = 0; i < s->count; ++i)
+ if (s->ids[i] == id)
+ return true;
+
+ return false;
+}
+
+static int idset_push(struct bpf_idset *s, u32 id)
+{
+ if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids)))
+ return -EFAULT;
+ s->ids[s->count++] = id;
+ return 0;
+}
+
+static void idset_reset(struct bpf_idset *s)
+{
+ s->count = 0;
+}
+
+/* Collect a set of IDs for all registers currently marked as precise in env->bt.
+ * Mark all registers with these IDs as precise.
+ */
+static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
+{
+ struct bpf_idset *precise_ids = &env->idset_scratch;
+ struct backtrack_state *bt = &env->bt;
+ struct bpf_func_state *func;
+ struct bpf_reg_state *reg;
+ DECLARE_BITMAP(mask, 64);
+ int i, fr;
+
+ idset_reset(precise_ids);
+
+ for (fr = bt->frame; fr >= 0; fr--) {
+ func = st->frame[fr];
+
+ bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
+ for_each_set_bit(i, mask, 32) {
+ reg = &func->regs[i];
+ if (!reg->id || reg->type != SCALAR_VALUE)
+ continue;
+ if (idset_push(precise_ids, reg->id))
+ return -EFAULT;
+ }
+
+ bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
+ for_each_set_bit(i, mask, 64) {
+ if (i >= func->allocated_stack / BPF_REG_SIZE)
+ break;
+ if (!is_spilled_scalar_reg(&func->stack[i]))
+ continue;
+ reg = &func->stack[i].spilled_ptr;
+ if (!reg->id)
+ continue;
+ if (idset_push(precise_ids, reg->id))
+ return -EFAULT;
+ }
+ }
+
+ for (fr = 0; fr <= st->curframe; ++fr) {
+ func = st->frame[fr];
+
+ for (i = BPF_REG_0; i < BPF_REG_10; ++i) {
+ reg = &func->regs[i];
+ if (!reg->id)
+ continue;
+ if (!idset_contains(precise_ids, reg->id))
+ continue;
+ bt_set_frame_reg(bt, fr, i);
+ }
+ for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) {
+ if (!is_spilled_scalar_reg(&func->stack[i]))
+ continue;
+ reg = &func->stack[i].spilled_ptr;
+ if (!reg->id)
+ continue;
+ if (!idset_contains(precise_ids, reg->id))
+ continue;
+ bt_set_frame_slot(bt, fr, i);
+ }
+ }
+
+ return 0;
+}
+
/*
* __mark_chain_precision() backtracks BPF program instruction sequence and
* chain of verifier states making sure that register *regno* (if regno >= 0)
@@ -3902,6 +4000,31 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
bt->frame, last_idx, first_idx, subseq_idx);
}
+ /* If some register with scalar ID is marked as precise,
+ * make sure that all registers sharing this ID are also precise.
+ * This is needed to estimate effect of find_equal_scalars().
+ * Do this at the last instruction of each state,
+ * bpf_reg_state::id fields are valid for these instructions.
+ *
+ * Allows to track precision in situation like below:
+ *
+ * r2 = unknown value
+ * ...
+ * --- state #0 ---
+ * ...
+ * r1 = r2 // r1 and r2 now share the same ID
+ * ...
+ * --- state #1 {r1.id = A, r2.id = A} ---
+ * ...
+ * if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1
+ * ...
+ * --- state #2 {r1.id = A, r2.id = A} ---
+ * r3 = r10
+ * r3 += r1 // need to mark both r1 and r2
+ */
+ if (mark_precise_scalar_ids(env, st))
+ return -EFAULT;
+
if (last_idx < 0) {
/* we are at the entry into subprog, which
* is expected for global funcs, but only if
@@ -5894,7 +6017,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
* program allocated objects (which always have ref_obj_id > 0),
* but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
*/
- if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
verbose(env, "only read is supported\n");
return -EACCES;
}
@@ -7514,7 +7637,7 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
if (base_type(arg_type) == ARG_PTR_TO_MEM)
type &= ~DYNPTR_TYPE_FLAG_MASK;
- if (meta->func_id == BPF_FUNC_kptr_xchg && type & MEM_ALLOC)
+ if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type))
type &= ~MEM_ALLOC;
for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
@@ -9681,11 +9804,6 @@ static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_ACQUIRE;
}
-static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
-{
- return meta->kfunc_flags & KF_RET_NULL;
-}
-
static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->kfunc_flags & KF_RELEASE;
@@ -10001,6 +10119,16 @@ BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
+static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
+{
+ if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
+ meta->arg_owning_ref) {
+ return false;
+ }
+
+ return meta->kfunc_flags & KF_RET_NULL;
+}
+
static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
@@ -10478,6 +10606,8 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
node_off, btf_name_by_offset(reg->btf, t->name_off));
return -EINVAL;
}
+ meta->arg_btf = reg->btf;
+ meta->arg_btf_id = reg->btf_id;
if (node_off != field->graph_root.node_offset) {
verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
@@ -10881,10 +11011,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
meta->subprogno = reg->subprogno;
break;
case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
- if (!type_is_ptr_alloc_obj(reg->type) && !type_is_non_owning_ref(reg->type)) {
+ if (!type_is_ptr_alloc_obj(reg->type)) {
verbose(env, "arg#%d is neither owning or non-owning ref\n", i);
return -EINVAL;
}
+ if (!type_is_non_owning_ref(reg->type))
+ meta->arg_owning_ref = true;
rec = reg_btf_record(reg);
if (!rec) {
@@ -11047,6 +11179,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
insn_aux->insert_off = regs[BPF_REG_2].off;
+ insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
err = ref_convert_owning_non_owning(env, release_ref_obj_id);
if (err) {
verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
@@ -12804,12 +12937,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (BPF_SRC(insn->code) == BPF_X) {
struct bpf_reg_state *src_reg = regs + insn->src_reg;
struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
+ bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id &&
+ !tnum_is_const(src_reg->var_off);
if (BPF_CLASS(insn->code) == BPF_ALU64) {
/* case: R1 = R2
* copy register state to dest reg
*/
- if (src_reg->type == SCALAR_VALUE && !src_reg->id)
+ if (need_id)
/* Assign src and dst registers the same ID
* that will be used by find_equal_scalars()
* to propagate min/max range.
@@ -12828,7 +12963,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
} else if (src_reg->type == SCALAR_VALUE) {
bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
- if (is_src_reg_u32 && !src_reg->id)
+ if (is_src_reg_u32 && need_id)
src_reg->id = ++env->id_gen;
copy_register_state(dst_reg, src_reg);
/* Make sure ID is cleared if src_reg is not in u32 range otherwise
@@ -13160,7 +13295,7 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
bool is_jmp32)
{
if (__is_pointer_value(false, reg)) {
- if (!reg_type_not_null(reg->type))
+ if (!reg_not_null(reg))
return -1;
/* If pointer is valid tests against zero will fail so we can
@@ -14984,8 +15119,9 @@ static bool range_within(struct bpf_reg_state *old,
* So we look through our idmap to see if this old id has been seen before. If
* so, we require the new id to match; otherwise, we add the id pair to the map.
*/
-static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
+static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
{
+ struct bpf_id_pair *map = idmap->map;
unsigned int i;
/* either both IDs should be set or both should be zero */
@@ -14996,20 +15132,34 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
return true;
for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
- if (!idmap[i].old) {
+ if (!map[i].old) {
/* Reached an empty slot; haven't seen this id before */
- idmap[i].old = old_id;
- idmap[i].cur = cur_id;
+ map[i].old = old_id;
+ map[i].cur = cur_id;
return true;
}
- if (idmap[i].old == old_id)
- return idmap[i].cur == cur_id;
+ if (map[i].old == old_id)
+ return map[i].cur == cur_id;
+ if (map[i].cur == cur_id)
+ return false;
}
/* We ran out of idmap slots, which should be impossible */
WARN_ON_ONCE(1);
return false;
}
+/* Similar to check_ids(), but allocate a unique temporary ID
+ * for 'old_id' or 'cur_id' of zero.
+ * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid.
+ */
+static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
+{
+ old_id = old_id ? old_id : ++idmap->tmp_id_gen;
+ cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
+
+ return check_ids(old_id, cur_id, idmap);
+}
+
static void clean_func_state(struct bpf_verifier_env *env,
struct bpf_func_state *st)
{
@@ -15108,7 +15258,7 @@ next:
static bool regs_exact(const struct bpf_reg_state *rold,
const struct bpf_reg_state *rcur,
- struct bpf_id_pair *idmap)
+ struct bpf_idmap *idmap)
{
return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
check_ids(rold->id, rcur->id, idmap) &&
@@ -15117,7 +15267,7 @@ static bool regs_exact(const struct bpf_reg_state *rold,
/* Returns true if (rold safe implies rcur safe) */
static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
- struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
+ struct bpf_reg_state *rcur, struct bpf_idmap *idmap)
{
if (!(rold->live & REG_LIVE_READ))
/* explored state didn't use this */
@@ -15154,15 +15304,42 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
switch (base_type(rold->type)) {
case SCALAR_VALUE:
- if (regs_exact(rold, rcur, idmap))
- return true;
- if (env->explore_alu_limits)
- return false;
+ if (env->explore_alu_limits) {
+ /* explore_alu_limits disables tnum_in() and range_within()
+ * logic and requires everything to be strict
+ */
+ return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
+ check_scalar_ids(rold->id, rcur->id, idmap);
+ }
if (!rold->precise)
return true;
- /* new val must satisfy old val knowledge */
+ /* Why check_ids() for scalar registers?
+ *
+ * Consider the following BPF code:
+ * 1: r6 = ... unbound scalar, ID=a ...
+ * 2: r7 = ... unbound scalar, ID=b ...
+ * 3: if (r6 > r7) goto +1
+ * 4: r6 = r7
+ * 5: if (r6 > X) goto ...
+ * 6: ... memory operation using r7 ...
+ *
+ * First verification path is [1-6]:
+ * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
+ * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark
+ * r7 <= X, because r6 and r7 share same id.
+ * Next verification path is [1-4, 6].
+ *
+ * Instruction (6) would be reached in two states:
+ * I. r6{.id=b}, r7{.id=b} via path 1-6;
+ * II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
+ *
+ * Use check_ids() to distinguish these states.
+ * ---
+ * Also verify that new value satisfies old value range knowledge.
+ */
return range_within(rold, rcur) &&
- tnum_in(rold->var_off, rcur->var_off);
+ tnum_in(rold->var_off, rcur->var_off) &&
+ check_scalar_ids(rold->id, rcur->id, idmap);
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
case PTR_TO_MEM:
@@ -15208,7 +15385,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
}
static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
- struct bpf_func_state *cur, struct bpf_id_pair *idmap)
+ struct bpf_func_state *cur, struct bpf_idmap *idmap)
{
int i, spi;
@@ -15311,7 +15488,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
}
static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
- struct bpf_id_pair *idmap)
+ struct bpf_idmap *idmap)
{
int i;
@@ -15359,13 +15536,13 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
for (i = 0; i < MAX_BPF_REG; i++)
if (!regsafe(env, &old->regs[i], &cur->regs[i],
- env->idmap_scratch))
+ &env->idmap_scratch))
return false;
- if (!stacksafe(env, old, cur, env->idmap_scratch))
+ if (!stacksafe(env, old, cur, &env->idmap_scratch))
return false;
- if (!refsafe(old, cur, env->idmap_scratch))
+ if (!refsafe(old, cur, &env->idmap_scratch))
return false;
return true;
@@ -15380,7 +15557,8 @@ static bool states_equal(struct bpf_verifier_env *env,
if (old->curframe != cur->curframe)
return false;
- memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
+ env->idmap_scratch.tmp_id_gen = env->id_gen;
+ memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
/* Verification state from speculative execution simulation
* must never prune a non-speculative execution one.
@@ -15398,7 +15576,7 @@ static bool states_equal(struct bpf_verifier_env *env,
return false;
if (old->active_lock.id &&
- !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch))
+ !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch))
return false;
if (old->active_rcu_lock != cur->active_rcu_lock)