summaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2020-10-12 16:16:50 -0700
committerJakub Kicinski <kuba@kernel.org>2020-10-12 16:16:50 -0700
commitccdf7fae3afaeaf0e5dd03311b86ffa56adf85ae (patch)
tree3028901b29bf4ab04cd50d61da4fecdb20b182b6 /kernel/bpf
parenta308283fdbf712b30061d2b4567530eb9e8dc1b4 (diff)
parent376dcfe3a4e5a5475a84e6b5f926066a8614f887 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2020-10-12 The main changes are: 1) The BPF verifier improvements to track register allocation pattern, from Alexei and Yonghong. 2) libbpf relocation support for different size load/store, from Andrii. 3) bpf_redirect_peer() helper and support for inner map array with different max_entries, from Daniel. 4) BPF support for per-cpu variables, form Hao. 5) sockmap improvements, from John. ==================== Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/arraymap.c17
-rw-r--r--kernel/bpf/btf.c25
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/helpers.c32
-rw-r--r--kernel/bpf/percpu_freelist.c101
-rw-r--r--kernel/bpf/percpu_freelist.h1
-rw-r--r--kernel/bpf/syscall.c4
-rw-r--r--kernel/bpf/verifier.c270
8 files changed, 397 insertions, 59 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index bd777dd6f967..c6c81eceb68f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -16,7 +16,7 @@
#define ARRAY_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
- BPF_F_PRESERVE_ELEMS)
+ BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)
static void bpf_array_free_percpu(struct bpf_array *array)
{
@@ -62,7 +62,7 @@ int array_map_alloc_check(union bpf_attr *attr)
return -EINVAL;
if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
- attr->map_flags & BPF_F_MMAPABLE)
+ attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
return -EINVAL;
if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
@@ -214,7 +214,7 @@ static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
}
/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
-static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_insn *insn = insn_buf;
@@ -223,6 +223,9 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
const int map_ptr = BPF_REG_1;
const int index = BPF_REG_2;
+ if (map->map_flags & BPF_F_INNER_MAP)
+ return -EOPNOTSUPP;
+
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
if (!map->bypass_spec_v1) {
@@ -496,8 +499,10 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
static bool array_map_meta_equal(const struct bpf_map *meta0,
const struct bpf_map *meta1)
{
- return meta0->max_entries == meta1->max_entries &&
- bpf_map_meta_equal(meta0, meta1);
+ if (!bpf_map_meta_equal(meta0, meta1))
+ return false;
+ return meta0->map_flags & BPF_F_INNER_MAP ? true :
+ meta0->max_entries == meta1->max_entries;
}
struct bpf_iter_seq_array_map_info {
@@ -1251,7 +1256,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
return READ_ONCE(*inner_map);
}
-static u32 array_of_map_gen_lookup(struct bpf_map *map,
+static int array_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4d0ee7839fdb..ed7d02e8bc93 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -188,11 +188,6 @@
i < btf_type_vlen(struct_type); \
i++, member++)
-#define for_each_vsi(i, struct_type, member) \
- for (i = 0, member = btf_type_var_secinfo(struct_type); \
- i < btf_type_vlen(struct_type); \
- i++, member++)
-
#define for_each_vsi_from(i, from, struct_type, member) \
for (i = from, member = btf_type_var_secinfo(struct_type) + from; \
i < btf_type_vlen(struct_type); \
@@ -440,16 +435,6 @@ static bool btf_type_nosize_or_null(const struct btf_type *t)
return !t || btf_type_nosize(t);
}
-/* union is only a special case of struct:
- * all its offsetof(member) == 0
- */
-static bool btf_type_is_struct(const struct btf_type *t)
-{
- u8 kind = BTF_INFO_KIND(t->info);
-
- return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
-}
-
static bool __btf_type_is_struct(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
@@ -460,11 +445,6 @@ static bool btf_type_is_array(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
}
-static bool btf_type_is_var(const struct btf_type *t)
-{
- return BTF_INFO_KIND(t->info) == BTF_KIND_VAR;
-}
-
static bool btf_type_is_datasec(const struct btf_type *t)
{
return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
@@ -613,11 +593,6 @@ static const struct btf_var *btf_type_var(const struct btf_type *t)
return (const struct btf_var *)(t + 1);
}
-static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t)
-{
- return (const struct btf_var_secinfo *)(t + 1);
-}
-
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3395cf140d22..1815e97d4c9c 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -612,7 +612,7 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
* bpf_prog
* __htab_map_lookup_elem
*/
-static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
@@ -651,7 +651,7 @@ static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
return __htab_lru_map_lookup_elem(map, key, false);
}
-static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
+static int htab_lru_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
struct bpf_insn *insn = insn_buf;
@@ -2070,7 +2070,7 @@ static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
return READ_ONCE(*inner_map);
}
-static u32 htab_of_map_gen_lookup(struct bpf_map *map,
+static int htab_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
struct bpf_insn *insn = insn_buf;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index e825441781ab..25520f5eeaf6 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -623,6 +623,34 @@ const struct bpf_func_proto bpf_copy_from_user_proto = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu)
+{
+ if (cpu >= nr_cpu_ids)
+ return (unsigned long)NULL;
+
+ return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu);
+}
+
+const struct bpf_func_proto bpf_per_cpu_ptr_proto = {
+ .func = bpf_per_cpu_ptr,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
+ .arg2_type = ARG_ANYTHING,
+};
+
+BPF_CALL_1(bpf_this_cpu_ptr, const void *, percpu_ptr)
+{
+ return (unsigned long)this_cpu_ptr((const void __percpu *)percpu_ptr);
+}
+
+const struct bpf_func_proto bpf_this_cpu_ptr_proto = {
+ .func = bpf_this_cpu_ptr,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID,
+ .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID,
+};
+
const struct bpf_func_proto bpf_get_current_task_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
@@ -689,6 +717,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_snprintf_btf_proto;
case BPF_FUNC_jiffies64:
return &bpf_jiffies64_proto;
+ case BPF_FUNC_bpf_per_cpu_ptr:
+ return &bpf_per_cpu_ptr_proto;
+ case BPF_FUNC_bpf_this_cpu_ptr:
+ return &bpf_this_cpu_ptr_proto;
default:
break;
}
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index b367430e611c..3d897de89061 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -17,6 +17,8 @@ int pcpu_freelist_init(struct pcpu_freelist *s)
raw_spin_lock_init(&head->lock);
head->first = NULL;
}
+ raw_spin_lock_init(&s->extralist.lock);
+ s->extralist.first = NULL;
return 0;
}
@@ -40,12 +42,50 @@ static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
raw_spin_unlock(&head->lock);
}
+static inline bool pcpu_freelist_try_push_extra(struct pcpu_freelist *s,
+ struct pcpu_freelist_node *node)
+{
+ if (!raw_spin_trylock(&s->extralist.lock))
+ return false;
+
+ pcpu_freelist_push_node(&s->extralist, node);
+ raw_spin_unlock(&s->extralist.lock);
+ return true;
+}
+
+static inline void ___pcpu_freelist_push_nmi(struct pcpu_freelist *s,
+ struct pcpu_freelist_node *node)
+{
+ int cpu, orig_cpu;
+
+ orig_cpu = cpu = raw_smp_processor_id();
+ while (1) {
+ struct pcpu_freelist_head *head;
+
+ head = per_cpu_ptr(s->freelist, cpu);
+ if (raw_spin_trylock(&head->lock)) {
+ pcpu_freelist_push_node(head, node);
+ raw_spin_unlock(&head->lock);
+ return;
+ }
+ cpu = cpumask_next(cpu, cpu_possible_mask);
+ if (cpu >= nr_cpu_ids)
+ cpu = 0;
+
+ /* cannot lock any per cpu lock, try extralist */
+ if (cpu == orig_cpu &&
+ pcpu_freelist_try_push_extra(s, node))
+ return;
+ }
+}
+
void __pcpu_freelist_push(struct pcpu_freelist *s,
struct pcpu_freelist_node *node)
{
- struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
-
- ___pcpu_freelist_push(head, node);
+ if (in_nmi())
+ ___pcpu_freelist_push_nmi(s, node);
+ else
+ ___pcpu_freelist_push(this_cpu_ptr(s->freelist), node);
}
void pcpu_freelist_push(struct pcpu_freelist *s,
@@ -81,7 +121,7 @@ again:
}
}
-struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
+static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
{
struct pcpu_freelist_head *head;
struct pcpu_freelist_node *node;
@@ -102,8 +142,59 @@ struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
if (cpu >= nr_cpu_ids)
cpu = 0;
if (cpu == orig_cpu)
- return NULL;
+ break;
+ }
+
+ /* per cpu lists are all empty, try extralist */
+ raw_spin_lock(&s->extralist.lock);
+ node = s->extralist.first;
+ if (node)
+ s->extralist.first = node->next;
+ raw_spin_unlock(&s->extralist.lock);
+ return node;
+}
+
+static struct pcpu_freelist_node *
+___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
+{
+ struct pcpu_freelist_head *head;
+ struct pcpu_freelist_node *node;
+ int orig_cpu, cpu;
+
+ orig_cpu = cpu = raw_smp_processor_id();
+ while (1) {
+ head = per_cpu_ptr(s->freelist, cpu);
+ if (raw_spin_trylock(&head->lock)) {
+ node = head->first;
+ if (node) {
+ head->first = node->next;
+ raw_spin_unlock(&head->lock);
+ return node;
+ }
+ raw_spin_unlock(&head->lock);
+ }
+ cpu = cpumask_next(cpu, cpu_possible_mask);
+ if (cpu >= nr_cpu_ids)
+ cpu = 0;
+ if (cpu == orig_cpu)
+ break;
}
+
+ /* cannot pop from per cpu lists, try extralist */
+ if (!raw_spin_trylock(&s->extralist.lock))
+ return NULL;
+ node = s->extralist.first;
+ if (node)
+ s->extralist.first = node->next;
+ raw_spin_unlock(&s->extralist.lock);
+ return node;
+}
+
+struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
+{
+ if (in_nmi())
+ return ___pcpu_freelist_pop_nmi(s);
+ return ___pcpu_freelist_pop(s);
}
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h
index fbf8a8a28979..3c76553cfe57 100644
--- a/kernel/bpf/percpu_freelist.h
+++ b/kernel/bpf/percpu_freelist.h
@@ -13,6 +13,7 @@ struct pcpu_freelist_head {
struct pcpu_freelist {
struct pcpu_freelist_head __percpu *freelist;
+ struct pcpu_freelist_head extralist;
};
struct pcpu_freelist_node {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f1528c2a6927..1110ecd7d1f3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4323,8 +4323,10 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
used_maps_old = prog->aux->used_maps;
for (i = 0; i < prog->aux->used_map_cnt; i++)
- if (used_maps_old[i] == map)
+ if (used_maps_old[i] == map) {
+ bpf_map_put(map);
goto out_unlock;
+ }
used_maps_new = kmalloc_array(prog->aux->used_map_cnt + 1,
sizeof(used_maps_new[0]),
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3923c570070b..c43a5e8f0818 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -238,6 +238,8 @@ struct bpf_call_arg_meta {
u64 msize_max_value;
int ref_obj_id;
int func_id;
+ u32 btf_id;
+ u32 ret_btf_id;
};
struct btf *btf_vmlinux;
@@ -517,6 +519,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_XDP_SOCK] = "xdp_sock",
[PTR_TO_BTF_ID] = "ptr_",
[PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
+ [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
[PTR_TO_MEM] = "mem",
[PTR_TO_MEM_OR_NULL] = "mem_or_null",
[PTR_TO_RDONLY_BUF] = "rdonly_buf",
@@ -583,7 +586,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
/* reg->off should be 0 for SCALAR_VALUE */
verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
- if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
+ if (t == PTR_TO_BTF_ID ||
+ t == PTR_TO_BTF_ID_OR_NULL ||
+ t == PTR_TO_PERCPU_BTF_ID)
verbose(env, "%s", kernel_type_name(reg->btf_id));
verbose(env, "(id=%d", reg->id);
if (reg_type_may_be_refcounted_or_null(t))
@@ -2204,6 +2209,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_RDONLY_BUF_OR_NULL:
case PTR_TO_RDWR_BUF:
case PTR_TO_RDWR_BUF_OR_NULL:
+ case PTR_TO_PERCPU_BTF_ID:
return true;
default:
return false;
@@ -2221,6 +2227,20 @@ static bool register_is_const(struct bpf_reg_state *reg)
return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
}
+static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
+{
+ return tnum_is_unknown(reg->var_off) &&
+ reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
+ reg->umin_value == 0 && reg->umax_value == U64_MAX &&
+ reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
+ reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
+}
+
+static bool register_is_bounded(struct bpf_reg_state *reg)
+{
+ return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
+}
+
static bool __is_pointer_value(bool allow_ptr_leaks,
const struct bpf_reg_state *reg)
{
@@ -2272,7 +2292,7 @@ static int check_stack_write(struct bpf_verifier_env *env,
if (value_regno >= 0)
reg = &cur->regs[value_regno];
- if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
+ if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
!register_is_null(reg) && env->bpf_capable) {
if (dst_reg != BPF_REG_FP) {
/* The backtracking logic can only recognize explicit
@@ -2667,7 +2687,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
case BPF_PROG_TYPE_CGROUP_SKB:
if (t == BPF_WRITE)
return false;
- /* fallthrough */
+ fallthrough;
/* Program types with direct read + write access go here! */
case BPF_PROG_TYPE_SCHED_CLS:
@@ -3978,6 +3998,7 @@ static const struct bpf_reg_types sock_types = {
},
};
+#ifdef CONFIG_NET
static const struct bpf_reg_types btf_id_sock_common_types = {
.types = {
PTR_TO_SOCK_COMMON,
@@ -3988,6 +4009,7 @@ static const struct bpf_reg_types btf_id_sock_common_types = {
},
.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
};
+#endif
static const struct bpf_reg_types mem_types = {
.types = {
@@ -4017,6 +4039,7 @@ static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
@@ -4030,7 +4053,9 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_CTX] = &context_types,
[ARG_PTR_TO_CTX_OR_NULL] = &context_types,
[ARG_PTR_TO_SOCK_COMMON] = &sock_types,
+#ifdef CONFIG_NET
[ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
+#endif
[ARG_PTR_TO_SOCKET] = &fullsock_types,
[ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types,
[ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
@@ -4042,6 +4067,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
+ [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -4205,6 +4231,12 @@ skip_type_check:
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
meta);
+ } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
+ if (!reg->btf_id) {
+ verbose(env, "Helper has invalid btf_id in R%d\n", regno);
+ return -EACCES;
+ }
+ meta->ret_btf_id = reg->btf_id;
} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
@@ -5114,6 +5146,35 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
regs[BPF_REG_0].id = ++env->id_gen;
regs[BPF_REG_0].mem_size = meta.mem_size;
+ } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
+ fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
+ const struct btf_type *t;
+
+ mark_reg_known_zero(env, regs, BPF_REG_0);
+ t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
+ if (!btf_type_is_struct(t)) {
+ u32 tsize;
+ const struct btf_type *ret;
+ const char *tname;
+
+ /* resolve the type size of ksym. */
+ ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+ if (IS_ERR(ret)) {
+ tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ verbose(env, "unable to resolve the size of type '%s': %ld\n",
+ tname, PTR_ERR(ret));
+ return -EINVAL;
+ }
+ regs[BPF_REG_0].type =
+ fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+ PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
+ regs[BPF_REG_0].mem_size = tsize;
+ } else {
+ regs[BPF_REG_0].type =
+ fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
+ PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
+ regs[BPF_REG_0].btf_id = meta.ret_btf_id;
+ }
} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
int ret_btf_id;
@@ -5432,7 +5493,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
/* smin_val represents the known value */
if (known && smin_val == 0 && opcode == BPF_ADD)
break;
- /* fall-through */
+ fallthrough;
case PTR_TO_PACKET_END:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
@@ -6389,6 +6450,11 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
src_reg = NULL;
if (dst_reg->type != SCALAR_VALUE)
ptr_reg = dst_reg;
+ else
+ /* Make sure ID is cleared otherwise dst_reg min/max could be
+ * incorrectly propagated into other registers by find_equal_scalars()
+ */
+ dst_reg->id = 0;
if (BPF_SRC(insn->code) == BPF_X) {
src_reg = &regs[insn->src_reg];
if (src_reg->type != SCALAR_VALUE) {
@@ -6522,6 +6588,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
/* case: R1 = R2
* copy register state to dest reg
*/
+ if (src_reg->type == SCALAR_VALUE && !src_reg->id)
+ /* Assign src and dst registers the same ID
+ * that will be used by find_equal_scalars()
+ * to propagate min/max range.
+ */
+ src_reg->id = ++env->id_gen;
*dst_reg = *src_reg;
dst_reg->live |= REG_LIVE_WRITTEN;
dst_reg->subreg_def = DEF_NOT_SUBREG;
@@ -6534,6 +6606,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EACCES;
} else if (src_reg->type == SCALAR_VALUE) {
*dst_reg = *src_reg;
+ /* Make sure ID is cleared otherwise
+ * dst_reg min/max could be incorrectly
+ * propagated into src_reg by find_equal_scalars()
+ */
+ dst_reg->id = 0;
dst_reg->live |= REG_LIVE_WRITTEN;
dst_reg->subreg_def = env->insn_idx + 1;
} else {
@@ -7322,6 +7399,30 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
return true;
}
+static void find_equal_scalars(struct bpf_verifier_state *vstate,
+ struct bpf_reg_state *known_reg)
+{
+ struct bpf_func_state *state;
+ struct bpf_reg_state *reg;
+ int i, j;
+
+ for (i = 0; i <= vstate->curframe; i++) {
+ state = vstate->frame[i];
+ for (j = 0; j < MAX_BPF_REG; j++) {
+ reg = &state->regs[j];
+ if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
+ *reg = *known_reg;
+ }
+
+ bpf_for_each_spilled_reg(j, state, reg) {
+ if (!reg)
+ continue;
+ if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
+ *reg = *known_reg;
+ }
+ }
+}
+
static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_insn *insn, int *insn_idx)
{
@@ -7450,6 +7551,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
reg_combine_min_max(&other_branch_regs[insn->src_reg],
&other_branch_regs[insn->dst_reg],
src_reg, dst_reg, opcode);
+ if (src_reg->id) {
+ find_equal_scalars(this_branch, src_reg);
+ find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
+ }
+
}
} else if (dst_reg->type == SCALAR_VALUE) {
reg_set_min_max(&other_branch_regs[insn->dst_reg],
@@ -7457,6 +7563,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
opcode, is_jmp32);
}
+ if (dst_reg->type == SCALAR_VALUE && dst_reg->id) {
+ find_equal_scalars(this_branch, dst_reg);
+ find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
+ }
+
/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
* NOTE: these optimizations below are related with pointer comparison
* which will never be JMP32.
@@ -7488,6 +7599,7 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
struct bpf_insn_aux_data *aux = cur_aux(env);
struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_reg_state *dst_reg;
struct bpf_map *map;
int err;
@@ -7504,25 +7616,45 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (err)
return err;
+ dst_reg = &regs[insn->dst_reg];
if (insn->src_reg == 0) {
u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
- regs[insn->dst_reg].type = SCALAR_VALUE;
+ dst_reg->type = SCALAR_VALUE;
__mark_reg_known(&regs[insn->dst_reg], imm);
return 0;
}
+ if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
+ mark_reg_known_zero(env, regs, insn->dst_reg);
+
+ dst_reg->type = aux->btf_var.reg_type;
+ switch (dst_reg->type) {
+ case PTR_TO_MEM:
+ dst_reg->mem_size = aux->btf_var.mem_size;
+ break;
+ case PTR_TO_BTF_ID:
+ case PTR_TO_PERCPU_BTF_ID:
+ dst_reg->btf_id = aux->btf_var.btf_id;
+ break;
+ default:
+ verbose(env, "bpf verifier is misconfigured\n");
+ return -EFAULT;
+ }
+ return 0;
+ }
+
map = env->used_maps[aux->map_index];
mark_reg_known_zero(env, regs, insn->dst_reg);
- regs[insn->dst_reg].map_ptr = map;
+ dst_reg->map_ptr = map;
if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
- regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
- regs[insn->dst_reg].off = aux->map_off;
+ dst_reg->type = PTR_TO_MAP_VALUE;
+ dst_reg->off = aux->map_off;
if (map_value_has_spin_lock(map))
- regs[insn->dst_reg].id = ++env->id_gen;
+ dst_reg->id = ++env->id_gen;
} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
- regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
+ dst_reg->type = CONST_PTR_TO_MAP;
} else {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
@@ -9424,6 +9556,92 @@ process_bpf_exit:
return 0;
}
+/* replace pseudo btf_id with kernel symbol address */
+static int check_pseudo_btf_id(struct bpf_verifier_env *env,
+ struct bpf_insn *insn,
+ struct bpf_insn_aux_data *aux)
+{
+ u32 datasec_id, type, id = insn->imm;
+ const struct btf_var_secinfo *vsi;
+ const struct btf_type *datasec;
+ const struct btf_type *t;
+ const char *sym_name;
+ bool percpu = false;
+ u64 addr;
+ int i;
+
+ if (!btf_vmlinux) {
+ verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
+ return -EINVAL;
+ }
+
+ if (insn[1].imm != 0) {
+ verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
+ return -EINVAL;
+ }
+
+ t = btf_type_by_id(btf_vmlinux, id);
+ if (!t) {
+ verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
+ return -ENOENT;
+ }
+
+ if (!btf_type_is_var(t)) {
+ verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
+ id);
+ return -EINVAL;
+ }
+
+ sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
+ addr = kallsyms_lookup_name(sym_name);
+ if (!addr) {
+ verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
+ sym_name);
+ return -ENOENT;
+ }
+
+ datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
+ BTF_KIND_DATASEC);
+ if (datasec_id > 0) {
+ datasec = btf_type_by_id(btf_vmlinux, datasec_id);
+ for_each_vsi(i, datasec, vsi) {
+ if (vsi->type == id) {
+ percpu = true;
+ break;
+ }
+ }
+ }
+
+ insn[0].imm = (u32)addr;
+ insn[1].imm = addr >> 32;
+
+ type = t->type;
+ t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
+ if (percpu) {
+ aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
+ aux->btf_var.btf_id = type;
+ } else if (!btf_type_is_struct(t)) {
+ const struct btf_type *ret;
+ const char *tname;
+ u32 tsize;
+
+ /* resolve the type size of ksym. */
+ ret = btf_resolve_size(btf_vmlinux, t, &tsize);
+ if (IS_ERR(ret)) {
+ tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
+ tname, PTR_ERR(ret));
+ return -EINVAL;
+ }
+ aux->btf_var.reg_type = PTR_TO_MEM;
+ aux->btf_var.mem_size = tsize;
+ } else {
+ aux->btf_var.reg_type = PTR_TO_BTF_ID;
+ aux->btf_var.btf_id = type;
+ }
+ return 0;
+}
+
static int check_map_prealloc(struct bpf_map *map)
{
return (map->map_type != BPF_MAP_TYPE_HASH &&
@@ -9534,10 +9752,14 @@ static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
}
-/* look for pseudo eBPF instructions that access map FDs and
- * replace them with actual map pointers
+/* find and rewrite pseudo imm in ld_imm64 instructions:
+ *
+ * 1. if it accesses map FD, replace it with actual map pointer.
+ * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
+ *
+ * NOTE: btf_vmlinux is required for converting pseudo btf_id.
*/
-static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
+static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
@@ -9578,6 +9800,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
/* valid generic load 64-bit imm */
goto next_insn;
+ if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
+ aux = &env->insn_aux_data[i];
+ err = check_pseudo_btf_id(env, insn, aux);
+ if (err)
+ return err;
+ goto next_insn;
+ }
+
/* In final convert_pseudo_ld_imm64() step, this is
* converted into regular 64-bit imm load insn.
*/
@@ -10819,7 +11049,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
if (insn->imm == BPF_FUNC_map_lookup_elem &&
ops->map_gen_lookup) {
cnt = ops->map_gen_lookup(map_ptr, insn_buf);
- if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+ if (cnt == -EOPNOTSUPP)
+ goto patch_map_ops_generic;
+ if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
}
@@ -10849,7 +11081,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
(int (*)(struct bpf_map *map, void *value))NULL));
BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
(int (*)(struct bpf_map *map, void *value))NULL));
-
+patch_map_ops_generic:
switch (insn->imm) {
case BPF_FUNC_map_lookup_elem:
insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
@@ -11633,10 +11865,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
- ret = replace_map_fd_with_map_ptr(env);
- if (ret < 0)
- goto skip_full_check;
-
if (bpf_prog_is_dev_bound(env->prog->aux)) {
ret = bpf_prog_offload_verifier_prep(env->prog);
if (ret)
@@ -11662,6 +11890,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
if (ret)
goto skip_full_check;
+ ret = resolve_pseudo_ldimm64(env);
+ if (ret < 0)
+ goto skip_full_check;
+
ret = check_cfg(env);
if (ret < 0)
goto skip_full_check;