summaryrefslogtreecommitdiff
path: root/kernel/bpf/arraymap.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/arraymap.c')
-rw-r--r--kernel/bpf/arraymap.c230
1 files changed, 131 insertions, 99 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2058e89b5ddd..1eeb31c5b317 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -12,6 +12,7 @@
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
+#include <crypto/sha2.h>
#include "map_in_map.h"
@@ -73,6 +74,9 @@ int array_map_alloc_check(union bpf_attr *attr)
/* avoid overflow on round_up(map->value_size) */
if (attr->value_size > INT_MAX)
return -E2BIG;
+ /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */
+ if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE)
+ return -E2BIG;
return 0;
}
@@ -82,7 +86,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
- bool bypass_spec_v1 = bpf_bypass_spec_v1();
+ bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL);
u64 array_size, mask64;
struct bpf_array *array;
@@ -171,6 +175,17 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
return array->value + (u64)array->elem_size * (index & array->index_mask);
}
+static int array_map_get_hash(struct bpf_map *map, u32 hash_buf_size,
+ void *hash_buf)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+ sha256(array->value, (u64)array->elem_size * array->map.max_entries,
+ hash_buf);
+ memcpy(array->map.sha, hash_buf, sizeof(array->map.sha));
+ return 0;
+}
+
static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
u32 off)
{
@@ -246,6 +261,38 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
+/* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */
+static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct bpf_insn *insn = insn_buf;
+
+ if (!bpf_jit_supports_percpu_insn())
+ return -EOPNOTSUPP;
+
+ if (map->map_flags & BPF_F_INNER_MAP)
+ return -EOPNOTSUPP;
+
+ BUILD_BUG_ON(offsetof(struct bpf_array, map) != 0);
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs));
+
+ *insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0);
+ if (!map->bypass_spec_v1) {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
+ *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
+ } else {
+ *insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
+ }
+
+ *insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
+ *insn++ = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
+ *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_0, 0);
+ return insn - insn_buf;
+}
+
static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
@@ -288,18 +335,17 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
}
/* Called from syscall */
-static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
- struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = key ? *(u32 *)key : U32_MAX;
u32 *next = (u32 *)next_key;
- if (index >= array->map.max_entries) {
+ if (index >= map->max_entries) {
*next = 0;
return 0;
}
- if (index == array->map.max_entries - 1)
+ if (index == map->max_entries - 1)
return -ENOENT;
*next = index + 1;
@@ -396,17 +442,17 @@ static void *array_map_vmalloc_addr(struct bpf_array *array)
return (void *)round_down((unsigned long)array, PAGE_SIZE);
}
-static void array_map_free_timers(struct bpf_map *map)
+static void array_map_free_internal_structs(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
- /* We don't reset or free fields other than timer on uref dropping to zero. */
- if (!btf_record_has_field(map->record, BPF_TIMER))
+ /* We only free internal structs on uref dropping to zero */
+ if (!bpf_map_has_internal_structs(map))
return;
for (i = 0; i < array->map.max_entries; i++)
- bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i));
+ bpf_map_free_internal_structs(map, array_map_elem_ptr(array, i));
}
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
@@ -457,7 +503,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
if (map->btf_key_type_id)
seq_printf(m, "%u: ", *(u32 *)key);
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
- seq_puts(m, "\n");
+ seq_putc(m, '\n');
rcu_read_unlock();
}
@@ -478,7 +524,7 @@ static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
seq_printf(m, "\tcpu%d: ", cpu);
btf_type_seq_show(map->btf, map->btf_value_type_id,
per_cpu_ptr(pptr, cpu), m);
- seq_puts(m, "\n");
+ seq_putc(m, '\n');
}
seq_puts(m, "}\n");
@@ -490,8 +536,6 @@ static int array_map_check_btf(const struct bpf_map *map,
const struct btf_type *key_type,
const struct btf_type *value_type)
{
- u32 int_data;
-
/* One exception for keyless BTF: .bss/.data/.rodata map */
if (btf_type_is_void(key_type)) {
if (map->map_type != BPF_MAP_TYPE_ARRAY ||
@@ -504,14 +548,11 @@ static int array_map_check_btf(const struct bpf_map *map,
return 0;
}
- if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
- return -EINVAL;
-
- int_data = *(u32 *)(key_type + 1);
- /* bpf array can only take a u32 key. This check makes sure
+ /*
+ * Bpf array can only take a u32 key. This check makes sure
* that the btf matches the attr used during map_create.
*/
- if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
+ if (!btf_type_is_i32(key_type))
return -EINVAL;
return 0;
@@ -563,7 +604,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
array = container_of(map, struct bpf_array, map);
index = info->index & array->index_mask;
if (info->percpu_value_buf)
- return array->pptrs[index];
+ return (void *)(uintptr_t)array->pptrs[index];
return array_map_elem_ptr(array, index);
}
@@ -582,7 +623,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
array = container_of(map, struct bpf_array, map);
index = info->index & array->index_mask;
if (info->percpu_value_buf)
- return array->pptrs[index];
+ return (void *)(uintptr_t)array->pptrs[index];
return array_map_elem_ptr(array, index);
}
@@ -595,7 +636,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
struct bpf_iter_meta meta;
struct bpf_prog *prog;
int off = 0, cpu = 0;
- void __percpu **pptr;
+ void __percpu *pptr;
u32 size;
meta.seq = seq;
@@ -611,7 +652,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
if (!info->percpu_value_buf) {
ctx.value = v;
} else {
- pptr = v;
+ pptr = (void __percpu *)(uintptr_t)v;
size = array->elem_size;
for_each_possible_cpu(cpu) {
copy_map_value_long(map, info->percpu_value_buf + off,
@@ -695,13 +736,13 @@ static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback
u64 ret = 0;
void *val;
+ cant_migrate();
+
if (flags != 0)
return -EINVAL;
is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
array = container_of(map, struct bpf_array, map);
- if (is_percpu)
- migrate_disable();
for (i = 0; i < map->max_entries; i++) {
if (is_percpu)
val = this_cpu_ptr(array->pptrs[i]);
@@ -716,8 +757,6 @@ static long bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback
break;
}
- if (is_percpu)
- migrate_enable();
return num_elems;
}
@@ -749,8 +788,8 @@ const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = array_map_free,
- .map_get_next_key = array_map_get_next_key,
- .map_release_uref = array_map_free_timers,
+ .map_get_next_key = bpf_array_get_next_key,
+ .map_release_uref = array_map_free_internal_structs,
.map_lookup_elem = array_map_lookup_elem,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
@@ -767,6 +806,7 @@ const struct bpf_map_ops array_map_ops = {
.map_mem_usage = array_map_mem_usage,
.map_btf_id = &array_map_btf_ids[0],
.iter_seq_info = &iter_seq_info,
+ .map_get_hash = &array_map_get_hash,
};
const struct bpf_map_ops percpu_array_map_ops = {
@@ -774,8 +814,9 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = array_map_free,
- .map_get_next_key = array_map_get_next_key,
+ .map_get_next_key = bpf_array_get_next_key,
.map_lookup_elem = percpu_array_map_lookup_elem,
+ .map_gen_lookup = percpu_array_map_gen_lookup,
.map_update_elem = array_map_update_elem,
.map_delete_elem = array_map_delete_elem,
.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
@@ -867,11 +908,11 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
}
if (old_ptr)
- map->ops->map_fd_put_ptr(old_ptr);
+ map->ops->map_fd_put_ptr(map, old_ptr, true);
return 0;
}
-static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
+static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
void *old_ptr;
@@ -890,32 +931,60 @@ static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
}
if (old_ptr) {
- map->ops->map_fd_put_ptr(old_ptr);
+ map->ops->map_fd_put_ptr(map, old_ptr, need_defer);
return 0;
} else {
return -ENOENT;
}
}
+static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
+{
+ return __fd_array_map_delete_elem(map, key, true);
+}
+
static void *prog_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file, int fd)
{
struct bpf_prog *prog = bpf_prog_get(fd);
+ bool is_extended;
if (IS_ERR(prog))
return prog;
- if (!bpf_prog_map_compatible(map, prog)) {
+ if (prog->type == BPF_PROG_TYPE_EXT ||
+ !bpf_prog_map_compatible(map, prog)) {
bpf_prog_put(prog);
return ERR_PTR(-EINVAL);
}
+ mutex_lock(&prog->aux->ext_mutex);
+ is_extended = prog->aux->is_extended;
+ if (!is_extended)
+ prog->aux->prog_array_member_cnt++;
+ mutex_unlock(&prog->aux->ext_mutex);
+ if (is_extended) {
+ /* Extended prog can not be tail callee. It's to prevent a
+ * potential infinite loop like:
+ * tail callee prog entry -> tail callee prog subprog ->
+ * freplace prog entry --tailcall-> tail callee prog entry.
+ */
+ bpf_prog_put(prog);
+ return ERR_PTR(-EBUSY);
+ }
+
return prog;
}
-static void prog_fd_array_put_ptr(void *ptr)
+static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
- bpf_prog_put(ptr);
+ struct bpf_prog *prog = ptr;
+
+ mutex_lock(&prog->aux->ext_mutex);
+ prog->aux->prog_array_member_cnt--;
+ mutex_unlock(&prog->aux->ext_mutex);
+ /* bpf_prog is freed after one RCU or tasks trace grace period */
+ bpf_prog_put(prog);
}
static u32 prog_fd_array_sys_lookup_elem(void *ptr)
@@ -924,13 +993,13 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr)
}
/* decrement refcnt of all bpf_progs that are stored in this map */
-static void bpf_fd_array_map_clear(struct bpf_map *map)
+static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
for (i = 0; i < array->map.max_entries; i++)
- fd_array_map_delete_elem(map, &i);
+ __fd_array_map_delete_elem(map, &i, need_defer);
}
static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
@@ -949,7 +1018,7 @@ static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
prog_id = prog_fd_array_sys_lookup_elem(ptr);
btf_type_seq_show(map->btf, map->btf_value_type_id,
&prog_id, m);
- seq_puts(m, "\n");
+ seq_putc(m, '\n');
}
}
@@ -1012,11 +1081,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map,
mutex_unlock(&aux->poke_mutex);
}
+void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+ struct bpf_prog *new, struct bpf_prog *old)
+{
+ WARN_ON_ONCE(1);
+}
+
static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
struct bpf_prog *old,
struct bpf_prog *new)
{
- u8 *old_addr, *new_addr, *old_bypass_addr;
struct prog_poke_elem *elem;
struct bpf_array_aux *aux;
@@ -1025,7 +1099,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
list_for_each_entry(elem, &aux->poke_progs, list) {
struct bpf_jit_poke_descriptor *poke;
- int i, ret;
+ int i;
for (i = 0; i < elem->aux->size_poke_tab; i++) {
poke = &elem->aux->poke_tab[i];
@@ -1044,21 +1118,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
* activated, so tail call updates can arrive from here
* while JIT is still finishing its final fixup for
* non-activated poke entries.
- * 3) On program teardown, the program's kallsym entry gets
- * removed out of RCU callback, but we can only untrack
- * from sleepable context, therefore bpf_arch_text_poke()
- * might not see that this is in BPF text section and
- * bails out with -EINVAL. As these are unreachable since
- * RCU grace period already passed, we simply skip them.
- * 4) Also programs reaching refcount of zero while patching
+ * 3) Also programs reaching refcount of zero while patching
* is in progress is okay since we're protected under
* poke_mutex and untrack the programs before the JIT
- * buffer is freed. When we're still in the middle of
- * patching and suddenly kallsyms entry of the program
- * gets evicted, we just skip the rest which is fine due
- * to point 3).
- * 5) Any other error happening below from bpf_arch_text_poke()
- * is a unexpected bug.
+ * buffer is freed.
*/
if (!READ_ONCE(poke->tailcall_target_stable))
continue;
@@ -1068,39 +1131,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
poke->tail_call.key != key)
continue;
- old_bypass_addr = old ? NULL : poke->bypass_addr;
- old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
- new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
-
- if (new) {
- ret = bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP,
- old_addr, new_addr);
- BUG_ON(ret < 0 && ret != -EINVAL);
- if (!old) {
- ret = bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
- poke->bypass_addr,
- NULL);
- BUG_ON(ret < 0 && ret != -EINVAL);
- }
- } else {
- ret = bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
- old_bypass_addr,
- poke->bypass_addr);
- BUG_ON(ret < 0 && ret != -EINVAL);
- /* let other CPUs finish the execution of program
- * so that it will not possible to expose them
- * to invalid nop, stack unwind, nop state
- */
- if (!ret)
- synchronize_rcu();
- ret = bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP,
- old_addr, NULL);
- BUG_ON(ret < 0 && ret != -EINVAL);
- }
+ bpf_arch_poke_desc_update(poke, new, old);
}
}
}
@@ -1109,7 +1140,7 @@ static void prog_array_map_clear_deferred(struct work_struct *work)
{
struct bpf_map *map = container_of(work, struct bpf_array_aux,
work)->map;
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, true);
bpf_map_put(map);
}
@@ -1172,7 +1203,7 @@ const struct bpf_map_ops prog_array_map_ops = {
.map_poke_track = prog_array_map_poke_track,
.map_poke_untrack = prog_array_map_poke_untrack,
.map_poke_run = prog_array_map_poke_run,
- .map_get_next_key = array_map_get_next_key,
+ .map_get_next_key = bpf_array_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = prog_fd_array_get_ptr,
@@ -1189,7 +1220,7 @@ static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
{
struct bpf_event_entry *ee;
- ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
+ ee = kzalloc(sizeof(*ee), GFP_KERNEL);
if (ee) {
ee->event = perf_file->private_data;
ee->perf_file = perf_file;
@@ -1239,8 +1270,9 @@ err_out:
return ee;
}
-static void perf_event_fd_array_put_ptr(void *ptr)
+static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
+ /* bpf_perf_event is freed after one RCU grace period */
bpf_event_entry_free_rcu(ptr);
}
@@ -1258,7 +1290,7 @@ static void perf_event_fd_array_release(struct bpf_map *map,
for (i = 0; i < array->map.max_entries; i++) {
ee = READ_ONCE(array->ptrs[i]);
if (ee && ee->map_file == map_file)
- fd_array_map_delete_elem(map, &i);
+ __fd_array_map_delete_elem(map, &i, true);
}
rcu_read_unlock();
}
@@ -1266,7 +1298,7 @@ static void perf_event_fd_array_release(struct bpf_map *map,
static void perf_event_fd_array_map_free(struct bpf_map *map)
{
if (map->map_flags & BPF_F_PRESERVE_ELEMS)
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
@@ -1275,7 +1307,7 @@ const struct bpf_map_ops perf_event_array_map_ops = {
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = perf_event_fd_array_map_free,
- .map_get_next_key = array_map_get_next_key,
+ .map_get_next_key = bpf_array_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = perf_event_fd_array_get_ptr,
@@ -1294,7 +1326,7 @@ static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
return cgroup_get_from_fd(fd);
}
-static void cgroup_fd_array_put_ptr(void *ptr)
+static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* cgroup_put free cgrp after a rcu grace period */
cgroup_put(ptr);
@@ -1302,7 +1334,7 @@ static void cgroup_fd_array_put_ptr(void *ptr)
static void cgroup_fd_array_free(struct bpf_map *map)
{
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
@@ -1311,7 +1343,7 @@ const struct bpf_map_ops cgroup_array_map_ops = {
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = cgroup_fd_array_free,
- .map_get_next_key = array_map_get_next_key,
+ .map_get_next_key = bpf_array_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = cgroup_fd_array_get_ptr,
@@ -1347,7 +1379,7 @@ static void array_of_map_free(struct bpf_map *map)
* is protected by fdget/fdput.
*/
bpf_map_meta_free(map->inner_map_meta);
- bpf_fd_array_map_clear(map);
+ bpf_fd_array_map_clear(map, false);
fd_array_map_free(map);
}
@@ -1396,7 +1428,7 @@ const struct bpf_map_ops array_of_maps_map_ops = {
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_of_map_alloc,
.map_free = array_of_map_free,
- .map_get_next_key = array_map_get_next_key,
+ .map_get_next_key = bpf_array_get_next_key,
.map_lookup_elem = array_of_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = bpf_map_fd_get_ptr,