From 41c48f3a98231738c5ce79f6f2aa6e40ba924d18 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 19 Jun 2020 14:11:43 -0700 Subject: bpf: Support access to bpf map fields There are multiple use-cases when it's convenient to have access to bpf map fields, both `struct bpf_map` and map type specific struct-s such as `struct bpf_array`, `struct bpf_htab`, etc. For example while working with sock arrays it can be necessary to calculate the key based on map->max_entries (some_hash % max_entries). Currently this is solved by communicating max_entries via "out-of-band" channel, e.g. via additional map with known key to get info about target map. That works, but is not very convenient and error-prone while working with many maps. In other cases necessary data is dynamic (i.e. unknown at loading time) and it's impossible to get it at all. For example while working with a hash table it can be convenient to know how much capacity is already used (bpf_htab.count.counter for BPF_F_NO_PREALLOC case). At the same time kernel knows this info and can provide it to bpf program. Fill this gap by adding support to access bpf map fields from bpf program for both `struct bpf_map` and map type specific fields. Support is implemented via btf_struct_access() so that a user can define their own `struct bpf_map` or map type specific struct in their program with only necessary fields and preserve_access_index attribute, cast a map to this struct and use a field. For example: struct bpf_map { __u32 max_entries; } __attribute__((preserve_access_index)); struct bpf_array { struct bpf_map map; __u32 elem_size; } __attribute__((preserve_access_index)); struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 4); __type(key, __u32); __type(value, __u32); } m_array SEC(".maps"); SEC("cgroup_skb/egress") int cg_skb(void *ctx) { struct bpf_array *array = (struct bpf_array *)&m_array; struct bpf_map *map = (struct bpf_map *)&m_array; /* .. use map->max_entries or array->map.max_entries .. 
*/ } Similarly to other btf_struct_access() use-cases (e.g. struct tcp_sock in net/ipv4/bpf_tcp_ca.c) the patch allows access to any field of the corresponding struct. Only reading from map fields is supported. For btf_struct_access() to work there should be a way to know the btf id of a struct that corresponds to a map type. To get the btf id there should be a way to get a stringified name of the map-specific struct, such as "bpf_array", "bpf_htab", etc. for a map type. Two new fields are added to `struct bpf_map_ops` to handle it: * .map_btf_name keeps a btf name of a struct returned by map_alloc(); * .map_btf_id is used to cache btf id of that struct. To make btf id calculation cheaper, the ids are calculated once while preparing btf_vmlinux and cached the same way as is done for the btf_id field of `struct bpf_func_proto`. While calculating btf ids, struct names are NOT checked for collision. Collisions will be checked as a part of the work to prepare btf ids used in the verifier at compile time that should land soon. The only known collision for `struct bpf_htab` (kernel/bpf/hashtab.c vs net/core/sock_map.c) was fixed earlier. Both new fields .map_btf_name and .map_btf_id must be set for a map type for the feature to work. If neither is set for a map type, the verifier will return ENOTSUPP on an attempt to access map_ptr of the corresponding type. If just one of them is set, it's a verifier misconfiguration. Only `struct bpf_array` for BPF_MAP_TYPE_ARRAY and `struct bpf_htab` for BPF_MAP_TYPE_HASH are supported by this patch. Other map types will be supported separately. The feature is available only for CONFIG_DEBUG_INFO_BTF=y and is gated by perfmon_capable() so that unpriv programs won't have access to bpf map fields. 
Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/6479686a0cd1e9067993df57b4c3eef0e276fec9.1592600985.git.rdna@fb.com --- kernel/bpf/arraymap.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/bpf/arraymap.c') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 11584618e861..e7caa48812fb 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -494,6 +494,7 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) vma->vm_pgoff + pgoff); } +static int array_map_btf_id; const struct bpf_map_ops array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, @@ -510,6 +511,8 @@ const struct bpf_map_ops array_map_ops = { .map_check_btf = array_map_check_btf, .map_lookup_batch = generic_map_lookup_batch, .map_update_batch = generic_map_update_batch, + .map_btf_name = "bpf_array", + .map_btf_id = &array_map_btf_id, }; const struct bpf_map_ops percpu_array_map_ops = { -- cgit From 2872e9ac33a4440173418147351ed4f93177e763 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 19 Jun 2020 14:11:44 -0700 Subject: bpf: Set map_btf_{name, id} for all map types Set map_btf_name and map_btf_id for all map types so that map fields can be accessed by bpf programs. 
Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/a825f808f22af52b018dbe82f1c7d29dab5fc978.1592600985.git.rdna@fb.com --- kernel/bpf/arraymap.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel/bpf/arraymap.c') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index e7caa48812fb..ec5cd11032aa 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -515,6 +515,7 @@ const struct bpf_map_ops array_map_ops = { .map_btf_id = &array_map_btf_id, }; +static int percpu_array_map_btf_id; const struct bpf_map_ops percpu_array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, @@ -525,6 +526,8 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_delete_elem = array_map_delete_elem, .map_seq_show_elem = percpu_array_map_seq_show_elem, .map_check_btf = array_map_check_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &percpu_array_map_btf_id, }; static int fd_array_map_alloc_check(union bpf_attr *attr) @@ -871,6 +874,7 @@ static void prog_array_map_free(struct bpf_map *map) fd_array_map_free(map); } +static int prog_array_map_btf_id; const struct bpf_map_ops prog_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = prog_array_map_alloc, @@ -886,6 +890,8 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = prog_array_map_clear, .map_seq_show_elem = prog_array_map_seq_show_elem, + .map_btf_name = "bpf_array", + .map_btf_id = &prog_array_map_btf_id, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, @@ -964,6 +970,7 @@ static void perf_event_fd_array_release(struct bpf_map *map, rcu_read_unlock(); } +static int perf_event_array_map_btf_id; const struct bpf_map_ops perf_event_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, @@ -975,6 +982,8 
@@ const struct bpf_map_ops perf_event_array_map_ops = { .map_fd_put_ptr = perf_event_fd_array_put_ptr, .map_release = perf_event_fd_array_release, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &perf_event_array_map_btf_id, }; #ifdef CONFIG_CGROUPS @@ -997,6 +1006,7 @@ static void cgroup_fd_array_free(struct bpf_map *map) fd_array_map_free(map); } +static int cgroup_array_map_btf_id; const struct bpf_map_ops cgroup_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, @@ -1007,6 +1017,8 @@ const struct bpf_map_ops cgroup_array_map_ops = { .map_fd_get_ptr = cgroup_fd_array_get_ptr, .map_fd_put_ptr = cgroup_fd_array_put_ptr, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &cgroup_array_map_btf_id, }; #endif @@ -1080,6 +1092,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map, return insn - insn_buf; } +static int array_of_maps_map_btf_id; const struct bpf_map_ops array_of_maps_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_of_map_alloc, @@ -1092,4 +1105,6 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = array_of_map_gen_lookup, .map_check_btf = map_check_no_btf, + .map_btf_name = "bpf_array", + .map_btf_id = &array_of_maps_map_btf_id, }; -- cgit From bba1dc0b55ac462d24ed1228ad49800c238cd6d7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 29 Jun 2020 21:33:39 -0700 Subject: bpf: Remove redundant synchronize_rcu. bpf_free_used_maps() or close(map_fd) will trigger map_free callback. bpf_free_used_maps() is called after bpf prog is no longer executing: bpf_prog_put->call_rcu->bpf_prog_free->bpf_free_used_maps. Hence there is no need to call synchronize_rcu() to protect map elements. Note that hash_of_maps and array_of_maps update/delete inner maps via sys_bpf() that calls maybe_wait_bpf_programs() and synchronize_rcu(). 
Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Paul E. McKenney Link: https://lore.kernel.org/bpf/20200630043343.53195-2-alexei.starovoitov@gmail.com --- kernel/bpf/arraymap.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'kernel/bpf/arraymap.c') diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index ec5cd11032aa..c66e8273fccd 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -386,13 +386,6 @@ static void array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); - /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, - * so the programs (can be more than one that used this map) were - * disconnected from events. Wait for outstanding programs to complete - * and free the array - */ - synchronize_rcu(); - if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) bpf_array_free_percpu(array); @@ -546,8 +539,6 @@ static void fd_array_map_free(struct bpf_map *map) struct bpf_array *array = container_of(map, struct bpf_array, map); int i; - synchronize_rcu(); - /* make sure it's empty */ for (i = 0; i < array->map.max_entries; i++) BUG_ON(array->ptrs[i] != NULL); -- cgit