From aa3496accc412b3d975e4ee5d06076d73394d8b5 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 4 Nov 2022 00:39:55 +0530 Subject: bpf: Refactor kptr_off_tab into btf_record To prepare the BPF verifier to handle special fields in both map values and program allocated types coming from program BTF, we need to refactor the kptr_off_tab handling code into something more generic and reusable across both cases to avoid code duplication. Later patches also require passing this data to helpers at runtime, so that they can work on user defined types, initialize them, destruct them, etc. The main observation is that both map values and such allocated types point to a type in program BTF, hence they can be handled similarly. We can prepare a field metadata table for both cases and store them in struct bpf_map or struct btf depending on the use case. Hence, refactor the code into generic btf_record and btf_field member structs. The btf_record represents the fields of a specific btf_type in user BTF. The cnt indicates the number of special fields we successfully recognized, and field_mask is a bitmask of fields that were found, to enable quick determination of availability of a certain field. Subsequently, refactor the rest of the code to work with these generic types, remove assumptions about kptr and kptr_off_tab, rename variables to more meaningful names, etc. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/map_in_map.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'kernel/bpf/map_in_map.c') diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 135205d0d560..d6c662183f88 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -52,7 +52,15 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->max_entries = inner_map->max_entries; inner_map_meta->spin_lock_off = inner_map->spin_lock_off; inner_map_meta->timer_off = inner_map->timer_off; - inner_map_meta->kptr_off_tab = bpf_map_copy_kptr_off_tab(inner_map); + inner_map_meta->record = btf_record_dup(inner_map->record); + if (IS_ERR(inner_map_meta->record)) { + /* btf_record_dup returns NULL or valid pointer in case of + * invalid/empty/valid, but ERR_PTR in case of errors. During + * equality NULL or IS_ERR is equivalent. + */ + fdput(f); + return ERR_CAST(inner_map_meta->record); + } if (inner_map->btf) { btf_get(inner_map->btf); inner_map_meta->btf = inner_map->btf; @@ -72,7 +80,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) void bpf_map_meta_free(struct bpf_map *map_meta) { - bpf_map_free_kptr_off_tab(map_meta); + bpf_map_free_record(map_meta); btf_put(map_meta->btf); kfree(map_meta); } @@ -86,7 +94,7 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0, meta0->value_size == meta1->value_size && meta0->timer_off == meta1->timer_off && meta0->map_flags == meta1->map_flags && - bpf_map_equal_kptr_off_tab(meta0, meta1); + btf_record_equal(meta0->record, meta1->record); } void *bpf_map_fd_get_ptr(struct bpf_map *map, -- cgit From db559117828d2448fe81ada051c60bcf39f822e9 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 4 Nov 2022 00:39:56 +0530 Subject: bpf: Consolidate spin_lock, timer management into btf_record Now that kptr_off_tab has been refactored into btf_record, and can hold more than one specific field type, accomodate bpf_spin_lock and bpf_timer as well. While they don't require any more metadata than offset, having all special fields in one place allows us to share the same code for allocated user defined types and handle both map values and these allocated objects in a similar fashion. As an optimization, we still keep spin_lock_off and timer_off offsets in the btf_record structure, just to avoid having to find the btf_field struct each time their offset is needed. This is mostly needed to manipulate such objects in a map value at runtime. It's ok to hardcode just one offset as more than one field is disallowed. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221103191013.1236066-8-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/map_in_map.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'kernel/bpf/map_in_map.c') diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index d6c662183f88..8ca0cca39d49 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -29,7 +29,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) return ERR_PTR(-ENOTSUPP); } - if (map_value_has_spin_lock(inner_map)) { + if (btf_record_has_field(inner_map->record, BPF_SPIN_LOCK)) { fdput(f); return ERR_PTR(-ENOTSUPP); } @@ -50,8 +50,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->value_size = inner_map->value_size; inner_map_meta->map_flags = inner_map->map_flags; inner_map_meta->max_entries = inner_map->max_entries; - inner_map_meta->spin_lock_off = inner_map->spin_lock_off; - inner_map_meta->timer_off = inner_map->timer_off; inner_map_meta->record = btf_record_dup(inner_map->record); if (IS_ERR(inner_map_meta->record)) { /* btf_record_dup returns NULL or valid pointer in case of @@ -92,7 +90,6 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0, return meta0->map_type == meta1->map_type && meta0->key_size == meta1->key_size && meta0->value_size == meta1->value_size && - meta0->timer_off == meta1->timer_off && meta0->map_flags == meta1->map_flags && btf_record_equal(meta0->record, meta1->record); } -- cgit From d48995723c9a1c4896206be382c72d722accbfbc Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:25:53 +0530 Subject: bpf: Free inner_map_meta when btf_record_dup fails Whenever btf_record_dup fails, we must free inner_map_meta that was allocated before. This fixes a memory leak (in case of errors) during inner map creation. Fixes: aa3496accc41 ("bpf: Refactor kptr_off_tab into btf_record") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/map_in_map.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/bpf/map_in_map.c') diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 8ca0cca39d49..a423130a8720 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -52,12 +52,14 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->max_entries = inner_map->max_entries; inner_map_meta->record = btf_record_dup(inner_map->record); if (IS_ERR(inner_map_meta->record)) { + struct bpf_map *err_ptr = ERR_CAST(inner_map_meta->record); /* btf_record_dup returns NULL or valid pointer in case of * invalid/empty/valid, but ERR_PTR in case of errors. During * equality NULL or IS_ERR is equivalent. */ + kfree(inner_map_meta); fdput(f); - return ERR_CAST(inner_map_meta->record); + return err_ptr; } if (inner_map->btf) { btf_get(inner_map->btf); -- cgit From f73e601aafb2ad9f2b2012b969f86f4a41141a7d Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:25:54 +0530 Subject: bpf: Populate field_offs for inner_map_meta Far too much code simply assumes that both btf_record and btf_field_offs are set to valid pointers together, or both are unset. They go together hand in hand as btf_record describes the special fields and btf_field_offs is compact representation for runtime copying/zeroing. It is very difficult to make this clear in the code when the only exception to this universal invariant is inner_map_meta which is used as reg->map_ptr in the verifier. This is simply a bug waiting to happen, as in verifier context we cannot easily distinguish if PTR_TO_MAP_VALUE is coming from an inner map, and if we ever end up using field_offs for any reason in the future, we will silently ignore the special fields for inner map case (as NULL is not an error but unset field_offs). Hence, simply copy field_offs from inner map together with btf_record. While at it, refactor code to unwind properly on errors with gotos. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/map_in_map.c | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) (limited to 'kernel/bpf/map_in_map.c') diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index a423130a8720..fae6a6c33e2d 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -12,6 +12,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) struct bpf_map *inner_map, *inner_map_meta; u32 inner_map_meta_size; struct fd f; + int ret; f = fdget(inner_map_ufd); inner_map = __bpf_map_get(f); @@ -20,18 +21,18 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) /* Does not support >1 level map-in-map */ if (inner_map->inner_map_meta) { - fdput(f); - return ERR_PTR(-EINVAL); + ret = -EINVAL; + goto put; } if (!inner_map->ops->map_meta_equal) { - fdput(f); - return ERR_PTR(-ENOTSUPP); + ret = -ENOTSUPP; + goto put; } if (btf_record_has_field(inner_map->record, BPF_SPIN_LOCK)) { - fdput(f); - return ERR_PTR(-ENOTSUPP); + ret = -ENOTSUPP; + goto put; } inner_map_meta_size = sizeof(*inner_map_meta); @@ -41,8 +42,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); if (!inner_map_meta) { - fdput(f); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto put; } inner_map_meta->map_type = inner_map->map_type; @@ -50,16 +51,27 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->value_size = inner_map->value_size; inner_map_meta->map_flags = inner_map->map_flags; inner_map_meta->max_entries = inner_map->max_entries; + inner_map_meta->record = btf_record_dup(inner_map->record); if (IS_ERR(inner_map_meta->record)) { - struct bpf_map *err_ptr = ERR_CAST(inner_map_meta->record); /* btf_record_dup returns NULL or valid pointer in case of * invalid/empty/valid, but ERR_PTR in case of errors. During * equality NULL or IS_ERR is equivalent. */ - kfree(inner_map_meta); - fdput(f); - return err_ptr; + ret = PTR_ERR(inner_map_meta->record); + goto free; + } + if (inner_map_meta->record) { + struct btf_field_offs *field_offs; + /* If btf_record is !IS_ERR_OR_NULL, then field_offs is always + * valid. + */ + field_offs = kmemdup(inner_map->field_offs, sizeof(*inner_map->field_offs), GFP_KERNEL | __GFP_NOWARN); + if (!field_offs) { + ret = -ENOMEM; + goto free_rec; + } + inner_map_meta->field_offs = field_offs; } if (inner_map->btf) { btf_get(inner_map->btf); @@ -76,10 +88,18 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) fdput(f); return inner_map_meta; +free_rec: + btf_record_free(inner_map_meta->record); +free: + kfree(inner_map_meta); +put: + fdput(f); + return ERR_PTR(ret); } void bpf_map_meta_free(struct bpf_map *map_meta) { + kfree(map_meta->field_offs); bpf_map_free_record(map_meta); btf_put(map_meta->btf); kfree(map_meta); -- cgit From b7ff97925b55a0603a7c215305df4b43ab632948 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:00 +0530 Subject: bpf: Allow locking bpf_spin_lock in inner map values There is no need to restrict users from locking bpf_spin_lock in map values of inner maps. Each inner map lookup gets a unique reg->id assigned to the returned PTR_TO_MAP_VALUE which will be preserved after the NULL check. Distinct lookups into different inner map get unique IDs, and distinct lookups into same inner map also get unique IDs. Hence, lift the restriction by removing the check return -ENOTSUPP in map_in_map.c. Later commits will add comprehensive test cases to ensure that invalid cases are rejected. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-11-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/map_in_map.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel/bpf/map_in_map.c') diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index fae6a6c33e2d..7cce2047c6ef 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -30,11 +30,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) goto put; } - if (btf_record_has_field(inner_map->record, BPF_SPIN_LOCK)) { - ret = -ENOTSUPP; - goto put; - } - inner_map_meta_size = sizeof(*inner_map_meta); /* In some cases verifier needs to access beyond just base map. */ if (inner_map->ops == &array_map_ops) -- cgit From c22dfdd21592c5d56b49d5fba8de300ad7bf293c Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Fri, 18 Nov 2022 07:26:08 +0530 Subject: bpf: Add comments for map BTF matching requirement for bpf_list_head The old behavior of bpf_map_meta_equal was that it compared timer_off to be equal (but not spin_lock_off, because that was not allowed), and did memcmp of kptr_off_tab. Now, we memcmp the btf_record of two bpf_map structs, which has all fields. We preserve backwards compat as we kzalloc the array, so if only spin lock and timer exist in map, we only compare offset while the rest of unused members in the btf_field struct are zeroed out. In case of kptr, btf and everything else is of vmlinux or module, so as long type is same it will match, since kernel btf, module, dtor pointer will be same across maps. Now with list_head in the mix, things are a bit complicated. We implicitly add a requirement that both BTFs are same, because struct btf_field_list_head has btf and value_rec members. We obviously shouldn't force BTFs to be equal by default, as that breaks backwards compatibility. Currently it is only implicitly required due to list_head matching struct btf and value_rec member. value_rec points back into a btf_record stashed in the map BTF (btf member of btf_field_list_head). So that pointer and btf member has to match exactly. Document all these subtle details so that things don't break in the future when touching this code. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20221118015614.2013203-19-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/map_in_map.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/bpf/map_in_map.c') diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 7cce2047c6ef..38136ec4e095 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -68,6 +68,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) } inner_map_meta->field_offs = field_offs; } + /* Note: We must use the same BTF, as we also used btf_record_dup above + * which relies on BTF being same for both maps, as some members like + * record->fields.list_head have pointers like value_rec pointing into + * inner_map->btf. + */ if (inner_map->btf) { btf_get(inner_map->btf); inner_map_meta->btf = inner_map->btf; -- cgit