diff options
Diffstat (limited to 'kernel/bpf/btf.c')
-rw-r--r-- | kernel/bpf/btf.c | 6038 |
1 files changed, 5373 insertions, 665 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 0443600146dc..c3223e0db2f5 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018 Facebook */ #include <uapi/linux/btf.h> @@ -18,9 +18,20 @@ #include <linux/sort.h> #include <linux/bpf_verifier.h> #include <linux/btf.h> +#include <linux/btf_ids.h> +#include <linux/bpf.h> +#include <linux/bpf_lsm.h> #include <linux/skmsg.h> #include <linux/perf_event.h> +#include <linux/bsearch.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> + +#include <net/netfilter/nf_bpf_link.h> + #include <net/sock.h> +#include <net/xdp.h> +#include "../tools/lib/bpf/relo_core.h" /* BTF (BPF Type Format) is the meta data format which describes * the data types of BPF program/map. Hence, it basically focus @@ -47,7 +58,7 @@ * The BTF type section contains a list of 'struct btf_type' objects. * Each one describes a C type. Recall from the above section * that a 'struct btf_type' object could be immediately followed by extra - * data in order to desribe some particular C types. + * data in order to describe some particular C types. * * type_id: * ~~~~~~~ @@ -169,7 +180,7 @@ #define BITS_ROUNDUP_BYTES(bits) \ (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits)) -#define BTF_INFO_MASK 0x8f00ffff +#define BTF_INFO_MASK 0x9f00ffff #define BTF_INT_MASK 0x0fffffff #define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE) #define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET) @@ -185,11 +196,6 @@ i < btf_type_vlen(struct_type); \ i++, member++) -#define for_each_vsi(i, struct_type, member) \ - for (i = 0, member = btf_type_var_secinfo(struct_type); \ - i < btf_type_vlen(struct_type); \ - i++, member++) - #define for_each_vsi_from(i, from, struct_type, member) \ for (i = from, member = btf_type_var_secinfo(struct_type) + from; \ i < btf_type_vlen(struct_type); \ @@ -198,6 +204,51 @@ DEFINE_IDR(btf_idr); DEFINE_SPINLOCK(btf_idr_lock); +enum btf_kfunc_hook { + BTF_KFUNC_HOOK_COMMON, + BTF_KFUNC_HOOK_XDP, + BTF_KFUNC_HOOK_TC, + BTF_KFUNC_HOOK_STRUCT_OPS, + BTF_KFUNC_HOOK_TRACING, + BTF_KFUNC_HOOK_SYSCALL, + BTF_KFUNC_HOOK_FMODRET, + BTF_KFUNC_HOOK_CGROUP, + BTF_KFUNC_HOOK_SCHED_ACT, + BTF_KFUNC_HOOK_SK_SKB, + BTF_KFUNC_HOOK_SOCKET_FILTER, + BTF_KFUNC_HOOK_LWT, + BTF_KFUNC_HOOK_NETFILTER, + BTF_KFUNC_HOOK_KPROBE, + BTF_KFUNC_HOOK_MAX, +}; + +enum { + BTF_KFUNC_SET_MAX_CNT = 256, + BTF_DTOR_KFUNC_MAX_CNT = 256, + BTF_KFUNC_FILTER_MAX_CNT = 16, +}; + +struct btf_kfunc_hook_filter { + btf_kfunc_filter_t filters[BTF_KFUNC_FILTER_MAX_CNT]; + u32 nr_filters; +}; + +struct btf_kfunc_set_tab { + struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX]; + struct btf_kfunc_hook_filter hook_filters[BTF_KFUNC_HOOK_MAX]; +}; + +struct btf_id_dtor_kfunc_tab { + u32 cnt; + struct btf_id_dtor_kfunc dtors[]; +}; + +struct btf_struct_ops_tab { + u32 cnt; + u32 capacity; + struct bpf_struct_ops_desc ops[]; +}; + struct btf { void *data; struct btf_type **types; @@ -206,12 +257,24 @@ struct btf { const char *strings; void *nohdr_data; struct btf_header hdr; - u32 nr_types; + u32 nr_types; /* includes VOID for base BTF */ u32 types_size; u32 data_size; refcount_t refcnt; u32 id; struct rcu_head rcu; + struct btf_kfunc_set_tab *kfunc_set_tab; + struct btf_id_dtor_kfunc_tab *dtor_kfunc_tab; + struct btf_struct_metas *struct_meta_tab; + struct btf_struct_ops_tab *struct_ops_tab; + + /* split BTF support */ + struct btf *base_btf; + u32 start_id; /* first type ID in this BTF (0 for base BTF) */ + u32 start_str_off; /* first string offset (0 for base BTF) */ + char name[MODULE_NAME_LEN]; + bool kernel_btf; + __u32 *base_id_map; /* map from distilled base BTF -> vmlinux BTF ids */ }; enum verifier_phase { @@ -274,13 +337,108 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO", [BTF_KIND_VAR] = "VAR", [BTF_KIND_DATASEC] = "DATASEC", + [BTF_KIND_FLOAT] = "FLOAT", + [BTF_KIND_DECL_TAG] = "DECL_TAG", + [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + [BTF_KIND_ENUM64] = "ENUM64", }; -static const char *btf_type_str(const struct btf_type *t) +const char *btf_type_str(const struct btf_type *t) { return btf_kind_str[BTF_INFO_KIND(t->info)]; } +/* Chunk size we use in safe copy of data to be shown. */ +#define BTF_SHOW_OBJ_SAFE_SIZE 32 + +/* + * This is the maximum size of a base type value (equivalent to a + * 128-bit int); if we are at the end of our safe buffer and have + * less than 16 bytes space we can't be assured of being able + * to copy the next type safely, so in such cases we will initiate + * a new copy. + */ +#define BTF_SHOW_OBJ_BASE_TYPE_SIZE 16 + +/* Type name size */ +#define BTF_SHOW_NAME_SIZE 80 + +/* + * The suffix of a type that indicates it cannot alias another type when + * comparing BTF IDs for kfunc invocations. + */ +#define NOCAST_ALIAS_SUFFIX "___init" + +/* + * Common data to all BTF show operations. Private show functions can add + * their own data to a structure containing a struct btf_show and consult it + * in the show callback. See btf_type_show() below. + * + * One challenge with showing nested data is we want to skip 0-valued + * data, but in order to figure out whether a nested object is all zeros + * we need to walk through it. As a result, we need to make two passes + * when handling structs, unions and arrays; the first path simply looks + * for nonzero data, while the second actually does the display. The first + * pass is signalled by show->state.depth_check being set, and if we + * encounter a non-zero value we set show->state.depth_to_show to + * the depth at which we encountered it. When we have completed the + * first pass, we will know if anything needs to be displayed if + * depth_to_show > depth. See btf_[struct,array]_show() for the + * implementation of this. + * + * Another problem is we want to ensure the data for display is safe to + * access. To support this, the anonymous "struct {} obj" tracks the data + * object and our safe copy of it. We copy portions of the data needed + * to the object "copy" buffer, but because its size is limited to + * BTF_SHOW_OBJ_COPY_LEN bytes, multiple copies may be required as we + * traverse larger objects for display. + * + * The various data type show functions all start with a call to + * btf_show_start_type() which returns a pointer to the safe copy + * of the data needed (or if BTF_SHOW_UNSAFE is specified, to the + * raw data itself). btf_show_obj_safe() is responsible for + * using copy_from_kernel_nofault() to update the safe data if necessary + * as we traverse the object's data. skbuff-like semantics are + * used: + * + * - obj.head points to the start of the toplevel object for display + * - obj.size is the size of the toplevel object + * - obj.data points to the current point in the original data at + * which our safe data starts. obj.data will advance as we copy + * portions of the data. + * + * In most cases a single copy will suffice, but larger data structures + * such as "struct task_struct" will require many copies. The logic in + * btf_show_obj_safe() handles the logic that determines if a new + * copy_from_kernel_nofault() is needed. + */ +struct btf_show { + u64 flags; + void *target; /* target of show operation (seq file, buffer) */ + __printf(2, 0) void (*showfn)(struct btf_show *show, const char *fmt, va_list args); + const struct btf *btf; + /* below are used during iteration */ + struct { + u8 depth; + u8 depth_to_show; + u8 depth_check; + u8 array_member:1, + array_terminated:1; + u16 array_encoding; + u32 type_id; + int status; /* non-zero for error */ + const struct btf_type *type; + const struct btf_member *member; + char name[BTF_SHOW_NAME_SIZE]; /* space for member name/type */ + } state; + struct { + u32 size; + void *head; + void *data; + u8 safe[BTF_SHOW_OBJ_SAFE_SIZE]; + } obj; +}; + struct btf_kind_operations { s32 (*check_meta)(struct btf_verifier_env *env, const struct btf_type *t, @@ -297,9 +455,9 @@ struct btf_kind_operations { const struct btf_type *member_type); void (*log_details)(struct btf_verifier_env *env, const struct btf_type *t); - void (*seq_show)(const struct btf *btf, const struct btf_type *t, + void (*show)(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offsets, - struct seq_file *m); + struct btf_show *show); }; static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS]; @@ -308,6 +466,9 @@ static struct btf_type btf_void; static int btf_resolve(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id); +static int btf_func_check(struct btf_verifier_env *env, + const struct btf_type *t); + static bool btf_type_is_modifier(const struct btf_type *t) { /* Some of them is not strictly a C modifier @@ -325,6 +486,7 @@ static bool btf_type_is_modifier(const struct btf_type *t) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: return true; } @@ -336,15 +498,21 @@ bool btf_type_is_void(const struct btf_type *t) return t == &btf_void; } -static bool btf_type_is_fwd(const struct btf_type *t) +static bool btf_type_is_datasec(const struct btf_type *t) { - return BTF_INFO_KIND(t->info) == BTF_KIND_FWD; + return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; +} + +static bool btf_type_is_decl_tag(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG; } static bool btf_type_nosize(const struct btf_type *t) { return btf_type_is_void(t) || btf_type_is_fwd(t) || - btf_type_is_func(t) || btf_type_is_func_proto(t); + btf_type_is_func(t) || btf_type_is_func_proto(t) || + btf_type_is_decl_tag(t); } static bool btf_type_nosize_or_null(const struct btf_type *t) @@ -352,44 +520,38 @@ static bool btf_type_nosize_or_null(const struct btf_type *t) return !t || btf_type_nosize(t); } -/* union is only a special case of struct: - * all its offsetof(member) == 0 - */ -static bool btf_type_is_struct(const struct btf_type *t) +static bool btf_type_is_decl_tag_target(const struct btf_type *t) { - u8 kind = BTF_INFO_KIND(t->info); - - return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; + return btf_type_is_func(t) || btf_type_is_struct(t) || + btf_type_is_var(t) || btf_type_is_typedef(t); } -static bool __btf_type_is_struct(const struct btf_type *t) +bool btf_is_vmlinux(const struct btf *btf) { - return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; + return btf->kernel_btf && !btf->base_btf; } -static bool btf_type_is_array(const struct btf_type *t) +u32 btf_nr_types(const struct btf *btf) { - return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; -} + u32 total = 0; -static bool btf_type_is_var(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; -} + while (btf) { + total += btf->nr_types; + btf = btf->base_btf; + } -static bool btf_type_is_datasec(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; + return total; } s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) { const struct btf_type *t; const char *tname; - u32 i; + u32 i, total; - for (i = 1; i <= btf->nr_types; i++) { - t = btf->types[i]; + total = btf_nr_types(btf); + for (i = 1; i < total; i++) { + t = btf_type_by_id(btf, i); if (BTF_INFO_KIND(t->info) != kind) continue; @@ -401,6 +563,50 @@ s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) return -ENOENT; } +s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p) +{ + struct btf *btf; + s32 ret; + int id; + + btf = bpf_get_btf_vmlinux(); + if (IS_ERR(btf)) + return PTR_ERR(btf); + if (!btf) + return -EINVAL; + + ret = btf_find_by_name_kind(btf, name, kind); + /* ret is never zero, since btf_find_by_name_kind returns + * positive btf_id or negative error. + */ + if (ret > 0) { + btf_get(btf); + *btf_p = btf; + return ret; + } + + /* If name is not found in vmlinux's BTF then search in module's BTFs */ + spin_lock_bh(&btf_idr_lock); + idr_for_each_entry(&btf_idr, btf, id) { + if (!btf_is_module(btf)) + continue; + /* linear search could be slow hence unlock/lock + * the IDR to avoiding holding it for too long + */ + btf_get(btf); + spin_unlock_bh(&btf_idr_lock); + ret = btf_find_by_name_kind(btf, name, kind); + if (ret > 0) { + *btf_p = btf; + return ret; + } + btf_put(btf); + spin_lock_bh(&btf_idr_lock); + } + spin_unlock_bh(&btf_idr_lock); + return ret; +} + const struct btf_type *btf_type_skip_modifiers(const struct btf *btf, u32 id, u32 *res_id) { @@ -447,6 +653,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf, static bool btf_type_is_resolve_source_only(const struct btf_type *t) { return btf_type_is_var(t) || + btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -473,6 +680,8 @@ static bool btf_type_needs_resolve(const struct btf_type *t) btf_type_is_struct(t) || btf_type_is_array(t) || btf_type_is_var(t) || + btf_type_is_func(t) || + btf_type_is_decl_tag(t) || btf_type_is_datasec(t); } @@ -485,6 +694,8 @@ static bool btf_type_has_size(const struct btf_type *t) case BTF_KIND_UNION: case BTF_KIND_ENUM: case BTF_KIND_DATASEC: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM64: return true; } @@ -525,9 +736,14 @@ static const struct btf_var *btf_type_var(const struct btf_type *t) return (const struct btf_var *)(t + 1); } -static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t) +static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t) { - return (const struct btf_var_secinfo *)(t + 1); + return (const struct btf_decl_tag *)(t + 1); +} + +static const struct btf_enum64 *btf_type_enum64(const struct btf_type *t) +{ + return (const struct btf_enum64 *)(t + 1); } static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) @@ -537,35 +753,52 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t) static bool btf_name_offset_valid(const struct btf *btf, u32 offset) { - return BTF_STR_OFFSET_VALID(offset) && - offset < btf->hdr.str_len; + if (!BTF_STR_OFFSET_VALID(offset)) + return false; + + while (offset < btf->start_str_off) + btf = btf->base_btf; + + offset -= btf->start_str_off; + return offset < btf->hdr.str_len; } -static bool __btf_name_char_ok(char c, bool first, bool dot_ok) +static bool __btf_name_char_ok(char c, bool first) { if ((first ? !isalpha(c) : !isalnum(c)) && c != '_' && - ((c == '.' && !dot_ok) || - c != '.')) + c != '.') return false; return true; } -static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok) +const char *btf_str_by_offset(const struct btf *btf, u32 offset) +{ + while (offset < btf->start_str_off) + btf = btf->base_btf; + + offset -= btf->start_str_off; + if (offset < btf->hdr.str_len) + return &btf->strings[offset]; + + return NULL; +} + +static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) { /* offset must be valid */ - const char *src = &btf->strings[offset]; + const char *src = btf_str_by_offset(btf, offset); const char *src_limit; - if (!__btf_name_char_ok(*src, true, dot_ok)) + if (!__btf_name_char_ok(*src, true)) return false; /* set a limit on identifier length */ src_limit = src + KSYM_NAME_LEN; src++; while (*src && src < src_limit) { - if (!__btf_name_char_ok(*src, false, dot_ok)) + if (!__btf_name_char_ok(*src, false)) return false; src++; } @@ -573,44 +806,54 @@ static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok) return !*src; } -/* Only C-style identifier is permitted. This can be relaxed if - * necessary. - */ -static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) -{ - return __btf_name_valid(btf, offset, false); -} - +/* Allow any printable character in DATASEC names */ static bool btf_name_valid_section(const struct btf *btf, u32 offset) { - return __btf_name_valid(btf, offset, true); + /* offset must be valid */ + const char *src = btf_str_by_offset(btf, offset); + const char *src_limit; + + if (!*src) + return false; + + /* set a limit on identifier length */ + src_limit = src + KSYM_NAME_LEN; + while (*src && src < src_limit) { + if (!isprint(*src)) + return false; + src++; + } + + return !*src; } static const char *__btf_name_by_offset(const struct btf *btf, u32 offset) { + const char *name; + if (!offset) return "(anon)"; - else if (offset < btf->hdr.str_len) - return &btf->strings[offset]; - else - return "(invalid-name-offset)"; + + name = btf_str_by_offset(btf, offset); + return name ?: "(invalid-name-offset)"; } const char *btf_name_by_offset(const struct btf *btf, u32 offset) { - if (offset < btf->hdr.str_len) - return &btf->strings[offset]; - - return NULL; + return btf_str_by_offset(btf, offset); } const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) { - if (type_id > btf->nr_types) - return NULL; + while (type_id < btf->start_id) + btf = btf->base_btf; + type_id -= btf->start_id; + if (type_id >= btf->nr_types) + return NULL; return btf->types[type_id]; } +EXPORT_SYMBOL_GPL(btf_type_by_id); /* * Regular int is not a bit field and it must be either @@ -676,6 +919,489 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, return true; } +/* Similar to btf_type_skip_modifiers() but does not skip typedefs. */ +static const struct btf_type *btf_type_skip_qualifiers(const struct btf *btf, + u32 id) +{ + const struct btf_type *t = btf_type_by_id(btf, id); + + while (btf_type_is_modifier(t) && + BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) { + t = btf_type_by_id(btf, t->type); + } + + return t; +} + +#define BTF_SHOW_MAX_ITER 10 + +#define BTF_KIND_BIT(kind) (1ULL << kind) + +/* + * Populate show->state.name with type name information. + * Format of type name is + * + * [.member_name = ] (type_name) + */ +static const char *btf_show_name(struct btf_show *show) +{ + /* BTF_MAX_ITER array suffixes "[]" */ + const char *array_suffixes = "[][][][][][][][][][]"; + const char *array_suffix = &array_suffixes[strlen(array_suffixes)]; + /* BTF_MAX_ITER pointer suffixes "*" */ + const char *ptr_suffixes = "**********"; + const char *ptr_suffix = &ptr_suffixes[strlen(ptr_suffixes)]; + const char *name = NULL, *prefix = "", *parens = ""; + const struct btf_member *m = show->state.member; + const struct btf_type *t; + const struct btf_array *array; + u32 id = show->state.type_id; + const char *member = NULL; + bool show_member = false; + u64 kinds = 0; + int i; + + show->state.name[0] = '\0'; + + /* + * Don't show type name if we're showing an array member; + * in that case we show the array type so don't need to repeat + * ourselves for each member. + */ + if (show->state.array_member) + return ""; + + /* Retrieve member name, if any. */ + if (m) { + member = btf_name_by_offset(show->btf, m->name_off); + show_member = strlen(member) > 0; + id = m->type; + } + + /* + * Start with type_id, as we have resolved the struct btf_type * + * via btf_modifier_show() past the parent typedef to the child + * struct, int etc it is defined as. In such cases, the type_id + * still represents the starting type while the struct btf_type * + * in our show->state points at the resolved type of the typedef. + */ + t = btf_type_by_id(show->btf, id); + if (!t) + return ""; + + /* + * The goal here is to build up the right number of pointer and + * array suffixes while ensuring the type name for a typedef + * is represented. Along the way we accumulate a list of + * BTF kinds we have encountered, since these will inform later + * display; for example, pointer types will not require an + * opening "{" for struct, we will just display the pointer value. + * + * We also want to accumulate the right number of pointer or array + * indices in the format string while iterating until we get to + * the typedef/pointee/array member target type. + * + * We start by pointing at the end of pointer and array suffix + * strings; as we accumulate pointers and arrays we move the pointer + * or array string backwards so it will show the expected number of + * '*' or '[]' for the type. BTF_SHOW_MAX_ITER of nesting of pointers + * and/or arrays and typedefs are supported as a precaution. + * + * We also want to get typedef name while proceeding to resolve + * type it points to so that we can add parentheses if it is a + * "typedef struct" etc. + */ + for (i = 0; i < BTF_SHOW_MAX_ITER; i++) { + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_TYPEDEF: + if (!name) + name = btf_name_by_offset(show->btf, + t->name_off); + kinds |= BTF_KIND_BIT(BTF_KIND_TYPEDEF); + id = t->type; + break; + case BTF_KIND_ARRAY: + kinds |= BTF_KIND_BIT(BTF_KIND_ARRAY); + parens = "["; + if (!t) + return ""; + array = btf_type_array(t); + if (array_suffix > array_suffixes) + array_suffix -= 2; + id = array->type; + break; + case BTF_KIND_PTR: + kinds |= BTF_KIND_BIT(BTF_KIND_PTR); + if (ptr_suffix > ptr_suffixes) + ptr_suffix -= 1; + id = t->type; + break; + default: + id = 0; + break; + } + if (!id) + break; + t = btf_type_skip_qualifiers(show->btf, id); + } + /* We may not be able to represent this type; bail to be safe */ + if (i == BTF_SHOW_MAX_ITER) + return ""; + + if (!name) + name = btf_name_by_offset(show->btf, t->name_off); + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + prefix = BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT ? + "struct" : "union"; + /* if it's an array of struct/union, parens is already set */ + if (!(kinds & (BTF_KIND_BIT(BTF_KIND_ARRAY)))) + parens = "{"; + break; + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + prefix = "enum"; + break; + default: + break; + } + + /* pointer does not require parens */ + if (kinds & BTF_KIND_BIT(BTF_KIND_PTR)) + parens = ""; + /* typedef does not require struct/union/enum prefix */ + if (kinds & BTF_KIND_BIT(BTF_KIND_TYPEDEF)) + prefix = ""; + + if (!name) + name = ""; + + /* Even if we don't want type name info, we want parentheses etc */ + if (show->flags & BTF_SHOW_NONAME) + snprintf(show->state.name, sizeof(show->state.name), "%s", + parens); + else + snprintf(show->state.name, sizeof(show->state.name), + "%s%s%s(%s%s%s%s%s%s)%s", + /* first 3 strings comprise ".member = " */ + show_member ? "." : "", + show_member ? member : "", + show_member ? " = " : "", + /* ...next is our prefix (struct, enum, etc) */ + prefix, + strlen(prefix) > 0 && strlen(name) > 0 ? " " : "", + /* ...this is the type name itself */ + name, + /* ...suffixed by the appropriate '*', '[]' suffixes */ + strlen(ptr_suffix) > 0 ? " " : "", ptr_suffix, + array_suffix, parens); + + return show->state.name; +} + +static const char *__btf_show_indent(struct btf_show *show) +{ + const char *indents = " "; + const char *indent = &indents[strlen(indents)]; + + if ((indent - show->state.depth) >= indents) + return indent - show->state.depth; + return indents; +} + +static const char *btf_show_indent(struct btf_show *show) +{ + return show->flags & BTF_SHOW_COMPACT ? "" : __btf_show_indent(show); +} + +static const char *btf_show_newline(struct btf_show *show) +{ + return show->flags & BTF_SHOW_COMPACT ? "" : "\n"; +} + +static const char *btf_show_delim(struct btf_show *show) +{ + if (show->state.depth == 0) + return ""; + + if ((show->flags & BTF_SHOW_COMPACT) && show->state.type && + BTF_INFO_KIND(show->state.type->info) == BTF_KIND_UNION) + return "|"; + + return ","; +} + +__printf(2, 3) static void btf_show(struct btf_show *show, const char *fmt, ...) +{ + va_list args; + + if (!show->state.depth_check) { + va_start(args, fmt); + show->showfn(show, fmt, args); + va_end(args); + } +} + +/* Macros are used here as btf_show_type_value[s]() prepends and appends + * format specifiers to the format specifier passed in; these do the work of + * adding indentation, delimiters etc while the caller simply has to specify + * the type value(s) in the format specifier + value(s). + */ +#define btf_show_type_value(show, fmt, value) \ + do { \ + if ((value) != (__typeof__(value))0 || \ + (show->flags & BTF_SHOW_ZERO) || \ + show->state.depth == 0) { \ + btf_show(show, "%s%s" fmt "%s%s", \ + btf_show_indent(show), \ + btf_show_name(show), \ + value, btf_show_delim(show), \ + btf_show_newline(show)); \ + if (show->state.depth > show->state.depth_to_show) \ + show->state.depth_to_show = show->state.depth; \ + } \ + } while (0) + +#define btf_show_type_values(show, fmt, ...) \ + do { \ + btf_show(show, "%s%s" fmt "%s%s", btf_show_indent(show), \ + btf_show_name(show), \ + __VA_ARGS__, btf_show_delim(show), \ + btf_show_newline(show)); \ + if (show->state.depth > show->state.depth_to_show) \ + show->state.depth_to_show = show->state.depth; \ + } while (0) + +/* How much is left to copy to safe buffer after @data? */ +static int btf_show_obj_size_left(struct btf_show *show, void *data) +{ + return show->obj.head + show->obj.size - data; +} + +/* Is object pointed to by @data of @size already copied to our safe buffer? */ +static bool btf_show_obj_is_safe(struct btf_show *show, void *data, int size) +{ + return data >= show->obj.data && + (data + size) < (show->obj.data + BTF_SHOW_OBJ_SAFE_SIZE); +} + +/* + * If object pointed to by @data of @size falls within our safe buffer, return + * the equivalent pointer to the same safe data. Assumes + * copy_from_kernel_nofault() has already happened and our safe buffer is + * populated. + */ +static void *__btf_show_obj_safe(struct btf_show *show, void *data, int size) +{ + if (btf_show_obj_is_safe(show, data, size)) + return show->obj.safe + (data - show->obj.data); + return NULL; +} + +/* + * Return a safe-to-access version of data pointed to by @data. + * We do this by copying the relevant amount of information + * to the struct btf_show obj.safe buffer using copy_from_kernel_nofault(). + * + * If BTF_SHOW_UNSAFE is specified, just return data as-is; no + * safe copy is needed. + * + * Otherwise we need to determine if we have the required amount + * of data (determined by the @data pointer and the size of the + * largest base type we can encounter (represented by + * BTF_SHOW_OBJ_BASE_TYPE_SIZE). Having that much data ensures + * that we will be able to print some of the current object, + * and if more is needed a copy will be triggered. + * Some objects such as structs will not fit into the buffer; + * in such cases additional copies when we iterate over their + * members may be needed. + * + * btf_show_obj_safe() is used to return a safe buffer for + * btf_show_start_type(); this ensures that as we recurse into + * nested types we always have safe data for the given type. + * This approach is somewhat wasteful; it's possible for example + * that when iterating over a large union we'll end up copying the + * same data repeatedly, but the goal is safety not performance. + * We use stack data as opposed to per-CPU buffers because the + * iteration over a type can take some time, and preemption handling + * would greatly complicate use of the safe buffer. + */ +static void *btf_show_obj_safe(struct btf_show *show, + const struct btf_type *t, + void *data) +{ + const struct btf_type *rt; + int size_left, size; + void *safe = NULL; + + if (show->flags & BTF_SHOW_UNSAFE) + return data; + + rt = btf_resolve_size(show->btf, t, &size); + if (IS_ERR(rt)) { + show->state.status = PTR_ERR(rt); + return NULL; + } + + /* + * Is this toplevel object? If so, set total object size and + * initialize pointers. Otherwise check if we still fall within + * our safe object data. + */ + if (show->state.depth == 0) { + show->obj.size = size; + show->obj.head = data; + } else { + /* + * If the size of the current object is > our remaining + * safe buffer we _may_ need to do a new copy. However + * consider the case of a nested struct; it's size pushes + * us over the safe buffer limit, but showing any individual + * struct members does not. In such cases, we don't need + * to initiate a fresh copy yet; however we definitely need + * at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes left + * in our buffer, regardless of the current object size. + * The logic here is that as we resolve types we will + * hit a base type at some point, and we need to be sure + * the next chunk of data is safely available to display + * that type info safely. We cannot rely on the size of + * the current object here because it may be much larger + * than our current buffer (e.g. task_struct is 8k). + * All we want to do here is ensure that we can print the + * next basic type, which we can if either + * - the current type size is within the safe buffer; or + * - at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes are left in + * the safe buffer. + */ + safe = __btf_show_obj_safe(show, data, + min(size, + BTF_SHOW_OBJ_BASE_TYPE_SIZE)); + } + + /* + * We need a new copy to our safe object, either because we haven't + * yet copied and are initializing safe data, or because the data + * we want falls outside the boundaries of the safe object. + */ + if (!safe) { + size_left = btf_show_obj_size_left(show, data); + if (size_left > BTF_SHOW_OBJ_SAFE_SIZE) + size_left = BTF_SHOW_OBJ_SAFE_SIZE; + show->state.status = copy_from_kernel_nofault(show->obj.safe, + data, size_left); + if (!show->state.status) { + show->obj.data = data; + safe = show->obj.safe; + } + } + + return safe; +} + +/* + * Set the type we are starting to show and return a safe data pointer + * to be used for showing the associated data. + */ +static void *btf_show_start_type(struct btf_show *show, + const struct btf_type *t, + u32 type_id, void *data) +{ + show->state.type = t; + show->state.type_id = type_id; + show->state.name[0] = '\0'; + + return btf_show_obj_safe(show, t, data); +} + +static void btf_show_end_type(struct btf_show *show) +{ + show->state.type = NULL; + show->state.type_id = 0; + show->state.name[0] = '\0'; +} + +static void *btf_show_start_aggr_type(struct btf_show *show, + const struct btf_type *t, + u32 type_id, void *data) +{ + void *safe_data = btf_show_start_type(show, t, type_id, data); + + if (!safe_data) + return safe_data; + + btf_show(show, "%s%s%s", btf_show_indent(show), + btf_show_name(show), + btf_show_newline(show)); + show->state.depth++; + return safe_data; +} + +static void btf_show_end_aggr_type(struct btf_show *show, + const char *suffix) +{ + show->state.depth--; + btf_show(show, "%s%s%s%s", btf_show_indent(show), suffix, + btf_show_delim(show), btf_show_newline(show)); + btf_show_end_type(show); +} + +static void btf_show_start_member(struct btf_show *show, + const struct btf_member *m) +{ + show->state.member = m; +} + +static void btf_show_start_array_member(struct btf_show *show) +{ + show->state.array_member = 1; + btf_show_start_member(show, NULL); +} + +static void btf_show_end_member(struct btf_show *show) +{ + show->state.member = NULL; +} + +static void btf_show_end_array_member(struct btf_show *show) +{ + show->state.array_member = 0; + btf_show_end_member(show); +} + +static void *btf_show_start_array_type(struct btf_show *show, + const struct btf_type *t, + u32 type_id, + u16 array_encoding, + void *data) +{ + show->state.array_encoding = array_encoding; + show->state.array_terminated = 0; + return btf_show_start_aggr_type(show, t, type_id, data); +} + +static void btf_show_end_array_type(struct btf_show *show) +{ + show->state.array_encoding = 0; + show->state.array_terminated = 0; + btf_show_end_aggr_type(show, "]"); +} + +static void *btf_show_start_struct_type(struct btf_show *show, + const struct btf_type *t, + u32 type_id, + void *data) +{ + return btf_show_start_aggr_type(show, t, type_id, data); +} + +static void btf_show_end_struct_type(struct btf_show *show) +{ + btf_show_end_aggr_type(show, "}"); +} + __printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log, const char *fmt, ...) { @@ -706,23 +1432,28 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env, const char *fmt, ...) { struct bpf_verifier_log *log = &env->log; - u8 kind = BTF_INFO_KIND(t->info); struct btf *btf = env->btf; va_list args; if (!bpf_verifier_log_needed(log)) return; - /* btf verifier prints all types it is processing via - * btf_verifier_log_type(..., fmt = NULL). - * Skip those prints for in-kernel BTF verification. - */ - if (log->level == BPF_LOG_KERNEL && !fmt) - return; + if (log->level == BPF_LOG_KERNEL) { + /* btf verifier prints all types it is processing via + * btf_verifier_log_type(..., fmt = NULL). + * Skip those prints for in-kernel BTF verification. + */ + if (!fmt) + return; + + /* Skip logging when loading module BTF with mismatches permitted */ + if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) + return; + } __btf_verifier_log(log, "[%u] %s %s%s", env->log_type_id, - btf_kind_str[kind], + btf_type_str(t), __btf_name_by_offset(btf, t->name_off), log_details ? " " : ""); @@ -757,8 +1488,15 @@ static void btf_verifier_log_member(struct btf_verifier_env *env, if (!bpf_verifier_log_needed(log)) return; - if (log->level == BPF_LOG_KERNEL && !fmt) - return; + if (log->level == BPF_LOG_KERNEL) { + if (!fmt) + return; + + /* Skip logging when loading module BTF with mismatches permitted */ + if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) + return; + } + /* The CHECK_META phase already did a btf dump. * * If member is logged again, it must hit an error in @@ -846,17 +1584,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) { struct btf *btf = env->btf; - /* < 2 because +1 for btf_void which is always in btf->types[0]. - * btf_void is not accounted in btf->nr_types because btf_void - * does not come from the BTF file. - */ - if (btf->types_size - btf->nr_types < 2) { + if (btf->types_size == btf->nr_types) { /* Expand 'types' array */ struct btf_type **new_types; u32 expand_by, new_size; - if (btf->types_size == BTF_MAX_TYPE) { + if (btf->start_id + btf->types_size == BTF_MAX_TYPE) { btf_verifier_log(env, "Exceeded max num of types"); return -E2BIG; } @@ -870,18 +1604,23 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t) if (!new_types) return -ENOMEM; - if (btf->nr_types == 0) - new_types[0] = &btf_void; - else + if (btf->nr_types == 0) { + if (!btf->base_btf) { + /* lazily init VOID type */ + new_types[0] = &btf_void; + btf->nr_types++; + } + } else { memcpy(new_types, btf->types, - sizeof(*btf->types) * (btf->nr_types + 1)); + sizeof(*btf->types) * btf->nr_types); + } kvfree(btf->types); btf->types = new_types; btf->types_size = new_size; } - btf->types[++(btf->nr_types)] = t; + btf->types[btf->nr_types++] = t; return 0; } @@ -922,12 +1661,78 @@ static void btf_free_id(struct btf *btf) spin_unlock_irqrestore(&btf_idr_lock, flags); } +static void btf_free_kfunc_set_tab(struct btf *btf) +{ + struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab; + int hook; + + if (!tab) + return; + for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) + kfree(tab->sets[hook]); + kfree(tab); + btf->kfunc_set_tab = NULL; +} + +static void btf_free_dtor_kfunc_tab(struct btf *btf) +{ + struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab; + + if (!tab) + return; + kfree(tab); + btf->dtor_kfunc_tab = NULL; +} + +static void btf_struct_metas_free(struct btf_struct_metas *tab) +{ + int i; + + if (!tab) + return; + for (i = 0; i < tab->cnt; i++) + btf_record_free(tab->types[i].record); + kfree(tab); +} + +static void btf_free_struct_meta_tab(struct btf *btf) +{ + struct btf_struct_metas *tab = btf->struct_meta_tab; + + btf_struct_metas_free(tab); + btf->struct_meta_tab = NULL; +} + +static void btf_free_struct_ops_tab(struct btf *btf) +{ + struct btf_struct_ops_tab *tab = btf->struct_ops_tab; + u32 i; + + if (!tab) + return; + + for (i = 0; i < tab->cnt; i++) + bpf_struct_ops_desc_release(&tab->ops[i]); + + kfree(tab); + btf->struct_ops_tab = NULL; +} + static void btf_free(struct btf *btf) { + btf_free_struct_meta_tab(btf); + btf_free_dtor_kfunc_tab(btf); + btf_free_kfunc_set_tab(btf); + btf_free_struct_ops_tab(btf); kvfree(btf->types); kvfree(btf->resolved_sizes); kvfree(btf->resolved_ids); - kvfree(btf->data); + /* vmlinux does not allocate btf->data, it simply points it at + * __start_BTF. + */ + if (!btf_is_vmlinux(btf)) + kvfree(btf->data); + kvfree(btf->base_id_map); kfree(btf); } @@ -938,6 +1743,16 @@ static void btf_free_rcu(struct rcu_head *rcu) btf_free(btf); } +const char *btf_get_name(const struct btf *btf) +{ + return btf->name; +} + +void btf_get(struct btf *btf) +{ + refcount_inc(&btf->refcnt); +} + void btf_put(struct btf *btf) { if (btf && refcount_dec_and_test(&btf->refcnt)) { @@ -946,6 +1761,23 @@ void btf_put(struct btf *btf) } } +struct btf *btf_base_btf(const struct btf *btf) +{ + return btf->base_btf; +} + +const struct btf_header *btf_header(const struct btf *btf) +{ + return &btf->hdr; +} + +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) +{ + btf->base_btf = (struct btf *)base_btf; + btf->start_id = btf_nr_types(base_btf); + btf->start_str_off = base_btf->hdr.str_len; +} + static int env_resolve_init(struct btf_verifier_env *env) { struct btf *btf = env->btf; @@ -954,18 +1786,17 @@ static int env_resolve_init(struct btf_verifier_env *env) u32 *resolved_ids = NULL; u8 *visit_states = NULL; - /* +1 for btf_void */ - resolved_sizes = kvcalloc(nr_types + 1, sizeof(*resolved_sizes), + resolved_sizes = kvcalloc(nr_types, sizeof(*resolved_sizes), GFP_KERNEL | __GFP_NOWARN); if (!resolved_sizes) goto nomem; - resolved_ids = kvcalloc(nr_types + 1, sizeof(*resolved_ids), + resolved_ids = kvcalloc(nr_types, sizeof(*resolved_ids), GFP_KERNEL | __GFP_NOWARN); if (!resolved_ids) goto nomem; - visit_states = kvcalloc(nr_types + 1, sizeof(*visit_states), + visit_states = kvcalloc(nr_types, sizeof(*visit_states), GFP_KERNEL | __GFP_NOWARN); if (!visit_states) goto nomem; @@ -1017,21 +1848,27 @@ static bool env_type_is_resolve_sink(const struct btf_verifier_env *env, static bool env_type_is_resolved(const struct btf_verifier_env *env, u32 type_id) { - return env->visit_states[type_id] == RESOLVED; + /* base BTF types should be resolved by now */ + if (type_id < env->btf->start_id) + return true; + + return env->visit_states[type_id - env->btf->start_id] == RESOLVED; } static int env_stack_push(struct btf_verifier_env *env, const struct btf_type *t, u32 type_id) { + const struct btf *btf = env->btf; struct resolve_vertex *v; if (env->top_stack == MAX_RESOLVE_DEPTH) return -E2BIG; - if (env->visit_states[type_id] != NOT_VISITED) + if (type_id < btf->start_id + || env->visit_states[type_id - btf->start_id] != NOT_VISITED) return -EEXIST; - env->visit_states[type_id] = VISITED; + env->visit_states[type_id - btf->start_id] = VISITED; v = &env->stack[env->top_stack++]; v->t = t; @@ -1061,6 +1898,7 @@ static void env_stack_pop_resolved(struct btf_verifier_env *env, u32 type_id = env->stack[--(env->top_stack)].type_id; struct btf *btf = env->btf; + type_id -= btf->start_id; /* adjust to local type id */ btf->resolved_sizes[type_id] = resolved_size; btf->resolved_ids[type_id] = resolved_type_id; env->visit_states[type_id] = RESOLVED; @@ -1078,23 +1916,27 @@ static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env) * *type_size: (x * y * sizeof(u32)). Hence, *type_size always * corresponds to the return type. * *elem_type: u32 + * *elem_id: id of u32 * *total_nelems: (x * y). Hence, individual elem size is * (*type_size / *total_nelems) + * *type_id: id of type if it's changed within the function, 0 if not * * type: is not an array (e.g. const struct X) * return type: type "struct X" * *type_size: sizeof(struct X) * *elem_type: same as return type ("struct X") + * *elem_id: 0 * *total_nelems: 1 + * *type_id: id of type if it's changed within the function, 0 if not */ -const struct btf_type * -btf_resolve_size(const struct btf *btf, const struct btf_type *type, - u32 *type_size, const struct btf_type **elem_type, - u32 *total_nelems) +static const struct btf_type * +__btf_resolve_size(const struct btf *btf, const struct btf_type *type, + u32 *type_size, const struct btf_type **elem_type, + u32 *elem_id, u32 *total_nelems, u32 *type_id) { const struct btf_type *array_type = NULL; - const struct btf_array *array; - u32 i, size, nelems = 1; + const struct btf_array *array = NULL; + u32 i, size, nelems = 1, id = 0; for (i = 0; i < MAX_RESOLVE_DEPTH; i++) { switch (BTF_INFO_KIND(type->info)) { @@ -1103,6 +1945,8 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type, case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM64: size = type->size; goto resolved; @@ -1115,6 +1959,8 @@ btf_resolve_size(const struct btf *btf, const struct btf_type *type, case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: + id = type->type; type = btf_type_by_id(btf, type->type); break; @@ -1145,18 +1991,45 @@ resolved: *total_nelems = nelems; if (elem_type) *elem_type = type; + if (elem_id) + *elem_id = array ? array->type : 0; + if (type_id && id) + *type_id = id; return array_type ? : type; } +const struct btf_type * +btf_resolve_size(const struct btf *btf, const struct btf_type *type, + u32 *type_size) +{ + return __btf_resolve_size(btf, type, type_size, NULL, NULL, NULL, NULL); +} + +static u32 btf_resolved_type_id(const struct btf *btf, u32 type_id) +{ + while (type_id < btf->start_id) + btf = btf->base_btf; + + return btf->resolved_ids[type_id - btf->start_id]; +} + /* The input param "type_id" must point to a needs_resolve type */ static const struct btf_type *btf_type_id_resolve(const struct btf *btf, u32 *type_id) { - *type_id = btf->resolved_ids[*type_id]; + *type_id = btf_resolved_type_id(btf, *type_id); return btf_type_by_id(btf, *type_id); } +static u32 btf_resolved_type_size(const struct btf *btf, u32 type_id) +{ + while (type_id < btf->start_id) + btf = btf->base_btf; + + return btf->resolved_sizes[type_id - btf->start_id]; +} + const struct btf_type *btf_type_id_size(const struct btf *btf, u32 *type_id, u32 *ret_size) { @@ -1171,7 +2044,7 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, if (btf_type_has_size(size_type)) { size = size_type->size; } else if (btf_type_is_array(size_type)) { - size = btf->resolved_sizes[size_type_id]; + size = btf_resolved_type_size(btf, size_type_id); } else if (btf_type_is_ptr(size_type)) { size = sizeof(void *); } else { @@ -1179,14 +2052,14 @@ const struct btf_type *btf_type_id_size(const struct btf *btf, !btf_type_is_var(size_type))) return NULL; - size_type_id = btf->resolved_ids[size_type_id]; + size_type_id = btf_resolved_type_id(btf, size_type_id); size_type = btf_type_by_id(btf, size_type_id); if (btf_type_nosize_or_null(size_type)) return NULL; else if (btf_type_has_size(size_type)) size = size_type->size; else if (btf_type_is_array(size_type)) - size = btf->resolved_sizes[size_type_id]; + size = btf_resolved_type_size(btf, size_type_id); else if (btf_type_is_ptr(size_type)) size = sizeof(void *); else @@ -1220,7 +2093,7 @@ static int btf_df_check_kflag_member(struct btf_verifier_env *env, return -EINVAL; } -/* Used for ptr, array and struct/union type members. +/* Used for ptr, array struct/union and float type members. * int, enum and modifier types have their specific callback functions. */ static int btf_generic_check_kflag_member(struct btf_verifier_env *env, @@ -1249,11 +2122,11 @@ static int btf_df_resolve(struct btf_verifier_env *env, return -EINVAL; } -static void btf_df_seq_show(const struct btf *btf, const struct btf_type *t, - u32 type_id, void *data, u8 bits_offsets, - struct seq_file *m) +static void btf_df_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offsets, + struct btf_show *show) { - seq_printf(m, "<unsupported kind:%u>", BTF_INFO_KIND(t->info)); + btf_show(show, "<unsupported kind:%u>", BTF_INFO_KIND(t->info)); } static int btf_int_check_member(struct btf_verifier_env *env, @@ -1426,7 +2299,7 @@ static void btf_int_log(struct btf_verifier_env *env, btf_int_encoding_str(BTF_INT_ENCODING(int_data))); } -static void btf_int128_print(struct seq_file *m, void *data) +static void btf_int128_print(struct btf_show *show, void *data) { /* data points to a __int128 number. * Suppose @@ -1445,9 +2318,10 @@ static void btf_int128_print(struct seq_file *m, void *data) lower_num = *(u64 *)data; #endif if (upper_num == 0) - seq_printf(m, "0x%llx", lower_num); + btf_show_type_value(show, "0x%llx", lower_num); else - seq_printf(m, "0x%llx%016llx", upper_num, lower_num); + btf_show_type_values(show, "0x%llx%016llx", upper_num, + lower_num); } static void btf_int128_shift(u64 *print_num, u16 left_shift_bits, @@ -1491,8 +2365,8 @@ static void btf_int128_shift(u64 *print_num, u16 left_shift_bits, #endif } -static void btf_bitfield_seq_show(void *data, u8 bits_offset, - u8 nr_bits, struct seq_file *m) +static void btf_bitfield_show(void *data, u8 bits_offset, + u8 nr_bits, struct btf_show *show) { u16 left_shift_bits, right_shift_bits; u8 nr_copy_bytes; @@ -1512,14 +2386,14 @@ static void btf_bitfield_seq_show(void *data, u8 bits_offset, right_shift_bits = BITS_PER_U128 - nr_bits; btf_int128_shift(print_num, left_shift_bits, right_shift_bits); - btf_int128_print(m, print_num); + btf_int128_print(show, print_num); } -static void btf_int_bits_seq_show(const struct btf *btf, - const struct btf_type *t, - void *data, u8 bits_offset, - struct seq_file *m) +static void btf_int_bits_show(const struct btf *btf, + const struct btf_type *t, + void *data, u8 bits_offset, + struct btf_show *show) { u32 int_data = btf_type_int(t); u8 nr_bits = BTF_INT_BITS(int_data); @@ -1532,55 +2406,77 @@ static void btf_int_bits_seq_show(const struct btf *btf, total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); data += BITS_ROUNDDOWN_BYTES(total_bits_offset); bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset); - btf_bitfield_seq_show(data, bits_offset, nr_bits, m); + btf_bitfield_show(data, bits_offset, nr_bits, show); } -static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, - u32 type_id, void *data, u8 bits_offset, - struct seq_file *m) +static void btf_int_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct btf_show *show) { u32 int_data = btf_type_int(t); u8 encoding = BTF_INT_ENCODING(int_data); bool sign = encoding & BTF_INT_SIGNED; u8 nr_bits = BTF_INT_BITS(int_data); + void *safe_data; + + safe_data = btf_show_start_type(show, t, type_id, data); + if (!safe_data) + return; if (bits_offset || BTF_INT_OFFSET(int_data) || BITS_PER_BYTE_MASKED(nr_bits)) { - btf_int_bits_seq_show(btf, t, data, bits_offset, m); - return; + btf_int_bits_show(btf, t, safe_data, bits_offset, show); + goto out; } switch (nr_bits) { case 128: - btf_int128_print(m, data); + btf_int128_print(show, safe_data); break; case 64: if (sign) - seq_printf(m, "%lld", *(s64 *)data); + btf_show_type_value(show, "%lld", *(s64 *)safe_data); else - seq_printf(m, "%llu", *(u64 *)data); + btf_show_type_value(show, "%llu", *(u64 *)safe_data); break; case 32: if (sign) - seq_printf(m, "%d", *(s32 *)data); + btf_show_type_value(show, "%d", *(s32 *)safe_data); else - seq_printf(m, "%u", *(u32 *)data); + btf_show_type_value(show, "%u", *(u32 *)safe_data); break; case 16: if (sign) - seq_printf(m, "%d", *(s16 *)data); + btf_show_type_value(show, "%d", *(s16 *)safe_data); else - seq_printf(m, "%u", *(u16 *)data); + btf_show_type_value(show, "%u", *(u16 *)safe_data); break; case 8: + if (show->state.array_encoding == BTF_INT_CHAR) { + /* check for null terminator */ + if (show->state.array_terminated) + break; + if (*(char *)data == '\0') { + show->state.array_terminated = 1; + break; + } + if (isprint(*(char *)data)) { + btf_show_type_value(show, "'%c'", + *(char *)safe_data); + break; + } + } if (sign) - seq_printf(m, "%d", *(s8 *)data); + btf_show_type_value(show, "%d", *(s8 *)safe_data); else - seq_printf(m, "%u", *(u8 *)data); + btf_show_type_value(show, "%u", *(u8 *)safe_data); break; default: - btf_int_bits_seq_show(btf, t, data, bits_offset, m); + btf_int_bits_show(btf, t, safe_data, bits_offset, show); + break; } +out: + btf_show_end_type(show); } static const struct btf_kind_operations int_ops = { @@ -1589,7 +2485,7 @@ static const struct btf_kind_operations int_ops = { .check_member = btf_int_check_member, .check_kflag_member = btf_int_check_kflag_member, .log_details = btf_int_log, - .seq_show = btf_int_seq_show, + .show = btf_int_show, }; static int btf_modifier_check_member(struct btf_verifier_env *env, @@ -1672,6 +2568,8 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) { + const char *value; + if (btf_type_vlen(t)) { btf_verifier_log_type(env, t, "vlen != 0"); return -EINVAL; @@ -1687,7 +2585,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, return -EINVAL; } - /* typedef type must have a valid name, and other ref types, + /* typedef/type_tag type must have a valid name, and other ref types, * volatile, const, restrict, should have a null name. */ if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) { @@ -1696,6 +2594,12 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } + } else if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG) { + value = btf_name_by_offset(env->btf, t->name_off); + if (!value || !value[0]) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } } else { if (t->name_off) { btf_verifier_log_type(env, t, "Invalid name"); @@ -1820,7 +2724,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env, * * We now need to continue from the last-resolved-ptr to * ensure the last-resolved-ptr will not referring back to - * the currenct ptr (t). + * the current ptr (t). */ if (btf_type_is_modifier(next_type)) { const struct btf_type *resolved_type; @@ -1853,34 +2757,44 @@ static int btf_ptr_resolve(struct btf_verifier_env *env, return 0; } -static void btf_modifier_seq_show(const struct btf *btf, - const struct btf_type *t, - u32 type_id, void *data, - u8 bits_offset, struct seq_file *m) +static void btf_modifier_show(const struct btf *btf, + const struct btf_type *t, + u32 type_id, void *data, + u8 bits_offset, struct btf_show *show) { if (btf->resolved_ids) t = btf_type_id_resolve(btf, &type_id); else t = btf_type_skip_modifiers(btf, type_id, NULL); - btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); + btf_type_ops(t)->show(btf, t, type_id, data, bits_offset, show); } -static void btf_var_seq_show(const struct btf *btf, const struct btf_type *t, - u32 type_id, void *data, u8 bits_offset, - struct seq_file *m) +static void btf_var_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct btf_show *show) { t = btf_type_id_resolve(btf, &type_id); - btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); + btf_type_ops(t)->show(btf, t, type_id, data, bits_offset, show); } -static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t, - u32 type_id, void *data, u8 bits_offset, - struct seq_file *m) +static void btf_ptr_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct btf_show *show) { - /* It is a hashed value */ - seq_printf(m, "%p", *(void **)data); + void *safe_data; + + safe_data = btf_show_start_type(show, t, type_id, data); + if (!safe_data) + return; + + /* It is a hashed value unless BTF_SHOW_PTR_RAW is specified */ + if (show->flags & BTF_SHOW_PTR_RAW) + btf_show_type_value(show, "0x%px", *(void **)safe_data); + else + btf_show_type_value(show, "0x%p", *(void **)safe_data); + btf_show_end_type(show); } static void btf_ref_type_log(struct btf_verifier_env *env, @@ -1889,22 +2803,22 @@ static void btf_ref_type_log(struct btf_verifier_env *env, btf_verifier_log(env, "type_id=%u", t->type); } -static struct btf_kind_operations modifier_ops = { +static const struct btf_kind_operations modifier_ops = { .check_meta = btf_ref_type_check_meta, .resolve = btf_modifier_resolve, .check_member = btf_modifier_check_member, .check_kflag_member = btf_modifier_check_kflag_member, .log_details = btf_ref_type_log, - .seq_show = btf_modifier_seq_show, + .show = btf_modifier_show, }; -static struct btf_kind_operations ptr_ops = { +static const struct btf_kind_operations ptr_ops = { .check_meta = btf_ref_type_check_meta, .resolve = btf_ptr_resolve, .check_member = btf_ptr_check_member, .check_kflag_member = btf_generic_check_kflag_member, .log_details = btf_ref_type_log, - .seq_show = btf_ptr_seq_show, + .show = btf_ptr_show, }; static s32 btf_fwd_check_meta(struct btf_verifier_env *env, @@ -1939,13 +2853,13 @@ static void btf_fwd_type_log(struct btf_verifier_env *env, btf_verifier_log(env, "%s", btf_type_kflag(t) ? "union" : "struct"); } -static struct btf_kind_operations fwd_ops = { +static const struct btf_kind_operations fwd_ops = { .check_meta = btf_fwd_check_meta, .resolve = btf_df_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_fwd_type_log, - .seq_show = btf_df_seq_show, + .show = btf_df_show, }; static int btf_array_check_member(struct btf_verifier_env *env, @@ -2104,37 +3018,99 @@ static void btf_array_log(struct btf_verifier_env *env, array->type, array->index_type, array->nelems); } -static void btf_array_seq_show(const struct btf *btf, const struct btf_type *t, - u32 type_id, void *data, u8 bits_offset, - struct seq_file *m) +static void __btf_array_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct btf_show *show) { const struct btf_array *array = btf_type_array(t); const struct btf_kind_operations *elem_ops; const struct btf_type *elem_type; - u32 i, elem_size, elem_type_id; + u32 i, elem_size = 0, elem_type_id; + u16 encoding = 0; elem_type_id = array->type; - elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); + elem_type = btf_type_skip_modifiers(btf, elem_type_id, NULL); + if (elem_type && btf_type_has_size(elem_type)) + elem_size = elem_type->size; + + if (elem_type && btf_type_is_int(elem_type)) { + u32 int_type = btf_type_int(elem_type); + + encoding = BTF_INT_ENCODING(int_type); + + /* + * BTF_INT_CHAR encoding never seems to be set for + * char arrays, so if size is 1 and element is + * printable as a char, we'll do that. + */ + if (elem_size == 1) + encoding = BTF_INT_CHAR; + } + + if (!btf_show_start_array_type(show, t, type_id, encoding, data)) + return; + + if (!elem_type) + goto out; elem_ops = btf_type_ops(elem_type); - seq_puts(m, "["); + for (i = 0; i < array->nelems; i++) { - if (i) - seq_puts(m, ","); - elem_ops->seq_show(btf, elem_type, elem_type_id, data, - bits_offset, m); + btf_show_start_array_member(show); + + elem_ops->show(btf, elem_type, elem_type_id, data, + bits_offset, show); data += elem_size; + + btf_show_end_array_member(show); + + if (show->state.array_terminated) + break; } - seq_puts(m, "]"); +out: + btf_show_end_array_type(show); } -static struct btf_kind_operations array_ops = { +static void btf_array_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct btf_show *show) +{ + const struct btf_member *m = show->state.member; + + /* + * First check if any members would be shown (are non-zero). + * See comments above "struct btf_show" definition for more + * details on how this works at a high-level. + */ + if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) { + if (!show->state.depth_check) { + show->state.depth_check = show->state.depth + 1; + show->state.depth_to_show = 0; + } + __btf_array_show(btf, t, type_id, data, bits_offset, show); + show->state.member = m; + + if (show->state.depth_check != show->state.depth + 1) + return; + show->state.depth_check = 0; + + if (show->state.depth_to_show <= show->state.depth) + return; + /* + * Reaching here indicates we have recursed and found + * non-zero array member(s). + */ + } + __btf_array_show(btf, t, type_id, data, bits_offset, show); +} + +static const struct btf_kind_operations array_ops = { .check_meta = btf_array_check_meta, .resolve = btf_array_resolve, .check_member = btf_array_check_member, .check_kflag_member = btf_generic_check_kflag_member, .log_details = btf_array_log, - .seq_show = btf_array_seq_show, + .show = btf_array_show, }; static int btf_struct_check_member(struct btf_verifier_env *env, @@ -2213,7 +3189,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, return -EINVAL; } - offset = btf_member_bit_offset(t, member); + offset = __btf_member_bit_offset(t, member); if (is_union && offset) { btf_verifier_log_member(env, t, member, "Invalid member bits_offset"); @@ -2257,7 +3233,7 @@ static int btf_struct_resolve(struct btf_verifier_env *env, if (v->next_member) { const struct btf_type *last_member_type; const struct btf_member *last_member; - u16 last_member_type_id; + u32 last_member_type_id; last_member = btf_type_member(v->t) + v->next_member - 1; last_member_type_id = last_member->type; @@ -2320,52 +3296,851 @@ static void btf_struct_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } -/* find 'struct bpf_spin_lock' in map value. - * return >= 0 offset if found - * and < 0 in case of error - */ -int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) +enum { + BTF_FIELD_IGNORE = 0, + BTF_FIELD_FOUND = 1, +}; + +struct btf_field_info { + enum btf_field_type type; + u32 off; + union { + struct { + u32 type_id; + } kptr; + struct { + const char *node_name; + u32 value_btf_id; + } graph_root; + }; +}; + +static int btf_find_struct(const struct btf *btf, const struct btf_type *t, + u32 off, int sz, enum btf_field_type field_type, + struct btf_field_info *info) { - const struct btf_member *member; - u32 i, off = -ENOENT; + if (!__btf_type_is_struct(t)) + return BTF_FIELD_IGNORE; + if (t->size != sz) + return BTF_FIELD_IGNORE; + info->type = field_type; + info->off = off; + return BTF_FIELD_FOUND; +} + +static int btf_find_kptr(const struct btf *btf, const struct btf_type *t, + u32 off, int sz, struct btf_field_info *info, u32 field_mask) +{ + enum btf_field_type type; + u32 res_id; + + /* Permit modifiers on the pointer itself */ + if (btf_type_is_volatile(t)) + t = btf_type_by_id(btf, t->type); + /* For PTR, sz is always == 8 */ + if (!btf_type_is_ptr(t)) + return BTF_FIELD_IGNORE; + t = btf_type_by_id(btf, t->type); + + if (!btf_type_is_type_tag(t)) + return BTF_FIELD_IGNORE; + /* Reject extra tags */ + if (btf_type_is_type_tag(btf_type_by_id(btf, t->type))) + return -EINVAL; + if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off))) + type = BPF_KPTR_UNREF; + else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off))) + type = BPF_KPTR_REF; + else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off))) + type = BPF_KPTR_PERCPU; + else if (!strcmp("uptr", __btf_name_by_offset(btf, t->name_off))) + type = BPF_UPTR; + else + return -EINVAL; + + if (!(type & field_mask)) + return BTF_FIELD_IGNORE; + /* Get the base type */ + t = btf_type_skip_modifiers(btf, t->type, &res_id); + /* Only pointer to struct is allowed */ if (!__btf_type_is_struct(t)) return -EINVAL; - for_each_member(i, t, member) { - const struct btf_type *member_type = btf_type_by_id(btf, - member->type); - if (!__btf_type_is_struct(member_type)) + info->type = type; + info->off = off; + info->kptr.type_id = res_id; + return BTF_FIELD_FOUND; +} + +int btf_find_next_decl_tag(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key, int last_id) +{ + int len = strlen(tag_key); + int i, n; + + for (i = last_id + 1, n = btf_nr_types(btf); i < n; i++) { + const struct btf_type *t = btf_type_by_id(btf, i); + + if (!btf_type_is_decl_tag(t)) continue; - if (member_type->size != sizeof(struct bpf_spin_lock)) + if (pt != btf_type_by_id(btf, t->type)) continue; - if (strcmp(__btf_name_by_offset(btf, member_type->name_off), - "bpf_spin_lock")) + if (btf_type_decl_tag(t)->component_idx != comp_idx) continue; - if (off != -ENOENT) - /* only one 'struct bpf_spin_lock' is allowed */ - return -E2BIG; - off = btf_member_bit_offset(t, member); + if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len)) + continue; + return i; + } + return -ENOENT; +} + +const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt, + int comp_idx, const char *tag_key) +{ + const char *value = NULL; + const struct btf_type *t; + int len, id; + + id = btf_find_next_decl_tag(btf, pt, comp_idx, tag_key, 0); + if (id < 0) + return ERR_PTR(id); + + t = btf_type_by_id(btf, id); + len = strlen(tag_key); + value = __btf_name_by_offset(btf, t->name_off) + len; + + /* Prevent duplicate entries for same type */ + id = btf_find_next_decl_tag(btf, pt, comp_idx, tag_key, id); + if (id >= 0) + return ERR_PTR(-EEXIST); + + return value; +} + +static int +btf_find_graph_root(const struct btf *btf, const struct btf_type *pt, + const struct btf_type *t, int comp_idx, u32 off, + int sz, struct btf_field_info *info, + enum btf_field_type head_type) +{ + const char *node_field_name; + const char *value_type; + s32 id; + + if (!__btf_type_is_struct(t)) + return BTF_FIELD_IGNORE; + if (t->size != sz) + return BTF_FIELD_IGNORE; + value_type = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:"); + if (IS_ERR(value_type)) + return -EINVAL; + node_field_name = strstr(value_type, ":"); + if (!node_field_name) + return -EINVAL; + value_type = kstrndup(value_type, node_field_name - value_type, GFP_KERNEL | __GFP_NOWARN); + if (!value_type) + return -ENOMEM; + id = btf_find_by_name_kind(btf, value_type, BTF_KIND_STRUCT); + kfree(value_type); + if (id < 0) + return id; + node_field_name++; + if (str_is_empty(node_field_name)) + return -EINVAL; + info->type = head_type; + info->off = off; + info->graph_root.value_btf_id = id; + info->graph_root.node_name = node_field_name; + return BTF_FIELD_FOUND; +} + +#define field_mask_test_name(field_type, field_type_str) \ + if (field_mask & field_type && !strcmp(name, field_type_str)) { \ + type = field_type; \ + goto end; \ + } + +static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_type, + u32 field_mask, u32 *seen_mask, + int *align, int *sz) +{ + int type = 0; + const char *name = __btf_name_by_offset(btf, var_type->name_off); + + if (field_mask & BPF_SPIN_LOCK) { + if (!strcmp(name, "bpf_spin_lock")) { + if (*seen_mask & BPF_SPIN_LOCK) + return -E2BIG; + *seen_mask |= BPF_SPIN_LOCK; + type = BPF_SPIN_LOCK; + goto end; + } + } + if (field_mask & BPF_TIMER) { + if (!strcmp(name, "bpf_timer")) { + if (*seen_mask & BPF_TIMER) + return -E2BIG; + *seen_mask |= BPF_TIMER; + type = BPF_TIMER; + goto end; + } + } + if (field_mask & BPF_WORKQUEUE) { + if (!strcmp(name, "bpf_wq")) { + if (*seen_mask & BPF_WORKQUEUE) + return -E2BIG; + *seen_mask |= BPF_WORKQUEUE; + type = BPF_WORKQUEUE; + goto end; + } + } + field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head"); + field_mask_test_name(BPF_LIST_NODE, "bpf_list_node"); + field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root"); + field_mask_test_name(BPF_RB_NODE, "bpf_rb_node"); + field_mask_test_name(BPF_REFCOUNT, "bpf_refcount"); + + /* Only return BPF_KPTR when all other types with matchable names fail */ + if (field_mask & (BPF_KPTR | BPF_UPTR) && !__btf_type_is_struct(var_type)) { + type = BPF_KPTR_REF; + goto end; + } + return 0; +end: + *sz = btf_field_type_size(type); + *align = btf_field_type_align(type); + return type; +} + +#undef field_mask_test_name + +/* Repeat a number of fields for a specified number of times. + * + * Copy the fields starting from the first field and repeat them for + * repeat_cnt times. The fields are repeated by adding the offset of each + * field with + * (i + 1) * elem_size + * where i is the repeat index and elem_size is the size of an element. + */ +static int btf_repeat_fields(struct btf_field_info *info, int info_cnt, + u32 field_cnt, u32 repeat_cnt, u32 elem_size) +{ + u32 i, j; + u32 cur; + + /* Ensure not repeating fields that should not be repeated. */ + for (i = 0; i < field_cnt; i++) { + switch (info[i].type) { + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: + case BPF_UPTR: + case BPF_LIST_HEAD: + case BPF_RB_ROOT: + break; + default: + return -EINVAL; + } + } + + /* The type of struct size or variable size is u32, + * so the multiplication will not overflow. + */ + if (field_cnt * (repeat_cnt + 1) > info_cnt) + return -E2BIG; + + cur = field_cnt; + for (i = 0; i < repeat_cnt; i++) { + memcpy(&info[cur], &info[0], field_cnt * sizeof(info[0])); + for (j = 0; j < field_cnt; j++) + info[cur++].off += (i + 1) * elem_size; + } + + return 0; +} + +static int btf_find_struct_field(const struct btf *btf, + const struct btf_type *t, u32 field_mask, + struct btf_field_info *info, int info_cnt, + u32 level); + +/* Find special fields in the struct type of a field. + * + * This function is used to find fields of special types that is not a + * global variable or a direct field of a struct type. It also handles the + * repetition if it is the element type of an array. + */ +static int btf_find_nested_struct(const struct btf *btf, const struct btf_type *t, + u32 off, u32 nelems, + u32 field_mask, struct btf_field_info *info, + int info_cnt, u32 level) +{ + int ret, err, i; + + level++; + if (level >= MAX_RESOLVE_DEPTH) + return -E2BIG; + + ret = btf_find_struct_field(btf, t, field_mask, info, info_cnt, level); + + if (ret <= 0) + return ret; + + /* Shift the offsets of the nested struct fields to the offsets + * related to the container. + */ + for (i = 0; i < ret; i++) + info[i].off += off; + + if (nelems > 1) { + err = btf_repeat_fields(info, info_cnt, ret, nelems - 1, t->size); + if (err == 0) + ret *= nelems; + else + ret = err; + } + + return ret; +} + +static int btf_find_field_one(const struct btf *btf, + const struct btf_type *var, + const struct btf_type *var_type, + int var_idx, + u32 off, u32 expected_size, + u32 field_mask, u32 *seen_mask, + struct btf_field_info *info, int info_cnt, + u32 level) +{ + int ret, align, sz, field_type; + struct btf_field_info tmp; + const struct btf_array *array; + u32 i, nelems = 1; + + /* Walk into array types to find the element type and the number of + * elements in the (flattened) array. + */ + for (i = 0; i < MAX_RESOLVE_DEPTH && btf_type_is_array(var_type); i++) { + array = btf_array(var_type); + nelems *= array->nelems; + var_type = btf_type_by_id(btf, array->type); + } + if (i == MAX_RESOLVE_DEPTH) + return -E2BIG; + if (nelems == 0) + return 0; + + field_type = btf_get_field_type(btf, var_type, + field_mask, seen_mask, &align, &sz); + /* Look into variables of struct types */ + if (!field_type && __btf_type_is_struct(var_type)) { + sz = var_type->size; + if (expected_size && expected_size != sz * nelems) + return 0; + ret = btf_find_nested_struct(btf, var_type, off, nelems, field_mask, + &info[0], info_cnt, level); + return ret; + } + + if (field_type == 0) + return 0; + if (field_type < 0) + return field_type; + + if (expected_size && expected_size != sz * nelems) + return 0; + if (off % align) + return 0; + + switch (field_type) { + case BPF_SPIN_LOCK: + case BPF_TIMER: + case BPF_WORKQUEUE: + case BPF_LIST_NODE: + case BPF_RB_NODE: + case BPF_REFCOUNT: + ret = btf_find_struct(btf, var_type, off, sz, field_type, + info_cnt ? &info[0] : &tmp); + if (ret < 0) + return ret; + break; + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: + case BPF_UPTR: + ret = btf_find_kptr(btf, var_type, off, sz, + info_cnt ? &info[0] : &tmp, field_mask); + if (ret < 0) + return ret; + break; + case BPF_LIST_HEAD: + case BPF_RB_ROOT: + ret = btf_find_graph_root(btf, var, var_type, + var_idx, off, sz, + info_cnt ? &info[0] : &tmp, + field_type); + if (ret < 0) + return ret; + break; + default: + return -EFAULT; + } + + if (ret == BTF_FIELD_IGNORE) + return 0; + if (!info_cnt) + return -E2BIG; + if (nelems > 1) { + ret = btf_repeat_fields(info, info_cnt, 1, nelems - 1, sz); + if (ret < 0) + return ret; + } + return nelems; +} + +static int btf_find_struct_field(const struct btf *btf, + const struct btf_type *t, u32 field_mask, + struct btf_field_info *info, int info_cnt, + u32 level) +{ + int ret, idx = 0; + const struct btf_member *member; + u32 i, off, seen_mask = 0; + + for_each_member(i, t, member) { + const struct btf_type *member_type = btf_type_by_id(btf, + member->type); + + off = __btf_member_bit_offset(t, member); if (off % 8) /* valid C code cannot generate such BTF */ return -EINVAL; off /= 8; - if (off % __alignof__(struct bpf_spin_lock)) - /* valid struct bpf_spin_lock will be 4 byte aligned */ + + ret = btf_find_field_one(btf, t, member_type, i, + off, 0, + field_mask, &seen_mask, + &info[idx], info_cnt - idx, level); + if (ret < 0) + return ret; + idx += ret; + } + return idx; +} + +static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, + u32 field_mask, struct btf_field_info *info, + int info_cnt, u32 level) +{ + int ret, idx = 0; + const struct btf_var_secinfo *vsi; + u32 i, off, seen_mask = 0; + + for_each_vsi(i, t, vsi) { + const struct btf_type *var = btf_type_by_id(btf, vsi->type); + const struct btf_type *var_type = btf_type_by_id(btf, var->type); + + off = vsi->offset; + ret = btf_find_field_one(btf, var, var_type, -1, off, vsi->size, + field_mask, &seen_mask, + &info[idx], info_cnt - idx, + level); + if (ret < 0) + return ret; + idx += ret; + } + return idx; +} + +static int btf_find_field(const struct btf *btf, const struct btf_type *t, + u32 field_mask, struct btf_field_info *info, + int info_cnt) +{ + if (__btf_type_is_struct(t)) + return btf_find_struct_field(btf, t, field_mask, info, info_cnt, 0); + else if (btf_type_is_datasec(t)) + return btf_find_datasec_var(btf, t, field_mask, info, info_cnt, 0); + return -EINVAL; +} + +/* Callers have to ensure the life cycle of btf if it is program BTF */ +static int btf_parse_kptr(const struct btf *btf, struct btf_field *field, + struct btf_field_info *info) +{ + struct module *mod = NULL; + const struct btf_type *t; + /* If a matching btf type is found in kernel or module BTFs, kptr_ref + * is that BTF, otherwise it's program BTF + */ + struct btf *kptr_btf; + int ret; + s32 id; + + /* Find type in map BTF, and use it to look up the matching type + * in vmlinux or module BTFs, by name and kind. + */ + t = btf_type_by_id(btf, info->kptr.type_id); + id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info), + &kptr_btf); + if (id == -ENOENT) { + /* btf_parse_kptr should only be called w/ btf = program BTF */ + WARN_ON_ONCE(btf_is_kernel(btf)); + + /* Type exists only in program BTF. Assume that it's a MEM_ALLOC + * kptr allocated via bpf_obj_new + */ + field->kptr.dtor = NULL; + id = info->kptr.type_id; + kptr_btf = (struct btf *)btf; + goto found_dtor; + } + if (id < 0) + return id; + + /* Find and stash the function pointer for the destruction function that + * needs to be eventually invoked from the map free path. + */ + if (info->type == BPF_KPTR_REF) { + const struct btf_type *dtor_func; + const char *dtor_func_name; + unsigned long addr; + s32 dtor_btf_id; + + /* This call also serves as a whitelist of allowed objects that + * can be used as a referenced pointer and be stored in a map at + * the same time. + */ + dtor_btf_id = btf_find_dtor_kfunc(kptr_btf, id); + if (dtor_btf_id < 0) { + ret = dtor_btf_id; + goto end_btf; + } + + dtor_func = btf_type_by_id(kptr_btf, dtor_btf_id); + if (!dtor_func) { + ret = -ENOENT; + goto end_btf; + } + + if (btf_is_module(kptr_btf)) { + mod = btf_try_get_module(kptr_btf); + if (!mod) { + ret = -ENXIO; + goto end_btf; + } + } + + /* We already verified dtor_func to be btf_type_is_func + * in register_btf_id_dtor_kfuncs. + */ + dtor_func_name = __btf_name_by_offset(kptr_btf, dtor_func->name_off); + addr = kallsyms_lookup_name(dtor_func_name); + if (!addr) { + ret = -EINVAL; + goto end_mod; + } + field->kptr.dtor = (void *)addr; + } + +found_dtor: + field->kptr.btf_id = id; + field->kptr.btf = kptr_btf; + field->kptr.module = mod; + return 0; +end_mod: + module_put(mod); +end_btf: + btf_put(kptr_btf); + return ret; +} + +static int btf_parse_graph_root(const struct btf *btf, + struct btf_field *field, + struct btf_field_info *info, + const char *node_type_name, + size_t node_type_align) +{ + const struct btf_type *t, *n = NULL; + const struct btf_member *member; + u32 offset; + int i; + + t = btf_type_by_id(btf, info->graph_root.value_btf_id); + /* We've already checked that value_btf_id is a struct type. We + * just need to figure out the offset of the list_node, and + * verify its type. + */ + for_each_member(i, t, member) { + if (strcmp(info->graph_root.node_name, + __btf_name_by_offset(btf, member->name_off))) + continue; + /* Invalid BTF, two members with same name */ + if (n) return -EINVAL; + n = btf_type_by_id(btf, member->type); + if (!__btf_type_is_struct(n)) + return -EINVAL; + if (strcmp(node_type_name, __btf_name_by_offset(btf, n->name_off))) + return -EINVAL; + offset = __btf_member_bit_offset(n, member); + if (offset % 8) + return -EINVAL; + offset /= 8; + if (offset % node_type_align) + return -EINVAL; + + field->graph_root.btf = (struct btf *)btf; + field->graph_root.value_btf_id = info->graph_root.value_btf_id; + field->graph_root.node_offset = offset; } - return off; + if (!n) + return -ENOENT; + return 0; +} + +static int btf_parse_list_head(const struct btf *btf, struct btf_field *field, + struct btf_field_info *info) +{ + return btf_parse_graph_root(btf, field, info, "bpf_list_node", + __alignof__(struct bpf_list_node)); +} + +static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field, + struct btf_field_info *info) +{ + return btf_parse_graph_root(btf, field, info, "bpf_rb_node", + __alignof__(struct bpf_rb_node)); +} + +static int btf_field_cmp(const void *_a, const void *_b, const void *priv) +{ + const struct btf_field *a = (const struct btf_field *)_a; + const struct btf_field *b = (const struct btf_field *)_b; + + if (a->offset < b->offset) + return -1; + else if (a->offset > b->offset) + return 1; + return 0; +} + +struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t, + u32 field_mask, u32 value_size) +{ + struct btf_field_info info_arr[BTF_FIELDS_MAX]; + u32 next_off = 0, field_type_size; + struct btf_record *rec; + int ret, i, cnt; + + ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr)); + if (ret < 0) + return ERR_PTR(ret); + if (!ret) + return NULL; + + cnt = ret; + /* This needs to be kzalloc to zero out padding and unused fields, see + * comment in btf_record_equal. + */ + rec = kzalloc(offsetof(struct btf_record, fields[cnt]), GFP_KERNEL | __GFP_NOWARN); + if (!rec) + return ERR_PTR(-ENOMEM); + + rec->spin_lock_off = -EINVAL; + rec->timer_off = -EINVAL; + rec->wq_off = -EINVAL; + rec->refcount_off = -EINVAL; + for (i = 0; i < cnt; i++) { + field_type_size = btf_field_type_size(info_arr[i].type); + if (info_arr[i].off + field_type_size > value_size) { + WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size); + ret = -EFAULT; + goto end; + } + if (info_arr[i].off < next_off) { + ret = -EEXIST; + goto end; + } + next_off = info_arr[i].off + field_type_size; + + rec->field_mask |= info_arr[i].type; + rec->fields[i].offset = info_arr[i].off; + rec->fields[i].type = info_arr[i].type; + rec->fields[i].size = field_type_size; + + switch (info_arr[i].type) { + case BPF_SPIN_LOCK: + WARN_ON_ONCE(rec->spin_lock_off >= 0); + /* Cache offset for faster lookup at runtime */ + rec->spin_lock_off = rec->fields[i].offset; + break; + case BPF_TIMER: + WARN_ON_ONCE(rec->timer_off >= 0); + /* Cache offset for faster lookup at runtime */ + rec->timer_off = rec->fields[i].offset; + break; + case BPF_WORKQUEUE: + WARN_ON_ONCE(rec->wq_off >= 0); + /* Cache offset for faster lookup at runtime */ + rec->wq_off = rec->fields[i].offset; + break; + case BPF_REFCOUNT: + WARN_ON_ONCE(rec->refcount_off >= 0); + /* Cache offset for faster lookup at runtime */ + rec->refcount_off = rec->fields[i].offset; + break; + case BPF_KPTR_UNREF: + case BPF_KPTR_REF: + case BPF_KPTR_PERCPU: + case BPF_UPTR: + ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]); + if (ret < 0) + goto end; + break; + case BPF_LIST_HEAD: + ret = btf_parse_list_head(btf, &rec->fields[i], &info_arr[i]); + if (ret < 0) + goto end; + break; + case BPF_RB_ROOT: + ret = btf_parse_rb_root(btf, &rec->fields[i], &info_arr[i]); + if (ret < 0) + goto end; + break; + case BPF_LIST_NODE: + case BPF_RB_NODE: + break; + default: + ret = -EFAULT; + goto end; + } + rec->cnt++; + } + + /* bpf_{list_head, rb_node} require bpf_spin_lock */ + if ((btf_record_has_field(rec, BPF_LIST_HEAD) || + btf_record_has_field(rec, BPF_RB_ROOT)) && rec->spin_lock_off < 0) { + ret = -EINVAL; + goto end; + } + + if (rec->refcount_off < 0 && + btf_record_has_field(rec, BPF_LIST_NODE) && + btf_record_has_field(rec, BPF_RB_NODE)) { + ret = -EINVAL; + goto end; + } + + sort_r(rec->fields, rec->cnt, sizeof(struct btf_field), btf_field_cmp, + NULL, rec); + + return rec; +end: + btf_record_free(rec); + return ERR_PTR(ret); +} + +int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) +{ + int i; + + /* There are three types that signify ownership of some other type: + * kptr_ref, bpf_list_head, bpf_rb_root. + * kptr_ref only supports storing kernel types, which can't store + * references to program allocated local types. + * + * Hence we only need to ensure that bpf_{list_head,rb_root} ownership + * does not form cycles. + */ + if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & (BPF_GRAPH_ROOT | BPF_UPTR))) + return 0; + for (i = 0; i < rec->cnt; i++) { + struct btf_struct_meta *meta; + const struct btf_type *t; + u32 btf_id; + + if (rec->fields[i].type == BPF_UPTR) { + /* The uptr only supports pinning one page and cannot + * point to a kernel struct + */ + if (btf_is_kernel(rec->fields[i].kptr.btf)) + return -EINVAL; + t = btf_type_by_id(rec->fields[i].kptr.btf, + rec->fields[i].kptr.btf_id); + if (!t->size) + return -EINVAL; + if (t->size > PAGE_SIZE) + return -E2BIG; + continue; + } + + if (!(rec->fields[i].type & BPF_GRAPH_ROOT)) + continue; + btf_id = rec->fields[i].graph_root.value_btf_id; + meta = btf_find_struct_meta(btf, btf_id); + if (!meta) + return -EFAULT; + rec->fields[i].graph_root.value_rec = meta->record; + + /* We need to set value_rec for all root types, but no need + * to check ownership cycle for a type unless it's also a + * node type. + */ + if (!(rec->field_mask & BPF_GRAPH_NODE)) + continue; + + /* We need to ensure ownership acyclicity among all types. The + * proper way to do it would be to topologically sort all BTF + * IDs based on the ownership edges, since there can be multiple + * bpf_{list_head,rb_node} in a type. Instead, we use the + * following resaoning: + * + * - A type can only be owned by another type in user BTF if it + * has a bpf_{list,rb}_node. Let's call these node types. + * - A type can only _own_ another type in user BTF if it has a + * bpf_{list_head,rb_root}. Let's call these root types. + * + * We ensure that if a type is both a root and node, its + * element types cannot be root types. + * + * To ensure acyclicity: + * + * When A is an root type but not a node, its ownership + * chain can be: + * A -> B -> C + * Where: + * - A is an root, e.g. has bpf_rb_root. + * - B is both a root and node, e.g. has bpf_rb_node and + * bpf_list_head. + * - C is only an root, e.g. has bpf_list_node + * + * When A is both a root and node, some other type already + * owns it in the BTF domain, hence it can not own + * another root type through any of the ownership edges. + * A -> B + * Where: + * - A is both an root and node. + * - B is only an node. + */ + if (meta->record->field_mask & BPF_GRAPH_ROOT) + return -ELOOP; + } + return 0; } -static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, - u32 type_id, void *data, u8 bits_offset, - struct seq_file *m) +static void __btf_struct_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct btf_show *show) { - const char *seq = BTF_INFO_KIND(t->info) == BTF_KIND_UNION ? "|" : ","; const struct btf_member *member; + void *safe_data; u32 i; - seq_puts(m, "{"); + safe_data = btf_show_start_struct_type(show, t, type_id, data); + if (!safe_data) + return; + for_each_member(i, t, member) { const struct btf_type *member_type = btf_type_by_id(btf, member->type); @@ -2374,32 +4149,74 @@ static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, u32 bytes_offset; u8 bits8_offset; - if (i) - seq_puts(m, seq); + btf_show_start_member(show, member); - member_offset = btf_member_bit_offset(t, member); - bitfield_size = btf_member_bitfield_size(t, member); + member_offset = __btf_member_bit_offset(t, member); + bitfield_size = __btf_member_bitfield_size(t, member); bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); bits8_offset = BITS_PER_BYTE_MASKED(member_offset); if (bitfield_size) { - btf_bitfield_seq_show(data + bytes_offset, bits8_offset, - bitfield_size, m); + safe_data = btf_show_start_type(show, member_type, + member->type, + data + bytes_offset); + if (safe_data) + btf_bitfield_show(safe_data, + bits8_offset, + bitfield_size, show); + btf_show_end_type(show); } else { ops = btf_type_ops(member_type); - ops->seq_show(btf, member_type, member->type, - data + bytes_offset, bits8_offset, m); + ops->show(btf, member_type, member->type, + data + bytes_offset, bits8_offset, show); + } + + btf_show_end_member(show); + } + + btf_show_end_struct_type(show); +} + +static void btf_struct_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct btf_show *show) +{ + const struct btf_member *m = show->state.member; + + /* + * First check if any members would be shown (are non-zero). + * See comments above "struct btf_show" definition for more + * details on how this works at a high-level. + */ + if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) { + if (!show->state.depth_check) { + show->state.depth_check = show->state.depth + 1; + show->state.depth_to_show = 0; } + __btf_struct_show(btf, t, type_id, data, bits_offset, show); + /* Restore saved member data here */ + show->state.member = m; + if (show->state.depth_check != show->state.depth + 1) + return; + show->state.depth_check = 0; + + if (show->state.depth_to_show <= show->state.depth) + return; + /* + * Reaching here indicates we have recursed and found + * non-zero child values. + */ } - seq_puts(m, "}"); + + __btf_struct_show(btf, t, type_id, data, bits_offset, show); } -static struct btf_kind_operations struct_ops = { +static const struct btf_kind_operations struct_ops = { .check_meta = btf_struct_check_meta, .resolve = btf_struct_resolve, .check_member = btf_struct_check_member, .check_kflag_member = btf_generic_check_kflag_member, .log_details = btf_struct_log, - .seq_show = btf_struct_seq_show, + .show = btf_struct_show, }; static int btf_enum_check_member(struct btf_verifier_env *env, @@ -2468,6 +4285,7 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, { const struct btf_enum *enums = btf_type_enum(t); struct btf *btf = env->btf; + const char *fmt_str; u16 i, nr_enums; u32 meta_needed; @@ -2481,11 +4299,6 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, return -EINVAL; } - if (btf_type_kflag(t)) { - btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); - return -EINVAL; - } - if (t->size > 8 || !is_power_of_2(t->size)) { btf_verifier_log_type(env, t, "Unexpected size"); return -EINVAL; @@ -2516,7 +4329,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, if (env->log.level == BPF_LOG_KERNEL) continue; - btf_verifier_log(env, "\t%s val=%d\n", + fmt_str = btf_type_kflag(t) ? "\t%s val=%d\n" : "\t%s val=%u\n"; + btf_verifier_log(env, fmt_str, __btf_name_by_offset(btf, enums[i].name_off), enums[i].val); } @@ -2530,33 +4344,150 @@ static void btf_enum_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } -static void btf_enum_seq_show(const struct btf *btf, const struct btf_type *t, - u32 type_id, void *data, u8 bits_offset, - struct seq_file *m) +static void btf_enum_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct btf_show *show) { const struct btf_enum *enums = btf_type_enum(t); u32 i, nr_enums = btf_type_vlen(t); - int v = *(int *)data; + void *safe_data; + int v; + + safe_data = btf_show_start_type(show, t, type_id, data); + if (!safe_data) + return; + + v = *(int *)safe_data; for (i = 0; i < nr_enums; i++) { - if (v == enums[i].val) { - seq_printf(m, "%s", - __btf_name_by_offset(btf, - enums[i].name_off)); - return; - } + if (v != enums[i].val) + continue; + + btf_show_type_value(show, "%s", + __btf_name_by_offset(btf, + enums[i].name_off)); + + btf_show_end_type(show); + return; } - seq_printf(m, "%d", v); + if (btf_type_kflag(t)) + btf_show_type_value(show, "%d", v); + else + btf_show_type_value(show, "%u", v); + btf_show_end_type(show); } -static struct btf_kind_operations enum_ops = { +static const struct btf_kind_operations enum_ops = { .check_meta = btf_enum_check_meta, .resolve = btf_df_resolve, .check_member = btf_enum_check_member, .check_kflag_member = btf_enum_check_kflag_member, .log_details = btf_enum_log, - .seq_show = btf_enum_seq_show, + .show = btf_enum_show, +}; + +static s32 btf_enum64_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + const struct btf_enum64 *enums = btf_type_enum64(t); + struct btf *btf = env->btf; + const char *fmt_str; + u16 i, nr_enums; + u32 meta_needed; + + nr_enums = btf_type_vlen(t); + meta_needed = nr_enums * sizeof(*enums); + + if (meta_left < meta_needed) { + btf_verifier_log_basic(env, t, + "meta_left:%u meta_needed:%u", + meta_left, meta_needed); + return -EINVAL; + } + + if (t->size > 8 || !is_power_of_2(t->size)) { + btf_verifier_log_type(env, t, "Unexpected size"); + return -EINVAL; + } + + /* enum type either no name or a valid one */ + if (t->name_off && + !btf_name_valid_identifier(env->btf, t->name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + for (i = 0; i < nr_enums; i++) { + if (!btf_name_offset_valid(btf, enums[i].name_off)) { + btf_verifier_log(env, "\tInvalid name_offset:%u", + enums[i].name_off); + return -EINVAL; + } + + /* enum member must have a valid name */ + if (!enums[i].name_off || + !btf_name_valid_identifier(btf, enums[i].name_off)) { + btf_verifier_log_type(env, t, "Invalid name"); + return -EINVAL; + } + + if (env->log.level == BPF_LOG_KERNEL) + continue; + + fmt_str = btf_type_kflag(t) ? "\t%s val=%lld\n" : "\t%s val=%llu\n"; + btf_verifier_log(env, fmt_str, + __btf_name_by_offset(btf, enums[i].name_off), + btf_enum64_value(enums + i)); + } + + return meta_needed; +} + +static void btf_enum64_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct btf_show *show) +{ + const struct btf_enum64 *enums = btf_type_enum64(t); + u32 i, nr_enums = btf_type_vlen(t); + void *safe_data; + s64 v; + + safe_data = btf_show_start_type(show, t, type_id, data); + if (!safe_data) + return; + + v = *(u64 *)safe_data; + + for (i = 0; i < nr_enums; i++) { + if (v != btf_enum64_value(enums + i)) + continue; + + btf_show_type_value(show, "%s", + __btf_name_by_offset(btf, + enums[i].name_off)); + + btf_show_end_type(show); + return; + } + + if (btf_type_kflag(t)) + btf_show_type_value(show, "%lld", v); + else + btf_show_type_value(show, "%llu", v); + btf_show_end_type(show); +} + +static const struct btf_kind_operations enum64_ops = { + .check_meta = btf_enum64_check_meta, + .resolve = btf_df_resolve, + .check_member = btf_enum_check_member, + .check_kflag_member = btf_enum_check_kflag_member, + .log_details = btf_enum_log, + .show = btf_enum64_show, }; static s32 btf_func_proto_check_meta(struct btf_verifier_env *env, @@ -2628,14 +4559,14 @@ done: btf_verifier_log(env, ")"); } -static struct btf_kind_operations func_proto_ops = { +static const struct btf_kind_operations func_proto_ops = { .check_meta = btf_func_proto_check_meta, .resolve = btf_df_resolve, /* * BTF_KIND_FUNC_PROTO cannot be directly referred by * a struct's member. * - * It should be a funciton pointer instead. + * It should be a function pointer instead. * (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO) * * Hence, there is no btf_func_check_member(). @@ -2643,7 +4574,7 @@ static struct btf_kind_operations func_proto_ops = { .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_func_proto_log, - .seq_show = btf_df_seq_show, + .show = btf_df_show, }; static s32 btf_func_check_meta(struct btf_verifier_env *env, @@ -2671,13 +4602,28 @@ static s32 btf_func_check_meta(struct btf_verifier_env *env, return 0; } -static struct btf_kind_operations func_ops = { +static int btf_func_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + const struct btf_type *t = v->t; + u32 next_type_id = t->type; + int err; + + err = btf_func_check(env, t); + if (err) + return err; + + env_stack_pop_resolved(env, next_type_id, 0); + return 0; +} + +static const struct btf_kind_operations func_ops = { .check_meta = btf_func_check_meta, - .resolve = btf_df_resolve, + .resolve = btf_func_resolve, .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_ref_type_log, - .seq_show = btf_df_seq_show, + .show = btf_df_show, }; static s32 btf_var_check_meta(struct btf_verifier_env *env, @@ -2705,7 +4651,7 @@ static s32 btf_var_check_meta(struct btf_verifier_env *env, } if (!t->name_off || - !__btf_name_valid(env->btf, t->name_off, true)) { + !btf_name_valid_identifier(env->btf, t->name_off)) { btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL; } @@ -2741,7 +4687,7 @@ static const struct btf_kind_operations var_ops = { .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_var_log, - .seq_show = btf_var_seq_show, + .show = btf_var_show, }; static s32 btf_datasec_check_meta(struct btf_verifier_env *env, @@ -2760,11 +4706,6 @@ static s32 btf_datasec_check_meta(struct btf_verifier_env *env, return -EINVAL; } - if (!btf_type_vlen(t)) { - btf_verifier_log_type(env, t, "vlen == 0"); - return -EINVAL; - } - if (!t->size) { btf_verifier_log_type(env, t, "size == 0"); return -EINVAL; @@ -2829,6 +4770,7 @@ static int btf_datasec_resolve(struct btf_verifier_env *env, struct btf *btf = env->btf; u16 i; + env->resolve_mode = RESOLVE_TBD; for_each_vsi_from(i, v->next_member, v->t, vsi) { u32 var_type_id = vsi->type, type_id, type_size = 0; const struct btf_type *var_type = btf_type_by_id(env->btf, @@ -2867,24 +4809,28 @@ static void btf_datasec_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } -static void btf_datasec_seq_show(const struct btf *btf, - const struct btf_type *t, u32 type_id, - void *data, u8 bits_offset, - struct seq_file *m) +static void btf_datasec_show(const struct btf *btf, + const struct btf_type *t, u32 type_id, + void *data, u8 bits_offset, + struct btf_show *show) { const struct btf_var_secinfo *vsi; const struct btf_type *var; u32 i; - seq_printf(m, "section (\"%s\") = {", __btf_name_by_offset(btf, t->name_off)); + if (!btf_show_start_type(show, t, type_id, data)) + return; + + btf_show_type_value(show, "section (\"%s\") = {", + __btf_name_by_offset(btf, t->name_off)); for_each_vsi(i, t, vsi) { var = btf_type_by_id(btf, vsi->type); if (i) - seq_puts(m, ","); - btf_type_ops(var)->seq_show(btf, var, vsi->type, - data + vsi->offset, bits_offset, m); + btf_show(show, ","); + btf_type_ops(var)->show(btf, var, vsi->type, + data + vsi->offset, bits_offset, show); } - seq_puts(m, "}"); + btf_show_end_type(show); } static const struct btf_kind_operations datasec_ops = { @@ -2893,7 +4839,186 @@ static const struct btf_kind_operations datasec_ops = { .check_member = btf_df_check_member, .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_datasec_log, - .seq_show = btf_datasec_seq_show, + .show = btf_datasec_show, +}; + +static s32 btf_float_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + if (btf_type_vlen(t)) { + btf_verifier_log_type(env, t, "vlen != 0"); + return -EINVAL; + } + + if (btf_type_kflag(t)) { + btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); + return -EINVAL; + } + + if (t->size != 2 && t->size != 4 && t->size != 8 && t->size != 12 && + t->size != 16) { + btf_verifier_log_type(env, t, "Invalid type_size"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + return 0; +} + +static int btf_float_check_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + u64 start_offset_bytes; + u64 end_offset_bytes; + u64 misalign_bits; + u64 align_bytes; + u64 align_bits; + + /* Different architectures have different alignment requirements, so + * here we check only for the reasonable minimum. This way we ensure + * that types after CO-RE can pass the kernel BTF verifier. + */ + align_bytes = min_t(u64, sizeof(void *), member_type->size); + align_bits = align_bytes * BITS_PER_BYTE; + div64_u64_rem(member->offset, align_bits, &misalign_bits); + if (misalign_bits) { + btf_verifier_log_member(env, struct_type, member, + "Member is not properly aligned"); + return -EINVAL; + } + + start_offset_bytes = member->offset / BITS_PER_BYTE; + end_offset_bytes = start_offset_bytes + member_type->size; + if (end_offset_bytes > struct_type->size) { + btf_verifier_log_member(env, struct_type, member, + "Member exceeds struct_size"); + return -EINVAL; + } + + return 0; +} + +static void btf_float_log(struct btf_verifier_env *env, + const struct btf_type *t) +{ + btf_verifier_log(env, "size=%u", t->size); +} + +static const struct btf_kind_operations float_ops = { + .check_meta = btf_float_check_meta, + .resolve = btf_df_resolve, + .check_member = btf_float_check_member, + .check_kflag_member = btf_generic_check_kflag_member, + .log_details = btf_float_log, + .show = btf_df_show, +}; + +static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env, + const struct btf_type *t, + u32 meta_left) +{ + const struct btf_decl_tag *tag; + u32 meta_needed = sizeof(*tag); + s32 component_idx; + const char *value; + + if (meta_left < meta_needed) { + btf_verifier_log_basic(env, t, + "meta_left:%u meta_needed:%u", + meta_left, meta_needed); + return -EINVAL; + } + + value = btf_name_by_offset(env->btf, t->name_off); + if (!value || !value[0]) { + btf_verifier_log_type(env, t, "Invalid value"); + return -EINVAL; + } + + if (btf_type_vlen(t)) { + btf_verifier_log_type(env, t, "vlen != 0"); + return -EINVAL; + } + + if (btf_type_kflag(t)) { + btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); + return -EINVAL; + } + + component_idx = btf_type_decl_tag(t)->component_idx; + if (component_idx < -1) { + btf_verifier_log_type(env, t, "Invalid component_idx"); + return -EINVAL; + } + + btf_verifier_log_type(env, t, NULL); + + return meta_needed; +} + +static int btf_decl_tag_resolve(struct btf_verifier_env *env, + const struct resolve_vertex *v) +{ + const struct btf_type *next_type; + const struct btf_type *t = v->t; + u32 next_type_id = t->type; + struct btf *btf = env->btf; + s32 component_idx; + u32 vlen; + + next_type = btf_type_by_id(btf, next_type_id); + if (!next_type || !btf_type_is_decl_tag_target(next_type)) { + btf_verifier_log_type(env, v->t, "Invalid type_id"); + return -EINVAL; + } + + if (!env_type_is_resolve_sink(env, next_type) && + !env_type_is_resolved(env, next_type_id)) + return env_stack_push(env, next_type, next_type_id); + + component_idx = btf_type_decl_tag(t)->component_idx; + if (component_idx != -1) { + if (btf_type_is_var(next_type) || btf_type_is_typedef(next_type)) { + btf_verifier_log_type(env, v->t, "Invalid component_idx"); + return -EINVAL; + } + + if (btf_type_is_struct(next_type)) { + vlen = btf_type_vlen(next_type); + } else { + /* next_type should be a function */ + next_type = btf_type_by_id(btf, next_type->type); + vlen = btf_type_vlen(next_type); + } + + if ((u32)component_idx >= vlen) { + btf_verifier_log_type(env, v->t, "Invalid component_idx"); + return -EINVAL; + } + } + + env_stack_pop_resolved(env, next_type_id, 0); + + return 0; +} + +static void btf_decl_tag_log(struct btf_verifier_env *env, const struct btf_type *t) +{ + btf_verifier_log(env, "type=%u component_idx=%d", t->type, + btf_type_decl_tag(t)->component_idx); +} + +static const struct btf_kind_operations decl_tag_ops = { + .check_meta = btf_decl_tag_check_meta, + .resolve = btf_decl_tag_resolve, + .check_member = btf_df_check_member, + .check_kflag_member = btf_df_check_kflag_member, + .log_details = btf_decl_tag_log, + .show = btf_df_show, }; static int btf_func_proto_check(struct btf_verifier_env *env, @@ -2919,6 +5044,11 @@ static int btf_func_proto_check(struct btf_verifier_env *env, return -EINVAL; } + if (btf_type_is_resolve_source_only(ret_type)) { + btf_verifier_log_type(env, t, "Invalid return type"); + return -EINVAL; + } + if (btf_type_needs_resolve(ret_type) && !env_type_is_resolved(env, ret_type_id)) { err = btf_resolve(env, ret_type, ret_type_id); @@ -2946,7 +5076,6 @@ static int btf_func_proto_check(struct btf_verifier_env *env, nr_args--; } - err = 0; for (i = 0; i < nr_args; i++) { const struct btf_type *arg_type; u32 arg_type_id; @@ -2955,8 +5084,12 @@ static int btf_func_proto_check(struct btf_verifier_env *env, arg_type = btf_type_by_id(btf, arg_type_id); if (!arg_type) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); - err = -EINVAL; - break; + return -EINVAL; + } + + if (btf_type_is_resolve_source_only(arg_type)) { + btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); + return -EINVAL; } if (args[i].name_off && @@ -2964,25 +5097,23 @@ static int btf_func_proto_check(struct btf_verifier_env *env, !btf_name_valid_identifier(btf, args[i].name_off))) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); - err = -EINVAL; - break; + return -EINVAL; } if (btf_type_needs_resolve(arg_type) && !env_type_is_resolved(env, arg_type_id)) { err = btf_resolve(env, arg_type, arg_type_id); if (err) - break; + return err; } if (!btf_type_id_size(btf, &arg_type_id, NULL)) { btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); - err = -EINVAL; - break; + return -EINVAL; } } - return err; + return 0; } static int btf_func_check(struct btf_verifier_env *env, @@ -3029,6 +5160,10 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { [BTF_KIND_FUNC_PROTO] = &func_proto_ops, [BTF_KIND_VAR] = &var_ops, [BTF_KIND_DATASEC] = &datasec_ops, + [BTF_KIND_FLOAT] = &float_ops, + [BTF_KIND_DECL_TAG] = &decl_tag_ops, + [BTF_KIND_TYPE_TAG] = &modifier_ops, + [BTF_KIND_ENUM64] = &enum64_ops, }; static s32 btf_check_meta(struct btf_verifier_env *env, @@ -3083,7 +5218,7 @@ static int btf_check_all_metas(struct btf_verifier_env *env) cur = btf->nohdr_data + hdr->type_off; end = cur + hdr->type_len; - env->log_type_id = 1; + env->log_type_id = btf->base_btf ? btf->start_id : 1; while (cur < end) { struct btf_type *t = cur; s32 meta_size; @@ -3110,8 +5245,12 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, return false; if (btf_type_is_struct(t) || btf_type_is_datasec(t)) - return !btf->resolved_ids[type_id] && - !btf->resolved_sizes[type_id]; + return !btf_resolved_type_id(btf, type_id) && + !btf_resolved_type_size(btf, type_id); + + if (btf_type_is_decl_tag(t) || btf_type_is_func(t)) + return btf_resolved_type_id(btf, type_id) && + !btf_resolved_type_size(btf, type_id); if (btf_type_is_modifier(t) || btf_type_is_ptr(t) || btf_type_is_var(t)) { @@ -3131,7 +5270,7 @@ static bool btf_resolve_valid(struct btf_verifier_env *env, elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); return elem_type && !btf_type_is_modifier(elem_type) && (array->nelems * elem_size == - btf->resolved_sizes[type_id]); + btf_resolved_type_size(btf, type_id)); } return false; @@ -3173,7 +5312,8 @@ static int btf_resolve(struct btf_verifier_env *env, static int btf_check_all_types(struct btf_verifier_env *env) { struct btf *btf = env->btf; - u32 type_id; + const struct btf_type *t; + u32 type_id, i; int err; err = env_resolve_init(env); @@ -3181,8 +5321,9 @@ static int btf_check_all_types(struct btf_verifier_env *env) return err; env->phase++; - for (type_id = 1; type_id <= btf->nr_types; type_id++) { - const struct btf_type *t = btf_type_by_id(btf, type_id); + for (i = btf->base_btf ? 0 : 1; i < btf->nr_types; i++) { + type_id = btf->start_id + i; + t = btf_type_by_id(btf, type_id); env->log_type_id = type_id; if (btf_type_needs_resolve(t) && @@ -3197,12 +5338,6 @@ static int btf_check_all_types(struct btf_verifier_env *env) if (err) return err; } - - if (btf_type_is_func(t)) { - err = btf_func_check(env, t); - if (err) - return err; - } } return 0; @@ -3219,7 +5354,7 @@ static int btf_parse_type_sec(struct btf_verifier_env *env) return -EINVAL; } - if (!hdr->type_len) { + if (!env->btf->base_btf && !hdr->type_len) { btf_verifier_log(env, "No type found"); return -EINVAL; } @@ -3246,13 +5381,18 @@ static int btf_parse_str_sec(struct btf_verifier_env *env) return -EINVAL; } - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || - start[0] || end[-1]) { + btf->strings = start; + + if (btf->base_btf && !hdr->str_len) + return 0; + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || end[-1]) { + btf_verifier_log(env, "Invalid string section"); + return -EINVAL; + } + if (!btf->base_btf && start[0]) { btf_verifier_log(env, "Invalid string section"); return -EINVAL; } - - btf->strings = start; return 0; } @@ -3328,13 +5468,11 @@ static int btf_parse_hdr(struct btf_verifier_env *env) u32 hdr_len, hdr_copy, btf_data_size; const struct btf_header *hdr; struct btf *btf; - int err; btf = env->btf; btf_data_size = btf->data_size; - if (btf_data_size < - offsetof(struct btf_header, hdr_len) + sizeof(hdr->hdr_len)) { + if (btf_data_size < offsetofend(struct btf_header, hdr_len)) { btf_verifier_log(env, "hdr_len not found"); return -EINVAL; } @@ -3381,50 +5519,242 @@ static int btf_parse_hdr(struct btf_verifier_env *env) return -ENOTSUPP; } - if (btf_data_size == hdr->hdr_len) { + if (!btf->base_btf && btf_data_size == hdr->hdr_len) { btf_verifier_log(env, "No data"); return -EINVAL; } - err = btf_check_sec_info(env, btf_data_size); - if (err) - return err; + return btf_check_sec_info(env, btf_data_size); +} + +static const char *alloc_obj_fields[] = { + "bpf_spin_lock", + "bpf_list_head", + "bpf_list_node", + "bpf_rb_root", + "bpf_rb_node", + "bpf_refcount", +}; + +static struct btf_struct_metas * +btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf) +{ + struct btf_struct_metas *tab = NULL; + struct btf_id_set *aof; + int i, n, id, ret; + + BUILD_BUG_ON(offsetof(struct btf_id_set, cnt) != 0); + BUILD_BUG_ON(sizeof(struct btf_id_set) != sizeof(u32)); + + aof = kmalloc(sizeof(*aof), GFP_KERNEL | __GFP_NOWARN); + if (!aof) + return ERR_PTR(-ENOMEM); + aof->cnt = 0; + + for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) { + /* Try to find whether this special type exists in user BTF, and + * if so remember its ID so we can easily find it among members + * of structs that we iterate in the next loop. + */ + struct btf_id_set *new_aof; + + id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT); + if (id < 0) + continue; + + new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]), + GFP_KERNEL | __GFP_NOWARN); + if (!new_aof) { + ret = -ENOMEM; + goto free_aof; + } + aof = new_aof; + aof->ids[aof->cnt++] = id; + } + + n = btf_nr_types(btf); + for (i = 1; i < n; i++) { + /* Try to find if there are kptrs in user BTF and remember their ID */ + struct btf_id_set *new_aof; + struct btf_field_info tmp; + const struct btf_type *t; + t = btf_type_by_id(btf, i); + if (!t) { + ret = -EINVAL; + goto free_aof; + } + + ret = btf_find_kptr(btf, t, 0, 0, &tmp, BPF_KPTR); + if (ret != BTF_FIELD_FOUND) + continue; + + new_aof = krealloc(aof, offsetof(struct btf_id_set, ids[aof->cnt + 1]), + GFP_KERNEL | __GFP_NOWARN); + if (!new_aof) { + ret = -ENOMEM; + goto free_aof; + } + aof = new_aof; + aof->ids[aof->cnt++] = i; + } + + if (!aof->cnt) { + kfree(aof); + return NULL; + } + sort(&aof->ids, aof->cnt, sizeof(aof->ids[0]), btf_id_cmp_func, NULL); + + for (i = 1; i < n; i++) { + struct btf_struct_metas *new_tab; + const struct btf_member *member; + struct btf_struct_meta *type; + struct btf_record *record; + const struct btf_type *t; + int j, tab_cnt; + + t = btf_type_by_id(btf, i); + if (!__btf_type_is_struct(t)) + continue; + + cond_resched(); + + for_each_member(j, t, member) { + if (btf_id_set_contains(aof, member->type)) + goto parse; + } + continue; + parse: + tab_cnt = tab ? tab->cnt : 0; + new_tab = krealloc(tab, offsetof(struct btf_struct_metas, types[tab_cnt + 1]), + GFP_KERNEL | __GFP_NOWARN); + if (!new_tab) { + ret = -ENOMEM; + goto free; + } + if (!tab) + new_tab->cnt = 0; + tab = new_tab; + + type = &tab->types[tab->cnt]; + type->btf_id = i; + record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE | + BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT | + BPF_KPTR, t->size); + /* The record cannot be unset, treat it as an error if so */ + if (IS_ERR_OR_NULL(record)) { + ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT; + goto free; + } + type->record = record; + tab->cnt++; + } + kfree(aof); + return tab; +free: + btf_struct_metas_free(tab); +free_aof: + kfree(aof); + return ERR_PTR(ret); +} + +struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id) +{ + struct btf_struct_metas *tab; + + BUILD_BUG_ON(offsetof(struct btf_struct_meta, btf_id) != 0); + tab = btf->struct_meta_tab; + if (!tab) + return NULL; + return bsearch(&btf_id, tab->types, tab->cnt, sizeof(tab->types[0]), btf_id_cmp_func); +} + +static int btf_check_type_tags(struct btf_verifier_env *env, + struct btf *btf, int start_id) +{ + int i, n, good_id = start_id - 1; + bool in_tags; + + n = btf_nr_types(btf); + for (i = start_id; i < n; i++) { + const struct btf_type *t; + int chain_limit = 32; + u32 cur_id = i; + + t = btf_type_by_id(btf, i); + if (!t) + return -EINVAL; + if (!btf_type_is_modifier(t)) + continue; + + cond_resched(); + + in_tags = btf_type_is_type_tag(t); + while (btf_type_is_modifier(t)) { + if (!chain_limit--) { + btf_verifier_log(env, "Max chain length or cycle detected"); + return -ELOOP; + } + if (btf_type_is_type_tag(t)) { + if (!in_tags) { + btf_verifier_log(env, "Type tags don't precede modifiers"); + return -EINVAL; + } + } else if (in_tags) { + in_tags = false; + } + if (cur_id <= good_id) + break; + /* Move to next type */ + cur_id = t->type; + t = btf_type_by_id(btf, cur_id); + if (!t) + return -EINVAL; + } + good_id = i; + } return 0; } -static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, - u32 log_level, char __user *log_ubuf, u32 log_size) +static int finalize_log(struct bpf_verifier_log *log, bpfptr_t uattr, u32 uattr_size) +{ + u32 log_true_size; + int err; + + err = bpf_vlog_finalize(log, &log_true_size); + + if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) && + copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size), + &log_true_size, sizeof(log_true_size))) + err = -EFAULT; + + return err; +} + +static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { + bpfptr_t btf_data = make_bpfptr(attr->btf, uattr.is_kernel); + char __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf); + struct btf_struct_metas *struct_meta_tab; struct btf_verifier_env *env = NULL; - struct bpf_verifier_log *log; struct btf *btf = NULL; u8 *data; - int err; + int err, ret; - if (btf_data_size > BTF_MAX_SIZE) + if (attr->btf_size > BTF_MAX_SIZE) return ERR_PTR(-E2BIG); env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); if (!env) return ERR_PTR(-ENOMEM); - log = &env->log; - if (log_level || log_ubuf || log_size) { - /* user requested verbose verifier output - * and supplied buffer to store the verification trace - */ - log->level = log_level; - log->ubuf = log_ubuf; - log->len_total = log_size; - - /* log attributes have to be sane */ - if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || - !log->level || !log->ubuf) { - err = -EINVAL; - goto errout; - } - } + /* user could have requested verbose verifier output + * and supplied buffer to store the verification trace + */ + err = bpf_vlog_init(&env->log, attr->btf_log_level, + log_ubuf, attr->btf_log_size); + if (err) + goto errout_free; btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); if (!btf) { @@ -3433,16 +5763,16 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, } env->btf = btf; - data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN); + data = kvmalloc(attr->btf_size, GFP_KERNEL | __GFP_NOWARN); if (!data) { err = -ENOMEM; goto errout; } btf->data = data; - btf->data_size = btf_data_size; + btf->data_size = attr->btf_size; - if (copy_from_user(data, btf_data, btf_data_size)) { + if (copy_from_bpfptr(data, btf_data, attr->btf_size)) { err = -EFAULT; goto errout; } @@ -3461,24 +5791,51 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, if (err) goto errout; - if (log->level && bpf_verifier_log_full(log)) { - err = -ENOSPC; + err = btf_check_type_tags(env, btf, 1); + if (err) + goto errout; + + struct_meta_tab = btf_parse_struct_metas(&env->log, btf); + if (IS_ERR(struct_meta_tab)) { + err = PTR_ERR(struct_meta_tab); goto errout; } + btf->struct_meta_tab = struct_meta_tab; + + if (struct_meta_tab) { + int i; + + for (i = 0; i < struct_meta_tab->cnt; i++) { + err = btf_check_and_fixup_fields(btf, struct_meta_tab->types[i].record); + if (err < 0) + goto errout_meta; + } + } + + err = finalize_log(&env->log, uattr, uattr_size); + if (err) + goto errout_free; btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); return btf; +errout_meta: + btf_free_struct_meta_tab(btf); errout: + /* overwrite err with -ENOSPC or -EFAULT */ + ret = finalize_log(&env->log, uattr, uattr_size); + if (ret) + err = ret; +errout_free: btf_verifier_env_free(env); if (btf) btf_free(btf); return ERR_PTR(err); } -extern char __weak __start_BTF[]; -extern char __weak __stop_BTF[]; +extern char __start_BTF[]; +extern char __stop_BTF[]; extern struct btf *btf_vmlinux; #define BPF_MAP_TYPE(_id, _ops) @@ -3511,22 +5868,70 @@ static u8 bpf_ctx_convert_map[] = { #undef BPF_MAP_TYPE #undef BPF_LINK_TYPE -static const struct btf_member * -btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, - const struct btf_type *t, enum bpf_prog_type prog_type, - int arg) +static const struct btf_type *find_canonical_prog_ctx_type(enum bpf_prog_type prog_type) { const struct btf_type *conv_struct; - const struct btf_type *ctx_struct; const struct btf_member *ctx_type; - const char *tname, *ctx_tname; conv_struct = bpf_ctx_convert.t; - if (!conv_struct) { - bpf_log(log, "btf_vmlinux is malformed\n"); + if (!conv_struct) return NULL; - } + /* prog_type is valid bpf program type. No need for bounds check. */ + ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2; + /* ctx_type is a pointer to prog_ctx_type in vmlinux. + * Like 'struct __sk_buff' + */ + return btf_type_by_id(btf_vmlinux, ctx_type->type); +} + +static int find_kern_ctx_type_id(enum bpf_prog_type prog_type) +{ + const struct btf_type *conv_struct; + const struct btf_member *ctx_type; + + conv_struct = bpf_ctx_convert.t; + if (!conv_struct) + return -EFAULT; + /* prog_type is valid bpf program type. No need for bounds check. */ + ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1; + /* ctx_type is a pointer to prog_ctx_type in vmlinux. + * Like 'struct sk_buff' + */ + return ctx_type->type; +} + +bool btf_is_projection_of(const char *pname, const char *tname) +{ + if (strcmp(pname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0) + return true; + if (strcmp(pname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0) + return true; + return false; +} + +bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, enum bpf_prog_type prog_type, + int arg) +{ + const struct btf_type *ctx_type; + const char *tname, *ctx_tname; + t = btf_type_by_id(btf, t->type); + + /* KPROBE programs allow bpf_user_pt_regs_t typedef, which we need to + * check before we skip all the typedef below. + */ + if (prog_type == BPF_PROG_TYPE_KPROBE) { + while (btf_type_is_modifier(t) && !btf_type_is_typedef(t)) + t = btf_type_by_id(btf, t->type); + + if (btf_type_is_typedef(t)) { + tname = btf_name_by_offset(btf, t->name_off); + if (tname && strcmp(tname, "bpf_user_pt_regs_t") == 0) + return true; + } + } + while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); if (!btf_type_is_struct(t)) { @@ -3535,30 +5940,30 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, * is not supported yet. * BPF_PROG_TYPE_RAW_TRACEPOINT is fine. */ - if (log->level & BPF_LOG_LEVEL) - bpf_log(log, "arg#%d type is not a struct\n", arg); - return NULL; + return false; } tname = btf_name_by_offset(btf, t->name_off); if (!tname) { bpf_log(log, "arg#%d struct doesn't have a name\n", arg); - return NULL; + return false; } - /* prog_type is valid bpf program type. No need for bounds check. */ - ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2; - /* ctx_struct is a pointer to prog_ctx_type in vmlinux. - * Like 'struct __sk_buff' - */ - ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type); - if (!ctx_struct) + + ctx_type = find_canonical_prog_ctx_type(prog_type); + if (!ctx_type) { + bpf_log(log, "btf_vmlinux is malformed\n"); /* should not happen */ - return NULL; - ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off); + return false; + } +again: + ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off); if (!ctx_tname) { /* should not happen */ bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n"); - return NULL; + return false; } + /* program types without named context types work only with arg:ctx tag */ + if (ctx_tname[0] == '\0') + return false; /* only compare that prog's ctx type name is the same as * kernel expects. No need to compare field by field. * It's ok for bpf prog to do: @@ -3566,9 +5971,162 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, * int socket_filter_bpf_prog(struct __sk_buff *skb) * { // no fields of skb are ever used } */ - if (strcmp(ctx_tname, tname)) - return NULL; - return ctx_type; + if (btf_is_projection_of(ctx_tname, tname)) + return true; + if (strcmp(ctx_tname, tname)) { + /* bpf_user_pt_regs_t is a typedef, so resolve it to + * underlying struct and check name again + */ + if (!btf_type_is_modifier(ctx_type)) + return false; + while (btf_type_is_modifier(ctx_type)) + ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type); + goto again; + } + return true; +} + +/* forward declarations for arch-specific underlying types of + * bpf_user_pt_regs_t; this avoids the need for arch-specific #ifdef + * compilation guards below for BPF_PROG_TYPE_PERF_EVENT checks, but still + * works correctly with __builtin_types_compatible_p() on respective + * architectures + */ +struct user_regs_struct; +struct user_pt_regs; + +static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int arg, + enum bpf_prog_type prog_type, + enum bpf_attach_type attach_type) +{ + const struct btf_type *ctx_type; + const char *tname, *ctx_tname; + + if (!btf_is_ptr(t)) { + bpf_log(log, "arg#%d type isn't a pointer\n", arg); + return -EINVAL; + } + t = btf_type_by_id(btf, t->type); + + /* KPROBE and PERF_EVENT programs allow bpf_user_pt_regs_t typedef */ + if (prog_type == BPF_PROG_TYPE_KPROBE || prog_type == BPF_PROG_TYPE_PERF_EVENT) { + while (btf_type_is_modifier(t) && !btf_type_is_typedef(t)) + t = btf_type_by_id(btf, t->type); + + if (btf_type_is_typedef(t)) { + tname = btf_name_by_offset(btf, t->name_off); + if (tname && strcmp(tname, "bpf_user_pt_regs_t") == 0) + return 0; + } + } + + /* all other program types don't use typedefs for context type */ + while (btf_type_is_modifier(t)) + t = btf_type_by_id(btf, t->type); + + /* `void *ctx __arg_ctx` is always valid */ + if (btf_type_is_void(t)) + return 0; + + tname = btf_name_by_offset(btf, t->name_off); + if (str_is_empty(tname)) { + bpf_log(log, "arg#%d type doesn't have a name\n", arg); + return -EINVAL; + } + + /* special cases */ + switch (prog_type) { + case BPF_PROG_TYPE_KPROBE: + if (__btf_type_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return 0; + break; + case BPF_PROG_TYPE_PERF_EVENT: + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && + __btf_type_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return 0; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && + __btf_type_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) + return 0; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && + __btf_type_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) + return 0; + break; + case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: + /* allow u64* as ctx */ + if (btf_is_int(t) && t->size == 8) + return 0; + break; + case BPF_PROG_TYPE_TRACING: + switch (attach_type) { + case BPF_TRACE_RAW_TP: + /* tp_btf program is TRACING, so need special case here */ + if (__btf_type_is_struct(t) && + strcmp(tname, "bpf_raw_tracepoint_args") == 0) + return 0; + /* allow u64* as ctx */ + if (btf_is_int(t) && t->size == 8) + return 0; + break; + case BPF_TRACE_ITER: + /* allow struct bpf_iter__xxx types only */ + if (__btf_type_is_struct(t) && + strncmp(tname, "bpf_iter__", sizeof("bpf_iter__") - 1) == 0) + return 0; + break; + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: + /* allow u64* as ctx */ + if (btf_is_int(t) && t->size == 8) + return 0; + break; + default: + break; + } + break; + case BPF_PROG_TYPE_LSM: + case BPF_PROG_TYPE_STRUCT_OPS: + /* allow u64* as ctx */ + if (btf_is_int(t) && t->size == 8) + return 0; + break; + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_SYSCALL: + case BPF_PROG_TYPE_EXT: + return 0; /* anything goes */ + default: + break; + } + + ctx_type = find_canonical_prog_ctx_type(prog_type); + if (!ctx_type) { + /* should not happen */ + bpf_log(log, "btf_vmlinux is malformed\n"); + return -EINVAL; + } + + /* resolve typedefs and check that underlying structs are matching as well */ + while (btf_type_is_modifier(ctx_type)) + ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type); + + /* if program type doesn't have distinctly named struct type for + * context, then __arg_ctx argument can only be `void *`, which we + * already checked above + */ + if (!__btf_type_is_struct(ctx_type)) { + bpf_log(log, "arg#%d should be void pointer\n", arg); + return -EINVAL; + } + + ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off); + if (!__btf_type_is_struct(t) || strcmp(ctx_tname, tname) != 0) { + bpf_log(log, "arg#%d should be `struct %s *`\n", arg, ctx_tname); + return -EINVAL; + } + + return 0; } static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, @@ -3577,21 +6135,141 @@ static int btf_translate_to_vmlinux(struct bpf_verifier_log *log, enum bpf_prog_type prog_type, int arg) { - const struct btf_member *prog_ctx_type, *kern_ctx_type; - - prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type, arg); - if (!prog_ctx_type) + if (!btf_is_prog_ctx_type(log, btf, t, prog_type, arg)) return -ENOENT; - kern_ctx_type = prog_ctx_type + 1; - return kern_ctx_type->type; + return find_kern_ctx_type_id(prog_type); +} + +int get_kern_ctx_btf_id(struct bpf_verifier_log *log, enum bpf_prog_type prog_type) +{ + const struct btf_member *kctx_member; + const struct btf_type *conv_struct; + const struct btf_type *kctx_type; + u32 kctx_type_id; + + conv_struct = bpf_ctx_convert.t; + /* get member for kernel ctx type */ + kctx_member = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1; + kctx_type_id = kctx_member->type; + kctx_type = btf_type_by_id(btf_vmlinux, kctx_type_id); + if (!btf_type_is_struct(kctx_type)) { + bpf_log(log, "kern ctx type id %u is not a struct\n", kctx_type_id); + return -EINVAL; + } + + return kctx_type_id; +} + +BTF_ID_LIST(bpf_ctx_convert_btf_id) +BTF_ID(struct, bpf_ctx_convert) + +static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name, + void *data, unsigned int data_size) +{ + struct btf *btf = NULL; + int err; + + if (!IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) + return ERR_PTR(-ENOENT); + + btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); + if (!btf) { + err = -ENOMEM; + goto errout; + } + env->btf = btf; + + btf->data = data; + btf->data_size = data_size; + btf->kernel_btf = true; + snprintf(btf->name, sizeof(btf->name), "%s", name); + + err = btf_parse_hdr(env); + if (err) + goto errout; + + btf->nohdr_data = btf->data + btf->hdr.hdr_len; + + err = btf_parse_str_sec(env); + if (err) + goto errout; + + err = btf_check_all_metas(env); + if (err) + goto errout; + + err = btf_check_type_tags(env, btf, 1); + if (err) + goto errout; + + refcount_set(&btf->refcnt, 1); + + return btf; + +errout: + if (btf) { + kvfree(btf->types); + kfree(btf); + } + return ERR_PTR(err); } struct btf *btf_parse_vmlinux(void) { struct btf_verifier_env *env = NULL; struct bpf_verifier_log *log; - struct btf *btf = NULL; - int err, i; + struct btf *btf; + int err; + + env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); + if (!env) + return ERR_PTR(-ENOMEM); + + log = &env->log; + log->level = BPF_LOG_KERNEL; + btf = btf_parse_base(env, "vmlinux", __start_BTF, __stop_BTF - __start_BTF); + if (IS_ERR(btf)) + goto err_out; + + /* btf_parse_vmlinux() runs under bpf_verifier_lock */ + bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); + err = btf_alloc_id(btf); + if (err) { + btf_free(btf); + btf = ERR_PTR(err); + } +err_out: + btf_verifier_env_free(env); + return btf; +} + +/* If .BTF_ids section was created with distilled base BTF, both base and + * split BTF ids will need to be mapped to actual base/split ids for + * BTF now that it has been relocated. + */ +static __u32 btf_relocate_id(const struct btf *btf, __u32 id) +{ + if (!btf->base_btf || !btf->base_id_map) + return id; + return btf->base_id_map[id]; +} + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + +static struct btf *btf_parse_module(const char *module_name, const void *data, + unsigned int data_size, void *base_data, + unsigned int base_data_size) +{ + struct btf *btf = NULL, *vmlinux_btf, *base_btf = NULL; + struct btf_verifier_env *env = NULL; + struct bpf_verifier_log *log; + int err = 0; + + vmlinux_btf = bpf_get_btf_vmlinux(); + if (IS_ERR(vmlinux_btf)) + return vmlinux_btf; + if (!vmlinux_btf) + return ERR_PTR(-EINVAL); env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); if (!env) @@ -3600,6 +6278,16 @@ struct btf *btf_parse_vmlinux(void) log = &env->log; log->level = BPF_LOG_KERNEL; + if (base_data) { + base_btf = btf_parse_base(env, ".BTF.base", base_data, base_data_size); + if (IS_ERR(base_btf)) { + err = PTR_ERR(base_btf); + goto errout; + } + } else { + base_btf = vmlinux_btf; + } + btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); if (!btf) { err = -ENOMEM; @@ -3607,8 +6295,18 @@ struct btf *btf_parse_vmlinux(void) } env->btf = btf; - btf->data = __start_BTF; - btf->data_size = __stop_BTF - __start_BTF; + btf->base_btf = base_btf; + btf->start_id = base_btf->nr_types; + btf->start_str_off = base_btf->hdr.str_len; + btf->kernel_btf = true; + snprintf(btf->name, sizeof(btf->name), "%s", module_name); + + btf->data = kvmemdup(data, data_size, GFP_KERNEL | __GFP_NOWARN); + if (!btf->data) { + err = -ENOMEM; + goto errout; + } + btf->data_size = data_size; err = btf_parse_hdr(env); if (err) @@ -3624,27 +6322,17 @@ struct btf *btf_parse_vmlinux(void) if (err) goto errout; - /* find struct bpf_ctx_convert for type checking later */ - for (i = 1; i <= btf->nr_types; i++) { - const struct btf_type *t; - const char *tname; - - t = btf_type_by_id(btf, i); - if (!__btf_type_is_struct(t)) - continue; - tname = __btf_name_by_offset(btf, t->name_off); - if (!strcmp(tname, "bpf_ctx_convert")) { - /* btf_parse_vmlinux() runs under bpf_verifier_lock */ - bpf_ctx_convert.t = t; - break; - } - } - if (i > btf->nr_types) { - err = -ENOENT; + err = btf_check_type_tags(env, btf, btf_nr_types(base_btf)); + if (err) goto errout; - } - bpf_struct_ops_init(btf, log); + if (base_btf != vmlinux_btf) { + err = btf_relocate(btf, vmlinux_btf, &btf->base_id_map); + if (err) + goto errout; + btf_free(base_btf); + base_btf = vmlinux_btf; + } btf_verifier_env_free(env); refcount_set(&btf->refcnt, 1); @@ -3652,47 +6340,209 @@ struct btf *btf_parse_vmlinux(void) errout: btf_verifier_env_free(env); + if (!IS_ERR(base_btf) && base_btf != vmlinux_btf) + btf_free(base_btf); if (btf) { + kvfree(btf->data); kvfree(btf->types); kfree(btf); } return ERR_PTR(err); } +#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */ + struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) { - struct bpf_prog *tgt_prog = prog->aux->linked_prog; + struct bpf_prog *tgt_prog = prog->aux->dst_prog; - if (tgt_prog) { + if (tgt_prog) return tgt_prog->aux->btf; - } else { - return btf_vmlinux; + else + return prog->aux->attach_btf; +} + +static bool is_int_ptr(struct btf *btf, const struct btf_type *t) +{ + /* skip modifiers */ + t = btf_type_skip_modifiers(btf, t->type, NULL); + + return btf_type_is_int(t); +} + +static u32 get_ctx_arg_idx(struct btf *btf, const struct btf_type *func_proto, + int off) +{ + const struct btf_param *args; + const struct btf_type *t; + u32 offset = 0, nr_args; + int i; + + if (!func_proto) + return off / 8; + + nr_args = btf_type_vlen(func_proto); + args = (const struct btf_param *)(func_proto + 1); + for (i = 0; i < nr_args; i++) { + t = btf_type_skip_modifiers(btf, args[i].type, NULL); + offset += btf_type_is_ptr(t) ? 8 : roundup(t->size, 8); + if (off < offset) + return i; } + + t = btf_type_skip_modifiers(btf, func_proto->type, NULL); + offset += btf_type_is_ptr(t) ? 8 : roundup(t->size, 8); + if (off < offset) + return nr_args; + + return nr_args + 1; } -static bool is_string_ptr(struct btf *btf, const struct btf_type *t) +static bool prog_args_trusted(const struct bpf_prog *prog) { - /* t comes in already as a pointer */ - t = btf_type_by_id(btf, t->type); + enum bpf_attach_type atype = prog->expected_attach_type; - /* allow const */ - if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST) - t = btf_type_by_id(btf, t->type); + switch (prog->type) { + case BPF_PROG_TYPE_TRACING: + return atype == BPF_TRACE_RAW_TP || atype == BPF_TRACE_ITER; + case BPF_PROG_TYPE_LSM: + return bpf_lsm_is_trusted(prog); + case BPF_PROG_TYPE_STRUCT_OPS: + return true; + default: + return false; + } +} - /* char, signed char, unsigned char */ - return btf_type_is_int(t) && t->size == 1; +int btf_ctx_arg_offset(const struct btf *btf, const struct btf_type *func_proto, + u32 arg_no) +{ + const struct btf_param *args; + const struct btf_type *t; + int off = 0, i; + u32 sz; + + args = btf_params(func_proto); + for (i = 0; i < arg_no; i++) { + t = btf_type_by_id(btf, args[i].type); + t = btf_resolve_size(btf, t, &sz); + if (IS_ERR(t)) + return PTR_ERR(t); + off += roundup(sz, 8); + } + + return off; } +struct bpf_raw_tp_null_args { + const char *func; + u64 mask; +}; + +static const struct bpf_raw_tp_null_args raw_tp_null_args[] = { + /* sched */ + { "sched_pi_setprio", 0x10 }, + /* ... from sched_numa_pair_template event class */ + { "sched_stick_numa", 0x100 }, + { "sched_swap_numa", 0x100 }, + /* afs */ + { "afs_make_fs_call", 0x10 }, + { "afs_make_fs_calli", 0x10 }, + { "afs_make_fs_call1", 0x10 }, + { "afs_make_fs_call2", 0x10 }, + { "afs_protocol_error", 0x1 }, + { "afs_flock_ev", 0x10 }, + /* cachefiles */ + { "cachefiles_lookup", 0x1 | 0x200 }, + { "cachefiles_unlink", 0x1 }, + { "cachefiles_rename", 0x1 }, + { "cachefiles_prep_read", 0x1 }, + { "cachefiles_mark_active", 0x1 }, + { "cachefiles_mark_failed", 0x1 }, + { "cachefiles_mark_inactive", 0x1 }, + { "cachefiles_vfs_error", 0x1 }, + { "cachefiles_io_error", 0x1 }, + { "cachefiles_ondemand_open", 0x1 }, + { "cachefiles_ondemand_copen", 0x1 }, + { "cachefiles_ondemand_close", 0x1 }, + { "cachefiles_ondemand_read", 0x1 }, + { "cachefiles_ondemand_cread", 0x1 }, + { "cachefiles_ondemand_fd_write", 0x1 }, + { "cachefiles_ondemand_fd_release", 0x1 }, + /* ext4, from ext4__mballoc event class */ + { "ext4_mballoc_discard", 0x10 }, + { "ext4_mballoc_free", 0x10 }, + /* fib */ + { "fib_table_lookup", 0x100 }, + /* filelock */ + /* ... from filelock_lock event class */ + { "posix_lock_inode", 0x10 }, + { "fcntl_setlk", 0x10 }, + { "locks_remove_posix", 0x10 }, + { "flock_lock_inode", 0x10 }, + /* ... from filelock_lease event class */ + { "break_lease_noblock", 0x10 }, + { "break_lease_block", 0x10 }, + { "break_lease_unblock", 0x10 }, + { "generic_delete_lease", 0x10 }, + { "time_out_leases", 0x10 }, + /* host1x */ + { "host1x_cdma_push_gather", 0x10000 }, + /* huge_memory */ + { "mm_khugepaged_scan_pmd", 0x10 }, + { "mm_collapse_huge_page_isolate", 0x1 }, + { "mm_khugepaged_scan_file", 0x10 }, + { "mm_khugepaged_collapse_file", 0x10 }, + /* kmem */ + { "mm_page_alloc", 0x1 }, + { "mm_page_pcpu_drain", 0x1 }, + /* .. from mm_page event class */ + { "mm_page_alloc_zone_locked", 0x1 }, + /* netfs */ + { "netfs_failure", 0x10 }, + /* power */ + { "device_pm_callback_start", 0x10 }, + /* qdisc */ + { "qdisc_dequeue", 0x1000 }, + /* rxrpc */ + { "rxrpc_recvdata", 0x1 }, + { "rxrpc_resend", 0x10 }, + /* skb */ + {"kfree_skb", 0x1000}, + /* sunrpc */ + { "xs_stream_read_data", 0x1 }, + /* ... from xprt_cong_event event class */ + { "xprt_reserve_cong", 0x10 }, + { "xprt_release_cong", 0x10 }, + { "xprt_get_cong", 0x10 }, + { "xprt_put_cong", 0x10 }, + /* tcp */ + { "tcp_send_reset", 0x11 }, + /* tegra_apb_dma */ + { "tegra_dma_tx_status", 0x100 }, + /* timer_migration */ + { "tmigr_update_events", 0x1 }, + /* writeback, from writeback_folio_template event class */ + { "writeback_dirty_folio", 0x10 }, + { "folio_wait_writeback", 0x10 }, + /* rdma */ + { "mr_integ_alloc", 0x2000 }, + /* bpf_testmod */ + { "bpf_testmod_test_read", 0x0 }, +}; + bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { const struct btf_type *t = prog->aux->attach_func_proto; - struct bpf_prog *tgt_prog = prog->aux->linked_prog; + struct bpf_prog *tgt_prog = prog->aux->dst_prog; struct btf *btf = bpf_prog_get_target_btf(prog); const char *tname = prog->aux->attach_func_name; struct bpf_verifier_log *log = info->log; const struct btf_param *args; + bool ptr_err_raw_tp = false; + const char *tag_value; u32 nr_args, arg; int i, ret; @@ -3701,10 +6551,12 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, tname, off); return false; } - arg = off / 8; + arg = get_ctx_arg_idx(btf, t, off); args = (const struct btf_param *)(t + 1); - /* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */ - nr_args = t ? btf_type_vlen(t) : 5; + /* if (t == NULL) Fall back to default BPF prog with + * MAX_BPF_FUNC_REG_ARGS u64 arguments. + */ + nr_args = t ? btf_type_vlen(t) : MAX_BPF_FUNC_REG_ARGS; if (prog->aux->attach_btf_trace) { /* skip first 'void *__data' argument in btf_trace_##name typedef */ args++; @@ -3720,6 +6572,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, if (arg == nr_args) { switch (prog->expected_attach_type) { case BPF_LSM_MAC: + /* mark we are accessing the return value */ + info->is_retval = true; + fallthrough; + case BPF_LSM_CGROUP: case BPF_TRACE_FEXIT: /* When LSM programs are attached to void LSM hooks * they use FEXIT trampolines and when attached to @@ -3749,7 +6605,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, if (!btf_type_is_small_int(t)) { bpf_log(log, "ret type %s not allowed for fmod_ret\n", - btf_kind_str[BTF_INFO_KIND(t->info)]); + btf_type_str(t)); return false; } break; @@ -3760,7 +6616,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } } else { if (!t) - /* Default prog with 5 args */ + /* Default prog with MAX_BPF_FUNC_REG_ARGS args */ return true; t = btf_type_by_id(btf, args[arg].type); } @@ -3768,7 +6624,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, /* skip modifiers */ while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (btf_type_is_small_int(t) || btf_type_is_enum(t)) + if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t)) /* accessing a scalar */ return true; if (!btf_type_is_ptr(t)) { @@ -3776,9 +6632,30 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, "func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n", tname, arg, __btf_name_by_offset(btf, t->name_off), - btf_kind_str[BTF_INFO_KIND(t->info)]); + btf_type_str(t)); return false; } + + if (size != sizeof(u64)) { + bpf_log(log, "func '%s' size %d must be 8\n", + tname, size); + return false; + } + + /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ + for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { + const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; + u32 type, flag; + + type = base_type(ctx_arg_info->reg_type); + flag = type_flag(ctx_arg_info->reg_type); + if (ctx_arg_info->offset == off && type == PTR_TO_BUF && + (flag & PTR_MAYBE_NULL)) { + info->reg_type = ctx_arg_info->reg_type; + return true; + } + } + if (t->type == 0) /* This is a pointer to void. * It is the same as scalar from the verifier safety pov. @@ -3786,23 +6663,77 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, */ return true; - if (is_string_ptr(btf, t)) + if (is_int_ptr(btf, t)) return true; /* this is a pointer to another type */ - info->reg_type = PTR_TO_BTF_ID; for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; if (ctx_arg_info->offset == off) { + if (!ctx_arg_info->btf_id) { + bpf_log(log,"invalid btf_id for context argument offset %u\n", off); + return false; + } + info->reg_type = ctx_arg_info->reg_type; + info->btf = ctx_arg_info->btf ? : btf_vmlinux; + info->btf_id = ctx_arg_info->btf_id; + return true; + } + } + + info->reg_type = PTR_TO_BTF_ID; + if (prog_args_trusted(prog)) + info->reg_type |= PTR_TRUSTED; + + if (btf_param_match_suffix(btf, &args[arg], "__nullable")) + info->reg_type |= PTR_MAYBE_NULL; + + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) { + struct btf *btf = prog->aux->attach_btf; + const struct btf_type *t; + const char *tname; + + /* BTF lookups cannot fail, return false on error */ + t = btf_type_by_id(btf, prog->aux->attach_btf_id); + if (!t) + return false; + tname = btf_name_by_offset(btf, t->name_off); + if (!tname) + return false; + /* Checked by bpf_check_attach_target */ + tname += sizeof("btf_trace_") - 1; + for (i = 0; i < ARRAY_SIZE(raw_tp_null_args); i++) { + /* Is this a func with potential NULL args? */ + if (strcmp(tname, raw_tp_null_args[i].func)) + continue; + if (raw_tp_null_args[i].mask & (0x1 << (arg * 4))) + info->reg_type |= PTR_MAYBE_NULL; + /* Is the current arg IS_ERR? */ + if (raw_tp_null_args[i].mask & (0x2 << (arg * 4))) + ptr_err_raw_tp = true; break; } + /* If we don't know NULL-ness specification and the tracepoint + * is coming from a loadable module, be conservative and mark + * argument as PTR_MAYBE_NULL. + */ + if (i == ARRAY_SIZE(raw_tp_null_args) && btf_is_module(btf)) + info->reg_type |= PTR_MAYBE_NULL; } if (tgt_prog) { - ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg); + enum bpf_prog_type tgt_type; + + if (tgt_prog->type == BPF_PROG_TYPE_EXT) + tgt_type = tgt_prog->aux->saved_dst_prog_type; + else + tgt_type = tgt_prog->type; + + ret = btf_translate_to_vmlinux(log, btf, t, tgt_type, arg); if (ret > 0) { + info->btf = btf_vmlinux; info->btf_id = ret; return true; } else { @@ -3810,8 +6741,18 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, } } + info->btf = btf; info->btf_id = t->type; t = btf_type_by_id(btf, t->type); + + if (btf_type_is_type_tag(t)) { + tag_value = __btf_name_by_offset(btf, t->name_off); + if (strcmp(tag_value, "user") == 0) + info->reg_type |= MEM_USER; + if (strcmp(tag_value, "percpu") == 0) + info->reg_type |= MEM_PERCPU; + } + /* skip modifiers */ while (btf_type_is_modifier(t)) { info->btf_id = t->type; @@ -3820,34 +6761,61 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, if (!btf_type_is_struct(t)) { bpf_log(log, "func '%s' arg%d type %s is not a struct\n", - tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]); + tname, arg, btf_type_str(t)); return false; } bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n", - tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)], + tname, arg, info->btf_id, btf_type_str(t), __btf_name_by_offset(btf, t->name_off)); + + /* Perform all checks on the validity of type for this argument, but if + * we know it can be IS_ERR at runtime, scrub pointer type and mark as + * scalar. + */ + if (ptr_err_raw_tp) { + bpf_log(log, "marking pointer arg%d as scalar as it may encode error", arg); + info->reg_type = SCALAR_VALUE; + } return true; } +EXPORT_SYMBOL_GPL(btf_ctx_access); -int btf_struct_access(struct bpf_verifier_log *log, - const struct btf_type *t, int off, int size, - enum bpf_access_type atype, - u32 *next_btf_id) +enum bpf_struct_walk_result { + /* < 0 error */ + WALK_SCALAR = 0, + WALK_PTR, + WALK_STRUCT, +}; + +static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf, + const struct btf_type *t, int off, int size, + u32 *next_btf_id, enum bpf_type_flag *flag, + const char **field_name) { u32 i, moff, mtrue_end, msize = 0, total_nelems = 0; const struct btf_type *mtype, *elem_type = NULL; const struct btf_member *member; - const char *tname, *mname; - u32 vlen; + const char *tname, *mname, *tag_value; + u32 vlen, elem_id, mid; again: - tname = __btf_name_by_offset(btf_vmlinux, t->name_off); + if (btf_type_is_modifier(t)) + t = btf_type_skip_modifiers(btf, t->type, NULL); + tname = __btf_name_by_offset(btf, t->name_off); if (!btf_type_is_struct(t)) { bpf_log(log, "Type '%s' is not a struct\n", tname); return -EINVAL; } vlen = btf_type_vlen(t); + if (BTF_INFO_KIND(t->info) == BTF_KIND_UNION && vlen != 1 && !(*flag & PTR_UNTRUSTED)) + /* + * walking unions yields untrusted pointers + * with exception of __bpf_md_ptr and other + * unions with a single member + */ + *flag |= PTR_UNTRUSTED; + if (off + size > t->size) { /* If the last element is a variable size array, we may * need to relax the rule. @@ -3858,7 +6826,7 @@ again: goto error; member = btf_type_member(t) + vlen - 1; - mtype = btf_type_skip_modifiers(btf_vmlinux, member->type, + mtype = btf_type_skip_modifiers(btf, member->type, NULL); if (!btf_type_is_array(mtype)) goto error; @@ -3867,21 +6835,22 @@ again: if (array_elem->nelems != 0) goto error; - moff = btf_member_bit_offset(t, member) / 8; + moff = __btf_member_bit_offset(t, member) / 8; if (off < moff) goto error; - /* Only allow structure for now, can be relaxed for - * other types later. - */ - elem_type = btf_type_skip_modifiers(btf_vmlinux, - array_elem->type, NULL); - if (!btf_type_is_struct(elem_type)) + /* allow structure and integer */ + t = btf_type_skip_modifiers(btf, array_elem->type, + NULL); + + if (btf_type_is_int(t)) + return WALK_SCALAR; + + if (!btf_type_is_struct(t)) goto error; - off = (off - moff) % elem_type->size; - return btf_struct_access(log, elem_type, off, size, atype, - next_btf_id); + off = (off - moff) % t->size; + goto again; error: bpf_log(log, "access beyond struct %s at off %u size %u\n", @@ -3891,14 +6860,14 @@ error: for_each_member(i, t, member) { /* offset of the field in bytes */ - moff = btf_member_bit_offset(t, member) / 8; + moff = __btf_member_bit_offset(t, member) / 8; if (off + size <= moff) /* won't find anything, field is already too far */ break; - if (btf_member_bitfield_size(t, member)) { - u32 end_bit = btf_member_bit_offset(t, member) + - btf_member_bitfield_size(t, member); + if (__btf_member_bitfield_size(t, member)) { + u32 end_bit = __btf_member_bit_offset(t, member) + + __btf_member_bitfield_size(t, member); /* off <= moff instead of off == moff because clang * does not generate a BTF member for anonymous @@ -3910,7 +6879,7 @@ error: */ if (off <= moff && BITS_ROUNDUP_BYTES(end_bit) <= off + size) - return SCALAR_VALUE; + return WALK_SCALAR; /* off may be accessing a following member * @@ -3932,11 +6901,13 @@ error: break; /* type of the field */ - mtype = btf_type_by_id(btf_vmlinux, member->type); - mname = __btf_name_by_offset(btf_vmlinux, member->name_off); + mid = member->type; + mtype = btf_type_by_id(btf, member->type); + mname = __btf_name_by_offset(btf, member->name_off); - mtype = btf_resolve_size(btf_vmlinux, mtype, &msize, - &elem_type, &total_nelems); + mtype = __btf_resolve_size(btf, mtype, &msize, + &elem_type, &elem_id, &total_nelems, + &mid); if (IS_ERR(mtype)) { bpf_log(log, "field %s doesn't have size\n", mname); return -EFAULT; @@ -3950,7 +6921,7 @@ error: if (btf_type_is_array(mtype)) { u32 elem_idx; - /* btf_resolve_size() above helps to + /* __btf_resolve_size() above helps to * linearize a multi-dimensional array. * * The logic here is treating an array @@ -3998,6 +6969,7 @@ error: elem_idx = (off - moff) / msize; moff += elem_idx * msize; mtype = elem_type; + mid = elem_id; } /* the 'off' we're looking for is either equal to start @@ -4007,13 +6979,20 @@ error: /* our field must be inside that union or struct */ t = mtype; + /* return if the offset matches the member offset */ + if (off == moff) { + *next_btf_id = mid; + return WALK_STRUCT; + } + /* adjust offset we're looking for */ off -= moff; goto again; } if (btf_type_is_ptr(mtype)) { - const struct btf_type *stype; + const struct btf_type *stype, *t; + enum bpf_type_flag tmp_flag = 0; u32 id; if (msize != size || off != moff) { @@ -4023,10 +7002,28 @@ error: return -EACCES; } - stype = btf_type_skip_modifiers(btf_vmlinux, mtype->type, &id); + /* check type tag */ + t = btf_type_by_id(btf, mtype->type); + if (btf_type_is_type_tag(t)) { + tag_value = __btf_name_by_offset(btf, t->name_off); + /* check __user tag */ + if (strcmp(tag_value, "user") == 0) + tmp_flag = MEM_USER; + /* check __percpu tag */ + if (strcmp(tag_value, "percpu") == 0) + tmp_flag = MEM_PERCPU; + /* check __rcu tag */ + if (strcmp(tag_value, "rcu") == 0) + tmp_flag = MEM_RCU; + } + + stype = btf_type_skip_modifiers(btf, mtype->type, &id); if (btf_type_is_struct(stype)) { *next_btf_id = id; - return PTR_TO_BTF_ID; + *flag |= tmp_flag; + if (field_name) + *field_name = mname; + return WALK_PTR; } } @@ -4036,140 +7033,182 @@ error: * that also allows using an array of int as a scratch * space. e.g. skb->cb[]. */ - if (off + size > mtrue_end) { + if (off + size > mtrue_end && !(*flag & PTR_UNTRUSTED)) { bpf_log(log, "access beyond the end of member %s (mend:%u) in struct %s with off %u size %u\n", mname, mtrue_end, tname, off, size); return -EACCES; } - return SCALAR_VALUE; + return WALK_SCALAR; } bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off); return -EINVAL; } -static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, - int arg) +int btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size, enum bpf_access_type atype __maybe_unused, + u32 *next_btf_id, enum bpf_type_flag *flag, + const char **field_name) { - char fnname[KSYM_SYMBOL_LEN + 4] = "btf_"; - const struct btf_param *args; + const struct btf *btf = reg->btf; + enum bpf_type_flag tmp_flag = 0; const struct btf_type *t; - const char *tname, *sym; - u32 btf_id, i; + u32 id = reg->btf_id; + int err; - if (!btf_vmlinux) { - bpf_log(log, "btf_vmlinux doesn't exist\n"); - return -EINVAL; - } + while (type_is_alloc(reg->type)) { + struct btf_struct_meta *meta; + struct btf_record *rec; + int i; - if (IS_ERR(btf_vmlinux)) { - bpf_log(log, "btf_vmlinux is malformed\n"); - return -EINVAL; + meta = btf_find_struct_meta(btf, id); + if (!meta) + break; + rec = meta->record; + for (i = 0; i < rec->cnt; i++) { + struct btf_field *field = &rec->fields[i]; + u32 offset = field->offset; + if (off < offset + field->size && offset < off + size) { + bpf_log(log, + "direct access to %s is disallowed\n", + btf_field_type_name(field->type)); + return -EACCES; + } + } + break; } - sym = kallsyms_lookup((long)fn, NULL, NULL, NULL, fnname + 4); - if (!sym) { - bpf_log(log, "kernel doesn't have kallsyms\n"); - return -EFAULT; - } + t = btf_type_by_id(btf, id); + do { + err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag, field_name); - for (i = 1; i <= btf_vmlinux->nr_types; i++) { - t = btf_type_by_id(btf_vmlinux, i); - if (BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) - continue; - tname = __btf_name_by_offset(btf_vmlinux, t->name_off); - if (!strcmp(tname, fnname)) + switch (err) { + case WALK_PTR: + /* For local types, the destination register cannot + * become a pointer again. + */ + if (type_is_alloc(reg->type)) + return SCALAR_VALUE; + /* If we found the pointer or scalar on t+off, + * we're done. + */ + *next_btf_id = id; + *flag = tmp_flag; + return PTR_TO_BTF_ID; + case WALK_SCALAR: + return SCALAR_VALUE; + case WALK_STRUCT: + /* We found nested struct, so continue the search + * by diving in it. At this point the offset is + * aligned with the new type, so set it to 0. + */ + t = btf_type_by_id(btf, id); + off = 0; break; - } - if (i > btf_vmlinux->nr_types) { - bpf_log(log, "helper %s type is not found\n", fnname); - return -ENOENT; - } - - t = btf_type_by_id(btf_vmlinux, t->type); - if (!btf_type_is_ptr(t)) - return -EFAULT; - t = btf_type_by_id(btf_vmlinux, t->type); - if (!btf_type_is_func_proto(t)) - return -EFAULT; + default: + /* It's either error or unknown return value.. + * scream and leave. + */ + if (WARN_ONCE(err > 0, "unknown btf_struct_walk return value")) + return -EINVAL; + return err; + } + } while (t); - args = (const struct btf_param *)(t + 1); - if (arg >= btf_type_vlen(t)) { - bpf_log(log, "bpf helper %s doesn't have %d-th argument\n", - fnname, arg); - return -EINVAL; - } + return -EINVAL; +} - t = btf_type_by_id(btf_vmlinux, args[arg].type); - if (!btf_type_is_ptr(t) || !t->type) { - /* anything but the pointer to struct is a helper config bug */ - bpf_log(log, "ARG_PTR_TO_BTF is misconfigured\n"); - return -EFAULT; - } - btf_id = t->type; - t = btf_type_by_id(btf_vmlinux, t->type); - /* skip modifiers */ - while (btf_type_is_modifier(t)) { - btf_id = t->type; - t = btf_type_by_id(btf_vmlinux, t->type); - } - if (!btf_type_is_struct(t)) { - bpf_log(log, "ARG_PTR_TO_BTF is not a struct\n"); - return -EFAULT; - } - bpf_log(log, "helper %s arg%d has btf_id %d struct %s\n", fnname + 4, - arg, btf_id, __btf_name_by_offset(btf_vmlinux, t->name_off)); - return btf_id; +/* Check that two BTF types, each specified as an BTF object + id, are exactly + * the same. Trivial ID check is not enough due to module BTFs, because we can + * end up with two different module BTFs, but IDs point to the common type in + * vmlinux BTF. + */ +bool btf_types_are_same(const struct btf *btf1, u32 id1, + const struct btf *btf2, u32 id2) +{ + if (id1 != id2) + return false; + if (btf1 == btf2) + return true; + return btf_type_by_id(btf1, id1) == btf_type_by_id(btf2, id2); } -int btf_resolve_helper_id(struct bpf_verifier_log *log, - const struct bpf_func_proto *fn, int arg) +bool btf_struct_ids_match(struct bpf_verifier_log *log, + const struct btf *btf, u32 id, int off, + const struct btf *need_btf, u32 need_type_id, + bool strict) { - int *btf_id = &fn->btf_id[arg]; - int ret; + const struct btf_type *type; + enum bpf_type_flag flag = 0; + int err; - if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID) - return -EINVAL; + /* Are we already done? */ + if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id)) + return true; + /* In case of strict type match, we do not walk struct, the top level + * type match must succeed. When strict is true, off should have already + * been 0. + */ + if (strict) + return false; +again: + type = btf_type_by_id(btf, id); + if (!type) + return false; + err = btf_struct_walk(log, btf, type, off, 1, &id, &flag, NULL); + if (err != WALK_STRUCT) + return false; - ret = READ_ONCE(*btf_id); - if (ret) - return ret; - /* ok to race the search. The result is the same */ - ret = __btf_resolve_helper_id(log, fn->func, arg); - if (!ret) { - /* Function argument cannot be type 'void' */ - bpf_log(log, "BTF resolution bug\n"); - return -EFAULT; + /* We found nested struct object. If it matches + * the requested ID, we're done. Otherwise let's + * continue the search with offset 0 in the new + * type. + */ + if (!btf_types_are_same(btf, id, need_btf, need_type_id)) { + off = 0; + goto again; } - WRITE_ONCE(*btf_id, ret); - return ret; + + return true; } static int __get_type_size(struct btf *btf, u32 btf_id, - const struct btf_type **bad_type) + const struct btf_type **ret_type) { const struct btf_type *t; + *ret_type = btf_type_by_id(btf, 0); if (!btf_id) /* void */ return 0; t = btf_type_by_id(btf, btf_id); while (t && btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (!t) { - *bad_type = btf->types[0]; + if (!t) return -EINVAL; - } + *ret_type = t; if (btf_type_is_ptr(t)) /* kernel size of pointer. Not BPF's size of pointer*/ return sizeof(void *); - if (btf_type_is_int(t) || btf_type_is_enum(t)) + if (btf_type_is_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t)) return t->size; - *bad_type = t; return -EINVAL; } +static u8 __get_type_fmodel_flags(const struct btf_type *t) +{ + u8 flags = 0; + + if (__btf_type_is_struct(t)) + flags |= BTF_FMODEL_STRUCT_ARG; + if (btf_type_is_signed_int(t)) + flags |= BTF_FMODEL_SIGNED_ARG; + + return flags; +} + int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, const struct btf_type *func, @@ -4183,40 +7222,59 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, if (!func) { /* BTF function prototype doesn't match the verifier types. - * Fall back to 5 u64 args. + * Fall back to MAX_BPF_FUNC_REG_ARGS u64 args. */ - for (i = 0; i < 5; i++) + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { m->arg_size[i] = 8; + m->arg_flags[i] = 0; + } m->ret_size = 8; - m->nr_args = 5; + m->ret_flags = 0; + m->nr_args = MAX_BPF_FUNC_REG_ARGS; return 0; } args = (const struct btf_param *)(func + 1); nargs = btf_type_vlen(func); - if (nargs >= MAX_BPF_FUNC_ARGS) { + if (nargs > MAX_BPF_FUNC_ARGS) { bpf_log(log, "The function %s has %d arguments. Too many.\n", tname, nargs); return -EINVAL; } ret = __get_type_size(btf, func->type, &t); - if (ret < 0) { + if (ret < 0 || __btf_type_is_struct(t)) { bpf_log(log, "The function %s return type %s is unsupported.\n", - tname, btf_kind_str[BTF_INFO_KIND(t->info)]); + tname, btf_type_str(t)); return -EINVAL; } m->ret_size = ret; + m->ret_flags = __get_type_fmodel_flags(t); for (i = 0; i < nargs; i++) { + if (i == nargs - 1 && args[i].type == 0) { + bpf_log(log, + "The function %s with variable args is unsupported.\n", + tname); + return -EINVAL; + } ret = __get_type_size(btf, args[i].type, &t); - if (ret < 0) { + + /* No support of struct argument size greater than 16 bytes */ + if (ret < 0 || ret > 16) { bpf_log(log, "The function %s arg%d type %s is unsupported.\n", - tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]); + tname, i, btf_type_str(t)); + return -EINVAL; + } + if (ret == 0) { + bpf_log(log, + "The function %s has malformed void argument.\n", + tname); return -EINVAL; } m->arg_size[i] = ret; + m->arg_flags[i] = __get_type_fmodel_flags(t); } m->nr_args = nargs; return 0; @@ -4300,7 +7358,7 @@ static int btf_check_func_type_match(struct bpf_verifier_log *log, * to context only. And only global functions can be replaced. * Hence type check only those types. */ - if (btf_type_is_int(t1) || btf_type_is_enum(t1)) + if (btf_type_is_int(t1) || btf_is_any_enum(t1)) continue; if (!btf_type_is_ptr(t1)) { bpf_log(log, @@ -4341,7 +7399,7 @@ static int btf_check_func_type_match(struct bpf_verifier_log *log, } /* Compare BTFs of given program with BTF of target program */ -int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog, +int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, struct btf *btf2, const struct btf_type *t2) { struct btf *btf1 = prog->aux->btf; @@ -4349,7 +7407,7 @@ int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog, u32 btf_id = 0; if (!prog->aux->func_info) { - bpf_log(&env->log, "Program extension requires BTF\n"); + bpf_log(log, "Program extension requires BTF\n"); return -EINVAL; } @@ -4361,139 +7419,142 @@ int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog, if (!t1 || !btf_type_is_func(t1)) return -EFAULT; - return btf_check_func_type_match(&env->log, btf1, t1, btf2, t2); + return btf_check_func_type_match(log, btf1, t1, btf2, t2); } -/* Compare BTF of a function with given bpf_reg_state. - * Returns: - * EFAULT - there is a verifier bug. Abort verification. - * EINVAL - there is a type mismatch or BTF is not available. - * 0 - BTF matches with what bpf_reg_state expects. - * Only PTR_TO_CTX and SCALAR_VALUE states are recognized. - */ -int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *reg) +static bool btf_is_dynptr_ptr(const struct btf *btf, const struct btf_type *t) { - struct bpf_verifier_log *log = &env->log; - struct bpf_prog *prog = env->prog; - struct btf *btf = prog->aux->btf; - const struct btf_param *args; - const struct btf_type *t; - u32 i, nargs, btf_id; - const char *tname; + const char *name; - if (!prog->aux->func_info) - return -EINVAL; + t = btf_type_by_id(btf, t->type); /* skip PTR */ - btf_id = prog->aux->func_info[subprog].type_id; - if (!btf_id) - return -EFAULT; - - if (prog->aux->func_info_aux[subprog].unreliable) - return -EINVAL; + while (btf_type_is_modifier(t)) + t = btf_type_by_id(btf, t->type); - t = btf_type_by_id(btf, btf_id); - if (!t || !btf_type_is_func(t)) { - /* These checks were already done by the verifier while loading - * struct bpf_func_info - */ - bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n", - subprog); - return -EFAULT; + /* allow either struct or struct forward declaration */ + if (btf_type_is_struct(t) || + (btf_type_is_fwd(t) && btf_type_kflag(t) == 0)) { + name = btf_str_by_offset(btf, t->name_off); + return name && strcmp(name, "bpf_dynptr") == 0; } - tname = btf_name_by_offset(btf, t->name_off); + return false; +} + +struct bpf_cand_cache { + const char *name; + u32 name_len; + u16 kind; + u16 cnt; + struct { + const struct btf *btf; + u32 id; + } cands[]; +}; + +static DEFINE_MUTEX(cand_cache_mutex); + +static struct bpf_cand_cache * +bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id); + +static int btf_get_ptr_to_btf_id(struct bpf_verifier_log *log, int arg_idx, + const struct btf *btf, const struct btf_type *t) +{ + struct bpf_cand_cache *cc; + struct bpf_core_ctx ctx = { + .btf = btf, + .log = log, + }; + u32 kern_type_id, type_id; + int err = 0; + + /* skip PTR and modifiers */ + type_id = t->type; t = btf_type_by_id(btf, t->type); - if (!t || !btf_type_is_func_proto(t)) { - bpf_log(log, "Invalid BTF of func %s\n", tname); - return -EFAULT; - } - args = (const struct btf_param *)(t + 1); - nargs = btf_type_vlen(t); - if (nargs > 5) { - bpf_log(log, "Function %s has %d > 5 args\n", tname, nargs); - goto out; - } - /* check that BTF function arguments match actual types that the - * verifier sees. - */ - for (i = 0; i < nargs; i++) { - t = btf_type_by_id(btf, args[i].type); - while (btf_type_is_modifier(t)) - t = btf_type_by_id(btf, t->type); - if (btf_type_is_int(t) || btf_type_is_enum(t)) { - if (reg[i + 1].type == SCALAR_VALUE) - continue; - bpf_log(log, "R%d is not a scalar\n", i + 1); - goto out; - } - if (btf_type_is_ptr(t)) { - if (reg[i + 1].type == SCALAR_VALUE) { - bpf_log(log, "R%d is not a pointer\n", i + 1); - goto out; - } - /* If function expects ctx type in BTF check that caller - * is passing PTR_TO_CTX. - */ - if (btf_get_prog_ctx_type(log, btf, t, prog->type, i)) { - if (reg[i + 1].type != PTR_TO_CTX) { - bpf_log(log, - "arg#%d expected pointer to ctx, but got %s\n", - i, btf_kind_str[BTF_INFO_KIND(t->info)]); - goto out; - } - if (check_ctx_reg(env, ®[i + 1], i + 1)) - goto out; - continue; - } - } - bpf_log(log, "Unrecognized arg#%d type %s\n", - i, btf_kind_str[BTF_INFO_KIND(t->info)]); - goto out; + while (btf_type_is_modifier(t)) { + type_id = t->type; + t = btf_type_by_id(btf, t->type); } - return 0; -out: - /* Compiler optimizations can remove arguments from static functions - * or mismatched type can be passed into a global function. - * In such cases mark the function as unreliable from BTF point of view. - */ - prog->aux->func_info_aux[subprog].unreliable = true; - return -EINVAL; + + mutex_lock(&cand_cache_mutex); + cc = bpf_core_find_cands(&ctx, type_id); + if (IS_ERR(cc)) { + err = PTR_ERR(cc); + bpf_log(log, "arg#%d reference type('%s %s') candidate matching error: %d\n", + arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off), + err); + goto cand_cache_unlock; + } + if (cc->cnt != 1) { + bpf_log(log, "arg#%d reference type('%s %s') %s\n", + arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off), + cc->cnt == 0 ? "has no matches" : "is ambiguous"); + err = cc->cnt == 0 ? -ENOENT : -ESRCH; + goto cand_cache_unlock; + } + if (btf_is_module(cc->cands[0].btf)) { + bpf_log(log, "arg#%d reference type('%s %s') points to kernel module type (unsupported)\n", + arg_idx, btf_type_str(t), __btf_name_by_offset(btf, t->name_off)); + err = -EOPNOTSUPP; + goto cand_cache_unlock; + } + kern_type_id = cc->cands[0].id; + +cand_cache_unlock: + mutex_unlock(&cand_cache_mutex); + if (err) + return err; + + return kern_type_id; } -/* Convert BTF of a function into bpf_reg_state if possible +enum btf_arg_tag { + ARG_TAG_CTX = BIT_ULL(0), + ARG_TAG_NONNULL = BIT_ULL(1), + ARG_TAG_TRUSTED = BIT_ULL(2), + ARG_TAG_NULLABLE = BIT_ULL(3), + ARG_TAG_ARENA = BIT_ULL(4), +}; + +/* Process BTF of a function to produce high-level expectation of function + * arguments (like ARG_PTR_TO_CTX, or ARG_PTR_TO_MEM, etc). This information + * is cached in subprog info for reuse. * Returns: * EFAULT - there is a verifier bug. Abort verification. * EINVAL - cannot convert BTF. - * 0 - Successfully converted BTF into bpf_reg_state - * (either PTR_TO_CTX or SCALAR_VALUE). + * 0 - Successfully processed BTF and constructed argument expectations. */ -int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, - struct bpf_reg_state *reg) +int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog) { + bool is_global = subprog_aux(env, subprog)->linkage == BTF_FUNC_GLOBAL; + struct bpf_subprog_info *sub = subprog_info(env, subprog); struct bpf_verifier_log *log = &env->log; struct bpf_prog *prog = env->prog; enum bpf_prog_type prog_type = prog->type; struct btf *btf = prog->aux->btf; const struct btf_param *args; - const struct btf_type *t; + const struct btf_type *t, *ref_t, *fn_t; u32 i, nargs, btf_id; const char *tname; - if (!prog->aux->func_info || - prog->aux->func_info_aux[subprog].linkage != BTF_FUNC_GLOBAL) { + if (sub->args_cached) + return 0; + + if (!prog->aux->func_info) { bpf_log(log, "Verifier bug\n"); return -EFAULT; } btf_id = prog->aux->func_info[subprog].type_id; if (!btf_id) { + if (!is_global) /* not fatal for static funcs */ + return -EINVAL; bpf_log(log, "Global functions need valid BTF\n"); return -EFAULT; } - t = btf_type_by_id(btf, btf_id); - if (!t || !btf_type_is_func(t)) { + fn_t = btf_type_by_id(btf, btf_id); + if (!fn_t || !btf_type_is_func(fn_t)) { /* These checks were already done by the verifier while loading * struct bpf_func_info */ @@ -4501,36 +7562,36 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, subprog); return -EFAULT; } - tname = btf_name_by_offset(btf, t->name_off); - - if (log->level & BPF_LOG_LEVEL) - bpf_log(log, "Validating %s() func#%d...\n", - tname, subprog); + tname = btf_name_by_offset(btf, fn_t->name_off); if (prog->aux->func_info_aux[subprog].unreliable) { bpf_log(log, "Verifier bug in function %s()\n", tname); return -EFAULT; } if (prog_type == BPF_PROG_TYPE_EXT) - prog_type = prog->aux->linked_prog->type; + prog_type = prog->aux->dst_prog->type; - t = btf_type_by_id(btf, t->type); + t = btf_type_by_id(btf, fn_t->type); if (!t || !btf_type_is_func_proto(t)) { bpf_log(log, "Invalid type of function %s()\n", tname); return -EFAULT; } args = (const struct btf_param *)(t + 1); nargs = btf_type_vlen(t); - if (nargs > 5) { - bpf_log(log, "Global function %s() with %d > 5 args. Buggy compiler.\n", - tname, nargs); + if (nargs > MAX_BPF_FUNC_REG_ARGS) { + if (!is_global) + return -EINVAL; + bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n", + tname, nargs, MAX_BPF_FUNC_REG_ARGS); return -EINVAL; } - /* check that function returns int */ + /* check that function returns int, exception cb also requires this */ t = btf_type_by_id(btf, t->type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (!btf_type_is_int(t) && !btf_type_is_enum(t)) { + if (!btf_type_is_int(t) && !btf_is_any_enum(t)) { + if (!is_global) + return -EINVAL; bpf_log(log, "Global function %s() doesn't return scalar. Only those are supported.\n", tname); @@ -4540,31 +7601,227 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, * Only PTR_TO_CTX and SCALAR are supported atm. */ for (i = 0; i < nargs; i++) { + u32 tags = 0; + int id = 0; + + /* 'arg:<tag>' decl_tag takes precedence over derivation of + * register type from BTF type itself + */ + while ((id = btf_find_next_decl_tag(btf, fn_t, i, "arg:", id)) > 0) { + const struct btf_type *tag_t = btf_type_by_id(btf, id); + const char *tag = __btf_name_by_offset(btf, tag_t->name_off) + 4; + + /* disallow arg tags in static subprogs */ + if (!is_global) { + bpf_log(log, "arg#%d type tag is not supported in static functions\n", i); + return -EOPNOTSUPP; + } + + if (strcmp(tag, "ctx") == 0) { + tags |= ARG_TAG_CTX; + } else if (strcmp(tag, "trusted") == 0) { + tags |= ARG_TAG_TRUSTED; + } else if (strcmp(tag, "nonnull") == 0) { + tags |= ARG_TAG_NONNULL; + } else if (strcmp(tag, "nullable") == 0) { + tags |= ARG_TAG_NULLABLE; + } else if (strcmp(tag, "arena") == 0) { + tags |= ARG_TAG_ARENA; + } else { + bpf_log(log, "arg#%d has unsupported set of tags\n", i); + return -EOPNOTSUPP; + } + } + if (id != -ENOENT) { + bpf_log(log, "arg#%d type tag fetching failure: %d\n", i, id); + return id; + } + t = btf_type_by_id(btf, args[i].type); while (btf_type_is_modifier(t)) t = btf_type_by_id(btf, t->type); - if (btf_type_is_int(t) || btf_type_is_enum(t)) { - reg[i + 1].type = SCALAR_VALUE; + if (!btf_type_is_ptr(t)) + goto skip_pointer; + + if ((tags & ARG_TAG_CTX) || btf_is_prog_ctx_type(log, btf, t, prog_type, i)) { + if (tags & ~ARG_TAG_CTX) { + bpf_log(log, "arg#%d has invalid combination of tags\n", i); + return -EINVAL; + } + if ((tags & ARG_TAG_CTX) && + btf_validate_prog_ctx_type(log, btf, t, i, prog_type, + prog->expected_attach_type)) + return -EINVAL; + sub->args[i].arg_type = ARG_PTR_TO_CTX; continue; } - if (btf_type_is_ptr(t) && - btf_get_prog_ctx_type(log, btf, t, prog_type, i)) { - reg[i + 1].type = PTR_TO_CTX; + if (btf_is_dynptr_ptr(btf, t)) { + if (tags) { + bpf_log(log, "arg#%d has invalid combination of tags\n", i); + return -EINVAL; + } + sub->args[i].arg_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY; continue; } + if (tags & ARG_TAG_TRUSTED) { + int kern_type_id; + + if (tags & ARG_TAG_NONNULL) { + bpf_log(log, "arg#%d has invalid combination of tags\n", i); + return -EINVAL; + } + + kern_type_id = btf_get_ptr_to_btf_id(log, i, btf, t); + if (kern_type_id < 0) + return kern_type_id; + + sub->args[i].arg_type = ARG_PTR_TO_BTF_ID | PTR_TRUSTED; + if (tags & ARG_TAG_NULLABLE) + sub->args[i].arg_type |= PTR_MAYBE_NULL; + sub->args[i].btf_id = kern_type_id; + continue; + } + if (tags & ARG_TAG_ARENA) { + if (tags & ~ARG_TAG_ARENA) { + bpf_log(log, "arg#%d arena cannot be combined with any other tags\n", i); + return -EINVAL; + } + sub->args[i].arg_type = ARG_PTR_TO_ARENA; + continue; + } + if (is_global) { /* generic user data pointer */ + u32 mem_size; + + if (tags & ARG_TAG_NULLABLE) { + bpf_log(log, "arg#%d has invalid combination of tags\n", i); + return -EINVAL; + } + + t = btf_type_skip_modifiers(btf, t->type, NULL); + ref_t = btf_resolve_size(btf, t, &mem_size); + if (IS_ERR(ref_t)) { + bpf_log(log, "arg#%d reference type('%s %s') size cannot be determined: %ld\n", + i, btf_type_str(t), btf_name_by_offset(btf, t->name_off), + PTR_ERR(ref_t)); + return -EINVAL; + } + + sub->args[i].arg_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL; + if (tags & ARG_TAG_NONNULL) + sub->args[i].arg_type &= ~PTR_MAYBE_NULL; + sub->args[i].mem_size = mem_size; + continue; + } + +skip_pointer: + if (tags) { + bpf_log(log, "arg#%d has pointer tag, but is not a pointer type\n", i); + return -EINVAL; + } + if (btf_type_is_int(t) || btf_is_any_enum(t)) { + sub->args[i].arg_type = ARG_ANYTHING; + continue; + } + if (!is_global) + return -EINVAL; bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n", - i, btf_kind_str[BTF_INFO_KIND(t->info)], tname); + i, btf_type_str(t), tname); return -EINVAL; } + + sub->arg_cnt = nargs; + sub->args_cached = true; + return 0; } +static void btf_type_show(const struct btf *btf, u32 type_id, void *obj, + struct btf_show *show) +{ + const struct btf_type *t = btf_type_by_id(btf, type_id); + + show->btf = btf; + memset(&show->state, 0, sizeof(show->state)); + memset(&show->obj, 0, sizeof(show->obj)); + + btf_type_ops(t)->show(btf, t, type_id, obj, 0, show); +} + +__printf(2, 0) static void btf_seq_show(struct btf_show *show, const char *fmt, + va_list args) +{ + seq_vprintf((struct seq_file *)show->target, fmt, args); +} + +int btf_type_seq_show_flags(const struct btf *btf, u32 type_id, + void *obj, struct seq_file *m, u64 flags) +{ + struct btf_show sseq; + + sseq.target = m; + sseq.showfn = btf_seq_show; + sseq.flags = flags; + + btf_type_show(btf, type_id, obj, &sseq); + + return sseq.state.status; +} + void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m) { - const struct btf_type *t = btf_type_by_id(btf, type_id); + (void) btf_type_seq_show_flags(btf, type_id, obj, m, + BTF_SHOW_NONAME | BTF_SHOW_COMPACT | + BTF_SHOW_ZERO | BTF_SHOW_UNSAFE); +} + +struct btf_show_snprintf { + struct btf_show show; + int len_left; /* space left in string */ + int len; /* length we would have written */ +}; + +__printf(2, 0) static void btf_snprintf_show(struct btf_show *show, const char *fmt, + va_list args) +{ + struct btf_show_snprintf *ssnprintf = (struct btf_show_snprintf *)show; + int len; + + len = vsnprintf(show->target, ssnprintf->len_left, fmt, args); - btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m); + if (len < 0) { + ssnprintf->len_left = 0; + ssnprintf->len = len; + } else if (len >= ssnprintf->len_left) { + /* no space, drive on to get length we would have written */ + ssnprintf->len_left = 0; + ssnprintf->len += len; + } else { + ssnprintf->len_left -= len; + ssnprintf->len += len; + show->target += len; + } +} + +int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj, + char *buf, int len, u64 flags) +{ + struct btf_show_snprintf ssnprintf; + + ssnprintf.show.target = buf; + ssnprintf.show.flags = flags; + ssnprintf.show.showfn = btf_snprintf_show; + ssnprintf.len_left = len; + ssnprintf.len = 0; + + btf_type_show(btf, type_id, obj, (struct btf_show *)&ssnprintf); + + /* If we encountered an error, return it. */ + if (ssnprintf.show.state.status) + return ssnprintf.show.state.status; + + /* Otherwise return length we would have written */ + return ssnprintf.len; } #ifdef CONFIG_PROC_FS @@ -4594,15 +7851,12 @@ static int __btf_new_fd(struct btf *btf) return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC); } -int btf_new_fd(const union bpf_attr *attr) +int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { struct btf *btf; int ret; - btf = btf_parse(u64_to_user_ptr(attr->btf), - attr->btf_size, attr->btf_log_level, - u64_to_user_ptr(attr->btf_log_buf), - attr->btf_log_size); + btf = btf_parse(attr, uattr, uattr_size); if (IS_ERR(btf)) return PTR_ERR(btf); @@ -4628,21 +7882,11 @@ int btf_new_fd(const union bpf_attr *attr) struct btf *btf_get_by_fd(int fd) { struct btf *btf; - struct fd f; + CLASS(fd, f)(fd); - f = fdget(fd); - - if (!f.file) - return ERR_PTR(-EBADF); - - if (f.file->f_op != &btf_fops) { - fdput(f); - return ERR_PTR(-EINVAL); - } - - btf = f.file->private_data; - refcount_inc(&btf->refcnt); - fdput(f); + btf = __btf_get_by_fd(f); + if (!IS_ERR(btf)) + refcount_inc(&btf->refcnt); return btf; } @@ -4655,7 +7899,9 @@ int btf_get_info_by_fd(const struct btf *btf, struct bpf_btf_info info; u32 info_copy, btf_copy; void __user *ubtf; - u32 uinfo_len; + char __user *uname; + u32 uinfo_len, uname_len, name_len; + int ret = 0; uinfo = u64_to_user_ptr(attr->info.info); uinfo_len = attr->info.info_len; @@ -4672,11 +7918,37 @@ int btf_get_info_by_fd(const struct btf *btf, return -EFAULT; info.btf_size = btf->data_size; + info.kernel_btf = btf->kernel_btf; + + uname = u64_to_user_ptr(info.name); + uname_len = info.name_len; + if (!uname ^ !uname_len) + return -EINVAL; + + name_len = strlen(btf->name); + info.name_len = name_len; + + if (uname) { + if (uname_len >= name_len + 1) { + if (copy_to_user(uname, btf->name, name_len + 1)) + return -EFAULT; + } else { + char zero = '\0'; + + if (copy_to_user(uname, btf->name, uname_len - 1)) + return -EFAULT; + if (put_user(zero, uname + uname_len - 1)) + return -EFAULT; + /* let user-space know about too short buffer */ + ret = -ENOSPC; + } + } + if (copy_to_user(uinfo, &info, info_copy) || put_user(info_copy, &uattr->info.info_len)) return -EFAULT; - return 0; + return ret; } int btf_get_fd_by_id(u32 id) @@ -4700,7 +7972,1443 @@ int btf_get_fd_by_id(u32 id) return fd; } -u32 btf_id(const struct btf *btf) +u32 btf_obj_id(const struct btf *btf) { return btf->id; } + +bool btf_is_kernel(const struct btf *btf) +{ + return btf->kernel_btf; +} + +bool btf_is_module(const struct btf *btf) +{ + return btf->kernel_btf && strcmp(btf->name, "vmlinux") != 0; +} + +enum { + BTF_MODULE_F_LIVE = (1 << 0), +}; + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES +struct btf_module { + struct list_head list; + struct module *module; + struct btf *btf; + struct bin_attribute *sysfs_attr; + int flags; +}; + +static LIST_HEAD(btf_modules); +static DEFINE_MUTEX(btf_module_mutex); + +static void purge_cand_cache(struct btf *btf); + +static int btf_module_notify(struct notifier_block *nb, unsigned long op, + void *module) +{ + struct btf_module *btf_mod, *tmp; + struct module *mod = module; + struct btf *btf; + int err = 0; + + if (mod->btf_data_size == 0 || + (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE && + op != MODULE_STATE_GOING)) + goto out; + + switch (op) { + case MODULE_STATE_COMING: + btf_mod = kzalloc(sizeof(*btf_mod), GFP_KERNEL); + if (!btf_mod) { + err = -ENOMEM; + goto out; + } + btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size, + mod->btf_base_data, mod->btf_base_data_size); + if (IS_ERR(btf)) { + kfree(btf_mod); + if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) { + pr_warn("failed to validate module [%s] BTF: %ld\n", + mod->name, PTR_ERR(btf)); + err = PTR_ERR(btf); + } else { + pr_warn_once("Kernel module BTF mismatch detected, BTF debug info may be unavailable for some modules\n"); + } + goto out; + } + err = btf_alloc_id(btf); + if (err) { + btf_free(btf); + kfree(btf_mod); + goto out; + } + + purge_cand_cache(NULL); + mutex_lock(&btf_module_mutex); + btf_mod->module = module; + btf_mod->btf = btf; + list_add(&btf_mod->list, &btf_modules); + mutex_unlock(&btf_module_mutex); + + if (IS_ENABLED(CONFIG_SYSFS)) { + struct bin_attribute *attr; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + goto out; + + sysfs_bin_attr_init(attr); + attr->attr.name = btf->name; + attr->attr.mode = 0444; + attr->size = btf->data_size; + attr->private = btf->data; + attr->read_new = sysfs_bin_attr_simple_read; + + err = sysfs_create_bin_file(btf_kobj, attr); + if (err) { + pr_warn("failed to register module [%s] BTF in sysfs: %d\n", + mod->name, err); + kfree(attr); + err = 0; + goto out; + } + + btf_mod->sysfs_attr = attr; + } + + break; + case MODULE_STATE_LIVE: + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + btf_mod->flags |= BTF_MODULE_F_LIVE; + break; + } + mutex_unlock(&btf_module_mutex); + break; + case MODULE_STATE_GOING: + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + list_del(&btf_mod->list); + if (btf_mod->sysfs_attr) + sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr); + purge_cand_cache(btf_mod->btf); + btf_put(btf_mod->btf); + kfree(btf_mod->sysfs_attr); + kfree(btf_mod); + break; + } + mutex_unlock(&btf_module_mutex); + break; + } +out: + return notifier_from_errno(err); +} + +static struct notifier_block btf_module_nb = { + .notifier_call = btf_module_notify, +}; + +static int __init btf_module_init(void) +{ + register_module_notifier(&btf_module_nb); + return 0; +} + +fs_initcall(btf_module_init); +#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */ + +struct module *btf_try_get_module(const struct btf *btf) +{ + struct module *res = NULL; +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + struct btf_module *btf_mod, *tmp; + + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->btf != btf) + continue; + + /* We must only consider module whose __init routine has + * finished, hence we must check for BTF_MODULE_F_LIVE flag, + * which is set from the notifier callback for + * MODULE_STATE_LIVE. + */ + if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module)) + res = btf_mod->module; + + break; + } + mutex_unlock(&btf_module_mutex); +#endif + + return res; +} + +/* Returns struct btf corresponding to the struct module. + * This function can return NULL or ERR_PTR. + */ +static struct btf *btf_get_module_btf(const struct module *module) +{ +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + struct btf_module *btf_mod, *tmp; +#endif + struct btf *btf = NULL; + + if (!module) { + btf = bpf_get_btf_vmlinux(); + if (!IS_ERR_OR_NULL(btf)) + btf_get(btf); + return btf; + } + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->module != module) + continue; + + btf_get(btf_mod->btf); + btf = btf_mod->btf; + break; + } + mutex_unlock(&btf_module_mutex); +#endif + + return btf; +} + +static int check_btf_kconfigs(const struct module *module, const char *feature) +{ + if (!module && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { + pr_err("missing vmlinux BTF, cannot register %s\n", feature); + return -ENOENT; + } + if (module && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + pr_warn("missing module BTF, cannot register %s\n", feature); + return 0; +} + +BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags) +{ + struct btf *btf = NULL; + int btf_obj_fd = 0; + long ret; + + if (flags) + return -EINVAL; + + if (name_sz <= 1 || name[name_sz - 1]) + return -EINVAL; + + ret = bpf_find_btf_id(name, kind, &btf); + if (ret > 0 && btf_is_module(btf)) { + btf_obj_fd = __btf_new_fd(btf); + if (btf_obj_fd < 0) { + btf_put(btf); + return btf_obj_fd; + } + return ret | (((u64)btf_obj_fd) << 32); + } + if (ret > 0) + btf_put(btf); + return ret; +} + +const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = { + .func = bpf_btf_find_by_name_kind, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + +BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE) +#define BTF_TRACING_TYPE(name, type) BTF_ID(struct, type) +BTF_TRACING_TYPE_xxx +#undef BTF_TRACING_TYPE + +/* Validate well-formedness of iter argument type. + * On success, return positive BTF ID of iter state's STRUCT type. + * On error, negative error is returned. + */ +int btf_check_iter_arg(struct btf *btf, const struct btf_type *func, int arg_idx) +{ + const struct btf_param *arg; + const struct btf_type *t; + const char *name; + int btf_id; + + if (btf_type_vlen(func) <= arg_idx) + return -EINVAL; + + arg = &btf_params(func)[arg_idx]; + t = btf_type_skip_modifiers(btf, arg->type, NULL); + if (!t || !btf_type_is_ptr(t)) + return -EINVAL; + t = btf_type_skip_modifiers(btf, t->type, &btf_id); + if (!t || !__btf_type_is_struct(t)) + return -EINVAL; + + name = btf_name_by_offset(btf, t->name_off); + if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1)) + return -EINVAL; + + return btf_id; +} + +static int btf_check_iter_kfuncs(struct btf *btf, const char *func_name, + const struct btf_type *func, u32 func_flags) +{ + u32 flags = func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY); + const char *sfx, *iter_name; + const struct btf_type *t; + char exp_name[128]; + u32 nr_args; + int btf_id; + + /* exactly one of KF_ITER_{NEW,NEXT,DESTROY} can be set */ + if (!flags || (flags & (flags - 1))) + return -EINVAL; + + /* any BPF iter kfunc should have `struct bpf_iter_<type> *` first arg */ + nr_args = btf_type_vlen(func); + if (nr_args < 1) + return -EINVAL; + + btf_id = btf_check_iter_arg(btf, func, 0); + if (btf_id < 0) + return btf_id; + + /* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to + * fit nicely in stack slots + */ + t = btf_type_by_id(btf, btf_id); + if (t->size == 0 || (t->size % 8)) + return -EINVAL; + + /* validate bpf_iter_<type>_{new,next,destroy}(struct bpf_iter_<type> *) + * naming pattern + */ + iter_name = btf_name_by_offset(btf, t->name_off) + sizeof(ITER_PREFIX) - 1; + if (flags & KF_ITER_NEW) + sfx = "new"; + else if (flags & KF_ITER_NEXT) + sfx = "next"; + else /* (flags & KF_ITER_DESTROY) */ + sfx = "destroy"; + + snprintf(exp_name, sizeof(exp_name), "bpf_iter_%s_%s", iter_name, sfx); + if (strcmp(func_name, exp_name)) + return -EINVAL; + + /* only iter constructor should have extra arguments */ + if (!(flags & KF_ITER_NEW) && nr_args != 1) + return -EINVAL; + + if (flags & KF_ITER_NEXT) { + /* bpf_iter_<type>_next() should return pointer */ + t = btf_type_skip_modifiers(btf, func->type, NULL); + if (!t || !btf_type_is_ptr(t)) + return -EINVAL; + } + + if (flags & KF_ITER_DESTROY) { + /* bpf_iter_<type>_destroy() should return void */ + t = btf_type_by_id(btf, func->type); + if (!t || !btf_type_is_void(t)) + return -EINVAL; + } + + return 0; +} + +static int btf_check_kfunc_protos(struct btf *btf, u32 func_id, u32 func_flags) +{ + const struct btf_type *func; + const char *func_name; + int err; + + /* any kfunc should be FUNC -> FUNC_PROTO */ + func = btf_type_by_id(btf, func_id); + if (!func || !btf_type_is_func(func)) + return -EINVAL; + + /* sanity check kfunc name */ + func_name = btf_name_by_offset(btf, func->name_off); + if (!func_name || !func_name[0]) + return -EINVAL; + + func = btf_type_by_id(btf, func->type); + if (!func || !btf_type_is_func_proto(func)) + return -EINVAL; + + if (func_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY)) { + err = btf_check_iter_kfuncs(btf, func_name, func, func_flags); + if (err) + return err; + } + + return 0; +} + +/* Kernel Function (kfunc) BTF ID set registration API */ + +static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, + const struct btf_kfunc_id_set *kset) +{ + struct btf_kfunc_hook_filter *hook_filter; + struct btf_id_set8 *add_set = kset->set; + bool vmlinux_set = !btf_is_module(btf); + bool add_filter = !!kset->filter; + struct btf_kfunc_set_tab *tab; + struct btf_id_set8 *set; + u32 set_cnt, i; + int ret; + + if (hook >= BTF_KFUNC_HOOK_MAX) { + ret = -EINVAL; + goto end; + } + + if (!add_set->cnt) + return 0; + + tab = btf->kfunc_set_tab; + + if (tab && add_filter) { + u32 i; + + hook_filter = &tab->hook_filters[hook]; + for (i = 0; i < hook_filter->nr_filters; i++) { + if (hook_filter->filters[i] == kset->filter) { + add_filter = false; + break; + } + } + + if (add_filter && hook_filter->nr_filters == BTF_KFUNC_FILTER_MAX_CNT) { + ret = -E2BIG; + goto end; + } + } + + if (!tab) { + tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN); + if (!tab) + return -ENOMEM; + btf->kfunc_set_tab = tab; + } + + set = tab->sets[hook]; + /* Warn when register_btf_kfunc_id_set is called twice for the same hook + * for module sets. + */ + if (WARN_ON_ONCE(set && !vmlinux_set)) { + ret = -EINVAL; + goto end; + } + + /* In case of vmlinux sets, there may be more than one set being + * registered per hook. To create a unified set, we allocate a new set + * and concatenate all individual sets being registered. While each set + * is individually sorted, they may become unsorted when concatenated, + * hence re-sorting the final set again is required to make binary + * searching the set using btf_id_set8_contains function work. + * + * For module sets, we need to allocate as we may need to relocate + * BTF ids. + */ + set_cnt = set ? set->cnt : 0; + + if (set_cnt > U32_MAX - add_set->cnt) { + ret = -EOVERFLOW; + goto end; + } + + if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) { + ret = -E2BIG; + goto end; + } + + /* Grow set */ + set = krealloc(tab->sets[hook], + offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]), + GFP_KERNEL | __GFP_NOWARN); + if (!set) { + ret = -ENOMEM; + goto end; + } + + /* For newly allocated set, initialize set->cnt to 0 */ + if (!tab->sets[hook]) + set->cnt = 0; + tab->sets[hook] = set; + + /* Concatenate the two sets */ + memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0])); + /* Now that the set is copied, update with relocated BTF ids */ + for (i = set->cnt; i < set->cnt + add_set->cnt; i++) + set->pairs[i].id = btf_relocate_id(btf, set->pairs[i].id); + + set->cnt += add_set->cnt; + + sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL); + + if (add_filter) { + hook_filter = &tab->hook_filters[hook]; + hook_filter->filters[hook_filter->nr_filters++] = kset->filter; + } + return 0; +end: + btf_free_kfunc_set_tab(btf); + return ret; +} + +static u32 *__btf_kfunc_id_set_contains(const struct btf *btf, + enum btf_kfunc_hook hook, + u32 kfunc_btf_id, + const struct bpf_prog *prog) +{ + struct btf_kfunc_hook_filter *hook_filter; + struct btf_id_set8 *set; + u32 *id, i; + + if (hook >= BTF_KFUNC_HOOK_MAX) + return NULL; + if (!btf->kfunc_set_tab) + return NULL; + hook_filter = &btf->kfunc_set_tab->hook_filters[hook]; + for (i = 0; i < hook_filter->nr_filters; i++) { + if (hook_filter->filters[i](prog, kfunc_btf_id)) + return NULL; + } + set = btf->kfunc_set_tab->sets[hook]; + if (!set) + return NULL; + id = btf_id_set8_contains(set, kfunc_btf_id); + if (!id) + return NULL; + /* The flags for BTF ID are located next to it */ + return id + 1; +} + +static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) +{ + switch (prog_type) { + case BPF_PROG_TYPE_UNSPEC: + return BTF_KFUNC_HOOK_COMMON; + case BPF_PROG_TYPE_XDP: + return BTF_KFUNC_HOOK_XDP; + case BPF_PROG_TYPE_SCHED_CLS: + return BTF_KFUNC_HOOK_TC; + case BPF_PROG_TYPE_STRUCT_OPS: + return BTF_KFUNC_HOOK_STRUCT_OPS; + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_LSM: + return BTF_KFUNC_HOOK_TRACING; + case BPF_PROG_TYPE_SYSCALL: + return BTF_KFUNC_HOOK_SYSCALL; + case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_CGROUP_DEVICE: + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: + case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_CGROUP_SYSCTL: + return BTF_KFUNC_HOOK_CGROUP; + case BPF_PROG_TYPE_SCHED_ACT: + return BTF_KFUNC_HOOK_SCHED_ACT; + case BPF_PROG_TYPE_SK_SKB: + return BTF_KFUNC_HOOK_SK_SKB; + case BPF_PROG_TYPE_SOCKET_FILTER: + return BTF_KFUNC_HOOK_SOCKET_FILTER; + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_XMIT: + case BPF_PROG_TYPE_LWT_SEG6LOCAL: + return BTF_KFUNC_HOOK_LWT; + case BPF_PROG_TYPE_NETFILTER: + return BTF_KFUNC_HOOK_NETFILTER; + case BPF_PROG_TYPE_KPROBE: + return BTF_KFUNC_HOOK_KPROBE; + default: + return BTF_KFUNC_HOOK_MAX; + } +} + +/* Caution: + * Reference to the module (obtained using btf_try_get_module) corresponding to + * the struct btf *MUST* be held when calling this function from verifier + * context. This is usually true as we stash references in prog's kfunc_btf_tab; + * keeping the reference for the duration of the call provides the necessary + * protection for looking up a well-formed btf->kfunc_set_tab. + */ +u32 *btf_kfunc_id_set_contains(const struct btf *btf, + u32 kfunc_btf_id, + const struct bpf_prog *prog) +{ + enum bpf_prog_type prog_type = resolve_prog_type(prog); + enum btf_kfunc_hook hook; + u32 *kfunc_flags; + + kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id, prog); + if (kfunc_flags) + return kfunc_flags; + + hook = bpf_prog_type_to_kfunc_hook(prog_type); + return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id, prog); +} + +u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id, + const struct bpf_prog *prog) +{ + return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id, prog); +} + +static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, + const struct btf_kfunc_id_set *kset) +{ + struct btf *btf; + int ret, i; + + btf = btf_get_module_btf(kset->owner); + if (!btf) + return check_btf_kconfigs(kset->owner, "kfunc"); + if (IS_ERR(btf)) + return PTR_ERR(btf); + + for (i = 0; i < kset->set->cnt; i++) { + ret = btf_check_kfunc_protos(btf, btf_relocate_id(btf, kset->set->pairs[i].id), + kset->set->pairs[i].flags); + if (ret) + goto err_out; + } + + ret = btf_populate_kfunc_set(btf, hook, kset); + +err_out: + btf_put(btf); + return ret; +} + +/* This function must be invoked only from initcalls/module init functions */ +int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, + const struct btf_kfunc_id_set *kset) +{ + enum btf_kfunc_hook hook; + + /* All kfuncs need to be tagged as such in BTF. + * WARN() for initcall registrations that do not check errors. + */ + if (!(kset->set->flags & BTF_SET8_KFUNCS)) { + WARN_ON(!kset->owner); + return -EINVAL; + } + + hook = bpf_prog_type_to_kfunc_hook(prog_type); + return __register_btf_kfunc_id_set(hook, kset); +} +EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); + +/* This function must be invoked only from initcalls/module init functions */ +int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset) +{ + return __register_btf_kfunc_id_set(BTF_KFUNC_HOOK_FMODRET, kset); +} +EXPORT_SYMBOL_GPL(register_btf_fmodret_id_set); + +s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id) +{ + struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab; + struct btf_id_dtor_kfunc *dtor; + + if (!tab) + return -ENOENT; + /* Even though the size of tab->dtors[0] is > sizeof(u32), we only need + * to compare the first u32 with btf_id, so we can reuse btf_id_cmp_func. + */ + BUILD_BUG_ON(offsetof(struct btf_id_dtor_kfunc, btf_id) != 0); + dtor = bsearch(&btf_id, tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func); + if (!dtor) + return -ENOENT; + return dtor->kfunc_btf_id; +} + +static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc *dtors, u32 cnt) +{ + const struct btf_type *dtor_func, *dtor_func_proto, *t; + const struct btf_param *args; + s32 dtor_btf_id; + u32 nr_args, i; + + for (i = 0; i < cnt; i++) { + dtor_btf_id = btf_relocate_id(btf, dtors[i].kfunc_btf_id); + + dtor_func = btf_type_by_id(btf, dtor_btf_id); + if (!dtor_func || !btf_type_is_func(dtor_func)) + return -EINVAL; + + dtor_func_proto = btf_type_by_id(btf, dtor_func->type); + if (!dtor_func_proto || !btf_type_is_func_proto(dtor_func_proto)) + return -EINVAL; + + /* Make sure the prototype of the destructor kfunc is 'void func(type *)' */ + t = btf_type_by_id(btf, dtor_func_proto->type); + if (!t || !btf_type_is_void(t)) + return -EINVAL; + + nr_args = btf_type_vlen(dtor_func_proto); + if (nr_args != 1) + return -EINVAL; + args = btf_params(dtor_func_proto); + t = btf_type_by_id(btf, args[0].type); + /* Allow any pointer type, as width on targets Linux supports + * will be same for all pointer types (i.e. sizeof(void *)) + */ + if (!t || !btf_type_is_ptr(t)) + return -EINVAL; + } + return 0; +} + +/* This function must be invoked only from initcalls/module init functions */ +int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, + struct module *owner) +{ + struct btf_id_dtor_kfunc_tab *tab; + struct btf *btf; + u32 tab_cnt, i; + int ret; + + btf = btf_get_module_btf(owner); + if (!btf) + return check_btf_kconfigs(owner, "dtor kfuncs"); + if (IS_ERR(btf)) + return PTR_ERR(btf); + + if (add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) { + pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT); + ret = -E2BIG; + goto end; + } + + /* Ensure that the prototype of dtor kfuncs being registered is sane */ + ret = btf_check_dtor_kfuncs(btf, dtors, add_cnt); + if (ret < 0) + goto end; + + tab = btf->dtor_kfunc_tab; + /* Only one call allowed for modules */ + if (WARN_ON_ONCE(tab && btf_is_module(btf))) { + ret = -EINVAL; + goto end; + } + + tab_cnt = tab ? tab->cnt : 0; + if (tab_cnt > U32_MAX - add_cnt) { + ret = -EOVERFLOW; + goto end; + } + if (tab_cnt + add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) { + pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT); + ret = -E2BIG; + goto end; + } + + tab = krealloc(btf->dtor_kfunc_tab, + offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]), + GFP_KERNEL | __GFP_NOWARN); + if (!tab) { + ret = -ENOMEM; + goto end; + } + + if (!btf->dtor_kfunc_tab) + tab->cnt = 0; + btf->dtor_kfunc_tab = tab; + + memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0])); + + /* remap BTF ids based on BTF relocation (if any) */ + for (i = tab_cnt; i < tab_cnt + add_cnt; i++) { + tab->dtors[i].btf_id = btf_relocate_id(btf, tab->dtors[i].btf_id); + tab->dtors[i].kfunc_btf_id = btf_relocate_id(btf, tab->dtors[i].kfunc_btf_id); + } + + tab->cnt += add_cnt; + + sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL); + +end: + if (ret) + btf_free_dtor_kfunc_tab(btf); + btf_put(btf); + return ret; +} +EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs); + +#define MAX_TYPES_ARE_COMPAT_DEPTH 2 + +/* Check local and target types for compatibility. This check is used for + * type-based CO-RE relocations and follow slightly different rules than + * field-based relocations. This function assumes that root types were already + * checked for name match. Beyond that initial root-level name check, names + * are completely ignored. Compatibility rules are as follows: + * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs/ENUM64s are considered compatible, but + * kind should match for local and target types (i.e., STRUCT is not + * compatible with UNION); + * - for ENUMs/ENUM64s, the size is ignored; + * - for INT, size and signedness are ignored; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - CONST/VOLATILE/RESTRICT modifiers are ignored; + * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible; + * - FUNC_PROTOs are compatible if they have compatible signature: same + * number of input args and compatible return and argument types. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. + */ +int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id) +{ + return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, + MAX_TYPES_ARE_COMPAT_DEPTH); +} + +#define MAX_TYPES_MATCH_DEPTH 2 + +int bpf_core_types_match(const struct btf *local_btf, u32 local_id, + const struct btf *targ_btf, u32 targ_id) +{ + return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, + MAX_TYPES_MATCH_DEPTH); +} + +static bool bpf_core_is_flavor_sep(const char *s) +{ + /* check X___Y name pattern, where X and Y are not underscores */ + return s[0] != '_' && /* X */ + s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ + s[4] != '_'; /* Y */ +} + +size_t bpf_core_essential_name_len(const char *name) +{ + size_t n = strlen(name); + int i; + + for (i = n - 5; i >= 0; i--) { + if (bpf_core_is_flavor_sep(name + i)) + return i + 1; + } + return n; +} + +static void bpf_free_cands(struct bpf_cand_cache *cands) +{ + if (!cands->cnt) + /* empty candidate array was allocated on stack */ + return; + kfree(cands); +} + +static void bpf_free_cands_from_cache(struct bpf_cand_cache *cands) +{ + kfree(cands->name); + kfree(cands); +} + +#define VMLINUX_CAND_CACHE_SIZE 31 +static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE]; + +#define MODULE_CAND_CACHE_SIZE 31 +static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE]; + +static void __print_cand_cache(struct bpf_verifier_log *log, + struct bpf_cand_cache **cache, + int cache_size) +{ + struct bpf_cand_cache *cc; + int i, j; + + for (i = 0; i < cache_size; i++) { + cc = cache[i]; + if (!cc) + continue; + bpf_log(log, "[%d]%s(", i, cc->name); + for (j = 0; j < cc->cnt; j++) { + bpf_log(log, "%d", cc->cands[j].id); + if (j < cc->cnt - 1) + bpf_log(log, " "); + } + bpf_log(log, "), "); + } +} + +static void print_cand_cache(struct bpf_verifier_log *log) +{ + mutex_lock(&cand_cache_mutex); + bpf_log(log, "vmlinux_cand_cache:"); + __print_cand_cache(log, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); + bpf_log(log, "\nmodule_cand_cache:"); + __print_cand_cache(log, module_cand_cache, MODULE_CAND_CACHE_SIZE); + bpf_log(log, "\n"); + mutex_unlock(&cand_cache_mutex); +} + +static u32 hash_cands(struct bpf_cand_cache *cands) +{ + return jhash(cands->name, cands->name_len, 0); +} + +static struct bpf_cand_cache *check_cand_cache(struct bpf_cand_cache *cands, + struct bpf_cand_cache **cache, + int cache_size) +{ + struct bpf_cand_cache *cc = cache[hash_cands(cands) % cache_size]; + + if (cc && cc->name_len == cands->name_len && + !strncmp(cc->name, cands->name, cands->name_len)) + return cc; + return NULL; +} + +static size_t sizeof_cands(int cnt) +{ + return offsetof(struct bpf_cand_cache, cands[cnt]); +} + +static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands, + struct bpf_cand_cache **cache, + int cache_size) +{ + struct bpf_cand_cache **cc = &cache[hash_cands(cands) % cache_size], *new_cands; + + if (*cc) { + bpf_free_cands_from_cache(*cc); + *cc = NULL; + } + new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL); + if (!new_cands) { + bpf_free_cands(cands); + return ERR_PTR(-ENOMEM); + } + /* strdup the name, since it will stay in cache. + * the cands->name points to strings in prog's BTF and the prog can be unloaded. + */ + new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL); + bpf_free_cands(cands); + if (!new_cands->name) { + kfree(new_cands); + return ERR_PTR(-ENOMEM); + } + *cc = new_cands; + return new_cands; +} + +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES +static void __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache, + int cache_size) +{ + struct bpf_cand_cache *cc; + int i, j; + + for (i = 0; i < cache_size; i++) { + cc = cache[i]; + if (!cc) + continue; + if (!btf) { + /* when new module is loaded purge all of module_cand_cache, + * since new module might have candidates with the name + * that matches cached cands. + */ + bpf_free_cands_from_cache(cc); + cache[i] = NULL; + continue; + } + /* when module is unloaded purge cache entries + * that match module's btf + */ + for (j = 0; j < cc->cnt; j++) + if (cc->cands[j].btf == btf) { + bpf_free_cands_from_cache(cc); + cache[i] = NULL; + break; + } + } + +} + +static void purge_cand_cache(struct btf *btf) +{ + mutex_lock(&cand_cache_mutex); + __purge_cand_cache(btf, module_cand_cache, MODULE_CAND_CACHE_SIZE); + mutex_unlock(&cand_cache_mutex); +} +#endif + +static struct bpf_cand_cache * +bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf, + int targ_start_id) +{ + struct bpf_cand_cache *new_cands; + const struct btf_type *t; + const char *targ_name; + size_t targ_essent_len; + int n, i; + + n = btf_nr_types(targ_btf); + for (i = targ_start_id; i < n; i++) { + t = btf_type_by_id(targ_btf, i); + if (btf_kind(t) != cands->kind) + continue; + + targ_name = btf_name_by_offset(targ_btf, t->name_off); + if (!targ_name) + continue; + + /* the resched point is before strncmp to make sure that search + * for non-existing name will have a chance to schedule(). + */ + cond_resched(); + + if (strncmp(cands->name, targ_name, cands->name_len) != 0) + continue; + + targ_essent_len = bpf_core_essential_name_len(targ_name); + if (targ_essent_len != cands->name_len) + continue; + + /* most of the time there is only one candidate for a given kind+name pair */ + new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL); + if (!new_cands) { + bpf_free_cands(cands); + return ERR_PTR(-ENOMEM); + } + + memcpy(new_cands, cands, sizeof_cands(cands->cnt)); + bpf_free_cands(cands); + cands = new_cands; + cands->cands[cands->cnt].btf = targ_btf; + cands->cands[cands->cnt].id = i; + cands->cnt++; + } + return cands; +} + +static struct bpf_cand_cache * +bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id) +{ + struct bpf_cand_cache *cands, *cc, local_cand = {}; + const struct btf *local_btf = ctx->btf; + const struct btf_type *local_type; + const struct btf *main_btf; + size_t local_essent_len; + struct btf *mod_btf; + const char *name; + int id; + + main_btf = bpf_get_btf_vmlinux(); + if (IS_ERR(main_btf)) + return ERR_CAST(main_btf); + if (!main_btf) + return ERR_PTR(-EINVAL); + + local_type = btf_type_by_id(local_btf, local_type_id); + if (!local_type) + return ERR_PTR(-EINVAL); + + name = btf_name_by_offset(local_btf, local_type->name_off); + if (str_is_empty(name)) + return ERR_PTR(-EINVAL); + local_essent_len = bpf_core_essential_name_len(name); + + cands = &local_cand; + cands->name = name; + cands->kind = btf_kind(local_type); + cands->name_len = local_essent_len; + + cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); + /* cands is a pointer to stack here */ + if (cc) { + if (cc->cnt) + return cc; + goto check_modules; + } + + /* Attempt to find target candidates in vmlinux BTF first */ + cands = bpf_core_add_cands(cands, main_btf, 1); + if (IS_ERR(cands)) + return ERR_CAST(cands); + + /* cands is a pointer to kmalloced memory here if cands->cnt > 0 */ + + /* populate cache even when cands->cnt == 0 */ + cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); + if (IS_ERR(cc)) + return ERR_CAST(cc); + + /* if vmlinux BTF has any candidate, don't go for module BTFs */ + if (cc->cnt) + return cc; + +check_modules: + /* cands is a pointer to stack here and cands->cnt == 0 */ + cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE); + if (cc) + /* if cache has it return it even if cc->cnt == 0 */ + return cc; + + /* If candidate is not found in vmlinux's BTF then search in module's BTFs */ + spin_lock_bh(&btf_idr_lock); + idr_for_each_entry(&btf_idr, mod_btf, id) { + if (!btf_is_module(mod_btf)) + continue; + /* linear search could be slow hence unlock/lock + * the IDR to avoiding holding it for too long + */ + btf_get(mod_btf); + spin_unlock_bh(&btf_idr_lock); + cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf)); + btf_put(mod_btf); + if (IS_ERR(cands)) + return ERR_CAST(cands); + spin_lock_bh(&btf_idr_lock); + } + spin_unlock_bh(&btf_idr_lock); + /* cands is a pointer to kmalloced memory here if cands->cnt > 0 + * or pointer to stack if cands->cnd == 0. + * Copy it into the cache even when cands->cnt == 0 and + * return the result. + */ + return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE); +} + +int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, + int relo_idx, void *insn) +{ + bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL; + struct bpf_core_cand_list cands = {}; + struct bpf_core_relo_res targ_res; + struct bpf_core_spec *specs; + const struct btf_type *type; + int err; + + /* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5" + * into arrays of btf_ids of struct fields and array indices. + */ + specs = kcalloc(3, sizeof(*specs), GFP_KERNEL); + if (!specs) + return -ENOMEM; + + type = btf_type_by_id(ctx->btf, relo->type_id); + if (!type) { + bpf_log(ctx->log, "relo #%u: bad type id %u\n", + relo_idx, relo->type_id); + kfree(specs); + return -EINVAL; + } + + if (need_cands) { + struct bpf_cand_cache *cc; + int i; + + mutex_lock(&cand_cache_mutex); + cc = bpf_core_find_cands(ctx, relo->type_id); + if (IS_ERR(cc)) { + bpf_log(ctx->log, "target candidate search failed for %d\n", + relo->type_id); + err = PTR_ERR(cc); + goto out; + } + if (cc->cnt) { + cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL); + if (!cands.cands) { + err = -ENOMEM; + goto out; + } + } + for (i = 0; i < cc->cnt; i++) { + bpf_log(ctx->log, + "CO-RE relocating %s %s: found target candidate [%d]\n", + btf_kind_str[cc->kind], cc->name, cc->cands[i].id); + cands.cands[i].btf = cc->cands[i].btf; + cands.cands[i].id = cc->cands[i].id; + } + cands.len = cc->cnt; + /* cand_cache_mutex needs to span the cache lookup and + * copy of btf pointer into bpf_core_cand_list, + * since module can be unloaded while bpf_core_calc_relo_insn + * is working with module's btf. + */ + } + + err = bpf_core_calc_relo_insn((void *)ctx->log, relo, relo_idx, ctx->btf, &cands, specs, + &targ_res); + if (err) + goto out; + + err = bpf_core_patch_insn((void *)ctx->log, insn, relo->insn_off / 8, relo, relo_idx, + &targ_res); + +out: + kfree(specs); + if (need_cands) { + kfree(cands.cands); + mutex_unlock(&cand_cache_mutex); + if (ctx->log->level & BPF_LOG_LEVEL2) + print_cand_cache(ctx->log); + } + return err; +} + +bool btf_nested_type_is_trusted(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + const char *field_name, u32 btf_id, const char *suffix) +{ + struct btf *btf = reg->btf; + const struct btf_type *walk_type, *safe_type; + const char *tname; + char safe_tname[64]; + long ret, safe_id; + const struct btf_member *member; + u32 i; + + walk_type = btf_type_by_id(btf, reg->btf_id); + if (!walk_type) + return false; + + tname = btf_name_by_offset(btf, walk_type->name_off); + + ret = snprintf(safe_tname, sizeof(safe_tname), "%s%s", tname, suffix); + if (ret >= sizeof(safe_tname)) + return false; + + safe_id = btf_find_by_name_kind(btf, safe_tname, BTF_INFO_KIND(walk_type->info)); + if (safe_id < 0) + return false; + + safe_type = btf_type_by_id(btf, safe_id); + if (!safe_type) + return false; + + for_each_member(i, safe_type, member) { + const char *m_name = __btf_name_by_offset(btf, member->name_off); + const struct btf_type *mtype = btf_type_by_id(btf, member->type); + u32 id; + + if (!btf_type_is_ptr(mtype)) + continue; + + btf_type_skip_modifiers(btf, mtype->type, &id); + /* If we match on both type and name, the field is considered trusted. */ + if (btf_id == id && !strcmp(field_name, m_name)) + return true; + } + + return false; +} + +bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log, + const struct btf *reg_btf, u32 reg_id, + const struct btf *arg_btf, u32 arg_id) +{ + const char *reg_name, *arg_name, *search_needle; + const struct btf_type *reg_type, *arg_type; + int reg_len, arg_len, cmp_len; + size_t pattern_len = sizeof(NOCAST_ALIAS_SUFFIX) - sizeof(char); + + reg_type = btf_type_by_id(reg_btf, reg_id); + if (!reg_type) + return false; + + arg_type = btf_type_by_id(arg_btf, arg_id); + if (!arg_type) + return false; + + reg_name = btf_name_by_offset(reg_btf, reg_type->name_off); + arg_name = btf_name_by_offset(arg_btf, arg_type->name_off); + + reg_len = strlen(reg_name); + arg_len = strlen(arg_name); + + /* Exactly one of the two type names may be suffixed with ___init, so + * if the strings are the same size, they can't possibly be no-cast + * aliases of one another. If you have two of the same type names, e.g. + * they're both nf_conn___init, it would be improper to return true + * because they are _not_ no-cast aliases, they are the same type. + */ + if (reg_len == arg_len) + return false; + + /* Either of the two names must be the other name, suffixed with ___init. */ + if ((reg_len != arg_len + pattern_len) && + (arg_len != reg_len + pattern_len)) + return false; + + if (reg_len < arg_len) { + search_needle = strstr(arg_name, NOCAST_ALIAS_SUFFIX); + cmp_len = reg_len; + } else { + search_needle = strstr(reg_name, NOCAST_ALIAS_SUFFIX); + cmp_len = arg_len; + } + + if (!search_needle) + return false; + + /* ___init suffix must come at the end of the name */ + if (*(search_needle + pattern_len) != '\0') + return false; + + return !strncmp(reg_name, arg_name, cmp_len); +} + +#ifdef CONFIG_BPF_JIT +static int +btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops, + struct bpf_verifier_log *log) +{ + struct btf_struct_ops_tab *tab, *new_tab; + int i, err; + + tab = btf->struct_ops_tab; + if (!tab) { + tab = kzalloc(offsetof(struct btf_struct_ops_tab, ops[4]), + GFP_KERNEL); + if (!tab) + return -ENOMEM; + tab->capacity = 4; + btf->struct_ops_tab = tab; + } + + for (i = 0; i < tab->cnt; i++) + if (tab->ops[i].st_ops == st_ops) + return -EEXIST; + + if (tab->cnt == tab->capacity) { + new_tab = krealloc(tab, + offsetof(struct btf_struct_ops_tab, + ops[tab->capacity * 2]), + GFP_KERNEL); + if (!new_tab) + return -ENOMEM; + tab = new_tab; + tab->capacity *= 2; + btf->struct_ops_tab = tab; + } + + tab->ops[btf->struct_ops_tab->cnt].st_ops = st_ops; + + err = bpf_struct_ops_desc_init(&tab->ops[btf->struct_ops_tab->cnt], btf, log); + if (err) + return err; + + btf->struct_ops_tab->cnt++; + + return 0; +} + +const struct bpf_struct_ops_desc * +bpf_struct_ops_find_value(struct btf *btf, u32 value_id) +{ + const struct bpf_struct_ops_desc *st_ops_list; + unsigned int i; + u32 cnt; + + if (!value_id) + return NULL; + if (!btf->struct_ops_tab) + return NULL; + + cnt = btf->struct_ops_tab->cnt; + st_ops_list = btf->struct_ops_tab->ops; + for (i = 0; i < cnt; i++) { + if (st_ops_list[i].value_id == value_id) + return &st_ops_list[i]; + } + + return NULL; +} + +const struct bpf_struct_ops_desc * +bpf_struct_ops_find(struct btf *btf, u32 type_id) +{ + const struct bpf_struct_ops_desc *st_ops_list; + unsigned int i; + u32 cnt; + + if (!type_id) + return NULL; + if (!btf->struct_ops_tab) + return NULL; + + cnt = btf->struct_ops_tab->cnt; + st_ops_list = btf->struct_ops_tab->ops; + for (i = 0; i < cnt; i++) { + if (st_ops_list[i].type_id == type_id) + return &st_ops_list[i]; + } + + return NULL; +} + +int __register_bpf_struct_ops(struct bpf_struct_ops *st_ops) +{ + struct bpf_verifier_log *log; + struct btf *btf; + int err = 0; + + btf = btf_get_module_btf(st_ops->owner); + if (!btf) + return check_btf_kconfigs(st_ops->owner, "struct_ops"); + if (IS_ERR(btf)) + return PTR_ERR(btf); + + log = kzalloc(sizeof(*log), GFP_KERNEL | __GFP_NOWARN); + if (!log) { + err = -ENOMEM; + goto errout; + } + + log->level = BPF_LOG_KERNEL; + + err = btf_add_struct_ops(btf, st_ops, log); + +errout: + kfree(log); + btf_put(btf); + + return err; +} +EXPORT_SYMBOL_GPL(__register_bpf_struct_ops); +#endif + +bool btf_param_match_suffix(const struct btf *btf, + const struct btf_param *arg, + const char *suffix) +{ + int suffix_len = strlen(suffix), len; + const char *param_name; + + /* In the future, this can be ported to use BTF tagging */ + param_name = btf_name_by_offset(btf, arg->name_off); + if (str_is_empty(param_name)) + return false; + len = strlen(param_name); + if (len <= suffix_len) + return false; + param_name += len - suffix_len; + return !strncmp(param_name, suffix, suffix_len); +} |