diff options
Diffstat (limited to 'tools/lib/bpf/libbpf.c')
| -rw-r--r-- | tools/lib/bpf/libbpf.c | 8528 |
1 files changed, 5624 insertions, 2904 deletions
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7f10dd501a52..3dc8a8078815 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -31,11 +31,10 @@ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/filter.h> -#include <linux/list.h> #include <linux/limits.h> #include <linux/perf_event.h> +#include <linux/bpf_perf_event.h> #include <linux/ring_buffer.h> -#include <linux/version.h> #include <sys/epoll.h> #include <sys/ioctl.h> #include <sys/mman.h> @@ -51,15 +50,19 @@ #include "libbpf.h" #include "bpf.h" #include "btf.h" -#include "str_error.h" #include "libbpf_internal.h" #include "hashmap.h" #include "bpf_gen_internal.h" +#include "zip.h" #ifndef BPF_FS_MAGIC #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define MAX_EVENT_NAME_LEN 64 + +#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" + #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format string. It may break @@ -71,11 +74,188 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); +static int map_set_def_max_entries(struct bpf_map *map); + +static const char * const attach_type_name[] = { + [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", + [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress", + [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create", + [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release", + [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops", + [BPF_CGROUP_DEVICE] = "cgroup_device", + [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind", + [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", + [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", + [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", + [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect", + [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", + [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", + [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", + [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", + [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername", + [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname", + [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", + [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname", + [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", + [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", + [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg", + [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", + [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", + [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg", + [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", + [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", + [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", + [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", + [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", + [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", + [BPF_LIRC_MODE2] = "lirc_mode2", + [BPF_FLOW_DISSECTOR] = "flow_dissector", + [BPF_TRACE_RAW_TP] = "trace_raw_tp", + [BPF_TRACE_FENTRY] = "trace_fentry", + [BPF_TRACE_FEXIT] = "trace_fexit", + [BPF_MODIFY_RETURN] = "modify_return", + [BPF_LSM_MAC] = "lsm_mac", + [BPF_LSM_CGROUP] = "lsm_cgroup", + [BPF_SK_LOOKUP] = "sk_lookup", + [BPF_TRACE_ITER] = "trace_iter", + [BPF_XDP_DEVMAP] = "xdp_devmap", + [BPF_XDP_CPUMAP] = "xdp_cpumap", + [BPF_XDP] = "xdp", + [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select", + [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate", + [BPF_PERF_EVENT] = "perf_event", + [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", + [BPF_STRUCT_OPS] = "struct_ops", + [BPF_NETFILTER] = "netfilter", + [BPF_TCX_INGRESS] = "tcx_ingress", + [BPF_TCX_EGRESS] = "tcx_egress", + [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", + [BPF_NETKIT_PRIMARY] = "netkit_primary", + [BPF_NETKIT_PEER] = "netkit_peer", + [BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session", + [BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session", +}; + +static const char * const link_type_name[] = { + [BPF_LINK_TYPE_UNSPEC] = "unspec", + [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_LINK_TYPE_TRACING] = "tracing", + [BPF_LINK_TYPE_CGROUP] = "cgroup", + [BPF_LINK_TYPE_ITER] = "iter", + [BPF_LINK_TYPE_NETNS] = "netns", + [BPF_LINK_TYPE_XDP] = "xdp", + [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", + [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", + [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_LINK_TYPE_NETFILTER] = "netfilter", + [BPF_LINK_TYPE_TCX] = "tcx", + [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", + [BPF_LINK_TYPE_NETKIT] = "netkit", + [BPF_LINK_TYPE_SOCKMAP] = "sockmap", +}; + +static const char * const map_type_name[] = { + [BPF_MAP_TYPE_UNSPEC] = "unspec", + [BPF_MAP_TYPE_HASH] = "hash", + [BPF_MAP_TYPE_ARRAY] = "array", + [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", + [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", + [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", + [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", + [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", + [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", + [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", + [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", + [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", + [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", + [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", + [BPF_MAP_TYPE_DEVMAP] = "devmap", + [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", + [BPF_MAP_TYPE_SOCKMAP] = "sockmap", + [BPF_MAP_TYPE_CPUMAP] = "cpumap", + [BPF_MAP_TYPE_XSKMAP] = "xskmap", + [BPF_MAP_TYPE_SOCKHASH] = "sockhash", + [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", + [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", + [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", + [BPF_MAP_TYPE_QUEUE] = "queue", + [BPF_MAP_TYPE_STACK] = "stack", + [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", + [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_MAP_TYPE_RINGBUF] = "ringbuf", + [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", + [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", + [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", + [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", + [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", + [BPF_MAP_TYPE_ARENA] = "arena", + [BPF_MAP_TYPE_INSN_ARRAY] = "insn_array", +}; + +static const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", + [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", + [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", + [BPF_PROG_TYPE_XDP] = "xdp", + [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", + [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", + [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", + [BPF_PROG_TYPE_LWT_IN] = "lwt_in", + [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", + [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", + [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", + [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", + [BPF_PROG_TYPE_SK_MSG] = "sk_msg", + [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", + [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", + [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", + [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", + [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", + [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", + [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", + [BPF_PROG_TYPE_TRACING] = "tracing", + [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_PROG_TYPE_EXT] = "ext", + [BPF_PROG_TYPE_LSM] = "lsm", + [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", + [BPF_PROG_TYPE_SYSCALL] = "syscall", + [BPF_PROG_TYPE_NETFILTER] = "netfilter", +}; static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) { - if (level == LIBBPF_DEBUG) + const char *env_var = "LIBBPF_LOG_LEVEL"; + static enum libbpf_print_level min_level = LIBBPF_INFO; + static bool initialized; + + if (!initialized) { + char *verbosity; + + initialized = true; + verbosity = getenv(env_var); + if (verbosity) { + if (strcasecmp(verbosity, "warn") == 0) + min_level = LIBBPF_WARN; + else if (strcasecmp(verbosity, "debug") == 0) + min_level = LIBBPF_DEBUG; + else if (strcasecmp(verbosity, "info") == 0) + min_level = LIBBPF_INFO; + else + fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n", + env_var, verbosity); + } + } + + /* if too verbose, skip logging */ + if (level > min_level) return 0; return vfprintf(stderr, format, args); @@ -85,9 +265,10 @@ static libbpf_print_fn_t __libbpf_pr = __base_pr; libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn) { - libbpf_print_fn_t old_print_fn = __libbpf_pr; + libbpf_print_fn_t old_print_fn; + + old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED); - __libbpf_pr = fn; return old_print_fn; } @@ -95,13 +276,20 @@ __printf(2, 3) void libbpf_print(enum libbpf_print_level level, const char *format, ...) { va_list args; + int old_errno; + libbpf_print_fn_t print_fn; - if (!__libbpf_pr) + print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED); + if (!print_fn) return; + old_errno = errno; + va_start(args, format); - __libbpf_pr(level, format, args); + print_fn(level, format, args); va_end(args); + + errno = old_errno; } static void pr_perm_msg(int err) @@ -130,8 +318,6 @@ static void pr_perm_msg(int err) buf); } -#define STRERR_BUFSIZE 128 - /* Copied from tools/perf/util/util.h */ #ifndef zfree # define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) @@ -151,20 +337,9 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } -/* this goes away in libbpf 1.0 */ -enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE; - int libbpf_set_strict_mode(enum libbpf_strict_mode mode) { - /* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to - * get all possible values we compensate last +1, and then (2*x - 1) - * to get the bit mask - */ - if (mode != LIBBPF_STRICT_ALL - && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1))) - return errno = EINVAL, -EINVAL; - - libbpf_mode = mode; + /* as of v1.0 libbpf_set_strict_mode() is a no-op */ return 0; } @@ -191,10 +366,11 @@ enum reloc_type { RELO_LD64, RELO_CALL, RELO_DATA, - RELO_EXTERN_VAR, - RELO_EXTERN_FUNC, + RELO_EXTERN_LD64, + RELO_EXTERN_CALL, RELO_SUBPROG_ADDR, RELO_CORE, + RELO_INSN_ARRAY, }; struct reloc_desc { @@ -205,16 +381,20 @@ struct reloc_desc { struct { int map_idx; int sym_off; + /* + * The following two fields can be unionized, as the + * ext_idx field is used for extern symbols, and the + * sym_size is used for jump tables, which are never + * extern + */ + union { + int ext_idx; + int sym_size; + }; }; }; }; -struct bpf_sec_def; - -typedef int (*init_fn_t)(struct bpf_program *prog, long cookie); -typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_opts *opts, long cookie); -typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie); - /* stored as sec_def->cookie for all libbpf-supported SEC()s */ enum sec_def_flags { SEC_NONE = 0, @@ -229,23 +409,32 @@ enum sec_def_flags { SEC_ATTACHABLE = 2, SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT, /* attachment target is specified through BTF ID in either kernel or - * other BPF program's BTF object */ + * other BPF program's BTF object + */ SEC_ATTACH_BTF = 4, /* BPF program type allows sleeping/blocking in kernel */ SEC_SLEEPABLE = 8, - /* allow non-strict prefix matching */ - SEC_SLOPPY_PFX = 16, + /* BPF program support non-linear XDP buffer */ + SEC_XDP_FRAGS = 16, + /* Setup proper attach type for usdt probes. */ + SEC_USDT = 32, }; struct bpf_sec_def { - const char *sec; + char *sec; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; long cookie; + int handler_id; + + libbpf_prog_setup_fn_t prog_setup_fn; + libbpf_prog_prepare_load_fn_t prog_prepare_load_fn; + libbpf_prog_attach_fn_t prog_attach_fn; +}; - init_fn_t init_fn; - preload_fn_t preload_fn; - attach_fn_t attach_fn; +struct bpf_light_subprog { + __u32 sec_insn_off; + __u32 sub_insn_off; }; /* @@ -253,9 +442,10 @@ struct bpf_sec_def { * linux/filter.h. */ struct bpf_program { - const struct bpf_sec_def *sec_def; + char *name; char *sec_name; size_t sec_idx; + const struct bpf_sec_def *sec_def; /* this program's instruction offset (in number of instructions) * within its containing ELF section */ @@ -275,12 +465,6 @@ struct bpf_program { */ size_t sub_insn_off; - char *name; - /* name with / replaced by _; makes recursive pinning - * in bpf_object__pin_programs easier - */ - char *pin_name; - /* instructions that belong to BPF program; insns[0] is located at * sec_insn_off instruction within its ELF section in ELF file, so * when mapping ELF file instruction index to the local instruction, @@ -301,24 +485,22 @@ struct bpf_program { size_t log_size; __u32 log_level; - struct { - int nr; - int *fds; - } instances; - bpf_program_prep_t preprocessor; - struct bpf_object *obj; - void *priv; - bpf_program_clear_priv_t clear_priv; - bool load; + int fd; + bool autoload; + bool autoattach; + bool sym_global; bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; + int exception_cb_idx; + int prog_ifindex; __u32 attach_btf_obj_fd; __u32 attach_btf_id; __u32 attach_prog_fd; + void *func_info; __u32 func_info_rec_size; __u32 func_info_cnt; @@ -327,11 +509,13 @@ struct bpf_program { __u32 line_info_rec_size; __u32 line_info_cnt; __u32 prog_flags; + __u8 hash[SHA256_DIGEST_LENGTH]; + + struct bpf_light_subprog *subprogs; + __u32 subprog_cnt; }; struct bpf_struct_ops { - const char *tname; - const struct btf_type *type; struct bpf_program **progs; __u32 *kern_func_off; /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */ @@ -356,6 +540,8 @@ struct bpf_struct_ops { #define KCONFIG_SEC ".kconfig" #define KSYMS_SEC ".ksyms" #define STRUCT_OPS_SEC ".struct_ops" +#define STRUCT_OPS_LINK_SEC ".struct_ops.link" +#define ARENA_SEC ".addr_space.1" enum libbpf_map_type { LIBBPF_MAP_UNSPEC, @@ -365,11 +551,20 @@ enum libbpf_map_type { LIBBPF_MAP_KCONFIG, }; +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; +}; + struct bpf_map { + struct bpf_object *obj; char *name; /* real_name is defined for special internal maps (.rodata*, * .data*, .bss, .kconfig) and preserves their original ELF section - * name. This is important to be be able to find corresponding BTF + * name. This is important to be able to find corresponding BTF * DATASEC information. */ char *real_name; @@ -381,11 +576,10 @@ struct bpf_map { struct bpf_map_def def; __u32 numa_node; __u32 btf_var_idx; + int mod_btf_fd; __u32 btf_key_type_id; __u32 btf_value_type_id; __u32 btf_vmlinux_value_type_id; - void *priv; - bpf_map_clear_priv_t clear_priv; enum libbpf_map_type libbpf_type; void *mmaped; struct bpf_struct_ops *st_ops; @@ -395,8 +589,10 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; - bool skipped; + bool autocreate; + bool autoattach; __u64 map_extra; + struct bpf_program *excl_prog; }; enum extern_type { @@ -419,7 +615,8 @@ struct extern_desc { int sym_idx; int btf_id; int sec_btf_id; - const char *name; + char *name; + char *essent_name; bool is_set; bool is_weak; union { @@ -448,8 +645,6 @@ struct extern_desc { }; }; -static LIST_HEAD(bpf_objects_list); - struct module_btf { struct btf *btf; char *name; @@ -464,6 +659,7 @@ enum sec_type { SEC_BSS, SEC_DATA, SEC_RODATA, + SEC_ST_OPS, }; struct elf_sec_desc { @@ -479,17 +675,26 @@ struct elf_state { Elf *elf; Elf64_Ehdr *ehdr; Elf_Data *symbols; - Elf_Data *st_ops_data; + Elf_Data *arena_data; size_t shstrndx; /* section index for section name strings */ size_t strtabidx; struct elf_sec_desc *secs; - int sec_cnt; - int maps_shndx; + size_t sec_cnt; int btf_maps_shndx; __u32 btf_maps_sec_btf_id; int text_shndx; int symbols_shndx; - int st_ops_shndx; + bool has_st_ops; + int arena_data_shndx; + int jumptables_data_shndx; +}; + +struct usdt_manager; + +enum bpf_object_state { + OBJ_OPEN, + OBJ_PREPARED, + OBJ_LOADED, }; struct bpf_object { @@ -497,6 +702,7 @@ struct bpf_object { char license[64]; __u32 kern_version; + enum bpf_object_state state; struct bpf_program *programs; size_t nr_programs; struct bpf_map *maps; @@ -508,7 +714,6 @@ struct bpf_object { int nr_extern; int kconfig_map_idx; - bool loaded; bool has_subcalls; bool has_rodata; @@ -516,12 +721,8 @@ struct bpf_object { /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ struct elf_state efile; - /* - * All loaded bpf_object are linked in a list, which is - * hidden to caller. bpf_objects__<func> handlers deal with - * all objects. - */ - struct list_head list; + + unsigned char byteorder; struct btf *btf; struct btf_ext *btf_ext; @@ -547,13 +748,30 @@ struct bpf_object { size_t log_size; __u32 log_level; - void *priv; - bpf_object_clear_priv_t clear_priv; - int *fd_array; size_t fd_array_cap; size_t fd_array_cnt; + struct usdt_manager *usdt_man; + + int arena_map_idx; + void *arena_data; + size_t arena_data_sz; + + void *jumptables_data; + size_t jumptables_data_sz; + + struct { + struct bpf_program *prog; + int sym_off; + int fd; + } *jumptable_maps; + size_t jumptable_map_cnt; + + struct kern_feature_cache *feat_cache; + char *token_path; + int token_fd; + char path[]; }; @@ -569,28 +787,14 @@ static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx); void bpf_program__unload(struct bpf_program *prog) { - int i; - if (!prog) return; - /* - * If the object is opened but the program was never loaded, - * it is possible that prog->instances.nr == -1. - */ - if (prog->instances.nr > 0) { - for (i = 0; i < prog->instances.nr; i++) - zclose(prog->instances.fds[i]); - } else if (prog->instances.nr != -1) { - pr_warn("Internal error: instances.nr is %d\n", - prog->instances.nr); - } - - prog->instances.nr = -1; - zfree(&prog->instances.fds); + zclose(prog->fd); zfree(&prog->func_info); zfree(&prog->line_info); + zfree(&prog->subprogs); } static void bpf_program__exit(struct bpf_program *prog) @@ -598,16 +802,9 @@ static void bpf_program__exit(struct bpf_program *prog) if (!prog) return; - if (prog->clear_priv) - prog->clear_priv(prog, prog->priv); - - prog->priv = NULL; - prog->clear_priv = NULL; - bpf_program__unload(prog); zfree(&prog->name); zfree(&prog->sec_name); - zfree(&prog->pin_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -616,26 +813,6 @@ static void bpf_program__exit(struct bpf_program *prog) prog->sec_idx = -1; } -static char *__bpf_program__pin_name(struct bpf_program *prog) -{ - char *name, *p; - - if (libbpf_mode & LIBBPF_STRICT_SEC_NAME) - name = strdup(prog->name); - else - name = strdup(prog->sec_name); - - if (!name) - return NULL; - - p = name; - - while ((p = strchr(p, '/'))) - *p = '_'; - - return name; -} - static bool insn_is_subprog_call(const struct bpf_insn *insn) { return BPF_CLASS(insn->code) == BPF_JMP && @@ -677,10 +854,22 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->insns_cnt = prog->sec_insn_cnt; prog->type = BPF_PROG_TYPE_UNSPEC; - prog->load = true; + prog->fd = -1; + prog->exception_cb_idx = -1; + + /* libbpf's convention for SEC("?abc...") is that it's just like + * SEC("abc...") but the corresponding bpf_program starts out with + * autoload set to false. + */ + if (sec_name[0] == '?') { + prog->autoload = false; + /* from now on forget there was ? in section name */ + sec_name++; + } else { + prog->autoload = true; + } - prog->instances.fds = NULL; - prog->instances.nr = -1; + prog->autoattach = true; /* inherit object's log_level */ prog->log_level = obj->log_level; @@ -693,10 +882,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, if (!prog->name) goto errout; - prog->pin_name = __bpf_program__pin_name(prog); - if (!prog->pin_name) - goto errout; - prog->insns = malloc(insn_data_sz); if (!prog->insns) goto errout; @@ -724,7 +909,6 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, progs = obj->programs; nr_progs = obj->nr_programs; nr_syms = symbols->d_size / sizeof(Elf64_Sym); - sec_off = 0; for (i = 0; i < nr_syms; i++) { sym = elf_sym_by_idx(obj, i); @@ -744,7 +928,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_off + prog_sz > sec_sz) { + if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) { pr_warn("sec '%s': program at offset %zu crosses section boundary\n", sec_name, sec_off); return -LIBBPF_ERRNO__FORMAT; @@ -778,14 +962,16 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, if (err) return err; + if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL) + prog->sym_global = true; + /* if function is a global/weak symbol, but has restricted * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC * as static to enable more permissive BPF verification mode * with more outside context available to BPF verifier */ - if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL - && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN - || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) + if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) prog->mark_btf_static = true; nr_progs++; @@ -795,40 +981,18 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return 0; } -__u32 get_kernel_version(void) +static void bpf_object_bswap_progs(struct bpf_object *obj) { - /* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release, - * but Ubuntu provides /proc/version_signature file, as described at - * https://ubuntu.com/kernel, with an example contents below, which we - * can use to get a proper LINUX_VERSION_CODE. - * - * Ubuntu 5.4.0-12.15-generic 5.4.8 - * - * In the above, 5.4.8 is what kernel is actually expecting, while - * uname() call will return 5.4.0 in info.release. - */ - const char *ubuntu_kver_file = "/proc/version_signature"; - __u32 major, minor, patch; - struct utsname info; - - if (access(ubuntu_kver_file, R_OK) == 0) { - FILE *f; - - f = fopen(ubuntu_kver_file, "r"); - if (f) { - if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) { - fclose(f); - return KERNEL_VERSION(major, minor, patch); - } - fclose(f); - } - /* something went wrong, fall back to uname() approach */ - } + struct bpf_program *prog = obj->programs; + struct bpf_insn *insn; + int p, i; - uname(&info); - if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) - return 0; - return KERNEL_VERSION(major, minor, patch); + for (p = 0; p < obj->nr_programs; p++, prog++) { + insn = prog->insns; + for (i = 0; i < prog->insns_cnt; i++, insn++) + bpf_insn_bswap(insn); + } + pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs); } static const struct btf_member * @@ -860,43 +1024,52 @@ find_member_by_name(const struct btf *btf, const struct btf_type *t, return NULL; } +static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, + __u16 kind, struct btf **res_btf, + struct module_btf **res_mod_btf); + #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, const char *name, __u32 kind); static int -find_struct_ops_kern_types(const struct btf *btf, const char *tname, +find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw, + struct module_btf **mod_btf, const struct btf_type **type, __u32 *type_id, const struct btf_type **vtype, __u32 *vtype_id, const struct btf_member **data_member) { const struct btf_type *kern_type, *kern_vtype; const struct btf_member *kern_data_member; + struct btf *btf = NULL; __s32 kern_vtype_id, kern_type_id; + char tname[192], stname[256]; __u32 i; - kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); - if (kern_type_id < 0) { - pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", - tname); - return kern_type_id; - } - kern_type = btf__type_by_id(btf, kern_type_id); + snprintf(tname, sizeof(tname), "%.*s", + (int)bpf_core_essential_name_len(tname_raw), tname_raw); + + snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname); - /* Find the corresponding "map_value" type that will be used - * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example, - * find "struct bpf_struct_ops_tcp_congestion_ops" from the - * btf_vmlinux. + /* Look for the corresponding "map_value" type that will be used + * in map_update(BPF_MAP_TYPE_STRUCT_OPS) first, figure out the btf + * and the mod_btf. + * For example, find "struct bpf_struct_ops_tcp_congestion_ops". */ - kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX, - tname, BTF_KIND_STRUCT); + kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf); if (kern_vtype_id < 0) { - pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n", - STRUCT_OPS_VALUE_PREFIX, tname); + pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname); return kern_vtype_id; } kern_vtype = btf__type_by_id(btf, kern_vtype_id); + kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (kern_type_id < 0) { + pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname); + return kern_type_id; + } + kern_type = btf__type_by_id(btf, kern_type_id); + /* Find "struct tcp_congestion_ops" from * struct bpf_struct_ops_tcp_congestion_ops { * [ ... ] @@ -909,8 +1082,8 @@ find_struct_ops_kern_types(const struct btf *btf, const char *tname, break; } if (i == btf_vlen(kern_vtype)) { - pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n", - tname, STRUCT_OPS_VALUE_PREFIX, tname); + pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n", + tname, stname); return -EINVAL; } @@ -928,32 +1101,95 @@ static bool bpf_map__is_struct_ops(const struct bpf_map *map) return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; } +static bool is_valid_st_ops_program(struct bpf_object *obj, + const struct bpf_program *prog) +{ + int i; + + for (i = 0; i < obj->nr_programs; i++) { + if (&obj->programs[i] == prog) + return prog->type == BPF_PROG_TYPE_STRUCT_OPS; + } + + return false; +} + +/* For each struct_ops program P, referenced from some struct_ops map M, + * enable P.autoload if there are Ms for which M.autocreate is true, + * disable P.autoload if for all Ms M.autocreate is false. + * Don't change P.autoload for programs that are not referenced from any maps. + */ +static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj) +{ + struct bpf_program *prog, *slot_prog; + struct bpf_map *map; + int i, j, k, vlen; + + for (i = 0; i < obj->nr_programs; ++i) { + int should_load = false; + int use_cnt = 0; + + prog = &obj->programs[i]; + if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) + continue; + + for (j = 0; j < obj->nr_maps; ++j) { + const struct btf_type *type; + + map = &obj->maps[j]; + if (!bpf_map__is_struct_ops(map)) + continue; + + type = btf__type_by_id(obj->btf, map->st_ops->type_id); + vlen = btf_vlen(type); + for (k = 0; k < vlen; ++k) { + slot_prog = map->st_ops->progs[k]; + if (prog != slot_prog) + continue; + + use_cnt++; + if (map->autocreate) + should_load = true; + } + } + if (use_cnt) + prog->autoload = should_load; + } + + return 0; +} + /* Init the map's fields that depend on kern_btf */ -static int bpf_map__init_kern_struct_ops(struct bpf_map *map, - const struct btf *btf, - const struct btf *kern_btf) +static int bpf_map__init_kern_struct_ops(struct bpf_map *map) { const struct btf_member *member, *kern_member, *kern_data_member; const struct btf_type *type, *kern_type, *kern_vtype; __u32 i, kern_type_id, kern_vtype_id, kern_data_off; + struct bpf_object *obj = map->obj; + const struct btf *btf = obj->btf; struct bpf_struct_ops *st_ops; + const struct btf *kern_btf; + struct module_btf *mod_btf = NULL; void *data, *kern_data; const char *tname; int err; st_ops = map->st_ops; - type = st_ops->type; - tname = st_ops->tname; - err = find_struct_ops_kern_types(kern_btf, tname, + type = btf__type_by_id(btf, st_ops->type_id); + tname = btf__name_by_offset(btf, type->name_off); + err = find_struct_ops_kern_types(obj, tname, &mod_btf, &kern_type, &kern_type_id, &kern_vtype, &kern_vtype_id, &kern_data_member); if (err) return err; + kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux; + pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", map->name, st_ops->type_id, kern_type_id, kern_vtype_id); + map->mod_btf_fd = mod_btf ? mod_btf->fd : -1; map->def.value_size = kern_vtype->size; map->btf_vmlinux_value_type_id = kern_vtype_id; @@ -970,17 +1206,46 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, const struct btf_type *mtype, *kern_mtype; __u32 mtype_id, kern_mtype_id; void *mdata, *kern_mdata; + struct bpf_program *prog; __s64 msize, kern_msize; __u32 moff, kern_moff; __u32 kern_member_idx; const char *mname; mname = btf__name_by_offset(btf, member->name_off); + moff = member->offset / 8; + mdata = data + moff; + msize = btf__resolve_size(btf, member->type); + if (msize < 0) { + pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n", + map->name, mname); + return msize; + } + kern_member = find_member_by_name(kern_btf, kern_type, mname); if (!kern_member) { - pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", + if (!libbpf_is_mem_zeroed(mdata, msize)) { + pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", + map->name, mname); + return -ENOTSUP; + } + + if (st_ops->progs[i]) { + /* If we had declaratively set struct_ops callback, we need to + * force its autoload to false, because it doesn't have + * a chance of succeeding from POV of the current struct_ops map. + * If this program is still referenced somewhere else, though, + * then bpf_object_adjust_struct_ops_autoload() will update its + * autoload accordingly. + */ + st_ops->progs[i]->autoload = false; + st_ops->progs[i] = NULL; + } + + /* Skip all-zero/NULL fields if they are not present in the kernel BTF */ + pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n", map->name, mname); - return -ENOTSUP; + continue; } kern_member_idx = kern_member - btf_members(kern_type); @@ -991,10 +1256,7 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, return -ENOTSUP; } - moff = member->offset / 8; kern_moff = kern_member->offset / 8; - - mdata = data + moff; kern_mdata = kern_data + kern_moff; mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id); @@ -1009,12 +1271,25 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, } if (btf_is_ptr(mtype)) { - struct bpf_program *prog; + prog = *(void **)mdata; + /* just like for !kern_member case above, reset declaratively + * set (at compile time) program's autload to false, + * if user replaced it with another program or NULL + */ + if (st_ops->progs[i] && st_ops->progs[i] != prog) + st_ops->progs[i]->autoload = false; - prog = st_ops->progs[i]; + /* Update the value from the shadow type */ + st_ops->progs[i] = prog; if (!prog) continue; + if (!is_valid_st_ops_program(obj, prog)) { + pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n", + map->name, mname); + return -ENOTSUP; + } + kern_mtype = skip_mods_and_typedefs(kern_btf, kern_mtype->type, &kern_mtype_id); @@ -1029,8 +1304,34 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, return -ENOTSUP; } - prog->attach_btf_id = kern_type_id; - prog->expected_attach_type = kern_member_idx; + if (mod_btf) + prog->attach_btf_obj_fd = mod_btf->fd; + + /* if we haven't yet processed this BPF program, record proper + * attach_btf_id and member_idx + */ + if (!prog->attach_btf_id) { + prog->attach_btf_id = kern_type_id; + prog->expected_attach_type = kern_member_idx; + } + + /* struct_ops BPF prog can be re-used between multiple + * .struct_ops & .struct_ops.link as long as it's the + * same struct_ops struct definition and the same + * function pointer field + */ + if (prog->attach_btf_id != kern_type_id) { + pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n", + map->name, mname, prog->name, prog->sec_name, prog->type, + prog->attach_btf_id, kern_type_id); + return -EINVAL; + } + if (prog->expected_attach_type != kern_member_idx) { + pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n", + map->name, mname, prog->name, prog->sec_name, prog->type, + prog->expected_attach_type, kern_member_idx); + return -EINVAL; + } st_ops->kern_func_off[i] = kern_data_off + kern_moff; @@ -1041,9 +1342,8 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, continue; } - msize = btf__resolve_size(btf, mtype_id); kern_msize = btf__resolve_size(kern_btf, kern_mtype_id); - if (msize < 0 || kern_msize < 0 || msize != kern_msize) { + if (kern_msize < 0 || msize != kern_msize) { pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n", map->name, mname, (ssize_t)msize, (ssize_t)kern_msize); @@ -1071,8 +1371,10 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) if (!bpf_map__is_struct_ops(map)) continue; - err = bpf_map__init_kern_struct_ops(map, obj->btf, - obj->btf_vmlinux); + if (!map->autocreate) + continue; + + err = bpf_map__init_kern_struct_ops(map); if (err) return err; } @@ -1080,7 +1382,8 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) return 0; } -static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) +static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, + int shndx, Elf_Data *data) { const struct btf_type *type, *datasec; const struct btf_var_secinfo *vsi; @@ -1091,15 +1394,15 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) struct bpf_map *map; __u32 i; - if (obj->efile.st_ops_shndx == -1) + if (shndx == -1) return 0; btf = obj->btf; - datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC, + datasec_id = btf__find_by_name_kind(btf, sec_name, BTF_KIND_DATASEC); if (datasec_id < 0) { pr_warn("struct_ops init: DATASEC %s not found\n", - STRUCT_OPS_SEC); + sec_name); return -EINVAL; } @@ -1112,7 +1415,7 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) type_id = btf__resolve_type(obj->btf, vsi->type); if (type_id < 0) { pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", - vsi->type, STRUCT_OPS_SEC); + vsi->type, sec_name); return -EINVAL; } @@ -1131,16 +1434,28 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) if (IS_ERR(map)) return PTR_ERR(map); - map->sec_idx = obj->efile.st_ops_shndx; + map->sec_idx = shndx; map->sec_offset = vsi->offset; map->name = strdup(var_name); if (!map->name) return -ENOMEM; + map->btf_value_type_id = type_id; + + /* Follow same convention as for programs autoload: + * SEC("?.struct_ops") means map is not created by default. + */ + if (sec_name[0] == '?') { + map->autocreate = false; + /* from now on forget there was ? in section name */ + sec_name++; + } map->def.type = BPF_MAP_TYPE_STRUCT_OPS; map->def.key_size = sizeof(int); map->def.value_size = type->size; map->def.max_entries = 1; + map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0; + map->autoattach = true; map->st_ops = calloc(1, sizeof(*map->st_ops)); if (!map->st_ops) @@ -1153,17 +1468,15 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off) return -ENOMEM; - if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) { + if (vsi->offset + type->size > data->d_size) { pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n", - var_name, STRUCT_OPS_SEC); + var_name, sec_name); return -EINVAL; } memcpy(st_ops->data, - obj->efile.st_ops_data->d_buf + vsi->offset, + data->d_buf + vsi->offset, type->size); - st_ops->tname = tname; - st_ops->type = type; st_ops->type_id = type_id; pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n", @@ -1173,12 +1486,34 @@ static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) return 0; } +static int bpf_object_init_struct_ops(struct bpf_object *obj) +{ + const char *sec_name; + int sec_idx, err; + + for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) { + struct elf_sec_desc *desc = &obj->efile.secs[sec_idx]; + + if (desc->sec_type != SEC_ST_OPS) + continue; + + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + if (!sec_name) + return -LIBBPF_ERRNO__FORMAT; + + err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data); + if (err) + return err; + } + + return 0; +} + static struct bpf_object *bpf_object__new(const char *path, const void *obj_buf, size_t obj_buf_sz, const char *obj_name) { - bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); struct bpf_object *obj; char *end; @@ -1208,17 +1543,13 @@ static struct bpf_object *bpf_object__new(const char *path, */ obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; - obj->efile.maps_shndx = -1; obj->efile.btf_maps_shndx = -1; - obj->efile.st_ops_shndx = -1; obj->kconfig_map_idx = -1; + obj->arena_map_idx = -1; obj->kern_version = get_kernel_version(); - obj->loaded = false; + obj->state = OBJ_OPEN; - INIT_LIST_HEAD(&obj->list); - if (!strict) - list_add(&obj->list, &bpf_objects_list); return obj; } @@ -1227,12 +1558,11 @@ static void bpf_object__elf_finish(struct bpf_object *obj) if (!obj->efile.elf) return; - if (obj->efile.elf) { - elf_end(obj->efile.elf); - obj->efile.elf = NULL; - } + elf_end(obj->efile.elf); + obj->efile.elf = NULL; + obj->efile.ehdr = NULL; obj->efile.symbols = NULL; - obj->efile.st_ops_data = NULL; + obj->efile.arena_data = NULL; zfree(&obj->efile.secs); obj->efile.sec_cnt = 0; @@ -1253,19 +1583,13 @@ static int bpf_object__elf_init(struct bpf_object *obj) } if (obj->efile.obj_buf_sz > 0) { - /* - * obj_buf should have been validated by - * bpf_object__open_buffer(). - */ + /* obj_buf should have been validated by bpf_object__open_mem(). */ elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); } else { obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); if (obj->efile.fd < 0) { - char errmsg[STRERR_BUFSIZE], *cp; - err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("elf: failed to open %s: %s\n", obj->path, cp); + pr_warn("elf: failed to open %s: %s\n", obj->path, errstr(err)); return err; } @@ -1299,6 +1623,16 @@ static int bpf_object__elf_init(struct bpf_object *obj) goto errout; } + /* Validate ELF object endianness... */ + if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && + ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { + err = -LIBBPF_ERRNO__ENDIAN; + pr_warn("elf: '%s' has unknown byte order\n", obj->path); + goto errout; + } + /* and save after bpf_object_open() frees ELF data */ + obj->byteorder = ehdr->e_ident[EI_DATA]; + if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { pr_warn("elf: failed to get section names section index for %s: %s\n", obj->path, elf_errmsg(-1)); @@ -1306,7 +1640,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) goto errout; } - /* Elf is corrupted/truncated, avoid calling elf_strptr. */ + /* ELF is corrupted/truncated, avoid calling elf_strptr. */ if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) { pr_warn("elf: failed to get section names strings from %s: %s\n", obj->path, elf_errmsg(-1)); @@ -1327,24 +1661,24 @@ errout: return err; } -static int bpf_object__check_endianness(struct bpf_object *obj) +static bool is_native_endianness(struct bpf_object *obj) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) - return 0; + return obj->byteorder == ELFDATA2LSB; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) - return 0; + return obj->byteorder == ELFDATA2MSB; #else # error "Unrecognized __BYTE_ORDER__" #endif - pr_warn("elf: endianness mismatch in %s.\n", obj->path); - return -LIBBPF_ERRNO__ENDIAN; } static int bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) { + if (!data) { + pr_warn("invalid license section in %s\n", obj->path); + return -LIBBPF_ERRNO__FORMAT; + } /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't * go over allowed ELF data section buffer */ @@ -1358,7 +1692,7 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) { __u32 kver; - if (size != sizeof(kver)) { + if (!data || size != sizeof(kver)) { pr_warn("invalid kver section in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } @@ -1378,98 +1712,153 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size) { - int ret = -ENOENT; Elf_Data *data; Elf_Scn *scn; - *size = 0; if (!name) return -EINVAL; scn = elf_sec_by_name(obj, name); data = elf_sec_data(obj, scn); if (data) { - ret = 0; /* found it */ *size = data->d_size; + return 0; /* found it */ } - return *size ? 0 : ret; + return -ENOENT; } -static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off) +static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name) { Elf_Data *symbols = obj->efile.symbols; const char *sname; size_t si; - if (!name || !off) - return -EINVAL; - for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { Elf64_Sym *sym = elf_sym_by_idx(obj, si); - if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL || - ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) + if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) + continue; + + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && + ELF64_ST_BIND(sym->st_info) != STB_WEAK) continue; sname = elf_sym_str(obj, sym->st_name); if (!sname) { pr_warn("failed to get sym name string for var %s\n", name); - return -EIO; - } - if (strcmp(name, sname) == 0) { - *off = sym->st_value; - return 0; + return ERR_PTR(-EIO); } + if (strcmp(name, sname) == 0) + return sym; } - return -ENOENT; + return ERR_PTR(-ENOENT); } -static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif +#ifndef MFD_NOEXEC_SEAL +#define MFD_NOEXEC_SEAL 0x0008U +#endif + +static int create_placeholder_fd(void) { - struct bpf_map *new_maps; - size_t new_cap; - int i; + unsigned int flags = MFD_CLOEXEC | MFD_NOEXEC_SEAL; + const char *name = "libbpf-placeholder-fd"; + int fd; - if (obj->nr_maps < obj->maps_cap) - return &obj->maps[obj->nr_maps++]; + fd = ensure_good_fd(sys_memfd_create(name, flags)); + if (fd >= 0) + return fd; + else if (errno != EINVAL) + return -errno; - new_cap = max((size_t)4, obj->maps_cap * 3 / 2); - new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps)); - if (!new_maps) { - pr_warn("alloc maps for object failed\n"); - return ERR_PTR(-ENOMEM); - } + /* Possibly running on kernel without MFD_NOEXEC_SEAL */ + fd = ensure_good_fd(sys_memfd_create(name, flags & ~MFD_NOEXEC_SEAL)); + if (fd < 0) + return -errno; + return fd; +} - obj->maps_cap = new_cap; - obj->maps = new_maps; +static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) +{ + struct bpf_map *map; + int err; - /* zero out new maps */ - memset(obj->maps + obj->nr_maps, 0, - (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps)); - /* - * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin) - * when failure (zclose won't close negative fd)). + err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap, + sizeof(*obj->maps), obj->nr_maps + 1); + if (err) + return ERR_PTR(err); + + map = &obj->maps[obj->nr_maps++]; + map->obj = obj; + /* Preallocate map FD without actually creating BPF map just yet. + * These map FD "placeholders" will be reused later without changing + * FD value when map is actually created in the kernel. + * + * This is useful to be able to perform BPF program relocations + * without having to create BPF maps before that step. This allows us + * to finalize and load BTF very late in BPF object's loading phase, + * right before BPF maps have to be created and BPF programs have to + * be loaded. By having these map FD placeholders we can perform all + * the sanitizations, relocations, and any other adjustments before we + * start creating actual BPF kernel objects (BTF, maps, progs). */ - for (i = obj->nr_maps; i < obj->maps_cap; i++) { - obj->maps[i].fd = -1; - obj->maps[i].inner_map_fd = -1; - } + map->fd = create_placeholder_fd(); + if (map->fd < 0) + return ERR_PTR(map->fd); + map->inner_map_fd = -1; + map->autocreate = true; - return &obj->maps[obj->nr_maps++]; + return map; } -static size_t bpf_map_mmap_sz(const struct bpf_map *map) +static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) { - long page_sz = sysconf(_SC_PAGE_SIZE); + const long page_sz = sysconf(_SC_PAGE_SIZE); size_t map_sz; - map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; + map_sz = (size_t)roundup(value_sz, 8) * max_entries; map_sz = roundup(map_sz, page_sz); return map_sz; } +static size_t bpf_map_mmap_sz(const struct bpf_map *map) +{ + const long page_sz = sysconf(_SC_PAGE_SIZE); + + switch (map->def.type) { + case BPF_MAP_TYPE_ARRAY: + return array_map_mmap_sz(map->def.value_size, map->def.max_entries); + case BPF_MAP_TYPE_ARENA: + return page_sz * map->def.max_entries; + default: + return 0; /* not supported */ + } +} + +static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) +{ + void *mmaped; + + if (!map->mmaped) + return -EINVAL; + + if (old_sz == new_sz) + return 0; + + mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (mmaped == MAP_FAILED) + return -errno; + + memcpy(mmaped, map->mmaped, min(old_sz, new_sz)); + munmap(map->mmaped, old_sz); + map->mmaped = mmaped; + return 0; +} + static char *internal_map_name(struct bpf_object *obj, const char *real_name) { char map_name[BPF_OBJ_NAME_LEN], *p; @@ -1520,7 +1909,7 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name) snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, sfx_len, real_name); - /* sanitise map name to characters allowed by kernel */ + /* sanities map name to characters allowed by kernel */ for (p = map_name; *p && p < map_name + sizeof(map_name); p++) if (!isalnum(*p) && *p != '_' && *p != '.') *p = '_'; @@ -1529,11 +1918,46 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name) } static int +map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map); + +/* Internal BPF map is mmap()'able only if at least one of corresponding + * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL + * variable and it's not marked as __hidden (which turns it into, effectively, + * a STATIC variable). + */ +static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map) +{ + const struct btf_type *t, *vt; + struct btf_var_secinfo *vsi; + int i, n; + + if (!map->btf_value_type_id) + return false; + + t = btf__type_by_id(obj->btf, map->btf_value_type_id); + if (!btf_is_datasec(t)) + return false; + + vsi = btf_var_secinfos(t); + for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) { + vt = btf__type_by_id(obj->btf, vsi->type); + if (!btf_is_var(vt)) + continue; + + if (btf_var(vt)->linkage != BTF_VAR_STATIC) + return true; + } + + return false; +} + +static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, const char *real_name, int sec_idx, void *data, size_t data_sz) { struct bpf_map_def *def; struct bpf_map *map; + size_t mmap_sz; int err; map = bpf_object__add_map(obj); @@ -1557,19 +1981,24 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def->value_size = data_sz; def->max_entries = 1; def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG - ? BPF_F_RDONLY_PROG : 0; - def->map_flags |= BPF_F_MMAPABLE; + ? BPF_F_RDONLY_PROG : 0; + + /* failures are fine because of maps like .rodata.str1.1 */ + (void) map_fill_btf_type_info(obj, map); + + if (map_is_mmapable(obj, map)) + def->map_flags |= BPF_F_MMAPABLE; pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", map->name, map->sec_idx, map->sec_offset, def->map_flags); - map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, + mmap_sz = bpf_map_mmap_sz(map); + map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (map->mmaped == MAP_FAILED) { err = -errno; map->mmaped = NULL; - pr_warn("failed to alloc map '%s' content buffer: %d\n", - map->name, err); + pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err)); zfree(&map->real_name); zfree(&map->name); return err; @@ -1594,6 +2023,10 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { sec_desc = &obj->efile.secs[sec_idx]; + /* Skip recognized sections with size 0. */ + if (!sec_desc->data || sec_desc->data->d_size == 0) + continue; + switch (sec_desc->sec_type) { case SEC_DATA: sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); @@ -1640,13 +2073,27 @@ static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, return NULL; } +static struct extern_desc *find_extern_by_name_with_len(const struct bpf_object *obj, + const void *name, int len) +{ + const char *ext_name; + int i; + + for (i = 0; i < obj->nr_extern; i++) { + ext_name = obj->externs[i].name; + if (strlen(ext_name) == len && strncmp(ext_name, name, len) == 0) + return &obj->externs[i]; + } + return NULL; +} + static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, char value) { switch (ext->kcfg.type) { case KCFG_BOOL: if (value == 'm') { - pr_warn("extern (kcfg) %s=%c should be tristate or char\n", + pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n", ext->name, value); return -EINVAL; } @@ -1667,7 +2114,7 @@ static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, case KCFG_INT: case KCFG_CHAR_ARR: default: - pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n", + pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n", ext->name, value); return -EINVAL; } @@ -1681,12 +2128,13 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, size_t len; if (ext->kcfg.type != KCFG_CHAR_ARR) { - pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value); + pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n", + ext->name, value); return -EINVAL; } len = strlen(value); - if (value[len - 1] != '"') { + if (len < 2 || value[len - 1] != '"') { pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", ext->name, value); return -EINVAL; @@ -1695,7 +2143,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, /* strip quotes */ len -= 2; if (len >= ext->kcfg.sz) { - pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", + pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n", ext->name, value, len, ext->kcfg.sz - 1); len = ext->kcfg.sz - 1; } @@ -1714,7 +2162,7 @@ static int parse_u64(const char *value, __u64 *res) *res = strtoull(value, &value_end, 0); if (errno) { err = -errno; - pr_warn("failed to parse '%s' as integer: %d\n", value, err); + pr_warn("failed to parse '%s': %s\n", value, errstr(err)); return err; } if (*value_end) { @@ -1752,23 +2200,38 @@ static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, __u64 value) { - if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { - pr_warn("extern (kcfg) %s=%llu should be integer\n", + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && + ext->kcfg.type != KCFG_BOOL) { + pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n", ext->name, (unsigned long long)value); return -EINVAL; } + if (ext->kcfg.type == KCFG_BOOL && value > 1) { + pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", + ext->name, (unsigned long long)value); + return -EINVAL; + + } if (!is_kcfg_value_in_range(ext, value)) { - pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n", + pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", ext->name, (unsigned long long)value, ext->kcfg.sz); return -ERANGE; } switch (ext->kcfg.sz) { - case 1: *(__u8 *)ext_val = value; break; - case 2: *(__u16 *)ext_val = value; break; - case 4: *(__u32 *)ext_val = value; break; - case 8: *(__u64 *)ext_val = value; break; - default: - return -EINVAL; + case 1: + *(__u8 *)ext_val = value; + break; + case 2: + *(__u16 *)ext_val = value; + break; + case 4: + *(__u32 *)ext_val = value; + break; + case 8: + *(__u64 *)ext_val = value; + break; + default: + return -EINVAL; } ext->is_set = true; return 0; @@ -1822,16 +2285,19 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj, /* assume integer */ err = parse_u64(value, &num); if (err) { - pr_warn("extern (kcfg) %s=%s should be integer\n", - ext->name, value); + pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); return err; } + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { + pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); + return -EINVAL; + } err = set_kcfg_value_num(ext, ext_val, num); break; } if (err) return err; - pr_debug("extern (kcfg) %s=%s\n", ext->name, value); + pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); return 0; } @@ -1850,9 +2316,9 @@ static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) return -ENAMETOOLONG; /* gzopen also accepts uncompressed files. */ - file = gzopen(buf, "r"); + file = gzopen(buf, "re"); if (!file) - file = gzopen("/proc/config.gz", "r"); + file = gzopen("/proc/config.gz", "re"); if (!file) { pr_warn("failed to open system Kconfig\n"); @@ -1862,8 +2328,8 @@ static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) while (gzgets(file, buf, sizeof(buf))) { err = bpf_object__process_kconfig_line(obj, buf, data); if (err) { - pr_warn("error parsing system Kconfig line '%s': %d\n", - buf, err); + pr_warn("error parsing system Kconfig line '%s': %s\n", + buf, errstr(err)); goto out; } } @@ -1883,15 +2349,15 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj, file = fmemopen((void *)config, strlen(config), "r"); if (!file) { err = -errno; - pr_warn("failed to open in-memory Kconfig: %d\n", err); + pr_warn("failed to open in-memory Kconfig: %s\n", errstr(err)); return err; } while (fgets(buf, sizeof(buf), file)) { err = bpf_object__process_kconfig_line(obj, buf, data); if (err) { - pr_warn("error parsing in-memory Kconfig line '%s': %d\n", - buf, err); + pr_warn("error parsing in-memory Kconfig line '%s': %s\n", + buf, errstr(err)); break; } } @@ -1927,133 +2393,6 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj) return 0; } -static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) -{ - Elf_Data *symbols = obj->efile.symbols; - int i, map_def_sz = 0, nr_maps = 0, nr_syms; - Elf_Data *data = NULL; - Elf_Scn *scn; - - if (obj->efile.maps_shndx < 0) - return 0; - - if (!symbols) - return -EINVAL; - - scn = elf_sec_by_idx(obj, obj->efile.maps_shndx); - data = elf_sec_data(obj, scn); - if (!scn || !data) { - pr_warn("elf: failed to get legacy map definitions for %s\n", - obj->path); - return -EINVAL; - } - - /* - * Count number of maps. Each map has a name. - * Array of maps is not supported: only the first element is - * considered. - * - * TODO: Detect array of map and report error. - */ - nr_syms = symbols->d_size / sizeof(Elf64_Sym); - for (i = 0; i < nr_syms; i++) { - Elf64_Sym *sym = elf_sym_by_idx(obj, i); - - if (sym->st_shndx != obj->efile.maps_shndx) - continue; - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) - continue; - nr_maps++; - } - /* Assume equally sized map definitions */ - pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n", - nr_maps, data->d_size, obj->path); - - if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) { - pr_warn("elf: unable to determine legacy map definition size in %s\n", - obj->path); - return -EINVAL; - } - map_def_sz = data->d_size / nr_maps; - - /* Fill obj->maps using data in "maps" section. */ - for (i = 0; i < nr_syms; i++) { - Elf64_Sym *sym = elf_sym_by_idx(obj, i); - const char *map_name; - struct bpf_map_def *def; - struct bpf_map *map; - - if (sym->st_shndx != obj->efile.maps_shndx) - continue; - if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION) - continue; - - map = bpf_object__add_map(obj); - if (IS_ERR(map)) - return PTR_ERR(map); - - map_name = elf_sym_str(obj, sym->st_name); - if (!map_name) { - pr_warn("failed to get map #%d name sym string for obj %s\n", - i, obj->path); - return -LIBBPF_ERRNO__FORMAT; - } - - if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { - pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); - return -ENOTSUP; - } - - map->libbpf_type = LIBBPF_MAP_UNSPEC; - map->sec_idx = sym->st_shndx; - map->sec_offset = sym->st_value; - pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", - map_name, map->sec_idx, map->sec_offset); - if (sym->st_value + map_def_sz > data->d_size) { - pr_warn("corrupted maps section in %s: last map \"%s\" too small\n", - obj->path, map_name); - return -EINVAL; - } - - map->name = strdup(map_name); - if (!map->name) { - pr_warn("map '%s': failed to alloc map name\n", map_name); - return -ENOMEM; - } - pr_debug("map %d is \"%s\"\n", i, map->name); - def = (struct bpf_map_def *)(data->d_buf + sym->st_value); - /* - * If the definition of the map in the object file fits in - * bpf_map_def, copy it. Any extra fields in our version - * of bpf_map_def will default to zero as a result of the - * calloc above. - */ - if (map_def_sz <= sizeof(struct bpf_map_def)) { - memcpy(&map->def, def, map_def_sz); - } else { - /* - * Here the map structure being read is bigger than what - * we expect, truncate if the excess bits are all zero. - * If they are not zero, reject this map as - * incompatible. - */ - char *b; - - for (b = ((char *)def) + sizeof(struct bpf_map_def); - b < ((char *)def) + map_def_sz; b++) { - if (*b != 0) { - pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n", - obj->path, map_name); - if (strict) - return -EINVAL; - } - } - memcpy(&map->def, def, sizeof(struct bpf_map_def)); - } - } - return 0; -} - const struct btf_type * skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) { @@ -2107,6 +2446,7 @@ static const char *__btf_kind_str(__u16 kind) case BTF_KIND_FLOAT: return "float"; case BTF_KIND_DECL_TAG: return "decl_tag"; case BTF_KIND_TYPE_TAG: return "type_tag"; + case BTF_KIND_ENUM64: return "enum64"; default: return "unknown"; } } @@ -2153,23 +2493,81 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, return true; } -static int build_map_pin_path(struct bpf_map *map, const char *path) +static bool get_map_field_long(const char *map_name, const struct btf *btf, + const struct btf_member *m, __u64 *res) { - char buf[PATH_MAX]; - int len; + const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); + const char *name = btf__name_by_offset(btf, m->name_off); - if (!path) - path = "/sys/fs/bpf"; + if (btf_is_ptr(t)) { + __u32 res32; + bool ret; + + ret = get_map_field_int(map_name, btf, m, &res32); + if (ret) + *res = (__u64)res32; + return ret; + } + + if (!btf_is_enum(t) && !btf_is_enum64(t)) { + pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n", + map_name, name, btf_kind_str(t)); + return false; + } + + if (btf_vlen(t) != 1) { + pr_warn("map '%s': attr '%s': invalid __ulong\n", + map_name, name); + return false; + } + + if (btf_is_enum(t)) { + const struct btf_enum *e = btf_enum(t); + + *res = e->val; + } else { + const struct btf_enum64 *e = btf_enum64(t); - len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); + *res = btf_enum64_value(e); + } + return true; +} + +static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) +{ + int len; + + len = snprintf(buf, buf_sz, "%s/%s", path, name); if (len < 0) return -EINVAL; - else if (len >= PATH_MAX) + if (len >= buf_sz) return -ENAMETOOLONG; + return 0; +} + +static int build_map_pin_path(struct bpf_map *map, const char *path) +{ + char buf[PATH_MAX]; + int err; + + if (!path) + path = BPF_FS_DEFAULT_PATH; + + err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); + if (err) + return err; + return bpf_map__set_pin_path(map, buf); } +/* should match definition in bpf_helpers.h */ +enum libbpf_pin_type { + LIBBPF_PIN_NONE, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + LIBBPF_PIN_BY_NAME, +}; + int parse_btf_map_def(const char *map_name, struct btf *btf, const struct btf_type *def_t, bool strict, struct btf_map_def *map_def, struct btf_map_def *inner_def) @@ -2368,9 +2766,9 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_def->pinning = val; map_def->parts |= MAP_DEF_PINNING; } else if (strcmp(name, "map_extra") == 0) { - __u32 map_extra; + __u64 map_extra; - if (!get_map_field_int(map_name, btf, m, &map_extra)) + if (!get_map_field_long(map_name, btf, m, &map_extra)) return -EINVAL; map_def->map_extra = map_extra; map_def->parts |= MAP_DEF_MAP_EXTRA; @@ -2391,6 +2789,43 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, return 0; } +static size_t adjust_ringbuf_sz(size_t sz) +{ + __u32 page_sz = sysconf(_SC_PAGE_SIZE); + __u32 mul; + + /* if user forgot to set any size, make sure they see error */ + if (sz == 0) + return 0; + /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be + * a power-of-2 multiple of kernel's page size. If user diligently + * satisified these conditions, pass the size through. + */ + if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) + return sz; + + /* Otherwise find closest (page_sz * power_of_2) product bigger than + * user-set size to satisfy both user size request and kernel + * requirements and substitute correct max_entries for map creation. + */ + for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { + if (mul * page_sz > sz) + return mul * page_sz; + } + + /* if it's impossible to satisfy the conditions (i.e., user size is + * very close to UINT_MAX but is not a power-of-2 multiple of + * page_size) then just return original size and let kernel reject it + */ + return sz; +} + +static bool map_is_ringbuf(const struct bpf_map *map) +{ + return map->def.type == BPF_MAP_TYPE_RINGBUF || + map->def.type == BPF_MAP_TYPE_USER_RINGBUF; +} + static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) { map->def.type = def->map_type; @@ -2404,6 +2839,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def map->btf_key_type_id = def->key_type_id; map->btf_value_type_id = def->value_type_id; + /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ + if (map_is_ringbuf(map)) + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + if (def->parts & MAP_DEF_MAP_TYPE) pr_debug("map '%s': found type = %u.\n", map->name, def->map_type); @@ -2528,7 +2967,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, map->inner_map = calloc(1, sizeof(*map->inner_map)); if (!map->inner_map) return -ENOMEM; - map->inner_map->fd = -1; + map->inner_map->fd = create_placeholder_fd(); + if (map->inner_map->fd < 0) + return map->inner_map->fd; map->inner_map->sec_idx = sec_idx; map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); if (!map->inner_map->name) @@ -2538,6 +2979,36 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, fill_map_from_def(map->inner_map, &inner_def); } + err = map_fill_btf_type_info(obj, map); + if (err) + return err; + + return 0; +} + +static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, + const char *sec_name, int sec_idx, + void *data, size_t data_sz) +{ + const long page_sz = sysconf(_SC_PAGE_SIZE); + size_t mmap_sz; + + mmap_sz = bpf_map_mmap_sz(map); + if (roundup(data_sz, page_sz) > mmap_sz) { + pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", + sec_name, mmap_sz, data_sz); + return -E2BIG; + } + + obj->arena_data = malloc(data_sz); + if (!obj->arena_data) + return -ENOMEM; + memcpy(obj->arena_data, data, data_sz); + obj->arena_data_sz = data_sz; + + /* make bpf_map__init_value() work for ARENA maps */ + map->mmaped = obj->arena_data; + return 0; } @@ -2556,7 +3027,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); data = elf_sec_data(obj, scn); - if (!scn || !data) { + if (!data) { pr_warn("elf: failed to get %s map definitions for %s\n", MAPS_ELF_SEC, obj->path); return -EINVAL; @@ -2590,6 +3061,33 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return err; } + for (i = 0; i < obj->nr_maps; i++) { + struct bpf_map *map = &obj->maps[i]; + + if (map->def.type != BPF_MAP_TYPE_ARENA) + continue; + + if (obj->arena_map_idx >= 0) { + pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", + map->name, obj->maps[obj->arena_map_idx].name); + return -EINVAL; + } + obj->arena_map_idx = i; + + if (obj->efile.arena_data) { + err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, + obj->efile.arena_data->d_buf, + obj->efile.arena_data->d_size); + if (err) + return err; + } + } + if (obj->efile.arena_data && obj->arena_map_idx < 0) { + pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n", + ARENA_SEC); + return -ENOENT; + } + return 0; } @@ -2598,16 +3096,15 @@ static int bpf_object__init_maps(struct bpf_object *obj, { const char *pin_root_path; bool strict; - int err; + int err = 0; strict = !OPTS_GET(opts, relaxed_maps, false); pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - err = bpf_object__init_user_maps(obj, strict); - err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path); + err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); err = err ?: bpf_object__init_global_data_maps(obj); err = err ?: bpf_object__init_kconfig_map(obj); - err = err ?: bpf_object__init_struct_ops_maps(obj); + err = err ?: bpf_object_init_struct_ops(obj); return err; } @@ -2623,6 +3120,11 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) return sh->sh_flags & SHF_EXECINSTR; } +static bool starts_with_qmark(const char *s) +{ + return s && s[0] == '?'; +} + static bool btf_needs_sanitization(struct bpf_object *obj) { bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); @@ -2631,12 +3133,14 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); + bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); + bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); return !has_func || !has_datasec || !has_func_global || !has_float || - !has_decl_tag || !has_type_tag; + !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec; } -static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) +static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) { bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); @@ -2644,6 +3148,9 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); + bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); + bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); + int enum64_placeholder_id = 0; struct btf_type *t; int i, j, vlen; @@ -2669,7 +3176,7 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) name = (char *)btf__name_by_offset(btf, t->name_off); while (*name) { - if (*name == '.') + if (*name == '.' || *name == '?') *name = '_'; name++; } @@ -2684,6 +3191,14 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) vt = (void *)btf__type_by_id(btf, v->type); m->name_off = vt->name_off; } + } else if (!has_qmark_datasec && btf_is_datasec(t) && + starts_with_qmark(btf__name_by_offset(btf, t->name_off))) { + /* replace '?' prefix with '_' for DATASEC names */ + char *name; + + name = (char *)btf__name_by_offset(btf, t->name_off); + if (name[0] == '?') + name[0] = '_'; } else if (!has_func && btf_is_func_proto(t)) { /* replace FUNC_PROTO with ENUM */ vlen = btf_vlen(t); @@ -2706,20 +3221,44 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) /* replace TYPE_TAG with a CONST */ t->name_off = 0; t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); + } else if (!has_enum64 && btf_is_enum(t)) { + /* clear the kflag */ + t->info = btf_type_info(btf_kind(t), btf_vlen(t), false); + } else if (!has_enum64 && btf_is_enum64(t)) { + /* replace ENUM64 with a union */ + struct btf_member *m; + + if (enum64_placeholder_id == 0) { + enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); + if (enum64_placeholder_id < 0) + return enum64_placeholder_id; + + t = (struct btf_type *)btf__type_by_id(btf, i); + } + + m = btf_members(t); + vlen = btf_vlen(t); + t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen); + for (j = 0; j < vlen; j++, m++) { + m->type = enum64_placeholder_id; + m->offset = 0; + } } } + + return 0; } static bool libbpf_needs_btf(const struct bpf_object *obj) { return obj->efile.btf_maps_shndx >= 0 || - obj->efile.st_ops_shndx >= 0 || + obj->efile.has_st_ops || obj->nr_extern > 0; } static bool kernel_needs_btf(const struct bpf_object *obj) { - return obj->efile.st_ops_shndx >= 0; + return obj->efile.has_st_ops; } static int bpf_object__init_btf(struct bpf_object *obj, @@ -2733,13 +3272,16 @@ static int bpf_object__init_btf(struct bpf_object *obj, err = libbpf_get_error(obj->btf); if (err) { obj->btf = NULL; - pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %s.\n", BTF_ELF_SEC, errstr(err)); goto out; } /* enforce 8-byte pointers for BPF-targeted BTFs */ btf__set_pointer_size(obj->btf, 8); } if (btf_ext_data) { + struct btf_ext_info *ext_segs[3]; + int seg_num, sec_num; + if (!obj->btf) { pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", BTF_EXT_ELF_SEC, BTF_ELF_SEC); @@ -2748,11 +3290,48 @@ static int bpf_object__init_btf(struct bpf_object *obj, obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); err = libbpf_get_error(obj->btf_ext); if (err) { - pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n", - BTF_EXT_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %s. Ignored and continue.\n", + BTF_EXT_ELF_SEC, errstr(err)); obj->btf_ext = NULL; goto out; } + + /* setup .BTF.ext to ELF section mapping */ + ext_segs[0] = &obj->btf_ext->func_info; + ext_segs[1] = &obj->btf_ext->line_info; + ext_segs[2] = &obj->btf_ext->core_relo_info; + for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { + struct btf_ext_info *seg = ext_segs[seg_num]; + const struct btf_ext_info_sec *sec; + const char *sec_name; + Elf_Scn *scn; + + if (seg->sec_cnt == 0) + continue; + + seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); + if (!seg->sec_idxs) { + err = -ENOMEM; + goto out; + } + + sec_num = 0; + for_each_btf_ext_sec(seg, sec) { + /* preventively increment index to avoid doing + * this before every continue below + */ + sec_num++; + + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); + if (str_is_empty(sec_name)) + continue; + scn = elf_sec_by_name(obj, sec_name); + if (!scn) + continue; + + seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); + } + } } out: if (err && libbpf_needs_btf(obj)) { @@ -2773,57 +3352,89 @@ static int compare_vsi_off(const void *_a, const void *_b) static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, struct btf_type *t) { - __u32 size = 0, off = 0, i, vars = btf_vlen(t); - const char *name = btf__name_by_offset(btf, t->name_off); - const struct btf_type *t_var; + __u32 size = 0, i, vars = btf_vlen(t); + const char *sec_name = btf__name_by_offset(btf, t->name_off); struct btf_var_secinfo *vsi; - const struct btf_var *var; - int ret; + bool fixup_offsets = false; + int err; - if (!name) { + if (!sec_name) { pr_debug("No name found in string section for DATASEC kind.\n"); return -ENOENT; } - /* .extern datasec size and var offsets were set correctly during - * extern collection step, so just skip straight to sorting variables + /* Extern-backing datasecs (.ksyms, .kconfig) have their size and + * variable offsets set at the previous step. Further, not every + * extern BTF VAR has corresponding ELF symbol preserved, so we skip + * all fixups altogether for such sections and go straight to sorting + * VARs within their DATASEC. */ - if (t->size) + if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0) goto sort_vars; - ret = find_elf_sec_sz(obj, name, &size); - if (ret || !size || (t->size && t->size != size)) { - pr_debug("Invalid size for section %s: %u bytes\n", name, size); - return -ENOENT; - } + /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to + * fix this up. But BPF static linker already fixes this up and fills + * all the sizes and offsets during static linking. So this step has + * to be optional. But the STV_HIDDEN handling is non-optional for any + * non-extern DATASEC, so the variable fixup loop below handles both + * functions at the same time, paying the cost of BTF VAR <-> ELF + * symbol matching just once. + */ + if (t->size == 0) { + err = find_elf_sec_sz(obj, sec_name, &size); + if (err || !size) { + pr_debug("sec '%s': failed to determine size from ELF: size %u, err %s\n", + sec_name, size, errstr(err)); + return -ENOENT; + } - t->size = size; + t->size = size; + fixup_offsets = true; + } for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { + const struct btf_type *t_var; + struct btf_var *var; + const char *var_name; + Elf64_Sym *sym; + t_var = btf__type_by_id(btf, vsi->type); if (!t_var || !btf_is_var(t_var)) { - pr_debug("Non-VAR type seen in section %s\n", name); + pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name); return -EINVAL; } var = btf_var(t_var); - if (var->linkage == BTF_VAR_STATIC) + if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN) continue; - name = btf__name_by_offset(btf, t_var->name_off); - if (!name) { - pr_debug("No name found in string section for VAR kind\n"); + var_name = btf__name_by_offset(btf, t_var->name_off); + if (!var_name) { + pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n", + sec_name, i); return -ENOENT; } - ret = find_elf_var_offset(obj, name, &off); - if (ret) { - pr_debug("No offset found in symbol table for VAR %s\n", - name); + sym = find_elf_var_sym(obj, var_name); + if (IS_ERR(sym)) { + pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n", + sec_name, var_name); return -ENOENT; } - vsi->offset = off; + if (fixup_offsets) + vsi->offset = sym->st_value; + + /* if variable is a global/weak symbol, but has restricted + * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR + * as static. This follows similar logic for functions (BPF + * subprogs) and influences libbpf's further decisions about + * whether to make global data BPF array maps as + * BPF_F_MMAPABLE. + */ + if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL) + var->linkage = BTF_VAR_STATIC; } sort_vars: @@ -2831,13 +3442,16 @@ sort_vars: return 0; } -static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) +static int bpf_object_fixup_btf(struct bpf_object *obj) { - int err = 0; - __u32 i, n = btf__type_cnt(btf); + int i, n, err = 0; + if (!obj->btf) + return 0; + + n = btf__type_cnt(obj->btf); for (i = 1; i < n; i++) { - struct btf_type *t = btf_type_by_id(btf, i); + struct btf_type *t = btf_type_by_id(obj->btf, i); /* Loader needs to fix up some of the things compiler * couldn't get its hands on while emitting BTF. This @@ -2845,33 +3459,12 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf) * the info from the ELF itself for this purpose. */ if (btf_is_datasec(t)) { - err = btf_fixup_datasec(obj, btf, t); + err = btf_fixup_datasec(obj, obj->btf, t); if (err) - break; + return err; } } - return libbpf_err(err); -} - -int btf__finalize_data(struct bpf_object *obj, struct btf *btf) -{ - return btf_finalize_data(obj, btf); -} - -static int bpf_object__finalize_btf(struct bpf_object *obj) -{ - int err; - - if (!obj->btf) - return 0; - - err = btf_finalize_data(obj, obj->btf); - if (err) { - pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); - return err; - } - return 0; } @@ -2890,9 +3483,15 @@ static bool prog_needs_vmlinux_btf(struct bpf_program *prog) return false; } +static bool map_needs_vmlinux_btf(struct bpf_map *map) +{ + return bpf_map__is_struct_ops(map); +} + static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) { struct bpf_program *prog; + struct bpf_map *map; int i; /* CO-RE relocations need kernel BTF, only when btf_custom_path @@ -2911,12 +3510,17 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) } bpf_object__for_each_program(prog, obj) { - if (!prog->load) + if (!prog->autoload) continue; if (prog_needs_vmlinux_btf(prog)) return true; } + bpf_object__for_each_map(map, obj) { + if (map_needs_vmlinux_btf(map)) + return true; + } + return false; } @@ -2934,7 +3538,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) obj->btf_vmlinux = btf__load_vmlinux_btf(); err = libbpf_get_error(obj->btf_vmlinux); if (err) { - pr_warn("Error loading vmlinux BTF: %d\n", err); + pr_warn("Error loading vmlinux BTF: %s\n", errstr(err)); obj->btf_vmlinux = NULL; return err; } @@ -3005,7 +3609,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) /* enforce 8-byte pointers for BPF-targeted BTFs */ btf__set_pointer_size(obj->btf, 8); - bpf_object__sanitize_btf(obj, kern_btf); + err = bpf_object__sanitize_btf(obj, kern_btf); + if (err) + return err; } if (obj->gen_loader) { @@ -3022,7 +3628,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0); + obj->log_level ? 1 : 0, obj->token_fd); } if (sanitize) { if (!err) { @@ -3035,11 +3641,14 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) report: if (err) { btf_mandatory = kernel_needs_btf(obj); - pr_warn("Error loading .BTF into kernel: %d. %s\n", err, - btf_mandatory ? "BTF is mandatory, can't proceed." - : "BTF is optional, ignoring."); - if (!btf_mandatory) + if (btf_mandatory) { + pr_warn("Error loading .BTF into kernel: %s. BTF is mandatory, can't proceed.\n", + errstr(err)); + } else { + pr_info("Error loading .BTF into kernel: %s. BTF is optional, ignoring.\n", + errstr(err)); err = 0; + } } return err; } @@ -3242,10 +3851,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj) Elf64_Shdr *sh; /* ELF section indices are 0-based, but sec #0 is special "invalid" - * section. e_shnum does include sec #0, so e_shnum is the necessary - * size of an array to keep all the sections. + * section. Since section count retrieved by elf_getshdrnum() does + * include sec #0, it is already the necessary size of an array to keep + * all the sections. */ - obj->efile.sec_cnt = obj->efile.ehdr->e_shnum; + if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) { + pr_warn("elf: failed to get the number of sections for %s: %s\n", + obj->path, elf_errmsg(-1)); + return -LIBBPF_ERRNO__FORMAT; + } obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); if (!obj->efile.secs) return -ENOMEM; @@ -3317,7 +3931,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (err) return err; } else if (strcmp(name, "maps") == 0) { - obj->efile.maps_shndx = idx; + pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n"); + return -ENOTSUP; } else if (strcmp(name, MAPS_ELF_SEC) == 0) { obj->efile.btf_maps_shndx = idx; } else if (strcmp(name, BTF_ELF_SEC) == 0) { @@ -3347,9 +3962,24 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->sec_type = SEC_RODATA; sec_desc->shdr = sh; sec_desc->data = data; - } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { - obj->efile.st_ops_data = data; - obj->efile.st_ops_shndx = idx; + } else if (strcmp(name, STRUCT_OPS_SEC) == 0 || + strcmp(name, STRUCT_OPS_LINK_SEC) == 0 || + strcmp(name, "?" STRUCT_OPS_SEC) == 0 || + strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) { + sec_desc->sec_type = SEC_ST_OPS; + sec_desc->shdr = sh; + sec_desc->data = data; + obj->efile.has_st_ops = true; + } else if (strcmp(name, ARENA_SEC) == 0) { + obj->efile.arena_data = data; + obj->efile.arena_data_shndx = idx; + } else if (strcmp(name, JUMPTABLES_SEC) == 0) { + obj->jumptables_data = malloc(data->d_size); + if (!obj->jumptables_data) + return -ENOMEM; + memcpy(obj->jumptables_data, data->d_buf, data->d_size); + obj->jumptables_data_sz = data->d_size; + obj->efile.jumptables_data_shndx = idx; } else { pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); @@ -3364,6 +3994,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) /* Only do relo for section with exec instructions */ if (!section_have_execinstr(obj, targ_sec_idx) && strcmp(name, ".rel" STRUCT_OPS_SEC) && + strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) && + strcmp(name, ".rel?" STRUCT_OPS_SEC) && + strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) && strcmp(name, ".rel" MAPS_ELF_SEC)) { pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", idx, name, targ_sec_idx, @@ -3374,7 +4007,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->sec_type = SEC_RELO; sec_desc->shdr = sh; sec_desc->data = data; - } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) { + } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 || + str_has_pfx(name, BSS_SEC "."))) { sec_desc->sec_type = SEC_BSS; sec_desc->shdr = sh; sec_desc->data = data; @@ -3389,8 +4023,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return -LIBBPF_ERRNO__FORMAT; } + /* change BPF program insns to native endianness for introspection */ + if (!is_native_endianness(obj)) + bpf_object_bswap_progs(obj); + /* sort BPF programs by section name and in-section instruction offset - * for faster search */ + * for faster search + */ if (obj->nr_programs) qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); @@ -3420,7 +4059,7 @@ static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) return true; /* global function */ - return bind == STB_GLOBAL && type == STT_FUNC; + return (bind == STB_GLOBAL || bind == STB_WEAK) && type == STT_FUNC; } static int find_extern_btf_id(const struct btf *btf, const char *ext_name) @@ -3512,6 +4151,10 @@ static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, if (strcmp(name, "libbpf_tristate")) return KCFG_UNKNOWN; return KCFG_TRISTATE; + case BTF_KIND_ENUM64: + if (strcmp(name, "libbpf_tristate")) + return KCFG_UNKNOWN; + return KCFG_TRISTATE; case BTF_KIND_ARRAY: if (btf_array(t)->nelems == 0) return KCFG_UNKNOWN; @@ -3606,6 +4249,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) struct extern_desc *ext; int i, n, off, dummy_var_btf_id; const char *ext_name, *sec_name; + size_t ext_essent_len; Elf_Scn *scn; Elf64_Shdr *sh; @@ -3651,10 +4295,20 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return ext->btf_id; } t = btf__type_by_id(obj->btf, ext->btf_id); - ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off)); + if (!ext->name) + return -ENOMEM; ext->sym_idx = i; ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; + ext_essent_len = bpf_core_essential_name_len(ext->name); + ext->essent_name = NULL; + if (ext_essent_len != strlen(ext->name)) { + ext->essent_name = strndup(ext->name, ext_essent_len); + if (!ext->essent_name) + return -ENOMEM; + } + ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); if (ext->sec_btf_id <= 0) { pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", @@ -3685,9 +4339,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return -EINVAL; } ext->kcfg.type = find_kcfg_type(obj->btf, t->type, - &ext->kcfg.is_signed); + &ext->kcfg.is_signed); if (ext->kcfg.type == KCFG_UNKNOWN) { - pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name); + pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); return -ENOTSUP; } } else if (strcmp(sec_name, KSYMS_SEC) == 0) { @@ -3809,35 +4463,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return 0; } -struct bpf_program * -bpf_object__find_program_by_title(const struct bpf_object *obj, - const char *title) +static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) { - struct bpf_program *pos; - - bpf_object__for_each_program(pos, obj) { - if (pos->sec_name && !strcmp(pos->sec_name, title)) - return pos; - } - return errno = ENOENT, NULL; -} - -static bool prog_is_subprog(const struct bpf_object *obj, - const struct bpf_program *prog) -{ - /* For legacy reasons, libbpf supports an entry-point BPF programs - * without SEC() attribute, i.e., those in the .text section. But if - * there are 2 or more such programs in the .text section, they all - * must be subprograms called from entry-point BPF programs in - * designated SEC()'tions, otherwise there is no way to distinguish - * which of those programs should be loaded vs which are a subprogram. - * Similarly, if there is a function/program in .text and at least one - * other BPF program with custom SEC() attribute, then we just assume - * .text programs are subprograms (even if they are not called from - * other programs), because libbpf never explicitly supported mixing - * SEC()-designated BPF programs and .text entry-point BPF programs. - */ - return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; + return prog->sec_idx == obj->efile.text_shndx; } struct bpf_program * @@ -3871,8 +4499,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj, static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, int shndx) { - return shndx == obj->efile.maps_shndx || - shndx == obj->efile.btf_maps_shndx; + return shndx == obj->efile.btf_maps_shndx; } static enum libbpf_map_type @@ -3893,6 +4520,44 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) } } +static int bpf_prog_compute_hash(struct bpf_program *prog) +{ + struct bpf_insn *purged; + int i, err = 0; + + purged = calloc(prog->insns_cnt, BPF_INSN_SZ); + if (!purged) + return -ENOMEM; + + /* If relocations have been done, the map_fd needs to be + * discarded for the digest calculation. + */ + for (i = 0; i < prog->insns_cnt; i++) { + purged[i] = prog->insns[i]; + if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) && + (purged[i].src_reg == BPF_PSEUDO_MAP_FD || + purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) { + purged[i].imm = 0; + i++; + if (i >= prog->insns_cnt || + prog->insns[i].code != 0 || + prog->insns[i].dst_reg != 0 || + prog->insns[i].src_reg != 0 || + prog->insns[i].off != 0) { + err = -EINVAL; + goto out; + } + purged[i] = prog->insns[i]; + purged[i].imm = 0; + } + } + libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn), + prog->hash); +out: + free(purged); + return err; +} + static int bpf_program__record_reloc(struct bpf_program *prog, struct reloc_desc *reloc_desc, __u32 insn_idx, const char *sym_name, @@ -3930,11 +4595,11 @@ static int bpf_program__record_reloc(struct bpf_program *prog, pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", prog->name, i, ext->name, ext->sym_idx, insn_idx); if (insn->code == (BPF_JMP | BPF_CALL)) - reloc_desc->type = RELO_EXTERN_FUNC; + reloc_desc->type = RELO_EXTERN_CALL; else - reloc_desc->type = RELO_EXTERN_VAR; + reloc_desc->type = RELO_EXTERN_LD64; reloc_desc->insn_idx = insn_idx; - reloc_desc->sym_off = i; /* sym_off stores extern index */ + reloc_desc->ext_idx = i; return 0; } @@ -3988,6 +4653,35 @@ static int bpf_program__record_reloc(struct bpf_program *prog, type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); + /* arena data relocation */ + if (shdr_idx == obj->efile.arena_data_shndx) { + if (obj->arena_map_idx < 0) { + pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n", + prog->name, insn_idx); + return -LIBBPF_ERRNO__RELOC; + } + reloc_desc->type = RELO_DATA; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = obj->arena_map_idx; + reloc_desc->sym_off = sym->st_value; + + map = &obj->maps[obj->arena_map_idx]; + pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n", + prog->name, obj->arena_map_idx, map->name, map->sec_idx, + map->sec_offset, insn_idx); + return 0; + } + + /* jump table data relocation */ + if (shdr_idx == obj->efile.jumptables_data_shndx) { + reloc_desc->type = RELO_INSN_ARRAY; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = -1; + reloc_desc->sym_off = sym->st_value; + reloc_desc->sym_size = sym->st_size; + return 0; + } + /* generic map reference relocation */ if (type == LIBBPF_MAP_UNSPEC) { if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { @@ -4058,6 +4752,9 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, int l = 0, r = obj->nr_programs - 1, m; struct bpf_program *prog; + if (!obj->nr_programs) + return NULL; + while (l < r) { m = l + (r - l + 1) / 2; prog = &obj->programs[m]; @@ -4097,6 +4794,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); + if (!scn_data) + return -LIBBPF_ERRNO__FORMAT; relo_sec_name = elf_sec_str(obj, shdr->sh_name); sec_name = elf_sec_name(obj, scn); @@ -4175,37 +4874,33 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat return 0; } -static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) +static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) { - struct bpf_map_def *def = &map->def; - __u32 key_type_id = 0, value_type_id = 0; - int ret; + int id; + + if (!obj->btf) + return -ENOENT; /* if it's BTF-defined map, we don't need to search for type IDs. * For struct_ops map, it does not need btf_key_type_id and * btf_value_type_id. */ - if (map->sec_idx == obj->efile.btf_maps_shndx || - bpf_map__is_struct_ops(map)) + if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map)) return 0; - if (!bpf_map__is_internal(map)) { - ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size, - def->value_size, &key_type_id, - &value_type_id); - } else { - /* - * LLVM annotates global data differently in BTF, that is, - * only as '.data', '.bss' or '.rodata'. - */ - ret = btf__find_by_name(obj->btf, map->real_name); - } - if (ret < 0) - return ret; + /* + * LLVM annotates global data differently in BTF, that is, + * only as '.data', '.bss' or '.rodata'. + */ + if (!bpf_map__is_internal(map)) + return -ENOENT; - map->btf_key_type_id = key_type_id; - map->btf_value_type_id = bpf_map__is_internal(map) ? - ret : value_type_id; + id = btf__find_by_name(obj->btf, map->real_name); + if (id < 0) + return id; + + map->btf_key_type_id = 0; + map->btf_value_type_id = id; return 0; } @@ -4219,11 +4914,11 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); memset(info, 0, sizeof(*info)); - fp = fopen(file, "r"); + fp = fopen(file, "re"); if (!fp) { err = -errno; - pr_warn("failed to open %s: %d. No procfs support?\n", file, - err); + pr_warn("failed to open %s: %s. No procfs support?\n", file, + errstr(err)); return err; } @@ -4245,43 +4940,79 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) return 0; } +static bool map_is_created(const struct bpf_map *map) +{ + return map->obj->state >= OBJ_PREPARED || map->reused; +} + +bool bpf_map__autocreate(const struct bpf_map *map) +{ + return map->autocreate; +} + +int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) +{ + if (map_is_created(map)) + return libbpf_err(-EBUSY); + + map->autocreate = autocreate; + return 0; +} + +int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach) +{ + if (!bpf_map__is_struct_ops(map)) + return libbpf_err(-EINVAL); + + map->autoattach = autoattach; + return 0; +} + +bool bpf_map__autoattach(const struct bpf_map *map) +{ + return map->autoattach; +} + int bpf_map__reuse_fd(struct bpf_map *map, int fd) { - struct bpf_map_info info = {}; - __u32 len = sizeof(info); + struct bpf_map_info info; + __u32 len = sizeof(info), name_len; int new_fd, err; char *new_name; - err = bpf_obj_get_info_by_fd(fd, &info, &len); + memset(&info, 0, len); + err = bpf_map_get_info_by_fd(fd, &info, &len); if (err && errno == EINVAL) err = bpf_get_map_info_from_fdinfo(fd, &info); if (err) return libbpf_err(err); - new_name = strdup(info.name); + name_len = strlen(info.name); + if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) + new_name = strdup(map->name); + else + new_name = strdup(info.name); + if (!new_name) return libbpf_err(-errno); - new_fd = open("/", O_RDONLY | O_CLOEXEC); + /* + * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. + * This is similar to what we do in ensure_good_fd(), but without + * closing original FD. + */ + new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); if (new_fd < 0) { err = -errno; goto err_free_new_name; } - new_fd = dup3(fd, new_fd, O_CLOEXEC); - if (new_fd < 0) { - err = -errno; - goto err_close_new_fd; - } + err = reuse_fd(map->fd, new_fd); + if (err) + goto err_free_new_name; - err = zclose(map->fd); - if (err) { - err = -errno; - goto err_close_new_fd; - } free(map->name); - map->fd = new_fd; map->name = new_name; map->def.type = info.type; map->def.key_size = info.key_size; @@ -4295,8 +5026,6 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) return 0; -err_close_new_fd: - close(new_fd); err_free_new_name: free(new_name); return libbpf_err(err); @@ -4317,48 +5046,99 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map) int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); + map->def.max_entries = max_entries; + + /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ + if (map_is_ringbuf(map)) + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + return 0; } -int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +static int bpf_object_prepare_token(struct bpf_object *obj) { - if (!map || !max_entries) - return libbpf_err(-EINVAL); + const char *bpffs_path; + int bpffs_fd = -1, token_fd, err; + bool mandatory; + enum libbpf_print_level level; + + /* token is explicitly prevented */ + if (obj->token_path && obj->token_path[0] == '\0') { + pr_debug("object '%s': token is prevented, skipping...\n", obj->name); + return 0; + } - return bpf_map__set_max_entries(map, max_entries); + mandatory = obj->token_path != NULL; + level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; + + bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; + bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); + if (bpffs_fd < 0) { + err = -errno; + __pr(level, "object '%s': failed (%s) to open BPF FS mount at '%s'%s\n", + obj->name, errstr(err), bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? err : 0; + } + + token_fd = bpf_token_create(bpffs_fd, 0); + close(bpffs_fd); + if (token_fd < 0) { + if (!mandatory && token_fd == -ENOENT) { + pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", + obj->name, bpffs_path); + return 0; + } + __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", + obj->name, token_fd, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? token_fd : 0; + } + + obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); + if (!obj->feat_cache) { + close(token_fd); + return -ENOMEM; + } + + obj->token_fd = token_fd; + obj->feat_cache->token_fd = token_fd; + + return 0; } static int bpf_object__probe_loading(struct bpf_object *obj) { - char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = obj->token_fd, + .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0, + ); if (obj->gen_loader) return 0; ret = bump_rlimit_memlock(); if (ret) - pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); + pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %s), you might need to do it explicitly!\n", + errstr(ret)); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF " - "program. Make sure your kernel supports BPF " - "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is " - "set to big enough value.\n", __func__, cp, ret); + pr_warn("Error in %s(): %s. Couldn't load trivial BPF program. Make sure your kernel supports BPF (CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is set to big enough value.\n", + __func__, errstr(ret)); return -ret; } close(ret); @@ -4366,403 +5146,46 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, insn_cnt = ARRAY_SIZE(insns); - - /* make sure loading with name works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL); - return probe_fd(ret); -} - -static int probe_kern_global_data(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func_global(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_datasec(void) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_float(void) -{ - static const char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_decl_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_type_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_array_mmap(void) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(fd); -} - -static int probe_prog_bind_map(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. - */ - err = bpf_obj_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -typedef int (*feature_probe_fn)(void); - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; - enum kern_feature_result res; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, -}; - bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - - if (obj && obj->gen_loader) + if (obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. */ return true; - if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { - ret = feat->probe(); - if (ret > 0) { - WRITE_ONCE(feat->res, FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(feat->res, FEAT_MISSING); - } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(feat->res, FEAT_MISSING); - } - } + if (obj->token_fd) + return feat_supported(obj->feat_cache, feat_id); - return READ_ONCE(feat->res) == FEAT_SUPPORTED; + return feat_supported(NULL, feat_id); } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { - struct bpf_map_info map_info = {}; - char msg[STRERR_BUFSIZE]; - __u32 map_info_len; + struct bpf_map_info map_info; + __u32 map_info_len = sizeof(map_info); int err; - map_info_len = sizeof(map_info); - - err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); + memset(&map_info, 0, map_info_len); + err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len); if (err && errno == EINVAL) err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); if (err) { pr_warn("failed to get map info for map FD %d: %s\n", map_fd, - libbpf_strerror_r(errno, msg, sizeof(msg))); + errstr(err)); return false; } + /* + * bpf_get_map_info_by_fd() for DEVMAP will always return flags with + * BPF_F_RDONLY_PROG set, but it generally is not set at map creation time. + * Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from + * bpf_get_map_info_by_fd() when checking for compatibility with an + * existing DEVMAP. + */ + if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH) + map_info.map_flags &= ~BPF_F_RDONLY_PROG; + return (map_info.type == map->def.type && map_info.key_size == map->def.key_size && map_info.value_size == map->def.value_size && @@ -4774,7 +5197,6 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) static int bpf_object__reuse_map(struct bpf_map *map) { - char *cp, errmsg[STRERR_BUFSIZE]; int err, pin_fd; pin_fd = bpf_obj_get(map->pin_path); @@ -4786,9 +5208,8 @@ bpf_object__reuse_map(struct bpf_map *map) return 0; } - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); pr_warn("couldn't retrieve pinned map '%s': %s\n", - map->pin_path, cp); + map->pin_path, errstr(err)); return err; } @@ -4800,10 +5221,10 @@ bpf_object__reuse_map(struct bpf_map *map) } err = bpf_map__reuse_fd(map, pin_fd); - if (err) { - close(pin_fd); + close(pin_fd); + if (err) return err; - } + map->pinned = true; pr_debug("reused pinned map at '%s'\n", map->pin_path); @@ -4814,8 +5235,8 @@ static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { enum libbpf_map_type map_type = map->libbpf_type; - char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; + size_t mmap_sz; if (obj->gen_loader) { bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, @@ -4824,12 +5245,12 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); return 0; } + err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); if (err) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error setting initial map(%s) contents: %s\n", - map->name, cp); + pr_warn("map '%s': failed to set initial contents: %s\n", + bpf_map__name(map), errstr(err)); return err; } @@ -4838,12 +5259,43 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_freeze(map->fd); if (err) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error freezing map(%s) as read-only: %s\n", - map->name, cp); + pr_warn("map '%s': failed to freeze as read-only: %s\n", + bpf_map__name(map), errstr(err)); return err; } } + + /* Remap anonymous mmap()-ed "map initialization image" as + * a BPF map-backed mmap()-ed memory, but preserving the same + * memory address. This will cause kernel to change process' + * page table to point to a different piece of kernel memory, + * but from userspace point of view memory address (and its + * contents, being identical at this point) will stay the + * same. This mapping will be released by bpf_object__close() + * as per normal clean up procedure. + */ + mmap_sz = bpf_map_mmap_sz(map); + if (map->def.map_flags & BPF_F_MMAPABLE) { + void *mmaped; + int prot; + + if (map->def.map_flags & BPF_F_RDONLY_PROG) + prot = PROT_READ; + else + prot = PROT_READ | PROT_WRITE; + mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0); + if (mmaped == MAP_FAILED) { + err = -errno; + pr_warn("map '%s': failed to re-mmap() contents: %s\n", + bpf_map__name(map), errstr(err)); + return err; + } + map->mmaped = mmaped; + } else if (map->mmaped) { + munmap(map->mmaped, mmap_sz); + map->mmaped = NULL; + } + return 0; } @@ -4854,8 +5306,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b LIBBPF_OPTS(bpf_map_create_opts, create_attr); struct bpf_map_def *def = &map->def; const char *map_name = NULL; - __u32 max_entries; - int err = 0; + int err = 0, map_fd; if (kernel_supports(obj, FEAT_PROG_NAME)) map_name = map->name; @@ -4863,26 +5314,27 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; + create_attr.token_fd = obj->token_fd; + if (obj->token_fd) + create_attr.map_flags |= BPF_F_TOKEN_FD; + if (map->excl_prog) { + err = bpf_prog_compute_hash(map->excl_prog); + if (err) + return err; - if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { - int nr_cpus; - - nr_cpus = libbpf_num_possible_cpus(); - if (nr_cpus < 0) { - pr_warn("map '%s': failed to determine number of system CPUs: %d\n", - map->name, nr_cpus); - return nr_cpus; - } - pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); - max_entries = nr_cpus; - } else { - max_entries = def->max_entries; + create_attr.excl_prog_hash = map->excl_prog->hash; + create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH; } - if (bpf_map__is_struct_ops(map)) + if (bpf_map__is_struct_ops(map)) { create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; + if (map->mod_btf_fd >= 0) { + create_attr.value_type_btf_obj_fd = map->mod_btf_fd; + create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD; + } + } - if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) { + if (obj->btf && btf__fd(obj->btf) >= 0) { create_attr.btf_fd = btf__fd(obj->btf); create_attr.btf_key_type_id = map->btf_key_type_id; create_attr.btf_value_type_id = map->btf_value_type_id; @@ -4890,13 +5342,16 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (bpf_map_type__is_map_in_map(def->type)) { if (map->inner_map) { + err = map_set_def_max_entries(map->inner_map); + if (err) + return err; err = bpf_object__create_map(obj, map->inner_map, true); if (err) { - pr_warn("map '%s': failed to create inner map: %d\n", - map->name, err); + pr_warn("map '%s': failed to create inner map: %s\n", + map->name, errstr(err)); return err; } - map->inner_map_fd = bpf_map__fd(map->inner_map); + map->inner_map_fd = map->inner_map->fd; } if (map->inner_map_fd >= 0) create_attr.inner_map_fd = map->inner_map_fd; @@ -4916,49 +5371,50 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_QUEUE: case BPF_MAP_TYPE_STACK: - case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_ARENA: create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; + break; + case BPF_MAP_TYPE_STRUCT_OPS: + create_attr.btf_value_type_id = 0; + break; default: break; } if (obj->gen_loader) { bpf_gen__map_create(obj->gen_loader, def->type, map_name, - def->key_size, def->value_size, max_entries, + def->key_size, def->value_size, def->max_entries, &create_attr, is_inner ? -1 : map - obj->maps); - /* Pretend to have valid FD to pass various fd >= 0 checks. - * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. + /* We keep pretenting we have valid FD to pass various fd >= 0 + * checks by just keeping original placeholder FDs in place. + * See bpf_object__add_map() comment. + * This placeholder fd will not be used with any syscall and + * will be reset to -1 eventually. */ - map->fd = 0; + map_fd = map->fd; } else { - map->fd = bpf_map_create(def->type, map_name, - def->key_size, def->value_size, - max_entries, &create_attr); + map_fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + def->max_entries, &create_attr); } - if (map->fd < 0 && (create_attr.btf_key_type_id || - create_attr.btf_value_type_id)) { - char *cp, errmsg[STRERR_BUFSIZE]; - + if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", - map->name, cp, err); + pr_warn("Error in bpf_create_map_xattr(%s): %s. Retrying without BTF.\n", + map->name, errstr(err)); create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; - map->fd = bpf_map_create(def->type, map_name, - def->key_size, def->value_size, - max_entries, &create_attr); + map_fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + def->max_entries, &create_attr); } - err = map->fd < 0 ? -errno : 0; - if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { if (obj->gen_loader) map->inner_map->fd = -1; @@ -4966,7 +5422,19 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b zfree(&map->inner_map); } - return err; + if (map_fd < 0) + return map_fd; + + /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */ + if (map->fd == map_fd) + return 0; + + /* Keep placeholder FD value but now point it to the BPF map object. + * This way everything that relied on this map's FD (e.g., relocated + * ldimm64 instructions) will stay valid and won't need adjustments. + * map->fd stays valid but now point to what map_fd points to. + */ + return reuse_fd(map->fd, map_fd); } static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) @@ -4980,7 +5448,7 @@ static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) continue; targ_map = map->init_slots[i]; - fd = bpf_map__fd(targ_map); + fd = targ_map->fd; if (obj->gen_loader) { bpf_gen__populate_outer_map(obj->gen_loader, @@ -4991,8 +5459,8 @@ static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) } if (err) { err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", - map->name, i, targ_map->name, fd, err); + pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %s\n", + map->name, i, targ_map->name, fd, errstr(err)); return err; } pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", @@ -5024,8 +5492,8 @@ static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_update_elem(map->fd, &i, &fd, 0); if (err) { err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", - map->name, i, targ_prog->name, fd, err); + pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %s\n", + map->name, i, targ_prog->name, fd, errstr(err)); return err; } pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", @@ -5050,11 +5518,27 @@ static int bpf_object_init_prog_arrays(struct bpf_object *obj) continue; err = init_prog_array_slots(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) return err; + } + return 0; +} + +static int map_set_def_max_entries(struct bpf_map *map) +{ + if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { + int nr_cpus; + + nr_cpus = libbpf_num_possible_cpus(); + if (nr_cpus < 0) { + pr_warn("map '%s': failed to determine number of system CPUs: %d\n", + map->name, nr_cpus); + return nr_cpus; } + pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); + map->def.max_entries = nr_cpus; } + return 0; } @@ -5062,7 +5546,6 @@ static int bpf_object__create_maps(struct bpf_object *obj) { struct bpf_map *map; - char *cp, errmsg[STRERR_BUFSIZE]; unsigned int i, j; int err; bool retried; @@ -5084,12 +5567,18 @@ bpf_object__create_maps(struct bpf_object *obj) * bpf_object loading will succeed just fine even on old * kernels. */ - if (bpf_map__is_internal(map) && - !kernel_supports(obj, FEAT_GLOBAL_DATA)) { - map->skipped = true; + if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) + map->autocreate = false; + + if (!map->autocreate) { + pr_debug("map '%s': skipped auto-creating...\n", map->name); continue; } + err = map_set_def_max_entries(map); + if (err) + goto err_out; + retried = false; retry: if (map->pin_path) { @@ -5107,7 +5596,7 @@ retry: } } - if (map->fd >= 0) { + if (map->reused) { pr_debug("map '%s': skipping creation (preset fd=%d)\n", map->name, map->fd); } else { @@ -5120,31 +5609,41 @@ retry: if (bpf_map__is_internal(map)) { err = bpf_object__populate_internal_map(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) goto err_out; + } else if (map->def.type == BPF_MAP_TYPE_ARENA) { + map->mmaped = mmap((void *)(long)map->map_extra, + bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, + map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, + map->fd, 0); + if (map->mmaped == MAP_FAILED) { + err = -errno; + map->mmaped = NULL; + pr_warn("map '%s': failed to mmap arena: %s\n", + map->name, errstr(err)); + return err; + } + if (obj->arena_data) { + memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz); + zfree(&obj->arena_data); } } - if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { err = init_map_in_map_slots(obj, map); - if (err < 0) { - zclose(map->fd); + if (err < 0) goto err_out; - } } } if (map->pin_path && !map->pinned) { err = bpf_map__pin(map, NULL); if (err) { - zclose(map->fd); if (!retried && err == -EEXIST) { retried = true; goto retry; } - pr_warn("map '%s': failed to auto-pin at '%s': %d\n", - map->name, map->pin_path, err); + pr_warn("map '%s': failed to auto-pin at '%s': %s\n", + map->name, map->pin_path, errstr(err)); goto err_out; } } @@ -5153,8 +5652,7 @@ retry: return 0; err_out: - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err); + pr_warn("map '%s': failed to create: %s\n", map->name, errstr(err)); pr_perm_msg(err); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); @@ -5185,18 +5683,21 @@ size_t bpf_core_essential_name_len(const char *name) return n; } -static void bpf_core_free_cands(struct bpf_core_cand_list *cands) +void bpf_core_free_cands(struct bpf_core_cand_list *cands) { + if (!cands) + return; + free(cands->cands); free(cands); } -static int bpf_core_add_cands(struct bpf_core_cand *local_cand, - size_t local_essent_len, - const struct btf *targ_btf, - const char *targ_btf_name, - int targ_start_id, - struct bpf_core_cand_list *cands) +int bpf_core_add_cands(struct bpf_core_cand *local_cand, + size_t local_essent_len, + const struct btf *targ_btf, + const char *targ_btf_name, + int targ_start_id, + struct bpf_core_cand_list *cands) { struct bpf_core_cand *new_cands, *cand; const struct btf_type *t, *local_t; @@ -5210,7 +5711,7 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, n = btf__type_cnt(targ_btf); for (i = targ_start_id; i < n; i++) { t = btf__type_by_id(targ_btf, i); - if (btf_kind(t) != btf_kind(local_t)) + if (!btf_kind_core_compat(t, local_t)) continue; targ_name = btf__name_by_offset(targ_btf, t->name_off); @@ -5269,9 +5770,13 @@ static int load_module_btfs(struct bpf_object *obj) err = bpf_btf_get_next_id(id, &id); if (err && errno == ENOENT) return 0; + if (err && errno == EPERM) { + pr_debug("skipping module BTFs loading, missing privileges\n"); + return 0; + } if (err) { err = -errno; - pr_warn("failed to iterate BTF objects: %d\n", err); + pr_warn("failed to iterate BTF objects: %s\n", errstr(err)); return err; } @@ -5280,7 +5785,7 @@ static int load_module_btfs(struct bpf_object *obj) if (errno == ENOENT) continue; /* expected race: BTF was unloaded */ err = -errno; - pr_warn("failed to get BTF object #%d FD: %d\n", id, err); + pr_warn("failed to get BTF object #%d FD: %s\n", id, errstr(err)); return err; } @@ -5289,10 +5794,10 @@ static int load_module_btfs(struct bpf_object *obj) info.name = ptr_to_u64(name); info.name_len = sizeof(name); - err = bpf_obj_get_info_by_fd(fd, &info, &len); + err = bpf_btf_get_info_by_fd(fd, &info, &len); if (err) { err = -errno; - pr_warn("failed to get BTF object #%d info: %d\n", id, err); + pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err)); goto err_out; } @@ -5305,13 +5810,13 @@ static int load_module_btfs(struct bpf_object *obj) btf = btf_get_from_fd(fd, obj->btf_vmlinux); err = libbpf_get_error(btf); if (err) { - pr_warn("failed to load module [%s]'s BTF object #%d: %d\n", - name, id, err); + pr_warn("failed to load module [%s]'s BTF object #%d: %s\n", + name, id, errstr(err)); goto err_out; } err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, - sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); + sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); if (err) goto err_out; @@ -5418,91 +5923,23 @@ err_out: int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id) { - const struct btf_type *local_type, *targ_type; - int depth = 32; /* max recursion depth */ - - /* caller made sure that names match (ignoring flavor suffix) */ - local_type = btf__type_by_id(local_btf, local_id); - targ_type = btf__type_by_id(targ_btf, targ_id); - if (btf_kind(local_type) != btf_kind(targ_type)) - return 0; - -recur: - depth--; - if (depth < 0) - return -EINVAL; - - local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); - if (!local_type || !targ_type) - return -EINVAL; - - if (btf_kind(local_type) != btf_kind(targ_type)) - return 0; - - switch (btf_kind(local_type)) { - case BTF_KIND_UNKN: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_ENUM: - case BTF_KIND_FWD: - return 1; - case BTF_KIND_INT: - /* just reject deprecated bitfield-like integers; all other - * integers are by default compatible between each other - */ - return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; - case BTF_KIND_PTR: - local_id = local_type->type; - targ_id = targ_type->type; - goto recur; - case BTF_KIND_ARRAY: - local_id = btf_array(local_type)->type; - targ_id = btf_array(targ_type)->type; - goto recur; - case BTF_KIND_FUNC_PROTO: { - struct btf_param *local_p = btf_params(local_type); - struct btf_param *targ_p = btf_params(targ_type); - __u16 local_vlen = btf_vlen(local_type); - __u16 targ_vlen = btf_vlen(targ_type); - int i, err; - - if (local_vlen != targ_vlen) - return 0; - - for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { - skip_mods_and_typedefs(local_btf, local_p->type, &local_id); - skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id); - err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id); - if (err <= 0) - return err; - } - - /* tail recurse for return type check */ - skip_mods_and_typedefs(local_btf, local_type->type, &local_id); - skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id); - goto recur; - } - default: - pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", - btf_kind_str(local_type), local_id, targ_id); - return 0; - } + return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32); } -static size_t bpf_core_hash_fn(const void *key, void *ctx) +int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id) { - return (size_t)key; + return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32); } -static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) +static size_t bpf_core_hash_fn(const long key, void *ctx) { - return k1 == k2; + return key; } -static void *u32_as_hash_key(__u32 x) +static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx) { - return (void *)(uintptr_t)x; + return k1 == k2; } static int record_relo_core(struct bpf_program *prog, @@ -5523,33 +5960,36 @@ static int record_relo_core(struct bpf_program *prog, return 0; } -static int bpf_core_apply_relo(struct bpf_program *prog, - const struct bpf_core_relo *relo, - int relo_idx, - const struct btf *local_btf, - struct hashmap *cand_cache) +static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) +{ + struct reloc_desc *relo; + int i; + + for (i = 0; i < prog->nr_reloc; i++) { + relo = &prog->reloc_desc[i]; + if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) + continue; + + return relo->core_relo; + } + + return NULL; +} + +static int bpf_core_resolve_relo(struct bpf_program *prog, + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, + struct hashmap *cand_cache, + struct bpf_core_relo_res *targ_res) { struct bpf_core_spec specs_scratch[3] = {}; - const void *type_key = u32_as_hash_key(relo->type_id); struct bpf_core_cand_list *cands = NULL; const char *prog_name = prog->name; const struct btf_type *local_type; const char *local_name; __u32 local_id = relo->type_id; - struct bpf_insn *insn; - int insn_idx, err; - - if (relo->insn_off % BPF_INSN_SZ) - return -EINVAL; - insn_idx = relo->insn_off / BPF_INSN_SZ; - /* adjust insn_idx from section frame of reference to the local - * program's frame of reference; (sub-)program code is not yet - * relocated, so it's enough to just subtract in-section offset - */ - insn_idx = insn_idx - prog->sec_insn_off; - if (insn_idx >= prog->insns_cnt) - return -EINVAL; - insn = &prog->insns[insn_idx]; + int err; local_type = btf__type_by_id(local_btf, local_id); if (!local_type) @@ -5559,17 +5999,8 @@ static int bpf_core_apply_relo(struct bpf_program *prog, if (!local_name) return -EINVAL; - if (prog->obj->gen_loader) { - const char *spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - - pr_debug("record_relo_core: prog %td insn[%d] %s %s %s final insn_idx %d\n", - prog - prog->obj->programs, relo->insn_off / 8, - btf_kind_str(local_type), local_name, spec_str, insn_idx); - return record_relo_core(prog, relo, insn_idx); - } - if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && - !hashmap__find(cand_cache, type_key, (void **)&cands)) { + !hashmap__find(cand_cache, local_id, &cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); if (IS_ERR(cands)) { pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", @@ -5577,28 +6008,30 @@ static int bpf_core_apply_relo(struct bpf_program *prog, local_name, PTR_ERR(cands)); return PTR_ERR(cands); } - err = hashmap__set(cand_cache, type_key, cands, NULL, NULL); + err = hashmap__set(cand_cache, local_id, cands, NULL, NULL); if (err) { bpf_core_free_cands(cands); return err; } } - return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, - relo_idx, local_btf, cands, specs_scratch); + return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch, + targ_res); } static int bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) { const struct btf_ext_info_sec *sec; + struct bpf_core_relo_res targ_res; const struct bpf_core_relo *rec; const struct btf_ext_info *seg; struct hashmap_entry *entry; struct hashmap *cand_cache = NULL; struct bpf_program *prog; + struct bpf_insn *insn; const char *sec_name; - int i, err = 0, insn_idx, sec_idx; + int i, err = 0, insn_idx, sec_idx, sec_num; if (obj->btf_ext->core_relo_info.len == 0) return 0; @@ -5607,7 +6040,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); err = libbpf_get_error(obj->btf_vmlinux_override); if (err) { - pr_warn("failed to parse target BTF: %d\n", err); + pr_warn("failed to parse target BTF: %s\n", errstr(err)); return err; } } @@ -5619,52 +6052,73 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) } seg = &obj->btf_ext->core_relo_info; + sec_num = 0; for_each_btf_ext_sec(seg, sec) { + sec_idx = seg->sec_idxs[sec_num]; + sec_num++; + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); if (str_is_empty(sec_name)) { err = -EINVAL; goto out; } - /* bpf_object's ELF is gone by now so it's not easy to find - * section index by section name, but we can find *any* - * bpf_program within desired section name and use it's - * prog->sec_idx to do a proper search by section index and - * instruction offset - */ - prog = NULL; - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - if (strcmp(prog->sec_name, sec_name) == 0) - break; - } - if (!prog) { - pr_warn("sec '%s': failed to find a BPF program\n", sec_name); - return -ENOENT; - } - sec_idx = prog->sec_idx; - pr_debug("sec '%s': found %d CO-RE relocations\n", - sec_name, sec->num_info); + pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { + if (rec->insn_off % BPF_INSN_SZ) + return -EINVAL; insn_idx = rec->insn_off / BPF_INSN_SZ; prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); if (!prog) { - pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n", - sec_name, insn_idx, i); - err = -EINVAL; - goto out; + /* When __weak subprog is "overridden" by another instance + * of the subprog from a different object file, linker still + * appends all the .BTF.ext info that used to belong to that + * eliminated subprogram. + * This is similar to what x86-64 linker does for relocations. + * So just ignore such relocations just like we ignore + * subprog instructions when discovering subprograms. + */ + pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", + sec_name, i, insn_idx); + continue; } /* no need to apply CO-RE relocation if the program is * not going to be loaded */ - if (!prog->load) + if (!prog->autoload) + continue; + + /* adjust insn_idx from section frame of reference to the local + * program's frame of reference; (sub-)program code is not yet + * relocated, so it's enough to just subtract in-section offset + */ + insn_idx = insn_idx - prog->sec_insn_off; + if (insn_idx >= prog->insns_cnt) + return -EINVAL; + insn = &prog->insns[insn_idx]; + + err = record_relo_core(prog, rec, insn_idx); + if (err) { + pr_warn("prog '%s': relo #%d: failed to record relocation: %s\n", + prog->name, i, errstr(err)); + goto out; + } + + if (prog->obj->gen_loader) continue; - err = bpf_core_apply_relo(prog, rec, i, obj->btf, cand_cache); + err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); + if (err) { + pr_warn("prog '%s': relo #%d: failed to relocate: %s\n", + prog->name, i, errstr(err)); + goto out; + } + + err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); if (err) { - pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", - prog->name, i, err); + pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %s\n", + prog->name, i, insn_idx, errstr(err)); goto out; } } @@ -5677,13 +6131,218 @@ out: if (!IS_ERR_OR_NULL(cand_cache)) { hashmap__for_each_entry(cand_cache, entry, i) { - bpf_core_free_cands(entry->value); + bpf_core_free_cands(entry->pvalue); } hashmap__free(cand_cache); } return err; } +/* base map load ldimm64 special constant, used also for log fixup logic */ +#define POISON_LDIMM64_MAP_BASE 2001000000 +#define POISON_LDIMM64_MAP_PFX "200100" + +static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, + int insn_idx, struct bpf_insn *insn, + int map_idx, const struct bpf_map *map) +{ + int i; + + pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", + prog->name, relo_idx, insn_idx, map_idx, map->name); + + /* we turn single ldimm64 into two identical invalid calls */ + for (i = 0; i < 2; i++) { + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with something like: + * invalid func unknown#2001000123 + * where lower 123 is map index into obj->maps[] array + */ + insn->imm = POISON_LDIMM64_MAP_BASE + map_idx; + + insn++; + } +} + +/* unresolved kfunc call special constant, used also for log fixup logic */ +#define POISON_CALL_KFUNC_BASE 2002000000 +#define POISON_CALL_KFUNC_PFX "2002" + +static void poison_kfunc_call(struct bpf_program *prog, int relo_idx, + int insn_idx, struct bpf_insn *insn, + int ext_idx, const struct extern_desc *ext) +{ + pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n", + prog->name, relo_idx, insn_idx, ext->name); + + /* we turn kfunc call into invalid helper call with identifiable constant */ + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with something like: + * invalid func unknown#2001000123 + * where lower 123 is extern index into obj->externs[] array + */ + insn->imm = POISON_CALL_KFUNC_BASE + ext_idx; +} + +static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off) +{ + size_t i; + + for (i = 0; i < obj->jumptable_map_cnt; i++) { + /* + * This might happen that same offset is used for two different + * programs (as jump tables can be the same). However, for + * different programs different maps should be created. + */ + if (obj->jumptable_maps[i].sym_off == sym_off && + obj->jumptable_maps[i].prog == prog) + return obj->jumptable_maps[i].fd; + } + + return -ENOENT; +} + +static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off, int map_fd) +{ + size_t cnt = obj->jumptable_map_cnt; + size_t size = sizeof(obj->jumptable_maps[0]); + void *tmp; + + tmp = libbpf_reallocarray(obj->jumptable_maps, cnt + 1, size); + if (!tmp) + return -ENOMEM; + + obj->jumptable_maps = tmp; + obj->jumptable_maps[cnt].prog = prog; + obj->jumptable_maps[cnt].sym_off = sym_off; + obj->jumptable_maps[cnt].fd = map_fd; + obj->jumptable_map_cnt++; + + return 0; +} + +static int find_subprog_idx(struct bpf_program *prog, int insn_idx) +{ + int i; + + for (i = prog->subprog_cnt - 1; i >= 0; i--) { + if (insn_idx >= prog->subprogs[i].sub_insn_off) + return i; + } + + return -1; +} + +static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo) +{ + const __u32 jt_entry_size = 8; + int sym_off = relo->sym_off; + int jt_size = relo->sym_size; + __u32 max_entries = jt_size / jt_entry_size; + __u32 value_size = sizeof(struct bpf_insn_array_value); + struct bpf_insn_array_value val = {}; + int subprog_idx; + int map_fd, err; + __u64 insn_off; + __u64 *jt; + __u32 i; + + map_fd = find_jt_map(obj, prog, sym_off); + if (map_fd >= 0) + return map_fd; + + if (sym_off % jt_entry_size) { + pr_warn("map '.jumptables': jumptable start %d should be multiple of %u\n", + sym_off, jt_entry_size); + return -EINVAL; + } + + if (jt_size % jt_entry_size) { + pr_warn("map '.jumptables': jumptable size %d should be multiple of %u\n", + jt_size, jt_entry_size); + return -EINVAL; + } + + map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables", + 4, value_size, max_entries, NULL); + if (map_fd < 0) + return map_fd; + + if (!obj->jumptables_data) { + pr_warn("map '.jumptables': ELF file is missing jump table data\n"); + err = -EINVAL; + goto err_close; + } + if (sym_off + jt_size > obj->jumptables_data_sz) { + pr_warn("map '.jumptables': jumptables_data size is %zd, trying to access %d\n", + obj->jumptables_data_sz, sym_off + jt_size); + err = -EINVAL; + goto err_close; + } + + subprog_idx = -1; /* main program */ + if (relo->insn_idx < 0 || relo->insn_idx >= prog->insns_cnt) { + pr_warn("map '.jumptables': invalid instruction index %d\n", relo->insn_idx); + err = -EINVAL; + goto err_close; + } + if (prog->subprogs) + subprog_idx = find_subprog_idx(prog, relo->insn_idx); + + jt = (__u64 *)(obj->jumptables_data + sym_off); + for (i = 0; i < max_entries; i++) { + /* + * The offset should be made to be relative to the beginning of + * the main function, not the subfunction. + */ + insn_off = jt[i]/sizeof(struct bpf_insn); + if (subprog_idx >= 0) { + insn_off -= prog->subprogs[subprog_idx].sec_insn_off; + insn_off += prog->subprogs[subprog_idx].sub_insn_off; + } else { + insn_off -= prog->sec_insn_off; + } + + /* + * LLVM-generated jump tables contain u64 records, however + * should contain values that fit in u32. + */ + if (insn_off > UINT32_MAX) { + pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %d\n", + (long long)jt[i], sym_off + i * jt_entry_size); + err = -EINVAL; + goto err_close; + } + + val.orig_off = insn_off; + err = bpf_map_update_elem(map_fd, &i, &val, 0); + if (err) + goto err_close; + } + + err = bpf_map_freeze(map_fd); + if (err) + goto err_close; + + err = add_jt_map(obj, prog, sym_off, map_fd); + if (err) + goto err_close; + + return map_fd; + +err_close: + close(map_fd); + return err; +} + /* Relocate data references within program code: * - map references; * - global variable references; @@ -5697,37 +6356,39 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + const struct bpf_map *map; struct extern_desc *ext; switch (relo->type) { case RELO_LD64: + map = &obj->maps[relo->map_idx]; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX; insn[0].imm = relo->map_idx; - } else { + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_FD; - insn[0].imm = obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { + poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; case RELO_DATA: + map = &obj->maps[relo->map_idx]; insn[1].imm = insn[0].imm + relo->sym_off; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; insn[0].imm = relo->map_idx; - } else { - const struct bpf_map *map = &obj->maps[relo->map_idx]; - - if (map->skipped) { - pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n", - prog->name, i); - return -ENOTSUP; - } + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[0].imm = obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { + poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; - case RELO_EXTERN_VAR: - ext = &obj->externs[relo->sym_off]; + case RELO_EXTERN_LD64: + ext = &obj->externs[relo->ext_idx]; if (ext->type == EXT_KCFG) { if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; @@ -5748,15 +6409,15 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) } } break; - case RELO_EXTERN_FUNC: - ext = &obj->externs[relo->sym_off]; + case RELO_EXTERN_CALL: + ext = &obj->externs[relo->ext_idx]; insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; if (ext->is_set) { insn[0].imm = ext->ksym.kernel_btf_id; insn[0].off = ext->ksym.btf_fd_idx; - } else { /* unresolved weak kfunc */ - insn[0].imm = 0; - insn[0].off = 0; + } else { /* unresolved weak kfunc call */ + poison_kfunc_call(prog, i, relo->insn_idx, insn, + relo->ext_idx, ext); } break; case RELO_SUBPROG_ADDR: @@ -5773,6 +6434,20 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_CORE: /* will be handled by bpf_program_record_relos() */ break; + case RELO_INSN_ARRAY: { + int map_fd; + + map_fd = create_jt_map(obj, prog, relo); + if (map_fd < 0) { + pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n", + prog->name, i, relo->sym_off); + return map_fd; + } + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn->imm = map_fd; + insn->off = 0; + } + break; default: pr_warn("prog '%s': relo #%d: bad relo type %d\n", prog->name, i, relo->type); @@ -5793,14 +6468,13 @@ static int adjust_prog_btf_ext_info(const struct bpf_object *obj, void *rec, *rec_end, *new_prog_info; const struct btf_ext_info_sec *sec; size_t old_sz, new_sz; - const char *sec_name; - int i, off_adj; + int i, sec_num, sec_idx, off_adj; + sec_num = 0; for_each_btf_ext_sec(ext_info, sec) { - sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); - if (!sec_name) - return -EINVAL; - if (strcmp(sec_name, prog->sec_name) != 0) + sec_idx = ext_info->sec_idxs[sec_num]; + sec_num++; + if (prog->sec_idx != sec_idx) continue; for_each_btf_ext_rec(ext_info, sec, i, rec) { @@ -5860,7 +6534,7 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj, int err; /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't - * supprot func/line info + * support func/line info */ if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) return 0; @@ -5877,8 +6551,8 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj, &main_prog->func_info_rec_size); if (err) { if (err != -ENOENT) { - pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n", - prog->name, err); + pr_warn("prog '%s': error relocating .BTF.ext function info: %s\n", + prog->name, errstr(err)); return err; } if (main_prog->func_info) { @@ -5905,8 +6579,8 @@ line_info: &main_prog->line_info_rec_size); if (err) { if (err != -ENOENT) { - pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n", - prog->name, err); + pr_warn("prog '%s': error relocating .BTF.ext line info: %s\n", + prog->name, errstr(err)); return err; } if (main_prog->line_info) { @@ -5951,7 +6625,11 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra if (main_prog == subprog) return 0; relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); - if (!relos) + /* if new count is zero, reallocarray can return a valid NULL result; + * in this case the previous pointer will be freed, so we *have to* + * reassign old pointer to the new value (even if it's NULL) + */ + if (!relos && new_cnt) return -ENOMEM; if (subprog->nr_reloc) memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, @@ -5967,14 +6645,72 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra return 0; } +static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog) +{ + size_t size = sizeof(main_prog->subprogs[0]); + int cnt = main_prog->subprog_cnt; + void *tmp; + + tmp = libbpf_reallocarray(main_prog->subprogs, cnt + 1, size); + if (!tmp) + return -ENOMEM; + + main_prog->subprogs = tmp; + main_prog->subprogs[cnt].sec_insn_off = subprog->sec_insn_off; + main_prog->subprogs[cnt].sub_insn_off = subprog->sub_insn_off; + main_prog->subprog_cnt++; + + return 0; +} + +static int +bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, + struct bpf_program *subprog) +{ + struct bpf_insn *insns; + size_t new_cnt; + int err; + + subprog->sub_insn_off = main_prog->insns_cnt; + + new_cnt = main_prog->insns_cnt + subprog->insns_cnt; + insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); + return -ENOMEM; + } + main_prog->insns = insns; + main_prog->insns_cnt = new_cnt; + + memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, + subprog->insns_cnt * sizeof(*insns)); + + pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", + main_prog->name, subprog->insns_cnt, subprog->name); + + /* The subprog insns are now appended. Append its relos too. */ + err = append_subprog_relos(main_prog, subprog); + if (err) + return err; + + err = save_subprog_offsets(main_prog, subprog); + if (err) { + pr_warn("prog '%s': failed to add subprog offsets: %s\n", + main_prog->name, errstr(err)); + return err; + } + + return 0; +} + static int bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, struct bpf_program *prog) { - size_t sub_insn_idx, insn_idx, new_cnt; + size_t sub_insn_idx, insn_idx; struct bpf_program *subprog; - struct bpf_insn *insns, *insn; struct reloc_desc *relo; + struct bpf_insn *insn; int err; err = reloc_prog_func_and_line_info(obj, main_prog, prog); @@ -5987,7 +6723,7 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, continue; relo = find_prog_insn_relo(prog, insn_idx); - if (relo && relo->type == RELO_EXTERN_FUNC) + if (relo && relo->type == RELO_EXTERN_CALL) /* kfunc relocations will be handled later * in bpf_object__relocate_data() */ @@ -6049,25 +6785,7 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, * and relocate. */ if (subprog->sub_insn_off == 0) { - subprog->sub_insn_off = main_prog->insns_cnt; - - new_cnt = main_prog->insns_cnt + subprog->insns_cnt; - insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); - if (!insns) { - pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); - return -ENOMEM; - } - main_prog->insns = insns; - main_prog->insns_cnt = new_cnt; - - memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, - subprog->insns_cnt * sizeof(*insns)); - - pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", - main_prog->name, subprog->insns_cnt, subprog->name); - - /* The subprog insns are now appended. Append its relos too. */ - err = append_subprog_relos(main_prog, subprog); + err = bpf_object__append_subprog_code(obj, main_prog, subprog); if (err) return err; err = bpf_object__reloc_code(obj, main_prog, subprog); @@ -6083,7 +6801,8 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, * prog; each main prog can have a different set of * subprograms appended (potentially in different order as * well), so position of any subprog can be different for - * different main programs */ + * different main programs + */ insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", @@ -6195,7 +6914,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) if (err) return err; - return 0; } @@ -6242,8 +6960,408 @@ static void bpf_object__sort_relos(struct bpf_object *obj) } } -static int -bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) +static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog) +{ + const char *str = "exception_callback:"; + size_t pfx_len = strlen(str); + int i, j, n; + + if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG)) + return 0; + + n = btf__type_cnt(obj->btf); + for (i = 1; i < n; i++) { + const char *name; + struct btf_type *t; + + t = btf_type_by_id(obj->btf, i); + if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) + continue; + + name = btf__str_by_offset(obj->btf, t->name_off); + if (strncmp(name, str, pfx_len) != 0) + continue; + + t = btf_type_by_id(obj->btf, t->type); + if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { + pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n", + prog->name); + return -EINVAL; + } + if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0) + continue; + /* Multiple callbacks are specified for the same prog, + * the verifier will eventually return an error for this + * case, hence simply skip appending a subprog. + */ + if (prog->exception_cb_idx >= 0) { + prog->exception_cb_idx = -1; + break; + } + + name += pfx_len; + if (str_is_empty(name)) { + pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n", + prog->name); + return -EINVAL; + } + + for (j = 0; j < obj->nr_programs; j++) { + struct bpf_program *subprog = &obj->programs[j]; + + if (!prog_is_subprog(obj, subprog)) + continue; + if (strcmp(name, subprog->name) != 0) + continue; + /* Enforce non-hidden, as from verifier point of + * view it expects global functions, whereas the + * mark_btf_static fixes up linkage as static. + */ + if (!subprog->sym_global || subprog->mark_btf_static) { + pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", + prog->name, subprog->name); + return -EINVAL; + } + /* Let's see if we already saw a static exception callback with the same name */ + if (prog->exception_cb_idx >= 0) { + pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", + prog->name, subprog->name); + return -EINVAL; + } + prog->exception_cb_idx = j; + break; + } + + if (prog->exception_cb_idx >= 0) + continue; + + pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); + return -ENOENT; + } + + return 0; +} + +static struct { + enum bpf_prog_type prog_type; + const char *ctx_name; +} global_ctx_map[] = { + { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" }, + { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" }, + { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" }, + { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" }, + { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" }, + { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" }, + { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" }, + { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" }, + { BPF_PROG_TYPE_LWT_IN, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" }, + { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" }, + { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" }, + { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" }, + { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" }, + { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" }, + { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" }, + { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" }, + { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" }, + { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" }, + { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" }, + { BPF_PROG_TYPE_SK_SKB, "__sk_buff" }, + { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" }, + { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" }, + { BPF_PROG_TYPE_XDP, "xdp_md" }, + /* all other program types don't have "named" context structs */ +}; + +/* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef, + * for below __builtin_types_compatible_p() checks; + * with this approach we don't need any extra arch-specific #ifdef guards + */ +struct pt_regs; +struct user_pt_regs; +struct user_regs_struct; + +static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog, + const char *subprog_name, int arg_idx, + int arg_type_id, const char *ctx_name) +{ + const struct btf_type *t; + const char *tname; + + /* check if existing parameter already matches verifier expectations */ + t = skip_mods_and_typedefs(btf, arg_type_id, NULL); + if (!btf_is_ptr(t)) + goto out_warn; + + /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe + * and perf_event programs, so check this case early on and forget + * about it for subsequent checks + */ + while (btf_is_mod(t)) + t = btf__type_by_id(btf, t->type); + if (btf_is_typedef(t) && + (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) { + tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; + if (strcmp(tname, "bpf_user_pt_regs_t") == 0) + return false; /* canonical type for kprobe/perf_event */ + } + + /* now we can ignore typedefs moving forward */ + t = skip_mods_and_typedefs(btf, t->type, NULL); + + /* if it's `void *`, definitely fix up BTF info */ + if (btf_is_void(t)) + return true; + + /* if it's already proper canonical type, no need to fix up */ + tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; + if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0) + return false; + + /* special cases */ + switch (prog->type) { + case BPF_PROG_TYPE_KPROBE: + /* `struct pt_regs *` is expected, but we need to fix up */ + if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return true; + break; + case BPF_PROG_TYPE_PERF_EVENT: + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && + btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) + return true; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && + btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) + return true; + if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && + btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) + return true; + break; + case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: + /* allow u64* as ctx */ + if (btf_is_int(t) && t->size == 8) + return true; + break; + default: + break; + } + +out_warn: + pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n", + prog->name, subprog_name, arg_idx, ctx_name); + return false; +} + +static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog) +{ + int fn_id, fn_proto_id, ret_type_id, orig_proto_id; + int i, err, arg_cnt, fn_name_off, linkage; + struct btf_type *fn_t, *fn_proto_t, *t; + struct btf_param *p; + + /* caller already validated FUNC -> FUNC_PROTO validity */ + fn_t = btf_type_by_id(btf, orig_fn_id); + fn_proto_t = btf_type_by_id(btf, fn_t->type); + + /* Note that each btf__add_xxx() operation invalidates + * all btf_type and string pointers, so we need to be + * very careful when cloning BTF types. BTF type + * pointers have to be always refetched. And to avoid + * problems with invalidated string pointers, we + * add empty strings initially, then just fix up + * name_off offsets in place. Offsets are stable for + * existing strings, so that works out. + */ + fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */ + linkage = btf_func_linkage(fn_t); + orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */ + ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */ + arg_cnt = btf_vlen(fn_proto_t); + + /* clone FUNC_PROTO and its params */ + fn_proto_id = btf__add_func_proto(btf, ret_type_id); + if (fn_proto_id < 0) + return -EINVAL; + + for (i = 0; i < arg_cnt; i++) { + int name_off; + + /* copy original parameter data */ + t = btf_type_by_id(btf, orig_proto_id); + p = &btf_params(t)[i]; + name_off = p->name_off; + + err = btf__add_func_param(btf, "", p->type); + if (err) + return err; + + fn_proto_t = btf_type_by_id(btf, fn_proto_id); + p = &btf_params(fn_proto_t)[i]; + p->name_off = name_off; /* use remembered str offset */ + } + + /* clone FUNC now, btf__add_func() enforces non-empty name, so use + * entry program's name as a placeholder, which we replace immediately + * with original name_off + */ + fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id); + if (fn_id < 0) + return -EINVAL; + + fn_t = btf_type_by_id(btf, fn_id); + fn_t->name_off = fn_name_off; /* reuse original string */ + + return fn_id; +} + +/* Check if main program or global subprog's function prototype has `arg:ctx` + * argument tags, and, if necessary, substitute correct type to match what BPF + * verifier would expect, taking into account specific program type. This + * allows to support __arg_ctx tag transparently on old kernels that don't yet + * have a native support for it in the verifier, making user's life much + * easier. + */ +static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog) +{ + const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name; + struct bpf_func_info_min *func_rec; + struct btf_type *fn_t, *fn_proto_t; + struct btf *btf = obj->btf; + const struct btf_type *t; + struct btf_param *p; + int ptr_id = 0, struct_id, tag_id, orig_fn_id; + int i, n, arg_idx, arg_cnt, err, rec_idx; + int *orig_ids; + + /* no .BTF.ext, no problem */ + if (!obj->btf_ext || !prog->func_info) + return 0; + + /* don't do any fix ups if kernel natively supports __arg_ctx */ + if (kernel_supports(obj, FEAT_ARG_CTX_TAG)) + return 0; + + /* some BPF program types just don't have named context structs, so + * this fallback mechanism doesn't work for them + */ + for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) { + if (global_ctx_map[i].prog_type != prog->type) + continue; + ctx_name = global_ctx_map[i].ctx_name; + break; + } + if (!ctx_name) + return 0; + + /* remember original func BTF IDs to detect if we already cloned them */ + orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids)); + if (!orig_ids) + return -ENOMEM; + for (i = 0; i < prog->func_info_cnt; i++) { + func_rec = prog->func_info + prog->func_info_rec_size * i; + orig_ids[i] = func_rec->type_id; + } + + /* go through each DECL_TAG with "arg:ctx" and see if it points to one + * of our subprogs; if yes and subprog is global and needs adjustment, + * clone and adjust FUNC -> FUNC_PROTO combo + */ + for (i = 1, n = btf__type_cnt(btf); i < n; i++) { + /* only DECL_TAG with "arg:ctx" value are interesting */ + t = btf__type_by_id(btf, i); + if (!btf_is_decl_tag(t)) + continue; + if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0) + continue; + + /* only global funcs need adjustment, if at all */ + orig_fn_id = t->type; + fn_t = btf_type_by_id(btf, orig_fn_id); + if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL) + continue; + + /* sanity check FUNC -> FUNC_PROTO chain, just in case */ + fn_proto_t = btf_type_by_id(btf, fn_t->type); + if (!fn_proto_t || !btf_is_func_proto(fn_proto_t)) + continue; + + /* find corresponding func_info record */ + func_rec = NULL; + for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) { + if (orig_ids[rec_idx] == t->type) { + func_rec = prog->func_info + prog->func_info_rec_size * rec_idx; + break; + } + } + /* current main program doesn't call into this subprog */ + if (!func_rec) + continue; + + /* some more sanity checking of DECL_TAG */ + arg_cnt = btf_vlen(fn_proto_t); + arg_idx = btf_decl_tag(t)->component_idx; + if (arg_idx < 0 || arg_idx >= arg_cnt) + continue; + + /* check if we should fix up argument type */ + p = &btf_params(fn_proto_t)[arg_idx]; + fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>"; + if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name)) + continue; + + /* clone fn/fn_proto, unless we already did it for another arg */ + if (func_rec->type_id == orig_fn_id) { + int fn_id; + + fn_id = clone_func_btf_info(btf, orig_fn_id, prog); + if (fn_id < 0) { + err = fn_id; + goto err_out; + } + + /* point func_info record to a cloned FUNC type */ + func_rec->type_id = fn_id; + } + + /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument; + * we do it just once per main BPF program, as all global + * funcs share the same program type, so need only PTR -> + * STRUCT type chain + */ + if (ptr_id == 0) { + struct_id = btf__add_struct(btf, ctx_name, 0); + ptr_id = btf__add_ptr(btf, struct_id); + if (ptr_id < 0 || struct_id < 0) { + err = -EINVAL; + goto err_out; + } + } + + /* for completeness, clone DECL_TAG and point it to cloned param */ + tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx); + if (tag_id < 0) { + err = -EINVAL; + goto err_out; + } + + /* all the BTF manipulations invalidated pointers, refetch them */ + fn_t = btf_type_by_id(btf, func_rec->type_id); + fn_proto_t = btf_type_by_id(btf, fn_t->type); + + /* fix up type ID pointed to by param */ + p = &btf_params(fn_proto_t)[arg_idx]; + p->type = ptr_id; + } + + free(orig_ids); + return 0; +err_out: + free(orig_ids); + return err; +} + +static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { struct bpf_program *prog; size_t i, j; @@ -6252,12 +7370,11 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) if (obj->btf_ext) { err = bpf_object__relocate_core(obj, targ_btf_path); if (err) { - pr_warn("failed to perform CO-RE relocations: %d\n", - err); + pr_warn("failed to perform CO-RE relocations: %s\n", + errstr(err)); return err; } - if (obj->gen_loader) - bpf_object__sort_relos(obj); + bpf_object__sort_relos(obj); } /* Before relocating calls pre-process relocations and mark @@ -6293,32 +7410,62 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) */ if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; err = bpf_object__relocate_calls(obj, prog); if (err) { - pr_warn("prog '%s': failed to relocate calls: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to relocate calls: %s\n", + prog->name, errstr(err)); return err; } + + err = bpf_prog_assign_exc_cb(obj, prog); + if (err) + return err; + /* Now, also append exception callback if it has not been done already. */ + if (prog->exception_cb_idx >= 0) { + struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; + + /* Calling exception callback directly is disallowed, which the + * verifier will reject later. In case it was processed already, + * we can skip this step, otherwise for all other valid cases we + * have to append exception callback now. + */ + if (subprog->sub_insn_off == 0) { + err = bpf_object__append_subprog_code(obj, prog, subprog); + if (err) + return err; + err = bpf_object__reloc_code(obj, prog, subprog); + if (err) + return err; + } + } } - /* Process data relos for main programs */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; + + /* Process data relos for main programs */ err = bpf_object__relocate_data(obj, prog); if (err) { - pr_warn("prog '%s': failed to relocate data references: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to relocate data references: %s\n", + prog->name, errstr(err)); + return err; + } + + /* Fix up .BTF.ext information, if necessary */ + err = bpf_program_fixup_func_info(obj, prog); + if (err) { + pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %s\n", + prog->name, errstr(err)); return err; } } - if (!obj->gen_loader) - bpf_object__free_relocs(obj); + return 0; } @@ -6479,12 +7626,12 @@ static int bpf_object__collect_relos(struct bpf_object *obj) data = sec_desc->data; idx = shdr->sh_info; - if (shdr->sh_type != SHT_REL) { + if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) { pr_warn("internal error at %d\n", __LINE__); return -LIBBPF_ERRNO__INTERNAL; } - if (idx == obj->efile.st_ops_shndx) + if (obj->efile.secs[idx].sec_type == SEC_ST_OPS) err = bpf_object__collect_st_ops_relos(obj, shdr, data); else if (idx == obj->efile.btf_maps_shndx) err = bpf_object__collect_map_relos(obj, shdr, data); @@ -6549,9 +7696,9 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, int *btf_obj_fd, int *btf_type_id); -/* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */ -static int libbpf_preload_prog(struct bpf_program *prog, - struct bpf_prog_load_opts *opts, long cookie) +/* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */ +static int libbpf_prepare_prog_load(struct bpf_program *prog, + struct bpf_prog_load_opts *opts, long cookie) { enum sec_def_flags def = cookie; @@ -6562,13 +7709,40 @@ static int libbpf_preload_prog(struct bpf_program *prog, if (def & SEC_SLEEPABLE) opts->prog_flags |= BPF_F_SLEEPABLE; - if ((prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_LSM || - prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { + if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) + opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; + + /* special check for usdt to use uprobe_multi link */ + if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) { + /* for BPF_TRACE_UPROBE_MULTI, user might want to query expected_attach_type + * in prog, and expected_attach_type we set in kernel is from opts, so we + * update both. + */ + prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; + opts->expected_attach_type = BPF_TRACE_UPROBE_MULTI; + } + + if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; const char *attach_name; - attach_name = strchr(prog->sec_name, '/') + 1; + attach_name = strchr(prog->sec_name, '/'); + if (!attach_name) { + /* if BPF program is annotated with just SEC("fentry") + * (or similar) without declaratively specifying + * target, then it is expected that target will be + * specified with bpf_program__set_attach_target() at + * runtime before BPF object load step. If not, then + * there is nothing to load into the kernel as BPF + * verifier won't be able to validate BPF program + * correctness anyways. + */ + pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n", + prog->name); + return -EINVAL; + } + attach_name++; /* skip over / */ + err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); if (err) return err; @@ -6588,21 +7762,25 @@ static int libbpf_preload_prog(struct bpf_program *prog, return 0; } -static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, - struct bpf_insn *insns, int insns_cnt, - const char *license, __u32 kern_version, - int *prog_fd) +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); + +static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, + struct bpf_insn *insns, int insns_cnt, + const char *license, __u32 kern_version, int *prog_fd) { LIBBPF_OPTS(bpf_prog_load_opts, load_attr); const char *prog_name = NULL; - char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; char *log_buf = NULL, *tmp; - int btf_fd, ret, err; bool own_log_buf = true; __u32 log_level = prog->log_level; + int ret, err; - if (prog->type == BPF_PROG_TYPE_UNSPEC) { + /* Be more helpful by rejecting programs that can't be validated early + * with more meaningful and actionable error message. + */ + switch (prog->type) { + case BPF_PROG_TYPE_UNSPEC: /* * The program type must be set. Most likely we couldn't find a proper * section definition at load time, and thus we didn't infer the type. @@ -6610,12 +7788,20 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n", prog->name, prog->sec_name); return -EINVAL; + case BPF_PROG_TYPE_STRUCT_OPS: + if (prog->attach_btf_id == 0) { + pr_warn("prog '%s': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?\n", + prog->name); + return -EINVAL; + } + break; + default: + break; } if (!insns || !insns_cnt) return -EINVAL; - load_attr.expected_attach_type = prog->expected_attach_type; if (kernel_supports(obj, FEAT_PROG_NAME)) prog_name = prog->name; load_attr.attach_prog_fd = prog->attach_prog_fd; @@ -6623,11 +7809,11 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.expected_attach_type = prog->expected_attach_type; /* specify func_info/line_info only if kernel supports them */ - btf_fd = bpf_object__btf_fd(obj); - if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { - load_attr.prog_btf_fd = btf_fd; + if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { + load_attr.prog_btf_fd = btf__fd(obj->btf); load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; load_attr.func_info_cnt = prog->func_info_cnt; @@ -6639,14 +7825,20 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog load_attr.prog_flags = prog->prog_flags; load_attr.fd_array = obj->fd_array; + load_attr.token_fd = obj->token_fd; + if (obj->token_fd) + load_attr.prog_flags |= BPF_F_TOKEN_FD; + /* adjust load_attr if sec_def provides custom preload callback */ - if (prog->sec_def && prog->sec_def->preload_fn) { - err = prog->sec_def->preload_fn(prog, &load_attr, prog->sec_def->cookie); + if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { + err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); if (err < 0) { - pr_warn("prog '%s': failed to prepare load attributes: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to prepare load attributes: %s\n", + prog->name, errstr(err)); return err; } + insns = prog->insns; + insns_cnt = prog->insns_cnt; } if (obj->gen_loader) { @@ -6658,7 +7850,7 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog } retry_load: - /* if log_level is zero, we don't request logs initiallly even if + /* if log_level is zero, we don't request logs initially even if * custom log_buf is specified; if the program load fails, then we'll * bump log_level to 1 and use either custom log_buf or we'll allocate * our own and retry the load to get details on what failed @@ -6705,10 +7897,9 @@ retry_load: if (map->libbpf_type != LIBBPF_MAP_RODATA) continue; - if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + if (bpf_prog_bind_map(ret, map->fd, NULL)) { pr_warn("prog '%s': failed to bind map '%s': %s\n", - prog->name, map->real_name, cp); + prog->name, map->real_name, errstr(errno)); /* Don't fail hard if can't bind rodata. */ } } @@ -6734,18 +7925,17 @@ retry_load: goto retry_load; ret = -errno; - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); + + /* post-process verifier log to improve error descriptions */ + fixup_verifier_log(prog, log_buf, log_buf_size); + + pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, errstr(errno)); pr_perm_msg(ret); if (own_log_buf && log_buf && log_buf[0] != '\0') { pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", prog->name, log_buf); } - if (insns_cnt >= BPF_MAXINSNS) { - pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", - prog->name, insns_cnt, BPF_MAXINSNS); - } out: if (own_log_buf) @@ -6753,6 +7943,212 @@ out: return ret; } +static char *find_prev_line(char *buf, char *cur) +{ + char *p; + + if (cur == buf) /* end of a log buf */ + return NULL; + + p = cur - 1; + while (p - 1 >= buf && *(p - 1) != '\n') + p--; + + return p; +} + +static void patch_log(char *buf, size_t buf_sz, size_t log_sz, + char *orig, size_t orig_sz, const char *patch) +{ + /* size of the remaining log content to the right from the to-be-replaced part */ + size_t rem_sz = (buf + log_sz) - (orig + orig_sz); + size_t patch_sz = strlen(patch); + + if (patch_sz != orig_sz) { + /* If patch line(s) are longer than original piece of verifier log, + * shift log contents by (patch_sz - orig_sz) bytes to the right + * starting from after to-be-replaced part of the log. + * + * If patch line(s) are shorter than original piece of verifier log, + * shift log contents by (orig_sz - patch_sz) bytes to the left + * starting from after to-be-replaced part of the log + * + * We need to be careful about not overflowing available + * buf_sz capacity. If that's the case, we'll truncate the end + * of the original log, as necessary. + */ + if (patch_sz > orig_sz) { + if (orig + patch_sz >= buf + buf_sz) { + /* patch is big enough to cover remaining space completely */ + patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; + rem_sz = 0; + } else if (patch_sz - orig_sz > buf_sz - log_sz) { + /* patch causes part of remaining log to be truncated */ + rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); + } + } + /* shift remaining log to the right by calculated amount */ + memmove(orig + patch_sz, orig + orig_sz, rem_sz); + } + + memcpy(orig, patch, patch_sz); +} + +static void fixup_log_failed_core_relo(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded CO-RE relocation: + * line1 -> 123: (85) call unknown#195896080 + * line2 -> invalid func unknown#195896080 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. We extract + * instruction index to find corresponding CO-RE relocation and + * replace this part of the log with more relevant information about + * failed CO-RE relocation. + */ + const struct bpf_core_relo *relo; + struct bpf_core_spec spec; + char patch[512], spec_buf[256]; + int insn_idx, err, spec_len; + + if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) + return; + + relo = find_relo_core(prog, insn_idx); + if (!relo) + return; + + err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); + if (err) + return; + + spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); + snprintf(patch, sizeof(patch), + "%d: <invalid CO-RE relocation>\n" + "failed to resolve CO-RE relocation %s%s\n", + insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_log_missing_map_load(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded map reference: + * line1 -> 123: (85) call unknown#2001000345 + * line2 -> invalid func unknown#2001000345 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. + * "345" in "2001000345" is a map index in obj->maps to fetch map name. + */ + struct bpf_object *obj = prog->obj; + const struct bpf_map *map; + int insn_idx, map_idx; + char patch[128]; + + if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) + return; + + map_idx -= POISON_LDIMM64_MAP_BASE; + if (map_idx < 0 || map_idx >= obj->nr_maps) + return; + map = &obj->maps[map_idx]; + + snprintf(patch, sizeof(patch), + "%d: <invalid BPF map reference>\n" + "BPF map '%s' is referenced but wasn't created\n", + insn_idx, map->name); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_log_missing_kfunc_call(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded kfunc call: + * line1 -> 123: (85) call unknown#2002000345 + * line2 -> invalid func unknown#2002000345 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. + * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name. + */ + struct bpf_object *obj = prog->obj; + const struct extern_desc *ext; + int insn_idx, ext_idx; + char patch[128]; + + if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2) + return; + + ext_idx -= POISON_CALL_KFUNC_BASE; + if (ext_idx < 0 || ext_idx >= obj->nr_extern) + return; + ext = &obj->externs[ext_idx]; + + snprintf(patch, sizeof(patch), + "%d: <invalid kfunc call>\n" + "kfunc '%s' is referenced but wasn't resolved\n", + insn_idx, ext->name); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) +{ + /* look for familiar error patterns in last N lines of the log */ + const size_t max_last_line_cnt = 10; + char *prev_line, *cur_line, *next_line; + size_t log_sz; + int i; + + if (!buf) + return; + + log_sz = strlen(buf) + 1; + next_line = buf + log_sz - 1; + + for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { + cur_line = find_prev_line(buf, next_line); + if (!cur_line) + return; + + if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + /* failed CO-RE relocation case */ + fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + /* reference to uncreated BPF map */ + fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + /* reference to unresolved kfunc */ + fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } + } +} + static int bpf_program_record_relos(struct bpf_program *prog) { struct bpf_object *obj = prog->obj; @@ -6760,19 +8156,22 @@ static int bpf_program_record_relos(struct bpf_program *prog) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; - struct extern_desc *ext = &obj->externs[relo->sym_off]; + struct extern_desc *ext = &obj->externs[relo->ext_idx]; + int kind; switch (relo->type) { - case RELO_EXTERN_VAR: + case RELO_EXTERN_LD64: if (ext->type != EXT_KSYM) continue; + kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ? + BTF_KIND_VAR : BTF_KIND_FUNC; bpf_gen__record_extern(obj->gen_loader, ext->name, ext->is_weak, !ext->ksym.type_id, - BTF_KIND_VAR, relo->insn_idx); + true, kind, relo->insn_idx); break; - case RELO_EXTERN_FUNC: + case RELO_EXTERN_CALL: bpf_gen__record_extern(obj->gen_loader, ext->name, - ext->is_weak, false, BTF_KIND_FUNC, + ext->is_weak, false, false, BTF_KIND_FUNC, relo->insn_idx); break; case RELO_CORE: { @@ -6793,95 +8192,39 @@ static int bpf_program_record_relos(struct bpf_program *prog) return 0; } -static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, - const char *license, __u32 kern_ver) +static int +bpf_object__load_progs(struct bpf_object *obj, int log_level) { - int err = 0, fd, i; - - if (obj->loaded) { - pr_warn("prog '%s': can't load after object was loaded\n", prog->name); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr < 0 || !prog->instances.fds) { - if (prog->preprocessor) { - pr_warn("Internal error: can't load program '%s'\n", - prog->name); - return libbpf_err(-LIBBPF_ERRNO__INTERNAL); - } + struct bpf_program *prog; + size_t i; + int err; - prog->instances.fds = malloc(sizeof(int)); - if (!prog->instances.fds) { - pr_warn("Not enough memory for BPF fds\n"); - return libbpf_err(-ENOMEM); + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + if (prog_is_subprog(obj, prog)) + continue; + if (!prog->autoload) { + pr_debug("prog '%s': skipped loading\n", prog->name); + continue; } - prog->instances.nr = 1; - prog->instances.fds[0] = -1; - } + prog->log_level |= log_level; - if (!prog->preprocessor) { - if (prog->instances.nr != 1) { - pr_warn("prog '%s': inconsistent nr(%d) != 1\n", - prog->name, prog->instances.nr); - } if (obj->gen_loader) bpf_program_record_relos(prog); - err = bpf_object_load_prog_instance(obj, prog, - prog->insns, prog->insns_cnt, - license, kern_ver, &fd); - if (!err) - prog->instances.fds[0] = fd; - goto out; - } - - for (i = 0; i < prog->instances.nr; i++) { - struct bpf_prog_prep_result result; - bpf_program_prep_t preprocessor = prog->preprocessor; - - memset(&result, 0, sizeof(result)); - err = preprocessor(prog, i, prog->insns, - prog->insns_cnt, &result); - if (err) { - pr_warn("Preprocessing the %dth instance of program '%s' failed\n", - i, prog->name); - goto out; - } - if (!result.new_insn_ptr || !result.new_insn_cnt) { - pr_debug("Skip loading the %dth instance of program '%s'\n", - i, prog->name); - prog->instances.fds[i] = -1; - if (result.pfd) - *result.pfd = -1; - continue; - } - - err = bpf_object_load_prog_instance(obj, prog, - result.new_insn_ptr, result.new_insn_cnt, - license, kern_ver, &fd); + err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, + obj->license, obj->kern_version, &prog->fd); if (err) { - pr_warn("Loading the %dth instance of program '%s' failed\n", - i, prog->name); - goto out; + pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err)); + return err; } - - if (result.pfd) - *result.pfd = fd; - prog->instances.fds[i] = fd; } -out: - if (err) - pr_warn("failed to load program '%s'\n", prog->name); - return libbpf_err(err); -} -int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver) -{ - return bpf_object_load_prog(prog->obj, prog, license, kern_ver); + bpf_object__free_relocs(obj); + return 0; } -static int -bpf_object__load_progs(struct bpf_object *obj, int log_level) +static int bpf_object_prepare_progs(struct bpf_object *obj) { struct bpf_program *prog; size_t i; @@ -6893,22 +8236,6 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) if (err) return err; } - - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - if (prog_is_subprog(obj, prog)) - continue; - if (!prog->load) { - pr_debug("prog '%s': skipped loading\n", prog->name); - continue; - } - prog->log_level |= log_level; - err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version); - if (err) - return err; - } - if (obj->gen_loader) - bpf_object__free_relocs(obj); return 0; } @@ -6928,24 +8255,17 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object continue; } - bpf_program__set_type(prog, prog->sec_def->prog_type); - bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING || - prog->sec_def->prog_type == BPF_PROG_TYPE_EXT) - prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); -#pragma GCC diagnostic pop + prog->type = prog->sec_def->prog_type; + prog->expected_attach_type = prog->sec_def->expected_attach_type; /* sec_def can have custom callback which should be called * after bpf_program is initialized to adjust its properties */ - if (prog->sec_def->init_fn) { - err = prog->sec_def->init_fn(prog, prog->sec_def->cookie); + if (prog->sec_def->prog_setup_fn) { + err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); if (err < 0) { - pr_warn("prog '%s': failed to initialize: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to initialize: %s\n", + prog->name, errstr(err)); return err; } } @@ -6955,16 +8275,19 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object } static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, + const char *obj_name, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path; + const char *kconfig, *btf_tmp_path, *token_path; struct bpf_object *obj; - char tmp_name[64]; int err; char *log_buf; size_t log_size; __u32 log_level; + if (obj_buf && !obj_name) + return ERR_PTR(-EINVAL); + if (elf_version(EV_CURRENT) == EV_NONE) { pr_warn("failed to init libelf for %s\n", path ? : "(mem buf)"); @@ -6974,16 +8297,12 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (!OPTS_VALID(opts, bpf_object_open_opts)) return ERR_PTR(-EINVAL); - obj_name = OPTS_GET(opts, object_name, NULL); + obj_name = OPTS_GET(opts, object_name, NULL) ?: obj_name; if (obj_buf) { - if (!obj_name) { - snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", - (unsigned long)obj_buf, - (unsigned long)obj_buf_sz); - obj_name = tmp_name; - } path = obj_name; pr_debug("loading object '%s' from buffer\n", obj_name); + } else { + pr_debug("loading object from %s\n", path); } log_buf = OPTS_GET(opts, kernel_log_buf, NULL); @@ -6994,6 +8313,16 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); + token_path = OPTS_GET(opts, bpf_token_path, NULL); + /* if user didn't specify bpf_token_path explicitly, check if + * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path + * option + */ + if (!token_path) + token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); + if (token_path && strlen(token_path) >= PATH_MAX) + return ERR_PTR(-ENAMETOOLONG); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7002,6 +8331,14 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; + if (token_path) { + obj->token_path = strdup(token_path); + if (!obj->token_path) { + err = -ENOMEM; + goto out; + } + } + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -7025,10 +8362,9 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, } err = bpf_object__elf_init(obj); - err = err ? : bpf_object__check_endianness(obj); err = err ? : bpf_object__elf_collect(obj); err = err ? : bpf_object__collect_externs(obj); - err = err ? : bpf_object__finalize_btf(obj); + err = err ? : bpf_object_fixup_btf(obj); err = err ? : bpf_object__init_maps(obj, opts); err = err ? : bpf_object_init_progs(obj, opts); err = err ? : bpf_object__collect_relos(obj); @@ -7043,72 +8379,33 @@ out: return ERR_PTR(err); } -static struct bpf_object * -__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) -{ - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .relaxed_maps = flags & MAPS_RELAX_COMPAT, - ); - - /* param validation */ - if (!attr->file) - return NULL; - - pr_debug("loading %s\n", attr->file); - return bpf_object_open(attr->file, NULL, 0, &opts); -} - -struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) -{ - return libbpf_ptr(__bpf_object__open_xattr(attr, 0)); -} - -struct bpf_object *bpf_object__open(const char *path) -{ - struct bpf_object_open_attr attr = { - .file = path, - .prog_type = BPF_PROG_TYPE_UNSPEC, - }; - - return libbpf_ptr(__bpf_object__open_xattr(&attr, 0)); -} - struct bpf_object * bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) { if (!path) return libbpf_err_ptr(-EINVAL); - pr_debug("loading %s\n", path); + return libbpf_ptr(bpf_object_open(path, NULL, 0, NULL, opts)); +} - return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); +struct bpf_object *bpf_object__open(const char *path) +{ + return bpf_object__open_file(path, NULL); } struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { + char tmp_name[64]; + if (!obj_buf || obj_buf_sz == 0) return libbpf_err_ptr(-EINVAL); - return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); -} - -struct bpf_object * -bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, - const char *name) -{ - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .object_name = name, - /* wrong default, but backwards-compatible */ - .relaxed_maps = true, - ); - - /* returning NULL is wrong, but backwards-compatible */ - if (!obj_buf || obj_buf_sz == 0) - return errno = EINVAL, NULL; + /* create a (quite useless) default "name" for this memory buffer object */ + snprintf(tmp_name, sizeof(tmp_name), "%lx-%zx", (unsigned long)obj_buf, obj_buf_sz); - return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts)); + return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, tmp_name, opts)); } static int bpf_object_unload(struct bpf_object *obj) @@ -7130,8 +8427,6 @@ static int bpf_object_unload(struct bpf_object *obj) return 0; } -int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload"))); - static int bpf_object__sanitize_maps(struct bpf_object *obj) { struct bpf_map *m; @@ -7140,25 +8435,26 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) if (!bpf_map__is_internal(m)) continue; if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) - m->def.map_flags ^= BPF_F_MMAPABLE; + m->def.map_flags &= ~BPF_F_MMAPABLE; } return 0; } -static int bpf_object__read_kallsyms_file(struct bpf_object *obj) +typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type, + const char *sym_name, void *ctx); + +static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) { char sym_type, sym_name[500]; unsigned long long sym_addr; - const struct btf_type *t; - struct extern_desc *ext; int ret, err = 0; FILE *f; - f = fopen("/proc/kallsyms", "r"); + f = fopen("/proc/kallsyms", "re"); if (!f) { err = -errno; - pr_warn("failed to open /proc/kallsyms: %d\n", err); + pr_warn("failed to open /proc/kallsyms: %s\n", errstr(err)); return err; } @@ -7170,33 +8466,54 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj) if (ret != 3) { pr_warn("failed to read kallsyms entry: %d\n", ret); err = -EINVAL; - goto out; + break; } + err = cb(sym_addr, sym_type, sym_name, ctx); + if (err) + break; + } + + fclose(f); + return err; +} + +static int kallsyms_cb(unsigned long long sym_addr, char sym_type, + const char *sym_name, void *ctx) +{ + struct bpf_object *obj = ctx; + const struct btf_type *t; + struct extern_desc *ext; + char *res; + + res = strstr(sym_name, ".llvm."); + if (sym_type == 'd' && res) + ext = find_extern_by_name_with_len(obj, sym_name, res - sym_name); + else ext = find_extern_by_name(obj, sym_name); - if (!ext || ext->type != EXT_KSYM) - continue; + if (!ext || ext->type != EXT_KSYM) + return 0; - t = btf__type_by_id(obj->btf, ext->btf_id); - if (!btf_is_var(t)) - continue; + t = btf__type_by_id(obj->btf, ext->btf_id); + if (!btf_is_var(t)) + return 0; - if (ext->is_set && ext->ksym.addr != sym_addr) { - pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n", - sym_name, ext->ksym.addr, sym_addr); - err = -EINVAL; - goto out; - } - if (!ext->is_set) { - ext->is_set = true; - ext->ksym.addr = sym_addr; - pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr); - } + if (ext->is_set && ext->ksym.addr != sym_addr) { + pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", + sym_name, ext->ksym.addr, sym_addr); + return -EINVAL; + } + if (!ext->is_set) { + ext->is_set = true; + ext->ksym.addr = sym_addr; + pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); } + return 0; +} -out: - fclose(f); - return err; +static int bpf_object__read_kallsyms_file(struct bpf_object *obj) +{ + return libbpf_kallsyms_parse(kallsyms_cb, obj); } static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, @@ -7297,7 +8614,8 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, local_func_proto_id = ext->ksym.type_id; - kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); + kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf, + &mod_btf); if (kfunc_id < 0) { if (kfunc_id == -ESRCH && ext->is_weak) return 0; @@ -7312,8 +8630,12 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, kern_btf, kfunc_proto_id); if (ret <= 0) { - pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n", - ext->name, local_func_proto_id, kfunc_proto_id); + if (ext->is_weak) + return 0; + + pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n", + ext->name, local_func_proto_id, + mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id); return -EINVAL; } @@ -7341,8 +8663,14 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, ext->is_set = true; ext->ksym.kernel_btf_id = kfunc_id; ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; - pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n", - ext->name, kfunc_id); + /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data() + * populates FD into ld_imm64 insn when it's used to point to kfunc. + * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call. + * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64. + */ + ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; + pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n", + ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id); return 0; } @@ -7393,28 +8721,52 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; - if (ext->type == EXT_KCFG && - strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { - void *ext_val = kcfg_data + ext->kcfg.data_off; - __u32 kver = get_kernel_version(); + if (ext->type == EXT_KSYM) { + if (ext->ksym.type_id) + need_vmlinux_btf = true; + else + need_kallsyms = true; + continue; + } else if (ext->type == EXT_KCFG) { + void *ext_ptr = kcfg_data + ext->kcfg.data_off; + __u64 value = 0; + + /* Kconfig externs need actual /proc/config.gz */ + if (str_has_pfx(ext->name, "CONFIG_")) { + need_config = true; + continue; + } - if (!kver) { - pr_warn("failed to get kernel version\n"); + /* Virtual kcfg externs are customly handled by libbpf */ + if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { + value = get_kernel_version(); + if (!value) { + pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name); + return -EINVAL; + } + } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) { + value = kernel_supports(obj, FEAT_BPF_COOKIE); + } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) { + value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER); + } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) { + /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed + * __kconfig externs, where LINUX_ ones are virtual and filled out + * customly by libbpf (their values don't come from Kconfig). + * If LINUX_xxx variable is not recognized by libbpf, but is marked + * __weak, it defaults to zero value, just like for CONFIG_xxx + * externs. + */ + pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); return -EINVAL; } - err = set_kcfg_value_num(ext, ext_val, kver); + + err = set_kcfg_value_num(ext, ext_ptr, value); if (err) return err; - pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver); - } else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) { - need_config = true; - } else if (ext->type == EXT_KSYM) { - if (ext->ksym.type_id) - need_vmlinux_btf = true; - else - need_kallsyms = true; + pr_debug("extern (kcfg) '%s': set to 0x%llx\n", + ext->name, (long long)value); } else { - pr_warn("unrecognized extern '%s'\n", ext->name); + pr_warn("extern '%s': unrecognized extern kind\n", ext->name); return -EINVAL; } } @@ -7450,10 +8802,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, ext = &obj->externs[i]; if (!ext->is_set && !ext->is_weak) { - pr_warn("extern %s (strong) not resolved\n", ext->name); + pr_warn("extern '%s' (strong): not resolved\n", ext->name); return -ESRCH; } else if (!ext->is_set) { - pr_debug("extern %s (weak) not resolved, defaulting to zero\n", + pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", ext->name); } } @@ -7461,42 +8813,62 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, return 0; } -static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) +static void bpf_map_prepare_vdata(const struct bpf_map *map) { - int err, i; + const struct btf_type *type; + struct bpf_struct_ops *st_ops; + __u32 i; - if (!obj) - return libbpf_err(-EINVAL); + st_ops = map->st_ops; + type = btf__type_by_id(map->obj->btf, st_ops->type_id); + for (i = 0; i < btf_vlen(type); i++) { + struct bpf_program *prog = st_ops->progs[i]; + void *kern_data; + int prog_fd; - if (obj->loaded) { - pr_warn("object '%s': load can't be attempted twice\n", obj->name); - return libbpf_err(-EINVAL); + if (!prog) + continue; + + prog_fd = bpf_program__fd(prog); + kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; + *(unsigned long *)kern_data = prog_fd; } +} - if (obj->gen_loader) - bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); +static int bpf_object_prepare_struct_ops(struct bpf_object *obj) +{ + struct bpf_map *map; + int i; - err = bpf_object__probe_loading(obj); - err = err ? : bpf_object__load_vmlinux_btf(obj, false); - err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); - err = err ? : bpf_object__sanitize_and_load_btf(obj); - err = err ? : bpf_object__sanitize_maps(obj); - err = err ? : bpf_object__init_kern_struct_ops_maps(obj); - err = err ? : bpf_object__create_maps(obj); - err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); - err = err ? : bpf_object__load_progs(obj, extra_log_level); - err = err ? : bpf_object_init_prog_arrays(obj); + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; - if (obj->gen_loader) { - /* reset FDs */ - if (obj->btf) - btf__set_fd(obj->btf, -1); - for (i = 0; i < obj->nr_maps; i++) - obj->maps[i].fd = -1; - if (!err) - err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); + if (!bpf_map__is_struct_ops(map)) + continue; + + if (!map->autocreate) + continue; + + bpf_map_prepare_vdata(map); } + return 0; +} + +static void bpf_object_unpin(struct bpf_object *obj) +{ + int i; + + /* unpin any maps that were auto-pinned during load */ + for (i = 0; i < obj->nr_maps; i++) + if (obj->maps[i].pinned && !obj->maps[i].reused) + bpf_map__unpin(&obj->maps[i], NULL); +} + +static void bpf_object_post_load_cleanup(struct bpf_object *obj) +{ + int i; + /* clean up fd_array */ zfree(&obj->fd_array); @@ -7506,32 +8878,101 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch btf__free(obj->btf_modules[i].btf); free(obj->btf_modules[i].name); } - free(obj->btf_modules); + obj->btf_module_cnt = 0; + zfree(&obj->btf_modules); /* clean up vmlinux BTF */ btf__free(obj->btf_vmlinux); obj->btf_vmlinux = NULL; +} - obj->loaded = true; /* doesn't matter if successfully or not */ +static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path) +{ + int err; - if (err) - goto out; + if (obj->state >= OBJ_PREPARED) { + pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name); + return -EINVAL; + } + err = bpf_object_prepare_token(obj); + err = err ? : bpf_object__probe_loading(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj, false); + err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); + err = err ? : bpf_object__sanitize_maps(obj); + err = err ? : bpf_object__init_kern_struct_ops_maps(obj); + err = err ? : bpf_object_adjust_struct_ops_autoload(obj); + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); + err = err ? : bpf_object__sanitize_and_load_btf(obj); + err = err ? : bpf_object__create_maps(obj); + err = err ? : bpf_object_prepare_progs(obj); + + if (err) { + bpf_object_unpin(obj); + bpf_object_unload(obj); + obj->state = OBJ_LOADED; + return err; + } + + obj->state = OBJ_PREPARED; return 0; -out: - /* unpin any maps that were auto-pinned during load */ - for (i = 0; i < obj->nr_maps; i++) - if (obj->maps[i].pinned && !obj->maps[i].reused) - bpf_map__unpin(&obj->maps[i], NULL); +} - bpf_object_unload(obj); - pr_warn("failed to load object '%s'\n", obj->path); - return libbpf_err(err); +static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) +{ + int err; + + if (!obj) + return libbpf_err(-EINVAL); + + if (obj->state >= OBJ_LOADED) { + pr_warn("object '%s': load can't be attempted twice\n", obj->name); + return libbpf_err(-EINVAL); + } + + /* Disallow kernel loading programs of non-native endianness but + * permit cross-endian creation of "light skeleton". + */ + if (obj->gen_loader) { + bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); + } else if (!is_native_endianness(obj)) { + pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name); + return libbpf_err(-LIBBPF_ERRNO__ENDIAN); + } + + if (obj->state < OBJ_PREPARED) { + err = bpf_object_prepare(obj, target_btf_path); + if (err) + return libbpf_err(err); + } + err = bpf_object__load_progs(obj, extra_log_level); + err = err ? : bpf_object_init_prog_arrays(obj); + err = err ? : bpf_object_prepare_struct_ops(obj); + + if (obj->gen_loader) { + /* reset FDs */ + if (obj->btf) + btf__set_fd(obj->btf, -1); + if (!err) + err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); + } + + bpf_object_post_load_cleanup(obj); + obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */ + + if (err) { + bpf_object_unpin(obj); + bpf_object_unload(obj); + pr_warn("failed to load object '%s'\n", obj->path); + return libbpf_err(err); + } + + return 0; } -int bpf_object__load_xattr(struct bpf_object_load_attr *attr) +int bpf_object__prepare(struct bpf_object *obj) { - return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path); + return libbpf_err(bpf_object_prepare(obj, NULL)); } int bpf_object__load(struct bpf_object *obj) @@ -7541,7 +8982,6 @@ int bpf_object__load(struct bpf_object *obj) static int make_parent_dir(const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; char *dname, *dir; int err = 0; @@ -7555,15 +8995,13 @@ static int make_parent_dir(const char *path) free(dname); if (err) { - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to mkdir %s: %s\n", path, cp); + pr_warn("failed to mkdir %s: %s\n", path, errstr(err)); } return err; } static int check_path(const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; struct statfs st_fs; char *dname, *dir; int err = 0; @@ -7577,8 +9015,7 @@ static int check_path(const char *path) dir = dirname(dname); if (statfs(dir, &st_fs)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("failed to statfs %s: %s\n", dir, cp); + pr_warn("failed to statfs %s: %s\n", dir, errstr(errno)); err = -errno; } free(dname); @@ -7591,11 +9028,15 @@ static int check_path(const char *path) return err; } -static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, int instance) +int bpf_program__pin(struct bpf_program *prog, const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; int err; + if (prog->fd < 0) { + pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name); + return libbpf_err(-EINVAL); + } + err = make_parent_dir(path); if (err) return libbpf_err(err); @@ -7604,176 +9045,39 @@ static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, if (err) return libbpf_err(err); - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (instance < 0 || instance >= prog->instances.nr) { - pr_warn("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->name, prog->instances.nr); - return libbpf_err(-EINVAL); - } - - if (bpf_obj_pin(prog->instances.fds[instance], path)) { + if (bpf_obj_pin(prog->fd, path)) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("failed to pin program: %s\n", cp); + pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, errstr(err)); return libbpf_err(err); } - pr_debug("pinned program '%s'\n", path); + pr_debug("prog '%s': pinned at '%s'\n", prog->name, path); return 0; } -static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path, int instance) +int bpf_program__unpin(struct bpf_program *prog, const char *path) { int err; - err = check_path(path); - if (err) - return libbpf_err(err); - - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (instance < 0 || instance >= prog->instances.nr) { - pr_warn("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->name, prog->instances.nr); + if (prog->fd < 0) { + pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name); return libbpf_err(-EINVAL); } - err = unlink(path); - if (err != 0) - return libbpf_err(-errno); - - pr_debug("unpinned program '%s'\n", path); - - return 0; -} - -__attribute__((alias("bpf_program_pin_instance"))) -int bpf_object__pin_instance(struct bpf_program *prog, const char *path, int instance); - -__attribute__((alias("bpf_program_unpin_instance"))) -int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); - -int bpf_program__pin(struct bpf_program *prog, const char *path) -{ - int i, err; - - err = make_parent_dir(path); - if (err) - return libbpf_err(err); - err = check_path(path); if (err) return libbpf_err(err); - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr <= 0) { - pr_warn("no instances of prog %s to pin\n", prog->name); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr == 1) { - /* don't create subdirs when pinning single instance */ - return bpf_program_pin_instance(prog, path, 0); - } - - for (i = 0; i < prog->instances.nr; i++) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) { - err = -EINVAL; - goto err_unpin; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; - goto err_unpin; - } - - err = bpf_program_pin_instance(prog, buf, i); - if (err) - goto err_unpin; - } - - return 0; - -err_unpin: - for (i = i - 1; i >= 0; i--) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) - continue; - else if (len >= PATH_MAX) - continue; - - bpf_program_unpin_instance(prog, buf, i); - } - - rmdir(path); - - return libbpf_err(err); -} - -int bpf_program__unpin(struct bpf_program *prog, const char *path) -{ - int i, err; - - err = check_path(path); - if (err) - return libbpf_err(err); - - if (prog == NULL) { - pr_warn("invalid program pointer\n"); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr <= 0) { - pr_warn("no instances of prog %s to pin\n", prog->name); - return libbpf_err(-EINVAL); - } - - if (prog->instances.nr == 1) { - /* don't create subdirs when pinning single instance */ - return bpf_program_unpin_instance(prog, path, 0); - } - - for (i = 0; i < prog->instances.nr; i++) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%d", path, i); - if (len < 0) - return libbpf_err(-EINVAL); - else if (len >= PATH_MAX) - return libbpf_err(-ENAMETOOLONG); - - err = bpf_program_unpin_instance(prog, buf, i); - if (err) - return err; - } - - err = rmdir(path); + err = unlink(path); if (err) return libbpf_err(-errno); + pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path); return 0; } int bpf_map__pin(struct bpf_map *map, const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; int err; if (map == NULL) { @@ -7781,6 +9085,11 @@ int bpf_map__pin(struct bpf_map *map, const char *path) return libbpf_err(-EINVAL); } + if (map->fd < 0) { + pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name); + return libbpf_err(-EINVAL); + } + if (map->pin_path) { if (path && strcmp(path, map->pin_path)) { pr_warn("map '%s' already has pin path '%s' different from '%s'\n", @@ -7827,8 +9136,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path) return 0; out_err: - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to pin map: %s\n", cp); + pr_warn("failed to pin map: %s\n", errstr(err)); return libbpf_err(err); } @@ -7883,10 +9191,8 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path) return 0; } -const char *bpf_map__get_pin_path(const struct bpf_map *map) -{ - return map->pin_path; -} +__alias(bpf_map__pin_path) +const char *bpf_map__get_pin_path(const struct bpf_map *map); const char *bpf_map__pin_path(const struct bpf_map *map) { @@ -7916,7 +9222,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) if (!obj) return libbpf_err(-ENOENT); - if (!obj->loaded) { + if (obj->state < OBJ_PREPARED) { pr_warn("object not yet loaded; load it first\n"); return libbpf_err(-ENOENT); } @@ -7925,21 +9231,13 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) char *pin_path = NULL; char buf[PATH_MAX]; - if (map->skipped) + if (!map->autocreate) continue; if (path) { - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) { - err = -EINVAL; - goto err_unpin_maps; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; + err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); + if (err) goto err_unpin_maps; - } sanitize_pin_path(buf); pin_path = buf; } else if (!map->pin_path) { @@ -7977,14 +9275,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) char buf[PATH_MAX]; if (path) { - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) - return libbpf_err(-EINVAL); - else if (len >= PATH_MAX) - return libbpf_err(-ENAMETOOLONG); + err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); + if (err) + return libbpf_err(err); sanitize_pin_path(buf); pin_path = buf; } else if (!map->pin_path) { @@ -8002,29 +9295,21 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) int bpf_object__pin_programs(struct bpf_object *obj, const char *path) { struct bpf_program *prog; + char buf[PATH_MAX]; int err; if (!obj) return libbpf_err(-ENOENT); - if (!obj->loaded) { + if (obj->state < OBJ_LOADED) { pr_warn("object not yet loaded; load it first\n"); return libbpf_err(-ENOENT); } bpf_object__for_each_program(prog, obj) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); - if (len < 0) { - err = -EINVAL; - goto err_unpin_programs; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; + err = pathname_concat(buf, sizeof(buf), path, prog->name); + if (err) goto err_unpin_programs; - } err = bpf_program__pin(prog, buf); if (err) @@ -8035,14 +9320,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) err_unpin_programs: while ((prog = bpf_object__prev_program(obj, prog))) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); - if (len < 0) - continue; - else if (len >= PATH_MAX) + if (pathname_concat(buf, sizeof(buf), path, prog->name)) continue; bpf_program__unpin(prog, buf); @@ -8061,14 +9339,10 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) bpf_object__for_each_program(prog, obj) { char buf[PATH_MAX]; - int len; - len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->pin_name); - if (len < 0) - return libbpf_err(-EINVAL); - else if (len >= PATH_MAX) - return libbpf_err(-ENAMETOOLONG); + err = pathname_concat(buf, sizeof(buf), path, prog->name); + if (err) + return libbpf_err(err); err = bpf_program__unpin(prog, buf); if (err) @@ -8095,13 +9369,23 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) return 0; } -static void bpf_map__destroy(struct bpf_map *map) +int bpf_object__unpin(struct bpf_object *obj, const char *path) { - if (map->clear_priv) - map->clear_priv(map, map->priv); - map->priv = NULL; - map->clear_priv = NULL; + int err; + err = bpf_object__unpin_programs(obj, path); + if (err) + return libbpf_err(err); + + err = bpf_object__unpin_maps(obj, path); + if (err) + return libbpf_err(err); + + return 0; +} + +static void bpf_map__destroy(struct bpf_map *map) +{ if (map->inner_map) { bpf_map__destroy(map->inner_map); zfree(&map->inner_map); @@ -8110,10 +9394,9 @@ static void bpf_map__destroy(struct bpf_map *map) zfree(&map->init_slots); map->init_slots_sz = 0; - if (map->mmaped) { + if (map->mmaped && map->mmaped != map->obj->arena_data) munmap(map->mmaped, bpf_map_mmap_sz(map)); - map->mmaped = NULL; - } + map->mmaped = NULL; if (map->st_ops) { zfree(&map->st_ops->data); @@ -8137,13 +9420,21 @@ void bpf_object__close(struct bpf_object *obj) if (IS_ERR_OR_NULL(obj)) return; - if (obj->clear_priv) - obj->clear_priv(obj, obj->priv); + /* + * if user called bpf_object__prepare() without ever getting to + * bpf_object__load(), we need to clean up stuff that is normally + * cleaned up at the end of loading step + */ + bpf_object_post_load_cleanup(obj); + + usdt_manager_free(obj->usdt_man); + obj->usdt_man = NULL; bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); bpf_object_unload(obj); btf__free(obj->btf); + btf__free(obj->btf_vmlinux); btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) @@ -8151,6 +9442,12 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->btf_custom_path); zfree(&obj->kconfig); + + for (i = 0; i < obj->nr_extern; i++) { + zfree(&obj->externs[i].name); + zfree(&obj->externs[i].essent_name); + } + zfree(&obj->externs); obj->nr_extern = 0; @@ -8163,31 +9460,21 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); - list_del(&obj->list); - free(obj); -} + zfree(&obj->feat_cache); + zfree(&obj->token_path); + if (obj->token_fd > 0) + close(obj->token_fd); -struct bpf_object * -bpf_object__next(struct bpf_object *prev) -{ - struct bpf_object *next; - bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST); - - if (strict) - return NULL; + zfree(&obj->arena_data); - if (!prev) - next = list_first_entry(&bpf_objects_list, - struct bpf_object, - list); - else - next = list_next_entry(prev, list); + zfree(&obj->jumptables_data); + obj->jumptables_data_sz = 0; - /* Empty list is noticed here so don't need checking on entry. */ - if (&next->list == &bpf_objects_list) - return NULL; + for (i = 0; i < obj->jumptable_map_cnt; i++) + close(obj->jumptable_maps[i].fd); + zfree(&obj->jumptable_maps); - return next; + free(obj); } const char *bpf_object__name(const struct bpf_object *obj) @@ -8200,6 +9487,11 @@ unsigned int bpf_object__kversion(const struct bpf_object *obj) return obj ? obj->kern_version : 0; } +int bpf_object__token_fd(const struct bpf_object *obj) +{ + return obj->token_fd ?: -1; +} + struct btf *bpf_object__btf(const struct bpf_object *obj) { return obj ? obj->btf : NULL; @@ -8212,7 +9504,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj) int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) { - if (obj->loaded) + if (obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); obj->kern_version = kern_version; @@ -8220,34 +9512,19 @@ int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) return 0; } -int bpf_object__set_priv(struct bpf_object *obj, void *priv, - bpf_object_clear_priv_t clear_priv) -{ - if (obj->priv && obj->clear_priv) - obj->clear_priv(obj, obj->priv); - - obj->priv = priv; - obj->clear_priv = clear_priv; - return 0; -} - -void *bpf_object__priv(const struct bpf_object *obj) -{ - return obj ? obj->priv : libbpf_err_ptr(-EINVAL); -} - int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) { struct bpf_gen *gen; if (!opts) - return -EFAULT; + return libbpf_err(-EFAULT); if (!OPTS_VALID(opts, gen_loader_opts)) - return -EINVAL; - gen = calloc(sizeof(*gen), 1); + return libbpf_err(-EINVAL); + gen = calloc(1, sizeof(*gen)); if (!gen) - return -ENOMEM; + return libbpf_err(-ENOMEM); gen->opts = opts; + gen->swapped_endian = !is_native_endianness(obj); obj->gen_loader = gen; return 0; } @@ -8279,12 +9556,6 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, } struct bpf_program * -bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj) -{ - return bpf_object__next_program(obj, prev); -} - -struct bpf_program * bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) { struct bpf_program *prog = prev; @@ -8297,12 +9568,6 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) } struct bpf_program * -bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj) -{ - return bpf_object__prev_program(obj, next); -} - -struct bpf_program * bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) { struct bpf_program *prog = next; @@ -8314,22 +9579,6 @@ bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) return prog; } -int bpf_program__set_priv(struct bpf_program *prog, void *priv, - bpf_program_clear_priv_t clear_priv) -{ - if (prog->priv && prog->clear_priv) - prog->clear_priv(prog, prog->priv); - - prog->priv = priv; - prog->clear_priv = clear_priv; - return 0; -} - -void *bpf_program__priv(const struct bpf_program *prog) -{ - return prog ? prog->priv : libbpf_err_ptr(-EINVAL); -} - void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) { prog->prog_ifindex = ifindex; @@ -8345,46 +9594,28 @@ const char *bpf_program__section_name(const struct bpf_program *prog) return prog->sec_name; } -const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) -{ - const char *title; - - title = prog->sec_name; - if (needs_copy) { - title = strdup(title); - if (!title) { - pr_warn("failed to strdup program title\n"); - return libbpf_err_ptr(-ENOMEM); - } - } - - return title; -} - bool bpf_program__autoload(const struct bpf_program *prog) { - return prog->load; + return prog->autoload; } int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); - prog->load = autoload; + prog->autoload = autoload; return 0; } -static int bpf_program_nth_fd(const struct bpf_program *prog, int n); - -int bpf_program__fd(const struct bpf_program *prog) +bool bpf_program__autoattach(const struct bpf_program *prog) { - return bpf_program_nth_fd(prog, 0); + return prog->autoattach; } -size_t bpf_program__size(const struct bpf_program *prog) +void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach) { - return prog->insns_cnt * BPF_INSN_SZ; + prog->autoattach = autoattach; } const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) @@ -8397,114 +9628,90 @@ size_t bpf_program__insn_cnt(const struct bpf_program *prog) return prog->insns_cnt; } -int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, - bpf_program_prep_t prep) +int bpf_program__set_insns(struct bpf_program *prog, + struct bpf_insn *new_insns, size_t new_insn_cnt) { - int *instances_fds; - - if (nr_instances <= 0 || !prep) - return libbpf_err(-EINVAL); + struct bpf_insn *insns; - if (prog->instances.nr > 0 || prog->instances.fds) { - pr_warn("Can't set pre-processor after loading\n"); - return libbpf_err(-EINVAL); - } + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); - instances_fds = malloc(sizeof(int) * nr_instances); - if (!instances_fds) { - pr_warn("alloc memory failed for fds\n"); + insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); + /* NULL is a valid return from reallocarray if the new count is zero */ + if (!insns && new_insn_cnt) { + pr_warn("prog '%s': failed to realloc prog code\n", prog->name); return libbpf_err(-ENOMEM); } + memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); - /* fill all fd with -1 */ - memset(instances_fds, -1, sizeof(int) * nr_instances); - - prog->instances.nr = nr_instances; - prog->instances.fds = instances_fds; - prog->preprocessor = prep; + prog->insns = insns; + prog->insns_cnt = new_insn_cnt; return 0; } -__attribute__((alias("bpf_program_nth_fd"))) -int bpf_program__nth_fd(const struct bpf_program *prog, int n); - -static int bpf_program_nth_fd(const struct bpf_program *prog, int n) +int bpf_program__fd(const struct bpf_program *prog) { - int fd; - if (!prog) return libbpf_err(-EINVAL); - if (n >= prog->instances.nr || n < 0) { - pr_warn("Can't get the %dth fd from program %s: only %d instances\n", - n, prog->name, prog->instances.nr); - return libbpf_err(-EINVAL); - } - - fd = prog->instances.fds[n]; - if (fd < 0) { - pr_warn("%dth instance of program '%s' is invalid\n", - n, prog->name); + if (prog->fd < 0) return libbpf_err(-ENOENT); - } - return fd; + return prog->fd; } -enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog) +__alias(bpf_program__type) +enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); + +enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) { return prog->type; } -void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) +static size_t custom_sec_def_cnt; +static struct bpf_sec_def *custom_sec_defs; +static struct bpf_sec_def custom_fallback_def; +static bool has_custom_fallback_def; +static int last_custom_sec_def_handler_id; + +int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); + + /* if type is not changed, do nothing */ + if (prog->type == type) + return 0; + prog->type = type; + + /* If a program type was changed, we need to reset associated SEC() + * handler, as it will be invalid now. The only exception is a generic + * fallback handler, which by definition is program type-agnostic and + * is a catch-all custom handler, optionally set by the application, + * so should be able to handle any type of BPF program. + */ + if (prog->sec_def != &custom_fallback_def) + prog->sec_def = NULL; + return 0; } -static bool bpf_program__is_type(const struct bpf_program *prog, - enum bpf_prog_type type) -{ - return prog ? (prog->type == type) : false; -} - -#define BPF_PROG_TYPE_FNS(NAME, TYPE) \ -int bpf_program__set_##NAME(struct bpf_program *prog) \ -{ \ - if (!prog) \ - return libbpf_err(-EINVAL); \ - bpf_program__set_type(prog, TYPE); \ - return 0; \ -} \ - \ -bool bpf_program__is_##NAME(const struct bpf_program *prog) \ -{ \ - return bpf_program__is_type(prog, TYPE); \ -} \ - -BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER); -BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM); -BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE); -BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS); -BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT); -BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); -BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); -BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); -BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); -BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); -BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); -BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); -BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP); - -enum bpf_attach_type -bpf_program__get_expected_attach_type(const struct bpf_program *prog) +__alias(bpf_program__expected_attach_type) +enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); + +enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog) { return prog->expected_attach_type; } -void bpf_program__set_expected_attach_type(struct bpf_program *prog, +int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type) { + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); + prog->expected_attach_type = type; + return 0; } __u32 bpf_program__flags(const struct bpf_program *prog) @@ -8514,7 +9721,7 @@ __u32 bpf_program__flags(const struct bpf_program *prog) int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->prog_flags = flags; @@ -8528,7 +9735,7 @@ __u32 bpf_program__log_level(const struct bpf_program *prog) int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->log_level = log_level; @@ -8544,160 +9751,313 @@ const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_siz int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) { if (log_size && !log_buf) - return -EINVAL; + return libbpf_err(-EINVAL); if (prog->log_size > UINT_MAX) - return -EINVAL; - if (prog->obj->loaded) - return -EBUSY; + return libbpf_err(-EINVAL); + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); prog->log_buf = log_buf; prog->log_size = log_size; return 0; } +struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog) +{ + if (prog->func_info_rec_size != sizeof(struct bpf_func_info)) + return libbpf_err_ptr(-EOPNOTSUPP); + return prog->func_info; +} + +__u32 bpf_program__func_info_cnt(const struct bpf_program *prog) +{ + return prog->func_info_cnt; +} + +struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog) +{ + if (prog->line_info_rec_size != sizeof(struct bpf_line_info)) + return libbpf_err_ptr(-EOPNOTSUPP); + return prog->line_info; +} + +__u32 bpf_program__line_info_cnt(const struct bpf_program *prog) +{ + return prog->line_info_cnt; +} + #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ - .sec = sec_pfx, \ + .sec = (char *)sec_pfx, \ .prog_type = BPF_PROG_TYPE_##ptype, \ .expected_attach_type = atype, \ .cookie = (long)(flags), \ - .preload_fn = libbpf_preload_prog, \ + .prog_prepare_load_fn = libbpf_prepare_prog_load, \ __VA_ARGS__ \ } -static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie); +static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_kprobe_session(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); static const struct bpf_sec_def section_defs[] = { - SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), - SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), - SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), - SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tracepoint.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fexit/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry.s/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fmod_ret.s/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fexit.s/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("freplace/", EXT, 0, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), - SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), - SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), + SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), + SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), + SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), + SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), + SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), + SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), + SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session), + SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), + SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), + SEC_DEF("uprobe.session+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_NONE, attach_uprobe_multi), + SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), + SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), + SEC_DEF("uprobe.session.s+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_SLEEPABLE, attach_uprobe_multi), + SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), + SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), + SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), + SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt), + SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */ + SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */ + SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), + SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), + SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ + SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ + SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */ + SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE), + SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE), + SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), + SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), + SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF), + SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), - SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), - SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), - SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), + SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), + SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), + SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), + SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), + SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), + SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), + SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), + SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), + SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), + SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), + SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), + SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), + SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), + SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), + SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), + SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT), + SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), + SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), + SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), - SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE | SEC_SLOPPY_PFX), + SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), + SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), + SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), }; -#define MAX_TYPE_NAME_SIZE 32 +int libbpf_register_prog_handler(const char *sec, + enum bpf_prog_type prog_type, + enum bpf_attach_type exp_attach_type, + const struct libbpf_prog_handler_opts *opts) +{ + struct bpf_sec_def *sec_def; -static const struct bpf_sec_def *find_sec_def(const char *sec_name) + if (!OPTS_VALID(opts, libbpf_prog_handler_opts)) + return libbpf_err(-EINVAL); + + if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */ + return libbpf_err(-E2BIG); + + if (sec) { + sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1, + sizeof(*sec_def)); + if (!sec_def) + return libbpf_err(-ENOMEM); + + custom_sec_defs = sec_def; + sec_def = &custom_sec_defs[custom_sec_def_cnt]; + } else { + if (has_custom_fallback_def) + return libbpf_err(-EBUSY); + + sec_def = &custom_fallback_def; + } + + sec_def->sec = sec ? strdup(sec) : NULL; + if (sec && !sec_def->sec) + return libbpf_err(-ENOMEM); + + sec_def->prog_type = prog_type; + sec_def->expected_attach_type = exp_attach_type; + sec_def->cookie = OPTS_GET(opts, cookie, 0); + + sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL); + sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL); + sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL); + + sec_def->handler_id = ++last_custom_sec_def_handler_id; + + if (sec) + custom_sec_def_cnt++; + else + has_custom_fallback_def = true; + + return sec_def->handler_id; +} + +int libbpf_unregister_prog_handler(int handler_id) { - const struct bpf_sec_def *sec_def; - enum sec_def_flags sec_flags; - int i, n = ARRAY_SIZE(section_defs), len; - bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME; + struct bpf_sec_def *sec_defs; + int i; - for (i = 0; i < n; i++) { - sec_def = §ion_defs[i]; - sec_flags = sec_def->cookie; - len = strlen(sec_def->sec); + if (handler_id <= 0) + return libbpf_err(-EINVAL); - /* "type/" always has to have proper SEC("type/extras") form */ - if (sec_def->sec[len - 1] == '/') { - if (str_has_pfx(sec_name, sec_def->sec)) - return sec_def; - continue; - } + if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) { + memset(&custom_fallback_def, 0, sizeof(custom_fallback_def)); + has_custom_fallback_def = false; + return 0; + } - /* "type+" means it can be either exact SEC("type") or - * well-formed SEC("type/extras") with proper '/' separator - */ - if (sec_def->sec[len - 1] == '+') { - len--; - /* not even a prefix */ - if (strncmp(sec_name, sec_def->sec, len) != 0) - continue; - /* exact match or has '/' separator */ - if (sec_name[len] == '\0' || sec_name[len] == '/') - return sec_def; - continue; - } + for (i = 0; i < custom_sec_def_cnt; i++) { + if (custom_sec_defs[i].handler_id == handler_id) + break; + } - /* SEC_SLOPPY_PFX definitions are allowed to be just prefix - * matches, unless strict section name mode - * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the - * match has to be exact. - */ - if ((sec_flags & SEC_SLOPPY_PFX) && !strict) { - if (str_has_pfx(sec_name, sec_def->sec)) - return sec_def; - continue; - } + if (i == custom_sec_def_cnt) + return libbpf_err(-ENOENT); - /* Definitions not marked SEC_SLOPPY_PFX (e.g., - * SEC("syscall")) are exact matches in both modes. - */ - if (strcmp(sec_name, sec_def->sec) == 0) + free(custom_sec_defs[i].sec); + for (i = i + 1; i < custom_sec_def_cnt; i++) + custom_sec_defs[i - 1] = custom_sec_defs[i]; + custom_sec_def_cnt--; + + /* try to shrink the array, but it's ok if we couldn't */ + sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs)); + /* if new count is zero, reallocarray can return a valid NULL result; + * in this case the previous pointer will be freed, so we *have to* + * reassign old pointer to the new value (even if it's NULL) + */ + if (sec_defs || custom_sec_def_cnt == 0) + custom_sec_defs = sec_defs; + + return 0; +} + +static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name) +{ + size_t len = strlen(sec_def->sec); + + /* "type/" always has to have proper SEC("type/extras") form */ + if (sec_def->sec[len - 1] == '/') { + if (str_has_pfx(sec_name, sec_def->sec)) + return true; + return false; + } + + /* "type+" means it can be either exact SEC("type") or + * well-formed SEC("type/extras") with proper '/' separator + */ + if (sec_def->sec[len - 1] == '+') { + len--; + /* not even a prefix */ + if (strncmp(sec_name, sec_def->sec, len) != 0) + return false; + /* exact match or has '/' separator */ + if (sec_name[len] == '\0' || sec_name[len] == '/') + return true; + return false; + } + + return strcmp(sec_name, sec_def->sec) == 0; +} + +static const struct bpf_sec_def *find_sec_def(const char *sec_name) +{ + const struct bpf_sec_def *sec_def; + int i, n; + + n = custom_sec_def_cnt; + for (i = 0; i < n; i++) { + sec_def = &custom_sec_defs[i]; + if (sec_def_matches(sec_def, sec_name)) + return sec_def; + } + + n = ARRAY_SIZE(section_defs); + for (i = 0; i < n; i++) { + sec_def = §ion_defs[i]; + if (sec_def_matches(sec_def, sec_name)) return sec_def; } + + if (has_custom_fallback_def) + return &custom_fallback_def; + return NULL; } +#define MAX_TYPE_NAME_SIZE 32 + static char *libbpf_get_type_names(bool attach_type) { int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; @@ -8713,7 +10073,7 @@ static char *libbpf_get_type_names(bool attach_type) const struct bpf_sec_def *sec_def = §ion_defs[i]; if (attach_type) { - if (sec_def->preload_fn != libbpf_preload_prog) + if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) continue; if (!(sec_def->cookie & SEC_ATTACHABLE)) @@ -8757,7 +10117,40 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, return libbpf_err(-ESRCH); } +const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(attach_type_name)) + return NULL; + + return attach_type_name[t]; +} + +const char *libbpf_bpf_link_type_str(enum bpf_link_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(link_type_name)) + return NULL; + + return link_type_name[t]; +} + +const char *libbpf_bpf_map_type_str(enum bpf_map_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(map_type_name)) + return NULL; + + return map_type_name[t]; +} + +const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t) +{ + if (t < 0 || t >= ARRAY_SIZE(prog_type_name)) + return NULL; + + return prog_type_name[t]; +} + static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, + int sec_idx, size_t offset) { struct bpf_map *map; @@ -8767,7 +10160,8 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, map = &obj->maps[i]; if (!bpf_map__is_struct_ops(map)) continue; - if (map->sec_offset <= offset && + if (map->sec_idx == sec_idx && + map->sec_offset <= offset && offset - map->sec_offset < map->def.value_size) return map; } @@ -8775,10 +10169,13 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, return NULL; } -/* Collect the reloc from ELF and populate the st_ops->progs[] */ +/* Collect the reloc from ELF, populate the st_ops->progs[], and update + * st_ops->data for shadow type. + */ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) { + const struct btf_type *type; const struct btf_member *member; struct bpf_struct_ops *st_ops; struct bpf_program *prog; @@ -8809,7 +10206,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, } name = elf_sym_str(obj, sym->st_name) ?: "<?>"; - map = find_struct_ops_map_by_offset(obj, rel->r_offset); + map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset); if (!map) { pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", (size_t)rel->r_offset); @@ -8838,13 +10235,14 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, } insn_idx = sym->st_value / BPF_INSN_SZ; - member = find_member_by_offset(st_ops->type, moff * 8); + type = btf__type_by_id(btf, st_ops->type_id); + member = find_member_by_offset(type, moff * 8); if (!member) { pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", map->name, moff); return -EINVAL; } - member_idx = member - btf_members(st_ops->type); + member_idx = member - btf_members(type); name = btf__name_by_offset(btf, member->name_off); if (!resolve_func_ptr(btf, member->type, NULL)) { @@ -8867,27 +10265,15 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, return -EINVAL; } - /* if we haven't yet processed this BPF program, record proper - * attach_btf_id and member_idx - */ - if (!prog->attach_btf_id) { - prog->attach_btf_id = st_ops->type_id; - prog->expected_attach_type = member_idx; - } + st_ops->progs[member_idx] = prog; - /* struct_ops BPF prog can be re-used between multiple - * .struct_ops as long as it's the same struct_ops struct - * definition and the same function pointer field + /* st_ops->data will be exposed to users, being returned by + * bpf_map__initial_value() as a pointer to the shadow + * type. All function pointers in the original struct type + * should be converted to a pointer to struct bpf_program + * in the shadow type. */ - if (prog->attach_btf_id != st_ops->type_id || - prog->expected_attach_type != member_idx) { - pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", - map->name, prog->name, prog->sec_name, prog->type, - prog->attach_btf_id, prog->expected_attach_type, name); - return -EINVAL; - } - - st_ops->progs[member_idx] = prog; + *((struct bpf_program **)(st_ops->data + moff)) = prog; } return 0; @@ -8907,6 +10293,7 @@ void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, *kind = BTF_KIND_TYPEDEF; break; case BPF_LSM_MAC: + case BPF_LSM_CGROUP: *prefix = BTF_LSM_PREFIX; *kind = BTF_KIND_FUNC; break; @@ -8968,17 +10355,18 @@ int libbpf_find_vmlinux_btf_id(const char *name, return libbpf_err(err); } -static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) +static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd) { - struct bpf_prog_info info = {}; + struct bpf_prog_info info; __u32 info_len = sizeof(info); struct btf *btf; int err; - err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len); + memset(&info, 0, info_len); + err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); if (err) { - pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n", - attach_prog_fd, err); + pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %s\n", + attach_prog_fd, errstr(err)); return err; } @@ -8987,10 +10375,10 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) pr_warn("The target program doesn't have BTF\n"); goto out; } - btf = btf__load_from_kernel_by_id(info.btf_id); + btf = btf_load_from_kernel(info.btf_id, NULL, token_fd); err = libbpf_get_error(btf); if (err) { - pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); + pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err)); goto out; } err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); @@ -9007,16 +10395,28 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, enum bpf_attach_type attach_type, int *btf_obj_fd, int *btf_type_id) { - int ret, i; + int ret, i, mod_len = 0; + const char *fn_name, *mod_name = NULL; - ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type); - if (ret > 0) { - *btf_obj_fd = 0; /* vmlinux BTF */ - *btf_type_id = ret; - return 0; + fn_name = strchr(attach_name, ':'); + if (fn_name) { + mod_name = attach_name; + mod_len = fn_name - mod_name; + fn_name++; + } + + if (!mod_name || strncmp(mod_name, "vmlinux", mod_len) == 0) { + ret = find_attach_btf_id(obj->btf_vmlinux, + mod_name ? fn_name : attach_name, + attach_type); + if (ret > 0) { + *btf_obj_fd = 0; /* vmlinux BTF */ + *btf_type_id = ret; + return 0; + } + if (ret != -ENOENT) + return ret; } - if (ret != -ENOENT) - return ret; ret = load_module_btfs(obj); if (ret) @@ -9025,7 +10425,12 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, for (i = 0; i < obj->btf_module_cnt; i++) { const struct module_btf *mod = &obj->btf_modules[i]; - ret = find_attach_btf_id(mod->btf, attach_name, attach_type); + if (mod_name && strncmp(mod->name, mod_name, mod_len) != 0) + continue; + + ret = find_attach_btf_id(mod->btf, + mod_name ? fn_name : attach_name, + attach_type); if (ret > 0) { *btf_obj_fd = mod->fd; *btf_type_id = ret; @@ -9048,11 +10453,15 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac int err = 0; /* BPF program's BTF ID */ - if (attach_prog_fd) { - err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); + if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) { + if (!attach_prog_fd) { + pr_warn("prog '%s': attach program FD is not set\n", prog->name); + return -EINVAL; + } + err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd); if (err < 0) { - pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n", - attach_prog_fd, attach_name, err); + pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n", + prog->name, attach_prog_fd, attach_name, errstr(err)); return err; } *btf_obj_fd = 0; @@ -9066,10 +10475,13 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac *btf_obj_fd = 0; *btf_type_id = 1; } else { - err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); + err = find_kernel_btf_id(prog->obj, attach_name, + attach_type, btf_obj_fd, + btf_type_id); } if (err) { - pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err); + pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %s\n", + prog->name, attach_name, errstr(err)); return err; } return 0; @@ -9096,7 +10508,7 @@ int libbpf_attach_type_by_name(const char *name, return libbpf_err(-EINVAL); } - if (sec_def->preload_fn != libbpf_preload_prog) + if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) return libbpf_err(-EINVAL); if (!(sec_def->cookie & SEC_ATTACHABLE)) return libbpf_err(-EINVAL); @@ -9107,12 +10519,11 @@ int libbpf_attach_type_by_name(const char *name, int bpf_map__fd(const struct bpf_map *map) { - return map ? map->fd : libbpf_err(-EINVAL); -} - -const struct bpf_map_def *bpf_map__def(const struct bpf_map *map) -{ - return map ? &map->def : libbpf_err_ptr(-EINVAL); + if (!map) + return libbpf_err(-EINVAL); + if (!map_is_created(map)) + return -1; + return map->fd; } static bool map_uses_real_name(const struct bpf_map *map) @@ -9148,7 +10559,7 @@ enum bpf_map_type bpf_map__type(const struct bpf_map *map) int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.type = type; return 0; @@ -9161,7 +10572,7 @@ __u32 bpf_map__map_flags(const struct bpf_map *map) int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.map_flags = flags; return 0; @@ -9174,7 +10585,7 @@ __u64 bpf_map__map_extra(const struct bpf_map *map) int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->map_extra = map_extra; return 0; @@ -9187,7 +10598,7 @@ __u32 bpf_map__numa_node(const struct bpf_map *map) int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->numa_node = numa_node; return 0; @@ -9200,7 +10611,7 @@ __u32 bpf_map__key_size(const struct bpf_map *map) int bpf_map__set_key_size(struct bpf_map *map, __u32 size) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.key_size = size; return 0; @@ -9211,10 +10622,106 @@ __u32 bpf_map__value_size(const struct bpf_map *map) return map->def.value_size; } +static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) +{ + struct btf *btf; + struct btf_type *datasec_type, *var_type; + struct btf_var_secinfo *var; + const struct btf_type *array_type; + const struct btf_array *array; + int vlen, element_sz, new_array_id; + __u32 nr_elements; + + /* check btf existence */ + btf = bpf_object__btf(map->obj); + if (!btf) + return -ENOENT; + + /* verify map is datasec */ + datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); + if (!btf_is_datasec(datasec_type)) { + pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify datasec has at least one var */ + vlen = btf_vlen(datasec_type); + if (vlen == 0) { + pr_warn("map '%s': cannot be resized, map value datasec is empty\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify last var in the datasec is an array */ + var = &btf_var_secinfos(datasec_type)[vlen - 1]; + var_type = btf_type_by_id(btf, var->type); + array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); + if (!btf_is_array(array_type)) { + pr_warn("map '%s': cannot be resized, last var must be an array\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify request size aligns with array */ + array = btf_array(array_type); + element_sz = btf__resolve_size(btf, array->type); + if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { + pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", + bpf_map__name(map), element_sz, size); + return -EINVAL; + } + + /* create a new array based on the existing array, but with new length */ + nr_elements = (size - var->offset) / element_sz; + new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); + if (new_array_id < 0) + return new_array_id; + + /* adding a new btf type invalidates existing pointers to btf objects, + * so refresh pointers before proceeding + */ + datasec_type = btf_type_by_id(btf, map->btf_value_type_id); + var = &btf_var_secinfos(datasec_type)[vlen - 1]; + var_type = btf_type_by_id(btf, var->type); + + /* finally update btf info */ + datasec_type->size = size; + var->size = size - var->offset; + var_type->type = new_array_id; + + return 0; +} + int bpf_map__set_value_size(struct bpf_map *map, __u32 size) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); + + if (map->mmaped) { + size_t mmap_old_sz, mmap_new_sz; + int err; + + if (map->def.type != BPF_MAP_TYPE_ARRAY) + return libbpf_err(-EOPNOTSUPP); + + mmap_old_sz = bpf_map_mmap_sz(map); + mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); + err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); + if (err) { + pr_warn("map '%s': failed to resize memory-mapped region: %s\n", + bpf_map__name(map), errstr(err)); + return libbpf_err(err); + } + err = map_btf_datasec_resize(map, size); + if (err && err != -ENOENT) { + pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %s\n", + bpf_map__name(map), errstr(err)); + map->btf_value_type_id = 0; + map->btf_key_type_id = 0; + } + } + map->def.value_size = size; return 0; } @@ -9229,49 +10736,45 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) return map ? map->btf_value_type_id : 0; } -int bpf_map__set_priv(struct bpf_map *map, void *priv, - bpf_map_clear_priv_t clear_priv) +int bpf_map__set_initial_value(struct bpf_map *map, + const void *data, size_t size) { - if (!map) - return libbpf_err(-EINVAL); + size_t actual_sz; - if (map->priv) { - if (map->clear_priv) - map->clear_priv(map, map->priv); - } - - map->priv = priv; - map->clear_priv = clear_priv; - return 0; -} + if (map_is_created(map)) + return libbpf_err(-EBUSY); -void *bpf_map__priv(const struct bpf_map *map) -{ - return map ? map->priv : libbpf_err_ptr(-EINVAL); -} + if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) + return libbpf_err(-EINVAL); -int bpf_map__set_initial_value(struct bpf_map *map, - const void *data, size_t size) -{ - if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG || - size != map->def.value_size || map->fd >= 0) + if (map->def.type == BPF_MAP_TYPE_ARENA) + actual_sz = map->obj->arena_data_sz; + else + actual_sz = map->def.value_size; + if (size != actual_sz) return libbpf_err(-EINVAL); memcpy(map->mmaped, data, size); return 0; } -const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) +void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize) { + if (bpf_map__is_struct_ops(map)) { + if (psize) + *psize = map->def.value_size; + return map->st_ops->data; + } + if (!map->mmaped) return NULL; - *psize = map->def.value_size; - return map->mmaped; -} -bool bpf_map__is_offload_neutral(const struct bpf_map *map) -{ - return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; + if (map->def.type == BPF_MAP_TYPE_ARENA) + *psize = map->obj->arena_data_sz; + else + *psize = map->def.value_size; + + return map->mmaped; } bool bpf_map__is_internal(const struct bpf_map *map) @@ -9286,7 +10789,7 @@ __u32 bpf_map__ifindex(const struct bpf_map *map) int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) { - if (map->fd >= 0) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->map_ifindex = ifindex; return 0; @@ -9310,6 +10813,27 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) return 0; } +int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog) +{ + if (map_is_created(map)) { + pr_warn("exclusive programs must be set before map creation\n"); + return libbpf_err(-EINVAL); + } + + if (map->obj != prog->obj) { + pr_warn("excl_prog and map must be from the same bpf object\n"); + return libbpf_err(-EINVAL); + } + + map->excl_prog = prog; + return 0; +} + +struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map) +{ + return map->excl_prog; +} + static struct bpf_map * __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) { @@ -9335,30 +10859,18 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) } struct bpf_map * -bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj) -{ - return bpf_object__next_map(obj, prev); -} - -struct bpf_map * bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { - if (prev == NULL) + if (prev == NULL && obj != NULL) return obj->maps; return __bpf_map__iter(prev, obj, 1); } struct bpf_map * -bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj) -{ - return bpf_object__prev_map(obj, next); -} - -struct bpf_map * bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) { - if (next == NULL) { + if (next == NULL && obj != NULL) { if (!obj->nr_maps) return NULL; return obj->maps + obj->nr_maps - 1; @@ -9400,132 +10912,144 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); } -struct bpf_map * -bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) +static int validate_map_op(const struct bpf_map *map, size_t key_sz, + size_t value_sz, bool check_value_sz) { - return libbpf_err_ptr(-ENOTSUP); + if (!map_is_created(map)) /* map is not yet created */ + return -ENOENT; + + if (map->def.key_size != key_sz) { + pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", + map->name, key_sz, map->def.key_size); + return -EINVAL; + } + + if (map->fd < 0) { + pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); + return -EINVAL; + } + + if (!check_value_sz) + return 0; + + switch (map->def.type) { + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { + int num_cpu = libbpf_num_possible_cpus(); + size_t elem_sz = roundup(map->def.value_size, 8); + + if (value_sz != num_cpu * elem_sz) { + pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", + map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); + return -EINVAL; + } + break; + } + default: + if (map->def.value_size != value_sz) { + pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", + map->name, value_sz, map->def.value_size); + return -EINVAL; + } + break; + } + return 0; } -long libbpf_get_error(const void *ptr) +int bpf_map__lookup_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) { - if (!IS_ERR_OR_NULL(ptr)) - return 0; + int err; - if (IS_ERR(ptr)) - errno = -PTR_ERR(ptr); + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); - /* If ptr == NULL, then errno should be already set by the failing - * API, because libbpf never returns NULL on success and it now always - * sets errno on error. So no extra errno handling for ptr == NULL - * case. - */ - return -errno; + return bpf_map_lookup_elem_flags(map->fd, key, value, flags); } -__attribute__((alias("bpf_prog_load_xattr2"))) -int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd); - -static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd) +int bpf_map__update_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + const void *value, size_t value_sz, __u64 flags) { - struct bpf_object_open_attr open_attr = {}; - struct bpf_program *prog, *first_prog = NULL; - struct bpf_object *obj; - struct bpf_map *map; int err; - if (!attr) - return libbpf_err(-EINVAL); - if (!attr->file) - return libbpf_err(-EINVAL); + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); - open_attr.file = attr->file; - open_attr.prog_type = attr->prog_type; + return bpf_map_update_elem(map->fd, key, value, flags); +} + +int bpf_map__delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, __u64 flags) +{ + int err; - obj = bpf_object__open_xattr(&open_attr); - err = libbpf_get_error(obj); + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); if (err) - return libbpf_err(-ENOENT); + return libbpf_err(err); - bpf_object__for_each_program(prog, obj) { - enum bpf_attach_type attach_type = attr->expected_attach_type; - /* - * to preserve backwards compatibility, bpf_prog_load treats - * attr->prog_type, if specified, as an override to whatever - * bpf_object__open guessed - */ - if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { - bpf_program__set_type(prog, attr->prog_type); - bpf_program__set_expected_attach_type(prog, - attach_type); - } - if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) { - /* - * we haven't guessed from section name and user - * didn't provide a fallback type, too bad... - */ - bpf_object__close(obj); - return libbpf_err(-EINVAL); - } + return bpf_map_delete_elem_flags(map->fd, key, flags); +} - prog->prog_ifindex = attr->ifindex; - prog->log_level = attr->log_level; - prog->prog_flags |= attr->prog_flags; - if (!first_prog) - first_prog = prog; - } +int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) +{ + int err; - bpf_object__for_each_map(map, obj) { - if (!bpf_map__is_offload_neutral(map)) - map->map_ifindex = attr->ifindex; - } + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); - if (!first_prog) { - pr_warn("object file doesn't contain bpf program\n"); - bpf_object__close(obj); - return libbpf_err(-ENOENT); - } + return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); +} - err = bpf_object__load(obj); - if (err) { - bpf_object__close(obj); +int bpf_map__get_next_key(const struct bpf_map *map, + const void *cur_key, void *next_key, size_t key_sz) +{ + int err; + + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + if (err) return libbpf_err(err); - } - *pobj = obj; - *prog_fd = bpf_program__fd(first_prog); - return 0; + return bpf_map_get_next_key(map->fd, cur_key, next_key); } -COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1) -int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd) +long libbpf_get_error(const void *ptr) { - struct bpf_prog_load_attr attr; + if (!IS_ERR_OR_NULL(ptr)) + return 0; - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = file; - attr.prog_type = type; - attr.expected_attach_type = 0; + if (IS_ERR(ptr)) + errno = -PTR_ERR(ptr); - return bpf_prog_load_xattr2(&attr, pobj, prog_fd); + /* If ptr == NULL, then errno should be already set by the failing + * API, because libbpf never returns NULL on success and it now always + * sets errno on error. So no extra errno handling for ptr == NULL + * case. + */ + return -errno; } -struct bpf_link { - int (*detach)(struct bpf_link *link); - void (*dealloc)(struct bpf_link *link); - char *pin_path; /* NULL, if not pinned */ - int fd; /* hook FD, -1 if not applicable */ - bool disconnected; -}; - /* Replace link's underlying BPF program with the new one */ int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) { int ret; + int prog_fd = bpf_program__fd(prog); + + if (prog_fd < 0) { + pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err(-EINVAL); + } - ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); + ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL); return libbpf_err_errno(ret); } @@ -9704,9 +11228,9 @@ static void bpf_link_perf_dealloc(struct bpf_link *link) struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts) { - char errmsg[STRERR_BUFSIZE]; struct bpf_link_perf *link; int prog_fd, link_fd = -1, err; + bool force_ioctl_attach; if (!OPTS_VALID(opts, bpf_perf_event_opts)) return libbpf_err_ptr(-EINVAL); @@ -9718,7 +11242,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p } prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", prog->name); return libbpf_err_ptr(-EINVAL); } @@ -9730,16 +11254,16 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p link->link.dealloc = &bpf_link_perf_dealloc; link->perf_event_fd = pfd; - if (kernel_supports(prog->obj, FEAT_PERF_LINK)) { + force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false); + if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts, .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0)); link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); if (link_fd < 0) { err = -errno; - pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n", - prog->name, pfd, - err, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %s\n", + prog->name, pfd, errstr(err)); goto err_out; } link->link.fd = link_fd; @@ -9753,7 +11277,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { err = -errno; pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", - prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, pfd, errstr(err)); if (err == -EPROTO) pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", prog->name, pfd); @@ -9761,11 +11285,14 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p } link->link.fd = pfd; } - if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { - err = -errno; - pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", - prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; + + if (!OPTS_GET(opts, dont_enable, false)) { + if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { + err = -errno; + pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", + prog->name, pfd, errstr(err)); + goto err_out; + } } return &link->link; @@ -9788,22 +11315,19 @@ struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, */ static int parse_uint_from_file(const char *file, const char *fmt) { - char buf[STRERR_BUFSIZE]; int err, ret; FILE *f; - f = fopen(file, "r"); + f = fopen(file, "re"); if (!f) { err = -errno; - pr_debug("failed to open '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); + pr_debug("failed to open '%s': %s\n", file, errstr(err)); return err; } err = fscanf(f, fmt, &ret); if (err != 1) { err = err == EOF ? -EIO : -errno; - pr_debug("failed to parse '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); + pr_debug("failed to parse '%s': %s\n", file, errstr(err)); fclose(f); return err; } @@ -9845,19 +11369,21 @@ static int determine_uprobe_retprobe_bit(void) static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, uint64_t offset, int pid, size_t ref_ctr_off) { - struct perf_event_attr attr = {}; - char errmsg[STRERR_BUFSIZE]; - int type, pfd, err; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; + int type, pfd; - if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) + if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) return -EINVAL; + memset(&attr, 0, attr_sz); + type = uprobe ? determine_uprobe_perf_type() : determine_kprobe_perf_type(); if (type < 0) { pr_warn("failed to determine %s perf type: %s\n", uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); + errstr(type)); return type; } if (retprobe) { @@ -9867,12 +11393,12 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, if (bit < 0) { pr_warn("failed to determine %s retprobe bit: %s\n", uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); + errstr(bit)); return bit; } attr.config |= 1 << bit; } - attr.size = sizeof(attr); + attr.size = attr_sz; attr.type = type; attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT; attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ @@ -9883,51 +11409,93 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, pid < 0 ? -1 : pid /* pid */, pid == -1 ? 0 : -1 /* cpu */, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); - if (pfd < 0) { - err = -errno; - pr_warn("%s perf_event_open() failed: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return err; - } - return pfd; + return pfd >= 0 ? pfd : -errno; } static int append_to_file(const char *file, const char *fmt, ...) { int fd, n, err = 0; va_list ap; + char buf[1024]; + + va_start(ap, fmt); + n = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (n < 0 || n >= sizeof(buf)) + return -EINVAL; fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); if (fd < 0) return -errno; - va_start(ap, fmt); - n = vdprintf(fd, fmt, ap); - va_end(ap); - - if (n < 0) + if (write(fd, buf, n) < 0) err = -errno; close(fd); return err; } -static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *kfunc_name, size_t offset) +#define DEBUGFS "/sys/kernel/debug/tracing" +#define TRACEFS "/sys/kernel/tracing" + +static bool use_debugfs(void) +{ + static int has_debugfs = -1; + + if (has_debugfs < 0) + has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0; + + return has_debugfs == 1; +} + +static const char *tracefs_path(void) +{ + return use_debugfs() ? DEBUGFS : TRACEFS; +} + +static const char *tracefs_kprobe_events(void) +{ + return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events"; +} + +static const char *tracefs_uprobe_events(void) +{ + return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events"; +} + +static const char *tracefs_available_filter_functions(void) +{ + return use_debugfs() ? DEBUGFS"/available_filter_functions" + : TRACEFS"/available_filter_functions"; +} + +static const char *tracefs_available_filter_functions_addrs(void) +{ + return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs" + : TRACEFS"/available_filter_functions_addrs"; +} + +static void gen_probe_legacy_event_name(char *buf, size_t buf_sz, + const char *name, size_t offset) { static int index = 0; + int i; - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, - __sync_fetch_and_add(&index, 1)); + snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(), + __sync_fetch_and_add(&index, 1), name, offset); + + /* sanitize name in the probe name */ + for (i = 0; buf[i]; i++) { + if (!isalnum(buf[i])) + buf[i] = '_'; + } } static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, const char *kfunc_name, size_t offset) { - const char *file = "/sys/kernel/debug/tracing/kprobe_events"; - - return append_to_file(file, "%c:%s/%s %s+0x%zx", + return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx", retprobe ? 'r' : 'p', retprobe ? "kretprobes" : "kprobes", probe_name, kfunc_name, offset); @@ -9935,18 +11503,16 @@ static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) { - const char *file = "/sys/kernel/debug/tracing/kprobe_events"; - - return append_to_file(file, "-:%s/%s", retprobe ? "kretprobes" : "kprobes", probe_name); + return append_to_file(tracefs_kprobe_events(), "-:%s/%s", + retprobe ? "kretprobes" : "kprobes", probe_name); } static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) { char file[256]; - snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - retprobe ? "kretprobes" : "kprobes", probe_name); + snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name); return parse_uint_from_file(file, "%d\n"); } @@ -9954,25 +11520,28 @@ static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retpro static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, const char *kfunc_name, size_t offset, int pid) { - struct perf_event_attr attr = {}; - char errmsg[STRERR_BUFSIZE]; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; int type, pfd, err; err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); if (err < 0) { pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", kfunc_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return err; } type = determine_kprobe_perf_type_legacy(probe_name, retprobe); if (type < 0) { + err = type; pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", kfunc_name, offset, - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); - return type; + errstr(err)); + goto err_clean_legacy; } - attr.size = sizeof(attr); + + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = type; attr.type = PERF_TYPE_TRACEPOINT; @@ -9983,10 +11552,71 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, if (pfd < 0) { err = -errno; pr_warn("legacy kprobe perf_event_open() failed: %s\n", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return err; + errstr(err)); + goto err_clean_legacy; } return pfd; + +err_clean_legacy: + /* Clear the newly added legacy kprobe_event */ + remove_kprobe_event_legacy(probe_name, retprobe); + return err; +} + +static const char *arch_specific_syscall_pfx(void) +{ +#if defined(__x86_64__) + return "x64"; +#elif defined(__i386__) + return "ia32"; +#elif defined(__s390x__) + return "s390x"; +#elif defined(__arm__) + return "arm"; +#elif defined(__aarch64__) + return "arm64"; +#elif defined(__mips__) + return "mips"; +#elif defined(__riscv) + return "riscv"; +#elif defined(__powerpc__) + return "powerpc"; +#elif defined(__powerpc64__) + return "powerpc64"; +#else + return NULL; +#endif +} + +int probe_kern_syscall_wrapper(int token_fd) +{ + char syscall_name[64]; + const char *ksys_pfx; + + ksys_pfx = arch_specific_syscall_pfx(); + if (!ksys_pfx) + return 0; + + snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); + + if (determine_kprobe_perf_type() >= 0) { + int pfd; + + pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); + if (pfd >= 0) + close(pfd); + + return pfd >= 0 ? 1 : 0; + } else { /* legacy mode */ + char probe_name[MAX_EVENT_NAME_LEN]; + + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); + if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) + return 0; + + (void)remove_kprobe_event_legacy(probe_name, false); + return 1; + } } struct bpf_link * @@ -9995,7 +11625,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const struct bpf_kprobe_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE]; + enum probe_attach_mode attach_mode; char *legacy_probe = NULL; struct bpf_link *link; size_t offset; @@ -10005,20 +11635,41 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, if (!OPTS_VALID(opts, bpf_kprobe_opts)) return libbpf_err_ptr(-EINVAL); + attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); retprobe = OPTS_GET(opts, retprobe, false); offset = OPTS_GET(opts, offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); legacy = determine_kprobe_perf_type() < 0; + switch (attach_mode) { + case PROBE_ATTACH_MODE_LEGACY: + legacy = true; + pe_opts.force_ioctl_attach = true; + break; + case PROBE_ATTACH_MODE_PERF: + if (legacy) + return libbpf_err_ptr(-ENOTSUP); + pe_opts.force_ioctl_attach = true; + break; + case PROBE_ATTACH_MODE_LINK: + if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) + return libbpf_err_ptr(-ENOTSUP); + break; + case PROBE_ATTACH_MODE_DEFAULT: + break; + default: + return libbpf_err_ptr(-EINVAL); + } + if (!legacy) { pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, offset, -1 /* pid */, 0 /* ref_ctr_off */); } else { - char probe_name[256]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), - func_name, offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + func_name, offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -10032,7 +11683,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_out; } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); @@ -10042,8 +11693,8 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; + errstr(err)); + goto err_clean_legacy; } if (legacy) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); @@ -10054,6 +11705,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, } return link; + +err_clean_legacy: + if (legacy) + remove_kprobe_event_legacy(legacy_probe, retprobe); err_out: free(legacy_probe); return libbpf_err_ptr(err); @@ -10070,14 +11725,359 @@ struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, return bpf_program__attach_kprobe_opts(prog, func_name, &opts); } -static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie) +struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, + const char *syscall_name, + const struct bpf_ksyscall_opts *opts) +{ + LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); + char func_name[128]; + + if (!OPTS_VALID(opts, bpf_ksyscall_opts)) + return libbpf_err_ptr(-EINVAL); + + if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { + /* arch_specific_syscall_pfx() should never return NULL here + * because it is guarded by kernel_supports(). However, since + * compiler does not know that we have an explicit conditional + * as well. + */ + snprintf(func_name, sizeof(func_name), "__%s_sys_%s", + arch_specific_syscall_pfx() ? : "", syscall_name); + } else { + snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); + } + + kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); + kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + + return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); +} + +/* Adapted from perf/util/string.c */ +bool glob_match(const char *str, const char *pat) +{ + while (*str && *pat && *pat != '*') { + if (*pat == '?') { /* Matches any single character */ + str++; + pat++; + continue; + } + if (*str != *pat) + return false; + str++; + pat++; + } + /* Check wild card */ + if (*pat == '*') { + while (*pat == '*') + pat++; + if (!*pat) /* Tail wild card matches all */ + return true; + while (*str) + if (glob_match(str++, pat)) + return true; + } + return !*str && !*pat; +} + +struct kprobe_multi_resolve { + const char *pattern; + unsigned long *addrs; + size_t cap; + size_t cnt; +}; + +struct avail_kallsyms_data { + char **syms; + size_t cnt; + struct kprobe_multi_resolve *res; +}; + +static int avail_func_cmp(const void *a, const void *b) +{ + return strcmp(*(const char **)a, *(const char **)b); +} + +static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, + const char *sym_name, void *ctx) +{ + struct avail_kallsyms_data *data = ctx; + struct kprobe_multi_resolve *res = data->res; + int err; + + if (!glob_match(sym_name, res->pattern)) + return 0; + + if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) { + /* Some versions of kernel strip out .llvm.<hash> suffix from + * function names reported in available_filter_functions, but + * don't do so for kallsyms. While this is clearly a kernel + * bug (fixed by [0]) we try to accommodate that in libbpf to + * make multi-kprobe usability a bit better: if no match is + * found, we will strip .llvm. suffix and try one more time. + * + * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG") + */ + char sym_trim[256], *psym_trim = sym_trim, *sym_sfx; + + if (!(sym_sfx = strstr(sym_name, ".llvm."))) + return 0; + + /* psym_trim vs sym_trim dance is done to avoid pointer vs array + * coercion differences and get proper `const char **` pointer + * which avail_func_cmp() expects + */ + snprintf(sym_trim, sizeof(sym_trim), "%.*s", (int)(sym_sfx - sym_name), sym_name); + if (!bsearch(&psym_trim, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) + return 0; + } + + err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1); + if (err) + return err; + + res->addrs[res->cnt++] = (unsigned long)sym_addr; + return 0; +} + +static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res) +{ + const char *available_functions_file = tracefs_available_filter_functions(); + struct avail_kallsyms_data data; + char sym_name[500]; + FILE *f; + int err = 0, ret, i; + char **syms = NULL; + size_t cap = 0, cnt = 0; + + f = fopen(available_functions_file, "re"); + if (!f) { + err = -errno; + pr_warn("failed to open %s: %s\n", available_functions_file, errstr(err)); + return err; + } + + while (true) { + char *name; + + ret = fscanf(f, "%499s%*[^\n]\n", sym_name); + if (ret == EOF && feof(f)) + break; + + if (ret != 1) { + pr_warn("failed to parse available_filter_functions entry: %d\n", ret); + err = -EINVAL; + goto cleanup; + } + + if (!glob_match(sym_name, res->pattern)) + continue; + + err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1); + if (err) + goto cleanup; + + name = strdup(sym_name); + if (!name) { + err = -errno; + goto cleanup; + } + + syms[cnt++] = name; + } + + /* no entries found, bail out */ + if (cnt == 0) { + err = -ENOENT; + goto cleanup; + } + + /* sort available functions */ + qsort(syms, cnt, sizeof(*syms), avail_func_cmp); + + data.syms = syms; + data.res = res; + data.cnt = cnt; + libbpf_kallsyms_parse(avail_kallsyms_cb, &data); + + if (res->cnt == 0) + err = -ENOENT; + +cleanup: + for (i = 0; i < cnt; i++) + free((char *)syms[i]); + free(syms); + + fclose(f); + return err; +} + +static bool has_available_filter_functions_addrs(void) +{ + return access(tracefs_available_filter_functions_addrs(), R_OK) != -1; +} + +static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res) +{ + const char *available_path = tracefs_available_filter_functions_addrs(); + char sym_name[500]; + FILE *f; + int ret, err = 0; + unsigned long long sym_addr; + + f = fopen(available_path, "re"); + if (!f) { + err = -errno; + pr_warn("failed to open %s: %s\n", available_path, errstr(err)); + return err; + } + + while (true) { + ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name); + if (ret == EOF && feof(f)) + break; + + if (ret != 2) { + pr_warn("failed to parse available_filter_functions_addrs entry: %d\n", + ret); + err = -EINVAL; + goto cleanup; + } + + if (!glob_match(sym_name, res->pattern)) + continue; + + err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, + sizeof(*res->addrs), res->cnt + 1); + if (err) + goto cleanup; + + res->addrs[res->cnt++] = (unsigned long)sym_addr; + } + + if (res->cnt == 0) + err = -ENOENT; + +cleanup: + fclose(f); + return err; +} + +struct bpf_link * +bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, + const char *pattern, + const struct bpf_kprobe_multi_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, lopts); + struct kprobe_multi_resolve res = { + .pattern = pattern, + }; + enum bpf_attach_type attach_type; + struct bpf_link *link = NULL; + const unsigned long *addrs; + int err, link_fd, prog_fd; + bool retprobe, session, unique_match; + const __u64 *cookies; + const char **syms; + size_t cnt; + + if (!OPTS_VALID(opts, bpf_kprobe_multi_opts)) + return libbpf_err_ptr(-EINVAL); + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + syms = OPTS_GET(opts, syms, false); + addrs = OPTS_GET(opts, addrs, false); + cnt = OPTS_GET(opts, cnt, false); + cookies = OPTS_GET(opts, cookies, false); + unique_match = OPTS_GET(opts, unique_match, false); + + if (!pattern && !addrs && !syms) + return libbpf_err_ptr(-EINVAL); + if (pattern && (addrs || syms || cookies || cnt)) + return libbpf_err_ptr(-EINVAL); + if (!pattern && !cnt) + return libbpf_err_ptr(-EINVAL); + if (!pattern && unique_match) + return libbpf_err_ptr(-EINVAL); + if (addrs && syms) + return libbpf_err_ptr(-EINVAL); + + if (pattern) { + if (has_available_filter_functions_addrs()) + err = libbpf_available_kprobes_parse(&res); + else + err = libbpf_available_kallsyms_parse(&res); + if (err) + goto error; + + if (unique_match && res.cnt != 1) { + pr_warn("prog '%s': failed to find a unique match for '%s' (%zu matches)\n", + prog->name, pattern, res.cnt); + err = -EINVAL; + goto error; + } + + addrs = res.addrs; + cnt = res.cnt; + } + + retprobe = OPTS_GET(opts, retprobe, false); + session = OPTS_GET(opts, session, false); + + if (retprobe && session) + return libbpf_err_ptr(-EINVAL); + + attach_type = session ? BPF_TRACE_KPROBE_SESSION : BPF_TRACE_KPROBE_MULTI; + + lopts.kprobe_multi.syms = syms; + lopts.kprobe_multi.addrs = addrs; + lopts.kprobe_multi.cookies = cookies; + lopts.kprobe_multi.cnt = cnt; + lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0; + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto error; + } + link->detach = &bpf_link__detach_fd; + + link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); + if (link_fd < 0) { + err = -errno; + pr_warn("prog '%s': failed to attach: %s\n", + prog->name, errstr(err)); + goto error; + } + link->fd = link_fd; + free(res.addrs); + return link; + +error: + free(link); + free(res.addrs); + return libbpf_err_ptr(err); +} + +static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) { DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); unsigned long offset = 0; - struct bpf_link *link; const char *func_name; char *func; - int n, err; + int n; + + *link = NULL; + + /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ + if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) + return 0; opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); if (opts.retprobe) @@ -10087,43 +12087,136 @@ static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cooki n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); if (n < 1) { - err = -EINVAL; pr_warn("kprobe name is invalid: %s\n", func_name); - return libbpf_err_ptr(err); + return -EINVAL; } if (opts.retprobe && offset != 0) { free(func); - err = -EINVAL; pr_warn("kretprobes do not support offset specification\n"); - return libbpf_err_ptr(err); + return -EINVAL; } opts.offset = offset; - link = bpf_program__attach_kprobe_opts(prog, func, &opts); + *link = bpf_program__attach_kprobe_opts(prog, func, &opts); free(func); - return link; + return libbpf_get_error(*link); } -static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *binary_path, uint64_t offset) +static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link) { - int i; + LIBBPF_OPTS(bpf_ksyscall_opts, opts); + const char *syscall_name; - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); + *link = NULL; - /* sanitize binary_path in the probe name */ - for (i = 0; buf[i]; i++) { - if (!isalnum(buf[i])) - buf[i] = '_'; + /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */ + if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0) + return 0; + + opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/"); + if (opts.retprobe) + syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1; + else + syscall_name = prog->sec_name + sizeof("ksyscall/") - 1; + + *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts); + return *link ? 0 : -errno; +} + +static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + const char *spec; + char *pattern; + int n; + + *link = NULL; + + /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ + if (strcmp(prog->sec_name, "kprobe.multi") == 0 || + strcmp(prog->sec_name, "kretprobe.multi") == 0) + return 0; + + opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); + if (opts.retprobe) + spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; + else + spec = prog->sec_name + sizeof("kprobe.multi/") - 1; + + n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); + if (n < 1) { + pr_warn("kprobe multi pattern is invalid: %s\n", spec); + return -EINVAL; + } + + *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); + free(pattern); + return libbpf_get_error(*link); +} + +static int attach_kprobe_session(const struct bpf_program *prog, long cookie, + struct bpf_link **link) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .session = true); + const char *spec; + char *pattern; + int n; + + *link = NULL; + + /* no auto-attach for SEC("kprobe.session") */ + if (strcmp(prog->sec_name, "kprobe.session") == 0) + return 0; + + spec = prog->sec_name + sizeof("kprobe.session/") - 1; + n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); + if (n < 1) { + pr_warn("kprobe session pattern is invalid: %s\n", spec); + return -EINVAL; } + + *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); + free(pattern); + return *link ? 0 : -errno; +} + +static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + int n, ret = -EINVAL; + + *link = NULL; + + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", + &probe_type, &binary_path, &func_name); + switch (n) { + case 1: + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ + ret = 0; + break; + case 3: + opts.session = str_has_pfx(probe_type, "uprobe.session"); + opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi"); + + *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); + ret = libbpf_get_error(*link); + break; + default: + pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, + prog->sec_name); + break; + } + free(probe_type); + free(binary_path); + free(func_name); + return ret; } static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { - const char *file = "/sys/kernel/debug/tracing/uprobe_events"; - - return append_to_file(file, "%c:%s/%s %s:0x%zx", + return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx", retprobe ? 'r' : 'p', retprobe ? "uretprobes" : "uprobes", probe_name, binary_path, offset); @@ -10131,18 +12224,16 @@ static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) { - const char *file = "/sys/kernel/debug/tracing/uprobe_events"; - - return append_to_file(file, "-:%s/%s", retprobe ? "uretprobes" : "uprobes", probe_name); + return append_to_file(tracefs_uprobe_events(), "-:%s/%s", + retprobe ? "uretprobes" : "uprobes", probe_name); } static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) { char file[512]; - snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - retprobe ? "uretprobes" : "uprobes", probe_name); + snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name); return parse_uint_from_file(file, "%d\n"); } @@ -10150,24 +12241,26 @@ static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retpro static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset, int pid) { + const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; int type, pfd, err; err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); if (err < 0) { - pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", - binary_path, (size_t)offset, err); + pr_warn("failed to add legacy uprobe event for %s:0x%zx: %s\n", + binary_path, (size_t)offset, errstr(err)); return err; } type = determine_uprobe_perf_type_legacy(probe_name, retprobe); if (type < 0) { - pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", - binary_path, offset, err); - return type; + err = type; + pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %s\n", + binary_path, offset, errstr(err)); + goto err_clean_legacy; } - memset(&attr, 0, sizeof(attr)); - attr.size = sizeof(attr); + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = type; attr.type = PERF_TYPE_TRACEPOINT; @@ -10177,10 +12270,283 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); if (pfd < 0) { err = -errno; - pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); - return err; + pr_warn("legacy uprobe perf_event_open() failed: %s\n", errstr(err)); + goto err_clean_legacy; } return pfd; + +err_clean_legacy: + /* Clear the newly added legacy uprobe_event */ + remove_uprobe_event_legacy(probe_name, retprobe); + return err; +} + +/* Find offset of function name in archive specified by path. Currently + * supported are .zip files that do not compress their contents, as used on + * Android in the form of APKs, for example. "file_name" is the name of the ELF + * file inside the archive. "func_name" matches symbol name or name@@LIB for + * library functions. + * + * An overview of the APK format specifically provided here: + * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents + */ +static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name, + const char *func_name) +{ + struct zip_archive *archive; + struct zip_entry entry; + long ret; + Elf *elf; + + archive = zip_archive_open(archive_path); + if (IS_ERR(archive)) { + ret = PTR_ERR(archive); + pr_warn("zip: failed to open %s: %ld\n", archive_path, ret); + return ret; + } + + ret = zip_archive_find_entry(archive, file_name, &entry); + if (ret) { + pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name, + archive_path, ret); + goto out; + } + pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path, + (unsigned long)entry.data_offset); + + if (entry.compression) { + pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name, + archive_path); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + + elf = elf_memory((void *)entry.data, entry.data_length); + if (!elf) { + pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path, + elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__LIBELF; + goto out; + } + + ret = elf_find_func_offset(elf, file_name, func_name); + if (ret > 0) { + pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n", + func_name, file_name, archive_path, entry.data_offset, ret, + ret + entry.data_offset); + ret += entry.data_offset; + } + elf_end(elf); + +out: + zip_archive_close(archive); + return ret; +} + +static const char *arch_specific_lib_paths(void) +{ + /* + * Based on https://packages.debian.org/sid/libc6. + * + * Assume that the traced program is built for the same architecture + * as libbpf, which should cover the vast majority of cases. + */ +#if defined(__x86_64__) + return "/lib/x86_64-linux-gnu"; +#elif defined(__i386__) + return "/lib/i386-linux-gnu"; +#elif defined(__s390x__) + return "/lib/s390x-linux-gnu"; +#elif defined(__arm__) && defined(__SOFTFP__) + return "/lib/arm-linux-gnueabi"; +#elif defined(__arm__) && !defined(__SOFTFP__) + return "/lib/arm-linux-gnueabihf"; +#elif defined(__aarch64__) + return "/lib/aarch64-linux-gnu"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 + return "/lib/mips64el-linux-gnuabi64"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 + return "/lib/mipsel-linux-gnu"; +#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return "/lib/powerpc64le-linux-gnu"; +#elif defined(__sparc__) && defined(__arch64__) + return "/lib/sparc64-linux-gnu"; +#elif defined(__riscv) && __riscv_xlen == 64 + return "/lib/riscv64-linux-gnu"; +#else + return NULL; +#endif +} + +/* Get full path to program/shared library. */ +static int resolve_full_path(const char *file, char *result, size_t result_sz) +{ + const char *search_paths[3] = {}; + int i, perm; + + if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { + search_paths[0] = getenv("LD_LIBRARY_PATH"); + search_paths[1] = "/usr/lib64:/usr/lib"; + search_paths[2] = arch_specific_lib_paths(); + perm = R_OK; + } else { + search_paths[0] = getenv("PATH"); + search_paths[1] = "/usr/bin:/usr/sbin"; + perm = R_OK | X_OK; + } + + for (i = 0; i < ARRAY_SIZE(search_paths); i++) { + const char *s; + + if (!search_paths[i]) + continue; + for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { + char *next_path; + int seg_len; + + if (s[0] == ':') + s++; + next_path = strchr(s, ':'); + seg_len = next_path ? next_path - s : strlen(s); + if (!seg_len) + continue; + snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); + /* ensure it has required permissions */ + if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0) + continue; + pr_debug("resolved '%s' to '%s'\n", file, result); + return 0; + } + } + return -ENOENT; +} + +struct bpf_link * +bpf_program__attach_uprobe_multi(const struct bpf_program *prog, + pid_t pid, + const char *path, + const char *func_pattern, + const struct bpf_uprobe_multi_opts *opts) +{ + const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; + LIBBPF_OPTS(bpf_link_create_opts, lopts); + unsigned long *resolved_offsets = NULL; + enum bpf_attach_type attach_type; + int err = 0, link_fd, prog_fd; + struct bpf_link *link = NULL; + char full_path[PATH_MAX]; + bool retprobe, session; + const __u64 *cookies; + const char **syms; + size_t cnt; + + if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) + return libbpf_err_ptr(-EINVAL); + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + syms = OPTS_GET(opts, syms, NULL); + offsets = OPTS_GET(opts, offsets, NULL); + ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); + cookies = OPTS_GET(opts, cookies, NULL); + cnt = OPTS_GET(opts, cnt, 0); + retprobe = OPTS_GET(opts, retprobe, false); + session = OPTS_GET(opts, session, false); + + /* + * User can specify 2 mutually exclusive set of inputs: + * + * 1) use only path/func_pattern/pid arguments + * + * 2) use path/pid with allowed combinations of: + * syms/offsets/ref_ctr_offsets/cookies/cnt + * + * - syms and offsets are mutually exclusive + * - ref_ctr_offsets and cookies are optional + * + * Any other usage results in error. + */ + + if (!path) + return libbpf_err_ptr(-EINVAL); + if (!func_pattern && cnt == 0) + return libbpf_err_ptr(-EINVAL); + + if (func_pattern) { + if (syms || offsets || ref_ctr_offsets || cookies || cnt) + return libbpf_err_ptr(-EINVAL); + } else { + if (!!syms == !!offsets) + return libbpf_err_ptr(-EINVAL); + } + + if (retprobe && session) + return libbpf_err_ptr(-EINVAL); + + if (func_pattern) { + if (!strchr(path, '/')) { + err = resolve_full_path(path, full_path, sizeof(full_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, path, errstr(err)); + return libbpf_err_ptr(err); + } + path = full_path; + } + + err = elf_resolve_pattern_offsets(path, func_pattern, + &resolved_offsets, &cnt); + if (err < 0) + return libbpf_err_ptr(err); + offsets = resolved_offsets; + } else if (syms) { + err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC); + if (err < 0) + return libbpf_err_ptr(err); + offsets = resolved_offsets; + } + + attach_type = session ? BPF_TRACE_UPROBE_SESSION : BPF_TRACE_UPROBE_MULTI; + + lopts.uprobe_multi.path = path; + lopts.uprobe_multi.offsets = offsets; + lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; + lopts.uprobe_multi.cookies = cookies; + lopts.uprobe_multi.cnt = cnt; + lopts.uprobe_multi.flags = retprobe ? BPF_F_UPROBE_MULTI_RETURN : 0; + + if (pid == 0) + pid = getpid(); + if (pid > 0) + lopts.uprobe_multi.pid = pid; + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto error; + } + link->detach = &bpf_link__detach_fd; + + link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); + if (link_fd < 0) { + err = -errno; + pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", + prog->name, errstr(err)); + goto error; + } + link->fd = link_fd; + free(resolved_offsets); + return link; + +error: + free(resolved_offsets); + free(link); + return libbpf_err_ptr(err); } LIBBPF_API struct bpf_link * @@ -10188,32 +12554,94 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts) { + const char *archive_path = NULL, *archive_sep = NULL; + char *legacy_probe = NULL; DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; + enum probe_attach_mode attach_mode; + char full_path[PATH_MAX]; struct bpf_link *link; size_t ref_ctr_off; int pfd, err; bool retprobe, legacy; + const char *func_name; if (!OPTS_VALID(opts, bpf_uprobe_opts)) return libbpf_err_ptr(-EINVAL); + attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); retprobe = OPTS_GET(opts, retprobe, false); ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + if (!binary_path) + return libbpf_err_ptr(-EINVAL); + + /* Check if "binary_path" refers to an archive. */ + archive_sep = strstr(binary_path, "!/"); + if (archive_sep) { + full_path[0] = '\0'; + libbpf_strlcpy(full_path, binary_path, + min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1))); + archive_path = full_path; + binary_path = archive_sep + 2; + } else if (!strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, full_path, sizeof(full_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, binary_path, errstr(err)); + return libbpf_err_ptr(err); + } + binary_path = full_path; + } + func_name = OPTS_GET(opts, func_name, NULL); + if (func_name) { + long sym_off; + + if (archive_path) { + sym_off = elf_find_func_offset_from_archive(archive_path, binary_path, + func_name); + binary_path = archive_path; + } else { + sym_off = elf_find_func_offset_from_file(binary_path, func_name); + } + if (sym_off < 0) + return libbpf_err_ptr(sym_off); + func_offset += sym_off; + } + legacy = determine_uprobe_perf_type() < 0; + switch (attach_mode) { + case PROBE_ATTACH_MODE_LEGACY: + legacy = true; + pe_opts.force_ioctl_attach = true; + break; + case PROBE_ATTACH_MODE_PERF: + if (legacy) + return libbpf_err_ptr(-ENOTSUP); + pe_opts.force_ioctl_attach = true; + break; + case PROBE_ATTACH_MODE_LINK: + if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) + return libbpf_err_ptr(-ENOTSUP); + break; + case PROBE_ATTACH_MODE_DEFAULT: + break; + default: + return libbpf_err_ptr(-EINVAL); + } + if (!legacy) { pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[512]; + char probe_name[MAX_EVENT_NAME_LEN]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); - gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), - binary_path, func_offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + strrchr(binary_path, '/') ? : binary_path, + func_offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -10227,7 +12655,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_out; } @@ -10238,8 +12666,8 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - goto err_out; + errstr(err)); + goto err_clean_legacy; } if (legacy) { struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); @@ -10249,10 +12677,78 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, perf_link->legacy_is_retprobe = retprobe; } return link; + +err_clean_legacy: + if (legacy) + remove_uprobe_event_legacy(legacy_probe, retprobe); err_out: free(legacy_probe); return libbpf_err_ptr(err); +} + +/* Format of u[ret]probe section definition supporting auto-attach: + * u[ret]probe/binary:function[+offset] + * + * binary can be an absolute/relative path or a filename; the latter is resolved to a + * full binary path via bpf_program__attach_uprobe_opts. + * + * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be + * specified (and auto-attach is not possible) or the above format is specified for + * auto-attach. + */ +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; + int n, c, ret = -EINVAL; + long offset = 0; + *link = NULL; + + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", + &probe_type, &binary_path, &func_name); + switch (n) { + case 1: + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ + ret = 0; + break; + case 2: + pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", + prog->name, prog->sec_name); + break; + case 3: + /* check if user specifies `+offset`, if yes, this should be + * the last part of the string, make sure sscanf read to EOL + */ + func_off = strrchr(func_name, '+'); + if (func_off) { + n = sscanf(func_off, "+%li%n", &offset, &c); + if (n == 1 && *(func_off + c) == '\0') + func_off[0] = '\0'; + else + offset = 0; + } + opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || + strcmp(probe_type, "uretprobe.s") == 0; + if (opts.retprobe && offset != 0) { + pr_warn("prog '%s': uretprobes do not support offset specification\n", + prog->name); + break; + } + opts.func_name = func_name; + *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); + ret = libbpf_get_error(*link); + break; + default: + pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, + prog->sec_name); + break; + } + free(probe_type); + free(binary_path); + free(func_name); + + return ret; } struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, @@ -10265,15 +12761,96 @@ struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); } +struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, + pid_t pid, const char *binary_path, + const char *usdt_provider, const char *usdt_name, + const struct bpf_usdt_opts *opts) +{ + char resolved_path[512]; + struct bpf_object *obj = prog->obj; + struct bpf_link *link; + __u64 usdt_cookie; + int err; + + if (!OPTS_VALID(opts, bpf_uprobe_opts)) + return libbpf_err_ptr(-EINVAL); + + if (bpf_program__fd(prog) < 0) { + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + if (!binary_path) + return libbpf_err_ptr(-EINVAL); + + if (!strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, binary_path, errstr(err)); + return libbpf_err_ptr(err); + } + binary_path = resolved_path; + } + + /* USDT manager is instantiated lazily on first USDT attach. It will + * be destroyed together with BPF object in bpf_object__close(). + */ + if (IS_ERR(obj->usdt_man)) + return libbpf_ptr(obj->usdt_man); + if (!obj->usdt_man) { + obj->usdt_man = usdt_manager_new(obj); + if (IS_ERR(obj->usdt_man)) + return libbpf_ptr(obj->usdt_man); + } + + usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); + link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, + usdt_provider, usdt_name, usdt_cookie); + err = libbpf_get_error(link); + if (err) + return libbpf_err_ptr(err); + return link; +} + +static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + char *path = NULL, *provider = NULL, *name = NULL; + const char *sec_name; + int n, err; + + sec_name = bpf_program__section_name(prog); + if (strcmp(sec_name, "usdt") == 0) { + /* no auto-attach for just SEC("usdt") */ + *link = NULL; + return 0; + } + + n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name); + if (n != 3) { + pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n", + sec_name); + err = -EINVAL; + } else { + *link = bpf_program__attach_usdt(prog, -1 /* any process */, path, + provider, name, NULL); + err = libbpf_get_error(*link); + } + free(path); + free(provider); + free(name); + return err; +} + static int determine_tracepoint_id(const char *tp_category, const char *tp_name) { char file[PATH_MAX]; int ret; - ret = snprintf(file, sizeof(file), - "/sys/kernel/debug/tracing/events/%s/%s/id", - tp_category, tp_name); + ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id", + tracefs_path(), tp_category, tp_name); if (ret < 0) return -errno; if (ret >= sizeof(file)) { @@ -10287,20 +12864,21 @@ static int determine_tracepoint_id(const char *tp_category, static int perf_event_open_tracepoint(const char *tp_category, const char *tp_name) { - struct perf_event_attr attr = {}; - char errmsg[STRERR_BUFSIZE]; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; int tp_id, pfd, err; tp_id = determine_tracepoint_id(tp_category, tp_name); if (tp_id < 0) { pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", tp_category, tp_name, - libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); + errstr(tp_id)); return tp_id; } + memset(&attr, 0, attr_sz); attr.type = PERF_TYPE_TRACEPOINT; - attr.size = sizeof(attr); + attr.size = attr_sz; attr.config = tp_id; pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, @@ -10309,7 +12887,7 @@ static int perf_event_open_tracepoint(const char *tp_category, err = -errno; pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return err; } return pfd; @@ -10321,7 +12899,6 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p const struct bpf_tracepoint_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int pfd, err; @@ -10334,7 +12911,7 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p if (pfd < 0) { pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", prog->name, tp_category, tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + errstr(pfd)); return libbpf_err_ptr(pfd); } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); @@ -10343,7 +12920,7 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p close(pfd); pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", prog->name, tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return libbpf_err_ptr(err); } return link; @@ -10356,14 +12933,19 @@ struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); } -static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) +static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) { char *sec_name, *tp_cat, *tp_name; - struct bpf_link *link; + + *link = NULL; + + /* no auto-attach for SEC("tp") or SEC("tracepoint") */ + if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) + return 0; sec_name = strdup(prog->sec_name); if (!sec_name) - return libbpf_err_ptr(-ENOMEM); + return -ENOMEM; /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */ if (str_has_pfx(prog->sec_name, "tp/")) @@ -10373,23 +12955,28 @@ static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) tp_name = strchr(tp_cat, '/'); if (!tp_name) { free(sec_name); - return libbpf_err_ptr(-EINVAL); + return -EINVAL; } *tp_name = '\0'; tp_name++; - link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); + *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); free(sec_name); - return link; + return libbpf_get_error(*link); } -struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, - const char *tp_name) +struct bpf_link * +bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, + const char *tp_name, + struct bpf_raw_tracepoint_opts *opts) { - char errmsg[STRERR_BUFSIZE]; + LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts); struct bpf_link *link; int prog_fd, pfd; + if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts)) + return libbpf_err_ptr(-EINVAL); + prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); @@ -10401,51 +12988,78 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; - pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); + raw_opts.tp_name = tp_name; + raw_opts.cookie = OPTS_GET(opts, cookie, 0); + pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts); if (pfd < 0) { pfd = -errno; free(link); pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", - prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + prog->name, tp_name, errstr(pfd)); return libbpf_err_ptr(pfd); } link->fd = pfd; return link; } -static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie) +struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, + const char *tp_name) +{ + return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL); +} + +static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) { static const char *const prefixes[] = { - "raw_tp/", - "raw_tracepoint/", - "raw_tp.w/", - "raw_tracepoint.w/", + "raw_tp", + "raw_tracepoint", + "raw_tp.w", + "raw_tracepoint.w", }; size_t i; const char *tp_name = NULL; + *link = NULL; + for (i = 0; i < ARRAY_SIZE(prefixes); i++) { - if (str_has_pfx(prog->sec_name, prefixes[i])) { - tp_name = prog->sec_name + strlen(prefixes[i]); - break; - } + size_t pfx_len; + + if (!str_has_pfx(prog->sec_name, prefixes[i])) + continue; + + pfx_len = strlen(prefixes[i]); + /* no auto-attach case of, e.g., SEC("raw_tp") */ + if (prog->sec_name[pfx_len] == '\0') + return 0; + + if (prog->sec_name[pfx_len] != '/') + continue; + + tp_name = prog->sec_name + pfx_len + 1; + break; } + if (!tp_name) { pr_warn("prog '%s': invalid section name '%s'\n", prog->name, prog->sec_name); - return libbpf_err_ptr(-EINVAL); + return -EINVAL; } - return bpf_program__attach_raw_tracepoint(prog, tp_name); + *link = bpf_program__attach_raw_tracepoint(prog, tp_name); + return libbpf_get_error(*link); } /* Common logic for all BPF program types that attach to a btf_id */ -static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog) +static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog, + const struct bpf_trace_opts *opts) { - char errmsg[STRERR_BUFSIZE]; + LIBBPF_OPTS(bpf_link_create_opts, link_opts); struct bpf_link *link; int prog_fd, pfd; + if (!OPTS_VALID(opts, bpf_trace_opts)) + return libbpf_err_ptr(-EINVAL); + prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); @@ -10457,46 +13071,54 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; - pfd = bpf_raw_tracepoint_open(NULL, prog_fd); + /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ + link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0); + pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts); if (pfd < 0) { pfd = -errno; free(link); pr_warn("prog '%s': failed to attach: %s\n", - prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + prog->name, errstr(pfd)); return libbpf_err_ptr(pfd); } link->fd = pfd; - return (struct bpf_link *)link; + return link; } struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) { - return bpf_program__attach_btf_id(prog); + return bpf_program__attach_btf_id(prog, NULL); +} + +struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog, + const struct bpf_trace_opts *opts) +{ + return bpf_program__attach_btf_id(prog, opts); } struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) { - return bpf_program__attach_btf_id(prog); + return bpf_program__attach_btf_id(prog, NULL); } -static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie) +static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link) { - return bpf_program__attach_trace(prog); + *link = bpf_program__attach_trace(prog); + return libbpf_get_error(*link); } -static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie) +static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link) { - return bpf_program__attach_lsm(prog); + *link = bpf_program__attach_lsm(prog); + return libbpf_get_error(*link); } static struct bpf_link * -bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id, - const char *target_name) +bpf_program_attach_fd(const struct bpf_program *prog, + int target_fd, const char *target_name, + const struct bpf_link_create_opts *opts) { - DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, - .target_btf_id = btf_id); enum bpf_attach_type attach_type; - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; @@ -10511,14 +13133,14 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; - attach_type = bpf_program__get_expected_attach_type(prog); - link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts); + attach_type = bpf_program__expected_attach_type(prog); + link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts); if (link_fd < 0) { link_fd = -errno; free(link); pr_warn("prog '%s': failed to attach to %s: %s\n", prog->name, target_name, - libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; @@ -10528,19 +13150,122 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id struct bpf_link * bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd) { - return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup"); + return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL); } struct bpf_link * bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) { - return bpf_program__attach_fd(prog, netns_fd, 0, "netns"); + return bpf_program_attach_fd(prog, netns_fd, "netns", NULL); +} + +struct bpf_link * +bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd) +{ + return bpf_program_attach_fd(prog, map_fd, "sockmap", NULL); } struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) { /* target_fd/target_ifindex use the same field in LINK_CREATE */ - return bpf_program__attach_fd(prog, ifindex, 0, "xdp"); + return bpf_program_attach_fd(prog, ifindex, "xdp", NULL); +} + +struct bpf_link * +bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, + const struct bpf_cgroup_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); + __u32 relative_id; + int relative_fd; + + if (!OPTS_VALID(opts, bpf_cgroup_opts)) + return libbpf_err_ptr(-EINVAL); + + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + + if (relative_fd && relative_id) { + pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0); + link_create_opts.cgroup.relative_fd = relative_fd; + link_create_opts.cgroup.relative_id = relative_id; + link_create_opts.flags = OPTS_GET(opts, flags, 0); + + return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts); +} + +struct bpf_link * +bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, + const struct bpf_tcx_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); + __u32 relative_id; + int relative_fd; + + if (!OPTS_VALID(opts, bpf_tcx_opts)) + return libbpf_err_ptr(-EINVAL); + + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + + /* validate we don't have unexpected combinations of non-zero fields */ + if (!ifindex) { + pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + if (relative_fd && relative_id) { + pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0); + link_create_opts.tcx.relative_fd = relative_fd; + link_create_opts.tcx.relative_id = relative_id; + link_create_opts.flags = OPTS_GET(opts, flags, 0); + + /* target_fd/target_ifindex use the same field in LINK_CREATE */ + return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts); +} + +struct bpf_link * +bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, + const struct bpf_netkit_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); + __u32 relative_id; + int relative_fd; + + if (!OPTS_VALID(opts, bpf_netkit_opts)) + return libbpf_err_ptr(-EINVAL); + + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + + /* validate we don't have unexpected combinations of non-zero fields */ + if (!ifindex) { + pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + if (relative_fd && relative_id) { + pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0); + link_create_opts.netkit.relative_fd = relative_fd; + link_create_opts.netkit.relative_id = relative_id; + link_create_opts.flags = OPTS_GET(opts, flags, 0); + + return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts); } struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, @@ -10556,17 +13281,22 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, } if (prog->type != BPF_PROG_TYPE_EXT) { - pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", + pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n", prog->name); return libbpf_err_ptr(-EINVAL); } if (target_fd) { - btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); + LIBBPF_OPTS(bpf_link_create_opts, target_opts); + + btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd); if (btf_id < 0) return libbpf_err_ptr(btf_id); - return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace"); + target_opts.target_btf_id = btf_id; + + return bpf_program_attach_fd(prog, target_fd, "freplace", + &target_opts); } else { /* no target, so use raw_tracepoint_open for compatibility * with old kernels @@ -10580,7 +13310,6 @@ bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; __u32 target_fd = 0; @@ -10608,77 +13337,203 @@ bpf_program__attach_iter(const struct bpf_program *prog, link_fd = -errno; free(link); pr_warn("prog '%s': failed to attach to iterator: %s\n", - prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + prog->name, errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; return link; } -static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie) +static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link) { - return bpf_program__attach_iter(prog, NULL); + *link = bpf_program__attach_iter(prog, NULL); + return libbpf_get_error(*link); +} + +struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog, + const struct bpf_netfilter_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, lopts); + struct bpf_link *link; + int prog_fd, link_fd; + + if (!OPTS_VALID(opts, bpf_netfilter_opts)) + return libbpf_err_ptr(-EINVAL); + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("prog '%s': can't attach before loaded\n", prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link = calloc(1, sizeof(*link)); + if (!link) + return libbpf_err_ptr(-ENOMEM); + + link->detach = &bpf_link__detach_fd; + + lopts.netfilter.pf = OPTS_GET(opts, pf, 0); + lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0); + lopts.netfilter.priority = OPTS_GET(opts, priority, 0); + lopts.netfilter.flags = OPTS_GET(opts, flags, 0); + + link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts); + if (link_fd < 0) { + link_fd = -errno; + free(link); + pr_warn("prog '%s': failed to attach to netfilter: %s\n", + prog->name, errstr(link_fd)); + return libbpf_err_ptr(link_fd); + } + link->fd = link_fd; + + return link; } struct bpf_link *bpf_program__attach(const struct bpf_program *prog) { - if (!prog->sec_def || !prog->sec_def->attach_fn) - return libbpf_err_ptr(-ESRCH); + struct bpf_link *link = NULL; + int err; + + if (!prog->sec_def || !prog->sec_def->prog_attach_fn) + return libbpf_err_ptr(-EOPNOTSUPP); - return prog->sec_def->attach_fn(prog, prog->sec_def->cookie); + if (bpf_program__fd(prog) < 0) { + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link); + if (err) + return libbpf_err_ptr(err); + + /* When calling bpf_program__attach() explicitly, auto-attach support + * is expected to work, so NULL returned link is considered an error. + * This is different for skeleton's attach, see comment in + * bpf_object__attach_skeleton(). + */ + if (!link) + return libbpf_err_ptr(-EOPNOTSUPP); + + return link; } +struct bpf_link_struct_ops { + struct bpf_link link; + int map_fd; +}; + static int bpf_link__detach_struct_ops(struct bpf_link *link) { + struct bpf_link_struct_ops *st_link; __u32 zero = 0; - if (bpf_map_delete_elem(link->fd, &zero)) - return -errno; + st_link = container_of(link, struct bpf_link_struct_ops, link); - return 0; + if (st_link->map_fd < 0) + /* w/o a real link */ + return bpf_map_delete_elem(link->fd, &zero); + + return close(link->fd); } struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) { - struct bpf_struct_ops *st_ops; - struct bpf_link *link; - __u32 i, zero = 0; - int err; + struct bpf_link_struct_ops *link; + __u32 zero = 0; + int err, fd; + + if (!bpf_map__is_struct_ops(map)) { + pr_warn("map '%s': can't attach non-struct_ops map\n", map->name); + return libbpf_err_ptr(-EINVAL); + } - if (!bpf_map__is_struct_ops(map) || map->fd == -1) + if (map->fd < 0) { + pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name); return libbpf_err_ptr(-EINVAL); + } link = calloc(1, sizeof(*link)); if (!link) return libbpf_err_ptr(-EINVAL); - st_ops = map->st_ops; - for (i = 0; i < btf_vlen(st_ops->type); i++) { - struct bpf_program *prog = st_ops->progs[i]; - void *kern_data; - int prog_fd; + /* kern_vdata should be prepared during the loading phase. */ + err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); + /* It can be EBUSY if the map has been used to create or + * update a link before. We don't allow updating the value of + * a struct_ops once it is set. That ensures that the value + * never changed. So, it is safe to skip EBUSY. + */ + if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) { + free(link); + return libbpf_err_ptr(err); + } - if (!prog) - continue; + link->link.detach = bpf_link__detach_struct_ops; - prog_fd = bpf_program__fd(prog); - kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; - *(unsigned long *)kern_data = prog_fd; + if (!(map->def.map_flags & BPF_F_LINK)) { + /* w/o a real link */ + link->link.fd = map->fd; + link->map_fd = -1; + return &link->link; } - err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0); - if (err) { - err = -errno; + fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL); + if (fd < 0) { free(link); - return libbpf_err_ptr(err); + return libbpf_err_ptr(fd); } - link->detach = bpf_link__detach_struct_ops; - link->fd = map->fd; + link->link.fd = fd; + link->map_fd = map->fd; - return link; + return &link->link; } +/* + * Swap the back struct_ops of a link with a new struct_ops map. + */ +int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map) +{ + struct bpf_link_struct_ops *st_ops_link; + __u32 zero = 0; + int err; + + if (!bpf_map__is_struct_ops(map)) + return libbpf_err(-EINVAL); + + if (map->fd < 0) { + pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); + return libbpf_err(-EINVAL); + } + + st_ops_link = container_of(link, struct bpf_link_struct_ops, link); + /* Ensure the type of a link is correct */ + if (st_ops_link->map_fd < 0) + return libbpf_err(-EINVAL); + + err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); + /* It can be EBUSY if the map has been used to create or + * update a link before. We don't allow updating the value of + * a struct_ops once it is set. That ensures that the value + * never changed. So, it is safe to skip EBUSY. + */ + if (err && err != -EBUSY) + return err; + + err = bpf_link_update(link->fd, map->fd, NULL); + if (err < 0) + return err; + + st_ops_link->map_fd = map->fd; + + return 0; +} + +typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, + void *private_data); + static enum bpf_perf_event_ret perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, @@ -10727,12 +13582,6 @@ perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, return libbpf_err(ret); } -__attribute__((alias("perf_event_read_simple"))) -enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data); - struct perf_buffer; struct perf_buffer_params { @@ -10818,7 +13667,6 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, int cpu, int map_key) { struct perf_cpu_buf *cpu_buf; - char msg[STRERR_BUFSIZE]; int err; cpu_buf = calloc(1, sizeof(*cpu_buf)); @@ -10834,7 +13682,7 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, if (cpu_buf->fd < 0) { err = -errno; pr_warn("failed to open perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } @@ -10845,14 +13693,14 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, cpu_buf->base = NULL; err = -errno; pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } @@ -10866,24 +13714,30 @@ error: static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p); -DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0) -struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, - perf_buffer_sample_fn sample_cb, - perf_buffer_lost_fn lost_cb, - void *ctx, - const struct perf_buffer_opts *opts) +struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, + perf_buffer_sample_fn sample_cb, + perf_buffer_lost_fn lost_cb, + void *ctx, + const struct perf_buffer_opts *opts) { + const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_buffer_params p = {}; - struct perf_event_attr attr = {}; + struct perf_event_attr attr; + __u32 sample_period; if (!OPTS_VALID(opts, perf_buffer_opts)) return libbpf_err_ptr(-EINVAL); + sample_period = OPTS_GET(opts, sample_period, 1); + if (!sample_period) + sample_period = 1; + + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; - attr.sample_period = 1; - attr.wakeup_events = 1; + attr.wakeup_events = sample_period; p.attr = &attr; p.sample_cb = sample_cb; @@ -10893,26 +13747,14 @@ struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } -COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4) -struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_opts *opts) -{ - return perf_buffer__new_v0_6_0(map_fd, page_cnt, - opts ? opts->sample_cb : NULL, - opts ? opts->lost_cb : NULL, - opts ? opts->ctx : NULL, - NULL); -} - -DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0) -struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, - struct perf_event_attr *attr, - perf_buffer_event_fn event_cb, void *ctx, - const struct perf_buffer_raw_opts *opts) +struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt, + struct perf_event_attr *attr, + perf_buffer_event_fn event_cb, void *ctx, + const struct perf_buffer_raw_opts *opts) { struct perf_buffer_params p = {}; - if (page_cnt == 0 || !attr) + if (!attr) return libbpf_err_ptr(-EINVAL); if (!OPTS_VALID(opts, perf_buffer_raw_opts)) @@ -10928,32 +13770,17 @@ struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } -COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4) -struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, - const struct perf_buffer_raw_opts *opts) -{ - LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts, - .cpu_cnt = opts->cpu_cnt, - .cpus = opts->cpus, - .map_keys = opts->map_keys, - ); - - return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr, - opts->event_cb, opts->ctx, &inner_opts); -} - static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p) { const char *online_cpus_file = "/sys/devices/system/cpu/online"; struct bpf_map_info map; - char msg[STRERR_BUFSIZE]; struct perf_buffer *pb; bool *online = NULL; __u32 map_info_len; int err, i, j, n; - if (page_cnt & (page_cnt - 1)) { + if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) { pr_warn("page count should be power of two, but is %zu\n", page_cnt); return ERR_PTR(-EINVAL); @@ -10962,7 +13789,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, /* best-effort sanity checks */ memset(&map, 0, sizeof(map)); map_info_len = sizeof(map); - err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len); + err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len); if (err) { err = -errno; /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return @@ -10970,7 +13797,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, */ if (err != -EINVAL) { pr_warn("failed to get map info for map FD %d: %s\n", - map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); + map_fd, errstr(err)); return ERR_PTR(err); } pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n", @@ -11000,7 +13827,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, if (pb->epoll_fd < 0) { err = -errno; pr_warn("failed to create epoll instance: %s\n", - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } @@ -11031,7 +13858,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = parse_cpu_mask_file(online_cpus_file, &online, &n); if (err) { - pr_warn("failed to get online CPU mask: %d\n", err); + pr_warn("failed to get online CPU mask: %s\n", errstr(err)); goto error; } @@ -11062,7 +13889,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = -errno; pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", cpu, map_key, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } @@ -11073,7 +13900,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = -errno; pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", cpu, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } j++; @@ -11168,7 +13995,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("error while processing records: %d\n", err); + pr_warn("error while processing records: %s\n", errstr(err)); return libbpf_err(err); } } @@ -11202,6 +14029,22 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) return cpu_buf->fd; } +int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size) +{ + struct perf_cpu_buf *cpu_buf; + + if (buf_idx >= pb->cpu_cnt) + return libbpf_err(-EINVAL); + + cpu_buf = pb->cpu_bufs[buf_idx]; + if (!cpu_buf) + return libbpf_err(-ENOENT); + + *buf = cpu_buf->base; + *buf_size = pb->mmap_size; + return 0; +} + /* * Consume data from perf ring buffer corresponding to slot *buf_idx* in * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to @@ -11236,261 +14079,14 @@ int perf_buffer__consume(struct perf_buffer *pb) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err); + pr_warn("perf_buffer: failed to process records in buffer #%d: %s\n", + i, errstr(err)); return libbpf_err(err); } } return 0; } -struct bpf_prog_info_array_desc { - int array_offset; /* e.g. offset of jited_prog_insns */ - int count_offset; /* e.g. offset of jited_prog_len */ - int size_offset; /* > 0: offset of rec size, - * < 0: fix size of -size_offset - */ -}; - -static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = { - [BPF_PROG_INFO_JITED_INSNS] = { - offsetof(struct bpf_prog_info, jited_prog_insns), - offsetof(struct bpf_prog_info, jited_prog_len), - -1, - }, - [BPF_PROG_INFO_XLATED_INSNS] = { - offsetof(struct bpf_prog_info, xlated_prog_insns), - offsetof(struct bpf_prog_info, xlated_prog_len), - -1, - }, - [BPF_PROG_INFO_MAP_IDS] = { - offsetof(struct bpf_prog_info, map_ids), - offsetof(struct bpf_prog_info, nr_map_ids), - -(int)sizeof(__u32), - }, - [BPF_PROG_INFO_JITED_KSYMS] = { - offsetof(struct bpf_prog_info, jited_ksyms), - offsetof(struct bpf_prog_info, nr_jited_ksyms), - -(int)sizeof(__u64), - }, - [BPF_PROG_INFO_JITED_FUNC_LENS] = { - offsetof(struct bpf_prog_info, jited_func_lens), - offsetof(struct bpf_prog_info, nr_jited_func_lens), - -(int)sizeof(__u32), - }, - [BPF_PROG_INFO_FUNC_INFO] = { - offsetof(struct bpf_prog_info, func_info), - offsetof(struct bpf_prog_info, nr_func_info), - offsetof(struct bpf_prog_info, func_info_rec_size), - }, - [BPF_PROG_INFO_LINE_INFO] = { - offsetof(struct bpf_prog_info, line_info), - offsetof(struct bpf_prog_info, nr_line_info), - offsetof(struct bpf_prog_info, line_info_rec_size), - }, - [BPF_PROG_INFO_JITED_LINE_INFO] = { - offsetof(struct bpf_prog_info, jited_line_info), - offsetof(struct bpf_prog_info, nr_jited_line_info), - offsetof(struct bpf_prog_info, jited_line_info_rec_size), - }, - [BPF_PROG_INFO_PROG_TAGS] = { - offsetof(struct bpf_prog_info, prog_tags), - offsetof(struct bpf_prog_info, nr_prog_tags), - -(int)sizeof(__u8) * BPF_TAG_SIZE, - }, - -}; - -static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, - int offset) -{ - __u32 *array = (__u32 *)info; - - if (offset >= 0) - return array[offset / sizeof(__u32)]; - return -(int)offset; -} - -static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, - int offset) -{ - __u64 *array = (__u64 *)info; - - if (offset >= 0) - return array[offset / sizeof(__u64)]; - return -(int)offset; -} - -static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset, - __u32 val) -{ - __u32 *array = (__u32 *)info; - - if (offset >= 0) - array[offset / sizeof(__u32)] = val; -} - -static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset, - __u64 val) -{ - __u64 *array = (__u64 *)info; - - if (offset >= 0) - array[offset / sizeof(__u64)] = val; -} - -struct bpf_prog_info_linear * -bpf_program__get_prog_info_linear(int fd, __u64 arrays) -{ - struct bpf_prog_info_linear *info_linear; - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - __u32 data_len = 0; - int i, err; - void *ptr; - - if (arrays >> BPF_PROG_INFO_LAST_ARRAY) - return libbpf_err_ptr(-EINVAL); - - /* step 1: get array dimensions */ - err = bpf_obj_get_info_by_fd(fd, &info, &info_len); - if (err) { - pr_debug("can't get prog info: %s", strerror(errno)); - return libbpf_err_ptr(-EFAULT); - } - - /* step 2: calculate total size of all arrays */ - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - bool include_array = (arrays & (1UL << i)) > 0; - struct bpf_prog_info_array_desc *desc; - __u32 count, size; - - desc = bpf_prog_info_array_desc + i; - - /* kernel is too old to support this field */ - if (info_len < desc->array_offset + sizeof(__u32) || - info_len < desc->count_offset + sizeof(__u32) || - (desc->size_offset > 0 && info_len < desc->size_offset)) - include_array = false; - - if (!include_array) { - arrays &= ~(1UL << i); /* clear the bit */ - continue; - } - - count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - - data_len += count * size; - } - - /* step 3: allocate continuous memory */ - data_len = roundup(data_len, sizeof(__u64)); - info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len); - if (!info_linear) - return libbpf_err_ptr(-ENOMEM); - - /* step 4: fill data to info_linear->info */ - info_linear->arrays = arrays; - memset(&info_linear->info, 0, sizeof(info)); - ptr = info_linear->data; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u32 count, size; - - if ((arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - bpf_prog_info_set_offset_u32(&info_linear->info, - desc->count_offset, count); - bpf_prog_info_set_offset_u32(&info_linear->info, - desc->size_offset, size); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, - ptr_to_u64(ptr)); - ptr += count * size; - } - - /* step 5: call syscall again to get required arrays */ - err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len); - if (err) { - pr_debug("can't get prog info: %s", strerror(errno)); - free(info_linear); - return libbpf_err_ptr(-EFAULT); - } - - /* step 6: verify the data */ - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u32 v1, v2; - - if ((arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, - desc->count_offset); - if (v1 != v2) - pr_warn("%s: mismatch in element count\n", __func__); - - v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, - desc->size_offset); - if (v1 != v2) - pr_warn("%s: mismatch in rec size\n", __func__); - } - - /* step 7: update info_len and data_len */ - info_linear->info_len = sizeof(struct bpf_prog_info); - info_linear->data_len = data_len; - - return info_linear; -} - -void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear) -{ - int i; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u64 addr, offs; - - if ((info_linear->arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - addr = bpf_prog_info_read_offset_u64(&info_linear->info, - desc->array_offset); - offs = addr - ptr_to_u64(info_linear->data); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, offs); - } -} - -void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) -{ - int i; - - for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { - struct bpf_prog_info_array_desc *desc; - __u64 addr, offs; - - if ((info_linear->arrays & (1UL << i)) == 0) - continue; - - desc = bpf_prog_info_array_desc + i; - offs = bpf_prog_info_read_offset_u64(&info_linear->info, - desc->array_offset); - addr = offs + ptr_to_u64(info_linear->data); - bpf_prog_info_set_offset_u64(&info_linear->info, - desc->array_offset, addr); - } -} - int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name) @@ -11500,12 +14096,12 @@ int bpf_program__set_attach_target(struct bpf_program *prog, if (!prog || attach_prog_fd < 0) return libbpf_err(-EINVAL); - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); if (attach_prog_fd && !attach_func_name) { - /* remember attach_prog_fd and let bpf_program__load() find - * BTF ID during the program load + /* Store attach_prog_fd. The BTF ID will be resolved later during + * the normal object/program load phase. */ prog->attach_prog_fd = attach_prog_fd; return 0; @@ -11513,7 +14109,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog, if (attach_prog_fd) { btf_id = libbpf_find_prog_btf_id(attach_func_name, - attach_prog_fd); + attach_prog_fd, prog->obj->token_fd); if (btf_id < 0) return libbpf_err(btf_id); } else { @@ -11595,14 +14191,14 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) fd = open(fcpu, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; - pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); + pr_warn("Failed to open cpu mask file %s: %s\n", fcpu, errstr(err)); return err; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { err = len ? -errno : -EINVAL; - pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); + pr_warn("Failed to read cpu mask from %s: %s\n", fcpu, errstr(err)); return err; } if (len >= sizeof(buf)) { @@ -11640,115 +14236,169 @@ int libbpf_num_possible_cpus(void) return tmp_cpus; } -int bpf_object__open_skeleton(struct bpf_object_skeleton *s, - const struct bpf_object_open_opts *opts) +static int populate_skeleton_maps(const struct bpf_object *obj, + struct bpf_map_skeleton *maps, + size_t map_cnt, size_t map_skel_sz) { - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts, - .object_name = s->name, - ); - struct bpf_object *obj; - int i, err; - - /* Attempt to preserve opts->object_name, unless overriden by user - * explicitly. Overwriting object name for skeletons is discouraged, - * as it breaks global data maps, because they contain object name - * prefix as their own map name prefix. When skeleton is generated, - * bpftool is making an assumption that this name will stay the same. - */ - if (opts) { - memcpy(&skel_opts, opts, sizeof(*opts)); - if (!opts->object_name) - skel_opts.object_name = s->name; - } - - obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts); - err = libbpf_get_error(obj); - if (err) { - pr_warn("failed to initialize skeleton BPF object '%s': %d\n", - s->name, err); - return libbpf_err(err); - } - - *s->obj = obj; + int i; - for (i = 0; i < s->map_cnt; i++) { - struct bpf_map **map = s->maps[i].map; - const char *name = s->maps[i].name; - void **mmaped = s->maps[i].mmaped; + for (i = 0; i < map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz; + struct bpf_map **map = map_skel->map; + const char *name = map_skel->name; + void **mmaped = map_skel->mmaped; *map = bpf_object__find_map_by_name(obj, name); if (!*map) { pr_warn("failed to find skeleton map '%s'\n", name); - return libbpf_err(-ESRCH); + return -ESRCH; } /* externs shouldn't be pre-setup from user code */ if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) *mmaped = (*map)->mmaped; } + return 0; +} - for (i = 0; i < s->prog_cnt; i++) { - struct bpf_program **prog = s->progs[i].prog; - const char *name = s->progs[i].name; +static int populate_skeleton_progs(const struct bpf_object *obj, + struct bpf_prog_skeleton *progs, + size_t prog_cnt, size_t prog_skel_sz) +{ + int i; + + for (i = 0; i < prog_cnt; i++) { + struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz; + struct bpf_program **prog = prog_skel->prog; + const char *name = prog_skel->name; *prog = bpf_object__find_program_by_name(obj, name); if (!*prog) { pr_warn("failed to find skeleton program '%s'\n", name); - return libbpf_err(-ESRCH); + return -ESRCH; } } + return 0; +} + +int bpf_object__open_skeleton(struct bpf_object_skeleton *s, + const struct bpf_object_open_opts *opts) +{ + struct bpf_object *obj; + int err; + + obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_warn("failed to initialize skeleton BPF object '%s': %s\n", + s->name, errstr(err)); + return libbpf_err(err); + } + + *s->obj = obj; + err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz); + if (err) { + pr_warn("failed to populate skeleton maps for '%s': %s\n", s->name, errstr(err)); + return libbpf_err(err); + } + + err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz); + if (err) { + pr_warn("failed to populate skeleton progs for '%s': %s\n", s->name, errstr(err)); + return libbpf_err(err); + } return 0; } +int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) +{ + int err, len, var_idx, i; + const char *var_name; + const struct bpf_map *map; + struct btf *btf; + __u32 map_type_id; + const struct btf_type *map_type, *var_type; + const struct bpf_var_skeleton *var_skel; + struct btf_var_secinfo *var; + + if (!s->obj) + return libbpf_err(-EINVAL); + + btf = bpf_object__btf(s->obj); + if (!btf) { + pr_warn("subskeletons require BTF at runtime (object %s)\n", + bpf_object__name(s->obj)); + return libbpf_err(-errno); + } + + err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz); + if (err) { + pr_warn("failed to populate subskeleton maps: %s\n", errstr(err)); + return libbpf_err(err); + } + + err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz); + if (err) { + pr_warn("failed to populate subskeleton maps: %s\n", errstr(err)); + return libbpf_err(err); + } + + for (var_idx = 0; var_idx < s->var_cnt; var_idx++) { + var_skel = (void *)s->vars + var_idx * s->var_skel_sz; + map = *var_skel->map; + map_type_id = bpf_map__btf_value_type_id(map); + map_type = btf__type_by_id(btf, map_type_id); + + if (!btf_is_datasec(map_type)) { + pr_warn("type for map '%1$s' is not a datasec: %2$s\n", + bpf_map__name(map), + __btf_kind_str(btf_kind(map_type))); + return libbpf_err(-EINVAL); + } + + len = btf_vlen(map_type); + var = btf_var_secinfos(map_type); + for (i = 0; i < len; i++, var++) { + var_type = btf__type_by_id(btf, var->type); + var_name = btf__name_by_offset(btf, var_type->name_off); + if (strcmp(var_name, var_skel->name) == 0) { + *var_skel->addr = map->mmaped + var->offset; + break; + } + } + } + return 0; +} + +void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s) +{ + if (!s) + return; + free(s->maps); + free(s->progs); + free(s->vars); + free(s); +} + int bpf_object__load_skeleton(struct bpf_object_skeleton *s) { int i, err; err = bpf_object__load(*s->obj); if (err) { - pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); + pr_warn("failed to load BPF skeleton '%s': %s\n", s->name, errstr(err)); return libbpf_err(err); } for (i = 0; i < s->map_cnt; i++) { - struct bpf_map *map = *s->maps[i].map; - size_t mmap_sz = bpf_map_mmap_sz(map); - int prot, map_fd = bpf_map__fd(map); - void **mmaped = s->maps[i].mmaped; + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; - if (!mmaped) + if (!map_skel->mmaped) continue; - if (!(map->def.map_flags & BPF_F_MMAPABLE)) { - *mmaped = NULL; - continue; - } - - if (map->def.map_flags & BPF_F_RDONLY_PROG) - prot = PROT_READ; - else - prot = PROT_READ | PROT_WRITE; - - /* Remap anonymous mmap()-ed "map initialization image" as - * a BPF map-backed mmap()-ed memory, but preserving the same - * memory address. This will cause kernel to change process' - * page table to point to a different piece of kernel memory, - * but from userspace point of view memory address (and its - * contents, being identical at this point) will stay the - * same. This mapping will be released by bpf_object__close() - * as per normal clean up procedure, so we don't need to worry - * about it from skeleton's clean up perspective. - */ - *mmaped = mmap(map->mmaped, mmap_sz, prot, - MAP_SHARED | MAP_FIXED, map_fd, 0); - if (*mmaped == MAP_FAILED) { - err = -errno; - *mmaped = NULL; - pr_warn("failed to re-mmap() map '%s': %d\n", - bpf_map__name(map), err); - return libbpf_err(err); - } + *map_skel->mmaped = map->mmaped; } return 0; @@ -11759,21 +14409,75 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) int i, err; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_program *prog = *s->progs[i].prog; - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_program *prog = *prog_skel->prog; + struct bpf_link **link = prog_skel->link; - if (!prog->load) + if (!prog->autoload || !prog->autoattach) continue; /* auto-attaching not supported for this program */ - if (!prog->sec_def || !prog->sec_def->attach_fn) + if (!prog->sec_def || !prog->sec_def->prog_attach_fn) continue; - *link = bpf_program__attach(prog); - err = libbpf_get_error(*link); + /* if user already set the link manually, don't attempt auto-attach */ + if (*link) + continue; + + err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link); if (err) { - pr_warn("failed to auto-attach program '%s': %d\n", - bpf_program__name(prog), err); + pr_warn("prog '%s': failed to auto-attach: %s\n", + bpf_program__name(prog), errstr(err)); + return libbpf_err(err); + } + + /* It's possible that for some SEC() definitions auto-attach + * is supported in some cases (e.g., if definition completely + * specifies target information), but is not in other cases. + * SEC("uprobe") is one such case. If user specified target + * binary and function name, such BPF program can be + * auto-attached. But if not, it shouldn't trigger skeleton's + * attach to fail. It should just be skipped. + * attach_fn signals such case with returning 0 (no error) and + * setting link to NULL. + */ + } + + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; + struct bpf_link **link; + + if (!map->autocreate || !map->autoattach) + continue; + + /* only struct_ops maps can be attached */ + if (!bpf_map__is_struct_ops(map)) + continue; + + /* skeleton is created with earlier version of bpftool, notify user */ + if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) { + pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n", + bpf_map__name(map)); + continue; + } + + link = map_skel->link; + if (!link) { + pr_warn("map '%s': BPF map skeleton link is uninitialized\n", + bpf_map__name(map)); + continue; + } + + if (*link) + continue; + + *link = bpf_map__attach_struct_ops(map); + if (!*link) { + err = -errno; + pr_warn("map '%s': failed to auto-attach: %s\n", + bpf_map__name(map), errstr(err)); return libbpf_err(err); } } @@ -11786,17 +14490,33 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) int i; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_link **link = prog_skel->link; bpf_link__destroy(*link); *link = NULL; } + + if (s->map_skel_sz < sizeof(struct bpf_map_skeleton)) + return; + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_link **link = map_skel->link; + + if (link) { + bpf_link__destroy(*link); + *link = NULL; + } + } } void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) { - if (s->progs) - bpf_object__detach_skeleton(s); + if (!s) + return; + + bpf_object__detach_skeleton(s); if (s->obj) bpf_object__close(*s->obj); free(s->maps); |
