diff options
Diffstat (limited to 'tools/lib')
73 files changed, 4655 insertions, 1501 deletions
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 044860ac1ed1..8665c799e0fa 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -31,11 +31,7 @@ CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -fPIC ifeq ($(DEBUG),0) -ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 -else - CFLAGS += -O6 -endif endif ifeq ($(DEBUG),0) @@ -99,7 +95,7 @@ install_lib: $(LIBFILE) $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) -HDRS := cpu.h debug.h io.h +HDRS := cpu.h debug.h io.h io_dir.h FD_HDRS := fd/array.h FS_HDRS := fs/fs.h fs/tracing_path.h INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/api diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 337fde770e45..edec23406dbc 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -296,7 +296,7 @@ int filename__read_int(const char *filename, int *value) int fd = open(filename, O_RDONLY), err = -1; if (fd < 0) - return -1; + return -errno; if (read(fd, line, sizeof(line)) > 0) { *value = atoi(line); @@ -314,7 +314,7 @@ static int filename__read_ull_base(const char *filename, int fd = open(filename, O_RDONLY), err = -1; if (fd < 0) - return -1; + return -errno; if (read(fd, line, sizeof(line)) > 0) { *value = strtoull(line, NULL, base); @@ -372,7 +372,7 @@ int filename__write_int(const char *filename, int value) char buf[64]; if (fd < 0) - return err; + return -errno; sprintf(buf, "%d", value); if (write(fd, buf, sizeof(buf)) == sizeof(buf)) diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 30745f35d0d2..834fd64c7130 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -69,7 +69,7 @@ char *get_tracing_file(const char *name) { char *file; - if (asprintf(&file, "%s/%s", tracing_path_mount(), name) < 0) + if (asprintf(&file, "%s%s", tracing_path_mount(), name) < 0) return NULL; return file; diff --git a/tools/lib/api/io.h 
b/tools/lib/api/io.h index 84adf8102018..1731996b2c32 100644 --- a/tools/lib/api/io.h +++ b/tools/lib/api/io.h @@ -43,48 +43,55 @@ static inline void io__init(struct io *io, int fd, io->eof = false; } -/* Reads one character from the "io" file with similar semantics to fgetc. */ -static inline int io__get_char(struct io *io) +/* Read from fd filling the buffer. Called when io->data == io->end. */ +static inline int io__fill_buffer(struct io *io) { - char *ptr = io->data; + ssize_t n; if (io->eof) return -1; - if (ptr == io->end) { - ssize_t n; - - if (io->timeout_ms != 0) { - struct pollfd pfds[] = { - { - .fd = io->fd, - .events = POLLIN, - }, - }; - - n = poll(pfds, 1, io->timeout_ms); - if (n == 0) - errno = ETIMEDOUT; - if (n > 0 && !(pfds[0].revents & POLLIN)) { - errno = EIO; - n = -1; - } - if (n <= 0) { - io->eof = true; - return -1; - } + if (io->timeout_ms != 0) { + struct pollfd pfds[] = { + { + .fd = io->fd, + .events = POLLIN, + }, + }; + + n = poll(pfds, 1, io->timeout_ms); + if (n == 0) + errno = ETIMEDOUT; + if (n > 0 && !(pfds[0].revents & POLLIN)) { + errno = EIO; + n = -1; } - n = read(io->fd, io->buf, io->buf_len); - if (n <= 0) { io->eof = true; return -1; } - ptr = &io->buf[0]; - io->end = &io->buf[n]; } - io->data = ptr + 1; - return *ptr; + n = read(io->fd, io->buf, io->buf_len); + + if (n <= 0) { + io->eof = true; + return -1; + } + io->data = &io->buf[0]; + io->end = &io->buf[n]; + return 0; +} + +/* Reads one character from the "io" file with similar semantics to fgetc. */ +static inline int io__get_char(struct io *io) +{ + if (io->data == io->end) { + int ret = io__fill_buffer(io); + + if (ret) + return ret; + } + return *io->data++; } /* Read a hexadecimal value with no 0x prefix into the out argument hex. 
If the @@ -182,6 +189,7 @@ static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_ err_out: free(line); *line_out = NULL; + *line_len_out = 0; return -ENOMEM; } diff --git a/tools/lib/api/io_dir.h b/tools/lib/api/io_dir.h new file mode 100644 index 000000000000..ef83e967e48c --- /dev/null +++ b/tools/lib/api/io_dir.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* + * Lightweight directory reading library. + */ +#ifndef __API_IO_DIR__ +#define __API_IO_DIR__ + +#include <dirent.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <linux/limits.h> + +#if !defined(SYS_getdents64) +#if defined(__x86_64__) || defined(__arm__) + #define SYS_getdents64 217 +#elif defined(__i386__) || defined(__s390x__) || defined(__sh__) + #define SYS_getdents64 220 +#elif defined(__alpha__) + #define SYS_getdents64 377 +#elif defined(__mips__) + #define SYS_getdents64 308 +#elif defined(__powerpc64__) || defined(__powerpc__) + #define SYS_getdents64 202 +#elif defined(__sparc64__) || defined(__sparc__) + #define SYS_getdents64 154 +#elif defined(__xtensa__) + #define SYS_getdents64 60 +#else + #define SYS_getdents64 61 +#endif +#endif /* !defined(SYS_getdents64) */ + +static inline ssize_t perf_getdents64(int fd, void *dirp, size_t count) +{ +#ifdef MEMORY_SANITIZER + memset(dirp, 0, count); +#endif + return syscall(SYS_getdents64, fd, dirp, count); +} + +struct io_dirent64 { + ino64_t d_ino; /* 64-bit inode number */ + off64_t d_off; /* 64-bit offset to next structure */ + unsigned short d_reclen; /* Size of this dirent */ + unsigned char d_type; /* File type */ + char d_name[NAME_MAX + 1]; /* Filename (null-terminated) */ +}; + +struct io_dir { + int dirfd; + ssize_t available_bytes; + struct io_dirent64 *next; + struct io_dirent64 buff[4]; +}; + +static inline void io_dir__init(struct io_dir *iod, int dirfd) +{ + iod->dirfd = dirfd; + iod->available_bytes = 
0; +} + +static inline void io_dir__rewinddir(struct io_dir *iod) +{ + lseek(iod->dirfd, 0, SEEK_SET); + iod->available_bytes = 0; +} + +static inline struct io_dirent64 *io_dir__readdir(struct io_dir *iod) +{ + struct io_dirent64 *entry; + + if (iod->available_bytes <= 0) { + ssize_t rc = perf_getdents64(iod->dirfd, iod->buff, sizeof(iod->buff)); + + if (rc <= 0) + return NULL; + iod->available_bytes = rc; + iod->next = iod->buff; + } + entry = iod->next; + iod->next = (struct io_dirent64 *)((char *)entry + entry->d_reclen); + iod->available_bytes -= entry->d_reclen; + return entry; +} + +static inline bool io_dir__is_dir(const struct io_dir *iod, struct io_dirent64 *dent) +{ + if (dent->d_type == DT_UNKNOWN) { + struct stat st; + + if (fstatat(iod->dirfd, dent->d_name, &st, /*flags=*/0)) + return false; + + if (S_ISDIR(st.st_mode)) { + dent->d_type = DT_DIR; + return true; + } + } + return dent->d_type == DT_DIR; +} + +#endif /* __API_IO_DIR__ */ diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index c3e4871967bc..51255c69754d 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -100,3 +100,43 @@ bool __bitmap_intersects(const unsigned long *bitmap1, return true; return false; } + +void __bitmap_set(unsigned long *map, unsigned int start, int len) +{ + unsigned long *p = map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + while (len - bits_to_set >= 0) { + *p |= mask_to_set; + len -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + if (len) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + *p |= mask_to_set; + } +} + +void __bitmap_clear(unsigned long *map, unsigned int start, int len) +{ + unsigned long *p = map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = 
BITMAP_FIRST_WORD_MASK(start); + + while (len - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (len) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + *p &= ~mask_to_clear; + } +} diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 0da84cb9e66d..f02725b123b3 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -5,3 +5,4 @@ TAGS tags cscope.* /bpf_helper_defs.h +fixdep diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index b6619199a706..e2cd558ca0b4 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o features.o + usdt.o zip.o elf.o features.o btf_iter.o btf_relocate.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 2cf892774346..168140f8e646 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -53,15 +53,9 @@ include $(srctree)/tools/scripts/Makefile.include # copy a bit from Linux kbuild -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - INCLUDES = -I$(or $(OUTPUT),.) \ - -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi + -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \ + -I$(srctree)/tools/arch/$(SRCARCH)/include export prefix libdir src obj @@ -95,12 +89,6 @@ override CFLAGS += $(CLANG_CROSS_FLAGS) # flags specific for shared library SHLIB_FLAGS := -DSHARED -fPIC -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. 
@@ -108,6 +96,8 @@ MAKEOVERRIDES= all: +OUTPUT ?= ./ +OUTPUT := $(abspath $(OUTPUT))/ export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include @@ -141,7 +131,10 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force $(BPF_GENERATED) +$(SHARED_OBJDIR) $(STATIC_OBJDIR): + $(Q)mkdir -p $@ + +$(BPF_IN_SHARED): force $(BPF_GENERATED) | $(SHARED_OBJDIR) @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -151,9 +144,11 @@ $(BPF_IN_SHARED): force $(BPF_GENERATED) @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \ (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true + $(SILENT_MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= OUTPUT=$(SHARED_OBJDIR) $(SHARED_OBJDIR)fixdep $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force $(BPF_GENERATED) +$(BPF_IN_STATIC): force $(BPF_GENERATED) | $(STATIC_OBJDIR) + $(SILENT_MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= OUTPUT=$(STATIC_OBJDIR) $(STATIC_OBJDIR)fixdep $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h @@ -263,7 +258,7 @@ install_pkgconfig: $(PC_FILE) install: install_lib install_pkgconfig install_headers -clean: +clean: fixdep-clean $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_GENERATED) \ $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 97ec005c3c47..a9c3e33d0f8a 
100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -105,7 +105,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) */ int probe_memcg_account(int token_fd) { - const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); struct bpf_insn insns[] = { BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns), BPF_EXIT_INSN(), @@ -238,7 +238,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -311,6 +311,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0); attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); + attr.fd_array_cnt = OPTS_GET(opts, fd_array_cnt, 0); if (log_level) { attr.log_buf = ptr_to_u64(log_buf); @@ -766,6 +767,7 @@ int bpf_link_create(int prog_fd, int target_fd, return libbpf_err(-EINVAL); break; case BPF_TRACE_KPROBE_MULTI: + case BPF_TRACE_KPROBE_SESSION: attr.link_create.kprobe_multi.flags = OPTS_GET(opts, kprobe_multi.flags, 0); attr.link_create.kprobe_multi.cnt = OPTS_GET(opts, kprobe_multi.cnt, 0); attr.link_create.kprobe_multi.syms = ptr_to_u64(OPTS_GET(opts, kprobe_multi.syms, 0)); @@ -775,6 +777,7 @@ int bpf_link_create(int prog_fd, int target_fd, return libbpf_err(-EINVAL); break; case BPF_TRACE_UPROBE_MULTI: + case BPF_TRACE_UPROBE_SESSION: attr.link_create.uprobe_multi.flags = OPTS_GET(opts, uprobe_multi.flags, 0); attr.link_create.uprobe_multi.cnt = OPTS_GET(opts, uprobe_multi.cnt, 0); attr.link_create.uprobe_multi.path = ptr_to_u64(OPTS_GET(opts, uprobe_multi.path, 0)); @@ -785,6 +788,7 @@ int bpf_link_create(int prog_fd, 
int target_fd, if (!OPTS_ZEROED(opts, uprobe_multi)) return libbpf_err(-EINVAL); break; + case BPF_TRACE_RAW_TP: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_MODIFY_RETURN: @@ -1093,7 +1097,7 @@ int bpf_map_get_fd_by_id(__u32 id) int bpf_btf_get_fd_by_id_opts(__u32 id, const struct bpf_get_fd_by_id_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, open_flags); + const size_t attr_sz = offsetofend(union bpf_attr, fd_by_id_token_fd); union bpf_attr attr; int fd; @@ -1103,6 +1107,7 @@ int bpf_btf_get_fd_by_id_opts(__u32 id, memset(&attr, 0, attr_sz); attr.btf_id = id; attr.open_flags = OPTS_GET(opts, open_flags, 0); + attr.fd_by_id_token_fd = OPTS_GET(opts, token_fd, 0); fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); @@ -1173,20 +1178,31 @@ int bpf_link_get_info_by_fd(int link_fd, struct bpf_link_info *info, __u32 *info return bpf_obj_get_info_by_fd(link_fd, info, info_len); } -int bpf_raw_tracepoint_open(const char *name, int prog_fd) +int bpf_raw_tracepoint_open_opts(int prog_fd, struct bpf_raw_tp_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint); union bpf_attr attr; int fd; + if (!OPTS_VALID(opts, bpf_raw_tp_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); - attr.raw_tracepoint.name = ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; + attr.raw_tracepoint.name = ptr_to_u64(OPTS_GET(opts, tp_name, NULL)); + attr.raw_tracepoint.cookie = OPTS_GET(opts, cookie, 0); fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz); return libbpf_err_errno(fd); } +int bpf_raw_tracepoint_open(const char *name, int prog_fd) +{ + LIBBPF_OPTS(bpf_raw_tp_opts, opts, .tp_name = name); + + return bpf_raw_tracepoint_open_opts(prog_fd, &opts); +} + int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 
df0db2f0cdb7..777627d33d25 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -100,16 +100,19 @@ struct bpf_prog_load_opts { __u32 log_level; __u32 log_size; char *log_buf; - /* output: actual total log contents size (including termintaing zero). + /* output: actual total log contents size (including terminating zero). * It could be both larger than original log_size (if log was * truncated), or smaller (if log buffer wasn't filled completely). * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; __u32 token_fd; + + /* if set, provides the length of fd_array */ + __u32 fd_array_cnt; size_t :0; }; -#define bpf_prog_load_opts__last_field token_fd +#define bpf_prog_load_opts__last_field fd_array_cnt LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, @@ -129,7 +132,7 @@ struct bpf_btf_load_opts { char *log_buf; __u32 log_level; __u32 log_size; - /* output: actual total log contents size (including termintaing zero). + /* output: actual total log contents size (including terminating zero). * It could be both larger than original log_size (if log was * truncated), or smaller (if log buffer wasn't filled completely). * If kernel doesn't support this feature, log_size is left unchanged. 
@@ -484,9 +487,10 @@ LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id); struct bpf_get_fd_by_id_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 open_flags; /* permissions requested for the operation on fd */ + __u32 token_fd; size_t :0; }; -#define bpf_get_fd_by_id_opts__last_field open_flags +#define bpf_get_fd_by_id_opts__last_field token_fd LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id, @@ -617,6 +621,15 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); +struct bpf_raw_tp_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + const char *tp_name; + __u64 cookie; + size_t :0; +}; +#define bpf_raw_tp_opts__last_field cookie + +LIBBPF_API int bpf_raw_tracepoint_open_opts(int prog_fd, struct bpf_raw_tp_opts *opts); LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 1ce738d91685..b997c68bd945 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -2,7 +2,7 @@ #ifndef __BPF_CORE_READ_H__ #define __BPF_CORE_READ_H__ -#include <bpf/bpf_helpers.h> +#include "bpf_helpers.h" /* * enum bpf_field_info_kind is passed as a second argument into @@ -104,6 +104,7 @@ enum bpf_enum_value_kind { case 2: val = *(const unsigned short *)p; break; \ case 4: val = *(const unsigned int *)p; break; \ case 8: val = *(const unsigned long long *)p; break; \ + default: val = 0; break; \ } \ val <<= __CORE_RELO(s, field, LSHIFT_U64); \ if (__CORE_RELO(s, field, SIGNED)) \ @@ -387,7 +388,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; #define ___arrow10(a, b, c, d, e, f, g, h, i, j) 
a->b->c->d->e->f->g->h->i->j #define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) +#if defined(__clang__) && (__clang_major__ >= 19) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#elif defined(__GNUC__) && (__GNUC__ >= 14) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#else #define ___type(...) typeof(___arrow(__VA_ARGS__)) +#endif #define ___read(read_fn, dst, src_type, src, accessor) \ read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index fdf44403ff36..6ff963a491d9 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -34,6 +34,7 @@ struct bpf_gen { void *data_cur; void *insn_start; void *insn_cur; + bool swapped_endian; ssize_t cleanup_label; __u32 nr_progs; __u32 nr_maps; diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index cd17f6d0791f..a50773d4616e 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -15,6 +15,14 @@ #define __array(name, val) typeof(val) *name[] #define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name +#ifndef likely +#define likely(x) (__builtin_expect(!!(x), 1)) +#endif + +#ifndef unlikely +#define unlikely(x) (__builtin_expect(!!(x), 0)) +#endif + /* * Helper macro to place programs, maps, license in * different sections in elf_bpf file. Section names @@ -137,7 +145,8 @@ /* * Helper function to perform a tail call with a constant/immediate map slot. 
*/ -#if __clang_major__ >= 8 && defined(__bpf__) +#if (defined(__clang__) && __clang_major__ >= 8) || (!defined(__clang__) && __GNUC__ > 12) +#if defined(__bpf__) static __always_inline void bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) { @@ -165,6 +174,7 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) : "r0", "r1", "r2", "r3", "r4", "r5"); } #endif +#endif enum libbpf_pin_type { LIBBPF_PIN_NONE, @@ -183,11 +193,23 @@ enum libbpf_tristate { #define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted"))) #define __kptr __attribute__((btf_type_tag("kptr"))) #define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr"))) +#define __uptr __attribute__((btf_type_tag("uptr"))) -#define bpf_ksym_exists(sym) ({ \ - _Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak"); \ - !!sym; \ +#if defined (__clang__) +#define bpf_ksym_exists(sym) ({ \ + _Static_assert(!__builtin_constant_p(!!sym), \ + #sym " should be marked as __weak"); \ + !!sym; \ +}) +#elif __GNUC__ > 8 +#define bpf_ksym_exists(sym) ({ \ + _Static_assert(__builtin_has_attribute (*sym, __weak__), \ + #sym " should be marked as __weak"); \ + !!sym; \ }) +#else +#define bpf_ksym_exists(sym) !!sym +#endif #define __arg_ctx __attribute__((btf_decl_tag("arg:ctx"))) #define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull"))) @@ -328,7 +350,7 @@ extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __weak __ksym; * I.e., it looks almost like high-level for each loop in other languages, * supports continue/break, and is verifiable by BPF verifier. * - * For iterating integers, the difference betwen bpf_for_each(num, i, N, M) + * For iterating integers, the difference between bpf_for_each(num, i, N, M) * and bpf_for(i, N, M) is in that bpf_for() provides additional proof to * verifier that i is in [N, M) range, and in bpf_for_each() case i is `int * *`, not just `int`. So for integers bpf_for() is more convenient. 
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 1c13f8e88833..a8f6cd4841b0 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -163,7 +163,7 @@ struct pt_regs___s390 { unsigned long orig_gpr2; -}; +} __attribute__((preserve_access_index)); /* s390 provides user_pt_regs instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const user_pt_regs *)(x)) @@ -179,7 +179,7 @@ struct pt_regs___s390 { #define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG #define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG #define __PT_PARM6_SYSCALL_REG gprs[7] -#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) +#define PT_REGS_PARM1_SYSCALL(x) (((const struct pt_regs___s390 *)(x))->__PT_PARM1_SYSCALL_REG) #define PT_REGS_PARM1_CORE_SYSCALL(x) \ BPF_CORE_READ((const struct pt_regs___s390 *)(x), __PT_PARM1_SYSCALL_REG) @@ -222,7 +222,7 @@ struct pt_regs___s390 { struct pt_regs___arm64 { unsigned long orig_x0; -}; +} __attribute__((preserve_access_index)); /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) @@ -241,7 +241,7 @@ struct pt_regs___arm64 { #define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG #define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG #define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG -#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x) +#define PT_REGS_PARM1_SYSCALL(x) (((const struct pt_regs___arm64 *)(x))->__PT_PARM1_SYSCALL_REG) #define PT_REGS_PARM1_CORE_SYSCALL(x) \ BPF_CORE_READ((const struct pt_regs___arm64 *)(x), __PT_PARM1_SYSCALL_REG) @@ -351,6 +351,10 @@ struct pt_regs___arm64 { * https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#risc-v-calling-conventions */ +struct pt_regs___riscv { + unsigned long orig_a0; +} __attribute__((preserve_access_index)); + /* riscv provides struct user_regs_struct instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const struct 
user_regs_struct *)(x)) #define __PT_PARM1_REG a0 @@ -362,12 +366,15 @@ struct pt_regs___arm64 { #define __PT_PARM7_REG a6 #define __PT_PARM8_REG a7 -#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG +#define __PT_PARM1_SYSCALL_REG orig_a0 #define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG #define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG #define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG #define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG #define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG +#define PT_REGS_PARM1_SYSCALL(x) (((const struct pt_regs___riscv *)(x))->__PT_PARM1_SYSCALL_REG) +#define PT_REGS_PARM1_CORE_SYSCALL(x) \ + BPF_CORE_READ((const struct pt_regs___riscv *)(x), __PT_PARM1_SYSCALL_REG) #define __PT_RET_REG ra #define __PT_FP_REG s0 @@ -473,7 +480,7 @@ struct pt_regs; #endif /* * Similarly, syscall-specific conventions might differ between function call - * conventions within each architecutre. All supported architectures pass + * conventions within each architecture. All supported architectures pass * either 6 or 7 syscall arguments in registers. * * See syscall(2) manpage for succinct table with information on each arch. @@ -515,7 +522,7 @@ struct pt_regs; #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(bpf_target_sparc) +#elif defined(bpf_target_sparc) || defined(bpf_target_arm64) #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP @@ -633,25 +640,25 @@ struct pt_regs; #endif #define ___bpf_ctx_cast0() ctx -#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] -#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] -#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] -#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] -#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] -#define ___bpf_ctx_cast6(x, args...) 
___bpf_ctx_cast5(args), (void *)ctx[5] -#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] -#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] -#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] -#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] -#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] -#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] +#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), ctx[0] +#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), ctx[1] +#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), ctx[2] +#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), ctx[3] +#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), ctx[4] +#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), ctx[5] +#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), ctx[6] +#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), ctx[7] +#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), ctx[8] +#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), ctx[9] +#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), ctx[10] +#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), ctx[11] #define ___bpf_ctx_cast(args...) ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) /* * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and * similar kinds of BPF programs, that accept input arguments as a single * pointer to untyped u64 array, where each u64 can actually be a typed - * pointer or integer of different size. Instead of requring user to write + * pointer or integer of different size. Instead of requiring user to write * manual casts and work with array elements by index, BPF_PROG macro * allows user to declare a list of named and typed input arguments in the * same syntax as for normal C function. 
All the casting is hidden and @@ -786,14 +793,14 @@ ____##name(unsigned long long *ctx ___bpf_ctx_decl(args)) struct pt_regs; #define ___bpf_kprobe_args0() ctx -#define ___bpf_kprobe_args1(x) ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) -#define ___bpf_kprobe_args2(x, args...) ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) -#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) -#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) -#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) -#define ___bpf_kprobe_args6(x, args...) ___bpf_kprobe_args5(args), (void *)PT_REGS_PARM6(ctx) -#define ___bpf_kprobe_args7(x, args...) ___bpf_kprobe_args6(args), (void *)PT_REGS_PARM7(ctx) -#define ___bpf_kprobe_args8(x, args...) ___bpf_kprobe_args7(args), (void *)PT_REGS_PARM8(ctx) +#define ___bpf_kprobe_args1(x) ___bpf_kprobe_args0(), (unsigned long long)PT_REGS_PARM1(ctx) +#define ___bpf_kprobe_args2(x, args...) ___bpf_kprobe_args1(args), (unsigned long long)PT_REGS_PARM2(ctx) +#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (unsigned long long)PT_REGS_PARM3(ctx) +#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (unsigned long long)PT_REGS_PARM4(ctx) +#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (unsigned long long)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args6(x, args...) ___bpf_kprobe_args5(args), (unsigned long long)PT_REGS_PARM6(ctx) +#define ___bpf_kprobe_args7(x, args...) ___bpf_kprobe_args6(args), (unsigned long long)PT_REGS_PARM7(ctx) +#define ___bpf_kprobe_args8(x, args...) ___bpf_kprobe_args7(args), (unsigned long long)PT_REGS_PARM8(ctx) #define ___bpf_kprobe_args(args...) ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) /* @@ -801,7 +808,7 @@ struct pt_regs; * tp_btf/fentry/fexit BPF programs. 
It hides the underlying platform-specific * low-level way of getting kprobe input arguments from struct pt_regs, and * provides a familiar typed and named function arguments syntax and - * semantics of accessing kprobe input paremeters. + * semantics of accessing kprobe input parameters. * * Original struct pt_regs* context is preserved as 'ctx' argument. This might * be necessary when using BPF helpers like bpf_perf_event_output(). @@ -821,7 +828,7 @@ static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) #define ___bpf_kretprobe_args0() ctx -#define ___bpf_kretprobe_args1(x) ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) +#define ___bpf_kretprobe_args1(x) ___bpf_kretprobe_args0(), (unsigned long long)PT_REGS_RC(ctx) #define ___bpf_kretprobe_args(args...) ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) /* @@ -845,24 +852,24 @@ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) /* If kernel has CONFIG_ARCH_HAS_SYSCALL_WRAPPER, read pt_regs directly */ #define ___bpf_syscall_args0() ctx -#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_SYSCALL(regs) -#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_SYSCALL(regs) -#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs) -#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs) -#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs) -#define ___bpf_syscall_args6(x, args...) ___bpf_syscall_args5(args), (void *)PT_REGS_PARM6_SYSCALL(regs) -#define ___bpf_syscall_args7(x, args...) ___bpf_syscall_args6(args), (void *)PT_REGS_PARM7_SYSCALL(regs) +#define ___bpf_syscall_args1(x) ___bpf_syscall_args0(), (unsigned long long)PT_REGS_PARM1_SYSCALL(regs) +#define ___bpf_syscall_args2(x, args...) 
___bpf_syscall_args1(args), (unsigned long long)PT_REGS_PARM2_SYSCALL(regs) +#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (unsigned long long)PT_REGS_PARM3_SYSCALL(regs) +#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (unsigned long long)PT_REGS_PARM4_SYSCALL(regs) +#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (unsigned long long)PT_REGS_PARM5_SYSCALL(regs) +#define ___bpf_syscall_args6(x, args...) ___bpf_syscall_args5(args), (unsigned long long)PT_REGS_PARM6_SYSCALL(regs) +#define ___bpf_syscall_args7(x, args...) ___bpf_syscall_args6(args), (unsigned long long)PT_REGS_PARM7_SYSCALL(regs) #define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args) /* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */ #define ___bpf_syswrap_args0() ctx -#define ___bpf_syswrap_args1(x) ___bpf_syswrap_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs) -#define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs) -#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs) -#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs) -#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs) -#define ___bpf_syswrap_args6(x, args...) ___bpf_syswrap_args5(args), (void *)PT_REGS_PARM6_CORE_SYSCALL(regs) -#define ___bpf_syswrap_args7(x, args...) ___bpf_syswrap_args6(args), (void *)PT_REGS_PARM7_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args1(x) ___bpf_syswrap_args0(), (unsigned long long)PT_REGS_PARM1_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args2(x, args...) ___bpf_syswrap_args1(args), (unsigned long long)PT_REGS_PARM2_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args3(x, args...) 
___bpf_syswrap_args2(args), (unsigned long long)PT_REGS_PARM3_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (unsigned long long)PT_REGS_PARM4_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (unsigned long long)PT_REGS_PARM5_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args6(x, args...) ___bpf_syswrap_args5(args), (unsigned long long)PT_REGS_PARM6_CORE_SYSCALL(regs) +#define ___bpf_syswrap_args7(x, args...) ___bpf_syswrap_args6(args), (unsigned long long)PT_REGS_PARM7_CORE_SYSCALL(regs) #define ___bpf_syswrap_args(args...) ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args) /* diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 2d0840ef599a..f1d495dc66bb 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -12,6 +12,7 @@ #include <sys/utsname.h> #include <sys/param.h> #include <sys/stat.h> +#include <sys/mman.h> #include <linux/kernel.h> #include <linux/err.h> #include <linux/btf.h> @@ -22,6 +23,7 @@ #include "libbpf_internal.h" #include "hashmap.h" #include "strset.h" +#include "str_error.h" #define BTF_MAX_NR_TYPES 0x7fffffffU #define BTF_MAX_STR_OFFSET 0x7fffffffU @@ -116,6 +118,12 @@ struct btf { /* whether strings are already deduplicated */ bool strs_deduped; + /* whether base_btf should be freed in btf_free for this instance */ + bool owns_base; + + /* whether raw_data is a (read-only) mmap */ + bool raw_data_is_mmap; + /* BTF object FD, if loaded into kernel */ int fd; @@ -279,7 +287,7 @@ static int btf_parse_str_sec(struct btf *btf) return -EINVAL; } if (!btf->base_btf && start[0]) { - pr_debug("Invalid BTF string section\n"); + pr_debug("Malformed BTF string section, did you forget to provide base BTF?\n"); return -EINVAL; } return 0; @@ -598,7 +606,7 @@ static int btf_sanity_check(const struct btf *btf) __u32 i, n = btf__type_cnt(btf); int err; - for (i = 1; i < n; i++) { + for (i = btf->start_id; i < n; i++) { t = btf_type_by_id(btf, i); err = 
btf_validate_type(btf, t, i); if (err) @@ -947,6 +955,17 @@ static bool btf_is_modifiable(const struct btf *btf) return (void *)btf->hdr != btf->raw_data; } +static void btf_free_raw_data(struct btf *btf) +{ + if (btf->raw_data_is_mmap) { + munmap(btf->raw_data, btf->raw_size); + btf->raw_data_is_mmap = false; + } else { + free(btf->raw_data); + } + btf->raw_data = NULL; +} + void btf__free(struct btf *btf) { if (IS_ERR_OR_NULL(btf)) @@ -966,9 +985,11 @@ void btf__free(struct btf *btf) free(btf->types_data); strset__free(btf->strs_set); } - free(btf->raw_data); + btf_free_raw_data(btf); free(btf->raw_data_swapped); free(btf->type_offs); + if (btf->owns_base) + btf__free(btf->base_btf); free(btf); } @@ -990,7 +1011,8 @@ static struct btf *btf_new_empty(struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len; + btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off; + btf->swapped_endian = base_btf->swapped_endian; } /* +1 for empty string at offset 0 */ @@ -1023,7 +1045,7 @@ struct btf *btf__new_empty_split(struct btf *base_btf) return libbpf_ptr(btf_new_empty(base_btf)); } -static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) +static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap) { struct btf *btf; int err; @@ -1043,12 +1065,18 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) btf->start_str_off = base_btf->hdr->str_len; } - btf->raw_data = malloc(size); - if (!btf->raw_data) { - err = -ENOMEM; - goto done; + if (is_mmap) { + btf->raw_data = (void *)data; + btf->raw_data_is_mmap = true; + } else { + btf->raw_data = malloc(size); + if (!btf->raw_data) { + err = -ENOMEM; + goto done; + } + memcpy(btf->raw_data, data, size); } - memcpy(btf->raw_data, data, size); + btf->raw_size = size; btf->hdr = btf->raw_data; @@ -1076,61 +1104,46 @@ done: struct btf 
*btf__new(const void *data, __u32 size) { - return libbpf_ptr(btf_new(data, size, NULL)); + return libbpf_ptr(btf_new(data, size, NULL, false)); } struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf) { - return libbpf_ptr(btf_new(data, size, base_btf)); + return libbpf_ptr(btf_new(data, size, base_btf, false)); } -static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, - struct btf_ext **btf_ext) +struct btf_elf_secs { + Elf_Data *btf_data; + Elf_Data *btf_ext_data; + Elf_Data *btf_base_data; +}; + +static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs *secs) { - Elf_Data *btf_data = NULL, *btf_ext_data = NULL; - int err = 0, fd = -1, idx = 0; - struct btf *btf = NULL; Elf_Scn *scn = NULL; - Elf *elf = NULL; + Elf_Data *data; GElf_Ehdr ehdr; size_t shstrndx; + int idx = 0; - if (elf_version(EV_CURRENT) == EV_NONE) { - pr_warn("failed to init libelf for %s\n", path); - return ERR_PTR(-LIBBPF_ERRNO__LIBELF); - } - - fd = open(path, O_RDONLY | O_CLOEXEC); - if (fd < 0) { - err = -errno; - pr_warn("failed to open %s: %s\n", path, strerror(errno)); - return ERR_PTR(err); - } - - err = -LIBBPF_ERRNO__FORMAT; - - elf = elf_begin(fd, ELF_C_READ, NULL); - if (!elf) { - pr_warn("failed to open %s as ELF file\n", path); - goto done; - } if (!gelf_getehdr(elf, &ehdr)) { pr_warn("failed to get EHDR from %s\n", path); - goto done; + goto err; } if (elf_getshdrstrndx(elf, &shstrndx)) { pr_warn("failed to get section names section index for %s\n", path); - goto done; + goto err; } if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) { pr_warn("failed to get e_shstrndx from %s\n", path); - goto done; + goto err; } while ((scn = elf_nextscn(elf, scn)) != NULL) { + Elf_Data **field; GElf_Shdr sh; char *name; @@ -1138,42 +1151,109 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, if (gelf_getshdr(scn, &sh) != &sh) { pr_warn("failed to get section(%d) header from %s\n", idx, path); - goto 
done; + goto err; } name = elf_strptr(elf, shstrndx, sh.sh_name); if (!name) { pr_warn("failed to get section(%d) name from %s\n", idx, path); - goto done; + goto err; } - if (strcmp(name, BTF_ELF_SEC) == 0) { - btf_data = elf_getdata(scn, 0); - if (!btf_data) { - pr_warn("failed to get section(%d, %s) data from %s\n", - idx, name, path); - goto done; - } - continue; - } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) { - btf_ext_data = elf_getdata(scn, 0); - if (!btf_ext_data) { - pr_warn("failed to get section(%d, %s) data from %s\n", - idx, name, path); - goto done; - } + + if (strcmp(name, BTF_ELF_SEC) == 0) + field = &secs->btf_data; + else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) + field = &secs->btf_ext_data; + else if (strcmp(name, BTF_BASE_ELF_SEC) == 0) + field = &secs->btf_base_data; + else continue; + + if (sh.sh_type != SHT_PROGBITS) { + pr_warn("unexpected section type (%d) of section(%d, %s) from %s\n", + sh.sh_type, idx, name, path); + goto err; + } + + data = elf_getdata(scn, 0); + if (!data) { + pr_warn("failed to get section(%d, %s) data from %s\n", + idx, name, path); + goto err; } + *field = data; + } + + return 0; + +err: + return -LIBBPF_ERRNO__FORMAT; +} + +static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, + struct btf_ext **btf_ext) +{ + struct btf_elf_secs secs = {}; + struct btf *dist_base_btf = NULL; + struct btf *btf = NULL; + int err = 0, fd = -1; + Elf *elf = NULL; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn("failed to init libelf for %s\n", path); + return ERR_PTR(-LIBBPF_ERRNO__LIBELF); + } + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = -errno; + pr_warn("failed to open %s: %s\n", path, errstr(err)); + return ERR_PTR(err); + } + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (!elf) { + err = -LIBBPF_ERRNO__FORMAT; + pr_warn("failed to open %s as ELF file\n", path); + goto done; } - if (!btf_data) { + err = btf_find_elf_sections(elf, path, &secs); + if (err) + goto done; + 
+ if (!secs.btf_data) { pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path); err = -ENODATA; goto done; } - btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf); - err = libbpf_get_error(btf); - if (err) + + if (secs.btf_base_data) { + dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size, + NULL, false); + if (IS_ERR(dist_base_btf)) { + err = PTR_ERR(dist_base_btf); + dist_base_btf = NULL; + goto done; + } + } + + btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size, + dist_base_btf ?: base_btf, false); + if (IS_ERR(btf)) { + err = PTR_ERR(btf); goto done; + } + if (dist_base_btf && base_btf) { + err = btf__relocate(btf, base_btf); + if (err) + goto done; + btf__free(dist_base_btf); + dist_base_btf = NULL; + } + + if (dist_base_btf) + btf->owns_base = true; switch (gelf_getclass(elf)) { case ELFCLASS32: @@ -1187,11 +1267,12 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, break; } - if (btf_ext && btf_ext_data) { - *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); - err = libbpf_get_error(*btf_ext); - if (err) + if (btf_ext && secs.btf_ext_data) { + *btf_ext = btf_ext__new(secs.btf_ext_data->d_buf, secs.btf_ext_data->d_size); + if (IS_ERR(*btf_ext)) { + err = PTR_ERR(*btf_ext); goto done; + } } else if (btf_ext) { *btf_ext = NULL; } @@ -1205,6 +1286,7 @@ done: if (btf_ext) btf_ext__free(*btf_ext); + btf__free(dist_base_btf); btf__free(btf); return ERR_PTR(err); @@ -1274,7 +1356,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) } /* finally parse BTF data */ - btf = btf_new(data, sz, base_btf); + btf = btf_new(data, sz, base_btf, false); err_out: free(data); @@ -1293,6 +1375,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf) return libbpf_ptr(btf_parse_raw(path, base_btf)); } +static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) +{ + struct stat st; + void *data; + struct btf *btf; + int 
fd, err; + + fd = open(path, O_RDONLY); + if (fd < 0) + return libbpf_err_ptr(-errno); + + if (fstat(fd, &st) < 0) { + err = -errno; + close(fd); + return libbpf_err_ptr(err); + } + + data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + err = -errno; + close(fd); + + if (data == MAP_FAILED) + return libbpf_err_ptr(err); + + btf = btf_new(data, st.st_size, base_btf, true); + if (IS_ERR(btf)) + munmap(data, st.st_size); + + return btf; +} + static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext) { struct btf *btf; @@ -1392,7 +1505,7 @@ retry_load: goto retry_load; err = -errno; - pr_warn("BTF loading error: %d\n", err); + pr_warn("BTF loading error: %s\n", errstr(err)); /* don't print out contents of custom log_buf */ if (!log_buf && buf[0]) pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf); @@ -1557,19 +1670,25 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf) goto exit_free; } - btf = btf_new(ptr, btf_info.btf_size, base_btf); + btf = btf_new(ptr, btf_info.btf_size, base_btf, false); exit_free: free(ptr); return btf; } -struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) +struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd) { struct btf *btf; int btf_fd; + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts); - btf_fd = bpf_btf_get_fd_by_id(id); + if (token_fd) { + opts.open_flags |= BPF_F_TOKEN_FD; + opts.token_fd = token_fd; + } + + btf_fd = bpf_btf_get_fd_by_id_opts(id, &opts); if (btf_fd < 0) return libbpf_err_ptr(-errno); @@ -1579,6 +1698,11 @@ struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) return libbpf_ptr(btf); } +struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) +{ + return btf_load_from_kernel(id, base_btf, 0); +} + struct btf *btf__load_from_kernel_by_id(__u32 id) { return btf__load_from_kernel_by_id_split(id, NULL); @@ -1586,10 +1710,8 @@ struct btf 
*btf__load_from_kernel_by_id(__u32 id) static void btf_invalidate_raw_data(struct btf *btf) { - if (btf->raw_data) { - free(btf->raw_data); - btf->raw_data = NULL; - } + if (btf->raw_data) + btf_free_raw_data(btf); if (btf->raw_data_swapped) { free(btf->raw_data_swapped); btf->raw_data_swapped = NULL; @@ -1739,9 +1861,8 @@ struct btf_pipe { struct hashmap *str_off_map; /* map string offsets from src to dst */ }; -static int btf_rewrite_str(__u32 *str_off, void *ctx) +static int btf_rewrite_str(struct btf_pipe *p, __u32 *str_off) { - struct btf_pipe *p = ctx; long mapped_off; int off, err; @@ -1771,10 +1892,11 @@ static int btf_rewrite_str(__u32 *str_off, void *ctx) return 0; } -int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type) +static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type) { - struct btf_pipe p = { .src = src_btf, .dst = btf }; + struct btf_field_iter it; struct btf_type *t; + __u32 *str_off; int sz, err; sz = btf_type_size(src_type); @@ -1782,35 +1904,33 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t return libbpf_err(sz); /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(btf)) + if (btf_ensure_modifiable(p->dst)) return libbpf_err(-ENOMEM); - t = btf_add_type_mem(btf, sz); + t = btf_add_type_mem(p->dst, sz); if (!t) return libbpf_err(-ENOMEM); memcpy(t, src_type, sz); - err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) return libbpf_err(err); - return btf_commit_type(btf, sz); + while ((str_off = btf_field_iter_next(&it))) { + err = btf_rewrite_str(p, str_off); + if (err) + return libbpf_err(err); + } + + return btf_commit_type(p->dst, sz); } -static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) +int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type) { - struct btf *btf = ctx; - - if (!*type_id) /* 
nothing to do for VOID references */ - return 0; + struct btf_pipe p = { .src = src_btf, .dst = btf }; - /* we haven't updated btf's type count yet, so - * btf->start_id + btf->nr_types - 1 is the type ID offset we should - * add to all newly added BTF types - */ - *type_id += btf->start_id + btf->nr_types - 1; - return 0; + return btf_add_type(&p, src_type); } static size_t btf_dedup_identity_hash_fn(long key, void *ctx); @@ -1858,6 +1978,9 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) memcpy(t, src_btf->types_data, data_sz); for (i = 0; i < cnt; i++) { + struct btf_field_iter it; + __u32 *type_id, *str_off; + sz = btf_type_size(t); if (sz < 0) { /* unlikely, has to be corrupted src_btf */ @@ -1869,15 +1992,31 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) *off = t - btf->types_data; /* add, dedup, and remap strings referenced by this BTF type */ - err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) goto err_out; + while ((str_off = btf_field_iter_next(&it))) { + err = btf_rewrite_str(&p, str_off); + if (err) + goto err_out; + } /* remap all type IDs referenced from this BTF type */ - err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (err) goto err_out; + while ((type_id = btf_field_iter_next(&it))) { + if (!*type_id) /* nothing to do for VOID references */ + continue; + + /* we haven't updated btf's type count yet, so + * btf->start_id + btf->nr_types - 1 is the type ID offset we should + * add to all newly added BTF types + */ + *type_id += btf->start_id + btf->nr_types - 1; + } + /* go to next type data and type offset index entry */ t += sz; off++; @@ -2018,7 +2157,7 @@ static int validate_type_id(int id) } /* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */ -static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id) +static int 
btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id, int kflag) { struct btf_type *t; int sz, name_off = 0; @@ -2041,7 +2180,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref } t->name_off = name_off; - t->info = btf_type_info(kind, 0, 0); + t->info = btf_type_info(kind, 0, kflag); t->type = ref_type_id; return btf_commit_type(btf, sz); @@ -2056,7 +2195,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref */ int btf__add_ptr(struct btf *btf, int ref_type_id) { - return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id, 0); } /* @@ -2434,7 +2573,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) struct btf_type *t; int id; - id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0); + id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0, 0); if (id <= 0) return id; t = btf_type_by_id(btf, id); @@ -2464,7 +2603,7 @@ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id) if (!name || !name[0]) return libbpf_err(-EINVAL); - return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id, 0); } /* @@ -2476,7 +2615,7 @@ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id) */ int btf__add_volatile(struct btf *btf, int ref_type_id) { - return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id, 0); } /* @@ -2488,7 +2627,7 @@ int btf__add_volatile(struct btf *btf, int ref_type_id) */ int btf__add_const(struct btf *btf, int ref_type_id) { - return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id, 0); } /* @@ -2500,7 +2639,7 @@ int btf__add_const(struct btf *btf, int ref_type_id) */ int btf__add_restrict(struct btf *btf, int ref_type_id) { 
- return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id, 0); } /* @@ -2516,7 +2655,24 @@ int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) if (!value || !value[0]) return libbpf_err(-EINVAL); - return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 0); +} + +/* + * Append new BTF_KIND_TYPE_TAG type with: + * - *value*, non-empty/non-NULL tag value; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * Set info->kflag to 1, indicating this tag is an __attribute__ + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id) +{ + if (!value || !value[0]) + return libbpf_err(-EINVAL); + + return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 1); } /* @@ -2538,7 +2694,7 @@ int btf__add_func(struct btf *btf, const char *name, linkage != BTF_FUNC_EXTERN) return libbpf_err(-EINVAL); - id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id); + id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id, 0); if (id > 0) { struct btf_type *t = btf_type_by_id(btf, id); @@ -2773,18 +2929,8 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ return 0; } -/* - * Append new BTF_KIND_DECL_TAG type with: - * - *value* - non-empty/non-NULL string; - * - *ref_type_id* - referenced type ID, it might not exist yet; - * - *component_idx* - -1 for tagging reference type, otherwise struct/union - * member or function argument index; - * Returns: - * - >0, type ID of newly added BTF type; - * - <0, on error. 
- */ -int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, - int component_idx) +static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id, + int component_idx, int kflag) { struct btf_type *t; int sz, value_off; @@ -2808,14 +2954,47 @@ int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, return value_off; t->name_off = value_off; - t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false); + t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, kflag); t->type = ref_type_id; btf_decl_tag(t)->component_idx = component_idx; return btf_commit_type(btf, sz); } -struct btf_ext_sec_setup_param { +/* + * Append new BTF_KIND_DECL_TAG type with: + * - *value* - non-empty/non-NULL string; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * - *component_idx* - -1 for tagging reference type, otherwise struct/union + * member or function argument index; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, + int component_idx) +{ + return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 0); +} + +/* + * Append new BTF_KIND_DECL_TAG type with: + * - *value* - non-empty/non-NULL string; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * - *component_idx* - -1 for tagging reference type, otherwise struct/union + * member or function argument index; + * Set info->kflag to 1, indicating this tag is an __attribute__ + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. 
+ */ +int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id, + int component_idx) +{ + return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 1); +} + +struct btf_ext_sec_info_param { __u32 off; __u32 len; __u32 min_rec_size; @@ -2823,14 +3002,20 @@ struct btf_ext_sec_setup_param { const char *desc; }; -static int btf_ext_setup_info(struct btf_ext *btf_ext, - struct btf_ext_sec_setup_param *ext_sec) +/* + * Parse a single info subsection of the BTF.ext info data: + * - validate subsection structure and elements + * - save info subsection start and sizing details in struct btf_ext + * - endian-independent operation, for calling before byte-swapping + */ +static int btf_ext_parse_sec_info(struct btf_ext *btf_ext, + struct btf_ext_sec_info_param *ext_sec, + bool is_native) { const struct btf_ext_info_sec *sinfo; struct btf_ext_info *ext_info; __u32 info_left, record_size; size_t sec_cnt = 0; - /* The start of the info sec (including the __u32 record_size). */ void *info; if (ext_sec->len == 0) @@ -2842,6 +3027,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } + /* The start of the info sec (including the __u32 record_size). */ info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; info_left = ext_sec->len; @@ -2857,9 +3043,13 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - /* The record size needs to meet the minimum standard */ - record_size = *(__u32 *)info; + /* The record size needs to meet either the minimum standard or, when + * handling non-native endianness data, the exact standard so as + * to allow safe byte-swapping. + */ + record_size = is_native ? 
*(__u32 *)info : bswap_32(*(__u32 *)info); if (record_size < ext_sec->min_rec_size || + (!is_native && record_size != ext_sec->min_rec_size) || record_size & 0x03) { pr_debug("%s section in .BTF.ext has invalid record size %u\n", ext_sec->desc, record_size); @@ -2871,7 +3061,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, /* If no records, return failure now so .BTF.ext won't be used. */ if (!info_left) { - pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); + pr_debug("%s section in .BTF.ext has no records\n", ext_sec->desc); return -EINVAL; } @@ -2886,7 +3076,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - num_records = sinfo->num_info; + num_records = is_native ? sinfo->num_info : bswap_32(sinfo->num_info); if (num_records == 0) { pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); @@ -2914,64 +3104,157 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return 0; } -static int btf_ext_setup_func_info(struct btf_ext *btf_ext) +/* Parse all info secs in the BTF.ext info data */ +static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native) { - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param func_info = { .off = btf_ext->hdr->func_info_off, .len = btf_ext->hdr->func_info_len, .min_rec_size = sizeof(struct bpf_func_info_min), .ext_info = &btf_ext->func_info, .desc = "func_info" }; - - return btf_ext_setup_info(btf_ext, ¶m); -} - -static int btf_ext_setup_line_info(struct btf_ext *btf_ext) -{ - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param line_info = { .off = btf_ext->hdr->line_info_off, .len = btf_ext->hdr->line_info_len, .min_rec_size = sizeof(struct bpf_line_info_min), .ext_info = &btf_ext->line_info, .desc = "line_info", }; - - return btf_ext_setup_info(btf_ext, ¶m); -} - -static int btf_ext_setup_core_relos(struct btf_ext *btf_ext) -{ - struct btf_ext_sec_setup_param param = { - .off = 
btf_ext->hdr->core_relo_off, - .len = btf_ext->hdr->core_relo_len, + struct btf_ext_sec_info_param core_relo = { .min_rec_size = sizeof(struct bpf_core_relo), .ext_info = &btf_ext->core_relo_info, .desc = "core_relo", }; + int err; + + err = btf_ext_parse_sec_info(btf_ext, &func_info, is_native); + if (err) + return err; + + err = btf_ext_parse_sec_info(btf_ext, &line_info, is_native); + if (err) + return err; + + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return 0; /* skip core relos parsing */ + + core_relo.off = btf_ext->hdr->core_relo_off; + core_relo.len = btf_ext->hdr->core_relo_len; + err = btf_ext_parse_sec_info(btf_ext, &core_relo, is_native); + if (err) + return err; + + return 0; +} + +/* Swap byte-order of BTF.ext header with any endianness */ +static void btf_ext_bswap_hdr(struct btf_ext_header *h) +{ + bool is_native = h->magic == BTF_MAGIC; + __u32 hdr_len; + + hdr_len = is_native ? h->hdr_len : bswap_32(h->hdr_len); + + h->magic = bswap_16(h->magic); + h->hdr_len = bswap_32(h->hdr_len); + h->func_info_off = bswap_32(h->func_info_off); + h->func_info_len = bswap_32(h->func_info_len); + h->line_info_off = bswap_32(h->line_info_off); + h->line_info_len = bswap_32(h->line_info_len); + + if (hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return; + + h->core_relo_off = bswap_32(h->core_relo_off); + h->core_relo_len = bswap_32(h->core_relo_len); +} + +/* Swap byte-order of generic info subsection */ +static void btf_ext_bswap_info_sec(void *info, __u32 len, bool is_native, + info_rec_bswap_fn bswap_fn) +{ + struct btf_ext_info_sec *sec; + __u32 info_left, rec_size, *rs; + + if (len == 0) + return; + + rs = info; /* info record size */ + rec_size = is_native ? 
*rs : bswap_32(*rs); + *rs = bswap_32(*rs); + + sec = info + sizeof(__u32); /* info sec #1 */ + info_left = len - sizeof(__u32); + while (info_left) { + unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u32 i, num_recs; + void *p; + + num_recs = is_native ? sec->num_info : bswap_32(sec->num_info); + sec->sec_name_off = bswap_32(sec->sec_name_off); + sec->num_info = bswap_32(sec->num_info); + p = sec->data; /* info rec #1 */ + for (i = 0; i < num_recs; i++, p += rec_size) + bswap_fn(p); + sec = p; + info_left -= sec_hdrlen + (__u64)rec_size * num_recs; + } +} + +/* + * Swap byte-order of all info data in a BTF.ext section + * - requires BTF.ext hdr in native endianness + */ +static void btf_ext_bswap_info(struct btf_ext *btf_ext, void *data) +{ + const bool is_native = btf_ext->swapped_endian; + const struct btf_ext_header *h = data; + void *info; - return btf_ext_setup_info(btf_ext, ¶m); + /* Swap func_info subsection byte-order */ + info = data + h->hdr_len + h->func_info_off; + btf_ext_bswap_info_sec(info, h->func_info_len, is_native, + (info_rec_bswap_fn)bpf_func_info_bswap); + + /* Swap line_info subsection byte-order */ + info = data + h->hdr_len + h->line_info_off; + btf_ext_bswap_info_sec(info, h->line_info_len, is_native, + (info_rec_bswap_fn)bpf_line_info_bswap); + + /* Swap core_relo subsection byte-order (if present) */ + if (h->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return; + + info = data + h->hdr_len + h->core_relo_off; + btf_ext_bswap_info_sec(info, h->core_relo_len, is_native, + (info_rec_bswap_fn)bpf_core_relo_bswap); } -static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) +/* Parse hdr data and info sections: check and convert to native endianness */ +static int btf_ext_parse(struct btf_ext *btf_ext) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + __u32 hdr_len, data_size = btf_ext->data_size; + struct btf_ext_header *hdr = btf_ext->hdr; + bool swapped_endian = false; + int err; - 
if (data_size < offsetofend(struct btf_ext_header, hdr_len) || - data_size < hdr->hdr_len) { - pr_debug("BTF.ext header not found"); + if (data_size < offsetofend(struct btf_ext_header, hdr_len)) { + pr_debug("BTF.ext header too short\n"); return -EINVAL; } + hdr_len = hdr->hdr_len; if (hdr->magic == bswap_16(BTF_MAGIC)) { - pr_warn("BTF.ext in non-native endianness is not supported\n"); - return -ENOTSUP; + swapped_endian = true; + hdr_len = bswap_32(hdr_len); } else if (hdr->magic != BTF_MAGIC) { pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); return -EINVAL; } - if (hdr->version != BTF_VERSION) { + /* Ensure known version of structs, current BTF_VERSION == 1 */ + if (hdr->version != 1) { pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); return -ENOTSUP; } @@ -2981,11 +3264,39 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) return -ENOTSUP; } - if (data_size == hdr->hdr_len) { + if (data_size < hdr_len) { + pr_debug("BTF.ext header not found\n"); + return -EINVAL; + } else if (data_size == hdr_len) { pr_debug("BTF.ext has no data\n"); return -EINVAL; } + /* Verify mandatory hdr info details present */ + if (hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { + pr_warn("BTF.ext header missing func_info, line_info\n"); + return -EINVAL; + } + + /* Keep hdr native byte-order in memory for introspection */ + if (swapped_endian) + btf_ext_bswap_hdr(btf_ext->hdr); + + /* Validate info subsections and cache key metadata */ + err = btf_ext_parse_info(btf_ext, !swapped_endian); + if (err) + return err; + + /* Keep infos native byte-order in memory for introspection */ + if (swapped_endian) + btf_ext_bswap_info(btf_ext, btf_ext->data); + + /* + * Set btf_ext->swapped_endian only after all header and info data has + * been swapped, helping bswap functions determine if their data are + * in native byte-order when called. 
+ */ + btf_ext->swapped_endian = swapped_endian; return 0; } @@ -2997,6 +3308,7 @@ void btf_ext__free(struct btf_ext *btf_ext) free(btf_ext->line_info.sec_idxs); free(btf_ext->core_relo_info.sec_idxs); free(btf_ext->data); + free(btf_ext->data_swapped); free(btf_ext); } @@ -3017,29 +3329,7 @@ struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) } memcpy(btf_ext->data, data, size); - err = btf_ext_parse_hdr(btf_ext->data, size); - if (err) - goto done; - - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { - err = -EINVAL; - goto done; - } - - err = btf_ext_setup_func_info(btf_ext); - if (err) - goto done; - - err = btf_ext_setup_line_info(btf_ext); - if (err) - goto done; - - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) - goto done; /* skip core relos parsing */ - - err = btf_ext_setup_core_relos(btf_ext); - if (err) - goto done; + err = btf_ext_parse(btf_ext); done: if (err) { @@ -3050,15 +3340,66 @@ done: return btf_ext; } +static void *btf_ext_raw_data(const struct btf_ext *btf_ext_ro, bool swap_endian) +{ + struct btf_ext *btf_ext = (struct btf_ext *)btf_ext_ro; + const __u32 data_sz = btf_ext->data_size; + void *data; + + /* Return native data (always present) or swapped data if present */ + if (!swap_endian) + return btf_ext->data; + else if (btf_ext->data_swapped) + return btf_ext->data_swapped; + + /* Recreate missing swapped data, then cache and return */ + data = calloc(1, data_sz); + if (!data) + return NULL; + memcpy(data, btf_ext->data, data_sz); + + btf_ext_bswap_info(btf_ext, data); + btf_ext_bswap_hdr(data); + btf_ext->data_swapped = data; + return data; +} + const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size) { + void *data; + + data = btf_ext_raw_data(btf_ext, btf_ext->swapped_endian); + if (!data) + return errno = ENOMEM, NULL; + *size = btf_ext->data_size; - return btf_ext->data; + return data; } __attribute__((alias("btf_ext__raw_data"))) const void 
*btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); +enum btf_endianness btf_ext__endianness(const struct btf_ext *btf_ext) +{ + if (is_host_big_endian()) + return btf_ext->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN; + else + return btf_ext->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; +} + +int btf_ext__set_endianness(struct btf_ext *btf_ext, enum btf_endianness endian) +{ + if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN) + return libbpf_err(-EINVAL); + + btf_ext->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN); + + if (!btf_ext->swapped_endian) { + free(btf_ext->data_swapped); + btf_ext->data_swapped = NULL; + } + return 0; +} struct btf_dedup; @@ -3221,7 +3562,7 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) d = btf_dedup_new(btf, opts); if (IS_ERR(d)) { - pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); + pr_debug("btf_dedup_new failed: %ld\n", PTR_ERR(d)); return libbpf_err(-EINVAL); } @@ -3232,42 +3573,42 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) err = btf_dedup_prep(d); if (err) { - pr_debug("btf_dedup_prep failed:%d\n", err); + pr_debug("btf_dedup_prep failed: %s\n", errstr(err)); goto done; } err = btf_dedup_strings(d); if (err < 0) { - pr_debug("btf_dedup_strings failed:%d\n", err); + pr_debug("btf_dedup_strings failed: %s\n", errstr(err)); goto done; } err = btf_dedup_prim_types(d); if (err < 0) { - pr_debug("btf_dedup_prim_types failed:%d\n", err); + pr_debug("btf_dedup_prim_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_struct_types(d); if (err < 0) { - pr_debug("btf_dedup_struct_types failed:%d\n", err); + pr_debug("btf_dedup_struct_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_resolve_fwds(d); if (err < 0) { - pr_debug("btf_dedup_resolve_fwds failed:%d\n", err); + pr_debug("btf_dedup_resolve_fwds failed: %s\n", errstr(err)); goto done; } err = btf_dedup_ref_types(d); if (err < 0) { - 
pr_debug("btf_dedup_ref_types failed:%d\n", err); + pr_debug("btf_dedup_ref_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_compact_types(d); if (err < 0) { - pr_debug("btf_dedup_compact_types failed:%d\n", err); + pr_debug("btf_dedup_compact_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_remap_types(d); if (err < 0) { - pr_debug("btf_dedup_remap_types failed:%d\n", err); + pr_debug("btf_dedup_remap_types failed: %s\n", errstr(err)); goto done; } @@ -3315,7 +3656,7 @@ struct btf_dedup { struct strset *strs_set; }; -static long hash_combine(long h, long value) +static unsigned long hash_combine(unsigned long h, unsigned long value) { return h * 31 + value; } @@ -3453,11 +3794,19 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_visit_fn fn, void * int i, r; for (i = 0; i < d->btf->nr_types; i++) { + struct btf_field_iter it; struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); + __u32 *str_off; - r = btf_type_visit_str_offs(t, fn, ctx); + r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (r) return r; + + while ((str_off = btf_field_iter_next(&it))) { + r = fn(str_off, ctx); + if (r) + return r; + } } if (!d->btf_ext) @@ -4057,46 +4406,109 @@ static inline __u16 btf_fwd_kind(struct btf_type *t) return btf_kflag(t) ? 
BTF_KIND_UNION : BTF_KIND_STRUCT; } -/* Check if given two types are identical ARRAY definitions */ -static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) +static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2, int depth) { struct btf_type *t1, *t2; + int k1, k2; +recur: + if (depth <= 0) + return false; t1 = btf_type_by_id(d->btf, id1); t2 = btf_type_by_id(d->btf, id2); - if (!btf_is_array(t1) || !btf_is_array(t2)) + + k1 = btf_kind(t1); + k2 = btf_kind(t2); + if (k1 != k2) return false; - return btf_equal_array(t1, t2); -} + switch (k1) { + case BTF_KIND_UNKN: /* VOID */ + return true; + case BTF_KIND_INT: + return btf_equal_int_tag(t1, t2); + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + return btf_compat_enum(t1, t2); + case BTF_KIND_FWD: + case BTF_KIND_FLOAT: + return btf_equal_common(t1, t2); + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_TYPE_TAG: + if (t1->info != t2->info || t1->name_off != t2->name_off) + return false; + id1 = t1->type; + id2 = t2->type; + goto recur; + case BTF_KIND_ARRAY: { + struct btf_array *a1, *a2; -/* Check if given two types are identical STRUCT/UNION definitions */ -static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2) -{ - const struct btf_member *m1, *m2; - struct btf_type *t1, *t2; - int n, i; + if (!btf_compat_array(t1, t2)) + return false; - t1 = btf_type_by_id(d->btf, id1); - t2 = btf_type_by_id(d->btf, id2); + a1 = btf_array(t1); + a2 = btf_array(t2); - if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2)) - return false; + if (a1->index_type != a2->index_type && + !btf_dedup_identical_types(d, a1->index_type, a2->index_type, depth - 1)) + return false; - if (!btf_shallow_equal_struct(t1, t2)) - return false; + if (a1->type != a2->type && + !btf_dedup_identical_types(d, a1->type, a2->type, depth - 1)) + return false; - m1 = 
btf_members(t1); - m2 = btf_members(t2); - for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { - if (m1->type != m2->type && - !btf_dedup_identical_arrays(d, m1->type, m2->type) && - !btf_dedup_identical_structs(d, m1->type, m2->type)) + return true; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m1, *m2; + int i, n; + + if (!btf_shallow_equal_struct(t1, t2)) return false; + + m1 = btf_members(t1); + m2 = btf_members(t2); + for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { + if (m1->type == m2->type) + continue; + if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1)) + return false; + } + return true; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p1, *p2; + int i, n; + + if (!btf_compat_fnproto(t1, t2)) + return false; + + if (t1->type != t2->type && + !btf_dedup_identical_types(d, t1->type, t2->type, depth - 1)) + return false; + + p1 = btf_params(t1); + p2 = btf_params(t2); + for (i = 0, n = btf_vlen(t1); i < n; i++, p1++, p2++) { + if (p1->type == p2->type) + continue; + if (!btf_dedup_identical_types(d, p1->type, p2->type, depth - 1)) + return false; + } + return true; + } + default: + return false; } - return true; } + /* * Check equivalence of BTF type graph formed by candidate struct/union (we'll * call it "candidate graph" in this description for brevity) to a type graph @@ -4114,7 +4526,7 @@ static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id * and canonical graphs are not compatible structurally, whole graphs are * incompatible. If types are structurally equivalent (i.e., all information * except referenced type IDs is exactly the same), a mapping from `canon_id` to - * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`). + * a `cand_id` is recorded in hypothetical mapping (`btf_dedup->hypot_map`). * If a type references other types, then those referenced types are checked * for equivalence recursively. 
* @@ -4152,7 +4564,7 @@ static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id * consists of portions of the graph that come from multiple compilation units. * This is due to the fact that types within single compilation unit are always * deduplicated and FWDs are already resolved, if referenced struct/union - * definiton is available. So, if we had unresolved FWD and found corresponding + * definition is available. So, if we had unresolved FWD and found corresponding * STRUCT/UNION, they will be from different compilation units. This * consequently means that when we "link" FWD to corresponding STRUCT/UNION, * type graph will likely have at least two different BTF types that describe @@ -4215,19 +4627,13 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, * different fields within the *same* struct. This breaks type * equivalence check, which makes an assumption that candidate * types sub-graph has a consistent and deduped-by-compiler - * types within a single CU. So work around that by explicitly - * allowing identical array types here. - */ - if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id)) - return 1; - /* It turns out that similar situation can happen with - * struct/union sometimes, sigh... Handle the case where - * structs/unions are exactly the same, down to the referenced - * type IDs. Anything more complicated (e.g., if referenced - * types are different, but equivalent) is *way more* - * complicated and requires a many-to-many equivalence mapping. + * types within a single CU. And similar situation can happen + * with struct/union sometimes, and even with pointers. + * So accommodate cases like this doing a structural + * comparison recursively, but avoiding being stuck in endless + * loops by limiting the depth up to which we check. 
*/ - if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) + if (btf_dedup_identical_types(d, hypot_type_id, cand_id, 16)) return 1; return 0; } @@ -4919,10 +5325,23 @@ static int btf_dedup_remap_types(struct btf_dedup *d) for (i = 0; i < d->btf->nr_types; i++) { struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); + struct btf_field_iter it; + __u32 *type_id; - r = btf_type_visit_type_ids(t, btf_dedup_remap_type_id, d); + r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (r) return r; + + while ((type_id = btf_field_iter_next(&it))) { + __u32 resolved_id, new_id; + + resolved_id = resolve_type_id(d, *type_id); + new_id = d->hypot_map[resolved_id]; + if (new_id > BTF_MAX_NR_TYPES) + return -EINVAL; + + *type_id = new_id; + } } if (!d->btf_ext) @@ -4962,10 +5381,14 @@ struct btf *btf__load_vmlinux_btf(void) pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n", sysfs_btf_path); } else { - btf = btf__parse(sysfs_btf_path, NULL); + btf = btf_parse_raw_mmap(sysfs_btf_path, NULL); + if (IS_ERR(btf)) + btf = btf__parse(sysfs_btf_path, NULL); + if (!btf) { err = -errno; - pr_warn("failed to read kernel BTF from '%s': %d\n", sysfs_btf_path, err); + pr_warn("failed to read kernel BTF from '%s': %s\n", + sysfs_btf_path, errstr(err)); return libbpf_err_ptr(err); } pr_debug("loaded kernel BTF from '%s'\n", sysfs_btf_path); @@ -4982,7 +5405,7 @@ struct btf *btf__load_vmlinux_btf(void) btf = btf__parse(path, NULL); err = libbpf_get_error(btf); - pr_debug("loading kernel BTF '%s': %d\n", path, err); + pr_debug("loading kernel BTF '%s': %s\n", path, errstr(err)); if (err) continue; @@ -5003,136 +5426,6 @@ struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_bt return btf__parse_split(path, vmlinux_btf); } -int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx) -{ - int i, n, err; - - switch (btf_kind(t)) { - case BTF_KIND_INT: - case BTF_KIND_FLOAT: - case BTF_KIND_ENUM: - case 
BTF_KIND_ENUM64: - return 0; - - case BTF_KIND_FWD: - case BTF_KIND_CONST: - case BTF_KIND_VOLATILE: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_FUNC: - case BTF_KIND_VAR: - case BTF_KIND_DECL_TAG: - case BTF_KIND_TYPE_TAG: - return visit(&t->type, ctx); - - case BTF_KIND_ARRAY: { - struct btf_array *a = btf_array(t); - - err = visit(&a->type, ctx); - err = err ?: visit(&a->index_type, ctx); - return err; - } - - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *m = btf_members(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - case BTF_KIND_FUNC_PROTO: { - struct btf_param *m = btf_params(t); - - err = visit(&t->type, ctx); - if (err) - return err; - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - case BTF_KIND_DATASEC: { - struct btf_var_secinfo *m = btf_var_secinfos(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - default: - return -EINVAL; - } -} - -int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx) -{ - int i, n, err; - - err = visit(&t->name_off, ctx); - if (err) - return err; - - switch (btf_kind(t)) { - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *m = btf_members(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_ENUM: { - struct btf_enum *m = btf_enum(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_ENUM64: { - struct btf_enum64 *m = btf_enum64(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_FUNC_PROTO: { - 
struct btf_param *m = btf_params(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - default: - break; - } - - return 0; -} - int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx) { const struct btf_ext_info *seg; @@ -5212,3 +5505,328 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void return 0; } + +struct btf_distill { + struct btf_pipe pipe; + int *id_map; + unsigned int split_start_id; + unsigned int split_start_str; + int diff_id; +}; + +static int btf_add_distilled_type_ids(struct btf_distill *dist, __u32 i) +{ + struct btf_type *split_t = btf_type_by_id(dist->pipe.src, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, split_t, BTF_FIELD_ITER_IDS); + if (err) + return err; + while ((id = btf_field_iter_next(&it))) { + struct btf_type *base_t; + + if (!*id) + continue; + /* split BTF id, not needed */ + if (*id >= dist->split_start_id) + continue; + /* already added ? */ + if (dist->id_map[*id] > 0) + continue; + + /* only a subset of base BTF types should be referenced from + * split BTF; ensure nothing unexpected is referenced. 
+ */ + base_t = btf_type_by_id(dist->pipe.src, *id); + switch (btf_kind(base_t)) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + case BTF_KIND_ARRAY: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_TYPEDEF: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_FUNC_PROTO: + case BTF_KIND_TYPE_TAG: + dist->id_map[*id] = *id; + break; + default: + pr_warn("unexpected reference to base type[%u] of kind [%u] when creating distilled base BTF.\n", + *id, btf_kind(base_t)); + return -EINVAL; + } + /* If a base type is used, ensure types it refers to are + * marked as used also; so for example if we find a PTR to INT + * we need both the PTR and INT. + * + * The only exception is named struct/unions, since distilled + * base BTF composite types have no members. + */ + if (btf_is_composite(base_t) && base_t->name_off) + continue; + err = btf_add_distilled_type_ids(dist, *id); + if (err) + return err; + } + return 0; +} + +static int btf_add_distilled_types(struct btf_distill *dist) +{ + bool adding_to_base = dist->pipe.dst->start_id == 1; + int id = btf__type_cnt(dist->pipe.dst); + struct btf_type *t; + int i, err = 0; + + + /* Add types for each of the required references to either distilled + * base or split BTF, depending on type characteristics. + */ + for (i = 1; i < dist->split_start_id; i++) { + const char *name; + int kind; + + if (!dist->id_map[i]) + continue; + t = btf_type_by_id(dist->pipe.src, i); + kind = btf_kind(t); + name = btf__name_by_offset(dist->pipe.src, t->name_off); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + /* Named int, float, fwd are added to base. */ + if (!adding_to_base) + continue; + err = btf_add_type(&dist->pipe, t); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* Named struct/union are added to base as 0-vlen + * struct/union of same size. 
Anonymous struct/unions + * are added to split BTF as-is. + */ + if (adding_to_base) { + if (!t->name_off) + continue; + err = btf_add_composite(dist->pipe.dst, kind, name, t->size); + } else { + if (t->name_off) + continue; + err = btf_add_type(&dist->pipe, t); + } + break; + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + /* Named enum[64]s are added to base as a sized + * enum; relocation will match with appropriately-named + * and sized enum or enum64. + * + * Anonymous enums are added to split BTF as-is. + */ + if (adding_to_base) { + if (!t->name_off) + continue; + err = btf__add_enum(dist->pipe.dst, name, t->size); + } else { + if (t->name_off) + continue; + err = btf_add_type(&dist->pipe, t); + } + break; + case BTF_KIND_ARRAY: + case BTF_KIND_TYPEDEF: + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_FUNC_PROTO: + case BTF_KIND_TYPE_TAG: + /* All other types are added to split BTF. */ + if (adding_to_base) + continue; + err = btf_add_type(&dist->pipe, t); + break; + default: + pr_warn("unexpected kind when adding base type '%s'[%u] of kind [%u] to distilled base BTF.\n", + name, i, kind); + return -EINVAL; + + } + if (err < 0) + break; + dist->id_map[i] = id++; + } + return err; +} + +/* Split BTF ids without a mapping will be shifted downwards since distilled + * base BTF is smaller than the original base BTF. For those that have a + * mapping (either to base or updated split BTF), update the id based on + * that mapping. 
+ */ +static int btf_update_distilled_type_ids(struct btf_distill *dist, __u32 i) +{ + struct btf_type *t = btf_type_by_id(dist->pipe.dst, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + while ((id = btf_field_iter_next(&it))) { + if (dist->id_map[*id]) + *id = dist->id_map[*id]; + else if (*id >= dist->split_start_id) + *id -= dist->diff_id; + } + return 0; +} + +/* Create updated split BTF with distilled base BTF; distilled base BTF + * consists of BTF information required to clarify the types that split + * BTF refers to, omitting unneeded details. Specifically it will contain + * base types and memberless definitions of named structs, unions and enumerated + * types. Associated reference types like pointers, arrays and anonymous + * structs, unions and enumerated types will be added to split BTF. + * Size is recorded for named struct/unions to help guide matching to the + * target base BTF during later relocation. + * + * The only case where structs, unions or enumerated types are fully represented + * is when they are anonymous; in such cases, the anonymous type is added to + * split BTF in full. + * + * We return newly-created split BTF where the split BTF refers to a newly-created + * distilled base BTF. Both must be freed separately by the caller. + */ +int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf, + struct btf **new_split_btf) +{ + struct btf *new_base = NULL, *new_split = NULL; + const struct btf *old_base; + unsigned int n = btf__type_cnt(src_btf); + struct btf_distill dist = {}; + struct btf_type *t; + int i, err = 0; + + /* src BTF must be split BTF. 
*/ + old_base = btf__base_btf(src_btf); + if (!new_base_btf || !new_split_btf || !old_base) + return libbpf_err(-EINVAL); + + new_base = btf__new_empty(); + if (!new_base) + return libbpf_err(-ENOMEM); + + btf__set_endianness(new_base, btf__endianness(src_btf)); + + dist.id_map = calloc(n, sizeof(*dist.id_map)); + if (!dist.id_map) { + err = -ENOMEM; + goto done; + } + dist.pipe.src = src_btf; + dist.pipe.dst = new_base; + dist.pipe.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(dist.pipe.str_off_map)) { + err = -ENOMEM; + goto done; + } + dist.split_start_id = btf__type_cnt(old_base); + dist.split_start_str = old_base->hdr->str_len; + + /* Pass over src split BTF; generate the list of base BTF type ids it + * references; these will constitute our distilled BTF set to be + * distributed over base and split BTF as appropriate. + */ + for (i = src_btf->start_id; i < n; i++) { + err = btf_add_distilled_type_ids(&dist, i); + if (err < 0) + goto done; + } + /* Next add types for each of the required references to base BTF and split BTF + * in turn. + */ + err = btf_add_distilled_types(&dist); + if (err < 0) + goto done; + + /* Create new split BTF with distilled base BTF as its base; the final + * state is split BTF with distilled base BTF that represents enough + * about its base references to allow it to be relocated with the base + * BTF available. + */ + new_split = btf__new_empty_split(new_base); + if (!new_split) { + err = -errno; + goto done; + } + dist.pipe.dst = new_split; + /* First add all split types */ + for (i = src_btf->start_id; i < n; i++) { + t = btf_type_by_id(src_btf, i); + err = btf_add_type(&dist.pipe, t); + if (err < 0) + goto done; + } + /* Now add distilled types to split BTF that are not added to base. */ + err = btf_add_distilled_types(&dist); + if (err < 0) + goto done; + + /* All split BTF ids will be shifted downwards since there are less base + * BTF ids in distilled base BTF. 
+ */ + dist.diff_id = dist.split_start_id - btf__type_cnt(new_base); + + n = btf__type_cnt(new_split); + /* Now update base/split BTF ids. */ + for (i = 1; i < n; i++) { + err = btf_update_distilled_type_ids(&dist, i); + if (err < 0) + break; + } +done: + free(dist.id_map); + hashmap__free(dist.pipe.str_off_map); + if (err) { + btf__free(new_split); + btf__free(new_base); + return libbpf_err(err); + } + *new_base_btf = new_base; + *new_split_btf = new_split; + + return 0; +} + +const struct btf_header *btf_header(const struct btf *btf) +{ + return btf->hdr; +} + +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) +{ + btf->base_btf = (struct btf *)base_btf; + btf->start_id = btf__type_cnt(base_btf); + btf->start_str_off = base_btf->hdr->str_len; +} + +int btf__relocate(struct btf *btf, const struct btf *base_btf) +{ + int err = btf_relocate(btf, base_btf, NULL); + + if (!err) + btf->owns_base = false; + return libbpf_err(err); +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 8e6880d91c84..4392451d634b 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -18,6 +18,7 @@ extern "C" { #define BTF_ELF_SEC ".BTF" #define BTF_EXT_ELF_SEC ".BTF.ext" +#define BTF_BASE_ELF_SEC ".BTF.base" #define MAPS_ELF_SEC ".maps" struct btf; @@ -107,6 +108,27 @@ LIBBPF_API struct btf *btf__new_empty(void); */ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); +/** + * @brief **btf__distill_base()** creates new versions of the split BTF + * *src_btf* and its base BTF. The new base BTF will only contain the types + * needed to improve robustness of the split BTF to small changes in base BTF. + * When that split BTF is loaded against a (possibly changed) base, this + * distilled base BTF will help update references to that (possibly changed) + * base BTF. + * + * Both the new split and its associated new base BTF must be freed by + * the caller. 
+ * + * If successful, 0 is returned and **new_base_btf** and **new_split_btf** + * will point at new base/split BTF. Both the new split and its associated + * new base BTF must be freed by the caller. + * + * A negative value is returned on error and the thread-local `errno` variable + * is set to the error code as well. + */ +LIBBPF_API int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf, + struct btf **new_split_btf); + LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf); LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext); @@ -145,6 +167,9 @@ LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size); +LIBBPF_API enum btf_endianness btf_ext__endianness(const struct btf_ext *btf_ext); +LIBBPF_API int btf_ext__set_endianness(struct btf_ext *btf_ext, + enum btf_endianness endian); LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); @@ -202,6 +227,7 @@ LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id); +LIBBPF_API int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id); /* func and func_proto construction APIs */ LIBBPF_API int btf__add_func(struct btf *btf, const char *name, @@ -218,6 +244,8 @@ LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id, /* tag construction API */ LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char 
*value, int ref_type_id, int component_idx); +LIBBPF_API int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id, + int component_idx); struct btf_dedup_opts { size_t sz; @@ -231,6 +259,20 @@ struct btf_dedup_opts { LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); +/** + * @brief **btf__relocate()** will check the split BTF *btf* for references + * to base BTF kinds, and verify those references are compatible with + * *base_btf*; if they are, *btf* is adjusted such that is re-parented to + * *base_btf* and type ids and strings are adjusted to accommodate this. + * + * If successful, 0 is returned and **btf** now has **base_btf** as its + * base. + * + * A negative value is returned on error and the thread-local `errno` variable + * is set to the error code as well. + */ +LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf); + struct btf_dump; struct btf_dump_opts { @@ -250,7 +292,7 @@ LIBBPF_API void btf_dump__free(struct btf_dump *d); LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); struct btf_dump_emit_type_decl_opts { - /* size of this struct, for forward/backward compatiblity */ + /* size of this struct, for forward/backward compatibility */ size_t sz; /* optional field name for type declaration, e.g.: * - struct my_struct <FNAME> diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 4d9f30bf7f01..460c3e57fadb 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -21,6 +21,7 @@ #include "hashmap.h" #include "libbpf.h" #include "libbpf_internal.h" +#include "str_error.h" static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; @@ -304,7 +305,7 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) * definition, in which case they have to be declared inline as part of field * type declaration; or as a top-level anonymous enum, typically used for * declaring global constants. 
It's impossible to distinguish between two - * without knowning whether given enum type was referenced from other type: + * without knowing whether given enum type was referenced from other type: * top-level anonymous enum won't be referenced by anything, while embedded * one will. */ @@ -867,8 +868,8 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d, } pads[] = { {"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8} }; - int new_off, pad_bits, bits, i; - const char *pad_type; + int new_off = 0, pad_bits = 0, bits, i; + const char *pad_type = NULL; if (cur_off >= next_off) return; /* no gap */ @@ -1304,7 +1305,7 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, * chain, restore stack, emit warning, and try to * proceed nevertheless */ - pr_warn("not enough memory for decl stack:%d", err); + pr_warn("not enough memory for decl stack: %s\n", errstr(err)); d->decl_stack_cnt = stack_start; return; } @@ -1493,7 +1494,10 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, case BTF_KIND_TYPE_TAG: btf_dump_emit_mods(d, decls); name = btf_name_of(d, t->name_off); - btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); + if (btf_kflag(t)) + btf_dump_printf(d, " __attribute__((%s))", name); + else + btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); break; case BTF_KIND_ARRAY: { const struct btf_array *a = btf_array(t); @@ -1559,10 +1563,12 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, * Clang for BPF target generates func_proto with no * args as a func_proto with a single void arg (e.g., * `int (*f)(void)` vs just `int (*f)()`). We are - * going to pretend there are no args for such case. + * going to emit valid empty args (void) syntax for + * such case. Similarly and conveniently, valid + * no args case can be special-cased here as well. 
*/ - if (vlen == 1 && p->type == 0) { - btf_dump_printf(d, ")"); + if (vlen == 0 || (vlen == 1 && p->type == 0)) { + btf_dump_printf(d, "void)"); return; } @@ -1929,6 +1935,7 @@ static int btf_dump_int_data(struct btf_dump *d, if (d->typed_dump->is_array_terminated) break; if (*(char *)data == '\0') { + btf_dump_type_values(d, "'\\0'"); d->typed_dump->is_array_terminated = true; break; } @@ -2031,6 +2038,7 @@ static int btf_dump_array_data(struct btf_dump *d, __u32 i, elem_type_id; __s64 elem_size; bool is_array_member; + bool is_array_terminated; elem_type_id = array->type; elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); @@ -2066,12 +2074,15 @@ static int btf_dump_array_data(struct btf_dump *d, */ is_array_member = d->typed_dump->is_array_member; d->typed_dump->is_array_member = true; + is_array_terminated = d->typed_dump->is_array_terminated; + d->typed_dump->is_array_terminated = false; for (i = 0; i < array->nelems; i++, data += elem_size) { if (d->typed_dump->is_array_terminated) break; btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0); } d->typed_dump->is_array_member = is_array_member; + d->typed_dump->is_array_terminated = is_array_terminated; d->typed_dump->depth--; btf_dump_data_pfx(d); btf_dump_type_values(d, "]"); diff --git a/tools/lib/bpf/btf_iter.c b/tools/lib/bpf/btf_iter.c new file mode 100644 index 000000000000..9a6c822c2294 --- /dev/null +++ b/tools/lib/bpf/btf_iter.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2021 Facebook */ +/* Copyright (c) 2024, Oracle and/or its affiliates. 
*/ + +#ifdef __KERNEL__ +#include <linux/bpf.h> +#include <linux/btf.h> + +#define btf_var_secinfos(t) (struct btf_var_secinfo *)btf_type_var_secinfo(t) + +#else +#include "btf.h" +#include "libbpf_internal.h" +#endif + +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, + enum btf_field_iter_kind iter_kind) +{ + it->p = NULL; + it->m_idx = -1; + it->off_idx = 0; + it->vlen = 0; + + switch (iter_kind) { + case BTF_FIELD_ITER_IDS: + switch (btf_kind(t)) { + case BTF_KIND_UNKN: + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + it->desc = (struct btf_field_desc) {}; + break; + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + it->desc = (struct btf_field_desc) { 1, {offsetof(struct btf_type, type)} }; + break; + case BTF_KIND_ARRAY: + it->desc = (struct btf_field_desc) { + 2, {sizeof(struct btf_type) + offsetof(struct btf_array, type), + sizeof(struct btf_type) + offsetof(struct btf_array, index_type)} + }; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + it->desc = (struct btf_field_desc) { + 0, {}, + sizeof(struct btf_member), + 1, {offsetof(struct btf_member, type)} + }; + break; + case BTF_KIND_FUNC_PROTO: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, type)}, + sizeof(struct btf_param), + 1, {offsetof(struct btf_param, type)} + }; + break; + case BTF_KIND_DATASEC: + it->desc = (struct btf_field_desc) { + 0, {}, + sizeof(struct btf_var_secinfo), + 1, {offsetof(struct btf_var_secinfo, type)} + }; + break; + default: + return -EINVAL; + } + break; + case BTF_FIELD_ITER_STRS: + switch (btf_kind(t)) { + case BTF_KIND_UNKN: + it->desc = (struct btf_field_desc) {}; + break; + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + case BTF_KIND_ARRAY: + case BTF_KIND_CONST: + case 
BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + case BTF_KIND_DATASEC: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)} + }; + break; + case BTF_KIND_ENUM: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_enum), + 1, {offsetof(struct btf_enum, name_off)} + }; + break; + case BTF_KIND_ENUM64: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_enum64), + 1, {offsetof(struct btf_enum64, name_off)} + }; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_member), + 1, {offsetof(struct btf_member, name_off)} + }; + break; + case BTF_KIND_FUNC_PROTO: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_param), + 1, {offsetof(struct btf_param, name_off)} + }; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + if (it->desc.m_sz) + it->vlen = btf_vlen(t); + + it->p = t; + return 0; +} + +__u32 *btf_field_iter_next(struct btf_field_iter *it) +{ + if (!it->p) + return NULL; + + if (it->m_idx < 0) { + if (it->off_idx < it->desc.t_off_cnt) + return it->p + it->desc.t_offs[it->off_idx++]; + /* move to per-member iteration */ + it->m_idx = 0; + it->p += sizeof(struct btf_type); + it->off_idx = 0; + } + + /* if type doesn't have members, stop */ + if (it->desc.m_sz == 0) { + it->p = NULL; + return NULL; + } + + if (it->off_idx >= it->desc.m_off_cnt) { + /* exhausted this member's fields, go to the next member */ + it->m_idx++; + it->p += it->desc.m_sz; + it->off_idx = 0; + } + + if (it->m_idx < it->vlen) + return it->p + it->desc.m_offs[it->off_idx++]; + + it->p = NULL; + return NULL; +} diff --git 
a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c new file mode 100644 index 000000000000..53d1f3541bce --- /dev/null +++ b/tools/lib/bpf/btf_relocate.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2024, Oracle and/or its affiliates. */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#ifdef __KERNEL__ +#include <linux/bpf.h> +#include <linux/bsearch.h> +#include <linux/btf.h> +#include <linux/sort.h> +#include <linux/string.h> +#include <linux/bpf_verifier.h> + +#define btf_type_by_id (struct btf_type *)btf_type_by_id +#define btf__type_cnt btf_nr_types +#define btf__base_btf btf_base_btf +#define btf__name_by_offset btf_name_by_offset +#define btf__str_by_offset btf_str_by_offset +#define btf_kflag btf_type_kflag + +#define calloc(nmemb, sz) kvcalloc(nmemb, sz, GFP_KERNEL | __GFP_NOWARN) +#define free(ptr) kvfree(ptr) +#define qsort(base, num, sz, cmp) sort(base, num, sz, cmp, NULL) + +#else + +#include "btf.h" +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_internal.h" + +#endif /* __KERNEL__ */ + +struct btf; + +struct btf_relocate { + struct btf *btf; + const struct btf *base_btf; + const struct btf *dist_base_btf; + unsigned int nr_base_types; + unsigned int nr_split_types; + unsigned int nr_dist_base_types; + int dist_str_len; + int base_str_len; + __u32 *id_map; + __u32 *str_map; +}; + +/* Set temporarily in relocation id_map if distilled base struct/union is + * embedded in a split BTF struct/union; in such a case, size information must + * match between distilled base BTF and base BTF representation of type. + */ +#define BTF_IS_EMBEDDED ((__u32)-1) + +/* <name, size, id> triple used in sorting/searching distilled base BTF. 
*/ +struct btf_name_info { + const char *name; + /* set when search requires a size match */ + bool needs_size: 1; + unsigned int size: 31; + __u32 id; +}; + +static int btf_relocate_rewrite_type_id(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((id = btf_field_iter_next(&it))) + *id = r->id_map[*id]; + return 0; +} + +/* Simple string comparison used for sorting within BTF, since all distilled + * types are named. If strings match, and size is non-zero for both elements + * fall back to using size for ordering. + */ +static int cmp_btf_name_size(const void *n1, const void *n2) +{ + const struct btf_name_info *ni1 = n1; + const struct btf_name_info *ni2 = n2; + int name_diff = strcmp(ni1->name, ni2->name); + + if (!name_diff && ni1->needs_size && ni2->needs_size) + return ni2->size - ni1->size; + return name_diff; +} + +/* Binary search with a small twist; find leftmost element that matches + * so that we can then iterate through all exact matches. So for example + * searching { "a", "bb", "bb", "c" } we would always match on the + * leftmost "bb". + */ +static struct btf_name_info *search_btf_name_size(struct btf_name_info *key, + struct btf_name_info *vals, + int nelems) +{ + struct btf_name_info *ret = NULL; + int high = nelems - 1; + int low = 0; + + while (low <= high) { + int mid = (low + high)/2; + struct btf_name_info *val = &vals[mid]; + int diff = cmp_btf_name_size(key, val); + + if (diff == 0) + ret = val; + /* even if found, keep searching for leftmost match */ + if (diff <= 0) + high = mid - 1; + else + low = mid + 1; + } + return ret; +} + +/* If a member of a split BTF struct/union refers to a base BTF + * struct/union, mark that struct/union id temporarily in the id_map + * with BTF_IS_EMBEDDED. 
Members can be const/restrict/volatile/typedef + * reference types, but if a pointer is encountered, the type is no longer + * considered embedded. + */ +static int btf_mark_embedded_composite_type_ids(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *id; + int err; + + if (!btf_is_composite(t)) + return 0; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((id = btf_field_iter_next(&it))) { + __u32 next_id = *id; + + while (next_id) { + t = btf_type_by_id(r->btf, next_id); + switch (btf_kind(t)) { + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_TYPEDEF: + case BTF_KIND_TYPE_TAG: + next_id = t->type; + break; + case BTF_KIND_ARRAY: { + struct btf_array *a = btf_array(t); + + next_id = a->type; + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + if (next_id < r->nr_dist_base_types) + r->id_map[next_id] = BTF_IS_EMBEDDED; + next_id = 0; + break; + default: + next_id = 0; + break; + } + } + } + + return 0; +} + +/* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate + * through base BTF looking up distilled type (using binary search) equivalents. + */ +static int btf_relocate_map_distilled_base(struct btf_relocate *r) +{ + struct btf_name_info *info, *info_end; + struct btf_type *base_t, *dist_t; + __u8 *base_name_cnt = NULL; + int err = 0; + __u32 id; + + /* generate a sort index array of name/type ids sorted by name for + * distilled base BTF to speed name-based lookups. 
+ */ + info = calloc(r->nr_dist_base_types, sizeof(*info)); + if (!info) { + err = -ENOMEM; + goto done; + } + info_end = info + r->nr_dist_base_types; + for (id = 0; id < r->nr_dist_base_types; id++) { + dist_t = btf_type_by_id(r->dist_base_btf, id); + info[id].name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off); + info[id].id = id; + info[id].size = dist_t->size; + info[id].needs_size = true; + } + qsort(info, r->nr_dist_base_types, sizeof(*info), cmp_btf_name_size); + + /* Mark distilled base struct/union members of split BTF structs/unions + * in id_map with BTF_IS_EMBEDDED; this signals that these types + * need to match both name and size, otherwise embedding the base + * struct/union in the split type is invalid. + */ + for (id = r->nr_dist_base_types; id < r->nr_dist_base_types + r->nr_split_types; id++) { + err = btf_mark_embedded_composite_type_ids(r, id); + if (err) + goto done; + } + + /* Collect name counts for composite types in base BTF. If multiple + * instances of a struct/union of the same name exist, we need to use + * size to determine which to map to since name alone is ambiguous. + */ + base_name_cnt = calloc(r->base_str_len, sizeof(*base_name_cnt)); + if (!base_name_cnt) { + err = -ENOMEM; + goto done; + } + for (id = 1; id < r->nr_base_types; id++) { + base_t = btf_type_by_id(r->base_btf, id); + if (!btf_is_composite(base_t) || !base_t->name_off) + continue; + if (base_name_cnt[base_t->name_off] < 255) + base_name_cnt[base_t->name_off]++; + } + + /* Now search base BTF for matching distilled base BTF types. */ + for (id = 1; id < r->nr_base_types; id++) { + struct btf_name_info *dist_info, base_info = {}; + int dist_kind, base_kind; + + base_t = btf_type_by_id(r->base_btf, id); + /* distilled base consists of named types only. 
*/ + if (!base_t->name_off) + continue; + base_kind = btf_kind(base_t); + base_info.id = id; + base_info.name = btf__name_by_offset(r->base_btf, base_t->name_off); + switch (base_kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + /* These types should match both name and size */ + base_info.needs_size = true; + base_info.size = base_t->size; + break; + case BTF_KIND_FWD: + /* No size considerations for fwds. */ + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* Size only needs to be used for struct/union if there + * are multiple types in base BTF with the same name. + * If there are multiple _distilled_ types with the same + * name (a very unlikely scenario), that doesn't matter + * unless corresponding _base_ types to match them are + * missing. + */ + base_info.needs_size = base_name_cnt[base_t->name_off] > 1; + base_info.size = base_t->size; + break; + default: + continue; + } + /* iterate over all matching distilled base types */ + for (dist_info = search_btf_name_size(&base_info, info, r->nr_dist_base_types); + dist_info != NULL && dist_info < info_end && + cmp_btf_name_size(&base_info, dist_info) == 0; + dist_info++) { + if (!dist_info->id || dist_info->id >= r->nr_dist_base_types) { + pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n", + id, dist_info->id); + err = -EINVAL; + goto done; + } + dist_t = btf_type_by_id(r->dist_base_btf, dist_info->id); + dist_kind = btf_kind(dist_t); + + /* Validate that the found distilled type is compatible. + * Do not error out on mismatch as another match may + * occur for an identically-named type. 
+ */ + switch (dist_kind) { + case BTF_KIND_FWD: + switch (base_kind) { + case BTF_KIND_FWD: + if (btf_kflag(dist_t) != btf_kflag(base_t)) + continue; + break; + case BTF_KIND_STRUCT: + if (btf_kflag(base_t)) + continue; + break; + case BTF_KIND_UNION: + if (!btf_kflag(base_t)) + continue; + break; + default: + continue; + } + break; + case BTF_KIND_INT: + if (dist_kind != base_kind || + btf_int_encoding(base_t) != btf_int_encoding(dist_t)) + continue; + break; + case BTF_KIND_FLOAT: + if (dist_kind != base_kind) + continue; + break; + case BTF_KIND_ENUM: + /* ENUM and ENUM64 are encoded as sized ENUM in + * distilled base BTF. + */ + if (base_kind != dist_kind && base_kind != BTF_KIND_ENUM64) + continue; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* size verification is required for embedded + * struct/unions. + */ + if (r->id_map[dist_info->id] == BTF_IS_EMBEDDED && + base_t->size != dist_t->size) + continue; + break; + default: + continue; + } + if (r->id_map[dist_info->id] && + r->id_map[dist_info->id] != BTF_IS_EMBEDDED) { + /* we already have a match; this tells us that + * multiple base types of the same name + * have the same size, since for cases where + * multiple types have the same name we match + * on name and size. In this case, we have + * no way of determining which to relocate + * to in base BTF, so error out. + */ + pr_warn("distilled base BTF type '%s' [%u], size %u has multiple candidates of the same size (ids [%u, %u]) in base BTF\n", + base_info.name, dist_info->id, + base_t->size, id, r->id_map[dist_info->id]); + err = -EINVAL; + goto done; + } + /* map id and name */ + r->id_map[dist_info->id] = id; + r->str_map[dist_t->name_off] = base_t->name_off; + } + } + /* ensure all distilled BTF ids now have a mapping... 
*/ + for (id = 1; id < r->nr_dist_base_types; id++) { + const char *name; + + if (r->id_map[id] && r->id_map[id] != BTF_IS_EMBEDDED) + continue; + dist_t = btf_type_by_id(r->dist_base_btf, id); + name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off); + pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n", + name, id); + err = -EINVAL; + break; + } +done: + free(base_name_cnt); + free(info); + return err; +} + +/* distilled base should only have named int/float/enum/fwd/struct/union types. */ +static int btf_relocate_validate_distilled_base(struct btf_relocate *r) +{ + unsigned int i; + + for (i = 1; i < r->nr_dist_base_types; i++) { + struct btf_type *t = btf_type_by_id(r->dist_base_btf, i); + int kind = btf_kind(t); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_FWD: + if (t->name_off) + break; + pr_warn("type [%d], kind [%d] is invalid for distilled base BTF; it is anonymous\n", + i, kind); + return -EINVAL; + default: + pr_warn("type [%d] in distilled based BTF has unexpected kind [%d]\n", + i, kind); + return -EINVAL; + } + } + return 0; +} + +static int btf_relocate_rewrite_strs(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *str_off; + int off, err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); + if (err) + return err; + + while ((str_off = btf_field_iter_next(&it))) { + if (!*str_off) + continue; + if (*str_off >= r->dist_str_len) { + *str_off += r->base_str_len - r->dist_str_len; + } else { + off = r->str_map[*str_off]; + if (!off) { + pr_warn("string '%s' [offset %u] is not mapped to base BTF\n", + btf__str_by_offset(r->btf, *str_off), *str_off); /* off is 0 here, so name the string by its original offset */ + return -ENOENT; + } + *str_off = off; + } + } + return 0; +} + +/* If successful, output of relocation is updated BTF with base BTF pointing + * at base_btf, and type ids, strings adjusted 
accordingly. + */ +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map) +{ + unsigned int nr_types = btf__type_cnt(btf); + const struct btf_header *dist_base_hdr; + const struct btf_header *base_hdr; + struct btf_relocate r = {}; + int err = 0; + __u32 id, i; + + r.dist_base_btf = btf__base_btf(btf); + if (!base_btf || !r.dist_base_btf || r.dist_base_btf == base_btf) /* dist base is passed to btf__type_cnt()/btf_header() below */ + return -EINVAL; + + r.nr_dist_base_types = btf__type_cnt(r.dist_base_btf); + r.nr_base_types = btf__type_cnt(base_btf); + r.nr_split_types = nr_types - r.nr_dist_base_types; + r.btf = btf; + r.base_btf = base_btf; + + r.id_map = calloc(nr_types, sizeof(*r.id_map)); + r.str_map = calloc(btf_header(r.dist_base_btf)->str_len, sizeof(*r.str_map)); + dist_base_hdr = btf_header(r.dist_base_btf); + base_hdr = btf_header(r.base_btf); + r.dist_str_len = dist_base_hdr->str_len; + r.base_str_len = base_hdr->str_len; + if (!r.id_map || !r.str_map) { + err = -ENOMEM; + goto err_out; + } + + err = btf_relocate_validate_distilled_base(&r); + if (err) + goto err_out; + + /* Split BTF ids need to be adjusted as base and distilled base + * have different numbers of types, changing the start id of split + * BTF. + */ + for (id = r.nr_dist_base_types; id < nr_types; id++) + r.id_map[id] = id + r.nr_base_types - r.nr_dist_base_types; + + /* Build a map from distilled base ids to actual base BTF ids; it is used + * to update split BTF id references. Also build a str_map mapping from + * distilled base BTF names to base BTF names. + */ + err = btf_relocate_map_distilled_base(&r); + if (err) + goto err_out; + + /* Next, rewrite type ids in split BTF, replacing split ids with updated + * ids based on number of types in base BTF, and base ids with + * relocated ids from base_btf. + */ + for (i = 0, id = r.nr_dist_base_types; i < r.nr_split_types; i++, id++) { + err = btf_relocate_rewrite_type_id(&r, id); + if (err) + goto err_out; + } + /* String offsets now need to be updated using the str_map. 
*/ + for (i = 0; i < r.nr_split_types; i++) { + err = btf_relocate_rewrite_strs(&r, i + r.nr_dist_base_types); + if (err) + goto err_out; + } + /* Finally reset base BTF to be base_btf */ + btf_set_base_btf(btf, base_btf); + + if (id_map) { + *id_map = r.id_map; + r.id_map = NULL; + } +err_out: + free(r.id_map); + free(r.str_map); + return err; +} diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index c92e02394159..823f83ad819c 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -24,10 +24,12 @@ int elf_open(const char *binary_path, struct elf_fd *elf_fd) { - char errmsg[STRERR_BUFSIZE]; int fd, ret; Elf *elf; + elf_fd->elf = NULL; + elf_fd->fd = -1; + if (elf_version(EV_CURRENT) == EV_NONE) { pr_warn("elf: failed to init libelf for %s\n", binary_path); return -LIBBPF_ERRNO__LIBELF; @@ -35,8 +37,7 @@ int elf_open(const char *binary_path, struct elf_fd *elf_fd) fd = open(binary_path, O_RDONLY | O_CLOEXEC); if (fd < 0) { ret = -errno; - pr_warn("elf: failed to open %s: %s\n", binary_path, - libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); + pr_warn("elf: failed to open %s: %s\n", binary_path, errstr(ret)); return ret; } elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 4e783cc7fc4b..760657f5224c 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -22,7 +22,7 @@ int probe_fd(int fd) static int probe_kern_prog_name(int token_fd) { - const size_t attr_sz = offsetofend(union bpf_attr, prog_name); + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -47,7 +47,6 @@ static int probe_kern_prog_name(int token_fd) static int probe_kern_global_data(int token_fd) { - char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), @@ -67,9 +66,8 @@ static int probe_kern_global_data(int token_fd) map = 
bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); + pr_warn("Error in %s(): %s. Couldn't create simple array map.\n", + __func__, errstr(ret)); return ret; } @@ -267,7 +265,6 @@ static int probe_kern_probe_read_kernel(int token_fd) static int probe_prog_bind_map(int token_fd) { - char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -285,9 +282,8 @@ static int probe_prog_bind_map(int token_fd) map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); + pr_warn("Error in %s(): %s. Couldn't create simple array map.\n", + __func__, errstr(ret)); return ret; } @@ -392,11 +388,41 @@ static int probe_uprobe_multi_link(int token_fd) link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); err = -errno; /* close() can clobber errno */ + if (link_fd >= 0 || err != -EBADF) { + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + return 0; + } + + /* Initial multi-uprobe support in kernel didn't handle PID filtering + * correctly (it was doing thread filtering, not process filtering). + * So now we'll detect if PID filtering logic was fixed, and, if not, + * we'll pretend multi-uprobes are not supported. + * Multi-uprobes are used in USDT attachment logic, and we need to be + * conservative here, because multi-uprobe selection happens early at + * load time, while the use of PID filtering is known late at + * attachment time, at which point it's too late to undo multi-uprobe + * selection. 
+ * + * Creating uprobe with pid == -1 for (invalid) '/' binary will fail + * early with -EINVAL on kernels with fixed PID filtering logic; + * otherwise -ESRCH would be returned if passed correct binary path + * (but we'll just get -EBADF, of course). + */ + link_opts.uprobe_multi.pid = -1; /* invalid PID */ + link_opts.uprobe_multi.path = "/"; /* invalid path */ + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + if (link_fd >= 0) close(link_fd); close(prog_fd); - return link_fd < 0 && err == -EBADF; + return link_fd < 0 && err == -EINVAL; } static int probe_kern_bpf_cookie(int token_fd) @@ -574,7 +600,8 @@ bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_ } else if (ret == 0) { WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + pr_warn("Detection of kernel %s support failed: %s\n", + feat->desc, errstr(ret)); WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } } diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index cf3323fd47b8..113ae4abd345 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -14,6 +14,7 @@ #include "bpf_gen_internal.h" #include "skel_internal.h" #include <asm/byteorder.h> +#include "str_error.h" #define MAX_USED_MAPS 64 #define MAX_USED_PROGS 32 @@ -393,7 +394,7 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) blob_fd_array_off(gen, i)); emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); emit(gen, BPF_EXIT_INSN()); - pr_debug("gen: finish %d\n", gen->error); + pr_debug("gen: finish %s\n", errstr(gen->error)); if (!gen->error) { struct gen_loader_opts *opts = gen->opts; @@ -401,6 +402,15 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) opts->insns_sz = gen->insn_cur - gen->insn_start; opts->data = 
gen->data_start; opts->data_sz = gen->data_cur - gen->data_start; + + /* use target endianness for embedded loader */ + if (gen->swapped_endian) { + struct bpf_insn *insn = (struct bpf_insn *)opts->insns; + int insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); + + for (i = 0; i < insn_cnt; i++) + bpf_insn_bswap(insn++); + } } return gen->error; } @@ -414,6 +424,28 @@ void bpf_gen__free(struct bpf_gen *gen) free(gen); } +/* + * Fields of bpf_attr are set to values in native byte-order before being + * written to the target-bound data blob, and may need endian conversion. + * This macro allows providing the correct value in situ more simply than + * writing a separate converter for *all fields* of *all records* included + * in union bpf_attr. Note that sizeof(rval) should match the assignment + * target to avoid runtime problems. + */ +#define tgt_endian(rval) ({ \ + typeof(rval) _val = (rval); \ + if (gen->swapped_endian) { \ + switch (sizeof(_val)) { \ + case 1: break; \ + case 2: _val = bswap_16(_val); break; \ + case 4: _val = bswap_32(_val); break; \ + case 8: _val = bswap_64(_val); break; \ + default: pr_warn("unsupported bswap size!\n"); \ + } \ + } \ + _val; \ +}) + void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, __u32 btf_raw_size) { @@ -422,11 +454,12 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: load_btf: size %d\n", btf_raw_size); btf_data = add_data(gen, btf_raw_data, btf_raw_size); - attr.btf_size = btf_raw_size; + attr.btf_size = tgt_endian(btf_raw_size); btf_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: load_btf: off %d size %d, attr: off %d size %d\n", + btf_data, btf_raw_size, btf_load_attr, attr_size); /* populate union bpf_attr with user provided log details */ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4, @@ -457,28 +490,29 @@ void bpf_gen__map_create(struct bpf_gen *gen, union bpf_attr attr; 
memset(&attr, 0, attr_size); - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; - attr.map_flags = map_attr->map_flags; - attr.map_extra = map_attr->map_extra; + attr.map_type = tgt_endian(map_type); + attr.key_size = tgt_endian(key_size); + attr.value_size = tgt_endian(value_size); + attr.map_flags = tgt_endian(map_attr->map_flags); + attr.map_extra = tgt_endian(map_attr->map_extra); if (map_name) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); - attr.numa_node = map_attr->numa_node; - attr.map_ifindex = map_attr->map_ifindex; - attr.max_entries = max_entries; - attr.btf_key_type_id = map_attr->btf_key_type_id; - attr.btf_value_type_id = map_attr->btf_value_type_id; - - pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", - attr.map_name, map_idx, map_type, attr.btf_value_type_id); + attr.numa_node = tgt_endian(map_attr->numa_node); + attr.map_ifindex = tgt_endian(map_attr->map_ifindex); + attr.max_entries = tgt_endian(max_entries); + attr.btf_key_type_id = tgt_endian(map_attr->btf_key_type_id); + attr.btf_value_type_id = tgt_endian(map_attr->btf_value_type_id); map_create_attr = add_data(gen, &attr, attr_size); - if (attr.btf_value_type_id) + pr_debug("gen: map_create: %s idx %d type %d value_type_id %d, attr: off %d size %d\n", + map_name, map_idx, map_type, map_attr->btf_value_type_id, + map_create_attr, attr_size); + + if (map_attr->btf_value_type_id) /* populate union bpf_attr with btf_fd saved in the stack earlier */ move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4, stack_off(btf_fd)); - switch (attr.map_type) { + switch (map_type) { case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4, @@ -498,8 +532,8 @@ void bpf_gen__map_create(struct bpf_gen *gen, /* emit MAP_CREATE command */ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); debug_ret(gen, "map_create %s idx %d type %d value_size %d 
value_btf_id %d", - attr.map_name, map_idx, map_type, value_size, - attr.btf_value_type_id); + map_name, map_idx, map_type, value_size, + map_attr->btf_value_type_id); emit_check_err(gen); /* remember map_fd in the stack, if successful */ if (map_idx < 0) { @@ -784,12 +818,12 @@ log: emit_ksym_relo_log(gen, relo, kdesc->ref); } -static __u32 src_reg_mask(void) +static __u32 src_reg_mask(struct bpf_gen *gen) { -#if defined(__LITTLE_ENDIAN_BITFIELD) - return 0x0f; /* src_reg,dst_reg,... */ -#elif defined(__BIG_ENDIAN_BITFIELD) - return 0xf0; /* dst_reg,src_reg,... */ +#if defined(__LITTLE_ENDIAN_BITFIELD) /* src_reg,dst_reg,... */ + return gen->swapped_endian ? 0xf0 : 0x0f; +#elif defined(__BIG_ENDIAN_BITFIELD) /* dst_reg,src_reg,... */ + return gen->swapped_endian ? 0x0f : 0xf0; #else #error "Unsupported bit endianness, cannot proceed" #endif @@ -840,7 +874,7 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 3)); clear_src_reg: /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ - reg_mask = src_reg_mask(); + reg_mask = src_reg_mask(gen); emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask)); emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code))); @@ -931,48 +965,94 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) cleanup_core_relo(gen); } +/* Convert func, line, and core relo info blobs to target endianness */ +static void info_blob_bswap(struct bpf_gen *gen, int func_info, int line_info, + int core_relos, struct bpf_prog_load_opts *load_attr) +{ + struct bpf_func_info *fi = gen->data_start + func_info; + struct bpf_line_info *li = gen->data_start + line_info; + struct bpf_core_relo *cr = gen->data_start + core_relos; + int i; + + for (i = 0; i < load_attr->func_info_cnt; i++) + bpf_func_info_bswap(fi++); + + for (i = 0; 
i < load_attr->line_info_cnt; i++) + bpf_line_info_bswap(li++); + + for (i = 0; i < gen->core_relo_cnt; i++) + bpf_core_relo_bswap(cr++); +} + void bpf_gen__prog_load(struct bpf_gen *gen, enum bpf_prog_type prog_type, const char *prog_name, const char *license, struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *load_attr, int prog_idx) { + int func_info_tot_sz = load_attr->func_info_cnt * + load_attr->func_info_rec_size; + int line_info_tot_sz = load_attr->line_info_cnt * + load_attr->line_info_rec_size; + int core_relo_tot_sz = gen->core_relo_cnt * + sizeof(struct bpf_core_relo); int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; int attr_size = offsetofend(union bpf_attr, core_relo_rec_size); union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", - prog_type, insn_cnt, prog_idx); /* add license string to blob of bytes */ license_off = add_data(gen, license, strlen(license) + 1); /* add insns to blob of bytes */ insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); + pr_debug("gen: prog_load: prog_idx %d type %d insn off %d insns_cnt %zd license off %d\n", + prog_idx, prog_type, insns_off, insn_cnt, license_off); - attr.prog_type = prog_type; - attr.expected_attach_type = load_attr->expected_attach_type; - attr.attach_btf_id = load_attr->attach_btf_id; - attr.prog_ifindex = load_attr->prog_ifindex; - attr.kern_version = 0; - attr.insn_cnt = (__u32)insn_cnt; - attr.prog_flags = load_attr->prog_flags; - - attr.func_info_rec_size = load_attr->func_info_rec_size; - attr.func_info_cnt = load_attr->func_info_cnt; - func_info = add_data(gen, load_attr->func_info, - attr.func_info_cnt * attr.func_info_rec_size); + /* convert blob insns to target endianness */ + if (gen->swapped_endian) { + struct bpf_insn *insn = gen->data_start + insns_off; + int i; - attr.line_info_rec_size = load_attr->line_info_rec_size; - attr.line_info_cnt = 
load_attr->line_info_cnt; - line_info = add_data(gen, load_attr->line_info, - attr.line_info_cnt * attr.line_info_rec_size); + for (i = 0; i < insn_cnt; i++, insn++) + bpf_insn_bswap(insn); + } - attr.core_relo_rec_size = sizeof(struct bpf_core_relo); - attr.core_relo_cnt = gen->core_relo_cnt; - core_relos = add_data(gen, gen->core_relos, - attr.core_relo_cnt * attr.core_relo_rec_size); + attr.prog_type = tgt_endian(prog_type); + attr.expected_attach_type = tgt_endian(load_attr->expected_attach_type); + attr.attach_btf_id = tgt_endian(load_attr->attach_btf_id); + attr.prog_ifindex = tgt_endian(load_attr->prog_ifindex); + attr.kern_version = 0; + attr.insn_cnt = tgt_endian((__u32)insn_cnt); + attr.prog_flags = tgt_endian(load_attr->prog_flags); + + attr.func_info_rec_size = tgt_endian(load_attr->func_info_rec_size); + attr.func_info_cnt = tgt_endian(load_attr->func_info_cnt); + func_info = add_data(gen, load_attr->func_info, func_info_tot_sz); + pr_debug("gen: prog_load: func_info: off %d cnt %d rec size %d\n", + func_info, load_attr->func_info_cnt, + load_attr->func_info_rec_size); + + attr.line_info_rec_size = tgt_endian(load_attr->line_info_rec_size); + attr.line_info_cnt = tgt_endian(load_attr->line_info_cnt); + line_info = add_data(gen, load_attr->line_info, line_info_tot_sz); + pr_debug("gen: prog_load: line_info: off %d cnt %d rec size %d\n", + line_info, load_attr->line_info_cnt, + load_attr->line_info_rec_size); + + attr.core_relo_rec_size = tgt_endian((__u32)sizeof(struct bpf_core_relo)); + attr.core_relo_cnt = tgt_endian(gen->core_relo_cnt); + core_relos = add_data(gen, gen->core_relos, core_relo_tot_sz); + pr_debug("gen: prog_load: core_relos: off %d cnt %d rec size %zd\n", + core_relos, gen->core_relo_cnt, + sizeof(struct bpf_core_relo)); + + /* convert all info blobs to target endianness */ + if (gen->swapped_endian) + info_blob_bswap(gen, func_info, line_info, core_relos, load_attr); libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); 
prog_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: prog_load: attr: off %d size %d\n", + prog_load_attr, attr_size); /* populate union bpf_attr with a pointer to license */ emit_rel_store(gen, attr_field(prog_load_attr, license), license_off); @@ -1040,7 +1120,6 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, int zero = 0; memset(&attr, 0, attr_size); - pr_debug("gen: map_update_elem: idx %d\n", map_idx); value = add_data(gen, pvalue, value_size); key = add_data(gen, &zero, sizeof(zero)); @@ -1068,6 +1147,8 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_update_elem: idx %d, value: off %d size %d, attr: off %d size %d\n", + map_idx, value, value_size, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1084,14 +1165,16 @@ void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slo int attr_size = offsetofend(union bpf_attr, flags); int map_update_attr, key; union bpf_attr attr; + int tgt_slot; memset(&attr, 0, attr_size); - pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n", - outer_map_idx, slot, inner_map_idx); - key = add_data(gen, &slot, sizeof(slot)); + tgt_slot = tgt_endian(slot); + key = add_data(gen, &tgt_slot, sizeof(tgt_slot)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: populate_outer_map: outer %d key %d inner %d, attr: off %d size %d\n", + outer_map_idx, slot, inner_map_idx, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, outer_map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1112,8 +1195,9 @@ void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) union bpf_attr attr; 
memset(&attr, 0, attr_size); - pr_debug("gen: map_freeze: idx %d\n", map_idx); map_freeze_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_freeze: idx %d, attr: off %d size %d\n", + map_idx, map_freeze_attr, attr_size); move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); /* emit MAP_FREEZE command */ diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index c12f8320e668..0c4f155e8eb7 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -166,8 +166,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry(map, cur, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; cur; cur = cur->next) + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; cur; cur = cur->next) /* * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe @@ -178,8 +178,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; \ + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; \ cur && ({tmp = cur->next; true; }); \ cur = tmp) @@ -190,19 +190,19 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? 
(map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur; \ cur = cur->next) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #endif /* __LIBBPF_HASHMAP_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a2061fcd612d..e9c641a2fb20 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -60,6 +60,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define MAX_EVENT_NAME_LEN 64 + #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" #define BPF_INSN_SZ (sizeof(struct bpf_insn)) @@ -132,6 +134,8 @@ static const char * const attach_type_name[] = { [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", [BPF_NETKIT_PRIMARY] = "netkit_primary", [BPF_NETKIT_PEER] = "netkit_peer", + [BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session", + [BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session", }; static const char * const link_type_name[] = { @@ -149,6 +153,7 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_TCX] = "tcx", [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", [BPF_LINK_TYPE_NETKIT] = "netkit", + [BPF_LINK_TYPE_SOCKMAP] = "sockmap", }; static const char * const map_type_name[] = { @@ -227,7 +232,30 @@ static const char * const prog_type_name[] = { static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) { - if (level == LIBBPF_DEBUG) + const char *env_var = "LIBBPF_LOG_LEVEL"; + static enum libbpf_print_level min_level = LIBBPF_INFO; + static bool initialized; + + if (!initialized) { + 
char *verbosity; + + initialized = true; + verbosity = getenv(env_var); + if (verbosity) { + if (strcasecmp(verbosity, "warn") == 0) + min_level = LIBBPF_WARN; + else if (strcasecmp(verbosity, "debug") == 0) + min_level = LIBBPF_DEBUG; + else if (strcasecmp(verbosity, "info") == 0) + min_level = LIBBPF_INFO; + else + fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n", + env_var, verbosity); + } + } + + /* if too verbose, skip logging */ + if (level > min_level) return 0; return vfprintf(stderr, format, args); @@ -258,7 +286,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) old_errno = errno; va_start(args, format); - __libbpf_pr(level, format, args); + print_fn(level, format, args); va_end(args); errno = old_errno; @@ -471,8 +499,6 @@ struct bpf_program { }; struct bpf_struct_ops { - const char *tname; - const struct btf_type *type; struct bpf_program **progs; __u32 *kern_func_off; /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */ @@ -547,6 +573,7 @@ struct bpf_map { bool pinned; bool reused; bool autocreate; + bool autoattach; __u64 map_extra; }; @@ -645,11 +672,18 @@ struct elf_state { struct usdt_manager; +enum bpf_object_state { + OBJ_OPEN, + OBJ_PREPARED, + OBJ_LOADED, +}; + struct bpf_object { char name[BPF_OBJ_NAME_LEN]; char license[64]; __u32 kern_version; + enum bpf_object_state state; struct bpf_program *programs; size_t nr_programs; struct bpf_map *maps; @@ -661,7 +695,6 @@ struct bpf_object { int nr_extern; int kconfig_map_idx; - bool loaded; bool has_subcalls; bool has_rodata; @@ -670,6 +703,8 @@ struct bpf_object { /* Information when doing ELF related work. 
Only valid if efile.elf is not NULL */ struct elf_state efile; + unsigned char byteorder; + struct btf *btf; struct btf_ext *btf_ext; @@ -863,7 +898,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_off + prog_sz > sec_sz) { + if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) { pr_warn("sec '%s': program at offset %zu crosses section boundary\n", sec_name, sec_off); return -LIBBPF_ERRNO__FORMAT; @@ -916,6 +951,20 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return 0; } +static void bpf_object_bswap_progs(struct bpf_object *obj) +{ + struct bpf_program *prog = obj->programs; + struct bpf_insn *insn; + int p, i; + + for (p = 0; p < obj->nr_programs; p++, prog++) { + insn = prog->insns; + for (i = 0; i < prog->insns_cnt; i++, insn++) + bpf_insn_bswap(insn); + } + pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs); +} + static const struct btf_member * find_member_by_offset(const struct btf_type *t, __u32 bit_offset) { @@ -962,7 +1011,7 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw, { const struct btf_type *kern_type, *kern_vtype; const struct btf_member *kern_data_member; - struct btf *btf; + struct btf *btf = NULL; __s32 kern_vtype_id, kern_type_id; char tname[256]; __u32 i; @@ -1057,11 +1106,14 @@ static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj) continue; for (j = 0; j < obj->nr_maps; ++j) { + const struct btf_type *type; + map = &obj->maps[j]; if (!bpf_map__is_struct_ops(map)) continue; - vlen = btf_vlen(map->st_ops->type); + type = btf__type_by_id(obj->btf, map->st_ops->type_id); + vlen = btf_vlen(type); for (k = 0; k < vlen; ++k) { slot_prog = map->st_ops->progs[k]; if (prog != slot_prog) @@ -1089,14 +1141,14 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) const struct btf *btf = obj->btf; struct bpf_struct_ops *st_ops; const struct btf *kern_btf; - struct 
module_btf *mod_btf; + struct module_btf *mod_btf = NULL; void *data, *kern_data; const char *tname; int err; st_ops = map->st_ops; - type = st_ops->type; - tname = st_ops->tname; + type = btf__type_by_id(btf, st_ops->type_id); + tname = btf__name_by_offset(btf, type->name_off); err = find_struct_ops_kern_types(obj, tname, &mod_btf, &kern_type, &kern_type_id, &kern_vtype, &kern_vtype_id, @@ -1126,17 +1178,46 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) const struct btf_type *mtype, *kern_mtype; __u32 mtype_id, kern_mtype_id; void *mdata, *kern_mdata; + struct bpf_program *prog; __s64 msize, kern_msize; __u32 moff, kern_moff; __u32 kern_member_idx; const char *mname; mname = btf__name_by_offset(btf, member->name_off); + moff = member->offset / 8; + mdata = data + moff; + msize = btf__resolve_size(btf, member->type); + if (msize < 0) { + pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n", + map->name, mname); + return msize; + } + kern_member = find_member_by_name(kern_btf, kern_type, mname); if (!kern_member) { - pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", + if (!libbpf_is_mem_zeroed(mdata, msize)) { + pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", + map->name, mname); + return -ENOTSUP; + } + + if (st_ops->progs[i]) { + /* If we had declaratively set struct_ops callback, we need to + * force its autoload to false, because it doesn't have + * a chance of succeeding from POV of the current struct_ops map. + * If this program is still referenced somewhere else, though, + * then bpf_object_adjust_struct_ops_autoload() will update its + * autoload accordingly. 
+ */ + st_ops->progs[i]->autoload = false; + st_ops->progs[i] = NULL; + } + + /* Skip all-zero/NULL fields if they are not present in the kernel BTF */ + pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n", map->name, mname); - return -ENOTSUP; + continue; } kern_member_idx = kern_member - btf_members(kern_type); @@ -1147,10 +1228,7 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) return -ENOTSUP; } - moff = member->offset / 8; kern_moff = kern_member->offset / 8; - - mdata = data + moff; kern_mdata = kern_data + kern_moff; mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id); @@ -1165,13 +1243,19 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) } if (btf_is_ptr(mtype)) { - struct bpf_program *prog; + prog = *(void **)mdata; + /* just like for !kern_member case above, reset declaratively + * set (at compile time) program's autload to false, + * if user replaced it with another program or NULL + */ + if (st_ops->progs[i] && st_ops->progs[i] != prog) + st_ops->progs[i]->autoload = false; /* Update the value from the shadow type */ - prog = *(void **)mdata; st_ops->progs[i] = prog; if (!prog) continue; + if (!is_valid_st_ops_program(obj, prog)) { pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n", map->name, mname); @@ -1230,9 +1314,8 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map) continue; } - msize = btf__resolve_size(btf, mtype_id); kern_msize = btf__resolve_size(kern_btf, kern_mtype_id); - if (msize < 0 || kern_msize < 0 || msize != kern_msize) { + if (kern_msize < 0 || msize != kern_msize) { pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n", map->name, mname, (ssize_t)msize, (ssize_t)kern_msize); @@ -1344,6 +1427,7 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, map->def.value_size = type->size; map->def.max_entries = 1; map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) 
== 0 ? BPF_F_LINK : 0; + map->autoattach = true; map->st_ops = calloc(1, sizeof(*map->st_ops)); if (!map->st_ops) @@ -1365,8 +1449,6 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, memcpy(st_ops->data, data->d_buf + vsi->offset, type->size); - st_ops->tname = tname; - st_ops->type = type; st_ops->type_id = type_id; pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n", @@ -1437,7 +1519,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->kconfig_map_idx = -1; obj->kern_version = get_kernel_version(); - obj->loaded = false; + obj->state = OBJ_OPEN; return obj; } @@ -1449,6 +1531,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj) elf_end(obj->efile.elf); obj->efile.elf = NULL; + obj->efile.ehdr = NULL; obj->efile.symbols = NULL; obj->efile.arena_data = NULL; @@ -1476,11 +1559,8 @@ static int bpf_object__elf_init(struct bpf_object *obj) } else { obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); if (obj->efile.fd < 0) { - char errmsg[STRERR_BUFSIZE], *cp; - err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("elf: failed to open %s: %s\n", obj->path, cp); + pr_warn("elf: failed to open %s: %s\n", obj->path, errstr(err)); return err; } @@ -1514,6 +1594,16 @@ static int bpf_object__elf_init(struct bpf_object *obj) goto errout; } + /* Validate ELF object endianness... 
*/ + if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && + ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { + err = -LIBBPF_ERRNO__ENDIAN; + pr_warn("elf: '%s' has unknown byte order\n", obj->path); + goto errout; + } + /* and save after bpf_object_open() frees ELF data */ + obj->byteorder = ehdr->e_ident[EI_DATA]; + if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { pr_warn("elf: failed to get section names section index for %s: %s\n", obj->path, elf_errmsg(-1)); @@ -1542,19 +1632,15 @@ errout: return err; } -static int bpf_object__check_endianness(struct bpf_object *obj) +static bool is_native_endianness(struct bpf_object *obj) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) - return 0; + return obj->byteorder == ELFDATA2LSB; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) - return 0; + return obj->byteorder == ELFDATA2MSB; #else # error "Unrecognized __BYTE_ORDER__" #endif - pr_warn("elf: endianness mismatch in %s.\n", obj->path); - return -LIBBPF_ERRNO__ENDIAN; } static int @@ -1641,24 +1727,27 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam return ERR_PTR(-ENOENT); } -/* Some versions of Android don't provide memfd_create() in their libc - * implementation, so avoid complications and just go straight to Linux - * syscall. 
- */ -static int sys_memfd_create(const char *name, unsigned flags) -{ - return syscall(__NR_memfd_create, name, flags); -} - #ifndef MFD_CLOEXEC #define MFD_CLOEXEC 0x0001U #endif +#ifndef MFD_NOEXEC_SEAL +#define MFD_NOEXEC_SEAL 0x0008U +#endif static int create_placeholder_fd(void) { + unsigned int flags = MFD_CLOEXEC | MFD_NOEXEC_SEAL; + const char *name = "libbpf-placeholder-fd"; int fd; - fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC)); + fd = ensure_good_fd(sys_memfd_create(name, flags)); + if (fd >= 0) + return fd; + else if (errno != EINVAL) + return -errno; + + /* Possibly running on kernel without MFD_NOEXEC_SEAL */ + fd = ensure_good_fd(sys_memfd_create(name, flags & ~MFD_NOEXEC_SEAL)); if (fd < 0) return -errno; return fd; @@ -1791,7 +1880,7 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name) snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, sfx_len, real_name); - /* sanitise map name to characters allowed by kernel */ + /* sanities map name to characters allowed by kernel */ for (p = map_name; *p && p < map_name + sizeof(map_name); p++) if (!isalnum(*p) && *p != '_' && *p != '.') *p = '_'; @@ -1880,8 +1969,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, if (map->mmaped == MAP_FAILED) { err = -errno; map->mmaped = NULL; - pr_warn("failed to alloc map '%s' content buffer: %d\n", - map->name, err); + pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err)); zfree(&map->real_name); zfree(&map->name); return err; @@ -1956,6 +2044,20 @@ static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, return NULL; } +static struct extern_desc *find_extern_by_name_with_len(const struct bpf_object *obj, + const void *name, int len) +{ + const char *ext_name; + int i; + + for (i = 0; i < obj->nr_extern; i++) { + ext_name = obj->externs[i].name; + if (strlen(ext_name) == len && strncmp(ext_name, name, len) == 0) + 
return &obj->externs[i]; + } + return NULL; +} + static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, char value) { @@ -2003,7 +2105,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, } len = strlen(value); - if (value[len - 1] != '"') { + if (len < 2 || value[len - 1] != '"') { pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", ext->name, value); return -EINVAL; @@ -2031,7 +2133,7 @@ static int parse_u64(const char *value, __u64 *res) *res = strtoull(value, &value_end, 0); if (errno) { err = -errno; - pr_warn("failed to parse '%s' as integer: %d\n", value, err); + pr_warn("failed to parse '%s': %s\n", value, errstr(err)); return err; } if (*value_end) { @@ -2197,8 +2299,8 @@ static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) while (gzgets(file, buf, sizeof(buf))) { err = bpf_object__process_kconfig_line(obj, buf, data); if (err) { - pr_warn("error parsing system Kconfig line '%s': %d\n", - buf, err); + pr_warn("error parsing system Kconfig line '%s': %s\n", + buf, errstr(err)); goto out; } } @@ -2218,15 +2320,15 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj, file = fmemopen((void *)config, strlen(config), "r"); if (!file) { err = -errno; - pr_warn("failed to open in-memory Kconfig: %d\n", err); + pr_warn("failed to open in-memory Kconfig: %s\n", errstr(err)); return err; } while (fgets(buf, sizeof(buf), file)) { err = bpf_object__process_kconfig_line(obj, buf, data); if (err) { - pr_warn("error parsing in-memory Kconfig line '%s': %d\n", - buf, err); + pr_warn("error parsing in-memory Kconfig line '%s': %s\n", + buf, errstr(err)); break; } } @@ -3141,7 +3243,7 @@ static int bpf_object__init_btf(struct bpf_object *obj, err = libbpf_get_error(obj->btf); if (err) { obj->btf = NULL; - pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %s.\n", BTF_ELF_SEC, errstr(err)); goto out; } /* enforce 8-byte pointers for 
BPF-targeted BTFs */ @@ -3159,8 +3261,8 @@ static int bpf_object__init_btf(struct bpf_object *obj, obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); err = libbpf_get_error(obj->btf_ext); if (err) { - pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n", - BTF_EXT_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %s. Ignored and continue.\n", + BTF_EXT_ELF_SEC, errstr(err)); obj->btf_ext = NULL; goto out; } @@ -3252,8 +3354,8 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, if (t->size == 0) { err = find_elf_sec_sz(obj, sec_name, &size); if (err || !size) { - pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", - sec_name, size, err); + pr_debug("sec '%s': failed to determine size from ELF: size %u, err %s\n", + sec_name, size, errstr(err)); return -ENOENT; } @@ -3407,7 +3509,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) obj->btf_vmlinux = btf__load_vmlinux_btf(); err = libbpf_get_error(obj->btf_vmlinux); if (err) { - pr_warn("Error loading vmlinux BTF: %d\n", err); + pr_warn("Error loading vmlinux BTF: %s\n", errstr(err)); obj->btf_vmlinux = NULL; return err; } @@ -3510,11 +3612,14 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) report: if (err) { btf_mandatory = kernel_needs_btf(obj); - pr_warn("Error loading .BTF into kernel: %d. %s\n", err, - btf_mandatory ? "BTF is mandatory, can't proceed." - : "BTF is optional, ignoring."); - if (!btf_mandatory) + if (btf_mandatory) { + pr_warn("Error loading .BTF into kernel: %s. BTF is mandatory, can't proceed.\n", + errstr(err)); + } else { + pr_info("Error loading .BTF into kernel: %s. 
BTF is optional, ignoring.\n", + errstr(err)); err = 0; + } } return err; } @@ -3882,6 +3987,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return -LIBBPF_ERRNO__FORMAT; } + /* change BPF program insns to native endianness for introspection */ + if (!is_native_endianness(obj)) + bpf_object_bswap_progs(obj); + /* sort BPF programs by section name and in-section instruction offset * for faster search */ @@ -3914,7 +4023,7 @@ static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) return true; /* global function */ - return bind == STB_GLOBAL && type == STT_FUNC; + return (bind == STB_GLOBAL || bind == STB_WEAK) && type == STT_FUNC; } static int find_extern_btf_id(const struct btf *btf, const char *ext_name) @@ -4318,7 +4427,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) { - return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; + return prog->sec_idx == obj->efile.text_shndx; } struct bpf_program * @@ -4712,8 +4821,8 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) fp = fopen(file, "re"); if (!fp) { err = -errno; - pr_warn("failed to open %s: %d. No procfs support?\n", file, - err); + pr_warn("failed to open %s: %s. 
No procfs support?\n", file, + errstr(err)); return err; } @@ -4735,6 +4844,11 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) return 0; } +static bool map_is_created(const struct bpf_map *map) +{ + return map->obj->state >= OBJ_PREPARED || map->reused; +} + bool bpf_map__autocreate(const struct bpf_map *map) { return map->autocreate; @@ -4742,13 +4856,27 @@ bool bpf_map__autocreate(const struct bpf_map *map) int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) { - if (map->obj->loaded) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->autocreate = autocreate; return 0; } +int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach) +{ + if (!bpf_map__is_struct_ops(map)) + return libbpf_err(-EINVAL); + + map->autoattach = autoattach; + return 0; +} + +bool bpf_map__autoattach(const struct bpf_map *map) +{ + return map->autoattach; +} + int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info; @@ -4822,7 +4950,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map) int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) { - if (map->obj->loaded) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.max_entries = max_entries; @@ -4854,8 +4982,8 @@ static int bpf_object_prepare_token(struct bpf_object *obj) bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); if (bpffs_fd < 0) { err = -errno; - __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", - obj->name, err, bpffs_path, + __pr(level, "object '%s': failed (%s) to open BPF FS mount at '%s'%s\n", + obj->name, errstr(err), bpffs_path, mandatory ? "" : ", skipping optional step..."); return mandatory ? 
err : 0; } @@ -4889,7 +5017,6 @@ static int bpf_object_prepare_token(struct bpf_object *obj) static int bpf_object__probe_loading(struct bpf_object *obj) { - char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -4905,7 +5032,8 @@ bpf_object__probe_loading(struct bpf_object *obj) ret = bump_rlimit_memlock(); if (ret) - pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); + pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %s), you might need to do it explicitly!\n", + errstr(ret)); /* make sure basic loading works */ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); @@ -4913,11 +5041,8 @@ bpf_object__probe_loading(struct bpf_object *obj) ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF " - "program. Make sure your kernel supports BPF " - "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is " - "set to big enough value.\n", __func__, cp, ret); + pr_warn("Error in %s(): %s. Couldn't load trivial BPF program. 
Make sure your kernel supports BPF (CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is set to big enough value.\n", + __func__, errstr(ret)); return -ret; } close(ret); @@ -4942,7 +5067,6 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { struct bpf_map_info map_info; - char msg[STRERR_BUFSIZE]; __u32 map_info_len = sizeof(map_info); int err; @@ -4952,7 +5076,7 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); if (err) { pr_warn("failed to get map info for map FD %d: %s\n", map_fd, - libbpf_strerror_r(errno, msg, sizeof(msg))); + errstr(err)); return false; } @@ -4967,7 +5091,6 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) static int bpf_object__reuse_map(struct bpf_map *map) { - char *cp, errmsg[STRERR_BUFSIZE]; int err, pin_fd; pin_fd = bpf_obj_get(map->pin_path); @@ -4979,9 +5102,8 @@ bpf_object__reuse_map(struct bpf_map *map) return 0; } - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); pr_warn("couldn't retrieve pinned map '%s': %s\n", - map->pin_path, cp); + map->pin_path, errstr(err)); return err; } @@ -5007,8 +5129,8 @@ static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { enum libbpf_map_type map_type = map->libbpf_type; - char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; + size_t mmap_sz; if (obj->gen_loader) { bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, @@ -5021,9 +5143,8 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); if (err) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error setting initial map(%s) contents: %s\n", - map->name, cp); + pr_warn("map '%s': failed to set initial contents: %s\n", + bpf_map__name(map), errstr(err)); return err; } @@ -5032,22 
+5153,48 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_freeze(map->fd); if (err) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error freezing map(%s) as read-only: %s\n", - map->name, cp); + pr_warn("map '%s': failed to freeze as read-only: %s\n", + bpf_map__name(map), errstr(err)); + return err; + } + } + + /* Remap anonymous mmap()-ed "map initialization image" as + * a BPF map-backed mmap()-ed memory, but preserving the same + * memory address. This will cause kernel to change process' + * page table to point to a different piece of kernel memory, + * but from userspace point of view memory address (and its + * contents, being identical at this point) will stay the + * same. This mapping will be released by bpf_object__close() + * as per normal clean up procedure. + */ + mmap_sz = bpf_map_mmap_sz(map); + if (map->def.map_flags & BPF_F_MMAPABLE) { + void *mmaped; + int prot; + + if (map->def.map_flags & BPF_F_RDONLY_PROG) + prot = PROT_READ; + else + prot = PROT_READ | PROT_WRITE; + mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0); + if (mmaped == MAP_FAILED) { + err = -errno; + pr_warn("map '%s': failed to re-mmap() contents: %s\n", + bpf_map__name(map), errstr(err)); return err; } + map->mmaped = mmaped; + } else if (map->mmaped) { + munmap(map->mmaped, mmap_sz); + map->mmaped = NULL; } + return 0; } static void bpf_map__destroy(struct bpf_map *map); -static bool map_is_created(const struct bpf_map *map) -{ - return map->obj->loaded || map->reused; -} - static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { LIBBPF_OPTS(bpf_map_create_opts, create_attr); @@ -5086,8 +5233,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b return err; err = bpf_object__create_map(obj, map->inner_map, true); if (err) { - pr_warn("map '%s': failed to create inner map: %d\n", - map->name, err); + 
pr_warn("map '%s': failed to create inner map: %s\n", + map->name, errstr(err)); return err; } map->inner_map_fd = map->inner_map->fd; @@ -5141,12 +5288,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b def->max_entries, &create_attr); } if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { - char *cp, errmsg[STRERR_BUFSIZE]; - err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", - map->name, cp, err); + pr_warn("Error in bpf_create_map_xattr(%s): %s. Retrying without BTF.\n", + map->name, errstr(err)); create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; @@ -5201,8 +5345,8 @@ static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) } if (err) { err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", - map->name, i, targ_map->name, fd, err); + pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %s\n", + map->name, i, targ_map->name, fd, errstr(err)); return err; } pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", @@ -5234,8 +5378,8 @@ static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_update_elem(map->fd, &i, &fd, 0); if (err) { err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", - map->name, i, targ_prog->name, fd, err); + pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %s\n", + map->name, i, targ_prog->name, fd, errstr(err)); return err; } pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", @@ -5288,7 +5432,6 @@ static int bpf_object__create_maps(struct bpf_object *obj) { struct bpf_map *map; - char *cp, errmsg[STRERR_BUFSIZE]; unsigned int i, j; int err; bool retried; @@ -5354,8 +5497,7 @@ retry: err = bpf_object__populate_internal_map(obj, map); if (err < 0) goto err_out; - } - 
if (map->def.type == BPF_MAP_TYPE_ARENA) { + } else if (map->def.type == BPF_MAP_TYPE_ARENA) { map->mmaped = mmap((void *)(long)map->map_extra, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, @@ -5363,8 +5505,8 @@ retry: if (map->mmaped == MAP_FAILED) { err = -errno; map->mmaped = NULL; - pr_warn("map '%s': failed to mmap arena: %d\n", - map->name, err); + pr_warn("map '%s': failed to mmap arena: %s\n", + map->name, errstr(err)); return err; } if (obj->arena_data) { @@ -5386,8 +5528,8 @@ retry: retried = true; goto retry; } - pr_warn("map '%s': failed to auto-pin at '%s': %d\n", - map->name, map->pin_path, err); + pr_warn("map '%s': failed to auto-pin at '%s': %s\n", + map->name, map->pin_path, errstr(err)); goto err_out; } } @@ -5396,8 +5538,7 @@ retry: return 0; err_out: - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err); + pr_warn("map '%s': failed to create: %s\n", map->name, errstr(err)); pr_perm_msg(err); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); @@ -5521,7 +5662,7 @@ static int load_module_btfs(struct bpf_object *obj) } if (err) { err = -errno; - pr_warn("failed to iterate BTF objects: %d\n", err); + pr_warn("failed to iterate BTF objects: %s\n", errstr(err)); return err; } @@ -5530,7 +5671,7 @@ static int load_module_btfs(struct bpf_object *obj) if (errno == ENOENT) continue; /* expected race: BTF was unloaded */ err = -errno; - pr_warn("failed to get BTF object #%d FD: %d\n", id, err); + pr_warn("failed to get BTF object #%d FD: %s\n", id, errstr(err)); return err; } @@ -5542,7 +5683,7 @@ static int load_module_btfs(struct bpf_object *obj) err = bpf_btf_get_info_by_fd(fd, &info, &len); if (err) { err = -errno; - pr_warn("failed to get BTF object #%d info: %d\n", id, err); + pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err)); goto err_out; } @@ -5555,8 +5696,8 @@ static int load_module_btfs(struct bpf_object 
*obj) btf = btf_get_from_fd(fd, obj->btf_vmlinux); err = libbpf_get_error(btf); if (err) { - pr_warn("failed to load module [%s]'s BTF object #%d: %d\n", - name, id, err); + pr_warn("failed to load module [%s]'s BTF object #%d: %s\n", + name, id, errstr(err)); goto err_out; } @@ -5785,7 +5926,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); err = libbpf_get_error(obj->btf_vmlinux_override); if (err) { - pr_warn("failed to parse target BTF: %d\n", err); + pr_warn("failed to parse target BTF: %s\n", errstr(err)); return err; } } @@ -5845,8 +5986,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) err = record_relo_core(prog, rec, insn_idx); if (err) { - pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", - prog->name, i, err); + pr_warn("prog '%s': relo #%d: failed to record relocation: %s\n", + prog->name, i, errstr(err)); goto out; } @@ -5855,15 +5996,15 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); if (err) { - pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", - prog->name, i, err); + pr_warn("prog '%s': relo #%d: failed to relocate: %s\n", + prog->name, i, errstr(err)); goto out; } err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); if (err) { - pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", - prog->name, i, insn_idx, err); + pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %s\n", + prog->name, i, insn_idx, errstr(err)); goto out; } } @@ -6131,8 +6272,8 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj, &main_prog->func_info_rec_size); if (err) { if (err != -ENOENT) { - pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n", - prog->name, err); + pr_warn("prog '%s': error relocating .BTF.ext function info: %s\n", + prog->name, errstr(err)); 
return err; } if (main_prog->func_info) { @@ -6159,8 +6300,8 @@ line_info: &main_prog->line_info_rec_size); if (err) { if (err != -ENOENT) { - pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n", - prog->name, err); + pr_warn("prog '%s': error relocating .BTF.ext line info: %s\n", + prog->name, errstr(err)); return err; } if (main_prog->line_info) { @@ -6924,8 +7065,8 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat if (obj->btf_ext) { err = bpf_object__relocate_core(obj, targ_btf_path); if (err) { - pr_warn("failed to perform CO-RE relocations: %d\n", - err); + pr_warn("failed to perform CO-RE relocations: %s\n", + errstr(err)); return err; } bpf_object__sort_relos(obj); @@ -6969,8 +7110,8 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat err = bpf_object__relocate_calls(obj, prog); if (err) { - pr_warn("prog '%s': failed to relocate calls: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to relocate calls: %s\n", + prog->name, errstr(err)); return err; } @@ -7006,16 +7147,16 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat /* Process data relos for main programs */ err = bpf_object__relocate_data(obj, prog); if (err) { - pr_warn("prog '%s': failed to relocate data references: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to relocate data references: %s\n", + prog->name, errstr(err)); return err; } /* Fix up .BTF.ext information, if necessary */ err = bpf_program_fixup_func_info(obj, prog); if (err) { - pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %s\n", + prog->name, errstr(err)); return err; } } @@ -7267,8 +7408,14 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; /* special check for usdt to use uprobe_multi link */ - if ((def & SEC_USDT) && kernel_supports(prog->obj, 
FEAT_UPROBE_MULTI_LINK)) + if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) { + /* for BPF_TRACE_UPROBE_MULTI, user might want to query expected_attach_type + * in prog, and expected_attach_type we set in kernel is from opts, so we + * update both. + */ prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; + opts->expected_attach_type = BPF_TRACE_UPROBE_MULTI; + } if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; @@ -7318,14 +7465,17 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog { LIBBPF_OPTS(bpf_prog_load_opts, load_attr); const char *prog_name = NULL; - char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; char *log_buf = NULL, *tmp; - int btf_fd, ret, err; bool own_log_buf = true; __u32 log_level = prog->log_level; + int ret, err; - if (prog->type == BPF_PROG_TYPE_UNSPEC) { + /* Be more helpful by rejecting programs that can't be validated early + * with more meaningful and actionable error message. + */ + switch (prog->type) { + case BPF_PROG_TYPE_UNSPEC: /* * The program type must be set. Most likely we couldn't find a proper * section definition at load time, and thus we didn't infer the type. 
@@ -7333,6 +7483,15 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n", prog->name, prog->sec_name); return -EINVAL; + case BPF_PROG_TYPE_STRUCT_OPS: + if (prog->attach_btf_id == 0) { + pr_warn("prog '%s': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?\n", + prog->name); + return -EINVAL; + } + break; + default: + break; } if (!insns || !insns_cnt) @@ -7345,11 +7504,11 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.expected_attach_type = prog->expected_attach_type; /* specify func_info/line_info only if kernel supports them */ - btf_fd = btf__fd(obj->btf); - if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { - load_attr.prog_btf_fd = btf_fd; + if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { + load_attr.prog_btf_fd = btf__fd(obj->btf); load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; load_attr.func_info_cnt = prog->func_info_cnt; @@ -7369,17 +7528,14 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); if (err < 0) { - pr_warn("prog '%s': failed to prepare load attributes: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to prepare load attributes: %s\n", + prog->name, errstr(err)); return err; } insns = prog->insns; insns_cnt = prog->insns_cnt; } - /* allow prog_prepare_load_fn to change expected_attach_type */ - load_attr.expected_attach_type = prog->expected_attach_type; - if (obj->gen_loader) { bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, license, insns, insns_cnt, 
&load_attr, @@ -7437,9 +7593,8 @@ retry_load: continue; if (bpf_prog_bind_map(ret, map->fd, NULL)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("prog '%s': failed to bind map '%s': %s\n", - prog->name, map->real_name, cp); + prog->name, map->real_name, errstr(errno)); /* Don't fail hard if can't bind rodata. */ } } @@ -7469,8 +7624,7 @@ retry_load: /* post-process verifier log to improve error descriptions */ fixup_verifier_log(prog, log_buf, log_buf_size); - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); + pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, errstr(errno)); pr_perm_msg(ret); if (own_log_buf && log_buf && log_buf[0] != '\0') { @@ -7742,13 +7896,6 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - err = bpf_object__sanitize_prog(obj, prog); - if (err) - return err; - } - - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; if (!prog->autoload) { @@ -7763,7 +7910,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, obj->license, obj->kern_version, &prog->fd); if (err) { - pr_warn("prog '%s': failed to load: %d\n", prog->name, err); + pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err)); return err; } } @@ -7772,6 +7919,21 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) return 0; } +static int bpf_object_prepare_progs(struct bpf_object *obj) +{ + struct bpf_program *prog; + size_t i; + int err; + + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + err = bpf_object__sanitize_prog(obj, prog); + if (err) + return err; + } + return 0; +} + static const struct bpf_sec_def *find_sec_def(const char *sec_name); static int bpf_object_init_progs(struct bpf_object *obj, const 
struct bpf_object_open_opts *opts) @@ -7797,8 +7959,8 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object if (prog->sec_def->prog_setup_fn) { err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); if (err < 0) { - pr_warn("prog '%s': failed to initialize: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to initialize: %s\n", + prog->name, errstr(err)); return err; } } @@ -7808,16 +7970,19 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object } static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, + const char *obj_name, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path, *token_path; + const char *kconfig, *btf_tmp_path, *token_path; struct bpf_object *obj; - char tmp_name[64]; int err; char *log_buf; size_t log_size; __u32 log_level; + if (obj_buf && !obj_name) + return ERR_PTR(-EINVAL); + if (elf_version(EV_CURRENT) == EV_NONE) { pr_warn("failed to init libelf for %s\n", path ? : "(mem buf)"); @@ -7827,16 +7992,12 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (!OPTS_VALID(opts, bpf_object_open_opts)) return ERR_PTR(-EINVAL); - obj_name = OPTS_GET(opts, object_name, NULL); + obj_name = OPTS_GET(opts, object_name, NULL) ?: obj_name; if (obj_buf) { - if (!obj_name) { - snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", - (unsigned long)obj_buf, - (unsigned long)obj_buf_sz); - obj_name = tmp_name; - } path = obj_name; pr_debug("loading object '%s' from buffer\n", obj_name); + } else { + pr_debug("loading object from %s\n", path); } log_buf = OPTS_GET(opts, kernel_log_buf, NULL); @@ -7896,7 +8057,6 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, } err = bpf_object__elf_init(obj); - err = err ? : bpf_object__check_endianness(obj); err = err ? : bpf_object__elf_collect(obj); err = err ? : bpf_object__collect_externs(obj); err = err ? 
: bpf_object_fixup_btf(obj); @@ -7920,9 +8080,7 @@ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) if (!path) return libbpf_err_ptr(-EINVAL); - pr_debug("loading %s\n", path); - - return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); + return libbpf_ptr(bpf_object_open(path, NULL, 0, NULL, opts)); } struct bpf_object *bpf_object__open(const char *path) @@ -7934,10 +8092,15 @@ struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { + char tmp_name[64]; + if (!obj_buf || obj_buf_sz == 0) return libbpf_err_ptr(-EINVAL); - return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); + /* create a (quite useless) default "name" for this memory buffer object */ + snprintf(tmp_name, sizeof(tmp_name), "%lx-%zx", (unsigned long)obj_buf, obj_buf_sz); + + return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, tmp_name, opts)); } static int bpf_object_unload(struct bpf_object *obj) @@ -7973,7 +8136,10 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) return 0; } -int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) +typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type, + const char *sym_name, void *ctx); + +static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) { char sym_type, sym_name[500]; unsigned long long sym_addr; @@ -7983,7 +8149,7 @@ int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) f = fopen("/proc/kallsyms", "re"); if (!f) { err = -errno; - pr_warn("failed to open /proc/kallsyms: %d\n", err); + pr_warn("failed to open /proc/kallsyms: %s\n", errstr(err)); return err; } @@ -8013,8 +8179,13 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type, struct bpf_object *obj = ctx; const struct btf_type *t; struct extern_desc *ext; + char *res; - ext = find_extern_by_name(obj, sym_name); + res = strstr(sym_name, ".llvm."); + if (sym_type == 'd' && res) + ext = find_extern_by_name_with_len(obj, 
sym_name, res - sym_name); + else + ext = find_extern_by_name(obj, sym_name); if (!ext || ext->type != EXT_KSYM) return 0; @@ -8339,11 +8510,13 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, static void bpf_map_prepare_vdata(const struct bpf_map *map) { + const struct btf_type *type; struct bpf_struct_ops *st_ops; __u32 i; st_ops = map->st_ops; - for (i = 0; i < btf_vlen(st_ops->type); i++) { + type = btf__type_by_id(map->obj->btf, st_ops->type_id); + for (i = 0; i < btf_vlen(type); i++) { struct bpf_program *prog = st_ops->progs[i]; void *kern_data; int prog_fd; @@ -8377,20 +8550,45 @@ static int bpf_object_prepare_struct_ops(struct bpf_object *obj) return 0; } -static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) +static void bpf_object_unpin(struct bpf_object *obj) { - int err, i; + int i; - if (!obj) - return libbpf_err(-EINVAL); + /* unpin any maps that were auto-pinned during load */ + for (i = 0; i < obj->nr_maps; i++) + if (obj->maps[i].pinned && !obj->maps[i].reused) + bpf_map__unpin(&obj->maps[i], NULL); +} - if (obj->loaded) { - pr_warn("object '%s': load can't be attempted twice\n", obj->name); - return libbpf_err(-EINVAL); +static void bpf_object_post_load_cleanup(struct bpf_object *obj) +{ + int i; + + /* clean up fd_array */ + zfree(&obj->fd_array); + + /* clean up module BTFs */ + for (i = 0; i < obj->btf_module_cnt; i++) { + close(obj->btf_modules[i].fd); + btf__free(obj->btf_modules[i].btf); + free(obj->btf_modules[i].name); } + obj->btf_module_cnt = 0; + zfree(&obj->btf_modules); - if (obj->gen_loader) - bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); + /* clean up vmlinux BTF */ + btf__free(obj->btf_vmlinux); + obj->btf_vmlinux = NULL; +} + +static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path) +{ + int err; + + if (obj->state >= OBJ_PREPARED) { + pr_warn("object '%s': prepare loading can't be attempted twice\n", 
obj->name); + return -EINVAL; + } err = bpf_object_prepare_token(obj); err = err ? : bpf_object__probe_loading(obj); @@ -8402,7 +8600,47 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__create_maps(obj); - err = err ? : bpf_object__load_progs(obj, extra_log_level); + err = err ? : bpf_object_prepare_progs(obj); + + if (err) { + bpf_object_unpin(obj); + bpf_object_unload(obj); + obj->state = OBJ_LOADED; + return err; + } + + obj->state = OBJ_PREPARED; + return 0; +} + +static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) +{ + int err; + + if (!obj) + return libbpf_err(-EINVAL); + + if (obj->state >= OBJ_LOADED) { + pr_warn("object '%s': load can't be attempted twice\n", obj->name); + return libbpf_err(-EINVAL); + } + + /* Disallow kernel loading programs of non-native endianness but + * permit cross-endian creation of "light skeleton". + */ + if (obj->gen_loader) { + bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); + } else if (!is_native_endianness(obj)) { + pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name); + return libbpf_err(-LIBBPF_ERRNO__ENDIAN); + } + + if (obj->state < OBJ_PREPARED) { + err = bpf_object_prepare(obj, target_btf_path); + if (err) + return libbpf_err(err); + } + err = bpf_object__load_progs(obj, extra_log_level); err = err ? : bpf_object_init_prog_arrays(obj); err = err ? 
: bpf_object_prepare_struct_ops(obj); @@ -8414,36 +8652,22 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); } - /* clean up fd_array */ - zfree(&obj->fd_array); + bpf_object_post_load_cleanup(obj); + obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */ - /* clean up module BTFs */ - for (i = 0; i < obj->btf_module_cnt; i++) { - close(obj->btf_modules[i].fd); - btf__free(obj->btf_modules[i].btf); - free(obj->btf_modules[i].name); + if (err) { + bpf_object_unpin(obj); + bpf_object_unload(obj); + pr_warn("failed to load object '%s'\n", obj->path); + return libbpf_err(err); } - free(obj->btf_modules); - - /* clean up vmlinux BTF */ - btf__free(obj->btf_vmlinux); - obj->btf_vmlinux = NULL; - - obj->loaded = true; /* doesn't matter if successfully or not */ - - if (err) - goto out; return 0; -out: - /* unpin any maps that were auto-pinned during load */ - for (i = 0; i < obj->nr_maps; i++) - if (obj->maps[i].pinned && !obj->maps[i].reused) - bpf_map__unpin(&obj->maps[i], NULL); +} - bpf_object_unload(obj); - pr_warn("failed to load object '%s'\n", obj->path); - return libbpf_err(err); +int bpf_object__prepare(struct bpf_object *obj) +{ + return libbpf_err(bpf_object_prepare(obj, NULL)); } int bpf_object__load(struct bpf_object *obj) @@ -8453,7 +8677,6 @@ int bpf_object__load(struct bpf_object *obj) static int make_parent_dir(const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; char *dname, *dir; int err = 0; @@ -8467,15 +8690,13 @@ static int make_parent_dir(const char *path) free(dname); if (err) { - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to mkdir %s: %s\n", path, cp); + pr_warn("failed to mkdir %s: %s\n", path, errstr(err)); } return err; } static int check_path(const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; struct statfs st_fs; char *dname, *dir; int err = 0; @@ -8489,8 +8710,7 @@ static int 
check_path(const char *path) dir = dirname(dname); if (statfs(dir, &st_fs)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("failed to statfs %s: %s\n", dir, cp); + pr_warn("failed to statfs %s: %s\n", dir, errstr(errno)); err = -errno; } free(dname); @@ -8505,7 +8725,6 @@ static int check_path(const char *path) int bpf_program__pin(struct bpf_program *prog, const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; int err; if (prog->fd < 0) { @@ -8523,8 +8742,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) if (bpf_obj_pin(prog->fd, path)) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp); + pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, errstr(err)); return libbpf_err(err); } @@ -8555,7 +8773,6 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) int bpf_map__pin(struct bpf_map *map, const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; int err; if (map == NULL) { @@ -8563,6 +8780,11 @@ int bpf_map__pin(struct bpf_map *map, const char *path) return libbpf_err(-EINVAL); } + if (map->fd < 0) { + pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name); + return libbpf_err(-EINVAL); + } + if (map->pin_path) { if (path && strcmp(path, map->pin_path)) { pr_warn("map '%s' already has pin path '%s' different from '%s'\n", @@ -8609,8 +8831,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path) return 0; out_err: - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to pin map: %s\n", cp); + pr_warn("failed to pin map: %s\n", errstr(err)); return libbpf_err(err); } @@ -8696,7 +8917,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) if (!obj) return libbpf_err(-ENOENT); - if (!obj->loaded) { + if (obj->state < OBJ_PREPARED) { pr_warn("object not yet loaded; load it first\n"); return libbpf_err(-ENOENT); } @@ -8775,7 +8996,7 @@ int 
bpf_object__pin_programs(struct bpf_object *obj, const char *path) if (!obj) return libbpf_err(-ENOENT); - if (!obj->loaded) { + if (obj->state < OBJ_LOADED) { pr_warn("object not yet loaded; load it first\n"); return libbpf_err(-ENOENT); } @@ -8894,6 +9115,13 @@ void bpf_object__close(struct bpf_object *obj) if (IS_ERR_OR_NULL(obj)) return; + /* + * if user called bpf_object__prepare() without ever getting to + * bpf_object__load(), we need to clean up stuff that is normally + * cleaned up at the end of loading step + */ + bpf_object_post_load_cleanup(obj); + usdt_manager_free(obj->usdt_man); obj->usdt_man = NULL; @@ -8945,6 +9173,11 @@ unsigned int bpf_object__kversion(const struct bpf_object *obj) return obj ? obj->kern_version : 0; } +int bpf_object__token_fd(const struct bpf_object *obj) +{ + return obj->token_fd ?: -1; +} + struct btf *bpf_object__btf(const struct bpf_object *obj) { return obj ? obj->btf : NULL; @@ -8957,7 +9190,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj) int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) { - if (obj->loaded) + if (obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); obj->kern_version = kern_version; @@ -8970,13 +9203,14 @@ int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) struct bpf_gen *gen; if (!opts) - return -EFAULT; + return libbpf_err(-EFAULT); if (!OPTS_VALID(opts, gen_loader_opts)) - return -EINVAL; + return libbpf_err(-EINVAL); gen = calloc(sizeof(*gen), 1); if (!gen) - return -ENOMEM; + return libbpf_err(-ENOMEM); gen->opts = opts; + gen->swapped_endian = !is_native_endianness(obj); obj->gen_loader = gen; return 0; } @@ -9053,7 +9287,7 @@ bool bpf_program__autoload(const struct bpf_program *prog) int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); prog->autoload = autoload; @@ -9085,14 +9319,14 @@ int bpf_program__set_insns(struct 
bpf_program *prog, { struct bpf_insn *insns; - if (prog->obj->loaded) - return -EBUSY; + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); /* NULL is a valid return from reallocarray if the new count is zero */ if (!insns && new_insn_cnt) { pr_warn("prog '%s': failed to realloc prog code\n", prog->name); - return -ENOMEM; + return libbpf_err(-ENOMEM); } memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); @@ -9128,7 +9362,7 @@ static int last_custom_sec_def_handler_id; int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); /* if type is not changed, do nothing */ @@ -9159,7 +9393,7 @@ enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->expected_attach_type = type; @@ -9173,7 +9407,7 @@ __u32 bpf_program__flags(const struct bpf_program *prog) int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->prog_flags = flags; @@ -9187,7 +9421,7 @@ __u32 bpf_program__log_level(const struct bpf_program *prog) int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->log_level = log_level; @@ -9203,17 +9437,41 @@ const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_siz int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) { if (log_size && !log_buf) - return -EINVAL; + return libbpf_err(-EINVAL); if (prog->log_size > UINT_MAX) - return -EINVAL; - if (prog->obj->loaded) - return -EBUSY; 
+ return libbpf_err(-EINVAL); + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); prog->log_buf = log_buf; prog->log_size = log_size; return 0; } +struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog) +{ + if (prog->func_info_rec_size != sizeof(struct bpf_func_info)) + return libbpf_err_ptr(-EOPNOTSUPP); + return prog->func_info; +} + +__u32 bpf_program__func_info_cnt(const struct bpf_program *prog) +{ + return prog->func_info_cnt; +} + +struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog) +{ + if (prog->line_info_rec_size != sizeof(struct bpf_line_info)) + return libbpf_err_ptr(-EOPNOTSUPP); + return prog->line_info; +} + +__u32 bpf_program__line_info_cnt(const struct bpf_program *prog) +{ + return prog->line_info_cnt; +} + #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ .sec = (char *)sec_pfx, \ .prog_type = BPF_PROG_TYPE_##ptype, \ @@ -9231,6 +9489,7 @@ static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_lin static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_kprobe_session(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); @@ -9247,10 +9506,13 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, 
attach_kprobe_multi), + SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session), SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), + SEC_DEF("uprobe.session+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_NONE, attach_uprobe_multi), SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), + SEC_DEF("uprobe.session.s+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_SLEEPABLE, attach_uprobe_multi), SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), @@ -9298,6 +9560,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), + SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT), SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), @@ -9598,6 +9861,7 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) { + const struct btf_type *type; const struct btf_member *member; struct bpf_struct_ops *st_ops; struct bpf_program *prog; @@ -9657,13 +9921,14 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, } insn_idx = sym->st_value / BPF_INSN_SZ; - member = find_member_by_offset(st_ops->type, moff * 8); + type = btf__type_by_id(btf, 
st_ops->type_id); + member = find_member_by_offset(type, moff * 8); if (!member) { pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", map->name, moff); return -EINVAL; } - member_idx = member - btf_members(st_ops->type); + member_idx = member - btf_members(type); name = btf__name_by_offset(btf, member->name_off); if (!resolve_func_ptr(btf, member->type, NULL)) { @@ -9776,7 +10041,7 @@ int libbpf_find_vmlinux_btf_id(const char *name, return libbpf_err(err); } -static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) +static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd) { struct bpf_prog_info info; __u32 info_len = sizeof(info); @@ -9786,8 +10051,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) memset(&info, 0, info_len); err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); if (err) { - pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n", - attach_prog_fd, err); + pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %s\n", + attach_prog_fd, errstr(err)); return err; } @@ -9796,10 +10061,10 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) pr_warn("The target program doesn't have BTF\n"); goto out; } - btf = btf__load_from_kernel_by_id(info.btf_id); + btf = btf_load_from_kernel(info.btf_id, NULL, token_fd); err = libbpf_get_error(btf); if (err) { - pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); + pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err)); goto out; } err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); @@ -9816,16 +10081,28 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, enum bpf_attach_type attach_type, int *btf_obj_fd, int *btf_type_id) { - int ret, i; + int ret, i, mod_len; + const char *fn_name, *mod_name = NULL; - ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type); - if (ret > 0) { - *btf_obj_fd = 0; /* 
vmlinux BTF */ - *btf_type_id = ret; - return 0; + fn_name = strchr(attach_name, ':'); + if (fn_name) { + mod_name = attach_name; + mod_len = fn_name - mod_name; + fn_name++; + } + + if (!mod_name || strncmp(mod_name, "vmlinux", mod_len) == 0) { + ret = find_attach_btf_id(obj->btf_vmlinux, + mod_name ? fn_name : attach_name, + attach_type); + if (ret > 0) { + *btf_obj_fd = 0; /* vmlinux BTF */ + *btf_type_id = ret; + return 0; + } + if (ret != -ENOENT) + return ret; } - if (ret != -ENOENT) - return ret; ret = load_module_btfs(obj); if (ret) @@ -9834,7 +10111,12 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, for (i = 0; i < obj->btf_module_cnt; i++) { const struct module_btf *mod = &obj->btf_modules[i]; - ret = find_attach_btf_id(mod->btf, attach_name, attach_type); + if (mod_name && strncmp(mod->name, mod_name, mod_len) != 0) + continue; + + ret = find_attach_btf_id(mod->btf, + mod_name ? fn_name : attach_name, + attach_type); if (ret > 0) { *btf_obj_fd = mod->fd; *btf_type_id = ret; @@ -9862,10 +10144,10 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac pr_warn("prog '%s': attach program FD is not set\n", prog->name); return -EINVAL; } - err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); + err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd); if (err < 0) { - pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n", - prog->name, attach_prog_fd, attach_name, err); + pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n", + prog->name, attach_prog_fd, attach_name, errstr(err)); return err; } *btf_obj_fd = 0; @@ -9884,8 +10166,8 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac btf_type_id); } if (err) { - pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n", - prog->name, attach_name, err); + pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %s\n", + 
prog->name, attach_name, errstr(err)); return err; } return 0; @@ -10099,7 +10381,7 @@ static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) int bpf_map__set_value_size(struct bpf_map *map, __u32 size) { - if (map->obj->loaded || map->reused) + if (map_is_created(map)) return libbpf_err(-EBUSY); if (map->mmaped) { @@ -10107,20 +10389,20 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size) int err; if (map->def.type != BPF_MAP_TYPE_ARRAY) - return -EOPNOTSUPP; + return libbpf_err(-EOPNOTSUPP); mmap_old_sz = bpf_map_mmap_sz(map); mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); if (err) { - pr_warn("map '%s': failed to resize memory-mapped region: %d\n", - bpf_map__name(map), err); - return err; + pr_warn("map '%s': failed to resize memory-mapped region: %s\n", + bpf_map__name(map), errstr(err)); + return libbpf_err(err); } err = map_btf_datasec_resize(map, size); if (err && err != -ENOENT) { - pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", - bpf_map__name(map), err); + pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %s\n", + bpf_map__name(map), errstr(err)); map->btf_value_type_id = 0; map->btf_key_type_id = 0; } @@ -10145,7 +10427,7 @@ int bpf_map__set_initial_value(struct bpf_map *map, { size_t actual_sz; - if (map->obj->loaded || map->reused) + if (map_is_created(map)) return libbpf_err(-EBUSY); if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) @@ -10244,7 +10526,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) struct bpf_map * bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { - if (prev == NULL) + if (prev == NULL && obj != NULL) return obj->maps; return __bpf_map__iter(prev, obj, 1); @@ -10253,7 +10535,7 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) struct bpf_map * bpf_object__prev_map(const 
struct bpf_object *obj, const struct bpf_map *next) { - if (next == NULL) { + if (next == NULL && obj != NULL) { if (!obj->nr_maps) return NULL; return obj->maps + obj->nr_maps - 1; @@ -10307,6 +10589,11 @@ static int validate_map_op(const struct bpf_map *map, size_t key_sz, return -EINVAL; } + if (map->fd < 0) { + pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); + return -EINVAL; + } + if (!check_value_sz) return 0; @@ -10419,8 +10706,15 @@ long libbpf_get_error(const void *ptr) int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) { int ret; + int prog_fd = bpf_program__fd(prog); + + if (prog_fd < 0) { + pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err(-EINVAL); + } - ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); + ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL); return libbpf_err_errno(ret); } @@ -10599,7 +10893,6 @@ static void bpf_link_perf_dealloc(struct bpf_link *link) struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts) { - char errmsg[STRERR_BUFSIZE]; struct bpf_link_perf *link; int prog_fd, link_fd = -1, err; bool force_ioctl_attach; @@ -10614,7 +10907,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p } prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", prog->name); return libbpf_err_ptr(-EINVAL); } @@ -10634,9 +10927,8 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); if (link_fd < 0) { err = -errno; - pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n", - prog->name, pfd, - err, libbpf_strerror_r(err, 
errmsg, sizeof(errmsg))); + pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %s\n", + prog->name, pfd, errstr(err)); goto err_out; } link->link.fd = link_fd; @@ -10650,7 +10942,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { err = -errno; pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", - prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, pfd, errstr(err)); if (err == -EPROTO) pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", prog->name, pfd); @@ -10661,7 +10953,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", - prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, pfd, errstr(err)); goto err_out; } @@ -10685,22 +10977,19 @@ struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, */ static int parse_uint_from_file(const char *file, const char *fmt) { - char buf[STRERR_BUFSIZE]; int err, ret; FILE *f; f = fopen(file, "re"); if (!f) { err = -errno; - pr_debug("failed to open '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); + pr_debug("failed to open '%s': %s\n", file, errstr(err)); return err; } err = fscanf(f, fmt, &ret); if (err != 1) { err = err == EOF ? 
-EIO : -errno; - pr_debug("failed to parse '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); + pr_debug("failed to parse '%s': %s\n", file, errstr(err)); fclose(f); return err; } @@ -10744,7 +11033,6 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, { const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; - char errmsg[STRERR_BUFSIZE]; int type, pfd; if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) @@ -10757,7 +11045,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, if (type < 0) { pr_warn("failed to determine %s perf type: %s\n", uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); + errstr(type)); return type; } if (retprobe) { @@ -10767,7 +11055,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, if (bit < 0) { pr_warn("failed to determine %s retprobe bit: %s\n", uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); + errstr(bit)); return bit; } attr.config |= 1 << bit; @@ -10850,16 +11138,16 @@ static const char *tracefs_available_filter_functions_addrs(void) : TRACEFS"/available_filter_functions_addrs"; } -static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *kfunc_name, size_t offset) +static void gen_probe_legacy_event_name(char *buf, size_t buf_sz, + const char *name, size_t offset) { static int index = 0; int i; - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, - __sync_fetch_and_add(&index, 1)); + snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(), + __sync_fetch_and_add(&index, 1), name, offset); - /* sanitize binary_path in the probe name */ + /* sanitize name in the probe name */ for (i = 0; buf[i]; i++) { if (!isalnum(buf[i])) buf[i] = '_'; @@ -10896,14 +11184,13 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, { const size_t attr_sz = 
sizeof(struct perf_event_attr); struct perf_event_attr attr; - char errmsg[STRERR_BUFSIZE]; int type, pfd, err; err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); if (err < 0) { pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", kfunc_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return err; } type = determine_kprobe_perf_type_legacy(probe_name, retprobe); @@ -10911,7 +11198,7 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, err = type; pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", kfunc_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } @@ -10927,7 +11214,7 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, if (pfd < 0) { err = -errno; pr_warn("legacy kprobe perf_event_open() failed: %s\n", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } return pfd; @@ -10985,9 +11272,9 @@ int probe_kern_syscall_wrapper(int token_fd) return pfd >= 0 ? 
1 : 0; } else { /* legacy mode */ - char probe_name[128]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) return 0; @@ -11003,7 +11290,6 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); enum probe_attach_mode attach_mode; - char errmsg[STRERR_BUFSIZE]; char *legacy_probe = NULL; struct bpf_link *link; size_t offset; @@ -11044,10 +11330,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, func_name, offset, -1 /* pid */, 0 /* ref_ctr_off */); } else { - char probe_name[256]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), - func_name, offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + func_name, offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -11061,7 +11347,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_out; } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); @@ -11071,7 +11357,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", prog->name, retprobe ? 
"kretprobe" : "kprobe", func_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } if (legacy) { @@ -11183,9 +11469,33 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, struct kprobe_multi_resolve *res = data->res; int err; - if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) + if (!glob_match(sym_name, res->pattern)) return 0; + if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) { + /* Some versions of kernel strip out .llvm.<hash> suffix from + * function names reported in available_filter_functions, but + * don't do so for kallsyms. While this is clearly a kernel + * bug (fixed by [0]) we try to accommodate that in libbpf to + * make multi-kprobe usability a bit better: if no match is + * found, we will strip .llvm. suffix and try one more time. + * + * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG") + */ + char sym_trim[256], *psym_trim = sym_trim, *sym_sfx; + + if (!(sym_sfx = strstr(sym_name, ".llvm."))) + return 0; + + /* psym_trim vs sym_trim dance is done to avoid pointer vs array + * coercion differences and get proper `const char **` pointer + * which avail_func_cmp() expects + */ + snprintf(sym_trim, sizeof(sym_trim), "%.*s", (int)(sym_sfx - sym_name), sym_name); + if (!bsearch(&psym_trim, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) + return 0; + } + err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1); if (err) return err; @@ -11207,7 +11517,7 @@ static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res) f = fopen(available_functions_file, "re"); if (!f) { err = -errno; - pr_warn("failed to open %s: %d\n", available_functions_file, err); + pr_warn("failed to open %s: %s\n", available_functions_file, errstr(err)); return err; } @@ -11282,7 +11592,7 @@ static int libbpf_available_kprobes_parse(struct 
kprobe_multi_resolve *res) f = fopen(available_path, "re"); if (!f) { err = -errno; - pr_warn("failed to open %s: %d\n", available_path, err); + pr_warn("failed to open %s: %s\n", available_path, errstr(err)); return err; } @@ -11326,22 +11636,30 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, struct kprobe_multi_resolve res = { .pattern = pattern, }; + enum bpf_attach_type attach_type; struct bpf_link *link = NULL; - char errmsg[STRERR_BUFSIZE]; const unsigned long *addrs; int err, link_fd, prog_fd; + bool retprobe, session, unique_match; const __u64 *cookies; const char **syms; - bool retprobe; size_t cnt; if (!OPTS_VALID(opts, bpf_kprobe_multi_opts)) return libbpf_err_ptr(-EINVAL); + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + syms = OPTS_GET(opts, syms, false); addrs = OPTS_GET(opts, addrs, false); cnt = OPTS_GET(opts, cnt, false); cookies = OPTS_GET(opts, cookies, false); + unique_match = OPTS_GET(opts, unique_match, false); if (!pattern && !addrs && !syms) return libbpf_err_ptr(-EINVAL); @@ -11349,6 +11667,8 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, return libbpf_err_ptr(-EINVAL); if (!pattern && !cnt) return libbpf_err_ptr(-EINVAL); + if (!pattern && unique_match) + return libbpf_err_ptr(-EINVAL); if (addrs && syms) return libbpf_err_ptr(-EINVAL); @@ -11359,11 +11679,25 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, err = libbpf_available_kallsyms_parse(&res); if (err) goto error; + + if (unique_match && res.cnt != 1) { + pr_warn("prog '%s': failed to find a unique match for '%s' (%zu matches)\n", + prog->name, pattern, res.cnt); + err = -EINVAL; + goto error; + } + addrs = res.addrs; cnt = res.cnt; } retprobe = OPTS_GET(opts, retprobe, false); + session = OPTS_GET(opts, session, false); + + if (retprobe && session) + return 
libbpf_err_ptr(-EINVAL); + + attach_type = session ? BPF_TRACE_KPROBE_SESSION : BPF_TRACE_KPROBE_MULTI; lopts.kprobe_multi.syms = syms; lopts.kprobe_multi.addrs = addrs; @@ -11378,12 +11712,11 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, } link->detach = &bpf_link__detach_fd; - prog_fd = bpf_program__fd(prog); - link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts); + link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); if (link_fd < 0) { err = -errno; pr_warn("prog '%s': failed to attach: %s\n", - prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, errstr(err)); goto error; } link->fd = link_fd; @@ -11476,7 +11809,7 @@ static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, stru n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); if (n < 1) { - pr_warn("kprobe multi pattern is invalid: %s\n", pattern); + pr_warn("kprobe multi pattern is invalid: %s\n", spec); return -EINVAL; } @@ -11485,6 +11818,32 @@ static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, stru return libbpf_get_error(*link); } +static int attach_kprobe_session(const struct bpf_program *prog, long cookie, + struct bpf_link **link) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .session = true); + const char *spec; + char *pattern; + int n; + + *link = NULL; + + /* no auto-attach for SEC("kprobe.session") */ + if (strcmp(prog->sec_name, "kprobe.session") == 0) + return 0; + + spec = prog->sec_name + sizeof("kprobe.session/") - 1; + n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); + if (n < 1) { + pr_warn("kprobe session pattern is invalid: %s\n", spec); + return -EINVAL; + } + + *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); + free(pattern); + return *link ? 
0 : -errno; +} + static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) { char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; @@ -11501,7 +11860,9 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru ret = 0; break; case 3: - opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0; + opts.session = str_has_pfx(probe_type, "uprobe.session"); + opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi"); + *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); ret = libbpf_get_error(*link); break; @@ -11516,20 +11877,6 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru return ret; } -static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *binary_path, uint64_t offset) -{ - int i; - - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); - - /* sanitize binary_path in the probe name */ - for (i = 0; buf[i]; i++) { - if (!isalnum(buf[i])) - buf[i] = '_'; - } -} - static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { @@ -11564,15 +11911,15 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); if (err < 0) { - pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", - binary_path, (size_t)offset, err); + pr_warn("failed to add legacy uprobe event for %s:0x%zx: %s\n", + binary_path, (size_t)offset, errstr(err)); return err; } type = determine_uprobe_perf_type_legacy(probe_name, retprobe); if (type < 0) { err = type; - pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", - binary_path, offset, err); + pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %s\n", + binary_path, offset, errstr(err)); goto err_clean_legacy; } @@ -11587,7 +11934,7 @@ static int 
perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); if (pfd < 0) { err = -errno; - pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); + pr_warn("legacy uprobe perf_event_open() failed: %s\n", errstr(err)); goto err_clean_legacy; } return pfd; @@ -11750,10 +12097,11 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; LIBBPF_OPTS(bpf_link_create_opts, lopts); unsigned long *resolved_offsets = NULL; + enum bpf_attach_type attach_type; int err = 0, link_fd, prog_fd; struct bpf_link *link = NULL; - char errmsg[STRERR_BUFSIZE]; char full_path[PATH_MAX]; + bool retprobe, session; const __u64 *cookies; const char **syms; size_t cnt; @@ -11761,11 +12109,20 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) return libbpf_err_ptr(-EINVAL); + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + syms = OPTS_GET(opts, syms, NULL); offsets = OPTS_GET(opts, offsets, NULL); ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); cookies = OPTS_GET(opts, cookies, NULL); cnt = OPTS_GET(opts, cnt, 0); + retprobe = OPTS_GET(opts, retprobe, false); + session = OPTS_GET(opts, session, false); /* * User can specify 2 mutually exclusive set of inputs: @@ -11794,12 +12151,15 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, return libbpf_err_ptr(-EINVAL); } + if (retprobe && session) + return libbpf_err_ptr(-EINVAL); + if (func_pattern) { if (!strchr(path, '/')) { err = resolve_full_path(path, full_path, sizeof(full_path)); if (err) { - pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", - prog->name, path, err); + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, path, errstr(err)); return 
libbpf_err_ptr(err); } path = full_path; @@ -11817,12 +12177,14 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, offsets = resolved_offsets; } + attach_type = session ? BPF_TRACE_UPROBE_SESSION : BPF_TRACE_UPROBE_MULTI; + lopts.uprobe_multi.path = path; lopts.uprobe_multi.offsets = offsets; lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; lopts.uprobe_multi.cookies = cookies; lopts.uprobe_multi.cnt = cnt; - lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0; + lopts.uprobe_multi.flags = retprobe ? BPF_F_UPROBE_MULTI_RETURN : 0; if (pid == 0) pid = getpid(); @@ -11836,12 +12198,11 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, } link->detach = &bpf_link__detach_fd; - prog_fd = bpf_program__fd(prog); - link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts); + link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); if (link_fd < 0) { err = -errno; pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", - prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, errstr(err)); goto error; } link->fd = link_fd; @@ -11860,7 +12221,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const struct bpf_uprobe_opts *opts) { const char *archive_path = NULL, *archive_sep = NULL; - char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; + char *legacy_probe = NULL; DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); enum probe_attach_mode attach_mode; char full_path[PATH_MAX]; @@ -11892,8 +12253,8 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, } else if (!strchr(binary_path, '/')) { err = resolve_full_path(binary_path, full_path, sizeof(full_path)); if (err) { - pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", - prog->name, binary_path, err); + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, binary_path, errstr(err)); return libbpf_err_ptr(err); } binary_path = 
full_path; @@ -11939,13 +12300,14 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[PATH_MAX + 64]; + char probe_name[MAX_EVENT_NAME_LEN]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); - gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), - binary_path, func_offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + strrchr(binary_path, '/') ? : binary_path, + func_offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -11959,7 +12321,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_out; } @@ -11970,7 +12332,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", prog->name, retprobe ? 
"uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } if (legacy) { @@ -12080,7 +12442,7 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, return libbpf_err_ptr(-EINVAL); if (bpf_program__fd(prog) < 0) { - pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", + pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", prog->name); return libbpf_err_ptr(-EINVAL); } @@ -12091,8 +12453,8 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, if (!strchr(binary_path, '/')) { err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); if (err) { - pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", - prog->name, binary_path, err); + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, binary_path, errstr(err)); return libbpf_err_ptr(err); } binary_path = resolved_path; @@ -12170,14 +12532,13 @@ static int perf_event_open_tracepoint(const char *tp_category, { const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; - char errmsg[STRERR_BUFSIZE]; int tp_id, pfd, err; tp_id = determine_tracepoint_id(tp_category, tp_name); if (tp_id < 0) { pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", tp_category, tp_name, - libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); + errstr(tp_id)); return tp_id; } @@ -12192,7 +12553,7 @@ static int perf_event_open_tracepoint(const char *tp_category, err = -errno; pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return err; } return pfd; @@ -12204,7 +12565,6 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p const struct bpf_tracepoint_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE]; struct 
bpf_link *link; int pfd, err; @@ -12217,7 +12577,7 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p if (pfd < 0) { pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", prog->name, tp_category, tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + errstr(pfd)); return libbpf_err_ptr(pfd); } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); @@ -12226,7 +12586,7 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p close(pfd); pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", prog->name, tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return libbpf_err_ptr(err); } return link; @@ -12271,13 +12631,18 @@ static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_lin return libbpf_get_error(*link); } -struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, - const char *tp_name) +struct bpf_link * +bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, + const char *tp_name, + struct bpf_raw_tracepoint_opts *opts) { - char errmsg[STRERR_BUFSIZE]; + LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts); struct bpf_link *link; int prog_fd, pfd; + if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts)) + return libbpf_err_ptr(-EINVAL); + prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); @@ -12289,18 +12654,26 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; - pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); + raw_opts.tp_name = tp_name; + raw_opts.cookie = OPTS_GET(opts, cookie, 0); + pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts); if (pfd < 0) { pfd = -errno; free(link); pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", - prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, 
sizeof(errmsg))); + prog->name, tp_name, errstr(pfd)); return libbpf_err_ptr(pfd); } link->fd = pfd; return link; } +struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, + const char *tp_name) +{ + return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL); +} + static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) { static const char *const prefixes[] = { @@ -12347,7 +12720,6 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro const struct bpf_trace_opts *opts) { LIBBPF_OPTS(bpf_link_create_opts, link_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, pfd; @@ -12372,7 +12744,7 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro pfd = -errno; free(link); pr_warn("prog '%s': failed to attach: %s\n", - prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + prog->name, errstr(pfd)); return libbpf_err_ptr(pfd); } link->fd = pfd; @@ -12413,7 +12785,6 @@ bpf_program_attach_fd(const struct bpf_program *prog, const struct bpf_link_create_opts *opts) { enum bpf_attach_type attach_type; - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; @@ -12435,7 +12806,7 @@ bpf_program_attach_fd(const struct bpf_program *prog, free(link); pr_warn("prog '%s': failed to attach to %s: %s\n", prog->name, target_name, - libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; @@ -12454,6 +12825,12 @@ bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) return bpf_program_attach_fd(prog, netns_fd, "netns", NULL); } +struct bpf_link * +bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd) +{ + return bpf_program_attach_fd(prog, map_fd, "sockmap", NULL); +} + struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) { /* target_fd/target_ifindex use the same field in 
LINK_CREATE */ @@ -12542,7 +12919,7 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, } if (prog->type != BPF_PROG_TYPE_EXT) { - pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", + pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n", prog->name); return libbpf_err_ptr(-EINVAL); } @@ -12550,7 +12927,7 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, if (target_fd) { LIBBPF_OPTS(bpf_link_create_opts, target_opts); - btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); + btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd); if (btf_id < 0) return libbpf_err_ptr(btf_id); @@ -12571,7 +12948,6 @@ bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; __u32 target_fd = 0; @@ -12599,7 +12975,7 @@ bpf_program__attach_iter(const struct bpf_program *prog, link_fd = -errno; free(link); pr_warn("prog '%s': failed to attach to iterator: %s\n", - prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + prog->name, errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; @@ -12641,12 +13017,10 @@ struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog, link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts); if (link_fd < 0) { - char errmsg[STRERR_BUFSIZE]; - link_fd = -errno; free(link); pr_warn("prog '%s': failed to attach to netfilter: %s\n", - prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + prog->name, errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; @@ -12662,6 +13036,12 @@ struct bpf_link *bpf_program__attach(const struct bpf_program *prog) if (!prog->sec_def || !prog->sec_def->prog_attach_fn) return libbpf_err_ptr(-EOPNOTSUPP); + if (bpf_program__fd(prog) < 0) { + 
pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link); if (err) return libbpf_err_ptr(err); @@ -12702,8 +13082,15 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) __u32 zero = 0; int err, fd; - if (!bpf_map__is_struct_ops(map) || map->fd == -1) + if (!bpf_map__is_struct_ops(map)) { + pr_warn("map '%s': can't attach non-struct_ops map\n", map->name); return libbpf_err_ptr(-EINVAL); + } + + if (map->fd < 0) { + pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name); + return libbpf_err_ptr(-EINVAL); + } link = calloc(1, sizeof(*link)); if (!link) @@ -12751,13 +13138,18 @@ int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map) __u32 zero = 0; int err; - if (!bpf_map__is_struct_ops(map) || !map_is_created(map)) - return -EINVAL; + if (!bpf_map__is_struct_ops(map)) + return libbpf_err(-EINVAL); + + if (map->fd < 0) { + pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); + return libbpf_err(-EINVAL); + } st_ops_link = container_of(link, struct bpf_link_struct_ops, link); /* Ensure the type of a link is correct */ if (st_ops_link->map_fd < 0) - return -EINVAL; + return libbpf_err(-EINVAL); err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); /* It can be EBUSY if the map has been used to create or @@ -12913,7 +13305,6 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, int cpu, int map_key) { struct perf_cpu_buf *cpu_buf; - char msg[STRERR_BUFSIZE]; int err; cpu_buf = calloc(1, sizeof(*cpu_buf)); @@ -12929,7 +13320,7 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, if (cpu_buf->fd < 0) { err = -errno; pr_warn("failed to open perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto 
error; } @@ -12940,14 +13331,14 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, cpu_buf->base = NULL; err = -errno; pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } @@ -12984,7 +13375,6 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; - attr.sample_period = sample_period; attr.wakeup_events = sample_period; p.attr = &attr; @@ -13023,7 +13413,6 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, { const char *online_cpus_file = "/sys/devices/system/cpu/online"; struct bpf_map_info map; - char msg[STRERR_BUFSIZE]; struct perf_buffer *pb; bool *online = NULL; __u32 map_info_len; @@ -13046,7 +13435,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, */ if (err != -EINVAL) { pr_warn("failed to get map info for map FD %d: %s\n", - map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); + map_fd, errstr(err)); return ERR_PTR(err); } pr_debug("failed to get map info for FD %d; API not supported? 
Ignoring...\n", @@ -13076,7 +13465,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, if (pb->epoll_fd < 0) { err = -errno; pr_warn("failed to create epoll instance: %s\n", - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } @@ -13107,7 +13496,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = parse_cpu_mask_file(online_cpus_file, &online, &n); if (err) { - pr_warn("failed to get online CPU mask: %d\n", err); + pr_warn("failed to get online CPU mask: %s\n", errstr(err)); goto error; } @@ -13138,7 +13527,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = -errno; pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", cpu, map_key, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } @@ -13149,7 +13538,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = -errno; pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", cpu, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } j++; @@ -13244,7 +13633,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("error while processing records: %d\n", err); + pr_warn("error while processing records: %s\n", errstr(err)); return libbpf_err(err); } } @@ -13328,7 +13717,8 @@ int perf_buffer__consume(struct perf_buffer *pb) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err); + pr_warn("perf_buffer: failed to process records in buffer #%d: %s\n", + i, errstr(err)); return libbpf_err(err); } } @@ -13344,7 +13734,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog, if (!prog || attach_prog_fd < 0) return libbpf_err(-EINVAL); - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); if 
(attach_prog_fd && !attach_func_name) { @@ -13357,7 +13747,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog, if (attach_prog_fd) { btf_id = libbpf_find_prog_btf_id(attach_func_name, - attach_prog_fd); + attach_prog_fd, prog->obj->token_fd); if (btf_id < 0) return libbpf_err(btf_id); } else { @@ -13439,14 +13829,14 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) fd = open(fcpu, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; - pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); + pr_warn("Failed to open cpu mask file %s: %s\n", fcpu, errstr(err)); return err; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { err = len ? -errno : -EINVAL; - pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); + pr_warn("Failed to read cpu mask from %s: %s\n", fcpu, errstr(err)); return err; } if (len >= sizeof(buf)) { @@ -13486,14 +13876,15 @@ int libbpf_num_possible_cpus(void) static int populate_skeleton_maps(const struct bpf_object *obj, struct bpf_map_skeleton *maps, - size_t map_cnt) + size_t map_cnt, size_t map_skel_sz) { int i; for (i = 0; i < map_cnt; i++) { - struct bpf_map **map = maps[i].map; - const char *name = maps[i].name; - void **mmaped = maps[i].mmaped; + struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz; + struct bpf_map **map = map_skel->map; + const char *name = map_skel->name; + void **mmaped = map_skel->mmaped; *map = bpf_object__find_map_by_name(obj, name); if (!*map) { @@ -13510,13 +13901,14 @@ static int populate_skeleton_maps(const struct bpf_object *obj, static int populate_skeleton_progs(const struct bpf_object *obj, struct bpf_prog_skeleton *progs, - size_t prog_cnt) + size_t prog_cnt, size_t prog_skel_sz) { int i; for (i = 0; i < prog_cnt; i++) { - struct bpf_program **prog = progs[i].prog; - const char *name = progs[i].name; + struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz; + struct bpf_program **prog = prog_skel->prog; + const char *name = 
prog_skel->name; *prog = bpf_object__find_program_by_name(obj, name); if (!*prog) { @@ -13530,42 +13922,27 @@ static int populate_skeleton_progs(const struct bpf_object *obj, int bpf_object__open_skeleton(struct bpf_object_skeleton *s, const struct bpf_object_open_opts *opts) { - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts, - .object_name = s->name, - ); struct bpf_object *obj; int err; - /* Attempt to preserve opts->object_name, unless overriden by user - * explicitly. Overwriting object name for skeletons is discouraged, - * as it breaks global data maps, because they contain object name - * prefix as their own map name prefix. When skeleton is generated, - * bpftool is making an assumption that this name will stay the same. - */ - if (opts) { - memcpy(&skel_opts, opts, sizeof(*opts)); - if (!opts->object_name) - skel_opts.object_name = s->name; - } - - obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts); - err = libbpf_get_error(obj); - if (err) { - pr_warn("failed to initialize skeleton BPF object '%s': %d\n", - s->name, err); + obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_warn("failed to initialize skeleton BPF object '%s': %s\n", + s->name, errstr(err)); return libbpf_err(err); } *s->obj = obj; - err = populate_skeleton_maps(obj, s->maps, s->map_cnt); + err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { - pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err); + pr_warn("failed to populate skeleton maps for '%s': %s\n", s->name, errstr(err)); return libbpf_err(err); } - err = populate_skeleton_progs(obj, s->progs, s->prog_cnt); + err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { - pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err); + pr_warn("failed to populate skeleton progs for '%s': %s\n", s->name, errstr(err)); return libbpf_err(err); } @@ -13593,26 +13970,26 @@ 
int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) return libbpf_err(-errno); } - err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt); + err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { - pr_warn("failed to populate subskeleton maps: %d\n", err); + pr_warn("failed to populate subskeleton maps: %s\n", errstr(err)); return libbpf_err(err); } - err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt); + err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { - pr_warn("failed to populate subskeleton maps: %d\n", err); + pr_warn("failed to populate subskeleton maps: %s\n", errstr(err)); return libbpf_err(err); } for (var_idx = 0; var_idx < s->var_cnt; var_idx++) { - var_skel = &s->vars[var_idx]; + var_skel = (void *)s->vars + var_idx * s->var_skel_sz; map = *var_skel->map; map_type_id = bpf_map__btf_value_type_id(map); map_type = btf__type_by_id(btf, map_type_id); if (!btf_is_datasec(map_type)) { - pr_warn("type for map '%1$s' is not a datasec: %2$s", + pr_warn("type for map '%1$s' is not a datasec: %2$s\n", bpf_map__name(map), __btf_kind_str(btf_kind(map_type))); return libbpf_err(-EINVAL); @@ -13648,52 +14025,18 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) err = bpf_object__load(*s->obj); if (err) { - pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); + pr_warn("failed to load BPF skeleton '%s': %s\n", s->name, errstr(err)); return libbpf_err(err); } for (i = 0; i < s->map_cnt; i++) { - struct bpf_map *map = *s->maps[i].map; - size_t mmap_sz = bpf_map_mmap_sz(map); - int prot, map_fd = map->fd; - void **mmaped = s->maps[i].mmaped; - - if (!mmaped) - continue; - - if (!(map->def.map_flags & BPF_F_MMAPABLE)) { - *mmaped = NULL; - continue; - } + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; - if (map->def.type == BPF_MAP_TYPE_ARENA) { - *mmaped = map->mmaped; + if 
(!map_skel->mmaped) continue; - } - - if (map->def.map_flags & BPF_F_RDONLY_PROG) - prot = PROT_READ; - else - prot = PROT_READ | PROT_WRITE; - /* Remap anonymous mmap()-ed "map initialization image" as - * a BPF map-backed mmap()-ed memory, but preserving the same - * memory address. This will cause kernel to change process' - * page table to point to a different piece of kernel memory, - * but from userspace point of view memory address (and its - * contents, being identical at this point) will stay the - * same. This mapping will be released by bpf_object__close() - * as per normal clean up procedure, so we don't need to worry - * about it from skeleton's clean up perspective. - */ - *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0); - if (*mmaped == MAP_FAILED) { - err = -errno; - *mmaped = NULL; - pr_warn("failed to re-mmap() map '%s': %d\n", - bpf_map__name(map), err); - return libbpf_err(err); - } + *map_skel->mmaped = map->mmaped; } return 0; @@ -13704,8 +14047,9 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) int i, err; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_program *prog = *s->progs[i].prog; - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_program *prog = *prog_skel->prog; + struct bpf_link **link = prog_skel->link; if (!prog->autoload || !prog->autoattach) continue; @@ -13720,8 +14064,8 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link); if (err) { - pr_warn("prog '%s': failed to auto-attach: %d\n", - bpf_program__name(prog), err); + pr_warn("prog '%s': failed to auto-attach: %s\n", + bpf_program__name(prog), errstr(err)); return libbpf_err(err); } @@ -13737,6 +14081,45 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) */ } + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void 
*)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; + struct bpf_link **link; + + if (!map->autocreate || !map->autoattach) + continue; + + /* only struct_ops maps can be attached */ + if (!bpf_map__is_struct_ops(map)) + continue; + + /* skeleton is created with earlier version of bpftool, notify user */ + if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) { + pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n", + bpf_map__name(map)); + continue; + } + + link = map_skel->link; + if (!link) { + pr_warn("map '%s': BPF map skeleton link is uninitialized\n", + bpf_map__name(map)); + continue; + } + + if (*link) + continue; + + *link = bpf_map__attach_struct_ops(map); + if (!*link) { + err = -errno; + pr_warn("map '%s': failed to auto-attach: %s\n", + bpf_map__name(map), errstr(err)); + return libbpf_err(err); + } + } + return 0; } @@ -13745,11 +14128,25 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) int i; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_link **link = prog_skel->link; bpf_link__destroy(*link); *link = NULL; } + + if (s->map_skel_sz < sizeof(struct bpf_map_skeleton)) + return; + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_link **link = map_skel->link; + + if (link) { + bpf_link__destroy(*link); + *link = NULL; + } + } } void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) @@ -13757,8 +14154,7 @@ void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) if (!s) return; - if (s->progs) - bpf_object__detach_skeleton(s); + bpf_object__detach_skeleton(s); if (s->obj) bpf_object__close(*s->obj); free(s->maps); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 7b510761f545..1137e7d2e1b5 100644 --- a/tools/lib/bpf/libbpf.h +++ 
b/tools/lib/bpf/libbpf.h @@ -98,7 +98,10 @@ typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, /** * @brief **libbpf_set_print()** sets user-provided log callback function to - * be used for libbpf warnings and informational messages. + * be used for libbpf warnings and informational messages. If the user callback + * is not set, messages are logged to stderr by default. The verbosity of these + * messages can be controlled by setting the environment variable + * LIBBPF_LOG_LEVEL to either warn, info, or debug. * @param fn The log print function. If NULL, libbpf won't print anything. * @return Pointer to old print function. * @@ -149,7 +152,7 @@ struct bpf_object_open_opts { * log_buf and log_level settings. * * If specified, this log buffer will be passed for: - * - each BPF progral load (BPF_PROG_LOAD) attempt, unless overriden + * - each BPF progral load (BPF_PROG_LOAD) attempt, unless overridden * with bpf_program__set_log() on per-program level, to get * BPF verifier log output. * - during BPF object's BTF load into kernel (BPF_BTF_LOAD) to get @@ -239,6 +242,19 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); /** + * @brief **bpf_object__prepare()** prepares BPF object for loading: + * performs ELF processing, relocations, prepares final state of BPF program + * instructions (accessible with bpf_program__insns()), creates and + * (potentially) pins maps. Leaves BPF object in the state ready for program + * loading. + * @param obj Pointer to a valid BPF object instance returned by + * **bpf_object__open*()** API + * @return 0, on success; negative error code, otherwise, error code is + * stored in errno + */ +int bpf_object__prepare(struct bpf_object *obj); + +/** * @brief **bpf_object__load()** loads BPF object into kernel. 
* @param obj Pointer to a valid BPF object instance returned by * **bpf_object__open*()** APIs @@ -291,6 +307,14 @@ LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); LIBBPF_API int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version); +/** + * @brief **bpf_object__token_fd** is an accessor for BPF token FD associated + * with BPF object. + * @param obj Pointer to a valid BPF object + * @return BPF token FD or -1, if it wasn't set + */ +LIBBPF_API int bpf_object__token_fd(const struct bpf_object *obj); + struct btf; LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj); LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); @@ -452,7 +476,7 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link); /** * @brief **bpf_program__attach()** is a generic function for attaching * a BPF program based on auto-detection of program type, attach type, - * and extra paremeters, where applicable. + * and extra parameters, where applicable. 
* * @param prog BPF program to attach * @return Reference to the newly created BPF link; or NULL is returned on error, @@ -539,10 +563,14 @@ struct bpf_kprobe_multi_opts { size_t cnt; /* create return kprobes */ bool retprobe; + /* create session kprobes */ + bool session; + /* enforce unique match */ + bool unique_match; size_t :0; }; -#define bpf_kprobe_multi_opts__last_field retprobe +#define bpf_kprobe_multi_opts__last_field unique_match LIBBPF_API struct bpf_link * bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, @@ -564,10 +592,12 @@ struct bpf_uprobe_multi_opts { size_t cnt; /* create return uprobes */ bool retprobe; + /* create session kprobes */ + bool session; size_t :0; }; -#define bpf_uprobe_multi_opts__last_field retprobe +#define bpf_uprobe_multi_opts__last_field session /** * @brief **bpf_program__attach_uprobe_multi()** attaches a BPF program @@ -674,7 +704,7 @@ struct bpf_uprobe_opts { /** * @brief **bpf_program__attach_uprobe()** attaches a BPF program * to the userspace function which is found by binary path and - * offset. You can optionally specify a particular proccess to attach + * offset. You can optionally specify a particular process to attach * to. You can also optionally attach the program to the function * exit instead of entry. 
* @@ -760,9 +790,20 @@ bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, const char *tp_name, const struct bpf_tracepoint_opts *opts); +struct bpf_raw_tracepoint_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u64 cookie; + size_t :0; +}; +#define bpf_raw_tracepoint_opts__last_field cookie + LIBBPF_API struct bpf_link * bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, const char *tp_name); +LIBBPF_API struct bpf_link * +bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, + const char *tp_name, + struct bpf_raw_tracepoint_opts *opts); struct bpf_trace_opts { /* size of this struct, for forward/backward compatibility */ @@ -784,6 +825,8 @@ bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd); LIBBPF_API struct bpf_link * bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd); LIBBPF_API struct bpf_link * +bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd); +LIBBPF_API struct bpf_link * bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex); LIBBPF_API struct bpf_link * bpf_program__attach_freplace(const struct bpf_program *prog, @@ -897,6 +940,12 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); +LIBBPF_API struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog); +LIBBPF_API __u32 bpf_program__func_info_cnt(const struct bpf_program *prog); + +LIBBPF_API struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog); +LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog); + /** * @brief **bpf_program__set_attach_target()** sets BTF-based attach target * for supported BPF program types: @@ -961,6 +1010,23 @@ LIBBPF_API int 
bpf_map__set_autocreate(struct bpf_map *map, bool autocreate); LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map); /** + * @brief **bpf_map__set_autoattach()** sets whether libbpf has to auto-attach + * map during BPF skeleton attach phase. + * @param map the BPF map instance + * @param autoattach whether to attach map during BPF skeleton attach phase + * @return 0 on success; negative error code, otherwise + */ +LIBBPF_API int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach); + +/** + * @brief **bpf_map__autoattach()** returns whether BPF map is configured to + * auto-attach during BPF skeleton attach phase. + * @param map the BPF map instance + * @return true if map is set to auto-attach during skeleton attach phase; false, otherwise + */ +LIBBPF_API bool bpf_map__autoattach(const struct bpf_map *map); + +/** * @brief **bpf_map__fd()** gets the file descriptor of the passed * BPF map * @param map the BPF map instance @@ -1223,6 +1289,7 @@ enum bpf_tc_attach_point { BPF_TC_INGRESS = 1 << 0, BPF_TC_EGRESS = 1 << 1, BPF_TC_CUSTOM = 1 << 2, + BPF_TC_QDISC = 1 << 3, }; #define BPF_TC_PARENT(a, b) \ @@ -1237,9 +1304,11 @@ struct bpf_tc_hook { int ifindex; enum bpf_tc_attach_point attach_point; __u32 parent; + __u32 handle; + const char *qdisc; size_t :0; }; -#define bpf_tc_hook__last_field parent +#define bpf_tc_hook__last_field qdisc struct bpf_tc_opts { size_t sz; @@ -1282,6 +1351,7 @@ LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd, ring_buffer_sample_fn sample_cb, void *ctx); LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); +LIBBPF_API int ring_buffer__consume_n(struct ring_buffer *rb, size_t n); LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb); /** @@ -1356,6 +1426,17 @@ LIBBPF_API int ring__map_fd(const struct ring *r); */ LIBBPF_API int ring__consume(struct ring *r); +/** + * @brief **ring__consume_n()** consumes up 
to a requested amount of items from + * a ringbuffer without event polling. + * + * @param r A ringbuffer object. + * @param n Maximum amount of items to consume. + * @return The number of items consumed, or a negative number if any of the + * callbacks return an error. + */ +LIBBPF_API int ring__consume_n(struct ring *r, size_t n); + struct user_ring_buffer_opts { size_t sz; /* size of this struct, for forward/backward compatibility */ }; @@ -1546,11 +1627,11 @@ LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_i * memory region of the ring buffer. * This ring buffer can be used to implement a custom events consumer. * The ring buffer starts with the *struct perf_event_mmap_page*, which - * holds the ring buffer managment fields, when accessing the header + * holds the ring buffer management fields, when accessing the header * structure it's important to be SMP aware. * You can refer to *perf_event_read_simple* for a simple example. * @param pb the perf buffer structure - * @param buf_idx the buffer index to retreive + * @param buf_idx the buffer index to retrieve * @param buf (out) gets the base pointer of the mmap()'ed memory * @param buf_size (out) gets the size of the mmap()'ed region * @return 0 on success, negative error code for failure @@ -1642,6 +1723,7 @@ struct bpf_map_skeleton { const char *name; struct bpf_map **map; void **mmaped; + struct bpf_link **link; }; struct bpf_prog_skeleton { @@ -1738,9 +1820,14 @@ struct bpf_linker_file_opts { struct bpf_linker; LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts); +LIBBPF_API struct bpf_linker *bpf_linker__new_fd(int fd, struct bpf_linker_opts *opts); LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts); +LIBBPF_API int bpf_linker__add_fd(struct bpf_linker *linker, int fd, + const struct bpf_linker_file_opts *opts); +LIBBPF_API int bpf_linker__add_buf(struct 
bpf_linker *linker, void *buf, size_t buf_sz, + const struct bpf_linker_file_opts *opts); LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 86804fd90dd1..1205f9a4fe04 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -410,7 +410,37 @@ LIBBPF_1.3.0 { LIBBPF_1.4.0 { global: + bpf_program__attach_raw_tracepoint_opts; + bpf_raw_tracepoint_open_opts; bpf_token_create; btf__new_split; btf_ext__raw_data; } LIBBPF_1.3.0; + +LIBBPF_1.5.0 { + global: + btf__distill_base; + btf__relocate; + btf_ext__endianness; + btf_ext__set_endianness; + bpf_map__autoattach; + bpf_map__set_autoattach; + bpf_object__token_fd; + bpf_program__attach_sockmap; + ring__consume_n; + ring_buffer__consume_n; +} LIBBPF_1.4.0; + +LIBBPF_1.6.0 { + global: + bpf_linker__add_buf; + bpf_linker__add_fd; + bpf_linker__new_fd; + bpf_object__prepare; + bpf_program__func_info; + bpf_program__func_info_cnt; + bpf_program__line_info; + bpf_program__line_info_cnt; + btf__add_decl_attr; + btf__add_type_attr; +} LIBBPF_1.5.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 864b36177424..477a3b3389a0 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -10,6 +10,7 @@ #define __LIBBPF_LIBBPF_INTERNAL_H #include <stdlib.h> +#include <byteswap.h> #include <limits.h> #include <errno.h> #include <linux/err.h> @@ -234,6 +235,9 @@ struct btf_type; struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id); const char *btf_kind_str(const struct btf_type *t); const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); +const struct btf_header *btf_header(const struct btf *btf); +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf); +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map); static inline enum 
btf_func_linkage btf_func_linkage(const struct btf_type *t) { @@ -405,6 +409,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level, int token_fd); +struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -445,11 +450,11 @@ struct btf_ext_info { * * The func_info subsection layout: * record size for struct bpf_func_info in the func_info subsection - * struct btf_sec_func_info for section #1 + * struct btf_ext_info_sec for section #1 * a list of bpf_func_info records for section #1 * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h * but may not be identical - * struct btf_sec_func_info for section #2 + * struct btf_ext_info_sec for section #2 * a list of bpf_func_info records for section #2 * ...... * @@ -481,6 +486,8 @@ struct btf_ext { struct btf_ext_header *hdr; void *data; }; + void *data_swapped; + bool swapped_endian; struct btf_ext_info func_info; struct btf_ext_info line_info; struct btf_ext_info core_relo_info; @@ -508,21 +515,64 @@ struct bpf_line_info_min { __u32 line_col; }; +/* Functions to byte-swap info records */ + +typedef void (*info_rec_bswap_fn)(void *); + +static inline void bpf_func_info_bswap(struct bpf_func_info *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->type_id = bswap_32(i->type_id); +} + +static inline void bpf_line_info_bswap(struct bpf_line_info *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->file_name_off = bswap_32(i->file_name_off); + i->line_off = bswap_32(i->line_off); + i->line_col = bswap_32(i->line_col); +} + +static inline void bpf_core_relo_bswap(struct bpf_core_relo *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->type_id = bswap_32(i->type_id); + i->access_str_off = bswap_32(i->access_str_off); + i->kind = bswap_32(i->kind); +} + +enum 
btf_field_iter_kind { + BTF_FIELD_ITER_IDS, + BTF_FIELD_ITER_STRS, +}; + +struct btf_field_desc { + /* once-per-type offsets */ + int t_off_cnt, t_offs[2]; + /* member struct size, or zero, if no members */ + int m_sz; + /* repeated per-member offsets */ + int m_off_cnt, m_offs[1]; +}; + +struct btf_field_iter { + struct btf_field_desc desc; + void *p; + int m_idx; + int off_idx; + int vlen; +}; + +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, enum btf_field_iter_kind iter_kind); +__u32 *btf_field_iter_next(struct btf_field_iter *it); typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx); typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx); -int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx); -int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx); int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, __u32 kind); -typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type, - const char *sym_name, void *ctx); - -int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *arg); - /* handle direct returned errors */ static inline int libbpf_err(int ret) { @@ -568,6 +618,16 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +static inline void bpf_insn_bswap(struct bpf_insn *insn) +{ + __u8 tmp_reg = insn->dst_reg; + + insn->dst_reg = insn->src_reg; + insn->src_reg = tmp_reg; + insn->off = bswap_16(insn->off); + insn->imm = bswap_32(insn->imm); +} + /* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. * Original FD is not closed or altered in any other way. * Preserves original FD value, if it's invalid (negative). 
@@ -602,13 +662,18 @@ static inline int ensure_good_fd(int fd) return fd; } -static inline int sys_dup2(int oldfd, int newfd) +static inline int sys_dup3(int oldfd, int newfd, int flags) { -#ifdef __NR_dup2 - return syscall(__NR_dup2, oldfd, newfd); -#else - return syscall(__NR_dup3, oldfd, newfd, 0); -#endif + return syscall(__NR_dup3, oldfd, newfd, flags); +} + +/* Some versions of Android don't provide memfd_create() in their libc + * implementation, so avoid complications and just go straight to Linux + * syscall. + */ +static inline int sys_memfd_create(const char *name, unsigned flags) +{ + return syscall(__NR_memfd_create, name, flags); } /* Point *fixed_fd* to the same file that *tmp_fd* points to. @@ -619,7 +684,7 @@ static inline int reuse_fd(int fixed_fd, int tmp_fd) { int err; - err = sys_dup2(tmp_fd, fixed_fd); + err = sys_dup3(tmp_fd, fixed_fd, O_CLOEXEC); err = err < 0 ? -errno : 0; close(tmp_fd); /* clean up temporary FD */ return err; diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 1e1be467bede..60b2600be88a 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -76,7 +76,7 @@ enum libbpf_strict_mode { * first BPF program or map creation operation. This is done only if * kernel is too old to support memcg-based memory accounting for BPF * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY, - * but it can be overriden with libbpf_set_memlock_rlim() API. + * but it can be overridden with libbpf_set_memlock_rlim() API. * Note that libbpf_set_memlock_rlim() needs to be called before * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load() * operation. 
@@ -97,7 +97,7 @@ LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode); * @brief **libbpf_get_error()** extracts the error code from the passed * pointer * @param ptr pointer returned from libbpf API function - * @return error code; or 0 if no error occured + * @return error code; or 0 if no error occurred * * Note, as of libbpf 1.0 this function is not necessary and not recommended * to be used. Libbpf doesn't return error code embedded into the pointer diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 302188122439..9dfbe7750f56 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -448,7 +448,8 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe /* If BPF verifier doesn't recognize BPF helper ID (enum bpf_func_id) * at all, it will emit something like "invalid func unknown#181". * If BPF verifier recognizes BPF helper but it's not supported for - * given BPF program type, it will emit "unknown func bpf_sys_bpf#166". + * given BPF program type, it will emit "unknown func bpf_sys_bpf#166" + * or "program of this type cannot use helper bpf_sys_bpf#166". * In both cases, provided combination of BPF program type and BPF * helper is not supported by the kernel. * In all other cases, probe_prog_load() above will either succeed (e.g., @@ -457,7 +458,8 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe * that), or we'll get some more specific BPF verifier error about * some unsatisfied conditions. 
*/ - if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func "))) + if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func ") || + strstr(buf, "program of this type cannot use helper "))) return 0; return 1; /* assume supported */ } diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index e783a47da815..28c58fb17250 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 4 +#define LIBBPF_MINOR_VERSION 6 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 0d4be829551b..a469e5d4fee7 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -4,6 +4,10 @@ * * Copyright (c) 2021 Facebook */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + #include <stdbool.h> #include <stddef.h> #include <stdio.h> @@ -16,10 +20,12 @@ #include <elf.h> #include <libelf.h> #include <fcntl.h> +#include <sys/mman.h> #include "libbpf.h" #include "btf.h" #include "libbpf_internal.h" #include "strset.h" +#include "str_error.h" #define BTF_EXTERN_SEC ".extern" @@ -135,6 +141,7 @@ struct bpf_linker { int fd; Elf *elf; Elf64_Ehdr *elf_hdr; + bool swapped_endian; /* Output sections metadata */ struct dst_sec *secs; @@ -150,15 +157,19 @@ struct bpf_linker { /* global (including extern) ELF symbols */ int glob_sym_cnt; struct glob_sym *glob_syms; + + bool fd_is_owned; }; #define pr_warn_elf(fmt, ...) 
\ libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1)) -static int init_output_elf(struct bpf_linker *linker, const char *file); +static int init_output_elf(struct bpf_linker *linker); -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts, +static int bpf_linker_add_file(struct bpf_linker *linker, int fd, + const char *filename); + +static int linker_load_obj_file(struct bpf_linker *linker, struct src_obj *obj); static int linker_sanity_check_elf(struct src_obj *obj); static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec); @@ -189,7 +200,7 @@ void bpf_linker__free(struct bpf_linker *linker) if (linker->elf) elf_end(linker->elf); - if (linker->fd >= 0) + if (linker->fd >= 0 && linker->fd_is_owned) close(linker->fd); strset__free(linker->strtab_strs); @@ -231,9 +242,63 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts if (!linker) return errno = ENOMEM, NULL; - linker->fd = -1; + linker->filename = strdup(filename); + if (!linker->filename) { + err = -ENOMEM; + goto err_out; + } + + linker->fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); + if (linker->fd < 0) { + err = -errno; + pr_warn("failed to create '%s': %d\n", filename, err); + goto err_out; + } + linker->fd_is_owned = true; + + err = init_output_elf(linker); + if (err) + goto err_out; + + return linker; + +err_out: + bpf_linker__free(linker); + return errno = -err, NULL; +} + +struct bpf_linker *bpf_linker__new_fd(int fd, struct bpf_linker_opts *opts) +{ + struct bpf_linker *linker; + char filename[32]; + int err; + + if (fd < 0) + return errno = EINVAL, NULL; + + if (!OPTS_VALID(opts, bpf_linker_opts)) + return errno = EINVAL, NULL; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn_elf("libelf initialization failed"); + return errno = EINVAL, NULL; + } - err = init_output_elf(linker, filename); + linker = calloc(1, 
sizeof(*linker)); + if (!linker) + return errno = ENOMEM, NULL; + + snprintf(filename, sizeof(filename), "fd:%d", fd); + linker->filename = strdup(filename); + if (!linker->filename) { + err = -ENOMEM; + goto err_out; + } + + linker->fd = fd; + linker->fd_is_owned = false; + + err = init_output_elf(linker); if (err) goto err_out; @@ -292,23 +357,12 @@ static Elf64_Sym *add_new_sym(struct bpf_linker *linker, size_t *sym_idx) return sym; } -static int init_output_elf(struct bpf_linker *linker, const char *file) +static int init_output_elf(struct bpf_linker *linker) { int err, str_off; Elf64_Sym *init_sym; struct dst_sec *sec; - linker->filename = strdup(file); - if (!linker->filename) - return -ENOMEM; - - linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); - if (linker->fd < 0) { - err = -errno; - pr_warn("failed to create '%s': %d\n", file, err); - return err; - } - linker->elf = elf_begin(linker->fd, ELF_C_WRITE, NULL); if (!linker->elf) { pr_warn_elf("failed to create ELF object"); @@ -324,13 +378,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) linker->elf_hdr->e_machine = EM_BPF; linker->elf_hdr->e_type = ET_REL; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif + /* Set unknown ELF endianness, assign later from input files */ + linker->elf_hdr->e_ident[EI_DATA] = ELFDATANONE; /* STRTAB */ /* initialize strset with an empty string to conform to ELF */ @@ -396,6 +445,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) pr_warn_elf("failed to create SYMTAB data"); return -EINVAL; } + /* Ensure libelf translates byte-order of symbol records */ + sec->data->d_type = ELF_T_SYM; str_off = strset__add_str(linker->strtab_strs, sec->sec_name); if (str_off < 0) @@ -437,19 +488,16 @@ static int 
init_output_elf(struct bpf_linker *linker, const char *file) return 0; } -int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts) +static int bpf_linker_add_file(struct bpf_linker *linker, int fd, + const char *filename) { struct src_obj obj = {}; int err = 0; - if (!OPTS_VALID(opts, bpf_linker_file_opts)) - return libbpf_err(-EINVAL); - - if (!linker->elf) - return libbpf_err(-EINVAL); + obj.filename = filename; + obj.fd = fd; - err = err ?: linker_load_obj_file(linker, filename, opts, &obj); + err = err ?: linker_load_obj_file(linker, &obj); err = err ?: linker_append_sec_data(linker, &obj); err = err ?: linker_append_elf_syms(linker, &obj); err = err ?: linker_append_elf_relos(linker, &obj); @@ -464,12 +512,91 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, free(obj.sym_map); if (obj.elf) elf_end(obj.elf); - if (obj.fd >= 0) - close(obj.fd); + return err; +} + +int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, + const struct bpf_linker_file_opts *opts) +{ + int fd, err; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + fd = open(filename, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = -errno; + pr_warn("failed to open file '%s': %s\n", filename, errstr(err)); + return libbpf_err(err); + } + + err = bpf_linker_add_file(linker, fd, filename); + close(fd); + return libbpf_err(err); +} + +int bpf_linker__add_fd(struct bpf_linker *linker, int fd, + const struct bpf_linker_file_opts *opts) +{ + char filename[32]; + int err; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + if (fd < 0) + return libbpf_err(-EINVAL); + + snprintf(filename, sizeof(filename), "fd:%d", fd); + err = bpf_linker_add_file(linker, fd, filename); return libbpf_err(err); } +int bpf_linker__add_buf(struct bpf_linker 
*linker, void *buf, size_t buf_sz, + const struct bpf_linker_file_opts *opts) +{ + char filename[32]; + int fd, written, ret; + + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + + if (!linker->elf) + return libbpf_err(-EINVAL); + + snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz); + + fd = sys_memfd_create(filename, 0); + if (fd < 0) { + ret = -errno; + pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret)); + return libbpf_err(ret); + } + + written = 0; + while (written < buf_sz) { + ret = write(fd, buf, buf_sz); + if (ret < 0) { + ret = -errno; + pr_warn("failed to write '%s': %s\n", filename, errstr(ret)); + goto err_out; + } + written += ret; + } + + ret = bpf_linker_add_file(linker, fd, filename); +err_out: + close(fd); + return libbpf_err(ret); +} + static bool is_dwarf_sec_name(const char *name) { /* approximation, but the actual list is too long */ @@ -535,65 +662,69 @@ static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name) return sec; } -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, - const struct bpf_linker_file_opts *opts, +static int linker_load_obj_file(struct bpf_linker *linker, struct src_obj *obj) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - const int host_endianness = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - const int host_endianness = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif int err = 0; Elf_Scn *scn; Elf_Data *data; Elf64_Ehdr *ehdr; Elf64_Shdr *shdr; struct src_sec *sec; + unsigned char obj_byteorder; + unsigned char link_byteorder = linker->elf_hdr->e_ident[EI_DATA]; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2LSB; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2MSB; +#else +#error "Unknown __BYTE_ORDER__" +#endif - pr_debug("linker: adding object file '%s'...\n", filename); - - obj->filename 
= filename; + pr_debug("linker: adding object file '%s'...\n", obj->filename); - obj->fd = open(filename, O_RDONLY | O_CLOEXEC); - if (obj->fd < 0) { - err = -errno; - pr_warn("failed to open file '%s': %d\n", filename, err); - return err; - } obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL); if (!obj->elf) { - err = -errno; - pr_warn_elf("failed to parse ELF file '%s'", filename); - return err; + pr_warn_elf("failed to parse ELF file '%s'", obj->filename); + return -EINVAL; } /* Sanity check ELF file high-level properties */ ehdr = elf64_getehdr(obj->elf); if (!ehdr) { - err = -errno; - pr_warn_elf("failed to get ELF header for %s", filename); + pr_warn_elf("failed to get ELF header for %s", obj->filename); + return -EINVAL; + } + + /* Linker output endianness set by first input object */ + obj_byteorder = ehdr->e_ident[EI_DATA]; + if (obj_byteorder != ELFDATA2LSB && obj_byteorder != ELFDATA2MSB) { + err = -EOPNOTSUPP; + pr_warn("unknown byte order of ELF file %s\n", obj->filename); return err; } - if (ehdr->e_ident[EI_DATA] != host_endianness) { + if (link_byteorder == ELFDATANONE) { + linker->elf_hdr->e_ident[EI_DATA] = obj_byteorder; + linker->swapped_endian = obj_byteorder != host_byteorder; + pr_debug("linker: set %s-endian output byte order\n", + obj_byteorder == ELFDATA2MSB ? 
"big" : "little"); + } else if (link_byteorder != obj_byteorder) { err = -EOPNOTSUPP; - pr_warn_elf("unsupported byte order of ELF file %s", filename); + pr_warn("byte order mismatch with ELF file %s\n", obj->filename); return err; } + if (ehdr->e_type != ET_REL || ehdr->e_machine != EM_BPF || ehdr->e_ident[EI_CLASS] != ELFCLASS64) { err = -EOPNOTSUPP; - pr_warn_elf("unsupported kind of ELF file %s", filename); + pr_warn_elf("unsupported kind of ELF file %s", obj->filename); return err; } if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) { - err = -errno; - pr_warn_elf("failed to get SHSTRTAB section index for %s", filename); - return err; + pr_warn_elf("failed to get SHSTRTAB section index for %s", obj->filename); + return -EINVAL; } scn = NULL; @@ -603,26 +734,23 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, shdr = elf64_getshdr(scn); if (!shdr) { - err = -errno; pr_warn_elf("failed to get section #%zu header for %s", - sec_idx, filename); - return err; + sec_idx, obj->filename); + return -EINVAL; } sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name); if (!sec_name) { - err = -errno; pr_warn_elf("failed to get section #%zu name for %s", - sec_idx, filename); - return err; + sec_idx, obj->filename); + return -EINVAL; } data = elf_getdata(scn, 0); if (!data) { - err = -errno; pr_warn_elf("failed to get section #%zu (%s) data from %s", - sec_idx, sec_name, filename); - return err; + sec_idx, sec_name, obj->filename); + return -EINVAL; } sec = add_src_sec(obj, sec_name); @@ -656,7 +784,8 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->btf = btf__new(data->d_buf, shdr->sh_size); err = libbpf_get_error(obj->btf); if (err) { - pr_warn("failed to parse .BTF from %s: %d\n", filename, err); + pr_warn("failed to parse .BTF from %s: %s\n", + obj->filename, errstr(err)); return err; } sec->skipped = true; @@ -666,7 +795,8 @@ static int linker_load_obj_file(struct bpf_linker 
*linker, const char *filename, obj->btf_ext = btf_ext__new(data->d_buf, shdr->sh_size); err = libbpf_get_error(obj->btf_ext); if (err) { - pr_warn("failed to parse .BTF.ext from '%s': %d\n", filename, err); + pr_warn("failed to parse .BTF.ext from '%s': %s\n", + obj->filename, errstr(err)); return err; } sec->skipped = true; @@ -683,7 +813,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, break; default: pr_warn("unrecognized section #%zu (%s) in %s\n", - sec_idx, sec_name, filename); + sec_idx, sec_name, obj->filename); err = -EINVAL; return err; } @@ -957,19 +1087,33 @@ static int check_btf_str_off(__u32 *str_off, void *ctx) static int linker_sanity_check_btf(struct src_obj *obj) { struct btf_type *t; - int i, n, err = 0; + int i, n, err; if (!obj->btf) return 0; n = btf__type_cnt(obj->btf); for (i = 1; i < n; i++) { + struct btf_field_iter it; + __u32 *type_id, *str_off; + t = btf_type_by_id(obj->btf, i); - err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf); - err = err ?: btf_type_visit_str_offs(t, check_btf_str_off, obj->btf); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (err) return err; + while ((type_id = btf_field_iter_next(&it))) { + if (*type_id >= n) + return -EINVAL; + } + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); + if (err) + return err; + while ((str_off = btf_field_iter_next(&it))) { + if (!btf__str_by_offset(obj->btf, *str_off)) + return -EINVAL; + } } return 0; @@ -1095,6 +1239,24 @@ static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec return true; } +static bool is_exec_sec(struct dst_sec *sec) +{ + if (!sec || sec->ephemeral) + return false; + return (sec->shdr->sh_type == SHT_PROGBITS) && + (sec->shdr->sh_flags & SHF_EXECINSTR); +} + +static void exec_sec_bswap(void *raw_data, int size) +{ + const int insn_cnt = size / sizeof(struct bpf_insn); + struct bpf_insn *insn = raw_data; + int i; + + for (i = 0; i < insn_cnt; i++, insn++) + 
bpf_insn_bswap(insn); +} + static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src) { void *tmp; @@ -1154,6 +1316,10 @@ static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src memset(dst->raw_data + dst->sec_sz, 0, dst_align_sz - dst->sec_sz); /* now copy src data at a properly aligned offset */ memcpy(dst->raw_data + dst_align_sz, src->data->d_buf, src->shdr->sh_size); + + /* convert added bpf insns to native byte-order */ + if (linker->swapped_endian && is_exec_sec(dst)) + exec_sec_bswap(dst->raw_data + dst_align_sz, src->shdr->sh_size); } dst->sec_sz = dst_final_sz; @@ -1210,7 +1376,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj } else { if (!secs_match(dst_sec, src_sec)) { pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* "license" and "version" sections are deduped */ @@ -1399,7 +1565,7 @@ recur: return true; case BTF_KIND_PTR: /* just validate overall shape of the referenced type, so no - * contents comparison for struct/union, and allowd fwd vs + * contents comparison for struct/union, and allowed fwd vs * struct/union */ exact = false; @@ -1948,7 +2114,7 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj, /* If existing symbol is a strong resolved symbol, bail out, * because we lost resolution battle have nothing to - * contribute. We already checked abover that there is no + * contribute. We already checked above that there is no * strong-strong conflict. We also already tightened binding * and visibility, so nothing else to contribute at that point. 
*/ @@ -1997,7 +2163,7 @@ add_sym: obj->sym_map[src_sym_idx] = dst_sym_idx; - if (sym_type == STT_SECTION && dst_sym) { + if (sym_type == STT_SECTION && dst_sec) { dst_sec->sec_sym_idx = dst_sym_idx; dst_sym->st_value = 0; } @@ -2057,7 +2223,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob } } else if (!secs_match(dst_sec, src_sec)) { pr_warn("sections %s are not compatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* shdr->sh_link points to SYMTAB */ @@ -2213,10 +2379,17 @@ static int linker_fixup_btf(struct src_obj *obj) vi = btf_var_secinfos(t); for (j = 0, m = btf_vlen(t); j < m; j++, vi++) { const struct btf_type *vt = btf__type_by_id(obj->btf, vi->type); - const char *var_name = btf__str_by_offset(obj->btf, vt->name_off); - int var_linkage = btf_var(vt)->linkage; + const char *var_name; + int var_linkage; Elf64_Sym *sym; + /* could be a variable or function */ + if (!btf_is_var(vt)) + continue; + + var_name = btf__str_by_offset(obj->btf, vt->name_off); + var_linkage = btf_var(vt)->linkage; + /* no need to patch up static or extern vars */ if (var_linkage != BTF_VAR_GLOBAL_ALLOCATED) continue; @@ -2234,26 +2407,10 @@ static int linker_fixup_btf(struct src_obj *obj) return 0; } -static int remap_type_id(__u32 *type_id, void *ctx) -{ - int *id_map = ctx; - int new_id = id_map[*type_id]; - - /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. 
*/ - if (new_id == 0 && *type_id != 0) { - pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id); - return -EINVAL; - } - - *type_id = id_map[*type_id]; - - return 0; -} - static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) { const struct btf_type *t; - int i, j, n, start_id, id; + int i, j, n, start_id, id, err; const char *name; if (!obj->btf) @@ -2324,9 +2481,25 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) n = btf__type_cnt(linker->btf); for (i = start_id; i < n; i++) { struct btf_type *dst_t = btf_type_by_id(linker->btf, i); + struct btf_field_iter it; + __u32 *type_id; - if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map)) - return -EINVAL; + err = btf_field_iter_init(&it, dst_t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((type_id = btf_field_iter_next(&it))) { + int new_id = obj->btf_type_map[*type_id]; + + /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. 
*/ + if (new_id == 0 && *type_id != 0) { + pr_warn("failed to find new ID mapping for original BTF type ID %u\n", + *type_id); + return -EINVAL; + } + + *type_id = obj->btf_type_map[*type_id]; + } } /* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's @@ -2394,6 +2567,10 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) if (glob_sym && glob_sym->var_idx >= 0) { __s64 sz; + /* FUNCs don't have size, nothing to update */ + if (btf_is_func(t)) + continue; + dst_var = &dst_sec->sec_vars[glob_sym->var_idx]; /* Because underlying BTF type might have * changed, so might its size have changed, so @@ -2607,27 +2784,32 @@ int bpf_linker__finalize(struct bpf_linker *linker) if (!sec->scn) continue; + /* restore sections with bpf insns to target byte-order */ + if (linker->swapped_endian && is_exec_sec(sec)) + exec_sec_bswap(sec->raw_data, sec->sec_sz); + sec->data->d_buf = sec->raw_data; } /* Finalize ELF layout */ if (elf_update(linker->elf, ELF_C_NULL) < 0) { - err = -errno; + err = -EINVAL; pr_warn_elf("failed to finalize ELF layout"); return libbpf_err(err); } /* Write out final ELF contents */ if (elf_update(linker->elf, ELF_C_WRITE) < 0) { - err = -errno; + err = -EINVAL; pr_warn_elf("failed to write ELF contents"); return libbpf_err(err); } elf_end(linker->elf); - close(linker->fd); - linker->elf = NULL; + + if (linker->fd_is_owned) + close(linker->fd); linker->fd = -1; return 0; @@ -2675,6 +2857,7 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name, static int finalize_btf(struct bpf_linker *linker) { + enum btf_endianness link_endianness; LIBBPF_OPTS(btf_dedup_opts, opts); struct btf *btf = linker->btf; const void *raw_data; @@ -2708,17 +2891,24 @@ static int finalize_btf(struct bpf_linker *linker) err = finalize_btf_ext(linker); if (err) { - pr_warn(".BTF.ext generation failed: %d\n", err); + pr_warn(".BTF.ext generation failed: %s\n", errstr(err)); return err; } opts.btf_ext = 
linker->btf_ext; err = btf__dedup(linker->btf, &opts); if (err) { - pr_warn("BTF dedup failed: %d\n", err); + pr_warn("BTF dedup failed: %s\n", errstr(err)); return err; } + /* Set .BTF and .BTF.ext output byte order */ + link_endianness = linker->elf_hdr->e_ident[EI_DATA] == ELFDATA2MSB ? + BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; + btf__set_endianness(linker->btf, link_endianness); + if (linker->btf_ext) + btf_ext__set_endianness(linker->btf_ext, link_endianness); + /* Emit .BTF section */ raw_data = btf__raw_data(linker->btf, &raw_sz); if (!raw_data) @@ -2726,7 +2916,7 @@ static int finalize_btf(struct bpf_linker *linker) err = emit_elf_data_sec(linker, BTF_ELF_SEC, 8, raw_data, raw_sz); if (err) { - pr_warn("failed to write out .BTF ELF section: %d\n", err); + pr_warn("failed to write out .BTF ELF section: %s\n", errstr(err)); return err; } @@ -2738,7 +2928,7 @@ static int finalize_btf(struct bpf_linker *linker) err = emit_elf_data_sec(linker, BTF_EXT_ELF_SEC, 8, raw_data, raw_sz); if (err) { - pr_warn("failed to write out .BTF.ext ELF section: %d\n", err); + pr_warn("failed to write out .BTF.ext ELF section: %s\n", errstr(err)); return err; } } @@ -2914,7 +3104,7 @@ static int finalize_btf_ext(struct bpf_linker *linker) err = libbpf_get_error(linker->btf_ext); if (err) { linker->btf_ext = NULL; - pr_warn("failed to parse final .BTF.ext data: %d\n", err); + pr_warn("failed to parse final .BTF.ext data: %s\n", errstr(err)); goto out; } diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 68a2def17175..c997e69d507f 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -529,9 +529,9 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) } -typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); +typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook); -static int clsact_config(struct libbpf_nla_req *req) +static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook) { 
req->tc.tcm_parent = TC_H_CLSACT; req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); @@ -539,6 +539,16 @@ static int clsact_config(struct libbpf_nla_req *req) return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact")); } +static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook) +{ + const char *qdisc = OPTS_GET(hook, qdisc, NULL); + + req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT); + req->tc.tcm_handle = OPTS_GET(hook, handle, 0); + + return nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1); +} + static int attach_point_to_config(struct bpf_tc_hook *hook, qdisc_config_t *config) { @@ -552,6 +562,9 @@ static int attach_point_to_config(struct bpf_tc_hook *hook, return 0; case BPF_TC_CUSTOM: return -EOPNOTSUPP; + case BPF_TC_QDISC: + *config = &qdisc_config; + return 0; default: return -EINVAL; } @@ -596,7 +609,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags) req.tc.tcm_family = AF_UNSPEC; req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0); - ret = config(&req); + ret = config(&req, hook); if (ret < 0) return ret; @@ -639,6 +652,7 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook) case BPF_TC_INGRESS: case BPF_TC_EGRESS: return libbpf_err(__bpf_tc_detach(hook, NULL, true)); + case BPF_TC_QDISC: case BPF_TC_INGRESS | BPF_TC_EGRESS: return libbpf_err(tc_qdisc_delete(hook)); case BPF_TC_CUSTOM: diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 975e265eab3b..06663f9ea581 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype, minlen = nla_attr_minlen[pt->type]; if (libbpf_nla_len(nla) < minlen) - return -1; + return -EINVAL; if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen) - return -1; + return -EINVAL; if (pt->type == LIBBPF_NLA_STRING) { char *data = libbpf_nla_data(nla); if (data[libbpf_nla_len(nla) - 1] != '\0') - return -1; + return -EINVAL; } return 0; @@ -118,19 +118,18 @@ int 
libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, if (policy) { err = validate_nla(nla, maxtype, policy); if (err < 0) - goto errout; + return err; } - if (tb[type]) + if (tb[type]) { pr_warn("Attribute of type %#x found multiple times in message, " "previous attribute is being ignored.\n", type); + } tb[type] = nla; } - err = 0; -errout: - return err; + return 0; } /** diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 63a4d5ad12d1..2b83c98a1137 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -683,7 +683,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, { const struct bpf_core_accessor *acc; const struct btf_type *t; - __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; + __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id, elem_id; const struct btf_member *m; const struct btf_type *mt; bool bitfield; @@ -706,8 +706,14 @@ static int bpf_core_calc_field_relo(const char *prog_name, if (!acc->name) { if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) { *val = spec->bit_offset / 8; - /* remember field size for load/store mem size */ - sz = btf__resolve_size(spec->btf, acc->type_id); + /* remember field size for load/store mem size; + * note, for arrays we care about individual element + * sizes, not the overall array size + */ + t = skip_mods_and_typedefs(spec->btf, acc->type_id, &elem_id); + while (btf_is_array(t)) + t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id); + sz = btf__resolve_size(spec->btf, elem_id); if (sz < 0) return -EINVAL; *field_sz = sz; @@ -767,7 +773,17 @@ static int bpf_core_calc_field_relo(const char *prog_name, case BPF_CORE_FIELD_BYTE_OFFSET: *val = byte_off; if (!bitfield) { - *field_sz = byte_sz; + /* remember field size for load/store mem size; + * note, for arrays we care about individual element + * sizes, not the overall array size + */ + t = skip_mods_and_typedefs(spec->btf, field_type_id, &elem_id); + while (btf_is_array(t)) + t = 
skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id); + sz = btf__resolve_size(spec->btf, elem_id); + if (sz < 0) + return -EINVAL; + *field_sz = sz; *type_id = field_type_id; } break; @@ -1339,7 +1355,7 @@ int bpf_core_calc_relo_insn(const char *prog_name, cands->cands[i].id, cand_spec); if (err < 0) { bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); - pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ", + pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n", prog_name, relo_idx, i, spec_buf, err); return err; } diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index aacb64278a01..9702b70da444 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -21,6 +21,7 @@ #include "libbpf.h" #include "libbpf_internal.h" #include "bpf.h" +#include "str_error.h" struct ring { ring_buffer_sample_fn sample_cb; @@ -88,8 +89,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, err = bpf_map_get_info_by_fd(map_fd, &info, &len); if (err) { err = -errno; - pr_warn("ringbuf: failed to get map info for fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to get map info for fd=%d: %s\n", + map_fd, errstr(err)); return libbpf_err(err); } @@ -123,8 +124,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %s\n", + map_fd, errstr(err)); goto err_out; } r->consumer_pos = tmp; @@ -142,8 +143,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %s\n", 
+ map_fd, errstr(err)); goto err_out; } r->producer_pos = tmp; @@ -156,8 +157,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, e->data.fd = rb->ring_cnt; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) { err = -errno; - pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to epoll add map fd=%d: %s\n", + map_fd, errstr(err)); goto err_out; } @@ -205,7 +206,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (rb->epoll_fd < 0) { err = -errno; - pr_warn("ringbuf: failed to create epoll instance: %d\n", err); + pr_warn("ringbuf: failed to create epoll instance: %s\n", errstr(err)); goto err_out; } @@ -231,7 +232,7 @@ static inline int roundup_len(__u32 len) return (len + 7) / 8 * 8; } -static int64_t ringbuf_process_ring(struct ring *r) +static int64_t ringbuf_process_ring(struct ring *r, size_t n) { int *len_ptr, len, err; /* 64-bit to avoid overflow in case of extreme application behavior */ @@ -268,12 +269,42 @@ static int64_t ringbuf_process_ring(struct ring *r) } smp_store_release(r->consumer_pos, cons_pos); + + if (cnt >= n) + goto done; } } while (got_new_data); done: return cnt; } +/* Consume available ring buffer(s) data without event polling, up to n + * records. + * + * Returns number of records consumed across all registered ring buffers (or + * n, whichever is less), or negative number if any of the callbacks return + * error. + */ +int ring_buffer__consume_n(struct ring_buffer *rb, size_t n) +{ + int64_t err, res = 0; + int i; + + for (i = 0; i < rb->ring_cnt; i++) { + struct ring *ring = rb->rings[i]; + + err = ringbuf_process_ring(ring, n); + if (err < 0) + return libbpf_err(err); + res += err; + n -= err; + + if (n == 0) + break; + } + return res > INT_MAX ? INT_MAX : res; +} + /* Consume available ring buffer(s) data without event polling. 
* Returns number of records consumed across all registered ring buffers (or * INT_MAX, whichever is less), or negative number if any of the callbacks @@ -287,13 +318,15 @@ int ring_buffer__consume(struct ring_buffer *rb) for (i = 0; i < rb->ring_cnt; i++) { struct ring *ring = rb->rings[i]; - err = ringbuf_process_ring(ring); + err = ringbuf_process_ring(ring, INT_MAX); if (err < 0) return libbpf_err(err); res += err; + if (res > INT_MAX) { + res = INT_MAX; + break; + } } - if (res > INT_MAX) - return INT_MAX; return res; } @@ -314,13 +347,13 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) __u32 ring_id = rb->events[i].data.fd; struct ring *ring = rb->rings[ring_id]; - err = ringbuf_process_ring(ring); + err = ringbuf_process_ring(ring, INT_MAX); if (err < 0) return libbpf_err(err); res += err; } if (res > INT_MAX) - return INT_MAX; + res = INT_MAX; return res; } @@ -371,17 +404,22 @@ int ring__map_fd(const struct ring *r) return r->map_fd; } -int ring__consume(struct ring *r) +int ring__consume_n(struct ring *r, size_t n) { int64_t res; - res = ringbuf_process_ring(r); + res = ringbuf_process_ring(r, n); if (res < 0) return libbpf_err(res); return res > INT_MAX ? 
INT_MAX : res; } +int ring__consume(struct ring *r) +{ + return ring__consume_n(r, INT_MAX); +} + static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb) { if (rb->consumer_pos) { @@ -421,7 +459,8 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) err = bpf_map_get_info_by_fd(map_fd, &info, &len); if (err) { err = -errno; - pr_warn("user ringbuf: failed to get map info for fd=%d: %d\n", map_fd, err); + pr_warn("user ringbuf: failed to get map info for fd=%d: %s\n", + map_fd, errstr(err)); return err; } @@ -437,8 +476,8 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) tmp = mmap(NULL, rb->page_size, PROT_READ, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %d\n", - map_fd, err); + pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %s\n", + map_fd, errstr(err)); return err; } rb->consumer_pos = tmp; @@ -457,8 +496,8 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n", - map_fd, err); + pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %s\n", + map_fd, errstr(err)); return err; } @@ -469,7 +508,7 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) rb_epoll->events = EPOLLOUT; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, rb_epoll) < 0) { err = -errno; - pr_warn("user ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err); + pr_warn("user ringbuf: failed to epoll add map fd=%d: %s\n", map_fd, errstr(err)); return err; } @@ -494,7 +533,7 @@ user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts) rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (rb->epoll_fd < 0) { err = -errno; - pr_warn("user ringbuf: failed to create epoll instance: %d\n", err); + pr_warn("user ringbuf: failed to create epoll instance: 
%s\n", errstr(err)); goto err_out; } diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 1e82ab06c3eb..4d5fa079b5d6 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -107,7 +107,7 @@ static inline void skel_free(const void *p) * The loader program will perform probe_read_kernel() from maps.rodata.initial_value. * skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree - * is not nessary. + * is not necessary. * * For user space: * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly. @@ -351,10 +351,11 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) attr.test.ctx_size_in = opts->ctx->sz; err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz); if (err < 0 || (int)attr.test.retval < 0) { - opts->errstr = "failed to execute loader prog"; if (err < 0) { + opts->errstr = "failed to execute loader prog"; set_err; } else { + opts->errstr = "error returned by loader prog"; err = (int)attr.test.retval; #ifndef __KERNEL__ errno = -err; diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c index 146da01979c7..9a541762f54c 100644 --- a/tools/lib/bpf/str_error.c +++ b/tools/lib/bpf/str_error.c @@ -2,8 +2,13 @@ #undef _GNU_SOURCE #include <string.h> #include <stdio.h> +#include <errno.h> #include "str_error.h" +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -15,7 +20,85 @@ char *libbpf_strerror_r(int err, char *dst, int len) { int ret = strerror_r(err < 0 ? 
-err : err, dst, len); - if (ret) - snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret); + /* on glibc <2.13, ret == -1 and errno is set, if strerror_r() can't + * handle the error, on glibc >=2.13 *positive* (errno-like) error + * code is returned directly + */ + if (ret == -1) + ret = errno; + if (ret) { + if (ret == EINVAL) + /* strerror_r() doesn't recognize this specific error */ + snprintf(dst, len, "unknown error (%d)", err < 0 ? err : -err); + else + snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret); + } return dst; } + +const char *libbpf_errstr(int err) +{ + static __thread char buf[12]; + + if (err > 0) + err = -err; + + switch (err) { + case -E2BIG: return "-E2BIG"; + case -EACCES: return "-EACCES"; + case -EADDRINUSE: return "-EADDRINUSE"; + case -EADDRNOTAVAIL: return "-EADDRNOTAVAIL"; + case -EAGAIN: return "-EAGAIN"; + case -EALREADY: return "-EALREADY"; + case -EBADF: return "-EBADF"; + case -EBADFD: return "-EBADFD"; + case -EBUSY: return "-EBUSY"; + case -ECANCELED: return "-ECANCELED"; + case -ECHILD: return "-ECHILD"; + case -EDEADLK: return "-EDEADLK"; + case -EDOM: return "-EDOM"; + case -EEXIST: return "-EEXIST"; + case -EFAULT: return "-EFAULT"; + case -EFBIG: return "-EFBIG"; + case -EILSEQ: return "-EILSEQ"; + case -EINPROGRESS: return "-EINPROGRESS"; + case -EINTR: return "-EINTR"; + case -EINVAL: return "-EINVAL"; + case -EIO: return "-EIO"; + case -EISDIR: return "-EISDIR"; + case -ELOOP: return "-ELOOP"; + case -EMFILE: return "-EMFILE"; + case -EMLINK: return "-EMLINK"; + case -EMSGSIZE: return "-EMSGSIZE"; + case -ENAMETOOLONG: return "-ENAMETOOLONG"; + case -ENFILE: return "-ENFILE"; + case -ENODATA: return "-ENODATA"; + case -ENODEV: return "-ENODEV"; + case -ENOENT: return "-ENOENT"; + case -ENOEXEC: return "-ENOEXEC"; + case -ENOLINK: return "-ENOLINK"; + case -ENOMEM: return "-ENOMEM"; + case -ENOSPC: return "-ENOSPC"; + case -ENOTBLK: return "-ENOTBLK"; + case -ENOTDIR: return "-ENOTDIR"; + case -ENOTSUPP: return 
"-ENOTSUPP"; + case -ENOTTY: return "-ENOTTY"; + case -ENXIO: return "-ENXIO"; + case -EOPNOTSUPP: return "-EOPNOTSUPP"; + case -EOVERFLOW: return "-EOVERFLOW"; + case -EPERM: return "-EPERM"; + case -EPIPE: return "-EPIPE"; + case -EPROTO: return "-EPROTO"; + case -EPROTONOSUPPORT: return "-EPROTONOSUPPORT"; + case -ERANGE: return "-ERANGE"; + case -EROFS: return "-EROFS"; + case -ESPIPE: return "-ESPIPE"; + case -ESRCH: return "-ESRCH"; + case -ETXTBSY: return "-ETXTBSY"; + case -EUCLEAN: return "-EUCLEAN"; + case -EXDEV: return "-EXDEV"; + default: + snprintf(buf, sizeof(buf), "%d", err); + return buf; + } +} diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index 626d7ffb03d6..53e7fbffc13e 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -6,4 +6,14 @@ char *libbpf_strerror_r(int err, char *dst, int len); +/** + * @brief **libbpf_errstr()** returns string corresponding to numeric errno + * @param err negative numeric errno + * @return pointer to string representation of the errno, that is invalidated + * upon the next call. + */ +const char *libbpf_errstr(int err); + +#define errstr(err) libbpf_errstr(err) + #endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index f6763300b26a..2a7865c8e3fe 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -39,7 +39,7 @@ enum __bpf_usdt_arg_type { struct __bpf_usdt_arg_spec { /* u64 scalar interpreted depending on arg_type, see below */ __u64 val_off; - /* arg location case, see bpf_udst_arg() for details */ + /* arg location case, see bpf_usdt_arg() for details */ enum __bpf_usdt_arg_type arg_type; /* offset of referenced register within struct pt_regs */ short reg_off; @@ -108,6 +108,38 @@ int bpf_usdt_arg_cnt(struct pt_regs *ctx) return spec->arg_cnt; } +/* Returns the size in bytes of the #*arg_num* (zero-indexed) USDT argument. + * Returns negative error if argument is not found or arg_num is invalid. 
+ */ +static __always_inline +int bpf_usdt_arg_size(struct pt_regs *ctx, __u64 arg_num) +{ + struct __bpf_usdt_arg_spec *arg_spec; + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + if (arg_num >= BPF_USDT_MAX_ARG_CNT) + return -ENOENT; + barrier_var(arg_num); + if (arg_num >= spec->arg_cnt) + return -ENOENT; + + arg_spec = &spec->args[arg_num]; + + /* arg_spec->arg_bitshift = 64 - arg_sz * 8 + * so: arg_sz = (64 - arg_spec->arg_bitshift) / 8 + */ + return (unsigned int)(64 - arg_spec->arg_bitshift) / 8; +} + /* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res. * Returns 0 on success; negative error, otherwise. * On error *res is guaranteed to be set to zero. @@ -214,18 +246,18 @@ long bpf_usdt_cookie(struct pt_regs *ctx) /* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */ #define ___bpf_usdt_args0() ctx -#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); (void *)_x; }) -#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); (void *)_x; }) -#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); (void *)_x; }) -#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); (void *)_x; }) -#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); (void *)_x; }) -#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); (void *)_x; }) -#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); (void *)_x; }) -#define ___bpf_usdt_args8(x, args...) 
___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); (void *)_x; }) -#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); (void *)_x; }) -#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); (void *)_x; }) -#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); (void *)_x; }) -#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); (void *)_x; }) +#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); _x; }) +#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); _x; }) +#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); _x; }) +#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); _x; }) +#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); _x; }) +#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); _x; }) +#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); _x; }) +#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); _x; }) +#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); _x; }) +#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); _x; }) +#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); _x; }) +#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); _x; }) #define ___bpf_usdt_args(args...) 
___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args) /* diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 93794f01bb67..4e4a52742b01 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -20,6 +20,7 @@ #include "libbpf_common.h" #include "libbpf_internal.h" #include "hashmap.h" +#include "str_error.h" /* libbpf's USDT support consists of BPF-side state/code and user-space * state/code working together in concert. BPF-side parts are defined in @@ -465,8 +466,8 @@ static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, goto proceed; if (!realpath(lib_path, path)) { - pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n", - lib_path, -errno); + pr_warn("usdt: failed to get absolute path of '%s' (err %s), using path as is...\n", + lib_path, errstr(-errno)); libbpf_strlcpy(path, lib_path, sizeof(path)); } @@ -475,8 +476,8 @@ proceed: f = fopen(line, "re"); if (!f) { err = -errno; - pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", - line, lib_path, err); + pr_warn("usdt: failed to open '%s' to get base addr of '%s': %s\n", + line, lib_path, errstr(err)); return err; } @@ -606,7 +607,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * err = parse_elf_segs(elf, path, &segs, &seg_cnt); if (err) { - pr_warn("usdt: failed to process ELF program segments for '%s': %d\n", path, err); + pr_warn("usdt: failed to process ELF program segments for '%s': %s\n", + path, errstr(err)); goto err_out; } @@ -659,7 +661,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation */ usdt_abs_ip = note.loc_addr; - if (base_addr) + if (base_addr && note.base_addr) usdt_abs_ip += base_addr - note.base_addr; /* When attaching uprobes (which is what USDTs basically are) @@ -708,8 +710,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * 
if (vma_seg_cnt == 0) { err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt); if (err) { - pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", - pid, path, err); + pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %s\n", + pid, path, errstr(err)); goto err_out; } } @@ -1047,8 +1049,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) { err = -errno; - pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %d\n", - spec_id, usdt_provider, usdt_name, path, err); + pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %s\n", + spec_id, usdt_provider, usdt_name, path, errstr(err)); goto err_out; } if (!man->has_bpf_cookie && @@ -1058,9 +1060,9 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n", spec_id, usdt_provider, usdt_name, path); } else { - pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %d\n", + pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %s\n", target->abs_ip, spec_id, usdt_provider, usdt_name, - path, err); + path, errstr(err)); } goto err_out; } @@ -1076,8 +1078,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct target->rel_ip, &opts); err = libbpf_get_error(uprobe_link); if (err) { - pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", - i, usdt_provider, usdt_name, path, err); + pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %s\n", + i, usdt_provider, usdt_name, path, errstr(err)); goto err_out; } @@ -1099,8 +1101,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct NULL, &opts_multi); if (!link->multi_link) { err = -errno; - pr_warn("usdt: failed to attach uprobe multi for '%s:%s' in '%s': %d\n", - 
usdt_provider, usdt_name, path, err); + pr_warn("usdt: failed to attach uprobe multi for '%s:%s' in '%s': %s\n", + usdt_provider, usdt_name, path, errstr(err)); goto err_out; } diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c index 3f26d629b2b4..88c376a8348d 100644 --- a/tools/lib/bpf/zip.c +++ b/tools/lib/bpf/zip.c @@ -223,7 +223,7 @@ struct zip_archive *zip_archive_open(const char *path) if (!archive) { munmap(data, size); return ERR_PTR(-ENOMEM); - }; + } archive->data = data; archive->size = size; diff --git a/tools/lib/cmdline.c b/tools/lib/cmdline.c new file mode 100644 index 000000000000..c85f00f43c5e --- /dev/null +++ b/tools/lib/cmdline.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * From lib/cmdline.c + */ +#include <stdlib.h> + +#if __has_attribute(__fallthrough__) +# define fallthrough __attribute__((__fallthrough__)) +#else +# define fallthrough do {} while (0) /* fallthrough */ +#endif + +unsigned long long memparse(const char *ptr, char **retptr) +{ + char *endptr; /* local pointer to end of parsed string */ + + unsigned long long ret = strtoll(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + fallthrough; + case 'P': + case 'p': + ret <<= 10; + fallthrough; + case 'T': + case 't': + ret <<= 10; + fallthrough; + case 'G': + case 'g': + ret <<= 10; + fallthrough; + case 'M': + case 'm': + ret <<= 10; + fallthrough; + case 'K': + case 'k': + ret <<= 10; + endptr++; + fallthrough; + default: + break; + } + + if (retptr) + *retptr = endptr; + + return ret; +} diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c index 10c067e3a8d2..bb99e493dcd1 100644 --- a/tools/lib/list_sort.c +++ b/tools/lib/list_sort.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> #include <linux/compiler.h> #include <linux/export.h> -#include <linux/string.h> #include <linux/list_sort.h> #include <linux/list.h> @@ -52,7 +50,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, 
struct list_head *head, struct list_head *a, struct list_head *b) { struct list_head *tail = head; - u8 count = 0; for (;;) { /* if equal, take 'a' -- important for sort stability */ @@ -78,15 +75,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, /* Finish linking remainder of list b on to tail */ tail->next = b; do { - /* - * If the merge is highly unbalanced (e.g. the input is - * already sorted), this loop may run many iterations. - * Continue callbacks to the client even though no - * element comparison is needed, so the client's cmp() - * routine can invoke cond_resched() periodically. - */ - if (unlikely(!++count)) - cmp(priv, b, b); b->prev = tail; tail = b; b = b->next; diff --git a/tools/lib/perf/.gitignore b/tools/lib/perf/.gitignore new file mode 100644 index 000000000000..0f5b4af63f62 --- /dev/null +++ b/tools/lib/perf/.gitignore @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +libperf.pc +libperf.so.* +tests-shared +tests-static diff --git a/tools/lib/perf/Documentation/Makefile b/tools/lib/perf/Documentation/Makefile index 972754082a85..573ca5b27556 100644 --- a/tools/lib/perf/Documentation/Makefile +++ b/tools/lib/perf/Documentation/Makefile @@ -121,7 +121,7 @@ install-man: all $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \ $(INSTALL) -m 644 $(MAN_7) $(DESTDIR)$(man7dir); -install-html: +install-html: $(MAN_HTML) $(call QUIET_INSTALL, html) \ $(INSTALL) -d -m 755 $(DESTDIR)$(htmldir); \ $(INSTALL) -m 644 $(MAN_HTML) $(DESTDIR)$(htmldir); \ diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index fcfb9499ef9c..59aabdd3cabf 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -39,7 +39,6 @@ SYNOPSIS struct perf_cpu_map *perf_cpu_map__new_any_cpu(void); struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); - struct perf_cpu_map *perf_cpu_map__read(FILE *file); struct perf_cpu_map *perf_cpu_map__get(struct 
perf_cpu_map *map); struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other); diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 3a9b2140aa04..7fbb50b74c00 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -39,29 +39,10 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - TEST_ARGS := $(if $(V),-v) -# Set compile option CFLAGS -ifdef EXTRA_CFLAGS - CFLAGS := $(EXTRA_CFLAGS) -else - CFLAGS := -g -Wall -endif - INCLUDES = \ +-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -70,11 +51,12 @@ INCLUDES = \ -I$(srctree)/tools/include/uapi # Append required CFLAGS -override CFLAGS += $(EXTRA_WARNINGS) -override CFLAGS += -Werror -Wall +override CFLAGS += -g -Werror -Wall override CFLAGS += -fPIC override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden +override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += $(EXTRA_CFLAGS) all: @@ -118,7 +100,16 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBPERF_IN): FORCE +uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm +ifeq ($(SRCARCH),arm64) + syscall-y := $(uapi-asm)/unistd_64.h +endif +uapi-asm-generic: + $(if $(syscall-y),\ + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \ + generic=include/uapi/asm-generic $(syscall-y),) + +$(LIBPERF_IN): uapi-asm-generic FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) @@ -139,7 +130,7 @@ all: fixdep clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS 
$(LIBPERF_PC) \ - $(TESTS_STATIC) $(TESTS_SHARED) + $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y) TESTS_IN = tests-in.o diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 4adcd7920d03..4454a5987570 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> #include <perf/cpumap.h> #include <stdlib.h> #include <linux/refcount.h> @@ -10,6 +11,9 @@ #include <ctype.h> #include <limits.h> #include "internal.h" +#include <api/fs/fs.h> + +#define MAX_NR_CPUS 4096 void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus) { @@ -18,9 +22,13 @@ void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus) struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) { - RC_STRUCT(perf_cpu_map) *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); + RC_STRUCT(perf_cpu_map) *cpus; struct perf_cpu_map *result; + if (nr_cpus == 0) + return NULL; + + cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); if (ADD_RC_CHK(result, cpus)) { cpus->nr = nr_cpus; refcount_set(&cpus->refcnt, 1); @@ -96,12 +104,12 @@ static struct perf_cpu_map *cpu_map__new_sysconf(void) static struct perf_cpu_map *cpu_map__new_sysfs_online(void) { struct perf_cpu_map *cpus = NULL; - FILE *onlnf; + char *buf = NULL; + size_t buf_len; - onlnf = fopen("/sys/devices/system/cpu/online", "r"); - if (onlnf) { - cpus = perf_cpu_map__read(onlnf); - fclose(onlnf); + if (sysfs__read_str("devices/system/cpu/online", &buf, &buf_len) >= 0) { + cpus = perf_cpu_map__new(buf); + free(buf); } return cpus; } @@ -154,62 +162,6 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu return cpus; } -struct perf_cpu_map *perf_cpu_map__read(FILE *file) -{ - struct perf_cpu_map *cpus = NULL; - int nr_cpus = 0; - struct perf_cpu *tmp_cpus = NULL, *tmp; - int max_entries = 0; - int n, cpu, prev; - char sep; - - sep = 0; - prev = -1; - for (;;) { - n = fscanf(file, "%u%c", &cpu, 
&sep); - if (n <= 0) - break; - if (prev >= 0) { - int new_max = nr_cpus + cpu - prev - 1; - - WARN_ONCE(new_max >= MAX_NR_CPUS, "Perf can support %d CPUs. " - "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS); - - if (new_max >= max_entries) { - max_entries = new_max + MAX_NR_CPUS / 2; - tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); - if (tmp == NULL) - goto out_free_tmp; - tmp_cpus = tmp; - } - - while (++prev < cpu) - tmp_cpus[nr_cpus++].cpu = prev; - } - if (nr_cpus == max_entries) { - max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); - if (tmp == NULL) - goto out_free_tmp; - tmp_cpus = tmp; - } - - tmp_cpus[nr_cpus++].cpu = cpu; - if (n == 2 && sep == '-') - prev = cpu; - else - prev = -1; - if (n == 1 || sep == '\n') - break; - } - - if (nr_cpus > 0) - cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); -out_free_tmp: - free(tmp_cpus); - return cpus; -} - struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) { struct perf_cpu_map *cpus = NULL; @@ -233,8 +185,8 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) while (isdigit(*cpu_list)) { p = NULL; start_cpu = strtoul(cpu_list, &p, 0); - if (start_cpu >= INT_MAX - || (*p != '\0' && *p != ',' && *p != '-')) + if (start_cpu >= INT16_MAX + || (*p != '\0' && *p != ',' && *p != '-' && *p != '\n')) goto invalid; if (*p == '-') { @@ -242,7 +194,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) p = NULL; end_cpu = strtoul(cpu_list, &p, 0); - if (end_cpu >= INT_MAX || (*p != '\0' && *p != ',')) + if (end_cpu >= INT16_MAX || (*p != '\0' && *p != ',' && *p != '\n')) goto invalid; if (end_cpu < start_cpu) @@ -257,17 +209,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) for (; start_cpu <= end_cpu; start_cpu++) { /* check for duplicates */ for (i = 0; i < nr_cpus; i++) - if (tmp_cpus[i].cpu == (int)start_cpu) + if (tmp_cpus[i].cpu == (int16_t)start_cpu) goto invalid; if (nr_cpus == max_entries) { - max_entries += 
MAX_NR_CPUS; + max_entries += max(end_cpu - start_cpu + 1, 16UL); tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto invalid; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++].cpu = (int)start_cpu; + tmp_cpus[nr_cpus++].cpu = (int16_t)start_cpu; } if (*p) ++p; @@ -275,14 +227,15 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) cpu_list = p; } - if (nr_cpus > 0) + if (nr_cpus > 0) { cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else if (*cpu_list != '\0') { + } else if (*cpu_list != '\0') { pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.", cpu_list); cpus = perf_cpu_map__new_online_cpus(); - } else + } else { cpus = perf_cpu_map__new_any_cpu(); + } invalid: free(tmp_cpus); out: @@ -316,6 +269,19 @@ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map) return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true; } +bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map) +{ + if (!map) + return true; + + return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1; +} + +bool perf_cpu_map__is_empty(const struct perf_cpu_map *map) +{ + return map == NULL; +} + int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) { int low, high; @@ -372,6 +338,20 @@ bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map) return map && __perf_cpu_map__cpu(map, 0).cpu == -1; } +struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map) +{ + struct perf_cpu cpu, result = { + .cpu = -1 + }; + int idx; + + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) { + result = cpu; + break; + } + return result; +} + struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map) { struct perf_cpu result = { @@ -405,46 +385,49 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu } /* - * Merge two cpumaps + * Merge two cpumaps. 
* - * orig either gets freed and replaced with a new map, or reused - * with no reference count change (similar to "realloc") - * other has its reference count increased. + * If 'other' is subset of '*orig', '*orig' keeps itself with no reference count + * change (similar to "realloc"). + * + * If '*orig' is subset of 'other', '*orig' reuses 'other' with its reference + * count increased. + * + * Otherwise, '*orig' gets freed and replaced with a new map. */ - -struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, - struct perf_cpu_map *other) +int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other) { struct perf_cpu *tmp_cpus; int tmp_len; int i, j, k; struct perf_cpu_map *merged; - if (perf_cpu_map__is_subset(orig, other)) - return orig; - if (perf_cpu_map__is_subset(other, orig)) { - perf_cpu_map__put(orig); - return perf_cpu_map__get(other); + if (perf_cpu_map__is_subset(*orig, other)) + return 0; + if (perf_cpu_map__is_subset(other, *orig)) { + perf_cpu_map__put(*orig); + *orig = perf_cpu_map__get(other); + return 0; } - tmp_len = __perf_cpu_map__nr(orig) + __perf_cpu_map__nr(other); + tmp_len = __perf_cpu_map__nr(*orig) + __perf_cpu_map__nr(other); tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); if (!tmp_cpus) - return NULL; + return -ENOMEM; /* Standard merge algorithm from wikipedia */ i = j = k = 0; - while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) { - if (__perf_cpu_map__cpu(orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) { - if (__perf_cpu_map__cpu(orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu) + while (i < __perf_cpu_map__nr(*orig) && j < __perf_cpu_map__nr(other)) { + if (__perf_cpu_map__cpu(*orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) { + if (__perf_cpu_map__cpu(*orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu) j++; - tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++); + tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++); } else tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++); } 
- while (i < __perf_cpu_map__nr(orig)) - tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++); + while (i < __perf_cpu_map__nr(*orig)) + tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++); while (j < __perf_cpu_map__nr(other)) tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++); @@ -452,8 +435,9 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, merged = cpu_map__trim_new(k, tmp_cpus); free(tmp_cpus); - perf_cpu_map__put(orig); - return merged; + perf_cpu_map__put(*orig); + *orig = merged; + return 0; } struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index c6d67fc9e57e..b1f4c8176b32 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -47,6 +47,20 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, */ perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus); + + /* + * Empty cpu lists would eventually get opened as "any" so remove + * genuinely empty ones before they're opened in the wrong place. 
+ */ + if (perf_cpu_map__is_empty(evsel->cpus)) { + struct perf_evsel *next = perf_evlist__next(evlist, evsel); + + perf_evlist__remove(evlist, evsel); + /* Keep idx contiguous */ + if (next) + list_for_each_entry_from(next, &evlist->entries, node) + next->idx--; + } } else if (!evsel->own_cpus || evlist->has_user_cpus || (!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) { /* @@ -75,16 +89,16 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, evsel->threads = perf_thread_map__get(evlist->threads); } - evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); + perf_cpu_map__merge(&evlist->all_cpus, evsel->cpus); } static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel, *n; evlist->needs_map_propagation = true; - perf_evlist__for_each_evsel(evlist, evsel) + list_for_each_entry_safe(evsel, n, &evlist->entries, node) __perf_evlist__propagate_maps(evlist, evsel); } diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index c07160953224..c475319e2e41 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -5,6 +5,7 @@ #include <perf/evsel.h> #include <perf/cpumap.h> #include <perf/threadmap.h> +#include <linux/hash.h> #include <linux/list.h> #include <internal/evsel.h> #include <linux/zalloc.h> @@ -23,6 +24,7 @@ void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx) { INIT_LIST_HEAD(&evsel->node); + INIT_LIST_HEAD(&evsel->per_stream_periods); evsel->attr = *attr; evsel->idx = idx; evsel->leader = evsel; @@ -531,10 +533,56 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) void perf_evsel__free_id(struct perf_evsel *evsel) { + struct perf_sample_id_period *pos, *n; + xyarray__delete(evsel->sample_id); evsel->sample_id = NULL; zfree(&evsel->id); evsel->ids = 0; + + perf_evsel_for_each_per_thread_period_safe(evsel, n, pos) { + list_del_init(&pos->node); + 
free(pos); + } +} + +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel) +{ + return (evsel->attr.sample_type & PERF_SAMPLE_READ) && + (evsel->attr.sample_type & PERF_SAMPLE_TID) && + evsel->attr.inherit; +} + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, bool per_thread) +{ + struct hlist_head *head; + struct perf_sample_id_period *res; + int hash; + + if (!per_thread) + return &sid->period; + + hash = hash_32(tid, PERF_SAMPLE_ID__HLIST_BITS); + head = &sid->periods[hash]; + + hlist_for_each_entry(res, head, hnode) + if (res->tid == tid) + return &res->period; + + if (sid->evsel == NULL) + return NULL; + + res = zalloc(sizeof(struct perf_sample_id_period)); + if (res == NULL) + return NULL; + + INIT_LIST_HEAD(&res->node); + res->tid = tid; + + list_add_tail(&res->node, &sid->evsel->per_stream_periods); + hlist_add_head(&res->hnode, &sid->periods[hash]); + + return &res->period; } void perf_counts_values__scale(struct perf_counts_values *count, diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 49649eb51ce4..e2be2d17c32b 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -21,10 +21,6 @@ DECLARE_RC_STRUCT(perf_cpu_map) { struct perf_cpu map[]; }; -#ifndef MAX_NR_CPUS -#define MAX_NR_CPUS 2048 -#endif - struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus); int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu); bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b); diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 5cd220a61962..ea78defa77d0 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -11,6 +11,32 @@ struct perf_thread_map; struct xyarray; +/** + * The per-thread accumulated period storage node. 
+ */ +struct perf_sample_id_period { + struct list_head node; + struct hlist_node hnode; + /* Holds total ID period value for PERF_SAMPLE_READ processing. */ + u64 period; + /* The TID that the values belongs to */ + u32 tid; +}; + +/** + * perf_evsel_for_each_per_thread_period_safe - safely iterate thru all the + * per_stream_periods + * @evlist:perf_evsel instance to iterate + * @item: struct perf_sample_id_period iterator + * @tmp: struct perf_sample_id_period temp iterator + */ +#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \ + list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node) + + +#define PERF_SAMPLE_ID__HLIST_BITS 4 +#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS) + /* * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are * more than one entry in the evlist. @@ -34,8 +60,32 @@ struct perf_sample_id { pid_t machine_pid; struct perf_cpu vcpu; - /* Holds total ID period value for PERF_SAMPLE_READ processing. */ - u64 period; + /* + * Per-thread, and global event counts are mutually exclusive: + * Whilst it is possible to combine events into a group with differing + * values of PERF_SAMPLE_READ, it is not valid to have inconsistent + * values for `inherit`. Therefore it is not possible to have a + * situation where a per-thread event is sampled as a global event; + * all !inherit groups are global, and all groups where the sampling + * event is inherit + PERF_SAMPLE_READ will be per-thread. Any event + * that is part of such a group that is inherit but not PERF_SAMPLE_READ + * will be read as per-thread. If such an event can also trigger a + * sample (such as with sample_period > 0) then it will not cause + * `read_format` to be included in its PERF_RECORD_SAMPLE, and + * therefore will not expose the per-thread group members as global. + */ + union { + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is not per-thread). 
+ */ + u64 period; + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is per-thread). + */ + struct hlist_head periods[PERF_SAMPLE_ID__HLIST_SIZE]; + }; }; struct perf_evsel { @@ -58,6 +108,10 @@ struct perf_evsel { u32 ids; struct perf_evsel *leader; + /* For events where the read_format value is per-thread rather than + * global, stores the per-thread cumulative period */ + struct list_head per_stream_periods; + /* parse modifier helper */ int nr_members; /* @@ -88,4 +142,9 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_id(struct perf_evsel *evsel); +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel); + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, + bool per_thread); + #endif /* __LIBPERF_INTERNAL_EVSEL_H */ diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 228c6c629b0c..8c1ab0f9194e 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,12 +3,12 @@ #define __LIBPERF_CPUMAP_H #include <perf/core.h> -#include <stdio.h> #include <stdbool.h> +#include <stdint.h> /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */ struct perf_cpu { - int cpu; + int16_t cpu; }; struct perf_cache { @@ -37,10 +37,9 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void); * perf_cpu_map__new_online_cpus is returned. 
*/ LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, - struct perf_cpu_map *other); +LIBPERF_API int perf_cpu_map__merge(struct perf_cpu_map **orig, + struct perf_cpu_map *other); LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig, struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); @@ -61,6 +60,22 @@ LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value. */ LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map); +/** + * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value. + */ +LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map); +/** + * perf_cpu_map__is_empty - does the map contain no values and it doesn't + * contain the special "any CPU"/dummy value. + */ +LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map); +/** + * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value. + */ +LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map); +/** + * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value. 
+ */ LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map); LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index ae64090184d3..37bb7771d914 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -77,6 +77,12 @@ struct perf_record_lost_samples { __u64 lost; }; +#define MAX_ID_HDR_ENTRIES 6 +struct perf_record_lost_samples_and_ids { + struct perf_record_lost_samples lost; + __u64 sample_ids[MAX_ID_HDR_ENTRIES]; +}; + /* * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST */ diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 10b3f3722642..fdd8304fe9d0 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -6,10 +6,13 @@ LIBPERF_0.0.1 { perf_cpu_map__get; perf_cpu_map__put; perf_cpu_map__new; - perf_cpu_map__read; perf_cpu_map__nr; perf_cpu_map__cpu; perf_cpu_map__has_any_cpu_or_is_empty; + perf_cpu_map__is_any_cpu_or_is_empty; + perf_cpu_map__is_empty; + perf_cpu_map__has_any_cpu; + perf_cpu_map__min; perf_cpu_map__max; perf_cpu_map__has; perf_thread_map__new_array; diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index 0c903c2372c9..c1a51d925e0e 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -279,7 +279,7 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map) if (!refcount_read(&map->refcnt)) return NULL; - /* non-overwirte doesn't pause the ringbuffer */ + /* non-overwrite doesn't pause the ringbuffer */ if (!map->overwrite) map->end = perf_mmap__read_head(map); diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c index 727396de6be5..9e7307186b7f 100644 --- a/tools/lib/rbtree.c +++ b/tools/lib/rbtree.c @@ -58,7 +58,7 @@ static inline void rb_set_black(struct rb_node *rb) { - rb->__rb_parent_color |= 
RB_BLACK; + rb->__rb_parent_color += RB_BLACK; } static inline struct rb_node *rb_red_parent(struct rb_node *red) diff --git a/tools/lib/slab.c b/tools/lib/slab.c index 959997fb0652..981a21404f32 100644 --- a/tools/lib/slab.c +++ b/tools/lib/slab.c @@ -36,3 +36,19 @@ void kfree(void *p) printf("Freeing %p to malloc\n", p); free(p); } + +void *kmalloc_array(size_t n, size_t size, gfp_t gfp) +{ + void *ret; + + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return NULL; + + ret = calloc(n, size); + uatomic_inc(&kmalloc_nr_allocated); + if (kmalloc_verbose) + printf("Allocating %p from calloc\n", ret); + if (gfp & __GFP_ZERO) + memset(ret, 0, n * size); + return ret; +} diff --git a/tools/lib/string.c b/tools/lib/string.c index 8b6892f959ab..3126d2cff716 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -153,6 +153,19 @@ char *strim(char *s) return skip_spaces(s); } +/* + * remove_spaces - Removes whitespaces from @s + */ +void remove_spaces(char *s) +{ + char *d = s; + + do { + while (*d == ' ') + ++d; + } while ((*s++ = *d++)); +} + /** * strreplace - Replace all occurrences of character in string. * @s: The string to operate on. 
diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index b87213263a5e..8703ab487b68 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -38,10 +38,8 @@ endif ifeq ($(DEBUG),1) CFLAGS += -O0 -else ifeq ($(CC_NO_CLANG), 0) - CFLAGS += -O3 else - CFLAGS += -O6 + CFLAGS += -O3 endif # Treat warnings as errors unless directed not to @@ -76,7 +74,7 @@ include $(srctree)/tools/build/Makefile.include all: fixdep $(LIBFILE) -$(SUBCMD_IN): FORCE +$(SUBCMD_IN): fixdep FORCE @$(MAKE) $(build)=libsubcmd $(LIBFILE): $(SUBCMD_IN) diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index 9fa75943f2ed..555d617c1f50 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -806,18 +806,30 @@ static int option__cmp(const void *va, const void *vb) static struct option *options__order(const struct option *opts) { - int nr_opts = 0, nr_group = 0, len; - const struct option *o = opts; - struct option *opt, *ordered, *group; - - for (o = opts; o->type != OPTION_END; o++) - ++nr_opts; - - len = sizeof(*o) * (nr_opts + 1); - ordered = malloc(len); - if (!ordered) - goto out; - memcpy(ordered, opts, len); + int nr_opts = 0, nr_group = 0, nr_parent = 0, len; + const struct option *o = NULL, *p = opts; + struct option *opt, *ordered = NULL, *group; + + /* flatten the options that have parents */ + for (p = opts; p != NULL; p = o->parent) { + for (o = p; o->type != OPTION_END; o++) + ++nr_opts; + + /* + * the length is given by the number of options plus a null + * terminator for the last loop iteration. 
+ */ + len = sizeof(*o) * (nr_opts + !o->parent); + group = realloc(ordered, len); + if (!group) + goto out; + ordered = group; + memcpy(&ordered[nr_parent], p, sizeof(*o) * (nr_opts - nr_parent)); + + nr_parent = nr_opts; + } + /* copy the last OPTION_END */ + memcpy(&ordered[nr_opts], o, sizeof(*o)); /* sort each option group individually */ for (opt = group = ordered; opt->type != OPTION_END; opt++) { diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c index d435eb42354b..0a764c25c384 100644 --- a/tools/lib/subcmd/run-command.c +++ b/tools/lib/subcmd/run-command.c @@ -2,6 +2,7 @@ #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> +#include <ctype.h> #include <fcntl.h> #include <string.h> #include <linux/string.h> @@ -165,43 +166,97 @@ int start_command(struct child_process *cmd) return 0; } -static int wait_or_whine(pid_t pid) +static int wait_or_whine(struct child_process *cmd, bool block) { - char sbuf[STRERR_BUFSIZE]; + bool finished = cmd->finished; + int result = cmd->finish_result; - for (;;) { + while (!finished) { int status, code; - pid_t waiting = waitpid(pid, &status, 0); + pid_t waiting = waitpid(cmd->pid, &status, block ? 
0 : WNOHANG); + + if (!block && waiting == 0) + break; + + if (waiting < 0 && errno == EINTR) + continue; + finished = true; if (waiting < 0) { - if (errno == EINTR) - continue; + char sbuf[STRERR_BUFSIZE]; + fprintf(stderr, " Error: waitpid failed (%s)", str_error_r(errno, sbuf, sizeof(sbuf))); - return -ERR_RUN_COMMAND_WAITPID; - } - if (waiting != pid) - return -ERR_RUN_COMMAND_WAITPID_WRONG_PID; - if (WIFSIGNALED(status)) - return -ERR_RUN_COMMAND_WAITPID_SIGNAL; - - if (!WIFEXITED(status)) - return -ERR_RUN_COMMAND_WAITPID_NOEXIT; - code = WEXITSTATUS(status); - switch (code) { - case 127: - return -ERR_RUN_COMMAND_EXEC; - case 0: - return 0; - default: - return -code; + result = -ERR_RUN_COMMAND_WAITPID; + } else if (waiting != cmd->pid) { + result = -ERR_RUN_COMMAND_WAITPID_WRONG_PID; + } else if (WIFSIGNALED(status)) { + result = -ERR_RUN_COMMAND_WAITPID_SIGNAL; + } else if (!WIFEXITED(status)) { + result = -ERR_RUN_COMMAND_WAITPID_NOEXIT; + } else { + code = WEXITSTATUS(status); + switch (code) { + case 127: + result = -ERR_RUN_COMMAND_EXEC; + break; + case 0: + result = 0; + break; + default: + result = -code; + break; + } } } + if (finished) { + cmd->finished = 1; + cmd->finish_result = result; + } + return result; +} + +int check_if_command_finished(struct child_process *cmd) +{ +#ifdef __linux__ + char filename[FILENAME_MAX + 12]; + char status_line[256]; + FILE *status_file; + + /* + * Check by reading /proc/<pid>/status as calling waitpid causes + * stdout/stderr to be closed and data lost. + */ + sprintf(filename, "/proc/%d/status", cmd->pid); + status_file = fopen(filename, "r"); + if (status_file == NULL) { + /* Open failed assume finish_command was called. */ + return true; + } + while (fgets(status_line, sizeof(status_line), status_file) != NULL) { + char *p; + + if (strncmp(status_line, "State:", 6)) + continue; + + fclose(status_file); + p = status_line + 6; + while (isspace(*p)) + p++; + return *p == 'Z' ? 
1 : 0; + } + /* Read failed assume finish_command was called. */ + fclose(status_file); + return 1; +#else + wait_or_whine(cmd, /*block=*/false); + return cmd->finished; +#endif } int finish_command(struct child_process *cmd) { - return wait_or_whine(cmd->pid); + return wait_or_whine(cmd, /*block=*/true); } int run_command(struct child_process *cmd) diff --git a/tools/lib/subcmd/run-command.h b/tools/lib/subcmd/run-command.h index d794138a797f..b2d39de6e690 100644 --- a/tools/lib/subcmd/run-command.h +++ b/tools/lib/subcmd/run-command.h @@ -41,17 +41,20 @@ struct child_process { int err; const char *dir; const char *const *env; + int finish_result; unsigned no_stdin:1; unsigned no_stdout:1; unsigned no_stderr:1; unsigned exec_cmd:1; /* if this is to be external sub-command */ unsigned stdout_to_stderr:1; + unsigned finished:1; void (*preexec_cb)(void); /* If set, call function in child rather than doing an exec. */ int (*no_exec_cmd)(struct child_process *process); }; int start_command(struct child_process *); +int check_if_command_finished(struct child_process *); int finish_command(struct child_process *); int run_command(struct child_process *); diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h index dfac76e35ac7..c742b08815dc 100644 --- a/tools/lib/subcmd/subcmd-util.h +++ b/tools/lib/subcmd/subcmd-util.h @@ -20,8 +20,8 @@ static __noreturn inline void die(const char *err, ...) 
va_start(params, err); report(" Fatal: ", err, params); - exit(128); va_end(params); + exit(128); } #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile index 13d43c6f92b4..426b845edfac 100644 --- a/tools/lib/symbol/Makefile +++ b/tools/lib/symbol/Makefile @@ -31,11 +31,7 @@ CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu11 -U_FORTIFY_SOURCE -fPIC ifeq ($(DEBUG),0) -ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 -else - CFLAGS += -O6 -endif endif ifeq ($(DEBUG),0) diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile index 2d0d255fd0e1..a1f5e388644d 100644 --- a/tools/lib/thermal/Makefile +++ b/tools/lib/thermal/Makefile @@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Set compile option CFLAGS ifdef EXTRA_CFLAGS CFLAGS := $(EXTRA_CFLAGS) @@ -121,7 +108,9 @@ all: fixdep clean: $(call QUIET_CLEAN, libthermal) $(RM) $(LIBTHERMAL_A) \ - *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) + *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) \ + .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) \ + $(srctree)/tools/$(THERMAL_UAPI) $(LIBTHERMAL_PC): $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ diff --git a/tools/lib/thermal/commands.c b/tools/lib/thermal/commands.c index 73d4d4e8d6ec..4998cec793ed 100644 --- a/tools/lib/thermal/commands.c +++ b/tools/lib/thermal/commands.c @@ -5,6 +5,7 @@ #include <stdio.h> #include <stdlib.h> #include <unistd.h> +#include <limits.h> #include <thermal.h> #include "thermal_nl.h" @@ -33,6 +34,11 @@ static struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = { [THERMAL_GENL_ATTR_CDEV_CUR_STATE] = { .type = NLA_U32 
}, [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING }, + + /* Thresholds */ + [THERMAL_GENL_ATTR_THRESHOLD] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_THRESHOLD_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_THRESHOLD_DIRECTION] = { .type = NLA_U32 }, }; static int parse_tz_get(struct genl_info *info, struct thermal_zone **tz) @@ -182,6 +188,48 @@ static int parse_tz_get_gov(struct genl_info *info, struct thermal_zone *tz) return THERMAL_SUCCESS; } +static int parse_threshold_get(struct genl_info *info, struct thermal_zone *tz) +{ + struct nlattr *attr; + struct thermal_threshold *__tt = NULL; + size_t size = 0; + int rem; + + /* + * The size contains the size of the array and we want to + * access the last element, size - 1. + * + * The variable size is initialized to zero but it will be + * then incremented by the first if() statement. The message + * attributes are ordered, so the first if() statement will be + * always called before the second one. If it happens that is + * not the case, then it is a kernel bug. 
+ */ + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_THRESHOLD], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_THRESHOLD_TEMP) { + + size++; + + __tt = realloc(__tt, sizeof(*__tt) * (size + 2)); + if (!__tt) + return THERMAL_ERROR; + + __tt[size - 1].temperature = nla_get_u32(attr); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_THRESHOLD_DIRECTION) + __tt[size - 1].direction = nla_get_u32(attr); + } + + if (__tt) + __tt[size].temperature = INT_MAX; + + tz->thresholds = __tt; + + return THERMAL_SUCCESS; +} + static int handle_netlink(struct nl_cache_ops *unused, struct genl_cmd *cmd, struct genl_info *info, void *arg) @@ -210,6 +258,10 @@ static int handle_netlink(struct nl_cache_ops *unused, ret = parse_tz_get_gov(info, arg); break; + case THERMAL_GENL_CMD_THRESHOLD_GET: + ret = parse_threshold_get(info, arg); + break; + default: return THERMAL_ERROR; } @@ -253,6 +305,34 @@ static struct genl_cmd thermal_cmds[] = { .c_maxattr = THERMAL_GENL_ATTR_MAX, .c_attr_policy = thermal_genl_policy, }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_GET, + .c_name = (char *)"Get thresholds list", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_ADD, + .c_name = (char *)"Add a threshold", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_DELETE, + .c_name = (char *)"Delete a threshold", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_FLUSH, + .c_name = (char *)"Flush the thresholds", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, }; static struct genl_ops thermal_cmd_ops = { @@ -261,9 +341,41 @@ static struct genl_ops thermal_cmd_ops = { .o_ncmds = ARRAY_SIZE(thermal_cmds), }; 
-static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int cmd, - int flags, void *arg) +struct cmd_param { + int tz_id; + int temp; + int direction; +}; + +typedef int (*cmd_cb_t)(struct nl_msg *, struct cmd_param *); + +static int thermal_genl_tz_id_encode(struct nl_msg *msg, struct cmd_param *p) { + if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id)) + return -1; + + return 0; +} + +static int thermal_genl_threshold_encode(struct nl_msg *msg, struct cmd_param *p) +{ + if (thermal_genl_tz_id_encode(msg, p)) + return -1; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_TEMP, p->temp)) + return -1; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, p->direction)) + return -1; + + return 0; +} + +static thermal_error_t thermal_genl_auto(struct thermal_handler *th, cmd_cb_t cmd_cb, + struct cmd_param *param, + int cmd, int flags, void *arg) +{ + thermal_error_t ret = THERMAL_ERROR; struct nl_msg *msg; void *hdr; @@ -274,45 +386,95 @@ static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, thermal_cmd_ops.o_id, 0, flags, cmd, THERMAL_GENL_VERSION); if (!hdr) - return THERMAL_ERROR; + goto out; - if (id >= 0 && nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, id)) - return THERMAL_ERROR; + if (cmd_cb && cmd_cb(msg, param)) + goto out; if (nl_send_msg(th->sk_cmd, th->cb_cmd, msg, genl_handle_msg, arg)) - return THERMAL_ERROR; + goto out; + ret = THERMAL_SUCCESS; +out: nlmsg_free(msg); - return THERMAL_SUCCESS; + return ret; } thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, struct thermal_zone **tz) { - return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_TZ_GET_ID, + return thermal_genl_auto(th, NULL, NULL, THERMAL_GENL_CMD_TZ_GET_ID, NLM_F_DUMP | NLM_F_ACK, tz); } thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, struct thermal_cdev **tc) { - return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_CDEV_GET, + return thermal_genl_auto(th, 
NULL, NULL, THERMAL_GENL_CMD_CDEV_GET, NLM_F_DUMP | NLM_F_ACK, tc); } thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, struct thermal_zone *tz) { - return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TRIP, - 0, tz); + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_TRIP, 0, tz); } thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, struct thermal_zone *tz) { - return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz); + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz); } thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz) { - return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz); + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_get(struct thermal_handler *th, + struct thermal_zone *tz) +{ + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_GET, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_add(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction) +{ + struct cmd_param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; + + return thermal_genl_auto(th, thermal_genl_threshold_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_ADD, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_delete(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction) +{ + struct cmd_param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; + + return thermal_genl_auto(th, thermal_genl_threshold_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_DELETE, 0, tz); +} + +thermal_error_t 
thermal_cmd_threshold_flush(struct thermal_handler *th, + struct thermal_zone *tz) +{ + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_FLUSH, 0, tz); } thermal_error_t thermal_cmd_exit(struct thermal_handler *th) diff --git a/tools/lib/thermal/events.c b/tools/lib/thermal/events.c index a7a55d1a0c4c..bd851c869029 100644 --- a/tools/lib/thermal/events.c +++ b/tools/lib/thermal/events.c @@ -94,6 +94,30 @@ static int handle_thermal_event(struct nl_msg *n, void *arg) case THERMAL_GENL_EVENT_TZ_GOV_CHANGE: return ops->gov_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), nla_get_string(attrs[THERMAL_GENL_ATTR_GOV_NAME]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_ADD: + return ops->threshold_add(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_DELETE: + return ops->threshold_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_FLUSH: + return ops->threshold_flush(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_UP: + return ops->threshold_up(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_PREV_TEMP]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_DOWN: + return ops->threshold_down(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_PREV_TEMP]), arg); + default: return -1; } @@ -101,19 +125,24 @@ static int handle_thermal_event(struct nl_msg *n, void *arg) static void thermal_events_ops_init(struct thermal_events_ops *ops) { - 
enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create; - enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete; - enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable; - enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = !!ops->trip_low; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete; - enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add; - enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete; - enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update; - enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change; + enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create; + enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete; + enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable; + enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = !!ops->trip_low; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add; + enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update; + enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_ADD] = !!ops->threshold_add; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_DELETE] = !!ops->threshold_delete; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_FLUSH] = !!ops->threshold_flush; + 
enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_UP] = !!ops->threshold_up; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_DOWN] = !!ops->threshold_down; } thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg) diff --git a/tools/lib/thermal/include/thermal.h b/tools/lib/thermal/include/thermal.h index 1abc560602cf..818ecdfb46e5 100644 --- a/tools/lib/thermal/include/thermal.h +++ b/tools/lib/thermal/include/thermal.h @@ -4,11 +4,20 @@ #define __LIBTHERMAL_H #include <linux/thermal.h> +#include <sys/types.h> #ifndef LIBTHERMAL_API #define LIBTHERMAL_API __attribute__((visibility("default"))) #endif +#ifndef THERMAL_THRESHOLD_WAY_UP +#define THERMAL_THRESHOLD_WAY_UP 0x1 +#endif + +#ifndef THERMAL_THRESHOLD_WAY_DOWN +#define THERMAL_THRESHOLD_WAY_DOWN 0x2 +#endif + #ifdef __cplusplus extern "C" { #endif @@ -31,6 +40,11 @@ struct thermal_events_ops { int (*cdev_delete)(int cdev_id, void *arg); int (*cdev_update)(int cdev_id, int cur_state, void *arg); int (*gov_change)(int tz_id, const char *gov_name, void *arg); + int (*threshold_add)(int tz_id, int temperature, int direction, void *arg); + int (*threshold_delete)(int tz_id, int temperature, int direction, void *arg); + int (*threshold_flush)(int tz_id, void *arg); + int (*threshold_up)(int tz_id, int temp, int prev_temp, void *arg); + int (*threshold_down)(int tz_id, int temp, int prev_temp, void *arg); }; struct thermal_ops { @@ -45,12 +59,18 @@ struct thermal_trip { int hyst; }; +struct thermal_threshold { + int temperature; + int direction; +}; + struct thermal_zone { int id; int temp; char name[THERMAL_NAME_LENGTH]; char governor[THERMAL_NAME_LENGTH]; struct thermal_trip *trip; + struct thermal_threshold *thresholds; }; struct thermal_cdev { @@ -74,12 +94,16 @@ typedef int (*cb_tt_t)(struct thermal_trip *, void *); typedef int (*cb_tc_t)(struct thermal_cdev *, void *); +typedef int (*cb_th_t)(struct thermal_threshold *, void *); + LIBTHERMAL_API int for_each_thermal_zone(struct thermal_zone *tz, 
cb_tz_t cb, void *arg); LIBTHERMAL_API int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg); LIBTHERMAL_API int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg); +LIBTHERMAL_API int for_each_thermal_threshold(struct thermal_threshold *th, cb_th_t cb, void *arg); + LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz, const char *name); @@ -124,6 +148,22 @@ LIBTHERMAL_API thermal_error_t thermal_cmd_get_governor(struct thermal_handler * LIBTHERMAL_API thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz); +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_get(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_add(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_delete(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_flush(struct thermal_handler *th, + struct thermal_zone *tz); + /* * Netlink thermal samples */ diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map index d5e77738c7a4..d657176aa47f 100644 --- a/tools/lib/thermal/libthermal.map +++ b/tools/lib/thermal/libthermal.map @@ -4,6 +4,7 @@ LIBTHERMAL_0.0.1 { for_each_thermal_zone; for_each_thermal_trip; for_each_thermal_cdev; + for_each_thermal_threshold; thermal_zone_find_by_name; thermal_zone_find_by_id; thermal_zone_discover; @@ -17,6 +18,10 @@ LIBTHERMAL_0.0.1 { thermal_cmd_get_trip; thermal_cmd_get_governor; thermal_cmd_get_temp; + thermal_cmd_threshold_get; + thermal_cmd_threshold_add; + thermal_cmd_threshold_delete; + thermal_cmd_threshold_flush; thermal_sampling_init; thermal_sampling_handle; thermal_sampling_fd; diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c index 
70577423a9f0..f67c1f9ea1d7 100644 --- a/tools/lib/thermal/sampling.c +++ b/tools/lib/thermal/sampling.c @@ -16,6 +16,8 @@ static int handle_thermal_sample(struct nl_msg *n, void *arg) struct thermal_handler_param *thp = arg; struct thermal_handler *th = thp->th; + arg = thp->arg; + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); switch (genlhdr->cmd) { diff --git a/tools/lib/thermal/thermal.c b/tools/lib/thermal/thermal.c index 72a76dc205bc..6f02e3539159 100644 --- a/tools/lib/thermal/thermal.c +++ b/tools/lib/thermal/thermal.c @@ -1,10 +1,24 @@ // SPDX-License-Identifier: LGPL-2.1+ // Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> #include <stdio.h> +#include <limits.h> #include <thermal.h> #include "thermal_nl.h" +int for_each_thermal_threshold(struct thermal_threshold *th, cb_th_t cb, void *arg) +{ + int i, ret = 0; + + if (!th) + return 0; + + for (i = 0; th[i].temperature != INT_MAX; i++) + ret |= cb(&th[i], arg); + + return ret; +} + int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg) { int i, ret = 0; @@ -80,6 +94,9 @@ static int __thermal_zone_discover(struct thermal_zone *tz, void *th) if (thermal_cmd_get_trip(th, tz) < 0) return -1; + if (thermal_cmd_threshold_get(th, tz)) + return -1; + if (thermal_cmd_get_governor(th, tz)) return -1; |